#!perl
# authprof.pl written 21.04.97 by Reini Urban, <rurban@xarch.tu-graz.ac.at>
# Counts the number of news postings made by certain persons, as reported
# by the current dejanews database (which is a few weeks old).
# Reads the previously stored counts from the file "authors" and
# stores the new counts in the file "authors.new".
# Yet another problem is to get enough names per domain...
#----------------------------
# Win32 version (use the Internet module from
#    http://www.divinf.it/dada/perl/internet)
# for *unix* use LWP instead
#----------------------------

# Run-time switches; edit the values here.
$debug = 1;        # true: save every fetched page as "<name>.html"
$fast  = undef;    # true: read the page in pieces and stop once the count is seen (does not work yet)
$old   = undef;    # true: only old articles up to 3/11/97 (http://www.dejanews.com/forms/dnq.html)
#$both=();   #double time;

use Win32::Internet;
# Create the Internet session object that every URL request below goes
# through.  Per the Win32::Internet documentation the constructor returns
# undef on errors, so stop here with a clear message instead of failing
# later with a method call on an undefined value.
$INET = Win32::Internet->new()
    or die "authprof: cannot create a Win32::Internet session\n";

# Authors that are always looked up, whether or not the stored "authors"
# file mentions them.
@local_authors = (
    'Reini Urban',
    'Heidrun Kirchweger',
    'Michael Resch',
    'Herbert Rosmanith',
    'Alexandra Bader',
);

# Every built-in author starts with a count of 0; stored counts read later
# may overwrite these entries.
@authors{@local_authors} = (0) x @local_authors;

# Load the previously stored counts.  A usable line has the form
# "<name>,<whitespace><count>" (the same shape as the records printed to
# AUTH further down); lines that do not match are ignored.  A missing or
# unreadable file is not fatal: the authors seeded above simply keep their
# initial counts, but the problem is reported instead of passing silently.
if (open(AUTH, '<', 'authors')) {
    while (my $line = <AUTH>) {
        if ($line =~ m/(^[^,]+),\s*(\d+)/) {
            $authors{$1} = $2;
        }
    }
    close AUTH;
}
else {
    warn "authprof: cannot read stored counts from 'authors': $!\n";
}

# From here on AUTH is the output handle; new records are appended to
# "authors.new".  Without it no result could be stored, so fail early
# instead of silently discarding every record.
open(AUTH, '>>', 'authors.new')
    or die "authprof: cannot append to 'authors.new': $!\n";

# Query dejanews for every known author (highest stored count first), record
# the reported posting count in %authors and append it to the AUTH handle.
#
# Reads:  %authors, $INET, $fast, $old, $debug.
# Writes: %authors; one "\n<author>,\t<count>" record per successful lookup
#         to AUTH; progress messages to STDERR; with $debug set, the fetched
#         page to "<name>.html" (spaces in the name replaced by "+").
#
# A lookup that fails for any reason (the URL cannot be opened, nothing could
# be read, or the page lacks the "N unique articles posted" phrase) sets the
# author's count to 0 and reports " oops!" exactly once, in both read modes.
foreach my $author (sort ByNum keys %authors) {
    print STDERR "\n$author\t $authors{$author} -> ";

    # The query string and the debug file name both use the '+'-joined name.
    (my $name = $author) =~ tr/ /+/;
    my $url = "http://search.dejanews.com/profile.xp?author=" . $name;
    $url .= "&svcclass=dnold" if $old;    # -3/11/97

    my $chunk = '';    # page text read so far
    my $count;         # stays undef until the page yields a posting count
    my $URL;           # filled in by OpenURL on success

    # OpenURL is documented to return undef on errors; only touch $URL when
    # the request really produced an object.
    if ($INET->OpenURL($URL, $url) && ref $URL) {
        if ($fast) {
            # Read in pieces and stop as soon as the count has been seen.
            my $buffersize = 3000;    # 3000 bytes, that is enough (about 2200 in the header)
            my $howmuch    = $URL->QueryDataAvailable();
            while (defined $howmuch && $howmuch > 0) {
                my $piece = $URL->ReadFile(($howmuch < $buffersize) ? $howmuch : $buffersize);
                last unless defined $piece;    # read error: give up instead of looping on
                $chunk .= $piece;
                print STDERR " ";
                if ($chunk =~ m/(\d+) unique articles posted/im) {
                    $count = $1;
                    last;
                }
                $howmuch = $URL->QueryDataAvailable();
            }
        }
        else {    # slow but works
            $chunk = $URL->ReadEntireFile();
            $chunk = '' unless defined $chunk;
            $count = $1 if $chunk =~ m/(\d+) unique articles posted/im;
        }

        # Release the connection in both read modes, not only the fast one.
        $URL->Close;
    }

    if (defined $count) {
        $authors{$author} = $count;
        my $label = $fast ? "  new: " : " ";
        print STDERR "$label$count";
        print AUTH "\n$author,\t", $count;
    }
    else {
        $authors{$author} = 0;
        print STDERR " oops!";
    }

    if ($debug) {
        # Keep the raw page for inspection.  Three-argument open so that the
        # name, which comes from the data file, is never read as an open mode.
        if (open(my $log, '>', "$name.html")) {
            print {$log} $chunk;
            close($log) or warn "authprof: error writing '$name.html': $!\n";
        }
        else {
            warn "authprof: cannot write '$name.html': $!\n";
        }
    }
}

# Print the final table to STDOUT, highest count first.
print "\nTable:\n";
foreach my $author (sort ByNum keys %authors) {
    print "\n$author:\t", $authors{$author};
}

# AUTH is the output handle opened on "authors.new".  Buffered write errors
# only surface when the handle is closed, so report a failed close.
close(AUTH) or warn "authprof: error closing 'authors.new': $!\n";

# Sort comparator for author names (used as "sort ByNum keys %authors").
# Orders by the count stored in %authors, highest first.  Authors with equal
# counts are ordered alphabetically, so the result does not depend on the
# hash's internal key order and is the same on every run.
sub ByNum {
    return $authors{$b} <=> $authors{$a}
        || $a cmp $b;
}
