#!/usr/bin/perl -w
#
# Setup half of the collector: loads connection settings, downloads the
# raw listing from roar.eprints.org, builds today's YYYYMMDD date stamp
# and opens the MySQL connection used by the rest of the script.
#
use strict;
use LWP::Simple;
use DBI;

# Connection settings read below.
# NOTE(review): assumes db_connect.inc assigns $userid, $passwd and $host
# as package globals in main - confirm against that file.
our ($userid, $passwd, $host);

# NOTE(review): perl 5.26+ no longer has "." in @INC, so this bare
# relative name is only found if its directory is on @INC - confirm how
# the script is deployed before changing it to "./db_connect.inc".
require("db_connect.inc");

# In development mode (use dev db, more output), 0 for normal/live
my $dev = "0";

if ($dev) {
    print "user $userid and password $passwd on host $host \n";
}

# details for open database for recording
my $db             = "ircount";
my $connectionInfo = "dbi:mysql:$db;$host";
my $tbl_ircount    = $dev ? "ircount_dev" : "ircount";

#
# get the data from eprints.org
#

# first set the url
my $URL = "http://roar.eprints.org/rawlist.txt";

# now do the actual getting, easy thanks to LWP::Simple
# (get returns undef on any failure)
my $content = get($URL);
die "Couldn't get it!" unless defined $content;

# split content into separate lines.
# The -1 limit keeps trailing empty fields: without it split drops the
# blank line that terminates the last record in the listing.
my @lines = split /\n/, $content, -1;

# set up vars (file-scoped, available to the rest of the script)
my $data;
my $field;
my $save;
my $id;
my $name;
my $records = 0;
my $identifier;
my $currentdate;
my $query;
my $country;
my $type;

# get current date; localtime returns a 0-based month and a year
# counted from 1900
my ($day, $month, $year) = (localtime)[3, 4, 5];
$year += 1900;

# single digit days and months should start with a 0
$month = sprintf("%02d", $month + 1);
$day   = sprintf("%02d", $day);

$currentdate = "$year$month$day";

# make connection to database; stop here if it fails rather than
# failing later on the first method call on an undefined handle
my $dbh = DBI->connect($connectionInfo, $userid, $passwd)
    or die "Couldn't connect to database: $DBI::errstr\n";

# make sure we use utf8, especially for the archive names
# (and make sure tbl and field are utf8 general as well!)
# Put the connection into utf8 mode so archive names are stored intact.
$dbh->{'mysql_enable_utf8'} = 1;
$dbh->do('SET NAMES utf8');

# open file for recording data
open(my $logfile, '>>', 'datalog.txt') or die "can not open datalog \n";
print {$logfile} "METAMETA\t$year $month $day\tstart\n";

# One prepared statement with placeholders, reused for every record.
# The values come from a remote text file, so they must never be
# interpolated into the SQL text.
my $insert = $dbh->prepare(
      "INSERT INTO $tbl_ircount (archiveid, archivename, records,"
    . " currentdate, identifier, country, collected_date)"
    . " VALUES (?, ?, ?, ?, ?, ?, now())"
) or die "Couldn't prepare insert: " . $dbh->errstr . "\n";

# Input field name -> key in %record. Fields not listed here are ignored.
my %record_key_for = (
    title            => 'name',
    recordcount      => 'records',
    eprintid         => 'identifier',
    location_country => 'country',
    type             => 'type',
);

# State of a record before any of its fields have been read.
my %blank_record = (
    name       => "",
    records    => "0",
    identifier => "",
    country    => "",
    type       => "",
);
my %record = %blank_record;

########################################
# main loop
#
# Each record has one "field: value" pair per line, and records are
# separated by an empty line. The extra "" appended to the list acts as
# a final separator so the last record is written even when the input
# does not end with a blank line.
foreach my $line (@lines, "") {

    if ($line eq "") {
        # blank line means we have reached the end of this record;
        # only records of type "institutional" are written out
        if ($record{type} eq "institutional") {

            # write the record out to file
            my $save = "$record{identifier}\t$record{name}\t$record{records}";
            $save .= "\t$record{identifier}\t$currentdate\t$record{country}\n";
            print {$logfile} $save;

            # write record to db
            if ($record{identifier} eq "") {
                # archiveid would be empty; do not insert a bogus row
                warn "record \"$record{name}\" has no eprintid, not inserted\n";
            }
            else {
                # best effort: report a failed insert and carry on
                $insert->execute(
                    $record{identifier}, $record{name},
                    $record{records},    $currentdate,
                    $record{identifier}, $record{country},
                ) or warn "insert failed for archive $record{identifier}: "
                        . $insert->errstr . "\n";
            }
        }

        # clear values ready for the next record - also after a skipped
        # record, so its fields cannot leak into the following one
        %record = %blank_record;
        next;
    }

    # this splits and removes the space after the colon
    my ($field, $data) = split /: /, $line, 2;

    # skip lines without a value (a literal "0" is a value)
    next unless defined $data && length $data;

    my $key = $record_key_for{$field};
    next unless defined $key;

    if ($key eq 'name') {
        # Stored names have always had double quotes replaced by a
        # space; keep doing so (for every quote, not only the first)
        # so names stay comparable with rows already collected.
        $data =~ s/"/ /g;
    }

    $record{$key} = $data;
}

close($logfile) or warn "could not close datalog: $!\n";
$dbh->disconnect;

#byeeeeeeeeeeeeeeeeeeeeeeee
exit;