comics/fetch.pl.new - view

File: [Local Repository] / comics / fetch.pl.new
Revision 1.4: download - view: text, annotated - select for diffs
Fri Sep 23 01:36:54 2011 UTC (13 years, 1 month ago) by nick
Branches: MAIN
CVS tags: HEAD

Added quick-and-dirty code that finds images wider than 640 pixels and resizes them to a width of 640 pixels.

#!/usr/bin/perl -w

use strict;
use warnings;
use File::Path;
use Data::Dumper;

##
## Some default values
##

## NOTE: the CVS keyword expansion contains '/' characters (the date), so the
## literal must not use '/' as its quoting delimiter.
my $ver        = q{$Id: fetch.pl.new,v 1.4 2011/09/23 01:36:54 nick Exp $};
my $comicFile  = "comics.conf";
my %comics     = readComicConfig($comicFile);
my %dates      = fetchDates();
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = $baseDir . "/"
               . ( $comics{'configs'}{'image_directory'} || "images" )
               . "/$dates{'mon2'}$dates{'year2'}";
my $indexDir   = $baseDir . "/"
               . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

## Comics wider than this many pixels are scaled down after downloading.
my $MAX_WIDTH  = 640;

my $DATE = `date`;
chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
checkDir( [ $imageDir, $indexDir ] );

## NOTE(review): the title writer is disabled, so this script only appends
## comic rows and the footer to the daily index file.
#writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## The 'configs' entry holds the key = value settings, not a comic.
    next if ( $comic =~ m/config/ );

    $comics{$comic}{'error'} = downloadComic( \%comics, $comic, \%dates );
    writeComic( \%comics, $comic, \%dates );
}

## Use the same day stamp the downloads were saved under rather than asking
## the system clock a second time.
my $D = $dates{'day2'};
print "Finding in $imageDir/*-$D.jpg\n";
foreach my $file ( glob("$imageDir/*-$D.jpg") ) {
    resizeIfWide( $file, $MAX_WIDTH );
}

## writeMainIndex ( \%dates );
writeFooter( \%dates );

$DATE = `date`;
chomp($DATE);
print STDOUT "Completed comic fetch at $DATE\n";

## End

#######################################################################
## Function :    downloadComic
##
## Description :
##   Determines the download method configured for the comic and calls
##   the appropriate function: mode 1 scrapes an index page
##   (indexDownload), mode 2 builds the image URL from a template
##   (directDownload).
##
## Arguments :   $comics - reference to the comic configuration hash
##               $comic  - key of the comic to download
##               $date   - reference to the hash built by fetchDates
##
## Returns :     0 on success, otherwise an error string.  An unknown
##               mode yields an error string.
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    my $mode = $comics->{$comic}{'mode'};
    $mode = '' unless defined $mode;

    return indexDownload( $comics, $comic, $date )  if ( $mode eq '1' );
    return directDownload( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}

#######################################################################
## Function :    readComicConfig
##
## Description :
##   Reads the comic configuration file.  Lines containing at least two
##   commas describe a comic as
##       key,url,search,mode,fullName,ext
##   and lines of the form "name = value" are stored under the special
##   'configs' key.  Lines starting with '#' are ignored.
##
## Arguments :   $comicFile - path of the configuration file
##
## Returns :     the configuration as a hash (list).  If the file cannot
##               be opened a warning is printed and the hash is empty.
#######################################################################
sub readComicConfig {
    my ($comicFile) = @_;
    my %comicConfig = ();

    my $config;
    if ( !open $config, '<', $comicFile ) {
        warn "WARNING: Can't open comic config file $comicFile: $!\n";
        return %comicConfig;
    }

    while ( my $line = <$config> ) {
        ## Drop the line ending so the last field does not carry a newline.
        $line =~ s/\r?\n\z//;
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            my @res = split /,/, $line;
            @{ $comicConfig{ $res[0] } }{qw(url search mode fullName ext)}
                = @res[ 1 .. 5 ];
            $comicConfig{ $res[0] }{'error'} = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close $config;

    return %comicConfig;
}

#######################################################################
## Function :    indexFileFor
##
## Description :
##   Builds the path of the daily HTML index file inside $indexDir.
##
## Arguments :   $date - reference to the hash built by fetchDates
##
## Returns :     the index file path.
#######################################################################
sub indexFileFor {
    my ($date) = @_;

    return $indexDir . "/index-"
         . $date->{'year2'} . $date->{'mon2'} . $date->{'day2'} . ".html";
}

#######################################################################
## Function :    escapeHtml
##
## Description :
##   Escapes the characters that are special in XHTML text and
##   attribute values.  Used for URLs, which routinely contain '&'.
##
## Arguments :   $text - the raw text (undef is treated as empty)
##
## Returns :     the escaped text.
#######################################################################
sub escapeHtml {
    my ($text) = @_;
    return '' unless defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

#######################################################################
## Function :    shellQuote
##
## Description :
##   Wraps a value in single quotes so it reaches a shell command as
##   exactly one argument, whatever characters it contains.
##
## Arguments :   $text - the raw value (undef is treated as empty)
##
## Returns :     the quoted value.
#######################################################################
sub shellQuote {
    my ($text) = @_;
    $text = '' unless defined $text;

    $text =~ s/'/'\\''/g;

    return "'$text'";
}

#######################################################################
## Function :    writeComic
##
## Description :
##   Appends one table row for the comic to the daily index file: the
##   image when the download succeeded, the error text otherwise.
##   The comic's full name and the error text are written as-is; only
##   the URL is escaped.
##
## Arguments :   $comics - reference to the comic configuration hash
##               $comic  - key of the comic to write
##               $date   - reference to the hash built by fetchDates
##
## Returns :     0 on success, 1 if the index file could not be opened.
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;

    my $indexFile = indexFileFor($date);
    my $name      = $comics->{$comic}{'fullName'};
    my $url       = escapeHtml( $comics->{$comic}{'url'} );
    my $error     = $comics->{$comic}{'error'};
    my $monthDir  = $date->{'mon2'} . $date->{'year2'};
    my $day       = $date->{'day2'};

    $name = '' unless defined $name;

    my $index;
    if ( !open $index, '>>', $indexFile ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return 1;
    }

    if ( !$error ) {
        print {$index} <<EOF;
<tr>
<td align="left">
$name
<a href="$url">
$url
</a>
<img src="../images/$monthDir/$comic-$day.jpg" alt="$comic-$day" />
</td></tr>
EOF
    }
    else {
        ## The error row opens its own <tr>/<td> so the table stays balanced.
        print {$index} <<EOF;
<tr>
<td align="left">
$name
<a href="$url">
$url
</a>
$comic : $error
</td>
</tr>
EOF
    }

    close $index or warn "WARNING: Can't close $indexFile: $!\n";

    return 0;
}

#######################################################################
## Function :    writeMainIndex
##
## Description :
##   Placeholder; currently does nothing and is not called.
##
## Arguments :   $date - reference to the hash built by fetchDates
#######################################################################
sub writeMainIndex {
    my ($date) = @_;
    return;
}

#######################################################################
## Function :    writeFooter
##
## Description :
##   Appends the closing markup (table end, generation time, script
##   version, validator badge) to the daily index file.
##
## Arguments :   $date - reference to the hash built by fetchDates
#######################################################################
sub writeFooter {
    my ($date) = @_;

    my $indexFile = indexFileFor($date);
    my $sysDate   = `date`;
    chomp $sysDate;

    my $index;
    if ( !open $index, '>>', $indexFile ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
Generated on: $sysDate
Version: $ver
<a href="http://validator.w3.org/check?uri=referer"><img src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</center>
</body>
</html>
EOF

    close $index or warn "WARNING: Can't close $indexFile: $!\n";
    return;
}

#######################################################################
## Function :    checkDir
##
## Description :
##   Creates every listed directory that does not exist yet, including
##   missing parent directories.
##
## Arguments :   a list of directory paths and/or references to arrays
##               of directory paths
#######################################################################
sub checkDir {
    my @dirs = map { ref $_ eq 'ARRAY' ? @{$_} : $_ } @_;

    foreach my $dir (@dirs) {
        next if ( -d $dir );
        mkpath($dir);
    }
    return;
}

#######################################################################
## Function :    writeTitle
##
## Description :
##   Creates (truncating any existing file) the daily index file and
##   writes the XHTML header, page title and opening table tag.
##
## Arguments :   $date - reference to the hash built by fetchDates
#######################################################################
sub writeTitle {
    my ($date) = @_;

    my $indexFile = indexFileFor($date);
    my $today     = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    my $index;
    if ( !open $index, '>', $indexFile ) {
        warn "WARNING: Can't create $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF

    close $index or warn "WARNING: Can't close $indexFile: $!\n";
    return;
}

#######################################################################
## Function :    fetchImage
##
## Description :
##   Downloads one image with wget (sending the configured user agent
##   and the comic page as referer) and pipes it through ImageMagick's
##   convert so it is stored as a JPEG in $imageDir.  Every value is
##   shell-quoted because URLs commonly contain '&', '?' or ';'.
##
## Arguments :   $comics   - reference to the comic configuration hash
##               $comic    - key of the comic being downloaded
##               $date     - reference to the hash built by fetchDates
##               $imageURL - URL of the image to fetch
##
## Returns :     0 on success, otherwise an error string.
#######################################################################
sub fetchImage {
    my ( $comics, $comic, $date, $imageURL ) = @_;

    my $target = "$imageDir/$comic-$date->{'day2'}.jpg";
    my $cmd    = join ' ',
        'wget', '-q',
        '--user-agent=' . shellQuote($USER_AGENT),
        '--referer=' . shellQuote( $comics->{$comic}{'url'} ),
        shellQuote($imageURL), '-O', '-',
        '|', '/usr/bin/convert', '-', shellQuote("jpeg:$target");

    my $status = system($cmd);
    return 0 if ( $status == 0 );

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'} (status $status)";
}

#######################################################################
## Function :    directDownload
##
## Description :
##   Download mode 2: the comic's 'search' field is a URL template; the
##   date placeholders are filled in by parseComic and the resulting
##   URL is fetched.
##
## Arguments :   $comics - reference to the comic configuration hash
##               $comic  - key of the comic to download
##               $date   - reference to the hash built by fetchDates
##
## Returns :     0 on success, otherwise an error string.
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;

    my $file = parseComic( $comics, $comic, $date );

    return fetchImage( $comics, $comic, $date, $file );
}

#######################################################################
## Function :    indexDownload
##
## Description :
##   Download mode 1: fetches the comic's web page, applies the comic's
##   'search' regular expression to every line and uses the first
##   capture group of the last matching line as the image location.
##   Locations that do not contain "http" are appended to the
##   scheme-and-host part of the comic URL.
##
## Arguments :   $comics - reference to the comic configuration hash
##               $comic  - key of the comic to download
##               $date   - reference to the hash built by fetchDates
##
## Returns :     0 on success, otherwise an error string.
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;

    my $pageURL    = $comics->{$comic}{'url'};
    my $comicIndex = "$indexDir/index.$comic";

    ## List form: the URL never passes through a shell.
    system( 'wget', '-q', $pageURL, '-O', $comicIndex );

    my $page;
    if ( !open $page, '<', $comicIndex ) {
        return "ERROR: Can't open index file for "
             . $comics->{$comic}{'fullName'} . " (" . $pageURL . ")";
    }
    my @lines = <$page>;
    close $page;
    unlink $comicIndex;

    ## Reduce the page URL to scheme://host for resolving relative links.
    my $mainURL = $pageURL;
    if ( $mainURL =~ m{\A(\w+://[^/]+)} ) {
        $mainURL = $1;
    }

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $search  = $comics->{$comic}{'search'};
    my $pattern = ( defined $search && length $search )
                ? eval { qr/$search/ }
                : undef;

    my $comicLine;
    if ( defined $pattern ) {
        foreach my $line (@lines) {
            ## Only trust $1 when the pattern actually captured something.
            if ( $line =~ $pattern && defined $1 ) {
                $comicLine = $1;
                chomp $comicLine;
            }
        }
    }

    if ( defined $comicLine ) {
        $comicLine =~ s/\A\s+//;
        $comicLine =~ s/\s+\z//;
    }

    ##
    ## Save the file to the appropriate directory
    ##
    if ($comicLine) {
        if ( $comicLine =~ m/(gif|jpg|png)/i ) {
            $comics->{$comic}{'ext'} = $1;
        }
        my $comicURL = ( $comicLine =~ m/http/ )
                     ? $comicLine
                     : $mainURL . $comicLine;

        return fetchImage( $comics, $comic, $date, $comicURL );
    }

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}

#######################################################################
## Function :    parseComic
##
## Description :
##   Expands the placeholders __year__, __year2__, __mon__, __mon2__,
##   __day__, __day2__ and __ext__ in the comic's 'search' field.
##
## Arguments :   $comics - reference to the comic configuration hash
##               $comic  - key of the comic
##               $date   - reference to the hash built by fetchDates
##
## Returns :     the expanded string without a trailing newline.
#######################################################################
sub parseComic {
    my ( $comics, $comic, $date ) = @_;

    my $string = $comics->{$comic}{'search'};
    $string = '' unless defined $string;

    foreach my $field (qw(year year2 mon mon2 day day2)) {
        $string =~ s/__${field}__/$date->{$field}/g;
    }

    my $ext = $comics->{$comic}{'ext'};
    $ext = '' unless defined $ext;
    $string =~ s/__ext__/$ext/g;

    chomp $string;
    return $string;
}

#######################################################################
## Function :    resizeIfWide
##
## Description :
##   Asks ImageMagick's identify for the image width and, when it is
##   larger than the limit, rewrites the file in place scaled to that
##   width.  Both tools are run without a shell.
##
## Arguments :   $file     - path of the image
##               $maxWidth - maximum allowed width in pixels
#######################################################################
sub resizeIfWide {
    my ( $file, $maxWidth ) = @_;

    my $identify;
    if ( !open $identify, '-|', '/usr/bin/identify', $file ) {
        warn "WARNING: Can't run identify on $file: $!\n";
        return;
    }
    my @report = <$identify>;
    close $identify;

    ## identify prints e.g. "name.jpg JPEG 800x600 800x600+0+0 ..."
    my $width;
    if ( @report && $report[0] =~ m/\s(\d+)x\d+/ ) {
        $width = $1;
    }
    return unless ( defined $width && $width > $maxWidth );

    system( '/usr/bin/convert', '-resize', $maxWidth, $file, $file ) == 0
        or warn "WARNING: Could not resize $file\n";
    return;
}

#######################################################################
## Function :    fetchDates
##
## Description :
##   Builds the date values used for file names and URL templates from
##   the current local time.
##
## Returns :     a hash (list) with the keys
##                 day, mon, year - numeric day, month (1-12), 4-digit year
##                 day2, mon2     - zero-padded two-digit day and month
##                 year2          - last two digits of the year
##                 dow            - day of week as returned by localtime
#######################################################################
sub fetchDates {
    my %dates = ();

    @dates{qw(day mon year dow)} = (localtime)[ 3, 4, 5, 6 ];

    ## If you missed a day or two, reflect it here by subtracting the number
    ## of days to go back (no month roll-over is performed):
    # $dates{'day'} -= 1;

    $dates{'year'} += 1900;
    $dates{'mon'}++;

    $dates{'year2'} = substr $dates{'year'}, 2, 2;
    $dates{'day2'}  = sprintf '%02d', $dates{'day'};
    $dates{'mon2'}  = sprintf '%02d', $dates{'mon'};

    return %dates;
}