comics/fetch.pl.new - view

File: [Local Repository] / comics / fetch.pl.new
Revision 1.16: download - view: text, annotated - select for diffs
Thu Feb 5 18:05:58 2015 UTC (9 years, 8 months ago) by nick
Branches: MAIN
CVS tags: HEAD

Changed the background and added a fancy title.

#!/usr/bin/perl -w
###############################################################################
# $Log: fetch.pl.new,v $
# Revision 1.16  2015/02/05 18:05:58  nick
# Changed the background and added a fancy title.
#
# Revision 1.15  2015/01/19 13:46:19  nick
# *** empty log message ***
#
###############################################################################

use strict;
use warnings;
use File::Path;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long;
use Date::Calc qw/Date_to_Text_Long Today/;

##
## Some default values
##
my $ver       = '$Id: fetch.pl.new,v 1.16 2015/02/05 18:05:58 nick Exp $';
my $comicFile = "comics.conf";

## Options are parsed before the configuration is read so that --help and
## --man keep working even when the configuration file is missing.
my %opts     = fetchOptions();
my %comics   = readComicConfig($comicFile);
my $days_ago = $opts{'days'} || 0;
my %dates    = fetchDates($days_ago);

my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = $baseDir . "/"
    . ( $comics{'configs'}{'image_directory'} || "images" )
    . "/$dates{'mon2'}$dates{'year2'}";
my $indexDir = $baseDir . "/"
    . ( $comics{'configs'}{'index_directory'} || "indexes" );

my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

print STDOUT "Starting comic fetch at " . _currentDate() . "\n";

##
## Main program starts here
##
checkDir( $imageDir, $indexDir );
writeTitle( \%dates );

## The weekday has to come from the 'dow' index; 'day2' is the two digit day
## of the month and can never be equal to a weekday name.
my $is_sunday = ( $days[ $dates{'dow'} ] eq "Sunday" );

foreach my $comic ( sort keys %comics ) {
    ## 'configs' holds the "key = value" settings, it is not a comic.
    next if ( $comic eq 'configs' );

    if ( $is_sunday && $comics{$comic}{'sunday'} == 0 ) {
        print "Skipping $comic.\n";
        next;
    }

    $comics{$comic}{'error'} = downloadComic( \%comics, $comic, \%dates );
    writeComic( \%comics, $comic, \%dates );
}

##
## Shrink every image fetched for this day that is wider than 640 pixels.
##
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";
foreach my $file ( glob("$imageDir/*-$dates{'day2'}.jpg") ) {
    my $width = _imageWidth($file);
    next if ( !defined $width || $width <= 640 );

    ## List form of system(): the file name never passes through a shell.
    system( '/usr/bin/convert', $file, '-resize', '640', $file ) == 0
        or warn "WARNING: Could not resize $file\n";
}

## writeMainIndex( \%dates );
writeFooter( \%dates );

print STDOUT "Completed comic fetch at " . _currentDate() . "\n";

## End

#######################################################################
## Function    : _currentDate
##
## Description :
##   Returns the output of the system 'date' command without its
##   trailing newline.  Falls back to Perl's own localtime string when
##   the command produces nothing.
##
#######################################################################
sub _currentDate {
    my $now = `date`;
    $now = scalar localtime if ( !defined $now || $now !~ m/\S/ );
    chomp $now;
    return $now;
}

#######################################################################
## Function    : _shellQuote
##
## Description :
##   Wraps a string in single quotes (escaping embedded single quotes)
##   so it can be placed safely in a command line run by the shell.
##
#######################################################################
sub _shellQuote {
    my ($text) = @_;
    $text = '' if ( !defined $text );
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

#######################################################################
## Function    : _imageWidth
##
## Description :
##   Asks /usr/bin/identify for the pixel width of an image file.
##   Returns the width, or nothing when identify cannot be run or
##   prints no number.
##
#######################################################################
sub _imageWidth {
    my ($file) = @_;

    ## List form pipe open: no shell is involved.  The format string is
    ## single quoted on purpose, identify itself expands the "\n".
    open my $identify, '-|', '/usr/bin/identify', '-format', '%w\n', $file
        or return;
    my $first = <$identify>;
    close $identify;

    return if ( !defined $first || $first !~ m/\A(\d+)/ );
    return $1;
}

#######################################################################
## Function    : _indexFilePath
##
## Description :
##   Builds the name of the daily HTML index for the given date hash:
##   <index dir>/index-<yy><mm><dd>-<Day>.html where <Day> is the three
##   letter weekday abbreviation.
##
#######################################################################
sub _indexFilePath {
    my ($date) = @_;
    my $sd = substr( $days[ $date->{'dow'} ], 0, 3 );
    return $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'}
        . $date->{'day2'} . "-" . $sd . ".html";
}

#######################################################################
## Function    : _fetchImage
##
## Description :
##   Downloads $imageURL with wget (sending the script's user agent and
##   $referer) and pipes it through ImageMagick's convert, which stores
##   it as a JPEG at $target.  Every value placed in the command line is
##   shell quoted.
##
##   Returns the raw status of system(): 0 on success.
##
#######################################################################
sub _fetchImage {
    my ( $imageURL, $referer, $target ) = @_;

    my $cmd = join ' ',
        'wget', '-q',
        '--user-agent=' . _shellQuote($USER_AGENT),
        '--referer=' . _shellQuote($referer),
        _shellQuote($imageURL),
        '-O', '-',
        '|', '/usr/bin/convert', '-', _shellQuote("jpeg:$target");

    return system($cmd);
}

#######################################################################
## Function    : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function:
##     mode 1 - indexDownload  (scrape the comic's page for the image)
##     mode 2 - directDownload (build the image URL from a template)
##
##   Returns 0 on success or an error string.  If the mode is invalid
##   an error string is returned.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( !defined $mode );
    $mode =~ s/\s+//g;

    ## Pass the reference we were given rather than the global hash.
    return indexDownload( $comics, $comic, $date )  if ( $mode eq '1' );
    return directDownload( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}

#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Two kinds of lines are used:
##
##     name,url,search,mode,fullName,ext,sunday
##         One comic.  __YEAR__, __MON__ and __DAY__ are replaced with
##         the current date.  'sunday' defaults to 1 when it is absent.
##
##     key = value
##         A general setting, stored under the 'configs' key.
##
##   Lines starting with '#' and blank lines are ignored.
##
##   Returns the configuration hash.  Dies when the file can not be
##   opened.
##
#######################################################################
sub readComicConfig {
    my ($comicFile) = @_;
    my %comicConfig = ();

    my ( $day, $mon, $year ) = ( localtime(time) )[ 3, 4, 5 ];
    $year += 1900;
    $mon = sprintf( "%02d", $mon + 1 );
    $day = sprintf( "%02d", $day );

    open my $config, '<', $comicFile
        or die "ERROR: Can't read comic configuration '$comicFile': $!\n";

    while ( my $line = <$config> ) {
        ## Drop the line ending so the last field does not carry it.
        $line =~ s/\r?\n\z//;
        next if ( $line =~ m/^#/ || $line !~ m/\S/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my ( $name, $url, $search, $mode, $fullName, $ext, $sunday )
                = split /,/, $line;

            ## Only a missing or blank field means "publish on Sunday";
            ## an explicit 0 has to survive (a plain "|| 1" would eat it).
            $sunday = 1 if ( !defined $sunday || $sunday !~ m/\S/ );
            $sunday =~ s/\s+//g;

            $comicConfig{$name} = {
                'url'      => $url,
                'search'   => $search,
                'mode'     => $mode,
                'fullName' => ( defined $fullName ? $fullName : $name ),
                'ext'      => $ext,
                'sunday'   => $sunday,
                'error'    => 0,
            };
        }
        elsif ( $line =~ m/^\s*(.*?)\s+=\s+(.*?)\s*$/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close $config;

    return %comicConfig;
}

#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for the comic to the daily index: the image
##   when the download worked, or the error text stored in
##   $comics->{$comic}{'error'} when it did not.
##
##   Returns 0, or 1 when the index file can not be opened.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;

    my $indexFile = _indexFilePath($date);
    my $name      = $comics->{$comic}{'fullName'};
    my $url       = $comics->{$comic}{'url'};
    my $error     = $comics->{$comic}{'error'};
    my $label     = $comic . "-" . $date->{'day2'};
    my $image     = "../images/" . $date->{'mon2'} . $date->{'year2'}
        . "/" . $label . ".jpg";

    my $index;
    if ( !open $index, '>>', $indexFile ) {
        warn "ERROR: Can't append to $indexFile: $!\n";
        return 1;
    }

    if ( !$error ) {
        print {$index} <<EOF;
  <tr> <td align="left">
  $name     <a href="$url"> $url </a>
  <img src="$image" alt="$label" />
  </td></tr>
EOF
    }
    else {
        ## The row is opened here as well so the table stays balanced.
        print {$index} <<EOF;
  <tr> <td align="left">
  $name     <a href="$url"> $url </a>
  $comic : $error
  </td> </tr>
EOF
    }

    close $index or warn "ERROR: Can't close $indexFile: $!\n";

    return 0;
}

#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder, not implemented yet.  Accepts the date hash reference
##   and does nothing.
##
#######################################################################
sub writeMainIndex {
    my ($date) = @_;
    return;
}

#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing markup (generation time, version, links) to
##   the daily index.
##
#######################################################################
sub writeFooter {
    my ($date) = @_;

    my $indexFile = _indexFilePath($date);
    my $sysDate   = _currentDate();

    my $index;
    if ( !open $index, '>>', $indexFile ) {
        warn "ERROR: Can't append to $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
Generated on: $sysDate
Version: $ver
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<a href="http://validator.w3.org/check?uri=referer"><img src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</center>
</body>
</html>
EOF

    close $index or warn "ERROR: Can't close $indexFile: $!\n";
    return;
}

#######################################################################
## Function    : checkDir
##
## Description :
##   Creates every directory in the argument list that does not exist
##   yet.  Arguments may be plain paths or references to arrays of
##   paths.
##
#######################################################################
sub checkDir {
    my @dirs = map { ref $_ eq 'ARRAY' ? @{$_} : $_ } @_;

    foreach my $dir (@dirs) {
        next if ( -d $dir );
        mkpath($dir);
    }
    return;
}

#######################################################################
## Function    : writeTitle
##
## Description :
##   Starts a new daily index (overwriting any existing one) with the
##   HTML header, the heading image and the long form of the date.
##
#######################################################################
sub writeTitle {
    my ($date) = @_;

    my $indexFile = _indexFilePath($date);
    my $today     = $days[ $date->{'dow'} ] . " " . $date->{'mon'} . "/"
        . $date->{'day'} . "/" . $date->{'year'};

    ## Use the date being fetched, not the current day, so that the
    ## heading is right when the script runs with --days.
    my $today_long = Date_to_Text_Long(
        $date->{'year'}, $date->{'mon'}, $date->{'day'}
    );

    my $index;
    if ( !open $index, '>', $indexFile ) {
        warn "ERROR: Can't create $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td> </td></tr>
</table>
</td></tr>
EOF

    close $index or warn "ERROR: Can't close $indexFile: $!\n";
    return;
}

#######################################################################
## Function    : directDownload
##
## Description :
##   Mode 2 download.  The comic's 'search' field is a URL template;
##   parseComic fills in the date and the image is fetched straight
##   from the resulting URL into the image directory.
##
##   Returns 0 on success or an error string.
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;

    my $file = parseComic( $comics, $comic, $date );
    if ( !defined $file || $file eq '' ) {
        return "ERROR: No download URL configured for $comics->{$comic}{'fullName'}";
    }

    ##
    ## Save the file to the appropriate directory
    ##
    my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";
    my $status = _fetchImage( $file, $comics->{$comic}{'url'}, $target );

    return 0 if ( $status == 0 );
    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}

#######################################################################
## Function    : indexDownload
##
## Description :
##   Mode 1 download.  Fetches the comic's page, applies the 'search'
##   regular expression to every line (the first capture group of the
##   last matching line is the image location) and downloads that
##   image into the image directory.
##
##   Returns 0 on success or an error string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;

    my $fullName   = $comics->{$comic}{'fullName'};
    my $pageURL    = $comics->{$comic}{'url'};
    my $comicIndex = $indexDir . "/index." . $comic;

    ## A broken pattern in the configuration must only fail this comic,
    ## not abort the whole run.
    my $pattern = $comics->{$comic}{'search'};
    my $search  = defined $pattern ? eval { qr/$pattern/ } : undef;
    if ( !defined $search ) {
        return "ERROR: Invalid search pattern for $fullName";
    }

    ## List form of system(): the URL never passes through a shell.
    system( 'wget', '-q', $pageURL, '-O', $comicIndex );

    my $page;
    if ( !open $page, '<', $comicIndex ) {
        return "ERROR: Can't open index file for " . $fullName
            . " (" . $pageURL . ")";
    }
    my @lines = <$page>;
    close $page;
    unlink $comicIndex;

    ## Scheme and host of the comic's page, used for relative image paths.
    my ($mainURL) = $pageURL =~ m{\A(https?://[^/]+)}i;
    $mainURL = $pageURL if ( !defined $mainURL );

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $comicLine;
    foreach my $line (@lines) {
        if ( $line =~ $search && defined $1 ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    if ( !$comicLine ) {
        return "ERROR: Could not download comic $fullName";
    }

    if ( $comicLine =~ m/\.(gif|jpe?g|png)\b/i ) {
        $comics->{$comic}{'ext'} = $1;
    }

    my $comicURL;
    if ( $comicLine =~ m{\Ahttps?://}i ) {
        $comicURL = $comicLine;
    }
    elsif ( $comicLine =~ m{\A//} ) {
        ## Protocol relative URL: reuse the scheme of the comic's page.
        my ($scheme) = $mainURL =~ m{\A(https?:)}i;
        $comicURL = ( $scheme || 'http:' ) . $comicLine;
    }
    else {
        my $separator = ( $comicLine =~ m{\A/} ) ? '' : '/';
        $comicURL = $mainURL . $separator . $comicLine;
    }

    ##
    ## Save the file to the appropriate directory
    ##
    my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";
    my $status = _fetchImage( $comicURL, $pageURL, $target );

    return 0 if ( $status == 0 );
    return "ERROR: Could not download comic $fullName";
}

#######################################################################
## Function    : parseComic
##
## Description :
##   Expands the placeholders in the comic's 'search' string:
##     __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##   come from the date hash and __ext__ from the comic's 'ext' field.
##
##   Returns the expanded string ('' when no search string is set).
##
#######################################################################
sub parseComic {
    my ( $comics, $comic, $date ) = @_;

    my $string = $comics->{$comic}{'search'};
    return '' if ( !defined $string );

    my %value_for = (
        'year'  => $date->{'year'},
        'year2' => $date->{'year2'},
        'mon'   => $date->{'mon'},
        'mon2'  => $date->{'mon2'},
        'day'   => $date->{'day'},
        'day2'  => $date->{'day2'},
        'ext'   => $comics->{$comic}{'ext'},
    );

    $string =~ s{__(year2?|mon2?|day2?|ext)__}
                {defined $value_for{$1} ? $value_for{$1} : ''}ge;
    chomp $string;

    return $string;
}

#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash for the day being fetched:
##     day, mon, year   - numeric day, month (1-12) and 4 digit year
##     day2, mon2, year2 - the same, zero padded to two digits
##     dow              - weekday index into @days (0 = Sunday)
##
##   The optional argument is the number of days to go back; without it
##   the value of the --days option is used.
##
#######################################################################
sub fetchDates {
    my ($ago) = @_;
    $ago = $days_ago if ( !defined $ago );
    $ago = 0         if ( !defined $ago );

    my %dates = ();
    @dates{qw/day mon year dow/}
        = ( localtime( time - ( 86400 * $ago ) ) )[ 3, 4, 5, 6 ];

    $dates{'year'} += 1900;
    $dates{'mon'}  += 1;

    $dates{'year2'} = sprintf( "%02d", $dates{'year'} % 100 );
    $dates{'mon2'}  = sprintf( "%02d", $dates{'mon'} );
    $dates{'day2'}  = sprintf( "%02d", $dates{'day'} );

    return %dates;
}

###############################################################################
##
## fetchOptions( );
##
##  Grab our command line arguments and toss them in to a hash
##
###############################################################################
sub fetchOptions {
    my %opts;

    GetOptions(
        "days:i" => \$opts{'days'},
        "help|?" => \$opts{'help'},
        "man"    => \$opts{'man'},
    ) or pod2usage();

    ## Asking for help is not an error: exit with status 0.
    pod2usage( { -verbose => 1, -exitval => 0 } ) if defined $opts{'help'};
    pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $opts{'man'};

    return %opts;
}

__END__

=head1 NAME

fetch.pl - Fetches comics and places them all locally in a single html file.

=head1 SYNOPSIS

fetch.pl [options]

 Options:
   --days,d    Fetch comics from X days ago
   --help,?    Display the basic help menu
   --man,m     Display the detailed man page

=head1 DESCRIPTION

Reads the list of comics from F<comics.conf>, downloads each strip with
C<wget>, converts it to JPEG with ImageMagick's C<convert> and appends it to
an HTML index for the day.

=head1 HISTORY

=head1 AUTHOR

Nicholas DeClario <nick@declario.com>

=head1 BUGS

This is a work in progress.  Please report all bugs to the author.

=head1 SEE ALSO

=head1 COPYRIGHT

=cut