--- comics/fetch.pl.new 2015/05/07 12:31:43 1.18 +++ comics/fetch.pl.new 2018/04/22 14:03:54 1.26 @@ -2,6 +2,38 @@ ############################################################################### # $Log: fetch.pl.new,v $ +# Revision 1.26 2018/04/22 14:03:54 nick +# Changed the default for Sunday comics that was causing issues with some comics. +# +# Revision 1.25 2018/02/12 13:30:58 nick +# Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't. +# +# Revision 1.24 2018/02/06 14:31:06 nick +# A status report is now generated in JSON that can easily be scanned so that +# I can be alerted when there are failures that I miss if I don't read the +# comics that day. +# +# Revision 1.23 2018/01/26 13:05:27 nick +# Added a new config option to remove all newlines from the resulting index.html +# file. This allows for easier parsing for certain comics. I then updated +# the URLs to search for and enabled the newline removal for a handful +# of uComics. +# +# I believe I've also properly fixed the Comic Config version displayed on +# the webpage itself. +# +# Revision 1.22 2017/12/05 13:37:40 nick +# Added the CVS config version to the output. +# +# Revision 1.21 2015/10/26 14:25:40 nick +# Fixed a bug that was improperly including the day of week string preventing the weekend comics from fetching properly. +# +# Revision 1.20 2015/10/22 12:58:44 nick +# Added the ability for Sunday only comics. Stonesoup is no longer weekdays, this has been added to Sunday only. I also added Foxtrot Classics for weekdays and Foxtrot for Sundays. +# +# Revision 1.19 2015/07/13 12:56:58 nick +# Added Sally Forth and Pearls Before Swine. Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'. 
+# # Revision 1.18 2015/05/07 12:31:43 nick # Added favicon # @@ -21,14 +53,16 @@ use File::Path; use Data::Dumper; use Pod::Usage; use Getopt::Long; - -use Date::Calc qw/Date_to_Text_Long Today/; +use JSON::Create 'create_json'; +use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/; ## ## Some default values ## -my $ver = '$Id: fetch.pl.new,v 1.18 2015/05/07 12:31:43 nick Exp $'; +my $ver = '$Id: fetch.pl.new,v 1.26 2018/04/22 14:03:54 nick Exp $'; my $comicFile = "comics.conf"; +my $comicConfigVer = "Unknown"; +my $reportFile = "/home/httpd/html/daily/comics/status_report.json"; my %comics = &readComicConfig ( $comicFile ); my %opts = &fetchOptions( ); my $days_ago = $opts{'days'} || 0; @@ -51,9 +85,22 @@ print STDOUT "Starting comic fetch at $D &writeTitle ( \%dates ); foreach my $comic ( sort keys %comics ) { + + ## Skip if this is Sunday and the comic is weekdays only next if ( $comic =~ m/config/ ); - if ( ( $dates{'day2'} eq "Sunday" ) && - ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; } + if (($dates{'wday'} eq "Sunday") && + ($comics{$comic}{'not_sunday'} == 1)) { + print "Skipping '$comic'; Weekdays only.\n"; + next; + } + + ## Skip if Sunday only comic and it's not Sunday. 
+ if (($dates{'wday'} ne "Sunday") && + ($comics{$comic}{'sunday_only'} == 1)) { + print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; + next + } + $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); &writeComic ( \%comics, $comic, \%dates ); @@ -69,6 +116,7 @@ foreach my $comic ( sort keys %comics ) } close(IMG); + system( "/usr/bin/convert -resize 640 $file $file" ) if ( $size > 640 ) } @@ -77,6 +125,9 @@ foreach my $comic ( sort keys %comics ) &writeFooter( \%dates ); +print STDOUT "Status written to $reportFile.\n" + if (&writeStatusReportJSON(\%comics, $reportFile)); + $DATE=`date`; chomp( $DATE ); print STDOUT "Completed comic fetch at $DATE\n"; @@ -123,6 +174,10 @@ sub readComicConfig ($$) { open FILEN, "<$comicFile"; while (<FILEN>) { + #if ($_ =~ m/^#.* \$[Ii][Dd]: fetch.pl.new,v 1.23 2018/01/26 13:05:27 nick Exp $/) { + if ($_ =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/) { + $comicConfigVer = $1; + } if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ $_ =~ s/__YEAR__/$year/g; $_ =~ s/__MON__/$mon/g; @@ -134,7 +189,9 @@ sub readComicConfig ($$) { $comicConfig{$res[0]}{'mode'} = $res[3]; $comicConfig{$res[0]}{'fullName'} = $res[4]; $comicConfig{$res[0]}{'ext'} = $res[5]; - $comicConfig{$res[0]}{'sunday'} = $res[6] || 1; + $comicConfig{$res[0]}{'not_sunday'} = sprintf("%d", $res[6] || 0); + $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); + $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0); $comicConfig{$res[0]}{'error'} = 0; } elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { @@ -148,6 +205,39 @@ sub readComicConfig ($$) { ####################################################################### #######################################################################
+## Write a JSON status report of the fetch results to $filename.
+##
+## Arguments:
+##   $comicsRef - reference to the comics hash. Entries without a
+##                'fullName' are skipped; a true 'error' value marks
+##                the comic as failed and is copied into the report.
+##   $filename  - path of the report file, opened for (over)writing.
+##
+## The report holds 'date' (YYYYMMDD, local time), 'comics' (one record
+## per comic) and 'totalErrors'.
+##
+## Returns the result of close(), i.e. true on success, which the caller
+## uses to decide whether to announce the report. Dies if the file cannot
+## be opened.
+sub writeStatusReportJSON ($$) {
+  my ( $comicsRef, $filename ) = @_;
+
+  ## One localtime snapshot, so the date parts cannot straddle midnight.
+  my ( $mday, $mon, $year ) = (localtime)[3, 4, 5];
+  my $shortDate = sprintf("%d%02d%02d", $year + 1900, $mon + 1, $mday);
+  my %json = ('date' => $shortDate, 'comics' => []);
+  my $totalErrors = 0;
+
+  foreach my $comic (sort keys %$comicsRef) {
+    my $entry = $comicsRef->{$comic};
+    next unless $entry->{'fullName'};
+    if ($entry->{'error'}) {
+      my %error = ('comicName' => "$entry->{'fullName'}",
+                   'error' => "$entry->{'error'}",
+                   'status' => "Error");
+      ## Explicit @{ } dereference: push on a scalar expression was only
+      ## ever experimental and is a compile error from Perl 5.24 on.
+      push @{ $json{'comics'} }, \%error;
+      $totalErrors += 1;
+    } else {
+      ## NOTE(review): "Successfull" is misspelled but left unchanged in
+      ## case consumers of the report match on the exact string.
+      my %status = ('comicName' => "$entry->{'fullName'}",
+                    'error' => 0,
+                    'status' => "Successfull");
+      push @{ $json{'comics'} }, \%status;
+    }
+  }
+  $json{'totalErrors'} = $totalErrors;
+
+  open my $sr, '>', $filename
+    or die ("ERROR: Failed to create status report: $!\n");
+  print {$sr} create_json (\%json);
+  return close($sr);
+}
+
+#######################################################################
+#######################################################################
 sub writeComic ($$) { my ( $comics, $comic, $date ) = @_; my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); @@ -219,6 +309,7 @@ sub writeFooter { Generated on: $sysDate
Version: $ver
+Config Version: $comicConfigVer
CVS: http://demandred.dyndns.org/cgi-bin/cvsweb/comics/

{$comic}{'url'} -O $comicIndex`; + my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " . + "--user-agent=\"$USER_AGENT\" " . + "$comics->{$comic}{'url'} -O $comicIndex"; + system($wget_cmd); if ( ! open FILEN, "<$comicIndex" ) { return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . " (" . $comics->{$comic}{'url'} . ")"; } - @lines = <FILEN>;
+  ## Read the index line by line so the optional per-comic whitespace
+  ## stripping can be applied. The flag is stored by readComicConfig under
+  ## the key 'remove_newlines'; the key read here must match it exactly or
+  ## the stripping silently never happens.
+  while (<FILEN>) {
+    my $line = $_;
+    $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} );
+    push @lines, $line;
+  }
 close (FILEN); unlink ("$comicIndex"); @@ -375,6 +473,8 @@ sub fetchDates () { $dates{'day2'} = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; $dates{'mon'}++; $dates{'mon2'} = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'}; + my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /; + $dates{'wday'} = $days[$dates{'dow'}]; return %dates; }