|
|
| version 1.21, 2015/10/26 14:25:40 | version 1.31, 2024/12/13 16:03:49 |
|---|---|
| Line 2 | Line 2 |
| ############################################################################### | ############################################################################### |
| # $Log$ | # $Log$ |
| # Revision 1.31 2024/12/13 16:03:49 nick | |
| # This adds the ability to specify a comic as a link only with a default splash image. | |
| # | |
| # Revision 1.30 2022/10/04 12:02:03 nick | |
| # Added --no-check-certificate for wget calls as arcamax was failing its cert check. Meh, whatever. It's just comics. | |
| # | |
| # Revision 1.29 2020/06/10 21:32:52 nick | |
| # Centered page | |
| # | |
| # Revision 1.28 2020/06/10 21:14:31 nick | |
| # Updated for w3 validation. | |
| # | |
| # Revision 1.27 2019/04/15 12:50:23 nick | |
| # The script was unable to handle html '&' and convert it, so I added that. I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick. | |
| # | |
| # Revision 1.26 2018/04/22 14:03:54 nick | |
| # Changed the default for Sunday comics that was causing issues with some comics. | |
| # | |
| # Revision 1.25 2018/02/12 13:30:58 nick | |
| # Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't. | |
| # | |
| # Revision 1.24 2018/02/06 14:31:06 nick | |
| # A status report is now generated in JSON that can easily be scanned so that | |
| # I can be alerted when there are failures that I miss if I don't read the | |
| # comics that day. | |
| # | |
| # Revision 1.23 2018/01/26 13:05:27 nick | |
| # Added a new config option to remove all newline from the resulting index.html | |
| # file. This allows for easier parsing for certain comics. I then updated | |
| # the URLs to search for and enabled the newline removal for a handful | |
| # of uComics. | |
| # | |
| # I believe I've also properly fixed the Comic Config version displayed on | |
| # the webpage itself. | |
| # | |
| # Revision 1.22 2017/12/05 13:37:40 nick | |
| # Added the CVS config version to the output. | |
| # | |
| # Revision 1.21 2015/10/26 14:25:40 nick | # Revision 1.21 2015/10/26 14:25:40 nick |
| # Fixed a bug that was improperly including the day of week string preventing the weekend comics from fetching properly. | # Fixed a bug that was improperly including the day of week string preventing the weekend comics from fetching properly. |
| # | # |
| Line 30 use File::Path; | Line 68 use File::Path; |
| use Data::Dumper; | use Data::Dumper; |
| use Pod::Usage; | use Pod::Usage; |
| use Getopt::Long; | use Getopt::Long; |
| use JSON::Create 'create_json'; | |
| use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/; | use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/; |
| use Data::Dumper; | |
| print("Running"); | |
| ## | ## |
| ## Some default values | ## Some default values |
| ## | ## |
| my $ver = '$Id$'; | my $ver = '$Id$'; |
| my $comicFile = "comics.conf"; | my $comicFile = "comics.conf"; |
| my $comicConfigVer = "Unknown"; | |
| my $reportFile = "/home/httpd/html/daily/comics/status_report.json"; | |
| my %comics = &readComicConfig ( $comicFile ); | my %comics = &readComicConfig ( $comicFile ); |
| my %opts = &fetchOptions( ); | my %opts = &fetchOptions( ); |
| my $days_ago = $opts{'days'} || 0; | my $days_ago = $opts{'days'} || 0; |
| Line 61 print STDOUT "Starting comic fetch at $D | Line 104 print STDOUT "Starting comic fetch at $D |
| foreach my $comic ( sort keys %comics ) { | foreach my $comic ( sort keys %comics ) { |
| print("Checking Comic $comic\n"); | |
| ## Skip if this is Sunday and the comic is weekdays only | ## Skip if this is Sunday and the comic is weekdays only |
| next if ( $comic =~ m/config/ ); | next if ( $comic =~ m/config/ ); |
| if (($dates{'wday'} eq "Sunday") && | if (($dates{'wday'} eq "Sunday") && |
| ($comics{$comic}{'sunday'} == 0)) { | ($comics{$comic}{'not_sunday'} == 1)) { |
| print "Skipping '$comic'; Weekdays only.\n"; | print "Skipping '$comic'; Weekdays only.\n"; |
| next; | next; |
| } | } |
| Line 75 foreach my $comic ( sort keys %comics ) | Line 120 foreach my $comic ( sort keys %comics ) |
| print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; | print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; |
| next | next |
| } | } |
| $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); | $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
| &writeComic ( \%comics, $comic, \%dates ); | &writeComic ( \%comics, $comic, \%dates ); |
| my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}"; | my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}"; |
| my $size = 0; | my $size = 0; |
| my $cmd = "/usr/bin/identify -verbose $file|"; | ## Resize downloaded images |
| open(IMG, $cmd) || die ("Can't open: $!\n"); | if($comics{$comic}{'mode'} != 3) { |
| while(<IMG>) { | my $cmd = "/usr/bin/identify -verbose $file|"; |
| if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) { | open(IMG, $cmd) || die ("Can't open: $!\n"); |
| $size = $1 if ( $size == 0); | while(<IMG>) { |
| } | if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) { |
| } | $size = $1 if ( $size == 0); |
| close(IMG); | } |
| } | |
| close(IMG); | |
| system( "/usr/bin/convert -resize 640 $file $file" ) | |
| if ( $size > 640 ) | system( "/usr/bin/convert -resize 800 $file $file" ) |
| if ( $size > 800 ) | |
| } | |
| } | } |
| ## &writeMainIndex ( \%dates ); | ## &writeMainIndex ( \%dates ); |
| &writeFooter( \%dates ); | &writeFooter( \%dates ); |
| print STDOUT "Status written to $reportFile.\n" | |
| if (&writeStatusReportJSON(\%comics, $reportFile)); | |
| $DATE=`date`; chomp( $DATE ); | $DATE=`date`; chomp( $DATE ); |
| print STDOUT "Completed comic fetch at $DATE\n"; | print STDOUT "Completed comic fetch at $DATE\n"; |
| Line 127 sub downloadComic ($$) { | Line 177 sub downloadComic ($$) { |
| return directDownload ( \%comics, $comic, $date ); | return directDownload ( \%comics, $comic, $date ); |
| last SWITCH; | last SWITCH; |
| } | } |
| if ( $comics->{$comic}{'mode'} eq 3 ) { | |
| return 0; | |
| last SWITCH; | |
| } | |
| } | } |
| return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}."; | return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}."; |
| Line 146 sub readComicConfig ($$) { | Line 200 sub readComicConfig ($$) { |
| open FILEN, "<$comicFile"; | open FILEN, "<$comicFile"; |
| while (<FILEN>) { | while (<FILEN>) { |
| #if ($_ =~ m/^#.* \$[Ii][Dd]: fetch.pl.new,v 1.23 2018/01/26 13:05:27 nick Exp $/) { | |
| if ($_ =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/) { | |
| $comicConfigVer = $1; | |
| } | |
| if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ | if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ |
| $_ =~ s/__YEAR__/$year/g; | $_ =~ s/__YEAR__/$year/g; |
| $_ =~ s/__MON__/$mon/g; | $_ =~ s/__MON__/$mon/g; |
| Line 157 sub readComicConfig ($$) { | Line 215 sub readComicConfig ($$) { |
| $comicConfig{$res[0]}{'mode'} = $res[3]; | $comicConfig{$res[0]}{'mode'} = $res[3]; |
| $comicConfig{$res[0]}{'fullName'} = $res[4]; | $comicConfig{$res[0]}{'fullName'} = $res[4]; |
| $comicConfig{$res[0]}{'ext'} = $res[5]; | $comicConfig{$res[0]}{'ext'} = $res[5]; |
| $comicConfig{$res[0]}{'sunday'} = sprintf("%d", $res[6] || 1); | $comicConfig{$res[0]}{'not_sunday'} = sprintf("%d", $res[6] || 0); |
| $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); | $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); |
| $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0); | |
| $comicConfig{$res[0]}{'error'} = 0; | $comicConfig{$res[0]}{'error'} = 0; |
| } | } |
| elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { | elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { |
| Line 172 sub readComicConfig ($$) { | Line 231 sub readComicConfig ($$) { |
| ####################################################################### | ####################################################################### |
| ####################################################################### | ####################################################################### |
| sub writeStatusReportJSON ($$) { | |
| my ( $comicsRef, $filename ) = @_; | |
| my %comics = %$comicsRef; | |
| my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900, | |
| (localtime)[4] + 1, | |
| (localtime)[3]); | |
| my %json = ('date' => $shortDate, 'comics' => ()); | |
| my $totalErrors = 0; | |
| foreach my $comic (sort keys %comics) { | |
| next unless $comics{$comic}{'fullName'}; | |
| if ($comics{$comic}{'error'}) { | |
| my %error = ('comicName' => "$comics{$comic}{'fullName'}", | |
| 'error' => "$comics{$comic}{'error'}", | |
| 'status' => "Error"); | |
| push @{$json{'comics'}}, \%error; | |
| $totalErrors += 1; | |
| } else { | |
| my %status = ('comicName' => "$comics{$comic}{'fullName'}", | |
| 'error' => 0, | |
| 'status' => "Successfull"); | |
| push @{$json{'comics'}}, \%status; | |
| } | |
| } | |
| $json{'totalErrors'} = $totalErrors; | |
| open SR, ">$filename" or die ("ERROR: Failed to create status report: $!\n"); | |
| print SR create_json (\%json); | |
| close(SR); | |
| } | |
| ####################################################################### | |
| ####################################################################### | |
| sub writeComic ($$) { | sub writeComic ($$) { |
| my ( $comics, $comic, $date ) = @_; | my ( $comics, $comic, $date ) = @_; |
| my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); | my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
| my $indexFile = $indexDir . "/index-" . $date->{'year2'} . | my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
| $date->{'mon2'} . $date->{'day2'} . "-" . | $date->{'mon2'} . $date->{'day2'} . "-" . |
| $sd . ".html"; | $sd . ".html"; |
| $comics->{$comic}{'fullName'} =~ s/&/&/g; | |
| my $content = <<EOF; | my $content = <<EOF; |
| <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> | <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> |
| Line 185 sub writeComic ($$) { | Line 279 sub writeComic ($$) { |
| <td align="left"> | <td align="left"> |
| <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> | <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> |
| <font size="-2"> | <font size="-2"> |
| <a href="$comics->{$comic}{'url'}"> | <a href="$comics->{$comic}{'url'}" target="_blank"> |
| $comics->{$comic}{'url'} | $comics->{$comic}{'url'} |
| </a> | </a> |
| </font><br/> | </font><br/> |
| EOF | |
| if ( $comics->{$comic}{'mode'} == 3 ) { | |
| print("Mode 3\n"); | |
| $content .= <<EOF; | |
| <img src="$comics->{$comic}{'ext'}" alt="$comics->{$comic}{'fullName'}" /> | |
| EOF | |
| } else { | |
| $content .= <<EOF; | |
| <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" /> | <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" /> |
| EOF | |
| } | |
| $content .= <<EOF; | |
| <br/><br/> | <br/><br/> |
| </td></tr> | </td></tr> |
| <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> | <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> |
| Line 240 sub writeFooter { | Line 345 sub writeFooter { |
| print INDEX <<EOF; | print INDEX <<EOF; |
| </table> | </table> |
| <center> | <center> |
| <font size="2"> | Generated on: <font size="2" color="green">$sysDate</font><br/> |
| Generated on: <font color="green">$sysDate</font><br/> | Version: <font size="2" color="green">$ver</font><br /> |
| Version: <font color="green">$ver</font><br /> | Config Version: <font size="2" color="green">$comicConfigVer</font><br /> |
| CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a> | CVS: <a href="http://demandred.dyndns.org:3000/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a> |
| <p> | <br /> |
| <a href="http://validator.w3.org/check?uri=referer"><img | <a href="http://validator.w3.org/check?uri=referer"><img |
| src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> | src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> |
| </p> | |
| </center> | </center> |
| </body> | </body> |
| Line 284 sub writeTitle ($$) { | Line 388 sub writeTitle ($$) { |
| <html xmlns="http://www.w3.org/1999/xhtml"> | <html xmlns="http://www.w3.org/1999/xhtml"> |
| <head> | <head> |
| <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> | <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> |
| <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen"> | <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" /> |
| <link rel="shortcut icon" href="./favicon.ico"> | <link rel="shortcut icon" href="./favicon.ico" /> |
| <title>Daily Comics for $today</title> | <title>Daily Comics for $today</title> |
| </head> | </head> |
| <body bgcolor="#FFFFFF"> | <body bgcolor="#FFFFFF"> |
| <table align="center" cellpadding="5" cellspacing="0"> | <table align="center" cellpadding="0" cellspacing="0" border="0"> |
| <tr><td> | <tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr> |
| <table cellpadding="0" cellspacing="0" border="0"> | |
| <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr> | |
| <tr><td align="left">$today_long</td></tr> | <tr><td align="left">$today_long</td></tr> |
| <tr><td> </td></tr> | <tr><td> </td></tr> |
| </td</tr> | |
| EOF | EOF |
| close (INDEX); | close (INDEX); |
| } | } |
| Line 313 sub directDownload ($$) { | Line 413 sub directDownload ($$) { |
| my $cDir = $date->{'mon2'} . $date->{'year2'}; | my $cDir = $date->{'mon2'} . $date->{'year2'}; |
| my $cDate = $date->{'day2'}; | my $cDate = $date->{'day2'}; |
| my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; | my $cmd = "wget --no-check-certificate -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
| return system($cmd); | return system($cmd); |
| } | } |
| ####################################################################### | ####################################################################### |
| ####################################################################### | ####################################################################### |
| sub linkOnly ($$) { | |
| my ( $comics, $comic, $date ) = @_; | |
| return 0; | |
| } | |
| ####################################################################### | |
| ####################################################################### | |
| sub indexDownload ($$) { | sub indexDownload ($$) { |
| my ( $comics, $comic, $date ) = @_; | my ( $comics, $comic, $date ) = @_; |
| my ( @lines, $comicLine, $mainURL ); | my ( @lines, $comicLine, $mainURL ); |
| my $comicIndex = "indexes/index.$comic"; | my $comicIndex = "indexes/index.$comic"; |
| my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " . | print("Getching Index $comicIndex.\n"); |
| "--user-agent=\"$USER_AGENT\" " . | print("comic url: $comics->{$comic}{'url'}\n"); |
| print Dumper($comics->{$comic}); | |
| my $wget_cmd = "wget --referer='$comics->{$comic}{'url'}' " . | |
| "--no-check-certificate --user-agent=\"$USER_AGENT\" " . | |
| "$comics->{$comic}{'url'} -O $comicIndex"; | "$comics->{$comic}{'url'} -O $comicIndex"; |
| system($wget_cmd); | print ("Using wget command:\n$wget_cmd\n"); |
| my $status = system($wget_cmd); | |
| print ("Return status: $status\n"); | |
| if ( ! open FILEN, "<$comicIndex" ) { | if ( ! open FILEN, "<$comicIndex" ) { |
| return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . | return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . |
| " (" . $comics->{$comic}{'url'} . ")"; | " (" . $comics->{$comic}{'url'} . ")"; |
| } | } |
| @lines = <FILEN>; | while (<FILEN>) { |
| my $line = $_; | |
| $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} ); | |
| push @lines, $line; | |
| } | |
| close (FILEN); | close (FILEN); |
| unlink ("$comicIndex"); | unlink ("$comicIndex"); |
| $mainURL = $comics->{$comic}{'url'}; | $mainURL = $comics->{$comic}{'url'}; |
| Line 347 sub indexDownload ($$) { | Line 468 sub indexDownload ($$) { |
| ## | ## |
| ## Find the comic strip URL based on the specified regex in the search | ## Find the comic strip URL based on the specified regex in the search |
| ## | ## |
| print "Using search $comics->{$comic}{'search'}\n"; | |
| foreach my $line (@lines) { | foreach my $line (@lines) { |
| if ( $line =~ m/$comics->{$comic}{'search'}/i ) { | if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
| print "Found match:\n"; | |
| $comicLine = $1; chomp $comicLine; | $comicLine = $1; chomp $comicLine; |
| print "+ $comicLine\n"; | |
| } | } |
| } | } |
| Line 360 sub indexDownload ($$) { | Line 486 sub indexDownload ($$) { |
| my $cDate = $date->{'day2'}; | my $cDate = $date->{'day2'}; |
| if ( $comicLine ) { | if ( $comicLine ) { |
| print "Downloading Comic\n"; | |
| if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } | if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
| my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; | my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
| my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; | print "Final URL: $comicURL\n"; |
| # Strip & | |
| $comicURL =~ s/\&\;/&/g; | |
| my $cmd = "wget --no-check-certificate --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; | |
| system( $cmd ); | system( $cmd ); |
| return 0; | return 0; |
| } | } |