Diff for /comics/fetch.pl.new between versions 1.23 and 1.29

version 1.23, 2018/01/26 13:05:27 version 1.29, 2020/06/10 21:32:52
Line 2 Line 2
   
 ###############################################################################  ###############################################################################
 # $Log$  # $Log$
   # Revision 1.29  2020/06/10 21:32:52  nick
   # Centered page
   #
   # Revision 1.28  2020/06/10 21:14:31  nick
   # Updated for w3 validation.
   #
   # Revision 1.27  2019/04/15 12:50:23  nick
   # The script was unable to handle html '&' and convert it, so I added that.  I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick.
   #
   # Revision 1.26  2018/04/22 14:03:54  nick
   # Changed the default for Sunday comics that was causing issues with some comics.
   #
   # Revision 1.25  2018/02/12 13:30:58  nick
   # Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't.
   #
   # Revision 1.24  2018/02/06 14:31:06  nick
   # A status report is now generated in JSON that can easily be scanned so that
   # I can be alerted when there are failures that I miss if I don't read the
   # comics that day.
   #
 # Revision 1.23  2018/01/26 13:05:27  nick  # Revision 1.23  2018/01/26 13:05:27  nick
 # Added a new config option to remove all newline from the resulting index.html  # Added a new config option to remove all newline from the resulting index.html
 # file.  This allows for easier parsing for certain comics.  I then updated  # file.  This allows for easier parsing for certain comics.  I then updated
Line 42  use File::Path; Line 62  use File::Path;
 use Data::Dumper;  use Data::Dumper;
 use Pod::Usage;  use Pod::Usage;
 use Getopt::Long;  use Getopt::Long;
   use JSON::Create 'create_json';
 use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;  use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
   
 ##   ## 
Line 51  use Date::Calc qw/Date_to_Text_Long Toda Line 71  use Date::Calc qw/Date_to_Text_Long Toda
 my $ver         = '$Id$';  my $ver         = '$Id$';
 my $comicFile   = "comics.conf";  my $comicFile   = "comics.conf";
 my $comicConfigVer = "Unknown";  my $comicConfigVer = "Unknown";
   my $reportFile = "/home/httpd/html/daily/comics/status_report.json";
 my %comics      = &readComicConfig ( $comicFile );  my %comics      = &readComicConfig ( $comicFile );
 my %opts        = &fetchOptions( );  my %opts        = &fetchOptions( );
 my $days_ago    = $opts{'days'} || 0;  my $days_ago    = $opts{'days'} || 0;
Line 77  foreach my $comic ( sort keys %comics ) Line 98  foreach my $comic ( sort keys %comics )
   ## Skip if this is Sunday and the comic is weekdays only    ## Skip if this is Sunday and the comic is weekdays only
   next if ( $comic =~ m/config/ );    next if ( $comic =~ m/config/ );
   if (($dates{'wday'} eq "Sunday") &&     if (($dates{'wday'} eq "Sunday") && 
       ($comics{$comic}{'sunday'} == 0)) {        ($comics{$comic}{'not_sunday'} == 1)) {
     print "Skipping '$comic'; Weekdays only.\n";      print "Skipping '$comic'; Weekdays only.\n";
     next;      next;
   }    }
Line 88  foreach my $comic ( sort keys %comics ) Line 109  foreach my $comic ( sort keys %comics )
     print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";      print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
     next      next
   }    }
     
   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
   &writeComic ( \%comics, $comic, \%dates );    &writeComic ( \%comics, $comic, \%dates );
   
Line 113  foreach my $comic ( sort keys %comics ) Line 134  foreach my $comic ( sort keys %comics )
   
 &writeFooter( \%dates );  &writeFooter( \%dates );
   
   print STDOUT "Status written to $reportFile.\n"
       if (&writeStatusReportJSON(\%comics, $reportFile));
   
 $DATE=`date`;  chomp( $DATE );  $DATE=`date`;  chomp( $DATE );
 print STDOUT "Completed comic fetch at $DATE\n";  print STDOUT "Completed comic fetch at $DATE\n";
   
Line 159  sub readComicConfig ($$) { Line 183  sub readComicConfig ($$) {
   
         open FILEN, "<$comicFile";          open FILEN, "<$comicFile";
                 while (<FILEN>) {                  while (<FILEN>) {
             #if ($_ =~ m/^#.* \$Id$/) {              #if ($_ =~ m/^#.* \$[Ii][Dd]: fetch.pl.new,v 1.23 2018/01/26 13:05:27 nick Exp $/) {
             if ($_ =~ m/^#.* \$Id$$/) {              if ($_ =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/) {
                 $comicConfigVer = $1;                  $comicConfigVer = $1;
             }              }
                         if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){                          if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
Line 174  sub readComicConfig ($$) { Line 198  sub readComicConfig ($$) {
                                 $comicConfig{$res[0]}{'mode'}     = $res[3];                                  $comicConfig{$res[0]}{'mode'}     = $res[3];
                                 $comicConfig{$res[0]}{'fullName'} = $res[4];                                  $comicConfig{$res[0]}{'fullName'} = $res[4];
                                 $comicConfig{$res[0]}{'ext'}      = $res[5];                                  $comicConfig{$res[0]}{'ext'}      = $res[5];
                 $comicConfig{$res[0]}{'sunday'}   = sprintf("%d", $res[6] || 1);                  $comicConfig{$res[0]}{'not_sunday'}   = sprintf("%d", $res[6] || 0);
                 $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0);                  $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0);
                 $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0);                  $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0);
                                 $comicConfig{$res[0]}{'error'}    = 0;                                  $comicConfig{$res[0]}{'error'}    = 0;
Line 190  sub readComicConfig ($$) { Line 214  sub readComicConfig ($$) {
   
 #######################################################################  #######################################################################
 #######################################################################  #######################################################################
   sub writeStatusReportJSON ($$) {
           my ( $comicsRef, $filename ) = @_;
       my %comics = %$comicsRef;
       my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900,
                                             (localtime)[4] + 1,
                                             (localtime)[3]);
       my %json = ('date' => $shortDate, 'comics' => ());
       my $totalErrors = 0;
   
       foreach my $comic (sort keys %comics) {
         next unless $comics{$comic}{'fullName'};
         if ($comics{$comic}{'error'}) {
           my %error = ('comicName' => "$comics{$comic}{'fullName'}",
                        'error' => "$comics{$comic}{'error'}",
                        'status' => "Error");
           push @{$json{'comics'}}, \%error;
           $totalErrors += 1;
         } else {
           my %status = ('comicName' => "$comics{$comic}{'fullName'}",
                         'error' => 0,
                         'status' => "Successfull");
           push @{$json{'comics'}}, \%status;
         }
       }
       $json{'totalErrors'} = $totalErrors;    
   
       open SR, ">$filename" or die ("ERROR: Failed to create status report: $!\n");
       print SR create_json (\%json);
       close(SR); 
   }
   
   #######################################################################
   #######################################################################
 sub writeComic ($$) {  sub writeComic ($$) {
         my ( $comics, $comic, $date ) = @_;          my ( $comics, $comic, $date ) = @_;
         my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );          my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
         my $indexFile = $indexDir . "/index-" . $date->{'year2'} .           my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                         $date->{'mon2'} . $date->{'day2'} . "-" .                           $date->{'mon2'} . $date->{'day2'} . "-" . 
                         $sd . ".html";                          $sd . ".html";
       $comics->{$comic}{'fullName'} =~ s/&/&amp;/g;
         my $content = <<EOF;          my $content = <<EOF;
   
 <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->  <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
Line 258  sub writeFooter { Line 316  sub writeFooter {
         print INDEX <<EOF;          print INDEX <<EOF;
 </table>  </table>
 <center>  <center>
 <font size="2">  Generated on: <font size="2" color="green">$sysDate</font><br/>
 Generated on: <font color="green">$sysDate</font><br/>  Version: <font size="2" color="green">$ver</font><br />
 Version: <font color="green">$ver</font><br />  Config Version: <font size="2" color="green">$comicConfigVer</font><br />
 Config Version: <font color="green">$comicConfigVer</font><br />  CVS: <a href="http://demandred.dyndns.org:3000/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
 CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>    <br />
   <p>  
     <a href="http://validator.w3.org/check?uri=referer"><img      <a href="http://validator.w3.org/check?uri=referer"><img
       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>        src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
   </p>  
 </center>  </center>
   
 </body>  </body>
Line 303  sub writeTitle ($$) { Line 359  sub writeTitle ($$) {
 <html xmlns="http://www.w3.org/1999/xhtml">  <html xmlns="http://www.w3.org/1999/xhtml">
 <head>  <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">  <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
 <link rel="shortcut icon" href="./favicon.ico">  <link rel="shortcut icon" href="./favicon.ico" />
     <title>Daily Comics for $today</title>      <title>Daily Comics for $today</title>
   </head>    </head>
 <body bgcolor="#FFFFFF">  <body bgcolor="#FFFFFF">
 <table align="center" cellpadding="5" cellspacing="0">  <table align="center" cellpadding="0" cellspacing="0" border="0">
 <tr><td>  <tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr>
 <table cellpadding="0" cellspacing="0" border="0">  
 <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>  
 <tr><td align="left">$today_long</td></tr>  <tr><td align="left">$today_long</td></tr>
 <tr><td>&nbsp;</td></tr>  <tr><td>&nbsp;</td></tr>
 </td</tr>  
   
 EOF  EOF
         close (INDEX);          close (INDEX);
 }  }
Line 332  sub directDownload ($$) { Line 384  sub directDownload ($$) {
         my $cDir  = $date->{'mon2'} . $date->{'year2'};          my $cDir  = $date->{'mon2'} . $date->{'year2'};
         my $cDate = $date->{'day2'};          my $cDate = $date->{'day2'};
   
         my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";          my $cmd = "wget -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
   
         return system($cmd);          return system($cmd);
 }  }
Line 344  sub indexDownload ($$) { Line 396  sub indexDownload ($$) {
         my ( @lines, $comicLine, $mainURL );          my ( @lines, $comicLine, $mainURL );
         my $comicIndex = "indexes/index.$comic";          my $comicIndex = "indexes/index.$comic";
   
     my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .      my $wget_cmd = "wget -q --referer='$comics->{$comic}{'url'}' " .
                    "--user-agent=\"$USER_AGENT\" " .                     "--user-agent=\"$USER_AGENT\" " .
                    "$comics->{$comic}{'url'} -O $comicIndex";                     "$comics->{$comic}{'url'} -O $comicIndex";
     system($wget_cmd);      system($wget_cmd);
Line 355  sub indexDownload ($$) { Line 407  sub indexDownload ($$) {
         }           } 
     while (<FILEN>) {      while (<FILEN>) {
         my $line = $_;          my $line = $_;
         $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newliens'} );          $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} );
         push @lines, $line;          push @lines, $line;
     }      }
         close (FILEN);            close (FILEN);  
   
   
         unlink ("$comicIndex");          unlink ("$comicIndex");
   
         $mainURL = $comics->{$comic}{'url'};          $mainURL = $comics->{$comic}{'url'};
Line 370  sub indexDownload ($$) { Line 423  sub indexDownload ($$) {
         ##          ##
         ## Find the comic strip URL based on the specified regex in the search          ## Find the comic strip URL based on the specified regex in the search
         ##          ##
   
         foreach my $line (@lines) {          foreach my $line (@lines) {
                 if ( $line =~ m/$comics->{$comic}{'search'}/i ) {                  if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
                         $comicLine = $1; chomp $comicLine;                          $comicLine = $1; chomp $comicLine;
Line 385  sub indexDownload ($$) { Line 439  sub indexDownload ($$) {
         if ( $comicLine ) {          if ( $comicLine ) {
                 if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }                  if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                 my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;                  my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
                 my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";          # Strip &amp;
           $comicURL =~ s/\&amp\;/&/g;
                   my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
                 system( $cmd );                  system( $cmd );
                 return 0;                  return 0;
         }          }

Removed from v.1.23  
changed lines
  Added in v.1.29


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>