Diff for /comics/fetch.pl.new between versions 1.27 and 1.31

version 1.27, 2019/04/15 12:50:23 version 1.31, 2024/12/13 16:03:49
Line 2 Line 2
   
 ###############################################################################  ###############################################################################
 # $Log$  # $Log$
   # Revision 1.31  2024/12/13 16:03:49  nick
   # This adds the ability to specify a comic as a link only with a default splash image.
   #
   # Revision 1.30  2022/10/04 12:02:03  nick
   # Added --no-check-certificate for wget calls as arcamax was failing its cert check.  Meh, whatever.  It's just comics.
   #
   # Revision 1.29  2020/06/10 21:32:52  nick
   # Centered page
   #
   # Revision 1.28  2020/06/10 21:14:31  nick
   # Updated for w3 validation.
   #
 # Revision 1.27  2019/04/15 12:50:23  nick  # Revision 1.27  2019/04/15 12:50:23  nick
 # The script was unable to handle html '&' and convert it, so I added that.  I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick.  # The script was unable to handle html '&' and convert it, so I added that.  I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick.
 #  #
Line 58  use Pod::Usage; Line 70  use Pod::Usage;
 use Getopt::Long;  use Getopt::Long;
 use JSON::Create 'create_json';  use JSON::Create 'create_json';
 use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;  use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
   use Data::Dumper;
   
   print("Running");
   
 ##   ## 
 ## Some default values  ## Some default values
Line 89  print STDOUT "Starting comic fetch at $D Line 104  print STDOUT "Starting comic fetch at $D
   
 foreach my $comic ( sort keys %comics ) {  foreach my $comic ( sort keys %comics ) {
   
     print("Checking Comic $comic\n");
   
   ## Skip if this is Sunday and the comic is weekdays only    ## Skip if this is Sunday and the comic is weekdays only
   next if ( $comic =~ m/config/ );    next if ( $comic =~ m/config/ );
   if (($dates{'wday'} eq "Sunday") &&     if (($dates{'wday'} eq "Sunday") && 
Line 110  foreach my $comic ( sort keys %comics ) Line 127  foreach my $comic ( sort keys %comics )
     my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";      my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
     my $size = 0;          my $size = 0;    
   
     my $cmd = "/usr/bin/identify -verbose $file|";      ## Resize downloaded images
     open(IMG, $cmd) || die ("Can't open: $!\n");      if($comics{$comic}{'mode'} != 3) {
     while(<IMG>) {              my $cmd = "/usr/bin/identify -verbose $file|";
         if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {              open(IMG, $cmd) || die ("Can't open: $!\n");
             $size = $1 if ( $size == 0);              while(<IMG>) {
         }                  if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
     }                      $size = $1 if ( $size == 0);
     close(IMG);                  }
               }
               close(IMG);
         system( "/usr/bin/convert -resize 640 $file $file" )  
                 if ( $size > 640 )               system( "/usr/bin/convert -resize 800 $file $file" )
               if ( $size > 800 ) 
        }
 }  }
   
 ## &writeMainIndex ( \%dates );  ## &writeMainIndex ( \%dates );
Line 158  sub downloadComic ($$) { Line 177  sub downloadComic ($$) {
                         return directDownload ( \%comics, $comic, $date );                          return directDownload ( \%comics, $comic, $date );
                         last SWITCH;                          last SWITCH;
                 }                  }
                   if ( $comics->{$comic}{'mode'} eq 3 ) { 
                           return 0;
                           last SWITCH;
                   }
         }          }
                     
         return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";          return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
Line 247  sub writeComic ($$) { Line 270  sub writeComic ($$) {
         my $indexFile = $indexDir . "/index-" . $date->{'year2'} .           my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                         $date->{'mon2'} . $date->{'day2'} . "-" .                           $date->{'mon2'} . $date->{'day2'} . "-" . 
                         $sd . ".html";                          $sd . ".html";
           $comics->{$comic}{'fullName'} =~ s/&/&amp;/g;
   
         my $content = <<EOF;          my $content = <<EOF;
   
 <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->  <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
Line 254  sub writeComic ($$) { Line 279  sub writeComic ($$) {
     <td align="left">      <td align="left">
 <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;   <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
 <font size="-2">  <font size="-2">
         <a href="$comics->{$comic}{'url'}">          <a href="$comics->{$comic}{'url'}" target="_blank">
                 $comics->{$comic}{'url'}                  $comics->{$comic}{'url'}
         </a>          </a>
 </font><br/>  </font><br/>
   EOF
           if ( $comics->{$comic}{'mode'} == 3 ) {
                   print("Mode 3\n");
                   $content .= <<EOF;
   <img src="$comics->{$comic}{'ext'}" alt="$comics->{$comic}{'fullName'}" />
   EOF
           } else {
                   $content .= <<EOF;
 <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />  <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
   EOF
           }
           $content .= <<EOF;
 <br/><br/>  <br/><br/>
 </td></tr>  </td></tr>
 <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->  <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
Line 309  sub writeFooter { Line 345  sub writeFooter {
         print INDEX <<EOF;          print INDEX <<EOF;
 </table>  </table>
 <center>  <center>
 <font size="2">  Generated on: <font size="2" color="green">$sysDate</font><br/>
 Generated on: <font color="green">$sysDate</font><br/>  Version: <font size="2" color="green">$ver</font><br />
 Version: <font color="green">$ver</font><br />  Config Version: <font size="2" color="green">$comicConfigVer</font><br />
 Config Version: <font color="green">$comicConfigVer</font><br />  CVS: <a href="http://demandred.dyndns.org:3000/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
 CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>    <br />
   <p>  
     <a href="http://validator.w3.org/check?uri=referer"><img      <a href="http://validator.w3.org/check?uri=referer"><img
       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>        src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
   </p>  
 </center>  </center>
   
 </body>  </body>
Line 354  sub writeTitle ($$) { Line 388  sub writeTitle ($$) {
 <html xmlns="http://www.w3.org/1999/xhtml">  <html xmlns="http://www.w3.org/1999/xhtml">
 <head>  <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">  <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
 <link rel="shortcut icon" href="./favicon.ico">  <link rel="shortcut icon" href="./favicon.ico" />
     <title>Daily Comics for $today</title>      <title>Daily Comics for $today</title>
   </head>    </head>
 <body bgcolor="#FFFFFF">  <body bgcolor="#FFFFFF">
 <table align="center" cellpadding="5" cellspacing="0">  <table align="center" cellpadding="0" cellspacing="0" border="0">
 <tr><td>  <tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr>
 <table cellpadding="0" cellspacing="0" border="0">  
 <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>  
 <tr><td align="left">$today_long</td></tr>  <tr><td align="left">$today_long</td></tr>
 <tr><td>&nbsp;</td></tr>  <tr><td>&nbsp;</td></tr>
 </td</tr>  
   
 EOF  EOF
         close (INDEX);          close (INDEX);
 }  }
Line 383  sub directDownload ($$) { Line 413  sub directDownload ($$) {
         my $cDir  = $date->{'mon2'} . $date->{'year2'};          my $cDir  = $date->{'mon2'} . $date->{'year2'};
         my $cDate = $date->{'day2'};          my $cDate = $date->{'day2'};
   
         my $cmd = "wget -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";          my $cmd = "wget --no-check-certificate -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
   
         return system($cmd);          return system($cmd);
 }  }
   
 #######################################################################  #######################################################################
 #######################################################################  #######################################################################
   sub linkOnly ($$) {
           my ( $comics, $comic, $date ) = @_;
   
           return 0;
   }
   #######################################################################
   #######################################################################
 sub indexDownload ($$) {  sub indexDownload ($$) {
         my ( $comics, $comic, $date ) = @_;          my ( $comics, $comic, $date ) = @_;
         my ( @lines, $comicLine, $mainURL );          my ( @lines, $comicLine, $mainURL );
         my $comicIndex = "indexes/index.$comic";          my $comicIndex = "indexes/index.$comic";
   
     my $wget_cmd = "wget -q --referer='$comics->{$comic}{'url'}' " .      print("Getching Index $comicIndex.\n");
                    "--user-agent=\"$USER_AGENT\" " .      print("comic url: $comics->{$comic}{'url'}\n");
   
       print Dumper($comics->{$comic});
   
       my $wget_cmd = "wget --referer='$comics->{$comic}{'url'}' " .
                      "--no-check-certificate --user-agent=\"$USER_AGENT\" " .
                    "$comics->{$comic}{'url'} -O $comicIndex";                     "$comics->{$comic}{'url'} -O $comicIndex";
     system($wget_cmd);      print ("Using wget command:\n$wget_cmd\n");
   
       my $status = system($wget_cmd);
   
       print ("Return status: $status\n");
   
         if ( ! open FILEN, "<$comicIndex" ) {            if ( ! open FILEN, "<$comicIndex" ) {  
                 return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .                   return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
Line 423  sub indexDownload ($$) { Line 469  sub indexDownload ($$) {
         ## Find the comic strip URL based on the specified regex in the search          ## Find the comic strip URL based on the specified regex in the search
         ##          ##
   
       print "Using search $comics->{$comic}{'search'}\n";
   
         foreach my $line (@lines) {          foreach my $line (@lines) {
                 if ( $line =~ m/$comics->{$comic}{'search'}/i ) {                  if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
               print "Found match:\n";
                         $comicLine = $1; chomp $comicLine;                          $comicLine = $1; chomp $comicLine;
               print "+ $comicLine\n";
                 }                  }
     }      }
   
Line 436  sub indexDownload ($$) { Line 486  sub indexDownload ($$) {
         my $cDate   = $date->{'day2'};          my $cDate   = $date->{'day2'};
   
         if ( $comicLine ) {          if ( $comicLine ) {
           print "Downloading Comic\n";
                 if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }                  if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                 my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;                  my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
           print "Final URL: $comicURL\n";
         # Strip &amp;          # Strip &amp;
         $comicURL =~ s/\&amp\;/&/g;          $comicURL =~ s/\&amp\;/&/g;
                 my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";                  my $cmd = "wget --no-check-certificate --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
                 system( $cmd );                  system( $cmd );
                 return 0;                  return 0;
         }          }

Legend:
  Removed from v.1.27
  Changed lines
  Added in v.1.31


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>