Diff for /comics/fetch.pl.new between versions 1.26 and 1.27

version 1.26, 2018/04/22 14:03:54 version 1.27, 2019/04/15 12:50:23
Line 2 Line 2
   
 ###############################################################################  ###############################################################################
 # $Log$  # $Log$
   # Revision 1.27  2019/04/15 12:50:23  nick
   # The script was unable to handle the HTML entity '&amp;' in comic URLs, so I added a conversion to a literal '&'.  I should probably see if there's a library that decodes all HTML entities automatically, but for now I just added a regex that does the trick.
   #
 # Revision 1.26  2018/04/22 14:03:54  nick  # Revision 1.26  2018/04/22 14:03:54  nick
 # Changed the default for Sunday comics that was causing issues with some comics.  # Changed the default for Sunday comics that was causing issues with some comics.
 #  #
Line 211  sub writeStatusReportJSON ($$) { Line 214  sub writeStatusReportJSON ($$) {
     my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900,      my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900,
                                           (localtime)[4] + 1,                                            (localtime)[4] + 1,
                                           (localtime)[3]);                                            (localtime)[3]);
     my %json = ('date' => $shortDate, 'comics' => []);      my %json = ('date' => $shortDate, 'comics' => ());
     my $totalErrors = 0;      my $totalErrors = 0;
   
     foreach my $comic (sort keys %comics) {      foreach my $comic (sort keys %comics) {
Line 220  sub writeStatusReportJSON ($$) { Line 223  sub writeStatusReportJSON ($$) {
         my %error = ('comicName' => "$comics{$comic}{'fullName'}",          my %error = ('comicName' => "$comics{$comic}{'fullName'}",
                      'error' => "$comics{$comic}{'error'}",                       'error' => "$comics{$comic}{'error'}",
                      'status' => "Error");                       'status' => "Error");
         push $json{'comics'}, \%error;          push @{$json{'comics'}}, \%error;
         $totalErrors += 1;          $totalErrors += 1;
       } else {        } else {
         my %status = ('comicName' => "$comics{$comic}{'fullName'}",          my %status = ('comicName' => "$comics{$comic}{'fullName'}",
                       'error' => 0,                        'error' => 0,
                       'status' => "Successfull");                        'status' => "Successfull");
         push $json{'comics'}, \%status;          push @{$json{'comics'}}, \%status;
       }        }
     }      }
     $json{'totalErrors'} = $totalErrors;          $json{'totalErrors'} = $totalErrors;    
Line 380  sub directDownload ($$) { Line 383  sub directDownload ($$) {
         my $cDir  = $date->{'mon2'} . $date->{'year2'};          my $cDir  = $date->{'mon2'} . $date->{'year2'};
         my $cDate = $date->{'day2'};          my $cDate = $date->{'day2'};
   
         my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";          my $cmd = "wget -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
   
         return system($cmd);          return system($cmd);
 }  }
Line 392  sub indexDownload ($$) { Line 395  sub indexDownload ($$) {
         my ( @lines, $comicLine, $mainURL );          my ( @lines, $comicLine, $mainURL );
         my $comicIndex = "indexes/index.$comic";          my $comicIndex = "indexes/index.$comic";
   
     my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .      my $wget_cmd = "wget -q --referer='$comics->{$comic}{'url'}' " .
                    "--user-agent=\"$USER_AGENT\" " .                     "--user-agent=\"$USER_AGENT\" " .
                    "$comics->{$comic}{'url'} -O $comicIndex";                     "$comics->{$comic}{'url'} -O $comicIndex";
     system($wget_cmd);      system($wget_cmd);
Line 403  sub indexDownload ($$) { Line 406  sub indexDownload ($$) {
         }           } 
     while (<FILEN>) {      while (<FILEN>) {
         my $line = $_;          my $line = $_;
         $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newliens'} );          $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} );
         push @lines, $line;          push @lines, $line;
     }      }
         close (FILEN);            close (FILEN);  
   
   
         unlink ("$comicIndex");          unlink ("$comicIndex");
   
         $mainURL = $comics->{$comic}{'url'};          $mainURL = $comics->{$comic}{'url'};
Line 418  sub indexDownload ($$) { Line 422  sub indexDownload ($$) {
         ##          ##
         ## Find the comic strip URL based on the specified regex in the search          ## Find the comic strip URL based on the specified regex in the search
         ##          ##
   
         foreach my $line (@lines) {          foreach my $line (@lines) {
                 if ( $line =~ m/$comics->{$comic}{'search'}/i ) {                  if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
                         $comicLine = $1; chomp $comicLine;                          $comicLine = $1; chomp $comicLine;
Line 433  sub indexDownload ($$) { Line 438  sub indexDownload ($$) {
         if ( $comicLine ) {          if ( $comicLine ) {
                 if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }                  if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                 my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;                  my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
                 my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";          # Decode the HTML entity &amp; to a literal & in the URL
           $comicURL =~ s/\&amp\;/&/g;
                   my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
                 system( $cmd );                  system( $cmd );
                 return 0;                  return 0;
         }          }

Removed from v.1.26  
changed lines
  Added in v.1.27


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>