Diff for /comics/fetch.pl.new between versions 1.15 and 1.19

version 1.15, 2015/01/19 13:46:19 version 1.19, 2015/07/13 12:56:58
Line 2 Line 2
   
 ###############################################################################  ###############################################################################
 # $Log$  # $Log$
   # Revision 1.19  2015/07/13 12:56:58  nick
   # Added Sally Forth and Pearls Before Swine.  Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
   #
   # Revision 1.18  2015/05/07 12:31:43  nick
   # Added favicon
   #
   # Revision 1.17  2015/02/19 14:56:10  nick
   # Fixed a problem that forced everything to JPG.  This would kill GIF animations, but would not display the GIFs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation.  I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file.  Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
   #
   # Revision 1.16  2015/02/05 18:05:58  nick
   # Changed the background and added a fancy title.
   #
 # Revision 1.15  2015/01/19 13:46:19  nick  # Revision 1.15  2015/01/19 13:46:19  nick
 # *** empty log message ***  # *** empty log message ***
 #  #
Line 13  use Data::Dumper; Line 25  use Data::Dumper;
 use Pod::Usage;  use Pod::Usage;
 use Getopt::Long;  use Getopt::Long;
   
   use Date::Calc qw/Date_to_Text_Long Today/;
   
 ##   ## 
 ## Some default values  ## Some default values
 ##  ##
Line 29  my $indexDir    = $baseDir . "/" . ( $co Line 43  my $indexDir    = $baseDir . "/" . ( $co
 my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";  my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
 my @days        = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;  my @days        = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
   
   
 my $DATE=`date`; chomp $DATE;  my $DATE=`date`; chomp $DATE;
 print STDOUT "Starting comic fetch at $DATE\n";  print STDOUT "Starting comic fetch at $DATE\n";
   
Line 46  foreach my $comic ( sort keys %comics ) Line 59  foreach my $comic ( sort keys %comics )
        ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }         ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }
   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
   &writeComic ( \%comics, $comic, \%dates );    &writeComic ( \%comics, $comic, \%dates );
 }  
   
 print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";      my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
 foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) )       my $size = 0;    
 {  
         my $size = `/usr/bin/identify $file`;      my $cmd = "/usr/bin/identify -verbose $file|";
         $size =~ s/.*\s(\d+)x\d+.*/$1/;      open(IMG, $cmd) || die ("Can't open: $!\n");
       while(<IMG>) {
           if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
               $size = $1 if ( $size == 0);
           }
       }
       close(IMG);
   
   
         system( "/usr/bin/convert -resize 640 $file $file" )          system( "/usr/bin/convert -resize 640 $file $file" )
                 if ( $size > 640 )                   if ( $size > 640 ) 
Line 150  sub writeComic ($$) { Line 169  sub writeComic ($$) {
                 $comics->{$comic}{'url'}                  $comics->{$comic}{'url'}
         </a>          </a>
 </font><br/>  </font><br/>
 <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />  <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
 <br/><br/>  <br/><br/>
 </td></tr>  </td></tr>
 <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->  <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
Line 236  sub writeTitle ($$) { Line 255  sub writeTitle ($$) {
                         $date->{'mon2'} . $date->{'day2'} . "-" .                           $date->{'mon2'} . $date->{'day2'} . "-" . 
                         $sd . ".html";                          $sd . ".html";
         my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};          my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
       my $today_long = Date_to_Text_Long(Today());
   
         open INDEX, ">$indexFile";          open INDEX, ">$indexFile";
         print INDEX <<EOF;          print INDEX <<EOF;
Line 245  sub writeTitle ($$) { Line 265  sub writeTitle ($$) {
 <head>  <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
 <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">  <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">
   <link rel="shortcut icon" href="./favicon.ico">
     <title>Daily Comics for $today</title>      <title>Daily Comics for $today</title>
   </head>    </head>
 <body bgcolor="#FFFFFF">  <body bgcolor="#FFFFFF">
 <h1>Daily Comics for $today</h1>  
 <table align="center" cellpadding="5" cellspacing="0">  <table align="center" cellpadding="5" cellspacing="0">
   <tr><td>
   <table cellpadding="0" cellspacing="0" border="0">
   <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>
   <tr><td align="left">$today_long</td></tr>
   <tr><td>&nbsp;</td></tr>
   </td</tr>
   
 EOF  EOF
         close (INDEX);          close (INDEX);
 }  }
Line 266  sub directDownload ($$) { Line 293  sub directDownload ($$) {
         my $cDir  = $date->{'mon2'} . $date->{'year2'};          my $cDir  = $date->{'mon2'} . $date->{'year2'};
         my $cDate = $date->{'day2'};          my $cDate = $date->{'day2'};
   
         my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";          my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
   
         return system($cmd);          return system($cmd);
 }  }
Line 278  sub indexDownload ($$) { Line 305  sub indexDownload ($$) {
         my ( @lines, $comicLine, $mainURL );          my ( @lines, $comicLine, $mainURL );
         my $comicIndex = "indexes/index.$comic";          my $comicIndex = "indexes/index.$comic";
   
         `wget -q $comics->{$comic}{'url'} -O $comicIndex`;      my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .
                      "--user-agent=\"$USER_AGENT\" " .
                      "$comics->{$comic}{'url'} -O $comicIndex";
       system($wget_cmd);
   
         if ( ! open FILEN, "<$comicIndex" ) {            if ( ! open FILEN, "<$comicIndex" ) {  
                 return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .                   return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
Line 298  sub indexDownload ($$) { Line 328  sub indexDownload ($$) {
         ## Find the comic strip URL based on the specified regex in the search          ## Find the comic strip URL based on the specified regex in the search
         ##          ##
         foreach my $line (@lines) {          foreach my $line (@lines) {
                 if ( $line =~ m/$comics->{$comic}{'search'}/ ) {                  if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
                         $comicLine = $1; chomp $comicLine;                          $comicLine = $1; chomp $comicLine;
                 }                  }
         }      }
   
         ##          ##
         ## Save the file to the appropriate directory          ## Save the file to the appropriate directory
Line 312  sub indexDownload ($$) { Line 342  sub indexDownload ($$) {
         if ( $comicLine ) {          if ( $comicLine ) {
                 if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }                  if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                 my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;                  my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
                 my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";                  my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
                 system( $cmd );                  system( $cmd );
                 return 0;                  return 0;
         }          }

Removed from v.1.15  
changed lines
  Added in v.1.19


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>