Diff for /comics/fetch.pl.new between versions 1.6 and 1.19

version 1.6, 2012/12/20 16:48:06 version 1.19, 2015/07/13 12:56:58
Line 1 Line 1
 #!/usr/bin/perl -w  #!/usr/bin/perl -w
   
   ###############################################################################
   # $Log$
   # Revision 1.19  2015/07/13 12:56:58  nick
   # Added Sally Forth and Pearls Before Swine.  Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
   #
   # Revision 1.18  2015/05/07 12:31:43  nick
   # Added favicon
   #
   # Revision 1.17  2015/02/19 14:56:10  nick
   # Fixed a problem that forced everything to JPG.  This would kill GIF animations, but would not display the gifs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation.  I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file.  Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
   #
   # Revision 1.16  2015/02/05 18:05:58  nick
   # Changed the background and added a fancy title.
   #
   # Revision 1.15  2015/01/19 13:46:19  nick
   # *** empty log message ***
   #
   ###############################################################################
   
 use strict;  use strict;
 use File::Path;  use File::Path;
 use Data::Dumper;  use Data::Dumper;
   use Pod::Usage;
   use Getopt::Long;
   
   use Date::Calc qw/Date_to_Text_Long Today/;
   
 ##   ## 
 ## Some default values  ## Some default values
 ##  ##
 my $ver         = q/$Id$/;  my $ver         = '$Id$';
 my $comicFile   = "comics.conf";  my $comicFile   = "comics.conf";
 my %comics      = &readComicConfig ( $comicFile );  my %comics      = &readComicConfig ( $comicFile );
 my $days_ago    = 0;  # Set this to fetch comics from X days ago  my %opts        = &fetchOptions( );
   my $days_ago    = $opts{'days'} || 0;
 my %dates       = &fetchDates();  my %dates       = &fetchDates();
 my $baseDir     = $comics{'configs'}{'base_directory'} || ".";  my $baseDir     = $comics{'configs'}{'base_directory'} || ".";
 my $imageDir    = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .   my $imageDir    = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) . 
                   "/$dates{'mon2'}$dates{'year2'}";                    "/$dates{'mon2'}$dates{'year2'}";
 my $indexDir    = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );  my $indexDir    = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
 my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";  my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
   my @days        = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
   
 my $DATE=`date`; chomp $DATE;  my $DATE=`date`; chomp $DATE;
 print STDOUT "Starting comic fetch at $DATE\n";  print STDOUT "Starting comic fetch at $DATE\n";
Line 31  print STDOUT "Starting comic fetch at $D Line 55  print STDOUT "Starting comic fetch at $D
   
 foreach my $comic ( sort keys %comics ) {  foreach my $comic ( sort keys %comics ) {
   next if ( $comic =~ m/config/ );    next if ( $comic =~ m/config/ );
     if ( ( $dates{'day2'} eq "Sunday" ) && 
          ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }
   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
   &writeComic ( \%comics, $comic, \%dates );    &writeComic ( \%comics, $comic, \%dates );
 }  
   
 my $D  = `date +%d`; chomp $D;      my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
 print "Finding in $imageDir/*-$D.jpg\n";      my $size = 0;    
 foreach my $file ( glob( "$imageDir/*-$D.jpg" ) )   
 {      my $cmd = "/usr/bin/identify -verbose $file|";
         my $size = `/usr/bin/identify $file`;      open(IMG, $cmd) || die ("Can't open: $!\n");
         $size =~ s/.*\s(\d+)x\d+.*/$1/;      while(<IMG>) {
           if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
               $size = $1 if ( $size == 0);
           }
       }
       close(IMG);
   
   
         system( "/usr/bin/convert -resize 640 $file $file" )          system( "/usr/bin/convert -resize 640 $file $file" )
                 if ( $size > 640 )                   if ( $size > 640 ) 
Line 89  sub readComicConfig ($$) { Line 120  sub readComicConfig ($$) {
         my %comicConfig   = ( );          my %comicConfig   = ( );
         my %config        = ( );          my %config        = ( );
   
       my ($year, $mon, $day) =( localtime(time))[5,4,3];
       $year += 1900;
       $mon = sprintf("%02d", ($mon + 1));
       $day = sprintf("%02d", $day);
   
         open FILEN, "<$comicFile";          open FILEN, "<$comicFile";
                 while (<FILEN>) {                  while (<FILEN>) {
                         if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){                          if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
                   $_ =~ s/__YEAR__/$year/g;
                   $_ =~ s/__MON__/$mon/g;
                   $_ =~ s/__DAY__/$day/g;
                   
                                 my @res = split /,/, $_;                                  my @res = split /,/, $_;
                                 $comicConfig{$res[0]}{'url'}      = $res[1];                                  $comicConfig{$res[0]}{'url'}      = $res[1];
                                 $comicConfig{$res[0]}{'search'}   = $res[2];                                  $comicConfig{$res[0]}{'search'}   = $res[2];
                                 $comicConfig{$res[0]}{'mode'}     = $res[3];                                  $comicConfig{$res[0]}{'mode'}     = $res[3];
                                 $comicConfig{$res[0]}{'fullName'} = $res[4];                                  $comicConfig{$res[0]}{'fullName'} = $res[4];
                                 $comicConfig{$res[0]}{'ext'}      = $res[5];                                  $comicConfig{$res[0]}{'ext'}      = $res[5];
                   $comicConfig{$res[0]}{'sunday'}   = $res[6] || 1;
                                 $comicConfig{$res[0]}{'error'}    = 0;                                  $comicConfig{$res[0]}{'error'}    = 0;
                         }                          }
                         elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {                          elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {
Line 113  sub readComicConfig ($$) { Line 154  sub readComicConfig ($$) {
 #######################################################################  #######################################################################
 sub writeComic ($$) {  sub writeComic ($$) {
         my ( $comics, $comic, $date ) = @_;          my ( $comics, $comic, $date ) = @_;
         my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .          my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
                         $date->{'day2'} . ".html";          my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                           $date->{'mon2'} . $date->{'day2'} . "-" . 
                           $sd . ".html";
         my $content = <<EOF;          my $content = <<EOF;
   
 <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->  <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
Line 126  sub writeComic ($$) { Line 169  sub writeComic ($$) {
                 $comics->{$comic}{'url'}                  $comics->{$comic}{'url'}
         </a>          </a>
 </font><br/>  </font><br/>
 <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />  <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
 <br/><br/>  <br/><br/>
 </td></tr>  </td></tr>
 <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->  <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
Line 167  sub writeMainIndex ($$) { Line 210  sub writeMainIndex ($$) {
 #######################################################################  #######################################################################
 sub writeFooter {  sub writeFooter {
         my ( $date ) = @_;          my ( $date ) = @_;
         my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .          my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
                         $date->{'day2'} . ".html";          my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                           $date->{'mon2'} . $date->{'day2'} . "-" . 
                           $sd . ".html";
         my $sysDate = `date`;          my $sysDate = `date`;
   
         open INDEX, ">>$indexFile";          open INDEX, ">>$indexFile";
Line 177  sub writeFooter { Line 222  sub writeFooter {
 <center>  <center>
 <font size="2">  <font size="2">
 Generated on: <font color="green">$sysDate</font><br/>  Generated on: <font color="green">$sysDate</font><br/>
 Version: <font color="green">$ver</font></font>  Version: <font color="green">$ver</font><br />
   CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
   <p>    <p>
     <a href="http://validator.w3.org/check?uri=referer"><img      <a href="http://validator.w3.org/check?uri=referer"><img
       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>        src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
Line 204  sub checkDir ($$) { Line 250  sub checkDir ($$) {
 #######################################################################  #######################################################################
 sub writeTitle ($$) {  sub writeTitle ($$) {
         my ( $date ) = @_;          my ( $date ) = @_;
         my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .          my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
                          $date->{'day2'} . ".html";          my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
         my $today     = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};                          $date->{'mon2'} . $date->{'day2'} . "-" . 
                           $sd . ".html";
           my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
       my $today_long = Date_to_Text_Long(Today());
   
         open INDEX, ">$indexFile";          open INDEX, ">$indexFile";
         print INDEX <<EOF;          print INDEX <<EOF;
Line 215  sub writeTitle ($$) { Line 264  sub writeTitle ($$) {
 <html xmlns="http://www.w3.org/1999/xhtml">  <html xmlns="http://www.w3.org/1999/xhtml">
 <head>  <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
   <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">
   <link rel="shortcut icon" href="./favicon.ico">
     <title>Daily Comics for $today</title>      <title>Daily Comics for $today</title>
   </head>    </head>
 <body bgcolor="#FFFFFF">  <body bgcolor="#FFFFFF">
 <h1>Daily Comics for $today</h1>  
 <table align="center" cellpadding="5" cellspacing="0">  <table align="center" cellpadding="5" cellspacing="0">
   <tr><td>
   <table cellpadding="0" cellspacing="0" border="0">
   <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>
   <tr><td align="left">$today_long</td></tr>
   <tr><td>&nbsp;</td></tr>
   </td</tr>
   
 EOF  EOF
         close (INDEX);          close (INDEX);
 }  }
Line 236  sub directDownload ($$) { Line 293  sub directDownload ($$) {
         my $cDir  = $date->{'mon2'} . $date->{'year2'};          my $cDir  = $date->{'mon2'} . $date->{'year2'};
         my $cDate = $date->{'day2'};          my $cDate = $date->{'day2'};
   
         my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";          my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
   
         return system($cmd);          return system($cmd);
 }  }
   
Line 247  sub indexDownload ($$) { Line 305  sub indexDownload ($$) {
         my ( @lines, $comicLine, $mainURL );          my ( @lines, $comicLine, $mainURL );
         my $comicIndex = "indexes/index.$comic";          my $comicIndex = "indexes/index.$comic";
   
         `wget -q $comics->{$comic}{'url'} -O $comicIndex`;      my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .
                      "--user-agent=\"$USER_AGENT\" " .
                      "$comics->{$comic}{'url'} -O $comicIndex";
       system($wget_cmd);
   
         if ( ! open FILEN, "<$comicIndex" ) {            if ( ! open FILEN, "<$comicIndex" ) {  
                 return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .                   return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
Line 267  sub indexDownload ($$) { Line 328  sub indexDownload ($$) {
         ## Find the comic strip URL based on the specified regex in the search          ## Find the comic strip URL based on the specified regex in the search
         ##          ##
         foreach my $line (@lines) {          foreach my $line (@lines) {
                 if ( $line =~ m/$comics->{$comic}{'search'}/ ) {                  if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
                         $comicLine = $1; chomp $comicLine;                          $comicLine = $1; chomp $comicLine;
                 }                  }
         }      }
   
         ##          ##
         ## Save the file to the appropriate directory          ## Save the file to the appropriate directory
Line 281  sub indexDownload ($$) { Line 342  sub indexDownload ($$) {
         if ( $comicLine ) {          if ( $comicLine ) {
                 if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }                  if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                 my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;                  my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
                 my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";                  my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
                 system( $cmd );                  system( $cmd );
                 return 0;                  return 0;
         }          }
Line 314  sub parseComic ($$) { Line 375  sub parseComic ($$) {
 sub fetchDates () {  sub fetchDates () {
         my %dates = ();          my %dates = ();
   
         ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime)[3,4,5,6];          ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime(time - (86400 * $days_ago )))[3,4,5,6];
   
         ## If you missed a day or two, reflect it here:  
         $dates{'day'} -= $days_ago;  ## <-- 5 days ago  
   
         $dates{'year'} += 1900;          $dates{'year'} += 1900;
         $dates{'year2'} = substr $dates{'year'}, 2, 2;          $dates{'year2'} = substr $dates{'year'}, 2, 2;
Line 327  sub fetchDates () { Line 385  sub fetchDates () {
   
         return %dates;          return %dates;
 }  }
   
   ###############################################################################
   ##
   ## &fetchOptions( );
   ##
   ##      Grab our command line arguments and toss them in to a hash
   ##
   ###############################################################################
   sub fetchOptions {
           my %opts;
   
           &GetOptions(
                           "days:i"        => \$opts{'days'},
                           "help|?"        => \$opts{'help'},
                           "man"           => \$opts{'man'},
                      ) || &pod2usage( );
           &pod2usage( ) if defined $opts{'help'};
           &pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $opts{'man'};
   
           return %opts;
   }
   
   __END__
   
   =head1 NAME
   
   fetch.pl - Fetches comics and places them all locally in a single html file.
   
   =head1 SYNOPSIS
   
   fetch.pl [options]
   
   Options:
           --days,d        Fetch comics from X days ago
           --help,?        Display the basic help menu
           --man,m         Display the detailed man page
   
   =head1 DESCRIPTION
   
   =head1 HISTORY
   
   =head1 AUTHOR
   
   Nicholas DeClario <nick@declario.com>
   
   =head1 BUGS
   
   This is a work in progress.  Please report all bugs to the author.
   
   =head1 SEE ALSO
   
   =head1 COPYRIGHT
   
   =cut
   
   

Removed from v.1.6  
changed lines
  Added in v.1.19


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>