Annotation of comics/fetch.pl.new, revision 1.21

1.1       nick        1: #!/usr/bin/perl -w
                      2: 
1.15      nick        3: ###############################################################################
1.16      nick        4: # $Log: fetch.pl.new,v $
1.21    ! nick        5: # Revision 1.20  2015/10/22 12:58:44  nick
        !             6: # Added the ability for Sunday only comics.  Stonesoup is no longer weekdays, this has been added to Sunday only.  I also added Foxtrot Classics for weekdays and Foxtrot for Sundays.
        !             7: #
1.20      nick        8: # Revision 1.19  2015/07/13 12:56:58  nick
                      9: # Added Sally Forth and Pearls Before Swine.  Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
                     10: #
1.19      nick       11: # Revision 1.18  2015/05/07 12:31:43  nick
                     12: # Added favicon
                     13: #
1.18      nick       14: # Revision 1.17  2015/02/19 14:56:10  nick
                     15: # Fixed a problem that forced everything to JPG.  This would kill GIF animations, but would not display the GIFs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation.  I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file.  Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
                     16: #
1.17      nick       17: # Revision 1.16  2015/02/05 18:05:58  nick
                     18: # Changed the background and added a fancy title.
                     19: #
1.16      nick       20: # Revision 1.15  2015/01/19 13:46:19  nick
                     21: # *** empty log message ***
                     22: #
1.15      nick       23: ###############################################################################
                     24: 
1.1       nick       25: use strict;
                     26: use File::Path;
                     27: use Data::Dumper;
1.8       nick       28: use Pod::Usage;
                     29: use Getopt::Long;
1.1       nick       30: 
1.21    ! nick       31: use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
1.16      nick       32: 
1.1       nick       33: ## 
                     34: ## Some default values
                     35: ##
## Version string (CVS keyword), configuration file and parsed configuration.
my $ver        = '$Id: fetch.pl.new,v 1.20 2015/10/22 12:58:44 nick Exp $';
my $comicFile  = "comics.conf";
my %comics     = &readComicConfig ( $comicFile );

## Command line options; --days selects how far back in time to fetch.
my %opts       = &fetchOptions( );
my $days_ago   = ( $opts{'days'} ) ? $opts{'days'} : 0;
my %dates      = &fetchDates();

## Output locations: <base>/<images>/<MMYY> for images, <base>/<indexes>
## for the generated HTML pages.
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = join( "/",
                       $baseDir,
                       ( $comics{'configs'}{'image_directory'} || "images" ),
                       $dates{'mon2'} . $dates{'year2'} );
my $indexDir   = join( "/",
                       $baseDir,
                       ( $comics{'configs'}{'index_directory'} || "indexes" ) );

## User agent sent with every wget request, and day names indexed by the
## localtime() day-of-week number.
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
1.1       nick       48: 
                     49: my $DATE=`date`; chomp $DATE;
                     50: print STDOUT "Starting comic fetch at $DATE\n";
                     51: 
                     52: ##
                     53: ## Main program starts here
                     54: ##
                     55: &checkDir ( [ $imageDir, $indexDir ] );
                     56: 
1.5       nick       57: &writeTitle ( \%dates );
1.1       nick       58: 
                     59: foreach my $comic ( sort keys %comics ) {
1.20      nick       60: 
                     61:   ## Skip if this is Sunday and the comic is weekdays only
1.1       nick       62:   next if ( $comic =~ m/config/ );
1.21    ! nick       63:   if (($dates{'wday'} eq "Sunday") && 
1.20      nick       64:       ($comics{$comic}{'sunday'} == 0)) {
                     65:     print "Skipping '$comic'; Weekdays only.\n";
                     66:     next;
                     67:   }
                     68: 
                     69:   ## Skip if Sunday only comic and it's not Sunday.
1.21    ! nick       70:   if (($dates{'wday'} ne "Sunday") &&
1.20      nick       71:       ($comics{$comic}{'sunday_only'} == 1)) {
1.21    ! nick       72:     print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
1.20      nick       73:     next
                     74:   }
                     75: 
1.1       nick       76:   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
                     77:   &writeComic ( \%comics, $comic, \%dates );
                     78: 
1.17      nick       79:     my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
                     80:     my $size = 0;    
                     81: 
                     82:     my $cmd = "/usr/bin/identify -verbose $file|";
                     83:     open(IMG, $cmd) || die ("Can't open: $!\n");
                     84:     while(<IMG>) {
                     85:         if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
                     86:             $size = $1 if ( $size == 0);
                     87:         }
                     88:     }
                     89:     close(IMG);
1.4       nick       90: 
1.19      nick       91: 
1.4       nick       92:        system( "/usr/bin/convert -resize 640 $file $file" )
                     93:                if ( $size > 640 ) 
                     94: }
                     95: 
1.1       nick       96: ## &writeMainIndex ( \%dates );
                     97: 
                     98: &writeFooter( \%dates );
                     99: 
                    100: $DATE=`date`;  chomp( $DATE );
                    101: print STDOUT "Completed comic fetch at $DATE\n";
                    102: 
                    103: ## End
                    104: 
                    105: #######################################################################
                    106: ## Function :  downloadComic
                    107: ##
                    108: ##   Description :
                    109: ##     This function determines the download method being used to 
                    110: ##      retrieve the comic and calls the appropriate function.
                    111: ##
                    112: ##      If the mode is invalid an error will be returned.
                    113: ##
                    114: #######################################################################
                    115: sub downloadComic ($$) {
                    116:        my ( $comics, $comic, $date ) = @_;
                    117: 
                    118:        SWITCH: {
                    119:                if ( $comics->{$comic}{'mode'} eq 1 ) { 
                    120:                        return indexDownload ( \%comics, $comic, $date );
                    121:                        last SWITCH;
                    122:                }
                    123:                if ( $comics->{$comic}{'mode'} eq 2 ) { 
                    124:                        return directDownload ( \%comics, $comic, $date );
                    125:                        last SWITCH;
                    126:                }
                    127:        }
                    128:         
                    129:        return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
                    130: }
                    131: 
                    132: #######################################################################
                    133: #######################################################################
                    134: sub readComicConfig ($$) {
                    135:        my ( $comicFile ) = @_;
                    136:        my %comicConfig   = ( );
                    137:        my %config        = ( );
                    138: 
1.14      nick      139:     my ($year, $mon, $day) =( localtime(time))[5,4,3];
                    140:     $year += 1900;
                    141:     $mon = sprintf("%02d", ($mon + 1));
                    142:     $day = sprintf("%02d", $day);
                    143: 
1.1       nick      144:        open FILEN, "<$comicFile";
                    145:                while (<FILEN>) {
                    146:                        if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
1.14      nick      147:                 $_ =~ s/__YEAR__/$year/g;
                    148:                 $_ =~ s/__MON__/$mon/g;
                    149:                 $_ =~ s/__DAY__/$day/g;
                    150:                 
1.1       nick      151:                                my @res = split /,/, $_;
                    152:                                $comicConfig{$res[0]}{'url'}      = $res[1];
                    153:                                $comicConfig{$res[0]}{'search'}   = $res[2];
                    154:                                $comicConfig{$res[0]}{'mode'}     = $res[3];
                    155:                                $comicConfig{$res[0]}{'fullName'} = $res[4];
                    156:                                $comicConfig{$res[0]}{'ext'}      = $res[5];
1.21    ! nick      157:                 $comicConfig{$res[0]}{'sunday'}   = sprintf("%d", $res[6] || 1);
        !           158:                 $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0);
1.1       nick      159:                                $comicConfig{$res[0]}{'error'}    = 0;
                    160:                        }
                    161:                        elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {
                    162:                                $comicConfig{'configs'}{$1} = $2;
                    163:                        }
                    164:                }
                    165:        close (FILEN);
                    166: 
                    167:        return %comicConfig;
                    168: }
                    169: 
                    170: #######################################################################
                    171: #######################################################################
                    172: sub writeComic ($$) {
                    173:        my ( $comics, $comic, $date ) = @_;
1.11      nick      174:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      175:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    176:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    177:                        $sd . ".html";
1.1       nick      178:        my $content = <<EOF;
                    179: 
                    180: <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
                    181:   <tr>
                    182:     <td align="left">
                    183: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
                    184: <font size="-2">
                    185:        <a href="$comics->{$comic}{'url'}">
                    186:                $comics->{$comic}{'url'}
                    187:        </a>
                    188: </font><br/>
1.17      nick      189: <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
1.1       nick      190: <br/><br/>
                    191: </td></tr>
                    192: <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
                    193: 
                    194: EOF
                    195:        open INDEX, ">>$indexFile";
                    196: 
                    197:        print INDEX $content if ( ! $comics->{$comic}{'error'} );
                    198: 
                    199:        print INDEX <<EOF
                    200: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;
                    201: <font size="-2"><
                    202:         <a href="$comics->{$comic}{'url'}">
                    203:                 $comics->{$comic}{'url'}
                    204:         </a>
                    205: </font><br/>
                    206: <font color="red"><b>$comic :  $comics->{$comic}{'error'}</b></font><br/>
                    207:   </td>
                    208: </tr>
                    209: EOF
                    210:                if ( $comics->{$comic}{'error'} );
                    211: 
                    212:        close (INDEX);
                    213: 
                    214:        return 0;
                    215: }
                    216: 
                    217: 
                    218: #######################################################################
                    219: #######################################################################
                    220: sub writeMainIndex ($$) {
                    221:        my ( $date ) = @_;
                    222: 
                    223: }
                    224: 
                    225: 
                    226: #######################################################################
                    227: #######################################################################
                    228: sub writeFooter {
                    229:        my ( $date ) = @_;
1.11      nick      230:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      231:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    232:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    233:                        $sd . ".html";
1.1       nick      234:        my $sysDate = `date`;
                    235: 
                    236:        open INDEX, ">>$indexFile";
                    237:        print INDEX <<EOF;
                    238: </table>
1.3       nick      239: <center>
                    240: <font size="2">
                    241: Generated on: <font color="green">$sysDate</font><br/>
1.7       nick      242: Version: <font color="green">$ver</font><br />
                    243: CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
1.1       nick      244:   <p>
                    245:     <a href="http://validator.w3.org/check?uri=referer"><img
                    246:       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
                    247:   </p>
                    248: </center>
                    249: 
                    250: </body>
                    251: </html>
                    252: EOF
                    253:        close( INDEX );
                    254: }
                    255: 
                    256: #######################################################################
                    257: #######################################################################
                    258: sub checkDir ($$) {
                    259:        my @dir = @_;
                    260: 
                    261:        foreach ( @dir ) {
                    262:                if ( ! -d $_ ) { mkpath( $_ ); }
                    263:        }
                    264: }
                    265: 
                    266: #######################################################################
                    267: #######################################################################
                    268: sub writeTitle ($$) {
                    269:        my ( $date ) = @_;
1.11      nick      270:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      271:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    272:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    273:                        $sd . ".html";
1.8       nick      274:        my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
1.16      nick      275:     my $today_long = Date_to_Text_Long(Today());
1.1       nick      276: 
                    277:        open INDEX, ">$indexFile";
                    278:        print INDEX <<EOF;
                    279: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
                    280: 
                    281: <html xmlns="http://www.w3.org/1999/xhtml">
                    282: <head>
                    283: <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
1.13      nick      284: <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">
1.18      nick      285: <link rel="shortcut icon" href="./favicon.ico">
1.1       nick      286:     <title>Daily Comics for $today</title>
                    287:   </head>
                    288: <body bgcolor="#FFFFFF">
                    289: <table align="center" cellpadding="5" cellspacing="0">
1.16      nick      290: <tr><td>
                    291: <table cellpadding="0" cellspacing="0" border="0">
                    292: <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>
                    293: <tr><td align="left">$today_long</td></tr>
                    294: <tr><td>&nbsp;</td></tr>
                    295: </td</tr>
                    296: 
1.1       nick      297: EOF
                    298:        close (INDEX);
                    299: }
                    300: 
                    301: #######################################################################
                    302: #######################################################################
                    303: sub directDownload ($$) {
                    304:        my ( $comics, $comic, $date ) = @_;
                    305:        my $file = &parseComic ( $comics, $comic, $date );
                    306: 
                    307:         ##
                    308:         ## Save the file to the appropriate directory
                    309:         ##
                    310:         my $cDir  = $date->{'mon2'} . $date->{'year2'};
                    311:         my $cDate = $date->{'day2'};
                    312: 
1.18      nick      313:        my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
1.14      nick      314: 
1.1       nick      315:         return system($cmd);
                    316: }
                    317: 
                    318: #######################################################################
                    319: #######################################################################
                    320: sub indexDownload ($$) {
                    321:        my ( $comics, $comic, $date ) = @_;
                    322:        my ( @lines, $comicLine, $mainURL );
                    323:        my $comicIndex = "indexes/index.$comic";
                    324: 
1.19      nick      325:     my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .
                    326:                    "--user-agent=\"$USER_AGENT\" " .
                    327:                    "$comics->{$comic}{'url'} -O $comicIndex";
                    328:     system($wget_cmd);
1.1       nick      329: 
                    330:        if ( ! open FILEN, "<$comicIndex" ) {  
                    331:                return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
                    332:                       " (" . $comics->{$comic}{'url'} . ")"; 
                    333:        } 
                    334:                @lines = <FILEN>;
                    335:        close (FILEN);  
                    336: 
                    337:        unlink ("$comicIndex");
                    338: 
                    339:        $mainURL = $comics->{$comic}{'url'};
                    340:        ## I need to figure out how to merge these two in to one regex.
                    341:        $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
                    342:        $mainURL =~ s/([a-z])\/.*/$1/i;
                    343: 
                    344:        ##
                    345:        ## Find the comic strip URL based on the specified regex in the search
                    346:        ##
                    347:        foreach my $line (@lines) {
1.17      nick      348:                if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
1.1       nick      349:                        $comicLine = $1; chomp $comicLine;
                    350:                }
1.17      nick      351:     }
1.1       nick      352: 
                    353:        ##
                    354:        ## Save the file to the appropriate directory
                    355:        ##
                    356:        my $cDir    = $date->{'mon2'} . $date->{'year2'};
                    357:        my $cDate   = $date->{'day2'};
                    358: 
                    359:        if ( $comicLine ) {
                    360:                if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                    361:                my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
1.17      nick      362:                my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
1.1       nick      363:                system( $cmd );
                    364:                return 0;
                    365:        }
                    366: 
                    367:        unlink "index.html";
                    368: 
                    369:        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
                    370: }
                    371: 
                    372: #######################################################################
                    373: #######################################################################
                    374: sub parseComic ($$) {
                    375:        my ( $comics, $comic, $date ) = @_;
                    376:        my $string = $comics->{$comic}{'search'};
                    377: 
                    378:        $string =~ s/__year__/$date->{'year'}/g;
                    379:        $string =~ s/__year2__/$date->{'year2'}/g;
                    380:        $string =~ s/__mon__/$date->{'mon'}/g;
                    381:        $string =~ s/__mon2__/$date->{'mon2'}/g;
                    382:        $string =~ s/__day__/$date->{'day'}/g;
                    383:        $string =~ s/__day2__/$date->{'day2'}/g;
                    384:        $string =~ s/__ext__/$comics->{$comic}{'ext'}/g;
                    385:        chomp $string;
                    386: 
                    387:        return $string;
                    388: }
                    389: 
                    390: #######################################################################
                    391: #######################################################################
                    392: sub fetchDates () {
                    393:        my %dates = ();
                    394: 
1.8       nick      395:        ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime(time - (86400 * $days_ago )))[3,4,5,6];
1.1       nick      396: 
                    397:        $dates{'year'} += 1900;
                    398:        $dates{'year2'} = substr $dates{'year'}, 2, 2;
                    399:        $dates{'day2'}  = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; 
                    400:        $dates{'mon'}++;
                    401:        $dates{'mon2'}  = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'};
1.21    ! nick      402:     my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
        !           403:     $dates{'wday'} = $days[$dates{'dow'}];
1.1       nick      404: 
                    405:        return %dates;
                    406: }
1.8       nick      407: 
                    408: ###############################################################################
                    409: ##
                    410: ## &fetchOptions( );
                    411: ##
                    412: ##      Grab our command line arguments and toss them in to a hash
                    413: ##
                    414: ###############################################################################
                    415: sub fetchOptions {
                    416:         my %opts;
                    417: 
                    418:         &GetOptions(
                    419:                         "days:i"        => \$opts{'days'},
                    420:                         "help|?"        => \$opts{'help'},
                    421:                         "man"           => \$opts{'man'},
                    422:                    ) || &pod2usage( );
                    423:         &pod2usage( ) if defined $opts{'help'};
                    424:         &pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $opts{'man'};
                    425: 
                    426:         return %opts;
                    427: }
                    428: 
                    429: __END__
                    430: 
                    431: =head1 NAME
                    432: 
                    433: fetch.pl - Fetches comics and places them all locally in a single html file.
                    434: 
                    435: =head1 SYNOPSIS
                    436: 
                    437: fetch.pl [options]
                    438: 
                    439: Options:
                    440:         --days,d        Fetch comics from X days ago
                    441:         --help,?        Display the basic help menu
                    442:         --man,m         Display the detailed man page
                    443: 
                    444: =head1 DESCRIPTION
                    445: 
                    446: =head1 HISTORY
                    447: 
                    448: =head1 AUTHOR
                    449: 
                    450: Nicholas DeClario <nick@declario.com>
                    451: 
                    452: =head1 BUGS
                    453: 
                    454: This is a work in progress.  Please report all bugs to the author.
                    455: 
                    456: =head1 SEE ALSO
                    457: 
                    458: =head1 COPYRIGHT
                    459: 
                    460: =cut
                    461: 
                    462: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>