Annotation of comics/fetch.pl.new, revision 1.19

1.1       nick        1: #!/usr/bin/perl -w
                      2: 
1.15      nick        3: ###############################################################################
1.16      nick        4: # $Log: fetch.pl.new,v $
1.19    ! nick        5: # Revision 1.18  2015/05/07 12:31:43  nick
        !             6: # Added favicon
        !             7: #
1.18      nick        8: # Revision 1.17  2015/02/19 14:56:10  nick
                      9: # Fixed a problem that forced everything to JPG.  This would kill GIF animations, but would not display the gifs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation.  I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file.  Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
                     10: #
1.17      nick       11: # Revision 1.16  2015/02/05 18:05:58  nick
                     12: # Changed the background and added a fancy title.
                     13: #
1.16      nick       14: # Revision 1.15  2015/01/19 13:46:19  nick
                     15: # *** empty log message ***
                     16: #
1.15      nick       17: ###############################################################################
                     18: 
1.1       nick       19: use strict;
                     20: use File::Path;
                     21: use Data::Dumper;
1.8       nick       22: use Pod::Usage;
                     23: use Getopt::Long;
1.1       nick       24: 
1.16      nick       25: use Date::Calc qw/Date_to_Text_Long Today/;
                     26: 
##
## Script-wide settings.
##
## The order matters: the configuration file is read first, then the
## command line, and the date hash depends on $days_ago.  The directory
## names in turn depend on both the configuration and the dates.
##
my $ver       = '$Id: fetch.pl.new,v 1.18 2015/05/07 12:31:43 nick Exp $';
my $comicFile = 'comics.conf';
my %comics    = &readComicConfig( $comicFile );
my %opts      = &fetchOptions( );
my $days_ago  = $opts{'days'} ? $opts{'days'} : 0;
my %dates     = &fetchDates();

## Directory layout: <base>/<images>/<MMYY> and <base>/<indexes>
my $baseDir   = $comics{'configs'}{'base_directory'}
              ? $comics{'configs'}{'base_directory'}
              : '.';
my $imageDir  = join( '/',
                      $baseDir,
                      ( $comics{'configs'}{'image_directory'} || 'images' ),
                      "$dates{'mon2'}$dates{'year2'}" );
my $indexDir  = join( '/',
                      $baseDir,
                      ( $comics{'configs'}{'index_directory'} || 'indexes' ) );

## Browser identification sent with every wget request
my $USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18';

## Weekday names, indexed by localtime's day-of-week (0 = Sunday)
my @days = ( 'Sunday', 'Monday', 'Tuesday', 'Wednesday',
             'Thursday', 'Friday', 'Saturday' );

chomp( my $DATE = `date` );
print STDOUT "Starting comic fetch at $DATE\n";
                     45: 
                     46: ##
                     47: ## Main program starts here
                     48: ##
                     49: &checkDir ( [ $imageDir, $indexDir ] );
                     50: 
1.5       nick       51: &writeTitle ( \%dates );
1.1       nick       52: 
                     53: foreach my $comic ( sort keys %comics ) {
                     54:   next if ( $comic =~ m/config/ );
1.14      nick       55:   if ( ( $dates{'day2'} eq "Sunday" ) && 
                     56:        ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }
1.1       nick       57:   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
                     58:   &writeComic ( \%comics, $comic, \%dates );
                     59: 
1.17      nick       60:     my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
                     61:     my $size = 0;    
                     62: 
                     63:     my $cmd = "/usr/bin/identify -verbose $file|";
                     64:     open(IMG, $cmd) || die ("Can't open: $!\n");
                     65:     while(<IMG>) {
                     66:         if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
                     67:             $size = $1 if ( $size == 0);
                     68:         }
                     69:     }
                     70:     close(IMG);
1.4       nick       71: 
1.19    ! nick       72: 
1.4       nick       73:        system( "/usr/bin/convert -resize 640 $file $file" )
                     74:                if ( $size > 640 ) 
                     75: }
                     76: 
1.1       nick       77: ## &writeMainIndex ( \%dates );
                     78: 
                     79: &writeFooter( \%dates );
                     80: 
                     81: $DATE=`date`;  chomp( $DATE );
                     82: print STDOUT "Completed comic fetch at $DATE\n";
                     83: 
                     84: ## End
                     85: 
                     86: #######################################################################
                     87: ## Function :  downloadComic
                     88: ##
                     89: ##   Description :
                     90: ##     This function determines the download method being used to 
                     91: ##      retrieve the comic and calls the apprioriate function.
                     92: ##
                     93: ##      If the mode is invalid an error will be returned.
                     94: ##
                     95: #######################################################################
                     96: sub downloadComic ($$) {
                     97:        my ( $comics, $comic, $date ) = @_;
                     98: 
                     99:        SWITCH: {
                    100:                if ( $comics->{$comic}{'mode'} eq 1 ) { 
                    101:                        return indexDownload ( \%comics, $comic, $date );
                    102:                        last SWITCH;
                    103:                }
                    104:                if ( $comics->{$comic}{'mode'} eq 2 ) { 
                    105:                        return directDownload ( \%comics, $comic, $date );
                    106:                        last SWITCH;
                    107:                }
                    108:        }
                    109:         
                    110:        return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
                    111: }
                    112: 
                    113: #######################################################################
                    114: #######################################################################
                    115: sub readComicConfig ($$) {
                    116:        my ( $comicFile ) = @_;
                    117:        my %comicConfig   = ( );
                    118:        my %config        = ( );
                    119: 
1.14      nick      120:     my ($year, $mon, $day) =( localtime(time))[5,4,3];
                    121:     $year += 1900;
                    122:     $mon = sprintf("%02d", ($mon + 1));
                    123:     $day = sprintf("%02d", $day);
                    124: 
1.1       nick      125:        open FILEN, "<$comicFile";
                    126:                while (<FILEN>) {
                    127:                        if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
1.14      nick      128:                 $_ =~ s/__YEAR__/$year/g;
                    129:                 $_ =~ s/__MON__/$mon/g;
                    130:                 $_ =~ s/__DAY__/$day/g;
                    131:                 
1.1       nick      132:                                my @res = split /,/, $_;
                    133:                                $comicConfig{$res[0]}{'url'}      = $res[1];
                    134:                                $comicConfig{$res[0]}{'search'}   = $res[2];
                    135:                                $comicConfig{$res[0]}{'mode'}     = $res[3];
                    136:                                $comicConfig{$res[0]}{'fullName'} = $res[4];
                    137:                                $comicConfig{$res[0]}{'ext'}      = $res[5];
1.14      nick      138:                 $comicConfig{$res[0]}{'sunday'}   = $res[6] || 1;
1.1       nick      139:                                $comicConfig{$res[0]}{'error'}    = 0;
                    140:                        }
                    141:                        elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {
                    142:                                $comicConfig{'configs'}{$1} = $2;
                    143:                        }
                    144:                }
                    145:        close (FILEN);
                    146: 
                    147:        return %comicConfig;
                    148: }
                    149: 
                    150: #######################################################################
                    151: #######################################################################
                    152: sub writeComic ($$) {
                    153:        my ( $comics, $comic, $date ) = @_;
1.11      nick      154:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      155:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    156:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    157:                        $sd . ".html";
1.1       nick      158:        my $content = <<EOF;
                    159: 
                    160: <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
                    161:   <tr>
                    162:     <td align="left">
                    163: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
                    164: <font size="-2">
                    165:        <a href="$comics->{$comic}{'url'}">
                    166:                $comics->{$comic}{'url'}
                    167:        </a>
                    168: </font><br/>
1.17      nick      169: <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
1.1       nick      170: <br/><br/>
                    171: </td></tr>
                    172: <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
                    173: 
                    174: EOF
                    175:        open INDEX, ">>$indexFile";
                    176: 
                    177:        print INDEX $content if ( ! $comics->{$comic}{'error'} );
                    178: 
                    179:        print INDEX <<EOF
                    180: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;
                    181: <font size="-2"><
                    182:         <a href="$comics->{$comic}{'url'}">
                    183:                 $comics->{$comic}{'url'}
                    184:         </a>
                    185: </font><br/>
                    186: <font color="red"><b>$comic :  $comics->{$comic}{'error'}</b></font><br/>
                    187:   </td>
                    188: </tr>
                    189: EOF
                    190:                if ( $comics->{$comic}{'error'} );
                    191: 
                    192:        close (INDEX);
                    193: 
                    194:        return 0;
                    195: }
                    196: 
                    197: 
                    198: #######################################################################
                    199: #######################################################################
                    200: sub writeMainIndex ($$) {
                    201:        my ( $date ) = @_;
                    202: 
                    203: }
                    204: 
                    205: 
                    206: #######################################################################
                    207: #######################################################################
                    208: sub writeFooter {
                    209:        my ( $date ) = @_;
1.11      nick      210:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      211:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    212:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    213:                        $sd . ".html";
1.1       nick      214:        my $sysDate = `date`;
                    215: 
                    216:        open INDEX, ">>$indexFile";
                    217:        print INDEX <<EOF;
                    218: </table>
1.3       nick      219: <center>
                    220: <font size="2">
                    221: Generated on: <font color="green">$sysDate</font><br/>
1.7       nick      222: Version: <font color="green">$ver</font><br />
                    223: CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
1.1       nick      224:   <p>
                    225:     <a href="http://validator.w3.org/check?uri=referer"><img
                    226:       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
                    227:   </p>
                    228: </center>
                    229: 
                    230: </body>
                    231: </html>
                    232: EOF
                    233:        close( INDEX );
                    234: }
                    235: 
                    236: #######################################################################
                    237: #######################################################################
                    238: sub checkDir ($$) {
                    239:        my @dir = @_;
                    240: 
                    241:        foreach ( @dir ) {
                    242:                if ( ! -d $_ ) { mkpath( $_ ); }
                    243:        }
                    244: }
                    245: 
                    246: #######################################################################
                    247: #######################################################################
                    248: sub writeTitle ($$) {
                    249:        my ( $date ) = @_;
1.11      nick      250:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      251:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    252:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    253:                        $sd . ".html";
1.8       nick      254:        my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
1.16      nick      255:     my $today_long = Date_to_Text_Long(Today());
1.1       nick      256: 
                    257:        open INDEX, ">$indexFile";
                    258:        print INDEX <<EOF;
                    259: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
                    260: 
                    261: <html xmlns="http://www.w3.org/1999/xhtml">
                    262: <head>
                    263: <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
1.13      nick      264: <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">
1.18      nick      265: <link rel="shortcut icon" href="./favicon.ico">
1.1       nick      266:     <title>Daily Comics for $today</title>
                    267:   </head>
                    268: <body bgcolor="#FFFFFF">
                    269: <table align="center" cellpadding="5" cellspacing="0">
1.16      nick      270: <tr><td>
                    271: <table cellpadding="0" cellspacing="0" border="0">
                    272: <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>
                    273: <tr><td align="left">$today_long</td></tr>
                    274: <tr><td>&nbsp;</td></tr>
                    275: </td</tr>
                    276: 
1.1       nick      277: EOF
                    278:        close (INDEX);
                    279: }
                    280: 
                    281: #######################################################################
                    282: #######################################################################
                    283: sub directDownload ($$) {
                    284:        my ( $comics, $comic, $date ) = @_;
                    285:        my $file = &parseComic ( $comics, $comic, $date );
                    286: 
                    287:         ##
                    288:         ## Save the file to the appropriate directory
                    289:         ##
                    290:         my $cDir  = $date->{'mon2'} . $date->{'year2'};
                    291:         my $cDate = $date->{'day2'};
                    292: 
1.18      nick      293:        my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
1.14      nick      294: 
1.1       nick      295:         return system($cmd);
                    296: }
                    297: 
                    298: #######################################################################
                    299: #######################################################################
                    300: sub indexDownload ($$) {
                    301:        my ( $comics, $comic, $date ) = @_;
                    302:        my ( @lines, $comicLine, $mainURL );
                    303:        my $comicIndex = "indexes/index.$comic";
                    304: 
1.19    ! nick      305:     my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .
        !           306:                    "--user-agent=\"$USER_AGENT\" " .
        !           307:                    "$comics->{$comic}{'url'} -O $comicIndex";
        !           308:     system($wget_cmd);
1.1       nick      309: 
                    310:        if ( ! open FILEN, "<$comicIndex" ) {  
                    311:                return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
                    312:                       " (" . $comics->{$comic}{'url'} . ")"; 
                    313:        } 
                    314:                @lines = <FILEN>;
                    315:        close (FILEN);  
                    316: 
                    317:        unlink ("$comicIndex");
                    318: 
                    319:        $mainURL = $comics->{$comic}{'url'};
                    320:        ## I need to figure out how to merge these two in to one regex.
                    321:        $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
                    322:        $mainURL =~ s/([a-z])\/.*/$1/i;
                    323: 
                    324:        ##
                    325:        ## Find the comic strip URL based on the specified regex in the search
                    326:        ##
                    327:        foreach my $line (@lines) {
1.17      nick      328:                if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
1.1       nick      329:                        $comicLine = $1; chomp $comicLine;
                    330:                }
1.17      nick      331:     }
1.1       nick      332: 
                    333:        ##
                    334:        ## Save the file to the appropriate directory
                    335:        ##
                    336:        my $cDir    = $date->{'mon2'} . $date->{'year2'};
                    337:        my $cDate   = $date->{'day2'};
                    338: 
                    339:        if ( $comicLine ) {
                    340:                if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                    341:                my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
1.17      nick      342:                my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
1.1       nick      343:                system( $cmd );
                    344:                return 0;
                    345:        }
                    346: 
                    347:        unlink "index.html";
                    348: 
                    349:        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
                    350: }
                    351: 
                    352: #######################################################################
                    353: #######################################################################
                    354: sub parseComic ($$) {
                    355:        my ( $comics, $comic, $date ) = @_;
                    356:        my $string = $comics->{$comic}{'search'};
                    357: 
                    358:        $string =~ s/__year__/$date->{'year'}/g;
                    359:        $string =~ s/__year2__/$date->{'year2'}/g;
                    360:        $string =~ s/__mon__/$date->{'mon'}/g;
                    361:        $string =~ s/__mon2__/$date->{'mon2'}/g;
                    362:        $string =~ s/__day__/$date->{'day'}/g;
                    363:        $string =~ s/__day2__/$date->{'day2'}/g;
                    364:        $string =~ s/__ext__/$comics->{$comic}{'ext'}/g;
                    365:        chomp $string;
                    366: 
                    367:        return $string;
                    368: }
                    369: 
                    370: #######################################################################
                    371: #######################################################################
                    372: sub fetchDates () {
                    373:        my %dates = ();
                    374: 
1.8       nick      375:        ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime(time - (86400 * $days_ago )))[3,4,5,6];
1.1       nick      376: 
                    377:        $dates{'year'} += 1900;
                    378:        $dates{'year2'} = substr $dates{'year'}, 2, 2;
                    379:        $dates{'day2'}  = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; 
                    380:        $dates{'mon'}++;
                    381:        $dates{'mon2'}  = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'};
                    382: 
                    383:        return %dates;
                    384: }
1.8       nick      385: 
                    386: ###############################################################################
                    387: ##
                    388: ## &fetchOptions( );
                    389: ##
                    390: ##      Grab our command line arguments and toss them in to a hash
                    391: ##
                    392: ###############################################################################
                    393: sub fetchOptions {
                    394:         my %opts;
                    395: 
                    396:         &GetOptions(
                    397:                         "days:i"        => \$opts{'days'},
                    398:                         "help|?"        => \$opts{'help'},
                    399:                         "man"           => \$opts{'man'},
                    400:                    ) || &pod2usage( );
                    401:         &pod2usage( ) if defined $opts{'help'};
                    402:         &pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $opts{'man'};
                    403: 
                    404:         return %opts;
                    405: }
                    406: 
                    407: __END__
                    408: 
                    409: =head1 NAME
                    410: 
                    411: fetch.pl - Fetches comics and places them all locally in a single html file.
                    412: 
                    413: =head1 SYNOPSIS
                    414: 
                    415: fetch.pl [options]
                    416: 
                    417: Options:
                    418:         --days,d        Fetch comics from X days ago
                    419:         --help,?        Display the basic help menu
                    420:         --man,m         Display the detailed man page
                    421: 
                    422: =head1 DESCRIPTION
                    423: 
                    424: =head1 HISTORY
                    425: 
                    426: =head1 AUTHOR
                    427: 
                    428: Nicholas DeClario <nick@declario.com>
                    429: 
                    430: =head1 BUGS
                    431: 
                    432: This is a work in progress.  Please report all bugs to the author.
                    433: 
                    434: =head1 SEE ALSO
                    435: 
                    436: =head1 COPYRIGHT
                    437: 
                    438: =cut
                    439: 
                    440: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>