Annotation of comics/fetch.pl.new, revision 1.17

1.1       nick        1: #!/usr/bin/perl -w
                      2: 
1.15      nick        3: ###############################################################################
1.16      nick        4: # $Log: fetch.pl.new,v $
1.17    ! nick        5: # Revision 1.16  2015/02/05 18:05:58  nick
        !             6: # Changed the background and added a fancy title.
        !             7: #
1.16      nick        8: # Revision 1.15  2015/01/19 13:46:19  nick
                      9: # *** empty log message ***
                     10: #
1.15      nick       11: ###############################################################################
                     12: 
1.1       nick       13: use strict;
                     14: use File::Path;
                     15: use Data::Dumper;
1.8       nick       16: use Pod::Usage;
                     17: use Getopt::Long;
1.1       nick       18: 
1.16      nick       19: use Date::Calc qw/Date_to_Text_Long Today/;
                     20: 
1.1       nick       21: ## 
                     22: ## Some default values
                     23: ##
## Version string (CVS keyword expansion); shown in the page footer.
my $ver         = '$Id: fetch.pl.new,v 1.16 2015/02/05 18:05:58 nick Exp $';
## Comic definitions and general settings are read from this file.
my $comicFile   = "comics.conf";
my %comics      = &readComicConfig ( $comicFile );
my %opts        = &fetchOptions( );
## fetchDates() reads $days_ago, so it has to be set before that call.
my $days_ago    = $opts{'days'} || 0;
my %dates       = &fetchDates();
my $baseDir     = $comics{'configs'}{'base_directory'} || ".";
## Images are grouped in one directory per month: <base>/<images>/<MM><YY>
my $imageDir    = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                  "/$dates{'mon2'}$dates{'year2'}";
my $indexDir    = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
## Indexed by the 'dow' value returned from localtime (0 = Sunday).
my @days        = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

## Images wider than this many pixels are scaled down after download.
my $maxWidth    = 640;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## The 'configs' entry holds general settings, not a comic.
    next if ( $comic =~ m/config/ );

    ## Skip comics flagged as not publishing on Sundays.  The weekday comes
    ## from 'dow'; 'day2' is the two digit day of the month and can never
    ## equal "Sunday".
    if ( ( $dates{'dow'} == 0 ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## Nothing to resize when the download failed or left no usable file.
    next if ( $comics{$comic}{'error'} );

    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    next if ( ! -s $file );

    ##
    ## Ask ImageMagick for the image width.  The list form of open/system
    ## runs the tool directly, so the file name never passes through a shell.
    ##
    my $width = 0;
    my $img;
    if ( ! open( $img, '-|', '/usr/bin/identify', '-verbose', $file ) ) {
        warn "Can't run identify on $file: $!\n";
        next;
    }
    while ( my $line = <$img> ) {
        ## Only the first geometry line is used.
        if ( ( $width == 0 ) && ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) ) {
            $width = $1;
        }
    }
    close( $img );

    if ( $width > $maxWidth ) {
        if ( system( '/usr/bin/convert', '-resize', $maxWidth, $file, $file ) != 0 ) {
            warn "Could not resize $file\n";
        }
    }
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`;  chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
                     76: 
                     77: ## End
                     78: 
                     79: #######################################################################
                     80: ## Function :  downloadComic
                     81: ##
                     82: ##   Description :
                     83: ##     This function determines the download method being used to 
                     84: ##      retrieve the comic and calls the apprioriate function.
                     85: ##
                     86: ##      If the mode is invalid an error will be returned.
                     87: ##
                     88: #######################################################################
                     89: sub downloadComic ($$) {
                     90:        my ( $comics, $comic, $date ) = @_;
                     91: 
                     92:        SWITCH: {
                     93:                if ( $comics->{$comic}{'mode'} eq 1 ) { 
                     94:                        return indexDownload ( \%comics, $comic, $date );
                     95:                        last SWITCH;
                     96:                }
                     97:                if ( $comics->{$comic}{'mode'} eq 2 ) { 
                     98:                        return directDownload ( \%comics, $comic, $date );
                     99:                        last SWITCH;
                    100:                }
                    101:        }
                    102:         
                    103:        return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
                    104: }
                    105: 
                    106: #######################################################################
                    107: #######################################################################
                    108: sub readComicConfig ($$) {
                    109:        my ( $comicFile ) = @_;
                    110:        my %comicConfig   = ( );
                    111:        my %config        = ( );
                    112: 
1.14      nick      113:     my ($year, $mon, $day) =( localtime(time))[5,4,3];
                    114:     $year += 1900;
                    115:     $mon = sprintf("%02d", ($mon + 1));
                    116:     $day = sprintf("%02d", $day);
                    117: 
1.1       nick      118:        open FILEN, "<$comicFile";
                    119:                while (<FILEN>) {
                    120:                        if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
1.14      nick      121:                 $_ =~ s/__YEAR__/$year/g;
                    122:                 $_ =~ s/__MON__/$mon/g;
                    123:                 $_ =~ s/__DAY__/$day/g;
                    124:                 
1.1       nick      125:                                my @res = split /,/, $_;
                    126:                                $comicConfig{$res[0]}{'url'}      = $res[1];
                    127:                                $comicConfig{$res[0]}{'search'}   = $res[2];
                    128:                                $comicConfig{$res[0]}{'mode'}     = $res[3];
                    129:                                $comicConfig{$res[0]}{'fullName'} = $res[4];
                    130:                                $comicConfig{$res[0]}{'ext'}      = $res[5];
1.14      nick      131:                 $comicConfig{$res[0]}{'sunday'}   = $res[6] || 1;
1.1       nick      132:                                $comicConfig{$res[0]}{'error'}    = 0;
                    133:                        }
                    134:                        elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {
                    135:                                $comicConfig{'configs'}{$1} = $2;
                    136:                        }
                    137:                }
                    138:        close (FILEN);
                    139: 
                    140:        return %comicConfig;
                    141: }
                    142: 
                    143: #######################################################################
                    144: #######################################################################
                    145: sub writeComic ($$) {
                    146:        my ( $comics, $comic, $date ) = @_;
1.11      nick      147:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      148:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    149:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    150:                        $sd . ".html";
1.1       nick      151:        my $content = <<EOF;
                    152: 
                    153: <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
                    154:   <tr>
                    155:     <td align="left">
                    156: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
                    157: <font size="-2">
                    158:        <a href="$comics->{$comic}{'url'}">
                    159:                $comics->{$comic}{'url'}
                    160:        </a>
                    161: </font><br/>
1.17    ! nick      162: <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
1.1       nick      163: <br/><br/>
                    164: </td></tr>
                    165: <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
                    166: 
                    167: EOF
                    168:        open INDEX, ">>$indexFile";
                    169: 
                    170:        print INDEX $content if ( ! $comics->{$comic}{'error'} );
                    171: 
                    172:        print INDEX <<EOF
                    173: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;
                    174: <font size="-2"><
                    175:         <a href="$comics->{$comic}{'url'}">
                    176:                 $comics->{$comic}{'url'}
                    177:         </a>
                    178: </font><br/>
                    179: <font color="red"><b>$comic :  $comics->{$comic}{'error'}</b></font><br/>
                    180:   </td>
                    181: </tr>
                    182: EOF
                    183:                if ( $comics->{$comic}{'error'} );
                    184: 
                    185:        close (INDEX);
                    186: 
                    187:        return 0;
                    188: }
                    189: 
                    190: 
                    191: #######################################################################
                    192: #######################################################################
                    193: sub writeMainIndex ($$) {
                    194:        my ( $date ) = @_;
                    195: 
                    196: }
                    197: 
                    198: 
                    199: #######################################################################
                    200: #######################################################################
                    201: sub writeFooter {
                    202:        my ( $date ) = @_;
1.11      nick      203:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      204:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    205:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    206:                        $sd . ".html";
1.1       nick      207:        my $sysDate = `date`;
                    208: 
                    209:        open INDEX, ">>$indexFile";
                    210:        print INDEX <<EOF;
                    211: </table>
1.3       nick      212: <center>
                    213: <font size="2">
                    214: Generated on: <font color="green">$sysDate</font><br/>
1.7       nick      215: Version: <font color="green">$ver</font><br />
                    216: CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
1.1       nick      217:   <p>
                    218:     <a href="http://validator.w3.org/check?uri=referer"><img
                    219:       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
                    220:   </p>
                    221: </center>
                    222: 
                    223: </body>
                    224: </html>
                    225: EOF
                    226:        close( INDEX );
                    227: }
                    228: 
                    229: #######################################################################
                    230: #######################################################################
                    231: sub checkDir ($$) {
                    232:        my @dir = @_;
                    233: 
                    234:        foreach ( @dir ) {
                    235:                if ( ! -d $_ ) { mkpath( $_ ); }
                    236:        }
                    237: }
                    238: 
                    239: #######################################################################
                    240: #######################################################################
                    241: sub writeTitle ($$) {
                    242:        my ( $date ) = @_;
1.11      nick      243:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      244:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    245:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    246:                        $sd . ".html";
1.8       nick      247:        my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
1.16      nick      248:     my $today_long = Date_to_Text_Long(Today());
1.1       nick      249: 
                    250:        open INDEX, ">$indexFile";
                    251:        print INDEX <<EOF;
                    252: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
                    253: 
                    254: <html xmlns="http://www.w3.org/1999/xhtml">
                    255: <head>
                    256: <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
1.13      nick      257: <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">
1.1       nick      258:     <title>Daily Comics for $today</title>
                    259:   </head>
                    260: <body bgcolor="#FFFFFF">
                    261: <table align="center" cellpadding="5" cellspacing="0">
1.16      nick      262: <tr><td>
                    263: <table cellpadding="0" cellspacing="0" border="0">
                    264: <tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>
                    265: <tr><td align="left">$today_long</td></tr>
                    266: <tr><td>&nbsp;</td></tr>
                    267: </td</tr>
                    268: 
1.1       nick      269: EOF
                    270:        close (INDEX);
                    271: }
                    272: 
                    273: #######################################################################
                    274: #######################################################################
                    275: sub directDownload ($$) {
                    276:        my ( $comics, $comic, $date ) = @_;
                    277:        my $file = &parseComic ( $comics, $comic, $date );
                    278: 
                    279:         ##
                    280:         ## Save the file to the appropriate directory
                    281:         ##
                    282:         my $cDir  = $date->{'mon2'} . $date->{'year2'};
                    283:         my $cDate = $date->{'day2'};
                    284: 
1.3       nick      285:        my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
1.14      nick      286: 
1.1       nick      287:         return system($cmd);
                    288: }
                    289: 
                    290: #######################################################################
                    291: #######################################################################
                    292: sub indexDownload ($$) {
                    293:        my ( $comics, $comic, $date ) = @_;
                    294:        my ( @lines, $comicLine, $mainURL );
                    295:        my $comicIndex = "indexes/index.$comic";
                    296: 
                    297:        `wget -q $comics->{$comic}{'url'} -O $comicIndex`;
                    298: 
                    299:        if ( ! open FILEN, "<$comicIndex" ) {  
                    300:                return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
                    301:                       " (" . $comics->{$comic}{'url'} . ")"; 
                    302:        } 
                    303:                @lines = <FILEN>;
                    304:        close (FILEN);  
                    305: 
                    306:        unlink ("$comicIndex");
                    307: 
                    308:        $mainURL = $comics->{$comic}{'url'};
                    309:        ## I need to figure out how to merge these two in to one regex.
                    310:        $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
                    311:        $mainURL =~ s/([a-z])\/.*/$1/i;
                    312: 
                    313:        ##
                    314:        ## Find the comic strip URL based on the specified regex in the search
                    315:        ##
                    316:        foreach my $line (@lines) {
1.17    ! nick      317:                if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
1.1       nick      318:                        $comicLine = $1; chomp $comicLine;
                    319:                }
1.17    ! nick      320:     }
1.1       nick      321: 
                    322:        ##
                    323:        ## Save the file to the appropriate directory
                    324:        ##
                    325:        my $cDir    = $date->{'mon2'} . $date->{'year2'};
                    326:        my $cDate   = $date->{'day2'};
                    327: 
                    328:        if ( $comicLine ) {
                    329:                if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                    330:                my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
1.17    ! nick      331:                my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
1.1       nick      332:                system( $cmd );
                    333:                return 0;
                    334:        }
                    335: 
                    336:        unlink "index.html";
                    337: 
                    338:        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
                    339: }
                    340: 
                    341: #######################################################################
                    342: #######################################################################
                    343: sub parseComic ($$) {
                    344:        my ( $comics, $comic, $date ) = @_;
                    345:        my $string = $comics->{$comic}{'search'};
                    346: 
                    347:        $string =~ s/__year__/$date->{'year'}/g;
                    348:        $string =~ s/__year2__/$date->{'year2'}/g;
                    349:        $string =~ s/__mon__/$date->{'mon'}/g;
                    350:        $string =~ s/__mon2__/$date->{'mon2'}/g;
                    351:        $string =~ s/__day__/$date->{'day'}/g;
                    352:        $string =~ s/__day2__/$date->{'day2'}/g;
                    353:        $string =~ s/__ext__/$comics->{$comic}{'ext'}/g;
                    354:        chomp $string;
                    355: 
                    356:        return $string;
                    357: }
                    358: 
                    359: #######################################################################
                    360: #######################################################################
                    361: sub fetchDates () {
                    362:        my %dates = ();
                    363: 
1.8       nick      364:        ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime(time - (86400 * $days_ago )))[3,4,5,6];
1.1       nick      365: 
                    366:        $dates{'year'} += 1900;
                    367:        $dates{'year2'} = substr $dates{'year'}, 2, 2;
                    368:        $dates{'day2'}  = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; 
                    369:        $dates{'mon'}++;
                    370:        $dates{'mon2'}  = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'};
                    371: 
                    372:        return %dates;
                    373: }
1.8       nick      374: 
                    375: ###############################################################################
                    376: ##
                    377: ## &fetchOptions( );
                    378: ##
                    379: ##      Grab our command line arguments and toss them in to a hash
                    380: ##
                    381: ###############################################################################
                    382: sub fetchOptions {
                    383:         my %opts;
                    384: 
                    385:         &GetOptions(
                    386:                         "days:i"        => \$opts{'days'},
                    387:                         "help|?"        => \$opts{'help'},
                    388:                         "man"           => \$opts{'man'},
                    389:                    ) || &pod2usage( );
                    390:         &pod2usage( ) if defined $opts{'help'};
                    391:         &pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $opts{'man'};
                    392: 
                    393:         return %opts;
                    394: }
                    395: 
                    396: __END__
                    397: 
                    398: =head1 NAME
                    399: 
                    400: fetch.pl - Fetches comics and places them all locally in a single html file.
                    401: 
                    402: =head1 SYNOPSIS
                    403: 
                    404: fetch.pl [options]
                    405: 
                    406: Options:
                    407:         --days,d        Fetch comics from X days ago
                    408:         --help,?        Display the basic help menu
                    409:         --man,m         Display the detailed man page
                    410: 
                    411: =head1 DESCRIPTION
                    412: 
                    413: =head1 HISTORY
                    414: 
                    415: =head1 AUTHOR
                    416: 
                    417: Nicholas DeClario <nick@declario.com>
                    418: 
                    419: =head1 BUGS
                    420: 
                    421: This is a work in progress.  Please report all bugs to the author.
                    422: 
                    423: =head1 SEE ALSO
                    424: 
                    425: =head1 COPYRIGHT
                    426: 
                    427: =cut
                    428: 
                    429: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>