Annotation of comics/fetch.pl.new, revision 1.8

1.1       nick        1: #!/usr/bin/perl -w
                      2: 
                      3: use strict;
                      4: use File::Path;
                      5: use Data::Dumper;
1.8     ! nick        6: use Pod::Usage;
        !             7: use Getopt::Long;
1.1       nick        8: 
                      9: ## 
                     10: ## Some default values
                     11: ##
##
## File-level state.  The subs further down the file read several of these
## lexicals directly ($indexDir, $imageDir, $USER_AGENT, $days_ago, @days,
## $ver), so they must all be set before the main program calls them.
##
my $ver         = q/$Id: fetch.pl.new,v 1.7 2013-01-10 14:25:57 nick Exp $/;
my $comicFile   = "comics.conf";

## The '&' call form is kept on purpose: the subs are defined later in the
## file, some of them with prototypes, and '&' stops perl -w from warning
## that they were "called too early to check prototype".
my %comics      = &readComicConfig ( $comicFile );
my %opts        = &fetchOptions( );
my $days_ago    = $opts{'days'} || 0;           # read by fetchDates()
my %dates       = &fetchDates();
my $baseDir     = $comics{'configs'}{'base_directory'} || ".";
my $imageDir    = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                  "/$dates{'mon2'}$dates{'year2'}";
my $indexDir    = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days        = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;


my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

## Every key except the 'configs' settings entry names a comic to fetch.
foreach my $comic ( sort keys %comics ) {
    next if ( $comic =~ m/config/ );
    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

##
## Shrink any of today's images that are wider than 640 pixels.
##
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";
foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) )
{
    ## List-form pipe open: the file name is handed straight to identify
    ## and never passes through a shell.
    my $identify;
    if ( ! open( $identify, '-|', '/usr/bin/identify', $file ) ) {
        warn "Could not run /usr/bin/identify on $file: $!\n";
        next;
    }
    my @report = <$identify>;
    close( $identify );

    ## Pull the width out of the last WIDTHxHEIGHT token on the first line
    ## that has one; skip the file when identify printed nothing usable
    ## rather than comparing unparsed text against a number.
    my $width;
    foreach my $line ( @report ) {
        if ( $line =~ m/.*\s(\d+)x\d+/ ) {
            $width = $1;
            last;
        }
    }
    next if ( ! defined $width );

    if ( $width > 640 ) {
        system( '/usr/bin/convert', '-resize', '640', $file, $file ) == 0
            or warn "Could not resize $file (exit status $?)\n";
    }
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`;  chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
                     58: 
                     59: ## End
                     60: 
                     #######################################################################
                     ## Function :  downloadComic
                     ##
                     ##   Description :
                     ##      Determines the download method configured for a comic and
                     ##      calls the appropriate function.
                     ##
                     ##   Parameters :
                     ##      $comics - reference to the comic configuration hash
                     ##      $comic  - key of the comic to fetch within that hash
                     ##      $date   - reference to the date hash, passed to the downloader
                     ##
                     ##   Returns :
                     ##      Whatever the selected downloader returns (mode 1 calls
                     ##      indexDownload, mode 2 calls directDownload), or an
                     ##      "ERROR: ..." string when the mode is missing or unknown.
                     ##
                     #######################################################################
                     sub downloadComic {
                         my ( $comics, $comic, $date ) = @_;

                         ## A config line with too few fields leaves 'mode' undefined;
                         ## treat that as an unknown mode instead of warning on the
                         ## comparisons below.
                         my $mode = $comics->{$comic}{'mode'};
                         $mode = '' if ( ! defined $mode );

                         ## Pass on the hash reference we were given rather than reaching
                         ## for the file-level %comics.
                         return indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
                         return directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );

                         return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
                     }
                     87: 
                     #######################################################################
                     ## Function :  readComicConfig
                     ##
                     ##   Description :
                     ##      Reads the comic configuration file.  Two kinds of lines are
                     ##      recognised; lines starting with '#' are ignored:
                     ##
                     ##        name,url,search,mode,fullName,ext   - one comic definition
                     ##        key = value                         - a general setting
                     ##
                     ##   Parameters :
                     ##      $comicFile - path of the configuration file
                     ##
                     ##   Returns :
                     ##      A hash keyed by comic name, each value a hash with the keys
                     ##      url, search, mode, fullName, ext and error (always 0 here).
                     ##      General settings are stored under the 'configs' key.  When
                     ##      the file cannot be opened a warning is printed and the hash
                     ##      comes back empty.
                     ##
                     #######################################################################
                     sub readComicConfig {
                         my ( $comicFile ) = @_;
                         my %comicConfig   = ( );

                         my $cfg;
                         if ( ! open( $cfg, '<', $comicFile ) ) {
                             warn "Could not open comic config $comicFile: $!\n";
                             return %comicConfig;
                         }

                         while ( my $line = <$cfg> ) {
                             ## Drop the line ending first so the last field of a comic
                             ## line (ext, or mode on a short line) does not carry it.
                             $line =~ s/[\r\n]+\z//;

                             ## Comments are skipped outright, so a commented-out
                             ## "key = value" is no longer stored as a setting.
                             next if ( $line =~ m/^#/ );

                             if ( $line =~ m/,.*,/ ) {
                                 my ( $name, @field ) = split /,/, $line;
                                 @{ $comicConfig{$name} }{ qw/ url search mode fullName ext / }
                                     = @field[ 0 .. 4 ];
                                 $comicConfig{$name}{'error'} = 0;
                             }
                             elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
                                 $comicConfig{'configs'}{$1} = $2;
                             }
                         }
                         close( $cfg );

                         return %comicConfig;
                     }
                    114: 
                    #######################################################################
                    ## Function :  writeComic
                    ##
                    ##   Description :
                    ##      Appends one table row for a comic to the day's index page
                    ##      ($indexDir/index-YYMMDD.html).  When the comic's 'error'
                    ##      field is false the row shows the image; otherwise it shows
                    ##      the error text in red.
                    ##
                    ##   Parameters :
                    ##      $comics - reference to the comic configuration hash
                    ##      $comic  - key of the comic being written
                    ##      $date   - reference to the date hash (year2, mon2, day2 used)
                    ##
                    ##   Returns :
                    ##      Always 0.  A warning is printed when the index file cannot
                    ##      be opened or closed.
                    ##
                    #######################################################################
                    sub writeComic {
                        my ( $comics, $comic, $date ) = @_;
                        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                                        $date->{'day2'} . ".html";

                        my $index;
                        if ( ! open( $index, '>>', $indexFile ) ) {
                            warn "Could not append to $indexFile: $!\n";
                            return 0;
                        }

                        if ( ! $comics->{$comic}{'error'} ) {
                            print $index <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
  <tr>
    <td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
<font size="-2">
       <a href="$comics->{$comic}{'url'}">
               $comics->{$comic}{'url'}
       </a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
                        }
                        else {
                            ## The error row opens its own <tr><td> so the closing
                            ## tags below are balanced inside the page's table.
                            print $index <<EOF;
  <tr>
    <td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;
<font size="-2">
        <a href="$comics->{$comic}{'url'}">
                $comics->{$comic}{'url'}
        </a>
</font><br/>
<font color="red"><b>$comic :  $comics->{$comic}{'error'}</b></font><br/>
  </td>
</tr>
EOF
                        }

                        close( $index ) or warn "Could not close $indexFile: $!\n";

                        return 0;
                    }
                    159: 
                    160: 
                    #######################################################################
                    ## Function :  writeMainIndex
                    ##
                    ##   Description :
                    ##      Placeholder; nothing is written yet.  The only call in the
                    ##      main program is commented out.
                    ##
                    ##   Parameters :
                    ##      $date - reference to the date hash (currently unused)
                    ##
                    ##   Returns :
                    ##      Nothing.
                    ##
                    #######################################################################
                    sub writeMainIndex {
                        my ( $date ) = @_;

                        return;
                    }
                    167: 
                    168: 
                    #######################################################################
                    ## Function :  writeFooter
                    ##
                    ##   Description :
                    ##      Appends the closing HTML (generation time, version string,
                    ##      CVS link, validator badge) to the day's index page,
                    ##      $indexDir/index-YYMMDD.html.
                    ##
                    ##   Parameters :
                    ##      $date - reference to the date hash (year2, mon2, day2 used)
                    ##
                    ##   Returns :
                    ##      1 when the footer was written, 0 when the index file could
                    ##      not be opened or closed (a warning is printed).
                    ##
                    #######################################################################
                    sub writeFooter {
                        my ( $date ) = @_;
                        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                                        $date->{'day2'} . ".html";

                        ## Strip the newline `date` prints so it does not end up inside
                        ## the <font> element.
                        my $sysDate = `date`;
                        chomp $sysDate;

                        my $index;
                        if ( ! open( $index, '>>', $indexFile ) ) {
                            warn "Could not append to $indexFile: $!\n";
                            return 0;
                        }

                        ## The outer <font size="2"> is closed before </center>.
                        print $index <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
  <p>
    <a href="http://validator.w3.org/check?uri=referer"><img
      src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
  </p>
</font>
</center>

</body>
</html>
EOF
                        if ( ! close( $index ) ) {
                            warn "Could not close $indexFile: $!\n";
                            return 0;
                        }

                        return 1;
                    }
                    196: 
                    #######################################################################
                    ## Function :  checkDir
                    ##
                    ##   Description :
                    ##      Makes sure every given directory exists, creating missing
                    ##      ones (including intermediate directories) with mkpath.
                    ##
                    ##   Parameters :
                    ##      Any mix of directory names and references to arrays of
                    ##      directory names.  The main program passes one array
                    ##      reference; undefined and empty names are skipped.
                    ##
                    ##   Returns :
                    ##      Nothing.
                    ##
                    #######################################################################
                    sub checkDir {
                        ## Flatten array references so each path gets its own -d test;
                        ## testing the reference itself would look for a directory
                        ## literally named "ARRAY(0x...)".
                        my @dir = map { ref( $_ ) eq 'ARRAY' ? @{ $_ } : $_ } @_;

                        foreach my $path ( @dir ) {
                            next if ( ! defined $path || $path eq '' );
                            mkpath( $path ) if ( ! -d $path );
                        }

                        return;
                    }
                    206: 
                    #######################################################################
                    ## Function :  writeTitle
                    ##
                    ##   Description :
                    ##      Starts the day's index page, $indexDir/index-YYMMDD.html,
                    ##      overwriting any existing file.  Writes the XHTML header,
                    ##      the page title and the opening <table> tag.
                    ##
                    ##   Parameters :
                    ##      $date - reference to the date hash (year, year2, mon, mon2,
                    ##              day, day2 and dow are used)
                    ##
                    ##   Returns :
                    ##      1 when the header was written, 0 when the index file could
                    ##      not be opened or closed (a warning is printed).
                    ##
                    #######################################################################
                    sub writeTitle {
                        my ( $date ) = @_;
                        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                                        $date->{'day2'} . ".html";
                        my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

                        my $index;
                        if ( ! open( $index, '>', $indexFile ) ) {
                            warn "Could not create $indexFile: $!\n";
                            return 0;
                        }

                        print $index <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
    <title>Daily Comics for $today</title>
  </head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF
                        if ( ! close( $index ) ) {
                            warn "Could not close $indexFile: $!\n";
                            return 0;
                        }

                        return 1;
                    }
                    230: 
                    #######################################################################
                    ## Function :  directDownload
                    ##
                    ##   Description :
                    ##      Downloads a comic whose image URL can be computed from the
                    ##      date.  The URL comes from parseComic; the image is fetched
                    ##      with wget and piped through convert to be saved as
                    ##      $imageDir/<comic>-<day2>.jpg.
                    ##
                    ##   Parameters :
                    ##      $comics - reference to the comic configuration hash
                    ##      $comic  - key of the comic to fetch
                    ##      $date   - reference to the date hash
                    ##
                    ##   Returns :
                    ##      The status returned by system() for the wget | convert
                    ##      pipeline (0 on success), or an "ERROR: ..." string when no
                    ##      URL could be built.
                    ##
                    #######################################################################
                    sub directDownload {
                        my ( $comics, $comic, $date ) = @_;

                        ## '&' form kept so this call stays valid whatever prototype
                        ## parseComic is declared with further down the file.
                        my $file = &parseComic ( $comics, $comic, $date );

                        if ( ! defined $file || $file eq '' ) {
                            return "ERROR: No download URL configured for $comics->{$comic}{'fullName'}";
                        }

                        ## Quote a value for /bin/sh: wrap it in single quotes and
                        ## escape any single quote it contains.  URLs routinely carry
                        ## '&', '?' and ';', which the shell would otherwise interpret.
                        my $quote = sub {
                            my ( $text ) = @_;
                            $text = '' if ( ! defined $text );
                            $text =~ s/'/'\\''/g;
                            return "'" . $text . "'";
                        };

                        ##
                        ## Save the file to the directory the main program created
                        ## ($imageDir already ends in the month/year sub-directory).
                        ##
                        my $target = "jpeg:$imageDir/$comic-" . $date->{'day2'} . ".jpg";

                        my $cmd = join ' ',
                            'wget', '-q', $quote->( $file ),
                            '--referer='    . $quote->( $comics->{$comic}{'url'} ),
                            '--user-agent=' . $quote->( $USER_AGENT ),
                            '-O', '-', '|',
                            '/usr/bin/convert', '-', $quote->( $target );

                        return system( $cmd );
                    }
                    246: 
                    #######################################################################
                    ## Function :  indexDownload
                    ##
                    ##   Description :
                    ##      Downloads a comic whose image URL has to be found in a web
                    ##      page.  The comic's page is fetched with wget, each line is
                    ##      matched against the comic's 'search' regex, and the first
                    ##      capture group of the last matching line is taken as the
                    ##      image location.  The image is then fetched and piped through
                    ##      convert to be saved as $imageDir/<comic>-<day2>.jpg.
                    ##
                    ##   Parameters :
                    ##      $comics - reference to the comic configuration hash; the
                    ##                comic's 'ext' field is updated when the image
                    ##                location contains gif, jpg or png
                    ##      $comic  - key of the comic to fetch
                    ##      $date   - reference to the date hash
                    ##
                    ##   Returns :
                    ##      0 on success, otherwise an "ERROR: ..." string.
                    ##
                    #######################################################################
                    sub indexDownload {
                        my ( $comics, $comic, $date ) = @_;
                        my $fullName   = $comics->{$comic}{'fullName'};
                        my $pageURL    = $comics->{$comic}{'url'};
                        my $comicIndex = "$indexDir/index.$comic";

                        ## Quote a value for /bin/sh: wrap it in single quotes and
                        ## escape any single quote it contains.
                        my $quote = sub {
                            my ( $text ) = @_;
                            $text = '' if ( ! defined $text );
                            $text =~ s/'/'\\''/g;
                            return "'" . $text . "'";
                        };

                        ## Compile the pattern up front.  An empty pattern would make
                        ## the match silently reuse the last successful regex, and an
                        ## invalid one would otherwise abort the whole run.
                        my $search   = $comics->{$comic}{'search'};
                        my $searchRe = ( defined $search && $search ne '' )
                                     ? eval { qr/$search/ }
                                     : undef;
                        if ( ! defined $searchRe ) {
                            return "ERROR: Missing or invalid search pattern for $fullName";
                        }

                        ## List-form system: the page URL never passes through a shell.
                        system( 'wget', '-q', $pageURL, '-O', $comicIndex );

                        my $page;
                        if ( ! open( $page, '<', $comicIndex ) ) {
                            return "ERROR: Can't open index file for " . $fullName .
                                   " (" . $pageURL . ")";
                        }
                        my @lines = <$page>;
                        close( $page );

                        unlink( $comicIndex );

                        ## Reduce the page URL to scheme and host, for relative links.
                        my $mainURL = $pageURL;
                        $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
                        $mainURL =~ s/([a-z])\/.*/$1/i;

                        ##
                        ## Find the comic strip URL based on the specified regex in the
                        ## search.  The last matching line wins; a pattern without a
                        ## capture group yields nothing rather than an undefined value.
                        ##
                        my $comicLine;
                        foreach my $line ( @lines ) {
                            if ( $line =~ $searchRe && defined $1 ) {
                                $comicLine = $1;
                                chomp $comicLine;
                            }
                        }

                        if ( $comicLine ) {
                            if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                            my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;

                            ##
                            ## Save the file to the directory the main program created
                            ## ($imageDir already ends in the month/year sub-directory).
                            ##
                            my $target = "jpeg:$imageDir/$comic-" . $date->{'day2'} . ".jpg";

                            my $cmd = join ' ',
                                'wget',
                                '--user-agent=' . $quote->( $USER_AGENT ),
                                '--referer='    . $quote->( $pageURL ),
                                '-q', $quote->( $comicURL ),
                                '-O', '-', '|',
                                '/usr/bin/convert', '-', $quote->( $target );

                            ## Report a failed pipeline so the index page shows an
                            ## error row instead of a broken image.
                            if ( system( $cmd ) != 0 ) {
                                return "ERROR: Could not download comic $fullName";
                            }
                            return 0;
                        }

                        ## NOTE(review): wget writes to $comicIndex above, so nothing
                        ## here creates index.html; kept from the original, confirm it
                        ## is still wanted.
                        unlink "index.html";

                        return "ERROR: Could not download comic $fullName";
                    }
                    297: 
                    #######################################################################
                    ## Function :  parseComic
                    ##
                    ##   Description :
                    ##      Builds a comic's download string by filling the date and
                    ##      extension placeholders in its 'search' field:
                    ##
                    ##        __year__  __year2__  __mon__  __mon2__  __day__  __day2__
                    ##            replaced by the matching key of the date hash
                    ##        __ext__
                    ##            replaced by the comic's 'ext' field
                    ##
                    ##   Parameters :
                    ##      $comics - reference to the comic configuration hash
                    ##      $comic  - key of the comic
                    ##      $date   - reference to the date hash
                    ##
                    ##   Returns :
                    ##      The filled-in string with any trailing newline removed.
                    ##      Missing fields are substituted as empty strings.
                    ##
                    #######################################################################
                    sub parseComic {
                        my ( $comics, $comic, $date ) = @_;

                        my $string = $comics->{$comic}{'search'};
                        $string = '' if ( ! defined $string );

                        ## Collect the replacement for every placeholder, using an empty
                        ## string for anything that is not defined.
                        my %value;
                        foreach my $key ( qw/ year year2 mon mon2 day day2 / ) {
                            $value{$key} = defined $date->{$key} ? $date->{$key} : '';
                        }
                        $value{'ext'} = defined $comics->{$comic}{'ext'} ? $comics->{$comic}{'ext'} : '';

                        ## 'ext' is the last field of a config line and may still carry
                        ## the line ending; do not let it land in the middle of a URL.
                        $value{'ext'} =~ s/\s+\z//;

                        $string =~ s/__(year2?|mon2?|day2?|ext)__/$value{$1}/g;
                        chomp $string;

                        return $string;
                    }
                    315: 
                    #######################################################################
                    ## Function :  fetchDates
                    ##
                    ##   Description :
                    ##      Works out the date to fetch: the current local time minus
                    ##      $days_ago days of 86400 seconds each.
                    ##
                    ##   Returns :
                    ##      A hash with the keys
                    ##        day, mon, year  - day of month, month (1-12), 4-digit year
                    ##        day2, mon2      - day and month padded to two digits
                    ##        year2           - last two digits of the year
                    ##        dow             - day of week as localtime gives it
                    ##
                    #######################################################################
                    sub fetchDates () {
                        my $when = time - ( 86400 * $days_ago );
                        my ( $mday, $month, $year, $wday ) = ( localtime( $when ) )[ 3 .. 6 ];

                        ## localtime counts years from 1900 and months from 0.
                        $year  += 1900;
                        $month += 1;

                        my %stamp = (
                            'day'   => $mday,
                            'mon'   => $month,
                            'year'  => $year,
                            'dow'   => $wday,
                            'year2' => substr( $year, 2, 2 ),
                            'day2'  => sprintf( "%02d", $mday ),
                            'mon2'  => sprintf( "%02d", $month ),
                        );

                        return %stamp;
                    }
1.8     ! nick      331: 
        ###############################################################################
        ##
        ## &fetchOptions( );
        ##
        ##      Parses the command line with GetOptions and returns the result as a
        ##      hash with the keys 'days', 'help' and 'man' (undefined when the
        ##      option was not given).  Prints the usage text through pod2usage when
        ##      parsing fails or --help is given, and the full manual for --man.
        ##
        ###############################################################################
        sub fetchOptions {
            my %parsed = ( 'days' => undef, 'help' => undef, 'man' => undef );

            my @spec = (
                "days:i" => \$parsed{'days'},
                "help|?" => \$parsed{'help'},
                "man"    => \$parsed{'man'},
            );

            if ( ! GetOptions( @spec ) ) {
                pod2usage( );
            }

            if ( defined $parsed{'help'} ) {
                pod2usage( );
            }

            if ( defined $parsed{'man'} ) {
                pod2usage( { -verbose => 2, -input => \*DATA } );
            }

            return %parsed;
        }
        !           352: 
        !           353: __END__
        !           354: 
        !           355: =head1 NAME
        !           356: 
        !           357: fetch.pl - Fetches comics and places them all locally in a single html file.
        !           358: 
        !           359: =head1 SYNOPSIS
        !           360: 
        !           361: fetch.pl [options]
        !           362: 
        !           363: Options:
        !           364:         --days,d        Fetch comics from X days ago
        !           365:         --help,?        Display the basic help menu
        !           366:         --man,m         Display the detailed man page
        !           367: 
        !           368: =head1 DESCRIPTION
        !           369: 
        !           370: =head1 HISTORY
        !           371: 
        !           372: =head1 AUTHOR
        !           373: 
        !           374: Nicholas DeClario <nick@declario.com>
        !           375: 
        !           376: =head1 BUGS
        !           377: 
        !           378: This is a work in progress.  Please report all bugs to the author.
        !           379: 
        !           380: =head1 SEE ALSO
        !           381: 
        !           382: =head1 COPYRIGHT
        !           383: 
        !           384: =cut
        !           385: 
        !           386: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>