Annotation of comics/fetch.pl.new, revision 1.6

1.1       nick        1: #!/usr/bin/perl -w
                      2: 
                      3: use strict;
                      4: use File::Path;
                      5: use Data::Dumper;
                      6: 
                      7: ## 
                      8: ## Some default values
                      9: ##
1.6     ! nick       10: my $ver                = q/$Id: fetch.pl.new,v 1.5 2012-12-20 16:18:45 nick Exp $/;
1.1       nick       11: my $comicFile   = "comics.conf";
                     12: my %comics     = &readComicConfig ( $comicFile );
1.6     ! nick       13: my $days_ago    = 0;  # Set this to fetch comics from X days ago
1.1       nick       14: my %dates      = &fetchDates();
                     15: my $baseDir     = $comics{'configs'}{'base_directory'} || ".";
                     16: my $imageDir    = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) . 
                     17:                  "/$dates{'mon2'}$dates{'year2'}";
                     18: my $indexDir    = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
1.2       nick       19: my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
1.1       nick       20: 
                     21: 
                     22: my $DATE=`date`; chomp $DATE;
                     23: print STDOUT "Starting comic fetch at $DATE\n";
                     24: 
                     25: ##
                     26: ## Main program starts here
                     27: ##
                     28: &checkDir ( [ $imageDir, $indexDir ] );
                     29: 
1.5       nick       30: &writeTitle ( \%dates );
1.1       nick       31: 
                     32: foreach my $comic ( sort keys %comics ) {
                     33:   next if ( $comic =~ m/config/ );
                     34:   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
                     35:   &writeComic ( \%comics, $comic, \%dates );
                     36: }
                     37: 
1.4       nick       38: my $D  = `date +%d`; chomp $D;
                     39: print "Finding in $imageDir/*-$D.jpg\n";
                     40: foreach my $file ( glob( "$imageDir/*-$D.jpg" ) ) 
                     41: {
                     42:        my $size = `/usr/bin/identify $file`;
                     43:        $size =~ s/.*\s(\d+)x\d+.*/$1/;
                     44: 
                     45:        system( "/usr/bin/convert -resize 640 $file $file" )
                     46:                if ( $size > 640 ) 
                     47: }
                     48: 
1.1       nick       49: ## &writeMainIndex ( \%dates );
                     50: 
                     51: &writeFooter( \%dates );
                     52: 
                     53: $DATE=`date`;  chomp( $DATE );
                     54: print STDOUT "Completed comic fetch at $DATE\n";
                     55: 
                     56: ## End
                     57: 
                     58: #######################################################################
                     59: ## Function :  downloadComic
                     60: ##
                     61: ##   Description :
                     62: ##     This function determines the download method being used to 
                     63: ##      retrieve the comic and calls the apprioriate function.
                     64: ##
                     65: ##      If the mode is invalid an error will be returned.
                     66: ##
                     67: #######################################################################
                     68: sub downloadComic ($$) {
                     69:        my ( $comics, $comic, $date ) = @_;
                     70: 
                     71:        SWITCH: {
                     72:                if ( $comics->{$comic}{'mode'} eq 1 ) { 
                     73:                        return indexDownload ( \%comics, $comic, $date );
                     74:                        last SWITCH;
                     75:                }
                     76:                if ( $comics->{$comic}{'mode'} eq 2 ) { 
                     77:                        return directDownload ( \%comics, $comic, $date );
                     78:                        last SWITCH;
                     79:                }
                     80:        }
                     81:         
                     82:        return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
                     83: }
                     84: 
                     85: #######################################################################
                     86: #######################################################################
                     87: sub readComicConfig ($$) {
                     88:        my ( $comicFile ) = @_;
                     89:        my %comicConfig   = ( );
                     90:        my %config        = ( );
                     91: 
                     92:        open FILEN, "<$comicFile";
                     93:                while (<FILEN>) {
                     94:                        if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
                     95:                                my @res = split /,/, $_;
                     96:                                $comicConfig{$res[0]}{'url'}      = $res[1];
                     97:                                $comicConfig{$res[0]}{'search'}   = $res[2];
                     98:                                $comicConfig{$res[0]}{'mode'}     = $res[3];
                     99:                                $comicConfig{$res[0]}{'fullName'} = $res[4];
                    100:                                $comicConfig{$res[0]}{'ext'}      = $res[5];
                    101:                                $comicConfig{$res[0]}{'error'}    = 0;
                    102:                        }
                    103:                        elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {
                    104:                                $comicConfig{'configs'}{$1} = $2;
                    105:                        }
                    106:                }
                    107:        close (FILEN);
                    108: 
                    109:        return %comicConfig;
                    110: }
                    111: 
                    112: #######################################################################
                    113: #######################################################################
                    114: sub writeComic ($$) {
                    115:        my ( $comics, $comic, $date ) = @_;
                    116:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    117:                         $date->{'day2'} . ".html";
                    118:        my $content = <<EOF;
                    119: 
                    120: <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
                    121:   <tr>
                    122:     <td align="left">
                    123: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
                    124: <font size="-2">
                    125:        <a href="$comics->{$comic}{'url'}">
                    126:                $comics->{$comic}{'url'}
                    127:        </a>
                    128: </font><br/>
                    129: <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
                    130: <br/><br/>
                    131: </td></tr>
                    132: <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
                    133: 
                    134: EOF
                    135:        open INDEX, ">>$indexFile";
                    136: 
                    137:        print INDEX $content if ( ! $comics->{$comic}{'error'} );
                    138: 
                    139:        print INDEX <<EOF
                    140: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;
                    141: <font size="-2"><
                    142:         <a href="$comics->{$comic}{'url'}">
                    143:                 $comics->{$comic}{'url'}
                    144:         </a>
                    145: </font><br/>
                    146: <font color="red"><b>$comic :  $comics->{$comic}{'error'}</b></font><br/>
                    147:   </td>
                    148: </tr>
                    149: EOF
                    150:                if ( $comics->{$comic}{'error'} );
                    151: 
                    152:        close (INDEX);
                    153: 
                    154:        return 0;
                    155: }
                    156: 
                    157: 
                    158: #######################################################################
                    159: #######################################################################
                    160: sub writeMainIndex ($$) {
                    161:        my ( $date ) = @_;
                    162: 
                    163: }
                    164: 
                    165: 
                    166: #######################################################################
                    167: #######################################################################
                    168: sub writeFooter {
                    169:        my ( $date ) = @_;
                    170:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    171:                         $date->{'day2'} . ".html";
                    172:        my $sysDate = `date`;
                    173: 
                    174:        open INDEX, ">>$indexFile";
                    175:        print INDEX <<EOF;
                    176: </table>
1.3       nick      177: <center>
                    178: <font size="2">
                    179: Generated on: <font color="green">$sysDate</font><br/>
                    180: Version: <font color="green">$ver</font></font>
1.1       nick      181:   <p>
                    182:     <a href="http://validator.w3.org/check?uri=referer"><img
                    183:       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
                    184:   </p>
                    185: </center>
                    186: 
                    187: </body>
                    188: </html>
                    189: EOF
                    190:        close( INDEX );
                    191: }
                    192: 
                    193: #######################################################################
                    194: #######################################################################
                    195: sub checkDir ($$) {
                    196:        my @dir = @_;
                    197: 
                    198:        foreach ( @dir ) {
                    199:                if ( ! -d $_ ) { mkpath( $_ ); }
                    200:        }
                    201: }
                    202: 
                    203: #######################################################################
                    204: #######################################################################
                    205: sub writeTitle ($$) {
                    206:        my ( $date ) = @_;
                    207:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    208:                          $date->{'day2'} . ".html";
                    209:        my $today     = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
                    210: 
                    211:        open INDEX, ">$indexFile";
                    212:        print INDEX <<EOF;
                    213: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
                    214: 
                    215: <html xmlns="http://www.w3.org/1999/xhtml">
                    216: <head>
                    217: <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
                    218:     <title>Daily Comics for $today</title>
                    219:   </head>
                    220: <body bgcolor="#FFFFFF">
                    221: <h1>Daily Comics for $today</h1>
                    222: <table align="center" cellpadding="5" cellspacing="0">
                    223: EOF
                    224:        close (INDEX);
                    225: }
                    226: 
                    227: #######################################################################
                    228: #######################################################################
                    229: sub directDownload ($$) {
                    230:        my ( $comics, $comic, $date ) = @_;
                    231:        my $file = &parseComic ( $comics, $comic, $date );
                    232: 
                    233:         ##
                    234:         ## Save the file to the appropriate directory
                    235:         ##
                    236:         my $cDir  = $date->{'mon2'} . $date->{'year2'};
                    237:         my $cDate = $date->{'day2'};
                    238: 
1.3       nick      239:        my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
1.1       nick      240:         return system($cmd);
                    241: }
                    242: 
                    243: #######################################################################
                    244: #######################################################################
                    245: sub indexDownload ($$) {
                    246:        my ( $comics, $comic, $date ) = @_;
                    247:        my ( @lines, $comicLine, $mainURL );
                    248:        my $comicIndex = "indexes/index.$comic";
                    249: 
                    250:        `wget -q $comics->{$comic}{'url'} -O $comicIndex`;
                    251: 
                    252:        if ( ! open FILEN, "<$comicIndex" ) {  
                    253:                return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
                    254:                       " (" . $comics->{$comic}{'url'} . ")"; 
                    255:        } 
                    256:                @lines = <FILEN>;
                    257:        close (FILEN);  
                    258: 
                    259:        unlink ("$comicIndex");
                    260: 
                    261:        $mainURL = $comics->{$comic}{'url'};
                    262:        ## I need to figure out how to merge these two in to one regex.
                    263:        $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
                    264:        $mainURL =~ s/([a-z])\/.*/$1/i;
                    265: 
                    266:        ##
                    267:        ## Find the comic strip URL based on the specified regex in the search
                    268:        ##
                    269:        foreach my $line (@lines) {
                    270:                if ( $line =~ m/$comics->{$comic}{'search'}/ ) {
                    271:                        $comicLine = $1; chomp $comicLine;
                    272:                }
                    273:         }
                    274: 
                    275:        ##
                    276:        ## Save the file to the appropriate directory
                    277:        ##
                    278:        my $cDir    = $date->{'mon2'} . $date->{'year2'};
                    279:        my $cDate   = $date->{'day2'};
                    280: 
                    281:        if ( $comicLine ) {
                    282:                if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                    283:                my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
1.3       nick      284:                my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
1.1       nick      285:                system( $cmd );
                    286:                return 0;
                    287:        }
                    288: 
                    289:        unlink "index.html";
                    290: 
                    291:        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
                    292: }
                    293: 
                    294: #######################################################################
                    295: #######################################################################
                    296: sub parseComic ($$) {
                    297:        my ( $comics, $comic, $date ) = @_;
                    298:        my $string = $comics->{$comic}{'search'};
                    299: 
                    300:        $string =~ s/__year__/$date->{'year'}/g;
                    301:        $string =~ s/__year2__/$date->{'year2'}/g;
                    302:        $string =~ s/__mon__/$date->{'mon'}/g;
                    303:        $string =~ s/__mon2__/$date->{'mon2'}/g;
                    304:        $string =~ s/__day__/$date->{'day'}/g;
                    305:        $string =~ s/__day2__/$date->{'day2'}/g;
                    306:        $string =~ s/__ext__/$comics->{$comic}{'ext'}/g;
                    307:        chomp $string;
                    308: 
                    309:        return $string;
                    310: }
                    311: 
                    312: #######################################################################
                    313: #######################################################################
                    314: sub fetchDates () {
                    315:        my %dates = ();
                    316: 
                    317:        ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime)[3,4,5,6];
                    318: 
                    319:        ## If you missed a day or two, reflect it here:
1.5       nick      320:        $dates{'day'} -= $days_ago;  ## <-- 5 days ago
1.1       nick      321: 
                    322:        $dates{'year'} += 1900;
                    323:        $dates{'year2'} = substr $dates{'year'}, 2, 2;
                    324:        $dates{'day2'}  = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; 
                    325:        $dates{'mon'}++;
                    326:        $dates{'mon2'}  = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'};
                    327: 
                    328:        return %dates;
                    329: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>