Annotation of comics/fetch.pl.new, revision 1.1

1.1     ! nick        1: #!/usr/bin/perl -w
        !             2: 
        !             3: use strict;
        !             4: use File::Path;
        !             5: use Data::Dumper;
        !             6: 
        ##
        ## Script-wide settings.
        ##
        ## These are file-scoped lexicals on purpose: the subs defined further
        ## down read some of them directly (for example $indexDir and
        ## $USER_AGENT), so they have to be assigned before any sub is called.
        ##
        ## The subs are called with a leading "&" because they are defined
        ## after this point and most of them carry a prototype; under -w a
        ## plain call would warn "called too early to check prototype".
        ##
        my $ver         = q/$Id$/;
        my $comicFile   = "comics.conf";
        my %comics      = &readComicConfig( $comicFile );
        my %dates       = &fetchDates();
        my $baseDir     = $comics{'configs'}{'base_directory'} || ".";
        my $imageDir    = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                          "/$dates{'mon2'}$dates{'year2'}";
        my $indexDir    = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );

        ## The leading "M" of "Mozilla" had been lost from this string.
        my $USER_AGENT  = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

        my $DATE = `date`;
        chomp $DATE;
        print STDOUT "Starting comic fetch at $DATE\n";

        ##
        ## Main program starts here
        ##

        ## Make sure the output directories exist, then start today's page.
        &checkDir( [ $imageDir, $indexDir ] );
        &writeTitle( \%dates );

        foreach my $comic ( sort keys %comics ) {
            ## 'configs' holds the "key = value" settings, it is not a comic.
            ## Compare exactly so a comic id that merely contains the word
            ## "config" is still processed.
            next if ( $comic eq 'configs' );

            $comics{$comic}{'error'} = &downloadComic( \%comics, $comic, \%dates );
            &writeComic( \%comics, $comic, \%dates );
        }

        ## &writeMainIndex ( \%dates );

        &writeFooter( \%dates );

        $DATE = `date`;
        chomp $DATE;
        print STDOUT "Completed comic fetch at $DATE\n";

        ## End
        !            45: 
        #######################################################################
        ## Function :  downloadComic
        ##
        ##   Description :
        ##     Looks at the 'mode' field of the requested comic and hands the
        ##     work to the matching download function:
        ##        mode 1  ->  indexDownload
        ##        mode 2  ->  directDownload
        ##
        ##   Parameters :
        ##     $comics - reference to the comic configuration hash
        ##     $comic  - key of the comic inside that hash
        ##     $date   - reference to the date hash, passed through unchanged
        ##
        ##   Returns :
        ##     Whatever the selected download function returns, or an
        ##     "ERROR: ..." string when the mode is missing or not recognised.
        ##
        #######################################################################
        sub downloadComic {
            my ( $comics, $comic, $date ) = @_;

            my %handlerFor = (
                '1' => \&indexDownload,
                '2' => \&directDownload,
            );

            ## A missing mode is treated like an unknown one instead of
            ## producing an "uninitialized value" warning.
            my $mode = $comics->{$comic}{'mode'};
            $mode = '' if ( ! defined $mode );

            if ( my $handler = $handlerFor{$mode} ) {
                ## Hand over the hash reference we were given rather than
                ## reaching for the file-level %comics.
                return $handler->( $comics, $comic, $date );
            }

            return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
        }
        !            72: 
        #######################################################################
        ## Function :  readComicConfig
        ##
        ##   Description :
        ##     Reads the comic configuration file.  Three kinds of lines are
        ##     recognised:
        ##       - lines starting with "#" are comments and are skipped
        ##       - lines with at least two commas describe a comic:
        ##             id,url,search,mode,fullName,ext
        ##       - "key = value" lines are stored under the 'configs' entry
        ##
        ##   Parameters :
        ##     $comicFile - path of the configuration file
        ##
        ##   Returns :
        ##     A hash (as a list) keyed by comic id, plus the 'configs' key
        ##     for the settings.  If the file cannot be opened a warning is
        ##     printed and an empty hash is returned.
        ##
        #######################################################################
        sub readComicConfig {
            my ( $comicFile ) = @_;
            my %comicConfig   = ( );

            my $fh;
            if ( ! open( $fh, '<', $comicFile ) ) {
                warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
                return %comicConfig;
            }

            while ( my $line = <$fh> ) {
                ## Drop the line ending first so the last field of a comic
                ## line (normally 'ext') does not carry a newline around.
                $line =~ s/\r?\n\z//;

                next if ( $line =~ m/^\s*#/ );

                if ( $line =~ m/,.*,/ ) {
                    ## -1 keeps trailing empty fields so positions stay stable.
                    my ( $id, @field ) = split /,/, $line, -1;

                    @{ $comicConfig{$id} }{ qw(url search mode fullName ext) } = @field[ 0 .. 4 ];
                    $comicConfig{$id}{'error'} = 0;
                }
                elsif ( $line =~ m/^\s*(.*\S)\s+=\s+(.*)$/ ) {
                    $comicConfig{'configs'}{$1} = $2;
                }
            }

            close( $fh );

            return %comicConfig;
        }
        !            99: 
        #######################################################################
        ## Function :  writeComic
        ##
        ##   Description :
        ##     Appends one table row for a comic to today's index page
        ##     (index-YYMMDD.html inside $indexDir).  When the comic's 'error'
        ##     field is false the row shows the downloaded image, otherwise it
        ##     shows the error text in red.
        ##
        ##   Parameters :
        ##     $comics - reference to the comic configuration hash
        ##     $comic  - key of the comic inside that hash
        ##     $date   - reference to the date hash (year2, mon2, day2 are used)
        ##
        ##   Returns :
        ##     0 when the row was written, 1 when the index file could not be
        ##     opened (a warning is printed in that case).
        ##
        #######################################################################
        sub writeComic {
            my ( $comics, $comic, $date ) = @_;
            my $entry     = $comics->{$comic};
            my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                            $date->{'day2'} . ".html";
            my $content;

            if ( $entry->{'error'} ) {
                ## The error row opens its own <tr>/<td>, matching the closing
                ## tags at the end, and no longer carries a stray "<".
                $content = <<EOF;
  <tr>
    <td align="left">
<font color="blue"><b>$entry->{'fullName'}</b></font> &nbsp; &nbsp;
<font size="-2">
        <a href="$entry->{'url'}">
                $entry->{'url'}
        </a>
</font><br/>
<font color="red"><b>$comic :  $entry->{'error'}</b></font><br/>
  </td>
</tr>
EOF
            }
            else {
                $content = <<EOF;

<!-- ********* Begin $comic ($entry->{'fullName'}) ******* -->
  <tr>
    <td align="left">
<font color="blue"><b>$entry->{'fullName'}</b></font> &nbsp; &nbsp; 
<font size="-2">
       <a href="$entry->{'url'}">
               $entry->{'url'}
       </a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($entry->{'fullName'}) ******* -->

EOF
            }

            my $index;
            if ( ! open( $index, '>>', $indexFile ) ) {
                warn "WARNING: Can't append to index file '$indexFile': $!\n";
                return 1;
            }

            print {$index} $content;

            ## Buffered write errors only show up when the handle is closed.
            close( $index ) or warn "WARNING: Error closing '$indexFile': $!\n";

            return 0;
        }
        !           144: 
        !           145: 
        #######################################################################
        ## Function :  writeMainIndex
        ##
        ##   Description :
        ##     Placeholder, nothing is written yet.  The only call to it in
        ##     the main program is commented out.
        ##
        ##   Parameters :
        ##     $date - reference to the date hash (currently unused)
        ##
        ##   Returns :
        ##     Nothing.
        ##
        #######################################################################
        sub writeMainIndex {
            my ( $date ) = @_;

            return;
        }
        !           152: 
        !           153: 
        #######################################################################
        ## Function :  writeFooter
        ##
        ##   Description :
        ##     Appends the closing part of the page (end of table, generation
        ##     time stamp, validator badge, </body></html>) to today's index
        ##     page (index-YYMMDD.html inside $indexDir).
        ##
        ##   Parameters :
        ##     $date - reference to the date hash (year2, mon2, day2 are used)
        ##
        ##   Returns :
        ##     No meaningful value.  A warning is printed when the index file
        ##     cannot be opened or closed.
        ##
        #######################################################################
        sub writeFooter {
            my ( $date ) = @_;
            my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                            $date->{'day2'} . ".html";

            ## `date` ends its output with a newline; drop it so the time
            ## stamp sits on one line of the generated page.
            my $sysDate = `date`;
            $sysDate = '' if ( ! defined $sysDate );
            chomp $sysDate;

            my $index;
            if ( ! open( $index, '>>', $indexFile ) ) {
                warn "WARNING: Can't append footer to '$indexFile': $!\n";
                return;
            }

            print {$index} <<EOF;
</table>
<center>Generated at $sysDate
  <p>
    <a href="http://validator.w3.org/check?uri=referer"><img
      src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
  </p>
</center>

</body>
</html>
EOF

            close( $index ) or warn "WARNING: Error closing '$indexFile': $!\n";

            return;
        }
        !           177: 
        #######################################################################
        ## Function :  checkDir
        ##
        ##   Description :
        ##     Creates every directory it is given unless it already exists.
        ##     Missing parent directories are created as well (File::Path's
        ##     mkpath does that).
        ##
        ##   Parameters :
        ##     Directory names, either as a plain list or as one or more
        ##     array references (the main program passes a single array
        ##     reference).
        ##
        ##   Returns :
        ##     Nothing.
        ##
        #######################################################################
        sub checkDir {
            ## Unwrap array references so that each directory is tested on its
            ## own; testing the reference itself with -d is never true.
            my @dir = map { ( ref $_ eq 'ARRAY' ) ? @{$_} : $_ } @_;

            foreach my $dir ( @dir ) {
                next if ( ! defined $dir || $dir eq '' );
                next if ( -d $dir );

                mkpath( $dir );
            }

            return;
        }
        !           187: 
        #######################################################################
        ## Function :  writeTitle
        ##
        ##   Description :
        ##     Starts today's index page (index-YYMMDD.html inside $indexDir).
        ##     Any existing file of that name is overwritten.  The page gets
        ##     the XHTML header, a heading with today's date and the opening
        ##     <table> tag.
        ##
        ##   Parameters :
        ##     $date - reference to the date hash (year, mon, day, year2,
        ##             mon2 and day2 are used)
        ##
        ##   Returns :
        ##     No meaningful value.  A warning is printed when the index file
        ##     cannot be opened or closed.
        ##
        #######################################################################
        sub writeTitle {
            my ( $date ) = @_;
            my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                            $date->{'day2'} . ".html";
            my $today     = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

            my $index;
            if ( ! open( $index, '>', $indexFile ) ) {
                warn "WARNING: Can't create index file '$indexFile': $!\n";
                return;
            }

            print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
    <title>Daily Comics for $today</title>
  </head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF

            close( $index ) or warn "WARNING: Error closing '$indexFile': $!\n";

            return;
        }
        !           211: 
        #######################################################################
        ## Function :  directDownload
        ##
        ##   Description :
        ##     Builds the image URL from the comic's 'search' template (see
        ##     parseComic), downloads it with wget and pipes it through
        ##     ImageMagick's convert, which resizes it to 640 pixels wide and
        ##     stores it as <comic>-<day2>.jpg inside $imageDir.
        ##
        ##   Parameters :
        ##     $comics - reference to the comic configuration hash
        ##     $comic  - key of the comic inside that hash
        ##     $date   - reference to the date hash
        ##
        ##   Returns :
        ##     0 on success, an "ERROR: ..." string when the download pipeline
        ##     reports a failure.
        ##
        #######################################################################
        sub directDownload {
            my ( $comics, $comic, $date ) = @_;

            ## Called with "&" because parseComic is defined further down;
            ## a plain call could warn about its prototype under -w.
            my $file = &parseComic( $comics, $comic, $date );

            ## Wrap a value in single quotes for the shell, so characters such
            ## as "&", "?" or ";" inside a URL cannot break the command line.
            my $quote = sub {
                my ( $arg ) = @_;
                $arg = '' if ( ! defined $arg );
                $arg =~ s/'/'\\''/g;
                return "'" . $arg . "'";
            };

            ##
            ## Save the file to the directory prepared by the main program.
            ## With the default configuration this is ./images/<mon2><year2>.
            ##
            my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

            my $cmd = join ' ',
                'wget', '-q', $quote->( $file ),
                '--referer=' . $quote->( $comics->{$comic}{'url'} ),
                '--user-agent=' . $quote->( $USER_AGENT ),
                '-O', '-',
                '|',
                '/usr/bin/convert', '-resize', '640', '-', $quote->( "jpeg:" . $target );

            if ( system( $cmd ) != 0 ) {
                return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
            }

            return 0;
        }
        !           227: 
        #######################################################################
        ## Function :  indexDownload
        ##
        ##   Description :
        ##     Downloads the comic's web page, looks for the image address
        ##     with the regular expression stored in the comic's 'search'
        ##     field (the first capture group is the address), then downloads
        ##     that image with wget and pipes it through ImageMagick's
        ##     convert, which resizes it to 640 pixels wide and stores it as
        ##     <comic>-<day2>.jpg inside $imageDir.
        ##
        ##     If several lines of the page match, the last match wins.
        ##
        ##   Parameters :
        ##     $comics - reference to the comic configuration hash; the
        ##               comic's 'ext' field is updated when the image
        ##               address contains gif, jpg or png
        ##     $comic  - key of the comic inside that hash
        ##     $date   - reference to the date hash (day2 is used)
        ##
        ##   Returns :
        ##     0 on success, an "ERROR: ..." string otherwise.
        ##
        #######################################################################
        sub indexDownload {
            my ( $comics, $comic, $date ) = @_;
            my $entry      = $comics->{$comic};
            my $pageURL    = $entry->{'url'};
            my $pattern    = $entry->{'search'};
            my $comicIndex = $indexDir . "/index." . $comic;
            my $comicLine;

            ## An empty pattern would make Perl silently reuse the last
            ## successful regular expression, so refuse it up front.
            if ( ! defined $pattern || $pattern eq '' ) {
                return "ERROR: Could not download comic $entry->{'fullName'}";
            }

            ## List form of system(): the URL never passes through a shell.
            ## A failed fetch is caught below, by the open or by the search
            ## finding nothing.
            system( 'wget', '-q', $pageURL, '-O', $comicIndex );

            my $fh;
            if ( ! open( $fh, '<', $comicIndex ) ) {
                return "ERROR: Can't open index file for " . $entry->{'fullName'} .
                       " (" . $entry->{'url'} . ")";
            }

            ##
            ## Find the comic strip URL based on the specified regex in the search
            ##
            while ( my $line = <$fh> ) {
                if ( $line =~ m/$pattern/ && defined $1 ) {
                    $comicLine = $1;
                    chomp $comicLine;
                }
            }
            close( $fh );

            unlink( $comicIndex );

            if ( ! $comicLine ) {
                return "ERROR: Could not download comic $entry->{'fullName'}";
            }

            if ( $comicLine =~ m/(gif|jpg|png)/i ) { $entry->{'ext'} = $1; }

            ##
            ## Turn the address found on the page into a full URL.
            ##
            my $comicURL;
            if ( $comicLine =~ m{^https?://}i ) {
                $comicURL = $comicLine;
            }
            elsif ( $comicLine =~ m{^//} ) {
                ## Protocol-relative address: borrow the scheme of the page.
                my ( $scheme ) = $pageURL =~ m{^([a-z][a-z0-9+.\-]*:)}i;
                $comicURL = ( $scheme || 'http:' ) . $comicLine;
            }
            else {
                ## Relative address: prefix it with scheme and host of the page.
                my ( $mainURL ) = $pageURL =~ m{^((?:[a-z][a-z0-9+.\-]*://)?[^/]+)}i;
                $mainURL  = '' if ( ! defined $mainURL );
                $comicURL = $mainURL . ( ( $comicLine =~ m{^/} ) ? '' : '/' ) . $comicLine;
            }

            ## Wrap a value in single quotes for the shell, so characters such
            ## as "&", "?" or ";" inside a URL cannot break the command line.
            my $quote = sub {
                my ( $arg ) = @_;
                $arg = '' if ( ! defined $arg );
                $arg =~ s/'/'\\''/g;
                return "'" . $arg . "'";
            };

            ##
            ## Save the file to the directory prepared by the main program.
            ## With the default configuration this is ./images/<mon2><year2>.
            ##
            my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

            my $cmd = join ' ',
                'wget',
                '--user-agent=' . $quote->( $USER_AGENT ),
                '--referer=' . $quote->( $pageURL ),
                '-q', $quote->( $comicURL ),
                '-O', '-',
                '|',
                '/usr/bin/convert', '-resize', '640', '-', $quote->( "jpeg:" . $target );

            if ( system( $cmd ) != 0 ) {
                return "ERROR: Could not download comic $entry->{'fullName'}";
            }

            return 0;
        }
        !           278: 
        #######################################################################
        ## Function :  parseComic
        ##
        ##   Description :
        ##     Fills in the placeholders of the comic's 'search' template:
        ##        __year__  __year2__  __mon__  __mon2__  __day__  __day2__
        ##     are replaced by the fields of the same name in the date hash,
        ##     __ext__ by the comic's 'ext' field.  Any other text is left
        ##     alone.
        ##
        ##   Parameters :
        ##     $comics - reference to the comic configuration hash
        ##     $comic  - key of the comic inside that hash
        ##     $date   - reference to the date hash
        ##
        ##   Returns :
        ##     The filled-in string without a trailing newline.  An empty
        ##     string is returned when the comic has no 'search' field.
        ##
        #######################################################################
        sub parseComic {
            my ( $comics, $comic, $date ) = @_;
            my $string = $comics->{$comic}{'search'};

            return '' if ( ! defined $string );

            ## 'ext' can be missing, or can still carry the line ending of the
            ## configuration file; neither may end up inside the result.
            my $ext = $comics->{$comic}{'ext'};
            $ext = '' if ( ! defined $ext );
            $ext =~ s/\s+\z//;

            my %valueFor = (
                'year'  => $date->{'year'},
                'year2' => $date->{'year2'},
                'mon'   => $date->{'mon'},
                'mon2'  => $date->{'mon2'},
                'day'   => $date->{'day'},
                'day2'  => $date->{'day2'},
                'ext'   => $ext,
            );

            $string =~ s/__(year2?|mon2?|day2?|ext)__/$valueFor{$1}/g;
            chomp $string;

            return $string;
        }
        !           296: 
        #######################################################################
        ## Function :  fetchDates
        ##
        ##   Description :
        ##     Returns the pieces of a date in the forms the rest of the
        ##     script needs.
        ##
        ##   Parameters :
        ##     $daysAgo - optional, defaults to 0.  If you missed a day or
        ##                two, pass the number of days to go back instead of
        ##                changing the day of the month by hand; that way
        ##                month and year boundaries are handled too.
        ##
        ##   Returns :
        ##     A hash (as a list) with these keys:
        ##        day    day of the month, 1-31
        ##        day2   day of the month, always two digits
        ##        mon    month, 1-12
        ##        mon2   month, always two digits
        ##        year   four digit year
        ##        year2  last two digits of the year
        ##        dow    day of the week as given by localtime (0 = Sunday)
        ##
        #######################################################################
        sub fetchDates {
            my ( $daysAgo ) = @_;
            $daysAgo = 0 if ( ! defined $daysAgo );

            ## Going back in steps of 24 hours; NOTE(review): close to
            ## midnight around a daylight-saving switch this can land on a
            ## neighbouring day.
            my @then  = localtime( time - $daysAgo * 24 * 60 * 60 );
            my %dates = ();

            @dates{ qw(day mon year dow) } = @then[ 3, 4, 5, 6 ];

            ## localtime counts years from 1900 and months from 0.
            $dates{'year'} += 1900;
            $dates{'mon'}  += 1;

            $dates{'year2'} = sprintf( "%02d", $dates{'year'} % 100 );
            $dates{'day2'}  = sprintf( "%02d", $dates{'day'} );
            $dates{'mon2'}  = sprintf( "%02d", $dates{'mon'} );

            return %dates;
        }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>