Annotation of comics/fetch.pl.new, revision 1.31

1.1       nick        1: #!/usr/bin/perl -w
                      2: 
1.15      nick        3: ###############################################################################
1.16      nick        4: # $Log: fetch.pl.new,v $
1.31    ! nick        5: # Revision 1.30  2022/10/04 12:02:03  nick
        !             6: # Added --no-check-certificate for wget calls as arcamax was failing its cert check.  Meh, whatever.  It's just comics.
        !             7: #
1.30      nick        8: # Revision 1.29  2020/06/10 21:32:52  nick
                      9: # Centered page
                     10: #
1.29      nick       11: # Revision 1.28  2020/06/10 21:14:31  nick
                     12: # Updated for w3 validation.
                     13: #
1.28      nick       14: # Revision 1.27  2019/04/15 12:50:23  nick
                     15: # The script was unable to handle html '&' and convert it, so I added that.  I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick.
                     16: #
1.27      nick       17: # Revision 1.26  2018/04/22 14:03:54  nick
                     18: # Changed the default for Sunday comics that was causing issues with some comics.
                     19: #
1.26      nick       20: # Revision 1.25  2018/02/12 13:30:58  nick
                     21: # Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't.
                     22: #
1.25      nick       23: # Revision 1.24  2018/02/06 14:31:06  nick
                     24: # A status report is now generated in JSON that can easily be scanned so that
                     25: # I can be alerted when there are failures that I miss if I don't read the
                     26: # comics that day.
                     27: #
1.24      nick       28: # Revision 1.23  2018/01/26 13:05:27  nick
                     29: # Added a new config option to remove all newline from the resulting index.html
                     30: # file.  This allows for easier parsing for certain comics.  I then updated
                     31: # the URLs to search for and enabled the newline removal for a handful
                     32: # of uComics.
                     33: #
                     34: # I believe I've also properly fixed the Comic Config version displayed on
                     35: # the webpage itself.
                     36: #
1.23      nick       37: # Revision 1.22  2017/12/05 13:37:40  nick
                     38: # Added the CVS config version to the output.
                     39: #
1.22      nick       40: # Revision 1.21  2015/10/26 14:25:40  nick
                     41: # Fixed a bug that was improperly including the day of week string, preventing the weekend comics from fetching properly.
                     42: #
1.21      nick       43: # Revision 1.20  2015/10/22 12:58:44  nick
                     44: # Added the ability for Sunday only comics.  Stonesoup is no longer weekdays, this has been added to Sunday only.  I also added Foxtrot Classics for weekdays and Foxtrot for Sundays.
                     45: #
1.20      nick       46: # Revision 1.19  2015/07/13 12:56:58  nick
                     47: # Added Sally Forth and Pearls Before Swine.  Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
                     48: #
1.19      nick       49: # Revision 1.18  2015/05/07 12:31:43  nick
                     50: # Added favicon
                     51: #
1.18      nick       52: # Revision 1.17  2015/02/19 14:56:10  nick
                     53: # Fixed a problem that forced everything to JPG.  This would kill GIF animations, but would not display the gifs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation.  I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file.  Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
                     54: #
1.17      nick       55: # Revision 1.16  2015/02/05 18:05:58  nick
                     56: # Changed the background and added a fancy title.
                     57: #
1.16      nick       58: # Revision 1.15  2015/01/19 13:46:19  nick
                     59: # *** empty log message ***
                     60: #
1.15      nick       61: ###############################################################################
                     62: 
1.1       nick       63: use strict;
                     64: use File::Path;
                     65: use Data::Dumper;
1.8       nick       66: use Pod::Usage;
                     67: use Getopt::Long;
1.24      nick       68: use JSON::Create 'create_json';
1.21      nick       69: use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
1.30      nick       70: use Data::Dumper;
1.16      nick       71: 
print("Running\n");

##
## Some default values
##
## These file-scoped lexicals are shared with the subroutines below
## ($comicConfigVer is assigned by readComicConfig; $days_ago is read by
## fetchDates), so the declaration order here matters.
##
my $ver            = '$Id: fetch.pl.new,v 1.30 2022/10/04 12:02:03 nick Exp $';
my $comicFile      = "comics.conf";
my $comicConfigVer = "Unknown";
my $reportFile     = "/home/httpd/html/daily/comics/status_report.json";
my %comics         = &readComicConfig ( $comicFile );
my %opts           = &fetchOptions( );
my $days_ago       = $opts{'days'} || 0;
my %dates          = &fetchDates();
my $baseDir        = $comics{'configs'}{'base_directory'} || ".";
my $imageDir       = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                     "/$dates{'mon2'}$dates{'year2'}";
my $indexDir       = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT     = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days           = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {

  ## The 'configs' pseudo-entry holds global settings, not a comic.
  next if ( $comic =~ m/config/ );

  print("Checking Comic $comic\n");

  ## Skip if this is Sunday and the comic is weekdays only
  if (($dates{'wday'} eq "Sunday") &&
      ($comics{$comic}{'not_sunday'} == 1)) {
    print "Skipping '$comic'; Weekdays only.\n";
    next;
  }

  ## Skip if Sunday only comic and it's not Sunday.
  if (($dates{'wday'} ne "Sunday") &&
      ($comics{$comic}{'sunday_only'} == 1)) {
    print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
    next;
  }

  $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
  &writeComic ( \%comics, $comic, \%dates );

  ## Resize downloaded images.  Mode 3 comics are link-only and have no
  ## local file; a failed download has nothing worth probing either.
  next if ( $comics{$comic}{'mode'} == 3 );
  next if ( $comics{$comic}{'error'} );

  my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
  next unless ( -f $file );

  ## List-form pipe open / system: the file name never passes through a
  ## shell, so odd characters in it cannot be interpreted as shell syntax.
  my $size = 0;
  open( my $img, '-|', '/usr/bin/identify', '-verbose', $file )
      or die ("Can't open: $!\n");
  while ( my $line = <$img> ) {
    ## Only the first reported geometry is used.
    if ( ( $size == 0 ) && ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) ) {
      $size = $1;
    }
  }
  close($img);

  system( '/usr/bin/convert', '-resize', '800', $file, $file )
      if ( $size > 800 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

print STDOUT "Status written to $reportFile.\n"
    if (&writeStatusReportJSON(\%comics, $reportFile));

$DATE=`date`;  chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
                    152: 
                    153: ## End
                    154: 
                    #######################################################################
## Function :  downloadComic
##
##   Description :
##     Looks at the comic's configured 'mode' and calls the matching
##     download routine:
##
##       mode 1 : indexDownload
##       mode 2 : directDownload
##       mode 3 : nothing is downloaded; 0 is returned straight away
##
##   Arguments :
##     $comics - reference to the comic configuration hash
##     $comic  - key of the comic within that hash
##     $date   - reference to the date hash, passed through untouched
##
##   Returns :
##     The selected routine's return value for modes 1 and 2, 0 for
##     mode 3, or an "ERROR: ..." string if the mode is anything else.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    my $mode = $comics->{$comic}{'mode'};
    $mode = '' unless ( defined $mode );

    ## Forward the hash we were given rather than reaching for a global,
    ## and call with '&' so a prototype on a later-defined routine is not
    ## checked against these three arguments.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );
    return 0                                          if ( $mode eq 3 );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
                    185: 
                    #######################################################################
## Function :  readComicConfig
##
##   Description :
##     Reads the comic configuration file into a hash.
##
##     - A comment line carrying a CVS "Id" keyword sets the file-level
##       $comicConfigVer to the revision number found in it.
##     - A non-comment line containing at least two commas is a comic:
##         key,url,search,mode,fullName,ext,not_sunday,sunday_only,remove_newlines
##       __YEAR__, __MON__ and __DAY__ in such a line are replaced with
##       today's 4-digit year and 2-digit month/day before splitting.
##     - Any other non-comment "name = value" line is stored under the
##       'configs' key.
##
##   Arguments :
##     $comicFile - path of the configuration file
##
##   Returns :
##     The configuration hash (as a list).  If the file cannot be opened a
##     warning is printed and the hash is empty.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig   = ( );

    my ( $year, $mon, $day ) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon = sprintf("%02d", ($mon + 1));
    $day = sprintf("%02d", $day);

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "ERROR: Can't read comic config '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field of a comic line does not
        ## carry a newline into file names and HTML.
        $line =~ s/[\r\n]+\z//;

        if ( $line =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/ ) {
            $comicConfigVer = $1;
        }

        ## Comments never define comics or settings.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res = split /,/, $line;
            $comicConfig{$res[0]}{'url'}             = $res[1];
            $comicConfig{$res[0]}{'search'}          = $res[2];
            $comicConfig{$res[0]}{'mode'}            = $res[3];
            $comicConfig{$res[0]}{'fullName'}        = $res[4];
            $comicConfig{$res[0]}{'ext'}             = $res[5];
            $comicConfig{$res[0]}{'not_sunday'}      = sprintf("%d", $res[6] || 0);
            $comicConfig{$res[0]}{'sunday_only'}     = sprintf("%d", $res[7] || 0);
            $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0);
            $comicConfig{$res[0]}{'error'}           = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close ($fh);

    return %comicConfig;
}
                    228: 
                    229: #######################################################################
                    230: #######################################################################
## writeStatusReportJSON ( \%comics, $filename )
##
##   Writes a JSON status report to $filename with three keys:
##     date        - today's date as YYYYMMDD
##     comics      - list of { comicName, error, status }, one per entry
##                   that has a 'fullName' (sorted by config key)
##     totalErrors - number of entries whose 'error' is set
##
##   Returns 1 once the file has been written and closed, 0 if the close
##   fails.  Dies if the file cannot be created.
sub writeStatusReportJSON {
    my ( $comicsRef, $filename ) = @_;

    ## Read the clock once so the three date parts always agree.
    my @now       = localtime;
    my $shortDate = sprintf("%d%02d%02d", $now[5] + 1900, $now[4] + 1, $now[3]);

    my @entries     = ();
    my $totalErrors = 0;

    foreach my $comic ( sort keys %{$comicsRef} ) {
        my $entry = $comicsRef->{$comic};
        next unless $entry->{'fullName'};

        if ( $entry->{'error'} ) {
            push @entries, { 'comicName' => "$entry->{'fullName'}",
                             'error'     => "$entry->{'error'}",
                             'status'    => "Error" };
            $totalErrors += 1;
        }
        else {
            push @entries, { 'comicName' => "$entry->{'fullName'}",
                             'error'     => 0,
                             'status'    => "Successfull" };
        }
    }

    ## 'comics' is always an array reference, so an empty run serialises as
    ## an empty list rather than as a missing value.
    my %json = ( 'date'        => $shortDate,
                 'comics'      => \@entries,
                 'totalErrors' => $totalErrors );

    open( my $sr, '>', $filename )
        or die ("ERROR: Failed to create status report: $!\n");
    print {$sr} create_json (\%json);
    if ( ! close($sr) ) {
        warn "ERROR: Failed to finish writing status report: $!\n";
        return 0;
    }

    return 1;
}
                    261: 
                    262: #######################################################################
                    263: #######################################################################
## writeComic ( \%comics, $comic, \%dates )
##
##   Appends one table row for $comic to the day's index page
##   ($indexDir/index-YYMMDD-Ddd.html).
##
##     - If the comic's 'error' is set, the row shows the name, the link
##       and the error text in red.
##     - Otherwise the row shows the name, the link and the image.  For
##       mode 3 the image source is the configured 'ext' value as-is; for
##       every other mode it is the file under ../images/MMYY/.
##
##   Returns 0 in every case (a failure to open the page is only warned
##   about).
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $entry     = $comics->{$comic};
    my $sd        = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    ## Escape '&' on a copy: the configuration hash is shared with the
    ## status report and must keep the plain name.
    my $fullName = defined $entry->{'fullName'} ? $entry->{'fullName'} : '';
    $fullName =~ s/&/&amp;/g;

    my $content;

    if ( $entry->{'error'} ) {
        $content = <<EOF;
  <tr>
    <td align="left">
<font color="blue"><b>$fullName</b></font> &nbsp; &nbsp;
<font size="-2">
        <a href="$entry->{'url'}">
                $entry->{'url'}
        </a>
</font><br/>
<font color="red"><b>$comic :  $entry->{'error'}</b></font><br/>
  </td>
</tr>
EOF
    }
    else {
        $content = <<EOF;

<!-- ********* Begin $comic ($fullName) ******* -->
  <tr>
    <td align="left">
<font color="blue"><b>$fullName</b></font> &nbsp; &nbsp; 
<font size="-2">
       <a href="$entry->{'url'}" target="_blank">
               $entry->{'url'}
       </a>
</font><br/>
EOF
        if ( $entry->{'mode'} == 3 ) {
            print("Mode 3\n");
            $content .= <<EOF;
<img src="$entry->{'ext'}" alt="$fullName" />
EOF
        }
        else {
            $content .= <<EOF;
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$entry->{'ext'}" alt="$comic-$date->{'day2'}" />
EOF
        }
        $content .= <<EOF;
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($fullName) ******* -->

EOF
    }

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "ERROR: Can't append to index file '$indexFile': $!\n";
        return 0;
    }
    print {$index} $content;
    close ($index);

    return 0;
}
                    321: 
                    322: 
                    #######################################################################
## writeMainIndex ( \%dates )
##
##   Placeholder that currently does nothing; the call in the main
##   program is commented out.  Takes the date hash reference and
##   returns nothing.
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
                    329: 
                    330: 
                    #######################################################################
## writeFooter ( \%dates )
##
##   Appends the closing part of the day's index page
##   ($indexDir/index-YYMMDD-Ddd.html): the end of the table, the
##   generation time, the script and config versions, the CVS link, the
##   validator badge and the closing body/html tags.
##
##   Reads the file-level $indexDir, @days, $ver and $comicConfigVer.
##   If the page cannot be opened a warning is printed and nothing is
##   written.
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    ## Strip the newline `date` adds, as the main program does.
    my $sysDate = `date`;
    chomp $sysDate;

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "ERROR: Can't append to index file '$indexFile': $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
Generated on: <font size="2" color="green">$sysDate</font><br/>
Version: <font size="2" color="green">$ver</font><br />
Config Version: <font size="2" color="green">$comicConfigVer</font><br />
CVS: <a href="http://demandred.dyndns.org:3000/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
  <br />
    <a href="http://validator.w3.org/check?uri=referer"><img
      src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</center>

</body>
</html>
EOF
    close( $index );

    return;
}
                    359: 
                    #######################################################################
## checkDir ( [ $dir, ... ] )   or   checkDir ( $dir, ... )
##
##   Makes sure every named directory exists, creating it (and any
##   missing parents) with File::Path's mkpath when it does not.
##
##   Directories may be given as a plain list, as array references, or a
##   mix of both.  Undefined or empty names are ignored.
#######################################################################
sub checkDir {
    ## Flatten array references so each directory is tested on its own.
    my @dirs = map { ( ref($_) eq 'ARRAY' ) ? @{$_} : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        next if ( -d $dir );
        mkpath( $dir );
    }

    return;
}
                    369: 
                    #######################################################################
## writeTitle ( \%dates )
##
##   Creates (truncating any existing copy) the day's index page
##   ($indexDir/index-YYMMDD-Ddd.html) and writes the XHTML header, the
##   page title, the heading image and the long-form date, leaving the
##   table open for the comic rows.
##
##   Both the title and the long date come from the date hash, so a run
##   for an earlier day shows that day in both places.
##
##   Reads the file-level $indexDir and @days.  If the page cannot be
##   created a warning is printed and nothing is written.
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today      = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "ERROR: Can't create index file '$indexFile': $!\n";
        return;
    }

    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<link rel="shortcut icon" href="./favicon.ico" />
    <title>Daily Comics for $today</title>
  </head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td>&nbsp;</td></tr>
EOF
    close ($index);

    return;
}
                    400: 
                    #######################################################################
## directDownload ( \%comics, $comic, \%dates )
##
##   Builds the image URL from the comic's 'search' template (see
##   parseComic), fetches it with wget and pipes it through convert, which
##   always writes a JPEG to images/MMYY/<comic>-DD.jpg.
##
##   On success the comic's 'ext' is set to "jpg" so that code which
##   builds the file name from 'ext' finds the file that was written.
##
##   Returns 0 on success, or an "ERROR: ..." string carrying the raw
##   status of the wget | convert pipeline.
##
##   NOTE(review): the output path is relative to the current directory,
##   not built from the configured base/image directory - confirm the
##   script is always run from the base directory.
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Quote a value for the shell: URLs routinely contain '&' and '?'.
    my $shq = sub {
        my ( $text ) = @_;
        $text = '' unless ( defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir  = $date->{'mon2'} . $date->{'year2'};
    my $cDate = $date->{'day2'};

    my $cmd = "wget --no-check-certificate -q " . $shq->( $file ) .
              " --referer=" . $shq->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $shq->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";

    my $status = system($cmd);
    if ( $status == 0 ) {
        $comics->{$comic}{'ext'} = 'jpg';
        return 0;
    }

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'} (status $status)";
}
                    417: 
                    418: #######################################################################
                    419: #######################################################################
## linkOnly ( \%comics, $comic, \%dates )
##
##   Download routine for comics that are only linked to: nothing is
##   fetched and 0 is always returned.  The arguments are accepted so the
##   signature matches the other download routines, but are not used.
sub linkOnly {
    my ( $comics, $comic, $date ) = @_;

    return 0;
}
        !           425: #######################################################################
        !           426: #######################################################################
## indexDownload ( \%comics, $comic, \%dates )
##
##   Fetches the comic's web page to indexes/index.<comic>, reads it,
##   deletes it again, and searches each line with the comic's 'search'
##   regex (case-insensitive).  The first capture group of the LAST
##   matching line is taken as the image location.  If that location does
##   not contain "http" it is appended to the site root derived from the
##   comic's URL.  The image is then fetched with wget to
##   images/MMYY/<comic>-DD.<ext>, where <ext> is gif/jpg/png as found in
##   the image location (the configured 'ext' is overwritten).
##
##   When 'remove_newlines' is set, line endings, runs of two or more
##   spaces and tabs are stripped from each line before searching.
##   NOTE(review): lines are still searched one at a time, so this does
##   not let a pattern span what were separate lines - confirm intent.
##
##   Returns 0 on success, or an "ERROR: ..." string if the page cannot
##   be read, nothing matches, or the image wget fails.
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my ( @lines, $comicLine, $mainURL );
    my $comicIndex = "indexes/index.$comic";
    my $entry      = $comics->{$comic};

    ## Quote a value for the shell: URLs routinely contain '&' and '?'.
    my $shq = sub {
        my ( $text ) = @_;
        $text = '' unless ( defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    print("Getching Index $comicIndex.\n");
    print("comic url: $entry->{'url'}\n");

    print Dumper($entry);

    my $wget_cmd = "wget --referer=" . $shq->( $entry->{'url'} ) . " " .
                   "--no-check-certificate --user-agent=" . $shq->( $USER_AGENT ) . " " .
                   $shq->( $entry->{'url'} ) . " -O " . $shq->( $comicIndex );
    print ("Using wget command:\n$wget_cmd\n");

    my $status = system($wget_cmd);

    print ("Return status: $status\n");

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $entry->{'fullName'} .
               " (" . $entry->{'url'} . ")";
    }
    while ( my $line = <$fh> ) {
        $line =~ s/\R|\ \ +|\t//g if ( $entry->{'remove_newlines'} );
        push @lines, $line;
    }
    close ($fh);

    unlink ("$comicIndex");

    $mainURL = $entry->{'url'};
    ## I need to figure out how to merge these two in to one regex.
    $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
    $mainURL =~ s/([a-z])\/.*/$1/i;

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    print "Using search $entry->{'search'}\n";

    foreach my $line (@lines) {
        if ( $line =~ m/$entry->{'search'}/i ) {
            print "Found match:\n";
            $comicLine = $1;
            chomp $comicLine if ( defined $comicLine );
            print "+ " . ( defined $comicLine ? $comicLine : '' ) . "\n";
        }
    }

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir  = $date->{'mon2'} . $date->{'year2'};
    my $cDate = $date->{'day2'};

    if ( $comicLine ) {
        print "Downloading Comic\n";
        if ( $comicLine =~ m/(gif|jpg|png)/i ) { $entry->{'ext'} = $1; }
        my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
        print "Final URL: $comicURL\n";
        # Strip &amp;
        $comicURL =~ s/\&amp\;/&/g;

        my $target = "images/$cDir/$comic-$cDate.$entry->{'ext'}";
        my $cmd = "wget --no-check-certificate --user-agent=" . $shq->( $USER_AGENT ) .
                  " --referer=" . $shq->( $entry->{'url'} ) .
                  " -q " . $shq->( $comicURL ) .
                  " -O " . $shq->( $target );

        ## A failed fetch must be reported, otherwise the page shows a
        ## broken image and the status report claims success.
        my $rc = system( $cmd );
        return 0 if ( $rc == 0 );
        return "ERROR: Could not download comic $entry->{'fullName'} (wget status $rc)";
    }

    return "ERROR: Could not download comic $entry->{'fullName'}";
}
                    501: 
                    #######################################################################
## parseComic ( \%comics, $comic, \%dates )
##
##   Expands the placeholders in the comic's 'search' string and returns
##   the result with one trailing newline removed.
##
##     __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##         replaced by the same-named entries of the date hash
##     __ext__
##         replaced by the comic's 'ext'
##
##   Placeholders are expanded one kind at a time, in the order listed.
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $template = $comics->{$comic}{'search'};

    ## Each value is looked up lazily, only when its placeholder matches.
    my @placeholders = (
        [ '__year__',  sub { $date->{'year'} } ],
        [ '__year2__', sub { $date->{'year2'} } ],
        [ '__mon__',   sub { $date->{'mon'} } ],
        [ '__mon2__',  sub { $date->{'mon2'} } ],
        [ '__day__',   sub { $date->{'day'} } ],
        [ '__day2__',  sub { $date->{'day2'} } ],
        [ '__ext__',   sub { $comics->{$comic}{'ext'} } ],
    );

    foreach my $rule ( @placeholders ) {
        my ( $token, $value ) = @{$rule};
        $template =~ s/$token/$value->()/ge;
    }

    chomp $template;

    return $template;
}
                    519: 
                    520: #######################################################################
                    521: #######################################################################
                    # fetchDates( )
                    #
                    #   Builds the date fields for the moment that lies $days_ago steps of
                    #   86400 seconds before the current time, broken down with localtime.
                    #
                    #   Returns a hash (as a flat list) with these keys:
                    #     day   - day of the month
                    #     mon   - month number, 1-12
                    #     year  - four-digit year
                    #     dow   - weekday index as reported by localtime (0 = Sunday)
                    #     year2 - last two digits of the year
                    #     day2  - day of the month, zero-padded to two digits
                    #     mon2  - month number, zero-padded to two digits
                    #     wday  - English weekday name
                    sub fetchDates () {
                        my @weekday_names = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
                        my $target_epoch  = time - ( 86400 * $days_ago );

                        my %dates = ();
                        @dates{ 'day', 'mon', 'year', 'dow' } = ( localtime( $target_epoch ) )[ 3 .. 6 ];

                        # localtime reports years since 1900 and zero-based months.
                        $dates{'year'} += 1900;
                        $dates{'mon'}++;

                        $dates{'year2'} = substr( $dates{'year'}, 2, 2 );

                        # Two-digit variants: only single-digit values get a leading zero.
                        $dates{'day2'} = $dates{'day'};
                        $dates{'day2'} = "0" . $dates{'day'} if $dates{'day'} < 10;

                        $dates{'mon2'} = $dates{'mon'};
                        $dates{'mon2'} = "0" . $dates{'mon'} if $dates{'mon'} < 10;

                        $dates{'wday'} = $weekday_names[ $dates{'dow'} ];

                        return %dates;
                    }
1.8       nick      537: 
                    538: ###############################################################################
                    539: ##
                    540: ## &fetchOptions( );
                    541: ##
                    542: ##      Grab our command-line arguments and toss them into a hash
                    543: ##
                    544: ###############################################################################
                    sub fetchOptions {
                        # Parses the command line with GetOptions and returns the result as
                        # a hash (flat list).  The keys 'days', 'help' and 'man' are always
                        # present; an option that was not supplied keeps an undefined value.
                        # Parsing errors and --help hand control to pod2usage(); --man asks
                        # pod2usage() for the verbose page read from the DATA handle.
                        my %opts;

                        # Taking the element references up front creates all three keys.
                        my @spec = (
                            "days:i" => \$opts{'days'},
                            "help|?" => \$opts{'help'},
                            "man"    => \$opts{'man'},
                        );

                        GetOptions( @spec ) or pod2usage( );

                        if ( defined $opts{'help'} ) {
                            pod2usage( );
                        }

                        if ( defined $opts{'man'} ) {
                            pod2usage( { -verbose => 2, -input => \*DATA } );
                        }

                        return %opts;
                    }
                    558: 
                    559: __END__
                    560: 
                    561: =head1 NAME
                    562: 
                    563: fetch.pl - Fetches comics and places them all locally in a single html file.
                    564: 
                    565: =head1 SYNOPSIS
                    566: 
                    567: fetch.pl [options]
                    568: 
                    569: Options:
                    570:         --days,d        Fetch comics from X days ago
                    571:         --help,?        Display the basic help menu
                    572:         --man,m         Display the detailed man page
                    573: 
                    574: =head1 DESCRIPTION
                    575: 
                    576: =head1 HISTORY
                    577: 
                    578: =head1 AUTHOR
                    579: 
                    580: Nicholas DeClario <nick@declario.com>
                    581: 
                    582: =head1 BUGS
                    583: 
                    584: This is a work in progress.  Please report all bugs to the author.
                    585: 
                    586: =head1 SEE ALSO
                    587: 
                    588: =head1 COPYRIGHT
                    589: 
                    590: =cut
                    591: 
                    592: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>