comics/fetch.pl.new - annotate

Return to fetch.pl.new CVS log
Up to [Local Repository] / comics
Annotation of comics/fetch.pl.new, revision 1.32

1.1       nick        1: #!/usr/bin/perl -w
                      2: 
1.15      nick        3: ###############################################################################
1.16      nick        4: # $Log: fetch.pl.new,v $
1.32    ! nick        5: # Revision 1.31  2024/12/13 16:03:49  nick
        !             6: # This adds the ability to specify a comic as a link only with a default splash image.
        !             7: #
1.31      nick        8: # Revision 1.30  2022/10/04 12:02:03  nick
                      9: # Added --no-check-certificate for wget calls as arcamax was failing its cert check.  Meh, whatever.  It's just comics.
                     10: #
1.30      nick       11: # Revision 1.29  2020/06/10 21:32:52  nick
                     12: # Centered page
                     13: #
1.29      nick       14: # Revision 1.28  2020/06/10 21:14:31  nick
                     15: # Updated for w3 validation.
                     16: #
1.28      nick       17: # Revision 1.27  2019/04/15 12:50:23  nick
                     18: # The script was unable to handle html '&amp;' and convert it, so I added that.  I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick.
                     19: #
1.27      nick       20: # Revision 1.26  2018/04/22 14:03:54  nick
                     21: # Changed the default for Sunday comics that was causing issues with some comics.
                     22: #
1.26      nick       23: # Revision 1.25  2018/02/12 13:30:58  nick
                     24: # Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't.
                     25: #
1.25      nick       26: # Revision 1.24  2018/02/06 14:31:06  nick
                     27: # A status report is now generated in JSON that can easily be scanned so that
                     28: # I can be alerted when there are failures that I miss if I don't read the
                     29: # comics that day.
                     30: #
1.24      nick       31: # Revision 1.23  2018/01/26 13:05:27  nick
                     32: # Added a new config option to remove all newline from the resulting index.html
                     33: # file.  This allows for easier parsing for certain comics.  I then updated
                     34: # the URLs to search for and enabled the newline removal for a handful
                     35: # of uComics.
                     36: #
                     37: # I believe I've also properly fixed the Comic Config version displayed on
                     38: # the webpage itself.
                     39: #
1.23      nick       40: # Revision 1.22  2017/12/05 13:37:40  nick
                     41: # Added the CVS config version to the output.
                     42: #
1.22      nick       43: # Revision 1.21  2015/10/26 14:25:40  nick
                     44: # Fixed a bug that was improperly including the day of week string preventing the weekend comics from fetching properly.
                     45: #
1.21      nick       46: # Revision 1.20  2015/10/22 12:58:44  nick
                     47: # Added the ability for Sunday only comics.  Stonesoup is no longer weekdays, this has been added to Sunday only.  I also added Foxtrot Classics for weekdays and Foxtrot for Sundays.
                     48: #
1.20      nick       49: # Revision 1.19  2015/07/13 12:56:58  nick
                     50: # Added Sally Forth and Pearls Before Swine.  Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
                     51: #
1.19      nick       52: # Revision 1.18  2015/05/07 12:31:43  nick
                     53: # Added favicon
                     54: #
1.18      nick       55: # Revision 1.17  2015/02/19 14:56:10  nick
                     56: # Fixed a problem that forced everything to JPG.  This would kill GIF animations, but would not display the gifs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation.  I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file.  Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
                     57: #
1.17      nick       58: # Revision 1.16  2015/02/05 18:05:58  nick
                     59: # Changed the background and added a fancy title.
                     60: #
1.16      nick       61: # Revision 1.15  2015/01/19 13:46:19  nick
                     62: # *** empty log message ***
                     63: #
1.15      nick       64: ###############################################################################
                     65: 
1.1       nick       66: use strict;
                     67: use File::Path;
                     68: use Data::Dumper;
1.8       nick       69: use Pod::Usage;
                     70: use Getopt::Long;
1.24      nick       71: use JSON::Create 'create_json';
1.21      nick       72: use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
1.30      nick       73: use Data::Dumper;
1.16      nick       74: 
## Progress marker.  The trailing newline keeps it from running into the
## "Starting comic fetch" line printed below.
print("Running\n");

##
## Some default values
##
## These are file-scoped lexicals; the subroutines further down read
## $ver, $comicConfigVer, $imageDir, $indexDir, $USER_AGENT, @days,
## %comics and $days_ago directly, so their names must not change.
##

## CVS identification string shown in the page footer.
my $ver            = '$Id: fetch.pl.new,v 1.31 2024/12/13 16:03:49 nick Exp $';

## Comic definition file, read relative to the current directory.
my $comicFile      = "comics.conf";

## Overwritten by readComicConfig when the config file carries an Id line.
my $comicConfigVer = "Unknown";

## Where the JSON status report is written at the end of the run.
my $reportFile     = "/home/httpd/html/daily/comics/status_report.json";

## Order matters from here on: fetchDates reads $days_ago, and the
## directory names below are built from %comics and %dates.
my %comics         = &readComicConfig ( $comicFile );
my %opts           = &fetchOptions( );
my $days_ago       = $opts{'days'} || 0;
my %dates          = &fetchDates();

my $baseDir        = $comics{'configs'}{'base_directory'} || ".";
my $imageDir       = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                     "/$dates{'mon2'}$dates{'year2'}";
my $indexDir       = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );

## Sent on every wget call.
my $USER_AGENT     = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

## Indexed by the 'dow' value from fetchDates (0 = Sunday).
my @days           = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

my $DATE = `date`;
chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";
                     97: 
                     98: ##
                     99: ## Main program starts here
                    100: ##
                    101: &checkDir ( [ $imageDir, $indexDir ] );
                    102: 
1.5       nick      103: &writeTitle ( \%dates );
1.1       nick      104: 
                    105: foreach my $comic ( sort keys %comics ) {
1.20      nick      106: 
1.31      nick      107:   print("Checking Comic $comic\n");
                    108: 
1.20      nick      109:   ## Skip if this is Sunday and the comic is weekdays only
1.1       nick      110:   next if ( $comic =~ m/config/ );
1.21      nick      111:   if (($dates{'wday'} eq "Sunday") && 
1.26      nick      112:       ($comics{$comic}{'not_sunday'} == 1)) {
1.20      nick      113:     print "Skipping '$comic'; Weekdays only.\n";
                    114:     next;
                    115:   }
                    116: 
                    117:   ## Skip if Sunday only comic and it's not Sunday.
1.21      nick      118:   if (($dates{'wday'} ne "Sunday") &&
1.20      nick      119:       ($comics{$comic}{'sunday_only'} == 1)) {
1.21      nick      120:     print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
1.20      nick      121:     next
                    122:   }
1.26      nick      123:   
1.1       nick      124:   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
                    125:   &writeComic ( \%comics, $comic, \%dates );
                    126: 
1.17      nick      127:     my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
                    128:     my $size = 0;    
                    129: 
1.31      nick      130:     ## Resize downloaded images
                    131:     if($comics{$comic}{'mode'} != 3) {
                    132:            my $cmd = "/usr/bin/identify -verbose $file|";
                    133:            open(IMG, $cmd) || die ("Can't open: $!\n");
                    134:            while(<IMG>) {
                    135:                if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
                    136:                    $size = $1 if ( $size == 0);
                    137:                }
                    138:            }
                    139:            close(IMG);
                    140: 
                    141:            system( "/usr/bin/convert -resize 800 $file $file" )
1.32    ! nick      142:            # if ( $size > 800 ) 
1.31      nick      143:      }
1.4       nick      144: }
                    145: 
1.1       nick      146: ## &writeMainIndex ( \%dates );
                    147: 
                    148: &writeFooter( \%dates );
                    149: 
1.24      nick      150: print STDOUT "Status written to $reportFile.\n"
                    151:     if (&writeStatusReportJSON(\%comics, $reportFile));
                    152: 
1.1       nick      153: $DATE=`date`;  chomp( $DATE );
                    154: print STDOUT "Completed comic fetch at $DATE\n";
                    155: 
                    156: ## End
                    157: 
                    158: #######################################################################
                    159: ## Function :  downloadComic
                    160: ##
                    161: ##   Description :
                    162: ##     This function determines the download method being used to 
                    163: ##      retrieve the comic and calls the apprioriate function.
                    164: ##
                    165: ##      If the mode is invalid an error will be returned.
                    166: ##
                    167: #######################################################################
                    168: sub downloadComic ($$) {
                    169:        my ( $comics, $comic, $date ) = @_;
                    170: 
                    171:        SWITCH: {
                    172:                if ( $comics->{$comic}{'mode'} eq 1 ) { 
                    173:                        return indexDownload ( \%comics, $comic, $date );
                    174:                        last SWITCH;
                    175:                }
                    176:                if ( $comics->{$comic}{'mode'} eq 2 ) { 
                    177:                        return directDownload ( \%comics, $comic, $date );
                    178:                        last SWITCH;
                    179:                }
1.31      nick      180:                if ( $comics->{$comic}{'mode'} eq 3 ) { 
                    181:                        return 0;
                    182:                        last SWITCH;
                    183:                }
1.1       nick      184:        }
                    185:         
                    186:        return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
                    187: }
                    188: 
                    189: #######################################################################
                    190: #######################################################################
                    191: sub readComicConfig ($$) {
                    192:        my ( $comicFile ) = @_;
                    193:        my %comicConfig   = ( );
                    194:        my %config        = ( );
                    195: 
1.14      nick      196:     my ($year, $mon, $day) =( localtime(time))[5,4,3];
                    197:     $year += 1900;
                    198:     $mon = sprintf("%02d", ($mon + 1));
                    199:     $day = sprintf("%02d", $day);
1.32    ! nick      200:     my $year_short = substr($year, -2);
1.14      nick      201: 
1.1       nick      202:        open FILEN, "<$comicFile";
                    203:                while (<FILEN>) {
1.24      nick      204:             #if ($_ =~ m/^#.* \$[Ii][Dd]: fetch.pl.new,v 1.23 2018/01/26 13:05:27 nick Exp $/) {
                    205:             if ($_ =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/) {
1.22      nick      206:                 $comicConfigVer = $1;
                    207:             }
1.1       nick      208:                        if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
1.14      nick      209:                 $_ =~ s/__YEAR__/$year/g;
1.32    ! nick      210:                 $_ =~ s/__YR__/$year_short/g;
1.14      nick      211:                 $_ =~ s/__MON__/$mon/g;
                    212:                 $_ =~ s/__DAY__/$day/g;
                    213:                 
1.1       nick      214:                                my @res = split /,/, $_;
                    215:                                $comicConfig{$res[0]}{'url'}      = $res[1];
                    216:                                $comicConfig{$res[0]}{'search'}   = $res[2];
                    217:                                $comicConfig{$res[0]}{'mode'}     = $res[3];
                    218:                                $comicConfig{$res[0]}{'fullName'} = $res[4];
                    219:                                $comicConfig{$res[0]}{'ext'}      = $res[5];
1.26      nick      220:                 $comicConfig{$res[0]}{'not_sunday'}   = sprintf("%d", $res[6] || 0);
1.21      nick      221:                 $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0);
1.23      nick      222:                 $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0);
1.1       nick      223:                                $comicConfig{$res[0]}{'error'}    = 0;
                    224:                        }
                    225:                        elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {
                    226:                                $comicConfig{'configs'}{$1} = $2;
                    227:                        }
                    228:                }
                    229:        close (FILEN);
                    230: 
                    231:        return %comicConfig;
                    232: }
                    233: 
                    234: #######################################################################
                    235: #######################################################################
1.24      nick      236: sub writeStatusReportJSON ($$) {
                    237:        my ( $comicsRef, $filename ) = @_;
                    238:     my %comics = %$comicsRef;
1.25      nick      239:     my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900,
                    240:                                           (localtime)[4] + 1,
                    241:                                           (localtime)[3]);
1.27      nick      242:     my %json = ('date' => $shortDate, 'comics' => ());
1.24      nick      243:     my $totalErrors = 0;
                    244: 
                    245:     foreach my $comic (sort keys %comics) {
                    246:       next unless $comics{$comic}{'fullName'};
                    247:       if ($comics{$comic}{'error'}) {
                    248:         my %error = ('comicName' => "$comics{$comic}{'fullName'}",
                    249:                      'error' => "$comics{$comic}{'error'}",
                    250:                      'status' => "Error");
1.27      nick      251:         push @{$json{'comics'}}, \%error;
1.24      nick      252:         $totalErrors += 1;
                    253:       } else {
                    254:         my %status = ('comicName' => "$comics{$comic}{'fullName'}",
                    255:                       'error' => 0,
                    256:                       'status' => "Successfull");
1.27      nick      257:         push @{$json{'comics'}}, \%status;
1.24      nick      258:       }
                    259:     }
                    260:     $json{'totalErrors'} = $totalErrors;    
                    261: 
                    262:     open SR, ">$filename" or die ("ERROR: Failed to create status report: $!\n");
                    263:     print SR create_json (\%json);
                    264:     close(SR); 
                    265: }
                    266: 
                    267: #######################################################################
                    268: #######################################################################
1.1       nick      269: sub writeComic ($$) {
                    270:        my ( $comics, $comic, $date ) = @_;
1.11      nick      271:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      272:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    273:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    274:                        $sd . ".html";
1.31      nick      275:        $comics->{$comic}{'fullName'} =~ s/&/&amp;/g;
                    276: 
1.1       nick      277:        my $content = <<EOF;
                    278: 
                    279: <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
                    280:   <tr>
                    281:     <td align="left">
                    282: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
                    283: <font size="-2">
1.31      nick      284:        <a href="$comics->{$comic}{'url'}" target="_blank">
1.1       nick      285:                $comics->{$comic}{'url'}
                    286:        </a>
                    287: </font><br/>
1.31      nick      288: EOF
                    289:        if ( $comics->{$comic}{'mode'} == 3 ) {
                    290:                print("Mode 3\n");
                    291:                $content .= <<EOF;
                    292: <img src="$comics->{$comic}{'ext'}" alt="$comics->{$comic}{'fullName'}" />
                    293: EOF
                    294:        } else {
                    295:                $content .= <<EOF;
1.17      nick      296: <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
1.31      nick      297: EOF
                    298:        }
                    299:        $content .= <<EOF;
1.1       nick      300: <br/><br/>
                    301: </td></tr>
                    302: <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
                    303: 
                    304: EOF
                    305:        open INDEX, ">>$indexFile";
                    306: 
                    307:        print INDEX $content if ( ! $comics->{$comic}{'error'} );
                    308: 
                    309:        print INDEX <<EOF
                    310: <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;
                    311: <font size="-2"><
                    312:         <a href="$comics->{$comic}{'url'}">
                    313:                 $comics->{$comic}{'url'}
                    314:         </a>
                    315: </font><br/>
                    316: <font color="red"><b>$comic :  $comics->{$comic}{'error'}</b></font><br/>
                    317:   </td>
                    318: </tr>
                    319: EOF
                    320:                if ( $comics->{$comic}{'error'} );
                    321: 
                    322:        close (INDEX);
                    323: 
                    324:        return 0;
                    325: }
                    326: 
                    327: 
                    328: #######################################################################
                    329: #######################################################################
                    330: sub writeMainIndex ($$) {
                    331:        my ( $date ) = @_;
                    332: 
                    333: }
                    334: 
                    335: 
                    336: #######################################################################
                    337: #######################################################################
                    338: sub writeFooter {
                    339:        my ( $date ) = @_;
1.11      nick      340:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      341:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    342:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    343:                        $sd . ".html";
1.1       nick      344:        my $sysDate = `date`;
                    345: 
                    346:        open INDEX, ">>$indexFile";
                    347:        print INDEX <<EOF;
                    348: </table>
1.3       nick      349: <center>
1.28      nick      350: Generated on: <font size="2" color="green">$sysDate</font><br/>
                    351: Version: <font size="2" color="green">$ver</font><br />
                    352: Config Version: <font size="2" color="green">$comicConfigVer</font><br />
1.32    ! nick      353: CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
1.28      nick      354:   <br />
1.1       nick      355:     <a href="http://validator.w3.org/check?uri=referer"><img
                    356:       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
                    357: </center>
                    358: 
                    359: </body>
                    360: </html>
                    361: EOF
                    362:        close( INDEX );
                    363: }
                    364: 
                    365: #######################################################################
                    366: #######################################################################
                    367: sub checkDir ($$) {
                    368:        my @dir = @_;
                    369: 
                    370:        foreach ( @dir ) {
                    371:                if ( ! -d $_ ) { mkpath( $_ ); }
                    372:        }
                    373: }
                    374: 
                    375: #######################################################################
                    376: #######################################################################
                    377: sub writeTitle ($$) {
                    378:        my ( $date ) = @_;
1.11      nick      379:        my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
1.12      nick      380:        my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
                    381:                        $date->{'mon2'} . $date->{'day2'} . "-" . 
                    382:                        $sd . ".html";
1.8       nick      383:        my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
1.16      nick      384:     my $today_long = Date_to_Text_Long(Today());
1.1       nick      385: 
                    386:        open INDEX, ">$indexFile";
                    387:        print INDEX <<EOF;
                    388: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
                    389: 
                    390: <html xmlns="http://www.w3.org/1999/xhtml">
                    391: <head>
                    392: <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
1.28      nick      393: <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
                    394: <link rel="shortcut icon" href="./favicon.ico" />
1.1       nick      395:     <title>Daily Comics for $today</title>
                    396:   </head>
                    397: <body bgcolor="#FFFFFF">
1.29      nick      398: <table align="center" cellpadding="0" cellspacing="0" border="0">
1.28      nick      399: <tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr>
1.16      nick      400: <tr><td align="left">$today_long</td></tr>
                    401: <tr><td>&nbsp;</td></tr>
1.1       nick      402: EOF
                    403:        close (INDEX);
                    404: }
                    405: 
                    406: #######################################################################
                    407: #######################################################################
                    408: sub directDownload ($$) {
                    409:        my ( $comics, $comic, $date ) = @_;
                    410:        my $file = &parseComic ( $comics, $comic, $date );
                    411: 
                    412:         ##
                    413:         ## Save the file to the appropriate directory
                    414:         ##
                    415:         my $cDir  = $date->{'mon2'} . $date->{'year2'};
                    416:         my $cDate = $date->{'day2'};
                    417: 
1.32    ! nick      418:        my $cmd = "wget --no-check-certificate -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" $comics->{$comic}{'url'} -O - | /usr/bin/convert - images/$cDir/$comic-$cDate.$comics->{$comic}{ext}";
        !           419: 
        !           420:         print("Command: $cmd\n");
1.14      nick      421: 
1.1       nick      422:         return system($cmd);
                    423: }
                    424: 
                    425: #######################################################################
                    426: #######################################################################
1.31      nick      427: sub linkOnly ($$) {
                    428:        my ( $comics, $comic, $date ) = @_;
                    429: 
                    430:        return 0;
                    431: }
                    432: #######################################################################
                    433: #######################################################################
1.1       nick      434: sub indexDownload ($$) {
                    435:        my ( $comics, $comic, $date ) = @_;
                    436:        my ( @lines, $comicLine, $mainURL );
                    437:        my $comicIndex = "indexes/index.$comic";
                    438: 
1.30      nick      439:     print("Getching Index $comicIndex.\n");
                    440:     print("comic url: $comics->{$comic}{'url'}\n");
                    441: 
                    442:     print Dumper($comics->{$comic});
                    443: 
                    444:     my $wget_cmd = "wget --referer='$comics->{$comic}{'url'}' " .
                    445:                    "--no-check-certificate --user-agent=\"$USER_AGENT\" " .
1.19      nick      446:                    "$comics->{$comic}{'url'} -O $comicIndex";
1.30      nick      447:     print ("Using wget command:\n$wget_cmd\n");
                    448: 
                    449:     my $status = system($wget_cmd);
                    450: 
                    451:     print ("Return status: $status\n");
1.1       nick      452: 
                    453:        if ( ! open FILEN, "<$comicIndex" ) {  
                    454:                return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
                    455:                       " (" . $comics->{$comic}{'url'} . ")"; 
                    456:        } 
1.23      nick      457:     while (<FILEN>) {
                    458:         my $line = $_;
1.27      nick      459:         $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} );
1.23      nick      460:        push @lines, $line;
                    461:     }
1.1       nick      462:        close (FILEN);  
                    463: 
1.27      nick      464: 
1.1       nick      465:        unlink ("$comicIndex");
                    466: 
                    467:        $mainURL = $comics->{$comic}{'url'};
                    468:        ## I need to figure out how to merge these two in to one regex.
                    469:        $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
                    470:        $mainURL =~ s/([a-z])\/.*/$1/i;
                    471: 
                    472:        ##
                    473:        ## Find the comic strip URL based on the specified regex in the search
                    474:        ##
1.27      nick      475: 
1.31      nick      476:     print "Using search $comics->{$comic}{'search'}\n";
                    477: 
1.1       nick      478:        foreach my $line (@lines) {
1.17      nick      479:                if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
1.31      nick      480:             print "Found match:\n";
1.1       nick      481:                        $comicLine = $1; chomp $comicLine;
1.31      nick      482:             print "+ $comicLine\n";
1.1       nick      483:                }
1.17      nick      484:     }
1.1       nick      485: 
                    486:        ##
                    487:        ## Save the file to the appropriate directory
                    488:        ##
                    489:        my $cDir    = $date->{'mon2'} . $date->{'year2'};
                    490:        my $cDate   = $date->{'day2'};
                    491: 
                    492:        if ( $comicLine ) {
1.31      nick      493:         print "Downloading Comic\n";
1.1       nick      494:                if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
                    495:                my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
1.31      nick      496:         print "Final URL: $comicURL\n";
1.27      nick      497:         # Strip &amp;
                    498:         $comicURL =~ s/\&amp\;/&/g;
1.30      nick      499:                my $cmd = "wget --no-check-certificate --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
1.1       nick      500:                system( $cmd );
                    501:                return 0;
                    502:        }
                    503: 
                    504:        unlink "index.html";
                    505: 
                    506:        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
                    507: }
                    508: 
                    509: #######################################################################
                    510: #######################################################################
                    511: sub parseComic ($$) {
                    512:        my ( $comics, $comic, $date ) = @_;
                    513:        my $string = $comics->{$comic}{'search'};
                    514: 
                    515:        $string =~ s/__year__/$date->{'year'}/g;
                    516:        $string =~ s/__year2__/$date->{'year2'}/g;
                    517:        $string =~ s/__mon__/$date->{'mon'}/g;
                    518:        $string =~ s/__mon2__/$date->{'mon2'}/g;
                    519:        $string =~ s/__day__/$date->{'day'}/g;
                    520:        $string =~ s/__day2__/$date->{'day2'}/g;
                    521:        $string =~ s/__ext__/$comics->{$comic}{'ext'}/g;
                    522:        chomp $string;
                    523: 
                    524:        return $string;
                    525: }
                    526: 
                    527: #######################################################################
                    528: #######################################################################
                    529: sub fetchDates () {
                    530:        my %dates = ();
                    531: 
1.8       nick      532:        ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime(time - (86400 * $days_ago )))[3,4,5,6];
1.1       nick      533: 
                    534:        $dates{'year'} += 1900;
                    535:        $dates{'year2'} = substr $dates{'year'}, 2, 2;
                    536:        $dates{'day2'}  = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; 
                    537:        $dates{'mon'}++;
                    538:        $dates{'mon2'}  = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'};
1.21      nick      539:     my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
                    540:     $dates{'wday'} = $days[$dates{'dow'}];
1.1       nick      541: 
                    542:        return %dates;
                    543: }
1.8       nick      544: 
                    545: ###############################################################################
                    546: ##
                    547: ## &fetchOptions( );
                    548: ##
                    549: ##      Grab our command line arguments and toss them in to a hash
                    550: ##
                    551: ###############################################################################
                    552: sub fetchOptions {
                    553:         my %opts;
                    554: 
                    555:         &GetOptions(
                    556:                         "days:i"        => \$opts{'days'},
                    557:                         "help|?"        => \$opts{'help'},
                    558:                         "man"           => \$opts{'man'},
                    559:                    ) || &pod2usage( );
                    560:         &pod2usage( ) if defined $opts{'help'};
                    561:         &pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $opts{'man'};
                    562: 
                    563:         return %opts;
                    564: }
                    565: 
                    566: __END__
                    567: 
                    568: =head1 NAME
                    569: 
                    570: fetch.pl - Fetches comics and places them all locally in a single html file.
                    571: 
                    572: =head1 SYNOPSIS
                    573: 
                    574: fetch.pl [options]
                    575: 
                    576: Options:
                    577:         --days,d        Fetch comics from X days ago
                    578:         --help,?        Display the basic help menu
                    579:         --man,m         Display the detailed man page
                    580: 
                    581: =head1 DESCRIPTION
                    582: 
                    583: =head1 HISTORY
                    584: 
                    585: =head1 AUTHOR
                    586: 
                    587: Nicholas DeClario <nick@declario.com>
                    588: 
                    589: =head1 BUGS
                    590: 
                    591: This is a work in progress.  Please report all bugs to the author.
                    592: 
                    593: =head1 SEE ALSO
                    594: 
                    595: =head1 COPYRIGHT
                    596: 
                    597: =cut
                    598: 
                    599:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>