Annotation of comics/fetch.pl.new, revision 1.22
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 nick 4: # $Log: fetch.pl.new,v $
1.22 ! nick 5: # Revision 1.21 2015/10/26 14:25:40 nick
! 6: # Fixed a bug that improperly included the day-of-week string, preventing the weekend comics from being fetched properly.
! 7: #
1.21 nick 8: # Revision 1.20 2015/10/22 12:58:44 nick
9: # Added the ability for Sunday only comics. Stonesoup is no longer weekdays, this has been added to Sunday only. I also added Foxtrot Classics for weekdays and Foxtrot for Sundays.
10: #
1.20 nick 11: # Revision 1.19 2015/07/13 12:56:58 nick
12: # Added Sally Forth and Pearls Before Swine. Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
13: #
1.19 nick 14: # Revision 1.18 2015/05/07 12:31:43 nick
15: # Added favicon
16: #
1.18 nick 17: # Revision 1.17 2015/02/19 14:56:10 nick
18: # Fixed a problem that forced everything to JPG. This would kill GIF animations, and would not display the GIFs either, because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility and rewrote how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
19: #
1.17 nick 20: # Revision 1.16 2015/02/05 18:05:58 nick
21: # Changed the background and added a fancy title.
22: #
1.16 nick 23: # Revision 1.15 2015/01/19 13:46:19 nick
24: # *** empty log message ***
25: #
1.15 nick 26: ###############################################################################
27:
1.1 nick 28: use strict;
29: use File::Path;
30: use Data::Dumper;
1.8 nick 31: use Pod::Usage;
32: use Getopt::Long;
1.1 nick 33:
1.21 nick 34: use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
1.16 nick 35:
##
## Some default values
##
## NOTE: the order matters. $comicConfigVer must exist before
## readComicConfig runs (it is set from there), and $days_ago must exist
## before fetchDates runs.
##
my $ver            = '$Id: fetch.pl.new,v 1.21 2015/10/26 14:25:40 nick Exp $';
my $comicFile      = "comics.conf";
my $comicConfigVer = "Unknown";
my %comics         = &readComicConfig ( $comicFile );
my %opts           = &fetchOptions( );
my $days_ago       = $opts{'days'} || 0;
my %dates          = &fetchDates();
my $baseDir        = $comics{'configs'}{'base_directory'} || ".";
my $imageDir       = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                     "/$dates{'mon2'}$dates{'year2'}";
my $indexDir       = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT     = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days           = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {

    ## The 'configs' key holds the global settings, it is not a comic.
    next if ( $comic eq 'configs' );

    ## Skip if this is Sunday and the comic is weekdays only
    if ( ( $dates{'wday'} eq "Sunday" ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) {
        print "Skipping '$comic'; Weekdays only.\n";
        next;
    }

    ## Skip if Sunday only comic and it's not Sunday.
    if ( ( $dates{'wday'} ne "Sunday" ) &&
         ( $comics{$comic}{'sunday_only'} == 1 ) ) {
        print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
        next;
    }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## There is nothing to measure or resize if the download failed.
    next if ( $comics{$comic}{'error'} );

    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    if ( ! -s $file ) {
        warn "No image found for '$comic' at $file; skipping resize.\n";
        next;
    }

    ##
    ## Ask ImageMagick for the image width.  The list form of open/system
    ## keeps the file name away from the shell.  Only the first geometry
    ## line is used (animated GIFs report one per frame).
    ##
    my $size = 0;
    if ( open( my $img, '-|', '/usr/bin/identify', '-verbose', $file ) ) {
        while ( my $line = <$img> ) {
            if ( ( $size == 0 ) && ( $line =~ m/^\s+geometry:\s+(\d+)x\d+/i ) ) {
                $size = $1;
            }
        }
        close( $img );
    }
    else {
        ## A failed size check only costs us the resize; keep going so the
        ## remaining comics and the page footer are still written.
        warn "Can't run identify on $file: $!\n";
    }

    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $size > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
106:
107: ## End
108:
#######################################################################
## Function    : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function.
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   The return value of the selected download function (false on
##   success), or an error string if the mode is neither 1 (index
##   download) nor 2 (direct download).
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## A missing mode is treated like any other unknown mode.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## Pass on the reference we were given rather than the global hash.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
135:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Three kinds of lines are
##   recognised:
##     - comment lines (starting with '#'); one containing a CVS
##       '$Id: ... $' keyword sets the file-level $comicConfigVer
##     - comic lines with comma separated fields:
##         key,url,search,mode,fullName,ext[,sunday[,sunday_only]]
##       where __YEAR__, __MON__ and __DAY__ are replaced with the
##       current local date
##     - 'name = value' settings, stored under the 'configs' key
##
## Parameters  :
##   $comicFile - path of the configuration file
##
## Returns     :
##   A hash keyed by comic key (plus 'configs').  The hash is empty if
##   the file cannot be opened; a warning is printed in that case.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ($year, $mon, $day) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon = sprintf("%02d", ($mon + 1));
    $day = sprintf("%02d", $day);

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "Can't open comic config '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Strip the line ending so the last field does not carry it.
        $line =~ s/[\r\n]+$//;

        ## Comments: only look for the version keyword, never treat a
        ## commented-out line as a setting.
        if ( $line =~ m/^#/ ) {
            if ( $line =~ m/^#.* \$Id: (.*)\$/ ) {
                $comicConfigVer = $1;
            }
            next;
        }

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res = split /,/, $line;

            ## The Sunday flags are optional.  Test for a present numeric
            ## field rather than for truth, otherwise a configured "0"
            ## would silently fall back to the default.
            my $sunday     = ( defined $res[6] && $res[6] =~ m/^\s*(\d+)\s*$/ ) ? $1 : 1;
            my $sundayOnly = ( defined $res[7] && $res[7] =~ m/^\s*(\d+)\s*$/ ) ? $1 : 0;

            $comicConfig{$res[0]}{'url'}         = $res[1];
            $comicConfig{$res[0]}{'search'}      = $res[2];
            $comicConfig{$res[0]}{'mode'}        = $res[3];
            $comicConfig{$res[0]}{'fullName'}    = $res[4];
            $comicConfig{$res[0]}{'ext'}         = $res[5];
            $comicConfig{$res[0]}{'sunday'}      = sprintf("%d", $sunday);
            $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $sundayOnly);
            $comicConfig{$res[0]}{'error'}       = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close ( $fh );

    return %comicConfig;
}
176:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for a comic to the day's index file.  If
##   the comic has no error the row contains the image; otherwise it
##   contains the error text in red.
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to write
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, 1 if the index file could not be opened.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $content;

    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td> so that the closing tags
        ## below are balanced and the table stays well formed.
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Can't append to index file '$indexFile': $!\n";
        return 1;
    }
    print $index $content;
    close ( $index );

    return 0;
}
223:
224:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder.  The function currently only unpacks its argument and
##   writes nothing; the only call to it in the main program is
##   commented out.
##
## Parameters  :
##   $date - reference to the date hash
##
#######################################################################
sub writeMainIndex ($$) {
    my ( $date ) = @_;
}
231:
232:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing part of the page to the day's index file:
##   the end of the table, the generation time, the script and config
##   versions, the CVS link, the validator badge and the closing
##   body/html tags.
##
## Parameters  :
##   $date - reference to the date hash
##
## Returns     :
##   Nothing useful.  A warning is printed and nothing is written if
##   the index file cannot be opened.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $sysDate = `date`;

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Can't append footer to index file '$indexFile': $!\n";
        return;
    }

    ## The <font size="2"> element is closed before </center> so the
    ## generated XHTML is balanced.
    print $index <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
Config Version: <font color="green">$comicConfigVer</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</font>
</center>

</body>
</html>
EOF
    close( $index );
}
263:
#######################################################################
## Function    : checkDir
##
## Description :
##   Makes sure every given directory exists, creating it (and any
##   missing parents) with mkpath when it does not.
##
## Parameters  :
##   A list of directory names and/or references to arrays of
##   directory names.  The main program passes a single array
##   reference, so references are flattened before the -d test;
##   testing the reference itself would never find a directory.
##
#######################################################################
sub checkDir {
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{$_} : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath( $dir ) if ( ! -d $dir );
    }
}
273:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Creates (truncating any previous copy) the day's index file and
##   writes the XHTML header, the page heading and the opening of the
##   table that the comic rows are later appended to.
##
## Parameters  :
##   $date - reference to the date hash
##
## Returns     :
##   Nothing useful.  A warning is printed and nothing is written if
##   the index file cannot be opened.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    ## Use the date being fetched (which honours the --days offset) so the
    ## heading agrees with the page title and the file name.
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "Can't create index file '$indexFile': $!\n";
        return;
    }

    ## The heading lives in a nested table; it is closed here so that the
    ## comic rows written later land in the outer table.
    print $index <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<link rel="shortcut icon" href="./favicon.ico" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td> </td></tr>
</table>
</td></tr>

EOF
    close ( $index );
}
308:
#######################################################################
## Function    : directDownload
##
## Description :
##   Downloads a comic whose image URL can be built directly from the
##   'search' template (see parseComic).  The image is piped through
##   'convert' and always stored as a JPEG named
##   <comic>-<day2>.jpg in the image directory.
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   The status returned by system() for the wget|convert pipeline
##   (0 on success).
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ##
    ## Save the file to the same directory the rest of the script looks
    ## in ($imageDir already ends in the month/year sub-directory).
    ##
    my $outFile = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

    my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} .
              "\" --user-agent=\"$USER_AGENT\" -O - | " .
              "/usr/bin/convert - \"jpeg:$outFile\"";

    my $status = system( $cmd );

    ## The stored image is always a .jpg, so record that; the index page
    ## and the resize step build the file name from 'ext'.
    $comics->{$comic}{'ext'} = 'jpg' if ( $status == 0 );

    return $status;
}
325:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Downloads the comic's web page, searches it with the configured
##   'search' regular expression (its first capture group is the image
##   location; the last matching line wins) and downloads that image
##   to <comic>-<day2>.<ext> in the image directory.  If the image
##   location names a gif/jpg/png, 'ext' is updated to match.
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an error string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $url        = $comics->{$comic}{'url'};
    my $fullName   = $comics->{$comic}{'fullName'};
    my $comicIndex = "$indexDir/index.$comic";

    ## List-form system(): no shell is involved, so characters such as
    ## '&' or ';' in a URL cannot change the command.
    system( 'wget', '-q', "--referer=$url", "--user-agent=$USER_AGENT",
            $url, '-O', $comicIndex );

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $fullName .
               " (" . $url . ")";
    }
    my @lines = <$fh>;
    close ( $fh );

    unlink ( $comicIndex );

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $comicLine;
    foreach my $line ( @lines ) {
        if ( ( $line =~ m/$comics->{$comic}{'search'}/i ) && defined $1 ) {
            $comicLine = $1;
        }
    }

    if ( defined $comicLine ) {
        $comicLine =~ s/^\s+//;
        $comicLine =~ s/\s+$//;
    }

    if ( ! defined $comicLine || $comicLine eq '' ) {
        return "ERROR: Could not download comic $fullName";
    }

    ##
    ## Work out the pieces of the page URL needed to resolve a relative
    ## image location: scheme, scheme://host and the page's directory.
    ##
    my ( $scheme, $mainURL ) = ( 'http', $url );
    if ( $url =~ m{^((\w+)://[^/]+)} ) {
        ( $mainURL, $scheme ) = ( $1, $2 );
    }
    my $pageDir = $url;
    $pageDir =~ s{[?#].*$}{};
    if ( $pageDir =~ m{^\w+://[^/]+$} ) { $pageDir .= '/'; }
    else                                { $pageDir =~ s{[^/]*$}{}; }

    my $comicURL;
    if    ( $comicLine =~ m{^\w+://} ) { $comicURL = $comicLine; }
    elsif ( $comicLine =~ m{^//} )     { $comicURL = $scheme . ':' . $comicLine; }
    elsif ( $comicLine =~ m{^/} )      { $comicURL = $mainURL . $comicLine; }
    else                               { $comicURL = $pageDir . $comicLine; }

    ## Prefer a real file extension; fall back to the looser match the
    ## script has always used.
    if    ( $comicLine =~ m/\.(gif|jpg|png)(?:[?#]|$)/i ) { $comics->{$comic}{'ext'} = $1; }
    elsif ( $comicLine =~ m/(gif|jpg|png)/i )             { $comics->{$comic}{'ext'} = $1; }

    ##
    ## Save the file to the appropriate directory ($imageDir already ends
    ## in the month/year sub-directory).
    ##
    my $ext     = defined $comics->{$comic}{'ext'} ? $comics->{$comic}{'ext'} : '';
    my $outFile = $imageDir . "/" . $comic . "-" . $date->{'day2'} . "." . $ext;

    my $status = system( 'wget', "--user-agent=$USER_AGENT", "--referer=$url",
                         '-q', $comicURL, '-O', $outFile );

    return 0 if ( $status == 0 );

    return "ERROR: Could not download comic $fullName ($comicURL)";
}
379:
#######################################################################
## Function    : parseComic
##
## Description :
##   Expands the placeholders in a comic's 'search' template and
##   returns the result with a trailing newline removed.  The
##   configuration itself is not modified.
##
##   Placeholders: __year__ __year2__ __mon__ __mon2__ __day__ __day2__
##   (taken from the date hash) and __ext__ (the comic's 'ext').
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic
##   $date   - reference to the date hash
##
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $expanded = $comics->{$comic}{'search'};

    ## Placeholder => replacement, applied in this order.
    my @substitutions = (
        [ '__year__',  $date->{'year'}          ],
        [ '__year2__', $date->{'year2'}         ],
        [ '__mon__',   $date->{'mon'}           ],
        [ '__mon2__',  $date->{'mon2'}          ],
        [ '__day__',   $date->{'day'}           ],
        [ '__day2__',  $date->{'day2'}          ],
        [ '__ext__',   $comics->{$comic}{'ext'} ],
    );

    foreach my $pair ( @substitutions ) {
        my ( $placeholder, $value ) = @{$pair};
        $expanded =~ s/$placeholder/$value/g;
    }
    chomp $expanded;

    return $expanded;
}
397:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash for the day being fetched, which is the
##   current local time minus $days_ago days (86400 seconds each).
##
## Returns     :
##   A hash with the keys:
##     day, mon, year  - numeric day of month, month (1-12), 4 digit year
##     day2, mon2      - day and month zero padded to two digits
##     year2           - last two digits of the year
##     dow             - day of week index, 0 = Sunday
##     wday            - day of week name
##
#######################################################################
sub fetchDates () {
    my @dayNames = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

    my ( $mday, $month, $year, $weekday ) =
        ( localtime( time - ( 86400 * $days_ago ) ) )[3,4,5,6];

    $year  += 1900;    ## localtime counts years from 1900
    $month += 1;       ## and months from 0

    my %dates = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $year,
        'dow'   => $weekday,
        'year2' => substr( $year, 2, 2 ),
        'day2'  => sprintf( "%02d", $mday ),
        'mon2'  => sprintf( "%02d", $month ),
        'wday'  => $dayNames[$weekday],
    );

    return %dates;
}
1.8 nick 415:
###############################################################################
##
## &fetchOptions( );
##
## Parses the command line with GetOptions and returns the result as a
## hash with the keys 'days', 'help' and 'man' (undefined when the option
## was not given).
##
## The usage message is shown through pod2usage when the options cannot
## be parsed or --help is given; --man shows the full POD read from the
## DATA handle.
##
###############################################################################
sub fetchOptions {
    my ( $days, $help, $man );

    my $parsed = &GetOptions(
        "days:i" => \$days,
        "help|?" => \$help,
        "man"    => \$man,
    );

    &pod2usage( ) if ( ! $parsed );
    &pod2usage( ) if ( defined $help );
    &pod2usage( { -verbose => 2, -input => \*DATA } ) if ( defined $man );

    my %opts = (
        'days' => $days,
        'help' => $help,
        'man'  => $man,
    );

    return %opts;
}
436:
437: __END__
438:
439: =head1 NAME
440:
441: fetch.pl - Fetches comics and places them all locally in a single html file.
442:
443: =head1 SYNOPSIS
444:
445: fetch.pl [options]
446:
447: Options:
448: --days,d Fetch comics from X days ago
449: --help,? Display the basic help menu
450: --man,m Display the detailed man page
451:
452: =head1 DESCRIPTION
453:
454: =head1 HISTORY
455:
456: =head1 AUTHOR
457:
458: Nicholas DeClario <nick@declario.com>
459:
460: =head1 BUGS
461:
462: This is a work in progress. Please report all bugs to the author.
463:
464: =head1 SEE ALSO
465:
466: =head1 COPYRIGHT
467:
468: =cut
469:
470:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>