Annotation of comics/fetch.pl.new, revision 1.23
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 nick 4: # $Log: fetch.pl.new,v $
1.23 ! nick 5: # Revision 1.22 2017/12/05 13:37:40 nick
! 6: # Added the CVS config version to the output.
! 7: #
1.22 nick 8: # Revision 1.21 2015/10/26 14:25:40 nick
9: # Fixed a bug that was improperly including the day-of-week string, preventing the weekend comics from fetching properly.
10: #
1.21 nick 11: # Revision 1.20 2015/10/22 12:58:44 nick
12: # Added the ability for Sunday only comics. Stonesoup is no longer weekdays, this has been added to Sunday only. I also added Foxtrot Classics for weekdays and Foxtrot for Sundays.
13: #
1.20 nick 14: # Revision 1.19 2015/07/13 12:56:58 nick
15: # Added Sally Forth and Pearls Before Swine. Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
16: #
1.19 nick 17: # Revision 1.18 2015/05/07 12:31:43 nick
18: # Added favicon
19: #
1.18 nick 20: # Revision 1.17 2015/02/19 14:56:10 nick
21: # Fixed a problem that forced everything to JPG. This would kill GIF animations, but would not display the GIFs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
22: #
1.17 nick 23: # Revision 1.16 2015/02/05 18:05:58 nick
24: # Changed the background and added a fancy title.
25: #
1.16 nick 26: # Revision 1.15 2015/01/19 13:46:19 nick
27: # *** empty log message ***
28: #
1.15 nick 29: ###############################################################################
30:
1.1 nick 31: use strict;
32: use File::Path;
33: use Data::Dumper;
1.8 nick 34: use Pod::Usage;
35: use Getopt::Long;
1.1 nick 36:
1.21 nick 37: use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
1.16 nick 38:
1.1 nick 39: ##
40: ## Some default values
41: ##
## Script version (CVS keyword) and the configuration file to read.
my $ver            = '$Id: fetch.pl.new,v 1.22 2017/12/05 13:37:40 nick Exp $';
my $comicFile      = "comics.conf";

## readComicConfig() overwrites this when the config file carries a CVS Id
## line, so it has to be declared before the configuration is loaded.
my $comicConfigVer = "Unknown";
my %comics         = &readComicConfig ( $comicFile );

## fetchDates() reads $days_ago, so the options are parsed first.
my %opts     = &fetchOptions( );
my $days_ago = $opts{'days'} || 0;
my %dates    = &fetchDates();

## Output locations: <base>/<images>/<MMYY> and <base>/<indexes>.
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = join( "/",
                     $baseDir,
                     ( $comics{'configs'}{'image_directory'} || "images" ),
                     "$dates{'mon2'}$dates{'year2'}" );
my $indexDir = join( "/",
                     $baseDir,
                     ( $comics{'configs'}{'index_directory'} || "indexes" ) );

my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = ( 'Sunday', 'Monday', 'Tuesday', 'Wednesday',
                   'Thursday', 'Friday', 'Saturday' );

chomp( my $DATE = `date` );
print STDOUT "Starting comic fetch at $DATE\n";
58:
59: ##
60: ## Main program starts here
61: ##
## Make sure the output directories exist and start today's index page.
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

COMIC: foreach my $comic ( sort keys %comics ) {

    ## The 'configs' entry holds global settings, it is not a comic.
    next COMIC if ( $comic =~ m/config/ );

    ## Skip if this is Sunday and the comic is weekdays only
    if ( ( $dates{'wday'} eq "Sunday" ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) {
        print "Skipping '$comic'; Weekdays only.\n";
        next COMIC;
    }

    ## Skip if Sunday only comic and it's not Sunday.
    if ( ( $dates{'wday'} ne "Sunday" ) &&
         ( $comics{$comic}{'sunday_only'} == 1 ) ) {
        print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
        next COMIC;
    }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## A failed download leaves no image behind, so there is nothing to
    ## measure or resize.
    next COMIC if ( $comics{$comic}{'error'} );

    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    my $size = 0;

    ## List-form pipe open: the file name is handed to identify as a single
    ## argument and is never interpreted by a shell.
    open( my $img, '-|', '/usr/bin/identify', '-verbose', $file )
        || die ("Can't open: $!\n");
    while ( my $line = <$img> ) {
        ## Only the first geometry line counts (the first frame of a GIF).
        if ( ( $size == 0 ) && ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) ) {
            $size = $1;
        }
    }
    close( $img );

    ## Shrink anything wider than 640 pixels, in place.
    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $size > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
109:
110: ## End
111:
112: #######################################################################
113: ## Function : downloadComic
114: ##
115: ## Description :
116: ## This function determines the download method being used to
117: ## retrieve the comic and calls the appropriate function.
118: ##
119: ## If the mode is invalid an error will be returned.
120: ##
121: #######################################################################
##
## Arguments   : $comics - hash ref of all comic definitions
##               $comic  - key of the comic to download
##               $date   - hash ref of date parts, passed through untouched
##
## Returns     : whatever the selected download function returns, or an
##               "ERROR: ..." string when the mode is neither 1 nor 2.
##
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## A missing mode is reported as an unknown method instead of
    ## triggering an 'uninitialized value' warning.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## The caller's hash reference is handed on, not the file-level %comics.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
138:
139: #######################################################################
140: #######################################################################
##
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file. Non-comment lines containing at
##   least two commas describe a comic:
##
##     name,url,search,mode,fullName,ext[,sunday[,sunday_only[,remove_newlines]]]
##
##   Other lines of the form 'key = value' are stored under the 'configs'
##   entry. __YEAR__, __MON__ and __DAY__ in a comic line are replaced with
##   today's date. A comment line carrying a CVS Id keyword sets the
##   file-level $comicConfigVer.
##
## Arguments   : $comicFile - path of the configuration file
##
## Returns     : the configuration as a hash (empty if the file can't be read)
##
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ( $year, $mon, $day ) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon = sprintf("%02d", ($mon + 1));
    $day = sprintf("%02d", $day);

    ## Optional numeric flags fall back to their default only when the field
    ## is missing or blank. (A plain '|| default' would also replace an
    ## explicit 0, which made a weekdays-only 'sunday' flag impossible to set
    ## whenever further fields followed it.)
    my $flag = sub {
        my ( $value, $default ) = @_;
        $value = $default if ( ! defined $value || $value !~ m/\S/ );
        return sprintf("%d", $value);
    };

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Keep the line terminator out of whichever field comes last.
        chomp $line;

        if ( $line =~ m/^#.* \$Id:\ (.*)Exp \$$/ ) {
            $comicConfigVer = $1;
        }

        if ( ( $line !~ m/^#/ ) && ( $line =~ m/,.*,/ ) ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res = split /,/, $line;
            $comicConfig{$res[0]} = {
                'url'             => $res[1],
                'search'          => $res[2],
                'mode'            => $res[3],
                'fullName'        => $res[4],
                'ext'             => $res[5],
                'sunday'          => $flag->( $res[6], 1 ),
                'sunday_only'     => $flag->( $res[7], 0 ),
                'remove_newlines' => $flag->( $res[8], 0 ),
                'error'           => 0,
            };
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
181:
182: #######################################################################
183: #######################################################################
##
## Function    : writeComic
##
## Description :
##   Appends one table row for $comic to the day's index page: the comic
##   image when the download succeeded, or the error text when it failed.
##
## Arguments   : $comics - hash ref of all comic definitions
##               $comic  - key of the comic to write
##               $date   - hash ref of date parts (dow, year2, mon2, day2)
##
## Returns     : 0 on success, 1 if the index file could not be opened
##
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $content;

    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td>; without them the closing
        ## tags below had nothing to close.
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to '$indexFile': $!\n";
        return 1;
    }
    print $index $content;
    close( $index );

    return 0;
}
228:
229:
230: #######################################################################
231: #######################################################################
##
## Function    : writeMainIndex
##
## Description :
##   Placeholder; it does nothing yet. The only call to it in the main
##   program is commented out.
##
## Arguments   : $date - hash ref of date parts (currently unused)
##
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
236:
237:
238: #######################################################################
239: #######################################################################
##
## Function    : writeFooter
##
## Description :
##   Appends the closing markup to the day's index page: the end of the
##   table, the generation time, script and config versions, and the
##   validator badge.
##
## Arguments   : $date - hash ref of date parts (dow, year2, mon2, day2)
##
sub writeFooter {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $sysDate = `date`;

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to '$indexFile': $!\n";
        return;
    }
    print $index <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
Config Version: <font color="green">$comicConfigVer</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    close( $index );

    return;
}
268:
269: #######################################################################
270: #######################################################################
##
## Function    : checkDir
##
## Description :
##   Creates every directory that does not exist yet, including missing
##   parent directories.
##
## Arguments   : directory names, array references of directory names, or
##               any mix of the two
##
sub checkDir {
    ## The main program passes an array reference. Flatten it so that each
    ## directory is tested on its own, rather than testing the stringified
    ## reference and relying on mkpath() to accept a reference.
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath( $dir ) if ( ! -d $dir );
    }

    return;
}
278:
279: #######################################################################
280: #######################################################################
##
## Function    : writeTitle
##
## Description :
##   Starts (and truncates) the day's index page: document head, the opening
##   of the main table and the heading row with the long-form date.
##
## Arguments   : $date - hash ref of date parts
##                       (dow, day, mon, year, year2, mon2, day2)
##
sub writeTitle {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    ## Use the date being fetched, not the wall-clock date, so the heading
    ## agrees with the <title> and the file name when --days is given.
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "WARNING: Can't create '$indexFile': $!\n";
        return;
    }

    ## The heading lives in its own inner table, which is closed here so the
    ## comic rows appended later land in the outer table.
    print $index <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<link rel="shortcut icon" href="./favicon.ico" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td> </td></tr>
</table>
</td></tr>

EOF
    close( $index );

    return;
}
313:
314: #######################################################################
315: #######################################################################
##
## Function    : directDownload
##
## Description :
##   Downloads a comic whose image URL can be built directly from the
##   'search' template (see parseComic) and converts it to JPEG in the
##   month's image directory.
##
## Arguments   : $comics - hash ref of all comic definitions
##               $comic  - key of the comic to download
##               $date   - hash ref of date parts
##
## Returns     : the exit status of the wget | convert pipeline (0 on success)
##
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Single-quote a value for the shell. URLs routinely contain '&' and
    ## '?', which an unquoted command line would hand to the shell.
    my $quote = sub {
        my ( $text ) = @_;
        $text = '' if ( ! defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    ##
    ## Save the file to the appropriate directory. $imageDir is where the
    ## main program looks for the image afterwards.
    ##
    my $cDate  = $date->{'day2'};
    my $target = "$imageDir/$comic-$cDate.jpg";

    my $cmd = "wget -q " . $quote->( $file ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $quote->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - " . $quote->( "jpeg:$target" );

    my $status = system($cmd);

    ## The image is always written as .jpg, so record that extension; the
    ## index page and the resize step build the file name from 'ext'.
    $comics->{$comic}{'ext'} = 'jpg' if ( $status == 0 );

    return $status;
}
330:
331: #######################################################################
332: #######################################################################
##
## Function    : indexDownload
##
## Description :
##   Downloads the comic's web page, looks for the image URL with the
##   comic's 'search' regex (first capture group), and downloads that image
##   into the month's image directory. 'ext' is updated when the image URL
##   names a gif, jpg or png.
##
## Arguments   : $comics - hash ref of all comic definitions
##               $comic  - key of the comic to download
##               $date   - hash ref of date parts
##
## Returns     : 0 on success, otherwise an "ERROR: ..." string
##
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my ( @lines, $comicLine, $mainURL );
    my $comicIndex = "$indexDir/index.$comic";

    ## List-form system(): the URLs and user agent reach wget as single
    ## arguments without passing through a shell.
    system( 'wget', '-q',
            "--referer=$comics->{$comic}{'url'}",
            "--user-agent=$USER_AGENT",
            $comics->{$comic}{'url'},
            '-O', $comicIndex );

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $comics->{$comic}{'url'} . ")";
    }
    while ( my $line = <$fh> ) {
        ## NOTE(review): the lines are still searched one at a time below, so
        ## this only strips line ends, tabs and runs of spaces; it does not
        ## let a pattern span several lines. Confirm that is the intent.
        $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} );
        push @lines, $line;
    }
    close( $fh );

    unlink( $comicIndex );

    $mainURL = $comics->{$comic}{'url'};
    ## I need to figure out how to merge these two in to one regex.
    $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
    $mainURL =~ s/([a-z])\/.*/$1/i;

    ##
    ## Find the comic strip URL based on the specified regex in the search.
    ## The last matching line wins. A pattern without a capture group
    ## yields nothing usable and is ignored.
    ##
    foreach my $line ( @lines ) {
        if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
            next if ( ! defined $1 );
            $comicLine = $1; chomp $comicLine;
        }
    }

    ##
    ## Save the file to the appropriate directory. $imageDir is where the
    ## main program looks for the image afterwards.
    ##
    my $cDate = $date->{'day2'};

    if ( $comicLine ) {
        if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
        my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
        my $target   = "$imageDir/$comic-$cDate.$comics->{$comic}{'ext'}";

        my $status = system( 'wget',
                             "--user-agent=$USER_AGENT",
                             "--referer=$comics->{$comic}{'url'}",
                             '-q', $comicURL,
                             '-O', $target );

        ## Report a failed image download instead of linking a broken image.
        return 0 if ( $status == 0 );
    }

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
388:
389: #######################################################################
390: #######################################################################
##
## Function    : parseComic
##
## Description :
##   Expands the placeholders in a comic's 'search' template:
##   __year__, __year2__, __mon__, __mon2__, __day__, __day2__ come from
##   $date and __ext__ from the comic's 'ext'. The stored template itself
##   is not modified.
##
## Arguments   : $comics - hash ref of all comic definitions
##               $comic  - key of the comic
##               $date   - hash ref of date parts
##
## Returns     : the expanded string, without a trailing newline
##
sub parseComic {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};
    $string = '' if ( ! defined $string );

    my %value_for = (
        'year'  => $date->{'year'},
        'year2' => $date->{'year2'},
        'mon'   => $date->{'mon'},
        'mon2'  => $date->{'mon2'},
        'day'   => $date->{'day'},
        'day2'  => $date->{'day2'},
        'ext'   => $comics->{$comic}{'ext'},
    );

    foreach my $key ( qw/ year year2 mon mon2 day day2 ext / ) {
        ## A missing value expands to nothing rather than raising an
        ## 'uninitialized value' warning.
        my $value = defined $value_for{$key} ? $value_for{$key} : '';
        $string =~ s/__${key}__/$value/g;
    }
    chomp $string;

    return $string;
}
406:
407: #######################################################################
408: #######################################################################
##
## Function    : fetchDates
##
## Description :
##   Builds the date parts for the day being fetched, which is the current
##   time minus $days_ago days (taken as 86400 seconds each).
##
## Returns     : a hash with the keys
##                 day, mon, year  - numeric day, month (1-12), 4 digit year
##                 day2, mon2      - day and month, zero padded to 2 digits
##                 year2           - last two digits of the year
##                 dow             - day of week index (0 = Sunday)
##                 wday            - day of week name
##
sub fetchDates () {
    my @names = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
    my @parts = localtime( time - ( 86400 * $days_ago ) );

    my %result = (
        'day'  => $parts[3],
        'mon'  => $parts[4] + 1,
        'year' => $parts[5] + 1900,
        'dow'  => $parts[6],
    );

    $result{'year2'} = substr( $result{'year'}, 2, 2 );
    $result{'day2'}  = sprintf( "%02d", $result{'day'} );
    $result{'mon2'}  = sprintf( "%02d", $result{'mon'} );
    $result{'wday'}  = $names[ $result{'dow'} ];

    return %result;
}
1.8 nick 424:
425: ###############################################################################
426: ##
427: ## &fetchOptions( );
428: ##
429: ## Grab our command line arguments and toss them in to a hash
430: ##
431: ###############################################################################
sub fetchOptions {
    my ( $days, $help, $man );

    ## An unknown option prints the usage text and exits.
    my $parsed = GetOptions(
        "days:i" => \$days,
        "help|?" => \$help,
        "man"    => \$man,
    );
    pod2usage( ) if ( ! $parsed );

    if ( defined $help ) {
        pod2usage( );
    }
    if ( defined $man ) {
        pod2usage( { -verbose => 2, -input => \*DATA } );
    }

    ## All three keys are always present; unset options are undef.
    return ( 'days' => $days, 'help' => $help, 'man' => $man );
}
445:
446: __END__
447:
448: =head1 NAME
449:
450: fetch.pl - Fetches comics and places them all locally in a single html file.
451:
452: =head1 SYNOPSIS
453:
454: fetch.pl [options]
455:
456: Options:
457: --days,d Fetch comics from X days ago
458: --help,? Display the basic help menu
459: --man,m Display the detailed man page
460:
461: =head1 DESCRIPTION
462:
463: =head1 HISTORY
464:
465: =head1 AUTHOR
466:
467: Nicholas DeClario <nick@declario.com>
468:
469: =head1 BUGS
470:
471: This is a work in progress. Please report all bugs to the author.
472:
473: =head1 SEE ALSO
474:
475: =head1 COPYRIGHT
476:
477: =cut
478:
479:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>