Annotation of comics/fetch.pl.new, revision 1.21
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 nick 4: # $Log: fetch.pl.new,v $
1.21 ! nick 5: # Revision 1.20 2015/10/22 12:58:44 nick
! 6: # Added the ability for Sunday only comics. Stonesoup is no longer weekdays, this has been added to Sunday only. I also added Foxtrot Classics for weekdays and Foxtrot for Sundays.
! 7: #
1.20 nick 8: # Revision 1.19 2015/07/13 12:56:58 nick
9: # Added Sally Forth and Pearls Before Swine. Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
10: #
1.19 nick 11: # Revision 1.18 2015/05/07 12:31:43 nick
12: # Added favicon
13: #
1.18 nick 14: # Revision 1.17 2015/02/19 14:56:10 nick
15: # Fixed a problem that forced everything to JPG. This would kill GIF animations, but would not display the GIFs either, because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
16: #
1.17 nick 17: # Revision 1.16 2015/02/05 18:05:58 nick
18: # Changed the background and added a fancy title.
19: #
1.16 nick 20: # Revision 1.15 2015/01/19 13:46:19 nick
21: # *** empty log message ***
22: #
1.15 nick 23: ###############################################################################
24:
1.1 nick 25: use strict;
26: use File::Path;
27: use Data::Dumper;
1.8 nick 28: use Pod::Usage;
29: use Getopt::Long;
1.1 nick 30:
1.21 ! nick 31: use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
1.16 nick 32:
##
## Some default values
##
## These file-scoped lexicals are shared with the subroutines defined
## further down (notably $imageDir, $indexDir, $USER_AGENT, @days,
## $days_ago and $ver).
##
my $ver        = '$Id: fetch.pl.new,v 1.20 2015/10/22 12:58:44 nick Exp $';
my $comicFile  = "comics.conf";
my %comics     = &readComicConfig ( $comicFile );
my %opts       = &fetchOptions( );
my $days_ago   = $opts{'days'} || 0;
my %dates      = &fetchDates();
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                 "/$dates{'mon2'}$dates{'year2'}";
my $indexDir   = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {

    ## 'configs' holds the "key = value" settings, it is not a comic.
    ## Compare exactly so a comic whose name merely contains "config"
    ## is not silently dropped.
    next if ( $comic eq 'configs' );

    ## Skip if this is Sunday and the comic is weekdays only
    if ( ( $dates{'wday'} eq "Sunday" ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) {
        print "Skipping '$comic'; Weekdays only.\n";
        next;
    }

    ## Skip if Sunday only comic and it's not Sunday.
    if ( ( $dates{'wday'} ne "Sunday" ) &&
         ( $comics{$comic}{'sunday_only'} == 1 ) ) {
        print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
        next;
    }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## Nothing to measure or resize when the download failed or left
    ## no file behind.
    next if ( $comics{$comic}{'error'} );

    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    next if ( ! -e $file );

    ## Ask ImageMagick for the image width.  The list form of the pipe
    ## open keeps the file name away from the shell.  Only the first
    ## geometry line is used (animated GIFs report one per frame).
    my $size = 0;
    open( my $img, '-|', '/usr/bin/identify', '-verbose', $file )
        || die ("Can't open: $!\n");
    while ( my $line = <$img> ) {
        if ( ( $size == 0 ) && ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) ) {
            $size = $1;
        }
    }
    close( $img );

    ## Shrink anything wider than 640 pixels in place.
    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $size > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
102:
103: ## End
104:
#######################################################################
## Function :	downloadComic
##
## Description :
##	Determines the download method configured for a comic and calls
##	the appropriate function.
##
## Parameters :
##	$comics - reference to the comic configuration hash
##	$comic  - key of the comic to download
##	$date   - reference to the date hash built by fetchDates
##
## Returns :
##	Whatever the selected download function returns (a false value
##	on success), or an error string when the mode is not 1 or 2.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## The handlers are called with '&' so that their (inaccurate)
    ## prototypes never get in the way, and they receive the hash
    ## reference this function was given rather than a global.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq '1' );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
131:
#######################################################################
## Function :	readComicConfig
##
## Description :
##	Reads the comic configuration file.  Lines starting with '#' are
##	comments.  Lines containing at least two commas describe a comic:
##
##	  name,url,search,mode,fullName,ext[,sunday[,sunday_only]]
##
##	__YEAR__, __MON__ and __DAY__ in a comic line are replaced with
##	today's four digit year and two digit month and day.  Lines of
##	the form "key = value" are stored under the 'configs' key.
##
## Parameters :
##	$comicFile - path of the configuration file
##
## Returns :
##	A hash keyed by comic name (plus 'configs').  When the file
##	cannot be opened a warning is printed and the hash is empty.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ($year, $mon, $day) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon   = sprintf("%02d", ($mon + 1));
    $day   = sprintf("%02d", $day);

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Strip the line ending so the last field of a comic line
        ## (usually the extension or a flag) does not carry a newline.
        $line =~ s/\r?\n$//;

        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res = split /,/, $line;

            ## The flags are optional.  Only fall back to the default
            ## when the field is missing or holds no number; an
            ## explicit "0" must stay 0.
            my ( $sunday, $sundayOnly ) = @res[6,7];
            $sunday     = 1 if ( ! defined $sunday     || $sunday     !~ m/\d/ );
            $sundayOnly = 0 if ( ! defined $sundayOnly || $sundayOnly !~ m/\d/ );

            $comicConfig{$res[0]}{'url'}         = $res[1];
            $comicConfig{$res[0]}{'search'}      = $res[2];
            $comicConfig{$res[0]}{'mode'}        = $res[3];
            $comicConfig{$res[0]}{'fullName'}    = $res[4];
            $comicConfig{$res[0]}{'ext'}         = $res[5];
            $comicConfig{$res[0]}{'sunday'}      = sprintf("%d", $sunday);
            $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $sundayOnly);
            $comicConfig{$res[0]}{'error'}       = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
169:
#######################################################################
## Function :	writeComic
##
## Description :
##	Appends one table row for a comic to the day's index page.  When
##	the comic has no error the row shows its title, link and image;
##	otherwise the row shows the title, link and the error in red.
##
## Parameters :
##	$comics - reference to the comic configuration hash
##	$comic  - key of the comic to write
##	$date   - reference to the date hash built by fetchDates
##
## Returns :
##	0 after writing the row, 1 when the index file cannot be opened.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $content;

    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row needs its own <tr><td> so that the closing
        ## tags below are balanced inside the page's table.
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to '$indexFile': $!\n";
        return 1;
    }
    print {$index} $content;
    close( $index );

    return 0;
}
216:
217:
#######################################################################
## Function :	writeMainIndex
##
## Description :
##	Placeholder; nothing is written yet and the only call in the
##	main program is commented out.
##
## Parameters :
##	$date - reference to the date hash built by fetchDates
##
## Returns :
##	Nothing.
##
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
224:
225:
#######################################################################
## Function :	writeFooter
##
## Description :
##	Appends the closing part of the day's index page: the end of the
##	comic table, the generation time, the script version, the CVS
##	link and the validator badge.
##
## Parameters :
##	$date - reference to the date hash built by fetchDates
##
## Returns :
##	Nothing useful.  A warning is printed when the index file cannot
##	be opened.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $sysDate   = `date`;
    chomp( $sysDate );

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to '$indexFile': $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    close( $index );

    return;
}
255:
#######################################################################
## Function :	checkDir
##
## Description :
##	Makes sure every given directory exists, creating missing ones
##	(including parent directories) with mkpath.
##
## Parameters :
##	A list of directory paths and/or references to arrays of
##	directory paths.  The main program passes a single array
##	reference; plain paths are accepted as well.
##
## Returns :
##	Nothing.
##
#######################################################################
sub checkDir {
    ## Flatten array references so the -d test below sees real paths
    ## instead of a stringified reference.
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath( $dir ) if ( ! -d $dir );
    }

    return;
}
264: }
265:
#######################################################################
## Function :	writeTitle
##
## Description :
##	Creates (overwrites) the day's index page and writes the XHTML
##	header, the opening of the comic table and the heading row with
##	the long form of the date being fetched.
##
## Parameters :
##	$date - reference to the date hash built by fetchDates
##
## Returns :
##	Nothing useful.  A warning is printed when the index file cannot
##	be created.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    ## Use the date being fetched (which honours --days), not the
    ## current date, so the heading agrees with the page title.
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "WARNING: Can't create '$indexFile': $!\n";
        return;
    }

    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<link rel="shortcut icon" href="./favicon.ico" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td> </td></tr>
</table>
</td></tr>

EOF
    close( $index );

    return;
}
300:
#######################################################################
## Function :	directDownload
##
## Description :
##	Downloads a comic whose image URL can be built directly from the
##	'search' template (see parseComic).  The image is piped through
##	'convert' and stored as a JPEG in the image directory.
##
## Parameters :
##	$comics - reference to the comic configuration hash; the comic's
##	          'ext' entry is set to 'jpg' to match the written file
##	$comic  - key of the comic to download
##	$date   - reference to the date hash built by fetchDates
##
## Returns :
##	The status returned by system() for the download pipeline
##	(0 on success).
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ##
    ## Save the file to the appropriate directory.  $imageDir already
    ## ends in the month/year directory created by checkDir.
    ##
    my $target = "$imageDir/$comic-$date->{'day2'}.jpg";

    ## The URL is quoted so characters such as '&' reach wget intact.
    my $cmd = "wget -q \"$file\" --referer=\"" . $comics->{$comic}{'url'} .
              "\" --user-agent=\"$USER_AGENT\" -O - | " .
              "/usr/bin/convert - \"jpeg:$target\"";

    my $status = system($cmd);

    ## The file is always written as .jpg; record that so the index
    ## page and the resize step look for the file that exists.
    $comics->{$comic}{'ext'} = 'jpg';

    return $status;
}
317:
#######################################################################
## Function :	indexDownload
##
## Description :
##	Downloads the comic's web page, looks for the image URL with the
##	comic's 'search' regular expression (its first capture group is
##	the image location; the last matching line wins) and downloads
##	that image into the image directory.
##
## Parameters :
##	$comics - reference to the comic configuration hash; the comic's
##	          'ext' entry is updated when the image URL shows a gif,
##	          jpg/jpeg or png extension
##	$comic  - key of the comic to download
##	$date   - reference to the date hash built by fetchDates
##
## Returns :
##	0 on success, otherwise an error string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $pageURL    = $comics->{$comic}{'url'};
    my $fullName   = $comics->{$comic}{'fullName'};
    my $comicIndex = "$indexDir/index.$comic";

    my $wget_cmd = "wget -q --referer=\"$pageURL\" " .
                   "--user-agent=\"$USER_AGENT\" " .
                   "\"$pageURL\" -O \"$comicIndex\"";
    system($wget_cmd);

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $fullName .
               " (" . $pageURL . ")";
    }
    my @lines = <$fh>;
    close( $fh );

    unlink( $comicIndex );

    ##
    ## Scheme and host of the page, used to complete relative image
    ## locations.  Falls back to the page URL when it has no scheme.
    ##
    my ( $mainURL, $scheme ) = ( $pageURL, 'http' );
    if ( $pageURL =~ m{^((https?)://[^/]+)}i ) {
        ( $mainURL, $scheme ) = ( $1, $2 );
    }

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $comicLine;
    foreach my $line (@lines) {
        if ( ( $line =~ m/$comics->{$comic}{'search'}/i ) && defined $1 ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    if ( ! $comicLine ) {
        return "ERROR: Could not download comic $fullName";
    }

    ## Only trust an extension that really looks like one (".gif"),
    ## not the letters appearing somewhere in a host or path name.
    if ( $comicLine =~ m/\.(gif|jpe?g|png)\b/i ) {
        $comics->{$comic}{'ext'} = $1;
    }

    my $comicURL;
    if    ( $comicLine =~ m{^https?://}i ) { $comicURL = $comicLine; }
    elsif ( $comicLine =~ m{^//} )         { $comicURL = "$scheme:$comicLine"; }
    elsif ( $comicLine =~ m{^/} )          { $comicURL = $mainURL . $comicLine; }
    else                                   { $comicURL = "$mainURL/$comicLine"; }

    ##
    ## Save the file to the appropriate directory
    ##
    my $target = "$imageDir/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}";
    my $cmd    = "wget --user-agent=\"$USER_AGENT\" --referer=\"$pageURL\" " .
                 "-q \"$comicURL\" -O \"$target\"";

    if ( system( $cmd ) != 0 ) {
        return "ERROR: Could not download comic $fullName ($comicURL)";
    }

    return 0;
}
371:
#######################################################################
## Function :	parseComic
##
## Description :
##	Expands the date and extension placeholders in a comic's
##	'search' string and returns the result without a trailing
##	newline.
##
##	  __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##	  __ext__
##
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};

    ## Placeholder / value pairs, applied in this order.
    my @replacements = (
        [ '__year__',  $date->{'year'}  ],
        [ '__year2__', $date->{'year2'} ],
        [ '__mon__',   $date->{'mon'}   ],
        [ '__mon2__',  $date->{'mon2'}  ],
        [ '__day__',   $date->{'day'}   ],
        [ '__day2__',  $date->{'day2'}  ],
        [ '__ext__',   $comics->{$comic}{'ext'} ],
    );

    foreach my $pair ( @replacements ) {
        my ( $token, $value ) = @{ $pair };
        $string =~ s/$token/$value/g;
    }

    chomp $string;

    return $string;
}
389:
#######################################################################
## Function :	fetchDates
##
## Description :
##	Builds the date hash for the day being fetched, which is the
##	current local time minus $days_ago days.
##
##	  day, mon, year   - numeric day, month (1-12), four digit year
##	  day2, mon2       - day and month padded to two digits
##	  year2            - last two digits of the year
##	  dow              - day of the week, 0 = Sunday
##	  wday             - English name of the day of the week
##
#######################################################################
sub fetchDates () {
    my @dayNames = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

    my $when = time - ( 86400 * $days_ago );
    my ( $mday, $month, $year, $weekDay ) = ( localtime( $when ) )[3,4,5,6];

    ## localtime counts years from 1900 and months from 0.
    $year  += 1900;
    $month += 1;

    my %dates = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $year,
        'dow'   => $weekDay,
        'year2' => substr( $year, 2, 2 ),
        'day2'  => sprintf( "%02d", $mday ),
        'mon2'  => sprintf( "%02d", $month ),
        'wday'  => $dayNames[$weekDay],
    );

    return %dates;
}
1.8 nick 407:
###############################################################################
##
##  &fetchOptions( );
##
##      Parses the command line (--days, --help/-?, --man) and returns the
##      values as a hash.  Shows the usage text when parsing fails or help
##      is requested, and the full manual page for --man.
##
###############################################################################
sub fetchOptions {
    ## Every option starts out undefined so the checks below only fire
    ## for options that were actually given.
    my %opts = map { $_ => undef } qw/ days help man /;

    my $parsed = &GetOptions(
                    "days:i" => \$opts{'days'},
                    "help|?" => \$opts{'help'},
                    "man"    => \$opts{'man'},
                 );

    &pod2usage( ) if ( ! $parsed );
    &pod2usage( ) if ( defined $opts{'help'} );
    &pod2usage( { -verbose => 2, -input => \*DATA } ) if ( defined $opts{'man'} );

    return %opts;
}
428:
429: __END__
430:
431: =head1 NAME
432:
433: fetch.pl - Fetches comics and places them all locally in a single html file.
434:
435: =head1 SYNOPSIS
436:
437: fetch.pl [options]
438:
439: Options:
440: --days,d Fetch comics from X days ago
441: --help,? Display the basic help menu
442: --man,m Display the detailed man page
443:
444: =head1 DESCRIPTION
445:
446: =head1 HISTORY
447:
448: =head1 AUTHOR
449:
450: Nicholas DeClario <nick@declario.com>
451:
452: =head1 BUGS
453:
454: This is a work in progress. Please report all bugs to the author.
455:
456: =head1 SEE ALSO
457:
458: =head1 COPYRIGHT
459:
460: =cut
461:
462:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>