Annotation of comics/fetch.pl.new, revision 1.20
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 nick 4: # $Log: fetch.pl.new,v $
1.20 ! nick 5: # Revision 1.19 2015/07/13 12:56:58 nick
! 6: # Added Sally Forth and Pearls Before Swine. Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'.
! 7: #
1.19 nick 8: # Revision 1.18 2015/05/07 12:31:43 nick
9: # Added favicon
10: #
1.18 nick 11: # Revision 1.17 2015/02/19 14:56:10 nick
12: # Fixed a problem that forced everything to JPG. This would kill GIF animations, and would not display the GIFs either, because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility, and rewrote how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
13: #
1.17 nick 14: # Revision 1.16 2015/02/05 18:05:58 nick
15: # Changed the background and added a fancy title.
16: #
1.16 nick 17: # Revision 1.15 2015/01/19 13:46:19 nick
18: # *** empty log message ***
19: #
1.15 nick 20: ###############################################################################
21:
1.1 nick 22: use strict;
23: use File::Path;
24: use Data::Dumper;
1.8 nick 25: use Pod::Usage;
26: use Getopt::Long;
1.1 nick 27:
1.16 nick 28: use Date::Calc qw/Date_to_Text_Long Today/;
29:
##
## Some default values
##
## Declaration order matters: the options must be parsed before $days_ago
## is derived, and $days_ago must be set before &fetchDates() reads it.
## Subs are called with a leading '&' because several of them are declared
## later in the file with prototypes that do not match their real argument
## lists; '&' bypasses prototype handling.
##
my $ver        = '$Id: fetch.pl.new,v 1.19 2015/07/13 12:56:58 nick Exp $';
my $comicFile  = "comics.conf";
my %comics     = &readComicConfig ( $comicFile );
my %opts       = &fetchOptions( );
my $days_ago   = $opts{'days'} || 0;
my %dates      = &fetchDates();
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                 "/$dates{'mon2'}$dates{'year2'}";
my $indexDir   = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

## $dates{'dow'} is the localtime() weekday index and @days maps it to a
## name.  ($dates{'day2'} is the zero-padded day of the month, so it can
## never be used to detect a Sunday.)
my $is_sunday = ( $days[ $dates{'dow'} ] eq "Sunday" );

foreach my $comic ( sort keys %comics ) {

    ## The 'configs' entry holds the global settings, not a comic.
    next if ( $comic =~ m/config/ );

    ## Skip if this is Sunday and the comic is weekdays only
    if ( $is_sunday && ( $comics{$comic}{'sunday'} == 0 ) ) {
        print "Skipping '$comic'; Weekdays only.\n";
        next;
    }

    ## Skip if Sunday only comic and it's not Sunday.
    if ( !$is_sunday && ( $comics{$comic}{'sunday_only'} == 1 ) ) {
        print "Skipping '$comic'; Sunday only.\n";
        next;
    }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## Nothing was downloaded, so there is no image to inspect or resize.
    next if ( $comics{$comic}{'error'} );

    ## The extension read from the configuration may still carry trailing
    ## whitespace; strip it so the path is usable as a plain argument.
    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    $file =~ s/\s+\z//;
    my $size = 0;

    ## Ask ImageMagick for the image width.  The list form of the pipe open
    ## runs 'identify' without a shell, so the file name is never parsed
    ## as shell syntax.  Only the first geometry line is used (an animated
    ## GIF reports one per frame).
    if ( open( my $img, '-|', '/usr/bin/identify', '-verbose', $file ) ) {
        while ( my $line = <$img> ) {
            if ( ( $size == 0 ) && ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) ) {
                $size = $1;
            }
        }
        close($img);
    }
    else {
        warn "Can't run /usr/bin/identify on '$file': $!\n";
    }

    ## Shrink anything wider than 640 pixels, in place.
    if ( $size > 640 ) {
        system( '/usr/bin/convert', '-resize', '640', $file, $file ) == 0
            or warn "Resizing '$file' failed (status $?)\n";
    }
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
101:
#######################################################################
## Function    : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function.
##
##   If the mode is invalid an error will be returned.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   Whatever the selected download function returns (a false value on
##   success), or an "ERROR: ..." string when the mode is not 1 or 2.
##
## Notes       :
##   The old ($$) prototype was dropped: the function takes three
##   arguments and is only ever called with '&', which ignores prototypes.
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## A missing mode is treated like any other unknown mode.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' unless defined $mode;

    ## The helpers are declared further down with mismatching prototypes;
    ## calling them with '&' side-steps prototype handling.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq '1' );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
128:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Two kinds of lines are used:
##
##     name,url,search,mode,fullName,ext[,sunday[,sunday_only]]
##         One comic.  __YEAR__, __MON__ and __DAY__ are replaced with
##         the current local date before the line is split.
##     key = value
##         A global setting, stored under the 'configs' key.
##
##   Lines starting with '#' are comments.
##
## Arguments   :
##   $comicFile - path of the configuration file
##
## Returns     :
##   A hash keyed by comic name (plus 'configs').  If the file cannot be
##   opened a warning is printed and an empty hash is returned.
##
## Notes       :
##   'sunday' defaults to 1 only when the field is absent or empty, so an
##   explicit 0 is honoured.  The old ($$) prototype was dropped: the
##   function takes one argument and is called with '&'.
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ($year, $mon, $day) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon = sprintf("%02d", ($mon + 1));
    $day = sprintf("%02d", $day);

    my $config_fh;
    if ( !open( $config_fh, '<', $comicFile ) ) {
        warn "Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$config_fh> ) {
        ## Drop the line ending so the last field does not carry it along.
        $line =~ s/[\r\n]+\z//;

        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res  = split /,/, $line;
            my $name = $res[0];

            $comicConfig{$name}{'url'}      = $res[1];
            $comicConfig{$name}{'search'}   = $res[2];
            $comicConfig{$name}{'mode'}     = $res[3];
            $comicConfig{$name}{'fullName'} = $res[4];
            $comicConfig{$name}{'ext'}      = $res[5];
            ## Default to "published on Sundays" only when nothing was given.
            $comicConfig{$name}{'sunday'}   =
                ( defined $res[6] && $res[6] ne '' ) ? $res[6] : 1;
            $comicConfig{$name}{'sunday_only'} = $res[7] || 0;
            $comicConfig{$name}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close ($config_fh);

    return %comicConfig;
}
166:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for a comic to the day's index page.  If the
##   comic's 'error' entry is false the row contains the image, otherwise
##   it contains the error text in red.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to write
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, 1 if the index file could not be opened or written.
##
## Notes       :
##   Uses the file-level $indexDir and @days.  The error row used to be
##   emitted without its opening <tr><td> and with a stray '<'; both are
##   fixed here.  The old ($$) prototype was dropped: the function takes
##   three arguments and is called with '&'.
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $content;

    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    my $index_fh;
    if ( !open( $index_fh, '>>', $indexFile ) ) {
        warn "Can't append to index file '$indexFile': $!\n";
        return 1;
    }
    print {$index_fh} $content;

    ## Buffered write errors only show up when the handle is closed.
    if ( !close($index_fh) ) {
        warn "Can't finish writing index file '$indexFile': $!\n";
        return 1;
    }

    return 0;
}
213:
214:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder for writing a main index page.  It only unpacks its
##   argument and does nothing else; the one call to it in the main
##   program is commented out.
##
## Arguments   :
##   $date - reference to the date hash (currently unused)
#######################################################################
sub writeMainIndex ($$) { my ( $date ) = @_; }
221:
222:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing part of the day's index page: the end of the
##   comics table, the generation time, the script version and the
##   closing body/html tags.
##
## Arguments   :
##   $date - reference to the date hash
##
## Returns     :
##   0 on success, 1 if the index file could not be opened or written.
##
## Notes       :
##   Uses the file-level $indexDir, @days and $ver.
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $sysDate = `date`;

    my $index_fh;
    if ( !open( $index_fh, '>>', $indexFile ) ) {
        warn "Can't append to index file '$indexFile': $!\n";
        return 1;
    }

    print {$index_fh} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF

    ## Buffered write errors only show up when the handle is closed.
    if ( !close($index_fh) ) {
        warn "Can't finish writing index file '$indexFile': $!\n";
        return 1;
    }

    return 0;
}
252:
#######################################################################
## Function    : checkDir
##
## Description :
##   Makes sure every given directory exists, creating it (and any
##   missing parent directories) with File::Path's mkpath when needed.
##
## Arguments   :
##   A list of directory names and/or references to arrays of directory
##   names.  The main program passes a single array reference, so the
##   references are flattened before the existence test; testing the
##   reference itself with -d can never succeed.
##
## Returns     :
##   Nothing.
##
## Notes       :
##   The old ($$) prototype was dropped: the function is called with one
##   argument and with '&', which ignores prototypes.
#######################################################################
sub checkDir {
    my @dirs = map { ( ref($_) eq 'ARRAY' ) ? @{$_} : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( !defined $dir || $dir eq '' );
        if ( ! -d $dir ) { mkpath( $dir ); }
    }

    return;
}
262:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Creates (truncating any existing file) the day's index page and
##   writes the XHTML header, the heading image, the long-form date and
##   the opening of the comics table.
##
## Arguments   :
##   $date - reference to the date hash
##
## Returns     :
##   0 on success, 1 if the index file could not be opened or written.
##
## Notes       :
##   Uses the file-level $indexDir and @days.  The long date is built
##   from $date rather than from Today(), so it agrees with the page
##   title when --days is used.  The heading markup is fixed as well:
##   the inner table is now closed, "</td</tr>" is well-formed, and the
##   empty elements are self-closed as XHTML requires.  The old ($$)
##   prototype was dropped: the function takes one argument and is
##   called with '&'.
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index_fh;
    if ( !open( $index_fh, '>', $indexFile ) ) {
        warn "Can't create index file '$indexFile': $!\n";
        return 1;
    }

    print {$index_fh} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<link rel="shortcut icon" href="./favicon.ico" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td> </td></tr>
</table>
</td></tr>

EOF

    ## Buffered write errors only show up when the handle is closed.
    if ( !close($index_fh) ) {
        warn "Can't finish writing index file '$indexFile': $!\n";
        return 1;
    }

    return 0;
}
297:
#######################################################################
## Function    : directDownload
##
## Description :
##   Downloads a comic whose image URL can be built directly from the
##   'search' template (see parseComic).  The image is fetched with wget
##   and piped through ImageMagick's convert, which stores it as a JPEG
##   in images/<MMYY>/<comic>-<DD>.jpg.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an "ERROR: ..." string (a true value, as
##   the callers expect for a failure).
##
## Notes       :
##   Uses the file-level $USER_AGENT.  Every value placed in the shell
##   pipeline is single-quoted so that URLs containing '&', '?' or ';'
##   are passed through intact.  Because the result is always a .jpg
##   file, the comic's 'ext' entry is set to 'jpg' on success so that
##   later users of 'ext' refer to the file actually written.  The old
##   ($$) prototype was dropped: the function takes three arguments.
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Quote a value for /bin/sh: wrap it in single quotes and escape any
    ## single quote it contains.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' unless defined $arg;
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir   = $date->{'mon2'} . $date->{'year2'};
    my $cDate  = $date->{'day2'};
    my $target = "images/$cDir/$comic-$cDate.jpg";

    my $cmd = join( ' ',
        'wget', '-q', $quote->( $file ),
        '--referer=' . $quote->( $comics->{$comic}{'url'} ),
        '--user-agent=' . $quote->( $USER_AGENT ),
        '-O', '-',
        '|',
        '/usr/bin/convert', '-', $quote->( "jpeg:$target" ) );

    my $status = system($cmd);
    if ( $status != 0 ) {
        return "ERROR: Could not download comic $comics->{$comic}{'fullName'} (status $status)";
    }

    $comics->{$comic}{'ext'} = 'jpg';
    return 0;
}
314:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Downloads a comic whose image URL has to be found in the comic's
##   web page.  The page is fetched with wget into indexes/index.<comic>,
##   every line is matched against the comic's 'search' regex and the
##   first capture group of the last matching line is taken as the image
##   location.  The image is then fetched into
##   images/<MMYY>/<comic>-<DD>.<ext>.
##
## Arguments   :
##   $comics - reference to the comic configuration hash; the comic's
##             'ext' entry is updated to the extension actually used
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an "ERROR: ..." string.
##
## Notes       :
##   Uses the file-level $USER_AGENT.  Every value placed in a shell
##   command is single-quoted.  A failed image download is now reported
##   instead of being ignored, and the function no longer deletes an
##   unrelated "index.html" from the current directory.  The old ($$)
##   prototype was dropped: the function takes three arguments.
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my ( @lines, $comicLine );
    my $comicIndex = "indexes/index.$comic";
    my $url        = $comics->{$comic}{'url'};
    my $fullName   = $comics->{$comic}{'fullName'};
    my $search     = $comics->{$comic}{'search'};

    ## Quote a value for /bin/sh: wrap it in single quotes and escape any
    ## single quote it contains.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' unless defined $arg;
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ## An empty pattern would make Perl reuse the last successful regex,
    ## so refuse to search without one.
    if ( !defined $search || $search eq '' ) {
        return "ERROR: Could not download comic $fullName";
    }

    my $wget_cmd = join( ' ',
        'wget', '-q',
        '--referer=' . $quote->( $url ),
        '--user-agent=' . $quote->( $USER_AGENT ),
        $quote->( $url ),
        '-O', $quote->( $comicIndex ) );
    system($wget_cmd);

    my $index_fh;
    if ( !open( $index_fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $fullName .
               " (" . $url . ")";
    }
    @lines = <$index_fh>;
    close ($index_fh);

    unlink ($comicIndex);

    ## Site root (scheme and host) used to complete relative image paths.
    my $mainURL = $url;
    if ( $url =~ m{\A(https?://[^/]+)}i ) { $mainURL = $1; }
    my ( $scheme ) = ( $url =~ m{\A(https?:)}i );
    $scheme = 'http:' unless defined $scheme;

    ##
    ## Find the comic strip URL based on the specified regex in the search.
    ## The last matching line wins.
    ##
    foreach my $line (@lines) {
        if ( ( $line =~ m/$search/i ) && defined $1 ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    if ( !$comicLine ) {
        return "ERROR: Could not download comic $fullName";
    }

    ## Prefer a real ".ext" suffix; fall back to the looser historical
    ## test that accepts the extension anywhere in the location.
    my $ext = $comics->{$comic}{'ext'};
    $ext = '' unless defined $ext;
    $ext =~ s/\s+\z//;
    if    ( $comicLine =~ m/\.(gif|jpg|png)(?![A-Za-z0-9])/i ) { $ext = $1; }
    elsif ( $comicLine =~ m/(gif|jpg|png)/i )                  { $ext = $1; }
    $comics->{$comic}{'ext'} = $ext;

    my $comicURL;
    if    ( $comicLine =~ m{\A//} ) { $comicURL = $scheme . $comicLine; }
    elsif ( $comicLine =~ m/http/ ) { $comicURL = $comicLine; }
    elsif ( $comicLine =~ m{\A/} )  { $comicURL = $mainURL . $comicLine; }
    else                            { $comicURL = $mainURL . "/" . $comicLine; }

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir   = $date->{'mon2'} . $date->{'year2'};
    my $cDate  = $date->{'day2'};
    my $target = "images/$cDir/$comic-$cDate.$ext";

    my $cmd = join( ' ',
        'wget',
        '--user-agent=' . $quote->( $USER_AGENT ),
        '--referer=' . $quote->( $url ),
        '-q', $quote->( $comicURL ),
        '-O', $quote->( $target ) );

    if ( system( $cmd ) != 0 ) {
        return "ERROR: Could not download comic $fullName";
    }

    return 0;
}
368:
#######################################################################
## Function    : parseComic
##
## Description :
##   Builds the image URL for a comic from its 'search' template by
##   filling in the date and extension placeholders:
##     __year__  __year2__  __mon__  __mon2__  __day__  __day2__  __ext__
##   The template stored in the configuration is left untouched.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic
##   $date   - reference to the date hash
##
## Returns     :
##   The filled-in template with a trailing newline removed.
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $url = $comics->{$comic}{'search'};

    ## Placeholder => replacement, applied in this order.
    my @fills = (
        [ '__year__',  $date->{'year'} ],
        [ '__year2__', $date->{'year2'} ],
        [ '__mon__',   $date->{'mon'} ],
        [ '__mon2__',  $date->{'mon2'} ],
        [ '__day__',   $date->{'day'} ],
        [ '__day2__',  $date->{'day2'} ],
        [ '__ext__',   $comics->{$comic}{'ext'} ],
    );

    foreach my $fill ( @fills ) {
        my ( $token, $value ) = @{$fill};
        $url =~ s/\Q$token\E/$value/g;
    }

    chomp $url;
    return $url;
}
386:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash for the day being fetched: the current local
##   time moved back by the file-level $days_ago days of 86400 seconds.
##
## Returns     :
##   A hash with these keys:
##     day, mon, year   - day of month, month (1-12), four-digit year
##     day2, mon2       - day and month zero-padded to two digits
##     year2            - last two digits of the year
##     dow              - weekday index as returned by localtime()
#######################################################################
sub fetchDates () {
    my $when = time - ( 86400 * $days_ago );
    my ( $mday, $month, $year, $wday ) = ( localtime($when) )[3,4,5,6];

    $year  += 1900;
    $month += 1;

    my %stamp = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $year,
        'dow'   => $wday,
        'year2' => substr( $year, 2, 2 ),
        'day2'  => sprintf( "%02d", $mday ),
        'mon2'  => sprintf( "%02d", $month ),
    );

    return %stamp;
}
1.8 nick 402:
###############################################################################
##
##  &fetchOptions( );
##
##  Parses the command line with Getopt::Long and returns the result as a
##  hash with the keys 'days', 'help' and 'man' (undefined when the option
##  was not given).  An unknown option or --help prints the usage text via
##  pod2usage; --man prints the full POD read from the DATA handle.
##
###############################################################################
sub fetchOptions {
    my ( $days, $help, $man );

    my $parsed = GetOptions(
        "days:i" => \$days,
        "help|?" => \$help,
        "man"    => \$man,
    );

    pod2usage( ) unless $parsed;
    pod2usage( ) if defined $help;
    pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $man;

    return ( 'days' => $days, 'help' => $help, 'man' => $man );
}
423:
424: __END__
425:
426: =head1 NAME
427:
428: fetch.pl - Fetches comics and places them all locally in a single html file.
429:
430: =head1 SYNOPSIS
431:
432: fetch.pl [options]
433:
434: Options:
435: --days,d Fetch comics from X days ago
436: --help,? Display the basic help menu
437: --man,m Display the detailed man page
438:
439: =head1 DESCRIPTION
440:
441: =head1 HISTORY
442:
443: =head1 AUTHOR
444:
445: Nicholas DeClario <nick@declario.com>
446:
447: =head1 BUGS
448:
449: This is a work in progress. Please report all bugs to the author.
450:
451: =head1 SEE ALSO
452:
453: =head1 COPYRIGHT
454:
455: =cut
456:
457:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>