Annotation of comics/fetch.pl.new, revision 1.18
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 nick 4: # $Log: fetch.pl.new,v $
1.18 ! nick 5: # Revision 1.17 2015/02/19 14:56:10 nick
! 6: # Fixed a problem that forced everything to JPG. This would kill GIF animations, and would not display the GIFs either, because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility, and rewrote how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
! 7: #
1.17 nick 8: # Revision 1.16 2015/02/05 18:05:58 nick
9: # Changed the background and added a fancy title.
10: #
1.16 nick 11: # Revision 1.15 2015/01/19 13:46:19 nick
12: # *** empty log message ***
13: #
1.15 nick 14: ###############################################################################
15:
1.1 nick 16: use strict;
17: use File::Path;
18: use Data::Dumper;
1.8 nick 19: use Pod::Usage;
20: use Getopt::Long;
1.1 nick 21:
1.16 nick 22: use Date::Calc qw/Date_to_Text_Long Today/;
23:
##
## Some default values
##
my $ver        = '$Id: fetch.pl.new,v 1.17 2015/02/19 14:56:10 nick Exp $';
my $comicFile  = "comics.conf";
my %comics     = &readComicConfig ( $comicFile );
my %opts       = &fetchOptions( );
my $days_ago   = $opts{'days'} || 0;
my %dates      = &fetchDates();
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                 "/$dates{'mon2'}$dates{'year2'}";
my $indexDir   = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

## Images wider than this many pixels are scaled down after download.
my $MAX_WIDTH  = 640;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## The 'configs' entry holds the global settings, not a comic.
    next if ( $comic eq 'configs' );

    ## $dates{'dow'} is localtime's day of week, which is 0 on Sundays.
    ## ($dates{'day2'} is the two digit day of the month and can never
    ## be the string "Sunday".)
    if ( ( $dates{'dow'} == 0 ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## A failed download leaves no image behind, so there is nothing
    ## to measure or resize.
    next if ( $comics{$comic}{'error'} );

    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    next if ( ! -s $file );

    ## Ask ImageMagick for the image width.  Only the first "Geometry"
    ## line is used; an animated GIF reports one per frame.  The list
    ## form of open keeps the file name away from the shell.
    my $size = 0;
    if ( ! open( my $img, '-|', '/usr/bin/identify', '-verbose', $file ) ) {
        warn "Can't run /usr/bin/identify on $file: $!\n";
        next;
    }
    else {
        while ( my $line = <$img> ) {
            if ( ( $size == 0 ) && ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) ) {
                $size = $1;
            }
        }
        close( $img );
    }

    if ( $size > $MAX_WIDTH ) {
        system( '/usr/bin/convert', '-resize', $MAX_WIDTH, $file, $file ) == 0
            or warn "Could not resize $file (convert exit status $?)\n";
    }
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
81:
#######################################################################
## Function    : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function:
##     mode 1 -> indexDownload
##     mode 2 -> directDownload
##
##   If the mode is invalid an error will be returned.
##
## Arguments   :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic to fetch
##   $date   - reference to the hash of date fields
##
## Returns     :
##   Whatever the selected download function returns, or an "ERROR:"
##   string when the mode is not recognised.
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );
    $mode =~ s/^\s+|\s+$//g;

    ## The helpers are called with '&' because they are defined further
    ## down the file; this avoids perl's "called too early to check
    ## prototype" warning.  The caller's own $comics reference is handed
    ## on rather than the file-level %comics hash.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq '1' );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
108:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Two kinds of lines are
##   recognised; lines starting with '#' are ignored:
##
##     name,url,search,mode,fullName,ext[,sunday]
##         A comic definition.  __YEAR__, __MON__ and __DAY__ are
##         replaced with today's date before the line is split.
##         'sunday' defaults to 1 when the field is absent or blank.
##
##     key = value
##         A global setting, stored under the 'configs' key.
##
## Arguments   :
##   $comicFile - path of the configuration file
##
## Returns     :
##   A hash keyed by comic name (plus 'configs').  If the file cannot
##   be opened a warning is printed and an empty hash is returned.
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ($year, $mon, $day) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon  = sprintf("%02d", ($mon + 1));
    $day  = sprintf("%02d", $day);

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Strip the line ending so the last field does not carry it.
        $line =~ s/\r?\n\z//;
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res  = split /,/, $line;
            my $name = $res[0];
            $comicConfig{$name}{'url'}      = $res[1];
            $comicConfig{$name}{'search'}   = $res[2];
            $comicConfig{$name}{'mode'}     = $res[3];
            $comicConfig{$name}{'fullName'} = $res[4];
            $comicConfig{$name}{'ext'}      = $res[5];
            ## An explicit 0 must survive, so test for presence rather
            ## than truth.
            $comicConfig{$name}{'sunday'}   =
                ( defined $res[6] && $res[6] =~ m/\S/ ) ? $res[6] : 1;
            $comicConfig{$name}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
145:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for the comic to the day's index page.
##   When the comic's 'error' field is false the row shows the
##   downloaded image; otherwise it shows the error in red.
##
## Arguments   :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic to write
##   $date   - reference to the hash of date fields
##
## Returns     :
##   0 once the row is written, 1 if the index file could not be
##   opened (a warning is printed in that case).
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Can't append to $indexFile: $!\n";
        return 1;
    }

    if ( ! $comics->{$comic}{'error'} ) {
        print $index <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td> so that the closing
        ## tags below are balanced.
        print $index <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    close( $index );

    return 0;
}
192:
193:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder; it does nothing yet and its only call in the main
##   program is commented out.
##
## Arguments   :
##   $date - reference to the hash of date fields (currently unused)
##
## Returns     :
##   Nothing.
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
200:
201:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing markup to the day's index page: the end of
##   the table, the generation time, the script version, the CVS link
##   and the validator badge.
##
## Arguments   :
##   $date - reference to the hash of date fields
##
## Returns     :
##   Nothing.  A warning is printed if the index file cannot be opened.
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    ## Drop the trailing newline so it does not end up inside the tag.
    my $sysDate = `date`;
    chomp( $sysDate );

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Can't append to $indexFile: $!\n";
        return;
    }

    print $index <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    close( $index );

    return;
}
231:
#######################################################################
## Function    : checkDir
##
## Description :
##   Makes sure each of the given directories exists, creating it
##   (and any missing parents) with mkpath when it does not.
##
## Arguments   :
##   Directory names, given either as a plain list or as a reference
##   to an array of names (the main program passes an array
##   reference).
##
## Returns     :
##   Nothing.
#######################################################################
sub checkDir {
    ## Flatten array references so that -d tests the directory name
    ## itself and not the stringified reference.
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath( $dir ) if ( ! -d $dir );
    }

    return;
}
241:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Creates (or truncates) the day's index page and writes the top
##   of the document: doctype, head, heading image and the date.
##   The outer table is left open; the comic rows and the footer are
##   appended by other functions.
##
## Arguments   :
##   $date - reference to the hash of date fields
##
## Returns     :
##   Nothing.  A warning is printed if the index file cannot be opened.
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    ## Use the date being fetched rather than Today(), so the heading
    ## agrees with the page title when an earlier day is requested.
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "Can't write $indexFile: $!\n";
        return;
    }

    print $index <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<link rel="shortcut icon" href="./favicon.ico" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td>&nbsp;</td></tr>
</table>
</td></tr>

EOF
    close( $index );

    return;
}
276:
#######################################################################
## Function    : directDownload
##
## Description :
##   Builds the image URL from the comic's 'search' template (see
##   parseComic), downloads it with wget and pipes it through
##   ImageMagick's convert, which stores it as a JPEG in the image
##   directory.
##
##   Because the stored file is always a JPEG, the comic's 'ext' is
##   set to 'jpg' on success so that later file names match the file
##   written here.
##
## Arguments   :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic to fetch
##   $date   - reference to the hash of date fields
##
## Returns     :
##   0 on success, an "ERROR:" string when the pipeline fails.
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Single-quote a value for the shell.  The pipeline needs a shell,
    ## so every interpolated value is quoted; URLs commonly contain
    ## '&' and '?'.
    my $quote = sub {
        my ( $text ) = @_;
        $text = '' if ( ! defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDate  = $date->{'day2'};
    my $target = "$imageDir/$comic-$cDate.jpg";

    my $cmd = "wget -q " . $quote->( $file ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $quote->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - " . $quote->( "jpeg:$target" );

    if ( system( $cmd ) != 0 ) {
        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
    }

    $comics->{$comic}{'ext'} = 'jpg';

    return 0;
}
293:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Downloads the comic's web page, looks for the image address with
##   the comic's 'search' regular expression (its first capture group
##   is the address) and downloads that image into the image
##   directory.  When several lines match, the last one wins.
##
##   The comic's 'ext' is updated from the image address when it ends
##   in gif, jpg, jpeg or png.
##
## Arguments   :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic to fetch
##   $date   - reference to the hash of date fields
##
## Returns     :
##   0 on success, an "ERROR:" string otherwise.
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my ( @lines, $comicLine );
    my $pageURL    = $comics->{$comic}{'url'};
    my $comicIndex = "$indexDir/index.$comic";

    ## List form of system: the URL never passes through a shell.
    system( 'wget', '-q', $pageURL, '-O', $comicIndex );

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $comics->{$comic}{'url'} . ")";
    }
    @lines = <$fh>;
    close( $fh );

    unlink( $comicIndex );

    ##
    ## Scheme and host of the page, used to complete relative image
    ## addresses.
    ##
    my $mainURL = $pageURL;
    my $scheme  = 'http:';
    if ( $pageURL =~ m{^((https?:)//[^/]+)}i ) {
        $mainURL = $1;
        $scheme  = $2;
    }

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $pattern = $comics->{$comic}{'search'};
    my $search  = defined $pattern ? eval { qr/$pattern/i } : undef;
    if ( ! defined $search ) {
        return "ERROR: Invalid search expression for $comics->{$comic}{'fullName'}";
    }

    foreach my $line ( @lines ) {
        ## $1 is only meaningful when the pattern has a capture group.
        if ( ( $line =~ $search ) && ( defined $1 ) ) {
            $comicLine = $1; chomp $comicLine;
        }
    }

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDate = $date->{'day2'};

    if ( $comicLine ) {
        ## Prefer a real file suffix; fall back to the first mention
        ## of a known type anywhere in the address.
        if ( $comicLine =~ m/\.(gif|jpe?g|png)(?:[?#].*)?$/i ) { $comics->{$comic}{'ext'} = $1; }
        elsif ( $comicLine =~ m/(gif|jpg|png)/i )               { $comics->{$comic}{'ext'} = $1; }

        my $comicURL;
        if    ( $comicLine =~ m{^https?://}i ) { $comicURL = $comicLine; }
        elsif ( $comicLine =~ m{^//} )         { $comicURL = $scheme . $comicLine; }
        elsif ( $comicLine =~ m{^/} )          { $comicURL = $mainURL . $comicLine; }
        else                                   { $comicURL = $mainURL . "/" . $comicLine; }

        my $target = "$imageDir/$comic-$cDate.$comics->{$comic}{'ext'}";
        my $status = system( 'wget', "--user-agent=$USER_AGENT",
                             "--referer=$pageURL", '-q', $comicURL,
                             '-O', $target );
        return 0 if ( $status == 0 );
    }

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
344:
#######################################################################
## Function    : parseComic
##
## Description :
##   Expands the placeholders in the comic's 'search' string and
##   returns the result with its trailing newline removed.
##
##     __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##         replaced with the matching field of the date hash
##     __ext__
##         replaced with the comic's 'ext'
##
## Arguments   :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic
##   $date   - reference to the hash of date fields
##
## Returns     :
##   The expanded string.
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $expanded = $comics->{$comic}{'search'};

    ## Placeholders are applied one after another, in this order.
    my @placeholders = (
        [ 'year',  $date->{'year'}  ],
        [ 'year2', $date->{'year2'} ],
        [ 'mon',   $date->{'mon'}   ],
        [ 'mon2',  $date->{'mon2'}  ],
        [ 'day',   $date->{'day'}   ],
        [ 'day2',  $date->{'day2'}  ],
        [ 'ext',   $comics->{$comic}{'ext'} ],
    );

    foreach my $entry ( @placeholders ) {
        my ( $tag, $value ) = @{ $entry };
        $expanded =~ s/__${tag}__/$value/g;
    }

    chomp $expanded;

    return $expanded;
}
362:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Works out the date fields for the day being fetched, which is
##   the current time minus $days_ago days of 86400 seconds.
##
## Returns     :
##   A hash with these keys:
##     day, mon, year   - day of month, month (1-12), four digit year
##     day2, mon2       - day and month padded to two digits
##     year2            - third and fourth characters of the year
##     dow              - localtime's day of week
#######################################################################
sub fetchDates () {
    my $when   = time - ( 86400 * $days_ago );
    my @fields = localtime( $when );

    my %dates = (
        'day'  => $fields[3],
        'mon'  => $fields[4] + 1,
        'year' => $fields[5] + 1900,
        'dow'  => $fields[6],
    );

    $dates{'year2'} = substr( $dates{'year'}, 2, 2 );
    $dates{'day2'}  = sprintf( "%02d", $dates{'day'} );
    $dates{'mon2'}  = sprintf( "%02d", $dates{'mon'} );

    return %dates;
}
1.8 nick 378:
###############################################################################
##
## &fetchOptions( );
##
## Parses the command line (--days, --help / -?, --man) and returns the
## values as a hash keyed by option name.  The usage text is shown when
## parsing fails or --help is given; --man shows the full manual taken
## from the DATA handle.
##
###############################################################################
sub fetchOptions {
    my %opts;

    my $parsed = GetOptions(
        "days:i" => \$opts{'days'},
        "help|?" => \$opts{'help'},
        "man"    => \$opts{'man'},
    );

    if ( ! $parsed ) {
        pod2usage( );
    }
    if ( defined $opts{'help'} ) {
        pod2usage( );
    }
    if ( defined $opts{'man'} ) {
        pod2usage( { -verbose => 2, -input => \*DATA } );
    }

    return %opts;
}
399:
400: __END__
401:
402: =head1 NAME
403:
404: fetch.pl - Fetches comics and places them all locally in a single html file.
405:
406: =head1 SYNOPSIS
407:
408: fetch.pl [options]
409:
410: Options:
411: --days,d Fetch comics from X days ago
412: --help,? Display the basic help menu
413: --man,m Display the detailed man page
414:
415: =head1 DESCRIPTION
416:
417: =head1 HISTORY
418:
419: =head1 AUTHOR
420:
421: Nicholas DeClario <nick@declario.com>
422:
423: =head1 BUGS
424:
425: This is a work in progress. Please report all bugs to the author.
426:
427: =head1 SEE ALSO
428:
429: =head1 COPYRIGHT
430:
431: =cut
432:
433:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>