Annotation of comics/fetch.pl.new, revision 1.19
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 nick 4: # $Log: fetch.pl.new,v $
1.19 ! nick 5: # Revision 1.18 2015/05/07 12:31:43 nick
! 6: # Added favicon
! 7: #
1.18 nick 8: # Revision 1.17 2015/02/19 14:56:10 nick
9: # Fixed a problem that forced everything to JPG. This would kill GIF animations, and would not display the GIFs either, because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility and rewrote how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update.
10: #
1.17 nick 11: # Revision 1.16 2015/02/05 18:05:58 nick
12: # Changed the background and added a fancy title.
13: #
1.16 nick 14: # Revision 1.15 2015/01/19 13:46:19 nick
15: # *** empty log message ***
16: #
1.15 nick 17: ###############################################################################
18:
1.1 nick 19: use strict;
20: use File::Path;
21: use Data::Dumper;
1.8 nick 22: use Pod::Usage;
23: use Getopt::Long;
1.1 nick 24:
1.16 nick 25: use Date::Calc qw/Date_to_Text_Long Today/;
26:
1.1 nick 27: ##
28: ## Some default values
29: ##
my $ver       = '$Id: fetch.pl.new,v 1.18 2015/05/07 12:31:43 nick Exp $';
my $comicFile = "comics.conf";

## The '&' call form is used for the subs defined further down so that any
## prototype attached to them is bypassed and perl does not emit
## "called too early to check prototype" warnings under -w.
my %comics   = &readComicConfig ( $comicFile );
my %opts     = &fetchOptions( );
my $days_ago = $opts{'days'} || 0;
my %dates    = &fetchDates();
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
               "/$dates{'mon2'}$dates{'year2'}";
my $indexDir = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## The 'configs' entry holds the global settings, not a comic.
    next if ( $comic =~ m/config/ );

    ## Skip comics flagged as not published on Sundays.  The weekday index
    ## ('dow', 0 = Sunday) is used here; 'day2' is the zero padded day of
    ## the month and can never equal "Sunday".
    my $sunday = $comics{$comic}{'sunday'};
    if ( ( $dates{'dow'} == 0 ) &&
         ( defined $sunday ) && ( $sunday == 0 ) ) { print "Skipping.\n"; next; }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## Nothing to measure or resize when the download failed.
    next if ( $comics{$comic}{'error'} );

    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    my $size = 0;

    next if ( ! -f $file );

    ## List form of the pipe open: the file name is handed to identify as
    ## a single argument and is never interpreted by a shell.
    open( my $img, '-|', '/usr/bin/identify', '-verbose', $file )
        or die ("Can't open: $!\n");
    while ( my $line = <$img> ) {
        ## Keep the width of the first geometry line only (an animated
        ## GIF reports one geometry per frame).
        if ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) {
            $size = $1 if ( $size == 0 );
        }
    }
    close( $img );

    ## Shrink anything wider than 640 pixels, in place.
    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $size > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
83:
84: ## End
85:
86: #######################################################################
87: ## Function : downloadComic
88: ##
89: ## Description :
90: ## This function determines the download method being used to
91: ## retrieve the comic and calls the appropriate function.
92: ##
93: ## If the mode is invalid an error will be returned.
94: ##
95: #######################################################################
sub downloadComic {
    ##
    ## Arguments : $comics - reference to the comic configuration hash
    ##             $comic  - key of the comic to download
    ##             $date   - reference to the date hash
    ## Returns   : whatever the selected download function returns
    ##             (a false value on success), or an error string when
    ##             the configured mode is neither 1 nor 2.
    ##
    my ( $comics, $comic, $date ) = @_;

    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## Hand the caller's reference on to the download function rather
    ## than reaching for the file level %comics hash.
    return indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
    return directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
112:
113: #######################################################################
114: #######################################################################
sub readComicConfig {
    ##
    ## Reads the comic configuration file.
    ##
    ## Arguments : $comicFile - path of the configuration file
    ## Returns   : a hash.  Every comic line
    ##               name,url,search,mode,fullName,ext[,sunday]
    ##             becomes an entry keyed by name; every "key = value"
    ##             line is stored under the 'configs' key.  An empty
    ##             hash is returned when the file cannot be read.
    ##
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    ## Today's date, substituted for __YEAR__/__MON__/__DAY__ on comic lines.
    my ( $year, $mon, $day ) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon   = sprintf( "%02d", ( $mon + 1 ) );
    $day   = sprintf( "%02d", $day );

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Without the chomp the last field of every comic line would
        ## carry the line's newline into file names and comparisons.
        chomp $line;

        ## Comment lines never define a comic or a setting.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res = split /,/, $line;
            $comicConfig{$res[0]}{'url'}      = $res[1];
            $comicConfig{$res[0]}{'search'}   = $res[2];
            $comicConfig{$res[0]}{'mode'}     = $res[3];
            $comicConfig{$res[0]}{'fullName'} = $res[4];
            $comicConfig{$res[0]}{'ext'}      = $res[5];

            ## Default to 1 only when the field is missing; an explicit
            ## 0 must survive so the comic can be skipped on Sundays.
            $comicConfig{$res[0]}{'sunday'}   =
                ( defined $res[6] && $res[6] ne '' ) ? $res[6] : 1;
            $comicConfig{$res[0]}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
149:
150: #######################################################################
151: #######################################################################
sub writeComic {
    ##
    ## Appends one table row for a comic to the day's index page: the
    ## image row when the download succeeded, or a red error row when
    ## $comics->{$comic}{'error'} is set.
    ##
    ## Arguments : $comics - reference to the comic configuration hash
    ##             $comic  - key of the comic being written
    ##             $date   - reference to the date hash
    ## Returns   : 0 on success, 1 when the index file cannot be opened
    ##
    my ( $comics, $comic, $date ) = @_;

    ## Three letter weekday abbreviation used in the index file name.
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return 1;
    }

    if ( ! $comics->{$comic}{'error'} ) {
        print {$index} <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td> so that the closing tags
        ## below are balanced.
        print {$index} <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    close( $index );

    return 0;
}
196:
197:
198: #######################################################################
199: #######################################################################
sub writeMainIndex {
    ##
    ## Placeholder: no main index is generated yet.
    ##
    ## Arguments : $date - reference to the date hash (currently unused)
    ## Returns   : nothing
    ##
    my ( $date ) = @_;

    return;
}
204:
205:
206: #######################################################################
207: #######################################################################
sub writeFooter {
    ##
    ## Appends the closing markup (generation time, version string, CVS
    ## link and validator badge) to the day's index page.
    ##
    ## Arguments : $date - reference to the date hash
    ## Returns   : nothing; a warning is issued when the index file
    ##             cannot be opened.
    ##
    my ( $date ) = @_;

    ## Three letter weekday abbreviation used in the index file name.
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $sysDate   = `date`;
    chomp $sysDate;

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</font>
</center>

</body>
</html>
EOF
    close( $index );

    return;
}
235:
236: #######################################################################
237: #######################################################################
sub checkDir {
    ##
    ## Creates every listed directory that does not exist yet,
    ## including any missing parent directories.
    ##
    ## Arguments : a list of directory names, array references holding
    ##             directory names, or a mix of both
    ## Returns   : nothing
    ##
    ## Array references are flattened first: the script passes a single
    ## array reference, and testing -d on the reference itself would
    ## never be true.
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath( $dir ) if ( ! -d $dir );
    }

    return;
}
245:
246: #######################################################################
247: #######################################################################
sub writeTitle {
    ##
    ## Starts (and truncates) the day's index page: document head, page
    ## title and the heading table.  The outer table opened here is left
    ## open for the comic rows and is closed by the footer.
    ##
    ## Arguments : $date - reference to the date hash
    ## Returns   : nothing; a warning is issued when the index file
    ##             cannot be created.
    ##
    my ( $date ) = @_;

    ## Three letter weekday abbreviation used in the index file name.
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" .
                    $date->{'day'} . "/" . $date->{'year'};

    ## Build the long heading from the date being fetched so that it
    ## agrees with the page title when an earlier day is requested.
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "WARNING: Can't create $indexFile: $!\n";
        return;
    }

    ## The inner heading table is closed before the comic rows start;
    ## &nbsp; keeps the spacer row from collapsing.
    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<link rel="shortcut icon" href="./favicon.ico" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td>&nbsp;</td></tr>
</table>
</td></tr>

EOF
    close( $index );

    return;
}
280:
281: #######################################################################
282: #######################################################################
sub directDownload {
    ##
    ## Downloads a comic whose image URL is built directly from the
    ## 'search' template and stores it as a JPEG.
    ##
    ## Arguments : $comics - reference to the comic configuration hash
    ##             $comic  - key of the comic to download
    ##             $date   - reference to the date hash
    ## Returns   : the status of system() for the wget | convert
    ##             pipeline (0 on success)
    ##
    my ( $comics, $comic, $date ) = @_;

    ## '&' form: skips any prototype still attached to parseComic.
    my $file = &parseComic ( $comics, $comic, $date );

    ## Single-quote a value for the shell so that characters such as
    ## & ? ; and $ inside URLs are passed to the commands literally.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir  = $date->{'mon2'} . $date->{'year2'};
    my $cDate = $date->{'day2'};

    ## The pipeline needs a shell, hence one quoted command string.
    my $cmd = join( ' ',
        'wget', '-q', $quote->( $file ),
        '--referer='    . $quote->( $comics->{$comic}{'url'} ),
        '--user-agent=' . $quote->( $USER_AGENT ),
        '-O', '-',
        '|',
        '/usr/bin/convert', '-',
        $quote->( "jpeg:images/$cDir/$comic-$cDate.jpg" ) );

    return system($cmd);
}
297:
298: #######################################################################
299: #######################################################################
sub indexDownload {
    ##
    ## Downloads the comic's web page, looks for the image URL with the
    ## configured 'search' regex (first capture group) and downloads
    ## that image.  The file extension stored in the configuration is
    ## updated when the image URL contains gif, jpg or png.
    ##
    ## Arguments : $comics - reference to the comic configuration hash
    ##             $comic  - key of the comic to download
    ##             $date   - reference to the date hash
    ## Returns   : 0 on success, otherwise an error string
    ##
    my ( $comics, $comic, $date ) = @_;
    my ( @lines, $comicLine, $mainURL );
    my $comicIndex = "indexes/index.$comic";
    my $url        = $comics->{$comic}{'url'};

    ## List form of system(): no shell is involved, so the URL is passed
    ## to wget exactly as configured.
    system( 'wget', '-q', "--referer=$url", "--user-agent=$USER_AGENT",
            $url, '-O', $comicIndex );

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $comics->{$comic}{'url'} . ")";
    }
    @lines = <$fh>;
    close( $fh );

    unlink( $comicIndex );

    $mainURL = $url;
    ## I need to figure out how to merge these two in to one regex.
    $mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
    $mainURL =~ s/([a-z])\/.*/$1/i;

    ##
    ## Find the comic strip URL based on the specified regex in the search.
    ## The last matching line wins.
    ##
    foreach my $line ( @lines ) {
        if ( ( $line =~ m/$comics->{$comic}{'search'}/i ) && defined $1 ) {
            $comicLine = $1; chomp $comicLine;
        }
    }

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir  = $date->{'mon2'} . $date->{'year2'};
    my $cDate = $date->{'day2'};

    if ( $comicLine ) {
        if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

        ## Relative image links are resolved against the site root.
        my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
        my $target   = "images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";

        my $status = system( 'wget', "--user-agent=$USER_AGENT", "--referer=$url",
                             '-q', $comicURL, '-O', $target );
        return 0 if ( $status == 0 );

        ## Report a failed image download instead of claiming success.
        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
    }

    ## NOTE(review): this removes a file named index.html from the
    ## current directory; it looks like a leftover from an older
    ## download scheme - confirm before removing.
    unlink "index.html";

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
351:
352: #######################################################################
353: #######################################################################
sub parseComic {
    ##
    ## Expands the date and extension placeholders in a comic's
    ## 'search' template.
    ##
    ## Arguments : $comics - reference to the comic configuration hash
    ##             $comic  - key of the comic
    ##             $date   - reference to the date hash
    ## Returns   : the expanded string without a trailing newline; the
    ##             configuration itself is not modified.
    ##
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};
    $string = '' if ( ! defined $string );

    ## A missing extension expands to an empty string.
    my $ext = $comics->{$comic}{'ext'};
    $ext = '' if ( ! defined $ext );

    ## Placeholder / value pairs, applied in this order.
    my @subst = (
        [ '__year__',  $date->{'year'}  ],
        [ '__year2__', $date->{'year2'} ],
        [ '__mon__',   $date->{'mon'}   ],
        [ '__mon2__',  $date->{'mon2'}  ],
        [ '__day__',   $date->{'day'}   ],
        [ '__day2__',  $date->{'day2'}  ],
        [ '__ext__',   $ext             ],
    );

    foreach my $pair ( @subst ) {
        my ( $placeholder, $value ) = @{ $pair };
        $value = '' if ( ! defined $value );
        $string =~ s/\Q$placeholder\E/$value/g;
    }
    chomp $string;

    return $string;
}
369:
370: #######################################################################
371: #######################################################################
sub fetchDates () {
    ##
    ## Builds the date hash for the day $days_ago days before now.
    ##
    ## Returns : a hash with the keys
    ##             day, mon, year - numeric day, month (1-12), 4 digit year
    ##             dow            - weekday index as returned by localtime
    ##             day2, mon2     - day and month padded to two digits
    ##             year2          - last two digits of the year
    ##
    my $when = time - ( 86400 * $days_ago );
    my ( $mday, $month, $year, $wday ) = ( localtime( $when ) )[3,4,5,6];

    ## localtime counts years from 1900 and months from 0.
    $year  += 1900;
    $month += 1;

    my %result = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $year,
        'dow'   => $wday,
        'year2' => substr( $year, 2, 2 ),
        'day2'  => sprintf( "%02d", $mday ),
        'mon2'  => sprintf( "%02d", $month ),
    );

    return %result;
}
1.8 nick 385:
386: ###############################################################################
387: ##
388: ## &fetchOptions( );
389: ##
390: ## Grab our command line arguments and toss them in to a hash
391: ##
392: ###############################################################################
sub fetchOptions {
    ## Returns a hash with the keys 'days', 'help' and 'man'; a key's
    ## value is undefined when the option was not given.
    my %parsed;

    my $ok = GetOptions(
        "days:i" => \$parsed{'days'},
        "help|?" => \$parsed{'help'},
        "man"    => \$parsed{'man'},
    );

    ## Unknown or malformed options: show the usage text.
    pod2usage( ) if ( ! $ok );

    if ( defined $parsed{'help'} ) {
        pod2usage( );
    }
    if ( defined $parsed{'man'} ) {
        ## The full manual is read from the POD after __END__.
        pod2usage( { -verbose => 2, -input => \*DATA } );
    }

    return %parsed;
}
406:
407: __END__
408:
409: =head1 NAME
410:
411: fetch.pl - Fetches comics and places them all locally in a single html file.
412:
413: =head1 SYNOPSIS
414:
415: fetch.pl [options]
416:
417: Options:
418: --days,d Fetch comics from X days ago
419: --help,? Display the basic help menu
420: --man,m Display the detailed man page
421:
422: =head1 DESCRIPTION
423:
424: =head1 HISTORY
425:
426: =head1 AUTHOR
427:
428: Nicholas DeClario <nick@declario.com>
429:
430: =head1 BUGS
431:
432: This is a work in progress. Please report all bugs to the author.
433:
434: =head1 SEE ALSO
435:
436: =head1 COPYRIGHT
437:
438: =cut
439:
440:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>