Annotation of comics/fetch.pl.new, revision 1.17
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 nick 4: # $Log: fetch.pl.new,v $
1.17 ! nick 5: # Revision 1.16 2015/02/05 18:05:58 nick
! 6: # Changed the background and added a fancy title.
! 7: #
1.16 nick 8: # Revision 1.15 2015/01/19 13:46:19 nick
9: # *** empty log message ***
10: #
1.15 nick 11: ###############################################################################
12:
1.1 nick 13: use strict;
14: use File::Path;
15: use Data::Dumper;
1.8 nick 16: use Pod::Usage;
17: use Getopt::Long;
1.1 nick 18:
1.16 nick 19: use Date::Calc qw/Date_to_Text_Long Today/;
20:
##
## Script-wide defaults.
##
## The order of these declarations matters: the comic configuration is
## read first, then the command line, and fetchDates() reads $days_ago.
## The helper subs are called with a leading '&' because they are
## defined (with prototypes) further down the file.
##
my $ver       = '$Id: fetch.pl.new,v 1.16 2015/02/05 18:05:58 nick Exp $';
my $comicFile = "comics.conf";
my %comics    = &readComicConfig( $comicFile );
my %opts      = &fetchOptions();
my $days_ago  = $opts{'days'} || 0;
my %dates     = &fetchDates();

## Output locations, each overridable from the 'configs' section.
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = join( "/",
                     $baseDir,
                     ( $comics{'configs'}{'image_directory'} || "images" ),
                     "$dates{'mon2'}$dates{'year2'}" );
my $indexDir = join( "/",
                     $baseDir,
                     ( $comics{'configs'}{'index_directory'} || "indexes" ) );

## Browser identification sent with every wget request.
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

## Weekday names indexed by the localtime() day-of-week number.
my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

chomp( my $DATE = `date` );
print STDOUT "Starting comic fetch at $DATE\n";
39:
##
## Main program starts here
##
## Creates the output directories, starts the index page, then for every
## configured comic: downloads it, appends it to the index and shrinks
## the image to 640 pixels wide when it is wider than that.  The footer
## is written once all comics have been handled.
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## 'configs' holds the "key = value" settings, not a comic.
    next if ( $comic eq 'configs' );

    ## 'dow' is the localtime() weekday index, where 0 is Sunday.  The
    ## day of the month ('day2') can never equal the string "Sunday".
    if ( ( $dates{'dow'} == 0 ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );

    ## There is nothing to resize when the download failed.
    next if ( $comics{$comic}{'error'} );

    my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
    next if ( ! -s $file );

    ## List-form pipe open: the file name never passes through a shell.
    ## A failure here only skips the resize so the footer still gets
    ## written for the comics that did work.
    my $size = 0;
    my $img;
    if ( ! open( $img, '-|', '/usr/bin/identify', '-verbose', $file ) ) {
        warn "Can't run identify on $file: $!\n";
        next;
    }
    while ( my $line = <$img> ) {
        ## Only the first geometry line (the image width) is wanted.
        if ( $line =~ m/^\s+geometry:\s+(\d+)x\d+.*/i ) {
            $size = $1 if ( $size == 0 );
        }
    }
    close( $img );

    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $size > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
78:
#######################################################################
## Function : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function.
##
##   If the mode is invalid an error will be returned.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns :
##   The return value of the selected download function, or an
##   "ERROR: ..." string when the mode is not 1 or 2.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## Mode 1 searches the comic's page for the image, mode 2 builds
    ## the image URL from the configuration.
    my %handler = (
        1 => \&indexDownload,
        2 => \&directDownload,
    );
    my $mode = $comics->{$comic}{'mode'};

    ## Hand over the configuration that was passed in rather than a
    ## file-level variable, so the sub works on whatever it is given.
    if ( defined $mode && exists $handler{$mode} ) {
        return $handler{$mode}->( $comics, $comic, $date );
    }

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
105:
#######################################################################
## Function : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Lines starting with '#' are
##   ignored.  A line holding at least two commas defines a comic:
##
##     name,url,search,mode,fullName,ext[,sunday]
##
##   __YEAR__, __MON__ and __DAY__ in such a line are replaced with
##   the current local date.  'sunday' defaults to 1 when the field is
##   absent or empty.  Any other line of the form "key = value" is
##   stored under the 'configs' key.
##
## Parameters :
##   $comicFile - path of the configuration file
##
## Returns :
##   The configuration as a hash (list).  The hash is empty when the
##   file cannot be opened; a warning is printed in that case.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ( $year, $mon, $day ) = ( localtime( time ) )[5,4,3];
    $year += 1900;
    $mon   = sprintf( "%02d", ( $mon + 1 ) );
    $day   = sprintf( "%02d", $day );

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field (usually the file
        ## extension) does not carry a newline into file names.
        $line =~ s/\r?\n\z//;
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my ( $name, @field ) = split /,/, $line;
            $comicConfig{$name}{'url'}      = $field[0];
            $comicConfig{$name}{'search'}   = $field[1];
            $comicConfig{$name}{'mode'}     = $field[2];
            $comicConfig{$name}{'fullName'} = $field[3];
            $comicConfig{$name}{'ext'}      = $field[4];

            ## A plain "|| 1" would turn an explicit 0 back into 1.
            $comicConfig{$name}{'sunday'}   =
                ( defined $field[5] && $field[5] ne '' ) ? $field[5] : 1;
            $comicConfig{$name}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
142:
#######################################################################
## Function : writeComic
##
## Description :
##   Appends one table row for a comic to the index page of the given
##   date.  When the comic's 'error' entry is false the row shows the
##   downloaded image; otherwise it shows the error text in red.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to write
##   $date   - reference to the date hash
##
## Returns :
##   0 once the row has been written, 1 when the index file could not
##   be opened (a warning is printed).
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Can't append to $indexFile: $!\n";
        return 1;
    }

    if ( ! $comics->{$comic}{'error'} ) {
        print {$index} <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
  <td align="left">
    <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
    <font size="-2">
      <a href="$comics->{$comic}{'url'}">
        $comics->{$comic}{'url'}
      </a>
    </font><br/>
    <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
    <br/><br/>
  </td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The row and cell are opened here so the closing tags below
        ## have something to close.
        print {$index} <<EOF;
<tr>
  <td align="left">
    <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
    <font size="-2">
      <a href="$comics->{$comic}{'url'}">
        $comics->{$comic}{'url'}
      </a>
    </font><br/>
    <font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
  </td>
</tr>
EOF
    }

    close( $index );

    return 0;
}
189:
190:
#######################################################################
## Function : writeMainIndex
##
## Description :
##   Placeholder.  It only unpacks its argument; nothing is written.
##   The single call in the main program is commented out.
##
#######################################################################
sub writeMainIndex ($$) {
    my ( $when ) = @_;
}
197:
198:
#######################################################################
## Function : writeFooter
##
## Description :
##   Appends the closing part of the page to the index file of the
##   given date: the end of the comic table, the generation time, the
##   script version and the validator badge.
##
## Parameters :
##   $date - reference to the date hash
##
## Returns :
##   The result of closing the index file, or 0 when the file could
##   not be opened (a warning is printed).
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    ## `date` ends its output with a newline; keep it out of the markup.
    my $sysDate = `date`;
    chomp( $sysDate );

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Can't append to $indexFile: $!\n";
        return 0;
    }

    print {$index} <<EOF;
</table>
<center>
  <font size="2">
    Generated on: <font color="green">$sysDate</font><br/>
    Version: <font color="green">$ver</font><br />
    CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
  <p>
    <a href="http://validator.w3.org/check?uri=referer"><img
       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
  </p>
</center>

</body>
</html>
EOF
    return close( $index );
}
228:
#######################################################################
## Function : checkDir
##
## Description :
##   Makes sure every given directory exists, creating missing ones
##   (including parent directories) with mkpath.
##
## Parameters :
##   A list of directory names and/or references to arrays of
##   directory names.  The main program passes one array reference.
##
## Returns :
##   Nothing useful.
##
#######################################################################
sub checkDir {
    ## Flatten array references first.  Testing -d on the reference
    ## itself would test the string "ARRAY(0x...)", which never exists.
    my @dir = map { ref( $_ ) eq 'ARRAY' ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dir ) {
        next if ( ! defined $dir || $dir eq '' );
        if ( ! -d $dir ) { mkpath( $dir ); }
    }

    return;
}
238:
#######################################################################
## Function : writeTitle
##
## Description :
##   Starts (overwrites) the index page of the given date: document
##   head, page heading image, the date in long form, and the opening
##   of the table the comic rows are appended to.
##
## Parameters :
##   $date - reference to the date hash
##
## Returns :
##   The result of closing the index file, or 0 when the file could
##   not be created (a warning is printed).
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd        = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" .
                    $date->{'day'} . "/" . $date->{'year'};

    ## Use the date being fetched, not the current day, so the heading
    ## agrees with the page title when --days is given.
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'},
                                        $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "Can't create $indexFile: $!\n";
        return 0;
    }

    ## The inner heading table is closed here; the outer table stays
    ## open for the comic rows and is closed by the footer.
    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
  <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
  <title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
  <table cellpadding="0" cellspacing="0" border="0">
    <tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
    <tr><td align="left">$today_long</td></tr>
    <tr><td> </td></tr>
  </table>
</td></tr>

EOF
    return close( $index );
}
272:
#######################################################################
## Function : directDownload
##
## Description :
##   Downloads a comic whose image URL is built from the 'search'
##   entry by parseComic, converts it to JPEG and stores it in the
##   image directory as <comic>-<day2>.jpg.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns :
##   0 on success, otherwise an "ERROR: ..." string.  On success the
##   comic's 'ext' entry is set to 'jpg' to match the stored file.
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDate  = $date->{'day2'};
    my $target = "$imageDir/$comic-$cDate.jpg";

    ## The pipeline needs a shell, so every value is single-quoted.
    ## An unquoted '&' or ';' in a URL would otherwise split the command.
    my $quote = sub {
        my ( $text ) = @_;
        $text = '' if ( ! defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    my $cmd = "wget -q " . $quote->( $file ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $quote->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - " . $quote->( "jpeg:$target" );

    if ( system( $cmd ) != 0 ) {
        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
    }

    ## The strip is always stored as a JPEG, whatever extension the
    ## source URL used, so later path building must use 'jpg'.
    $comics->{$comic}{'ext'} = 'jpg';

    return 0;
}
289:
#######################################################################
## Function : indexDownload
##
## Description :
##   Downloads the comic's page, searches it with the configured
##   'search' pattern (case-insensitive; its first capture group is
##   the image location, and the last matching line wins), then
##   downloads that image into the image directory as
##   <comic>-<day2>.<ext>.  When the image location contains gif, jpg
##   or png, the comic's 'ext' entry is updated to that value.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns :
##   0 on success, otherwise an "ERROR: ..." string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $pageURL    = $comics->{$comic}{'url'};
    my $comicIndex = "$indexDir/index.$comic";
    my $comicLine;

    ## List-form system: the URL is never interpreted by a shell.
    system( 'wget', '-q', $pageURL, '-O', $comicIndex );

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $pageURL . ")";
    }
    my @lines = <$fh>;
    close( $fh );

    unlink( $comicIndex );

    ## Scheme and host of the page, used to complete relative image
    ## locations.  Falls back to the full page URL when it has no
    ## http(s) prefix.
    my $mainURL = ( $pageURL =~ m{\A(https?://[^/]+)}i ) ? $1 : $pageURL;

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $search = qr/$comics->{$comic}{'search'}/i;
    foreach my $line ( @lines ) {
        ## A pattern without a capture group leaves $1 undefined.
        if ( ( $line =~ $search ) && ( defined $1 ) ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDate = $date->{'day2'};

    if ( $comicLine ) {
        if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

        my $comicURL;
        if ( $comicLine =~ m{\A//} ) {
            ## Scheme-relative location: borrow the page's scheme.
            my $scheme = ( $mainURL =~ m{\A(https?:)}i ) ? $1 : "http:";
            $comicURL = $scheme . $comicLine;
        }
        elsif ( $comicLine =~ m/http/ ) {
            $comicURL = $comicLine;
        }
        elsif ( $comicLine =~ m{\A/} ) {
            $comicURL = $mainURL . $comicLine;
        }
        else {
            $comicURL = $mainURL . "/" . $comicLine;
        }

        my $target = "$imageDir/$comic-$cDate.$comics->{$comic}{'ext'}";
        my $status = system( 'wget',
                             "--user-agent=$USER_AGENT",
                             "--referer=$pageURL",
                             '-q', $comicURL, '-O', $target );
        return 0 if ( $status == 0 );
    }

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
340:
#######################################################################
## Function : parseComic
##
## Description :
##   Expands the placeholders in a comic's 'search' entry and returns
##   the result with one trailing newline removed.
##
##   __year__ / __year2__, __mon__ / __mon2__ and __day__ / __day2__
##   are replaced with the entries of the same name in the date hash;
##   __ext__ is replaced with the comic's 'ext' entry.
##
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $expanded = $comics->{$comic}{'search'};

    ## Work on the copy through $_ so each rule stays a one-liner.
    for ( $expanded ) {
        s/__year__/$date->{'year'}/g;
        s/__year2__/$date->{'year2'}/g;
        s/__mon__/$date->{'mon'}/g;
        s/__mon2__/$date->{'mon2'}/g;
        s/__day__/$date->{'day'}/g;
        s/__day2__/$date->{'day2'}/g;
        s/__ext__/$comics->{$comic}{'ext'}/g;
        chomp;
    }

    return $expanded;
}
358:
#######################################################################
## Function : fetchDates
##
## Description :
##   Builds the date hash for the day lying $days_ago days (counted as
##   86400 seconds each) before now, in local time.
##
##   Keys: 'day', 'mon' (1-12), 'year' (four digits), 'dow' (the
##   localtime() weekday index), plus the two-character forms 'day2',
##   'mon2' and 'year2'.
##
#######################################################################
sub fetchDates () {
    my $when = time - ( 86400 * $days_ago );
    my ( $mday, $month, $yr, $wday ) = ( localtime( $when ) )[3,4,5,6];

    ## localtime() counts years from 1900 and months from 0.
    $yr    += 1900;
    $month += 1;

    my %stamp = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $yr,
        'dow'   => $wday,
        'year2' => substr( $yr, 2, 2 ),
        'day2'  => sprintf( "%02d", $mday ),
        'mon2'  => sprintf( "%02d", $month ),
    );

    return %stamp;
}
1.8 nick 374:
###############################################################################
##
## &fetchOptions( );
##
## Parses the command line (--days [N], --help / -?, --man) and returns
## the values as a hash with the keys 'days', 'help' and 'man'.  Options
## that were not given have an undefined value.
##
## Prints the usage text and exits when parsing fails or --help is
## given; prints the full manual when --man is given.
##
###############################################################################
sub fetchOptions {
    my ( $days, $help, $man );

    my $parsed = GetOptions(
        "days:i" => \$days,
        "help|?" => \$help,
        "man"    => \$man,
    );

    pod2usage( ) if ( ! $parsed );
    pod2usage( ) if ( defined $help );
    pod2usage( { -verbose => 2, -input => \*DATA } ) if ( defined $man );

    my %opts = (
        'days' => $days,
        'help' => $help,
        'man'  => $man,
    );

    return %opts;
}
395:
396: __END__
397:
398: =head1 NAME
399:
400: fetch.pl - Fetches comics and places them all locally in a single html file.
401:
402: =head1 SYNOPSIS
403:
404: fetch.pl [options]
405:
406: Options:
407: --days,d Fetch comics from X days ago
408: --help,? Display the basic help menu
409: --man,m Display the detailed man page
410:
411: =head1 DESCRIPTION
412:
413: =head1 HISTORY
414:
415: =head1 AUTHOR
416:
417: Nicholas DeClario <nick@declario.com>
418:
419: =head1 BUGS
420:
421: This is a work in progress. Please report all bugs to the author.
422:
423: =head1 SEE ALSO
424:
425: =head1 COPYRIGHT
426:
427: =cut
428:
429:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>