Annotation of comics/fetch.pl.new, revision 1.16
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 nick 3: ###############################################################################
1.16 ! nick 4: # $Log: fetch.pl.new,v $
! 5: # Revision 1.15 2015/01/19 13:46:19 nick
! 6: # *** empty log message ***
! 7: #
1.15 nick 8: ###############################################################################
9:
1.1 nick 10: use strict;
11: use File::Path;
12: use Data::Dumper;
1.8 nick 13: use Pod::Usage;
14: use Getopt::Long;
1.1 nick 15:
1.16 ! nick 16: use Date::Calc qw/Date_to_Text_Long Today/;
! 17:
##
## Some default values
##
## NOTE: helper subs are defined further down the file, so they are invoked
## with the file's existing &name(...) form throughout this section.
##
my $ver        = '$Id: fetch.pl.new,v 1.15 2015/01/19 13:46:19 nick Exp $';
my $comicFile  = "comics.conf";
my %comics     = &readComicConfig ( $comicFile );
my %opts       = &fetchOptions( );
my $days_ago   = $opts{'days'} || 0;
my %dates      = &fetchDates();
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
                 "/$dates{'mon2'}$dates{'year2'}";
my $indexDir   = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## The 'configs' entry holds key = value settings, not a comic.
    next if ( $comic =~ m/config/ );

    ## 'dow' is the localtime() weekday index into @days.  'day2' is the
    ## zero-padded day of the month, so comparing it to "Sunday" would
    ## never match and Sunday-less strips would never be skipped.
    if ( ( $days[ $dates{'dow'} ] eq "Sunday" ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) {
        print "Skipping.\n";
        next;
    }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

##
## Shrink any of today's images that are wider than 640 pixels.
##
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";
foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) )
{
    ## List-form pipe open / system() hand the file name to the tool
    ## directly, so names with spaces or shell characters are safe.
    my $info = '';
    if ( open( my $identify, '-|', '/usr/bin/identify', $file ) ) {
        local $/;
        $info = <$identify>;
        $info = '' if ( ! defined $info );
        close( $identify );
    }

    ## identify reports the geometry as WIDTHxHEIGHT; if it produced
    ## nothing parseable there is nothing sensible to resize.
    next if ( $info !~ m/.*\s(\d+)x\d+/ );
    my $width = $1;

    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $width > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";
68:
69: ## End
70:
#######################################################################
## Function    : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function.
##
##   If the mode is invalid an error will be returned.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash used by the download subs
##
## Returns     :
##   Whatever the selected download sub returns, or an "ERROR: ..."
##   string when the configured mode is not 1 or 2.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## A missing mode falls through to the "unknown method" error below
    ## instead of triggering uninitialized-value warnings.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## Hand the caller's configuration reference on to the download subs
    ## rather than reaching for the file-level %comics hash.
    return indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
    return directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
97:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Lines containing at least two
##   commas describe a comic:
##
##     name,url,search,mode,fullName,ext[,sunday]
##
##   Lines of the form "key = value" are stored under the 'configs'
##   key.  Lines starting with '#' are comments.  The markers __YEAR__,
##   __MON__ and __DAY__ on comic lines are replaced with today's
##   four-digit year, two-digit month and two-digit day.
##
## Arguments   :
##   $comicFile - path of the configuration file
##
## Returns     :
##   A hash keyed by comic name (plus 'configs').  If the file cannot
##   be opened a warning is printed and an empty hash is returned.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ( $year, $mon, $day ) = ( localtime(time) )[5,4,3];
    $year += 1900;
    $mon   = sprintf( "%02d", ( $mon + 1 ) );
    $day   = sprintf( "%02d", $day );

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field of a comic line does
        ## not carry a trailing newline into URLs and comparisons.
        chomp $line;

        ## Comments are skipped up front so a comment that happens to
        ## contain " = " is not stored as a configuration setting.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res  = split /,/, $line;
            my $name = $res[0];

            $comicConfig{$name}{'url'}      = $res[1];
            $comicConfig{$name}{'search'}   = $res[2];
            $comicConfig{$name}{'mode'}     = $res[3];
            $comicConfig{$name}{'fullName'} = $res[4];
            $comicConfig{$name}{'ext'}      = $res[5];

            ## Default to 1 only when the field is absent or empty; an
            ## explicit 0 must survive (a plain "|| 1" would discard it).
            $comicConfig{$name}{'sunday'}   =
                ( defined $res[6] && length $res[6] ) ? $res[6] : 1;
            $comicConfig{$name}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
134:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends the HTML table row for one comic to the day's index file.
##   When the comic's 'error' entry is false the row shows the image;
##   otherwise it shows the error text in red.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic being written
##   $date   - reference to the date hash (dow, year2, mon2, day2)
##
## Returns     :
##   Always 0.  A warning is printed if the index file cannot be
##   opened for appending.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd        = substr( $days[ $date->{'dow'} ], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $fullName  = $comics->{$comic}{'fullName'};
    my $url       = $comics->{$comic}{'url'};
    my $error     = $comics->{$comic}{'error'};

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "ERROR: Can't append to index file $indexFile: $!\n";
        return 0;
    }

    if ( ! $error ) {
        print {$index} <<EOF;

<!-- ********* Begin $comic ($fullName) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$fullName</b></font>
<font size="-2">
<a href="$url">
$url
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($fullName) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td> so the closing tags are
        ## balanced and the surrounding table stays well formed.
        print {$index} <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$fullName</b></font>
<font size="-2">
<a href="$url">
$url
</a>
</font><br/>
<font color="red"><b>$comic : $error</b></font><br/>
</td>
</tr>
EOF
    }

    close( $index );

    return 0;
}
181:
182:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder for writing a main index page.  It currently does
##   nothing; its only call in the main program is commented out.
##
## Arguments   :
##   $date - reference to the date hash (unused for now)
##
## Returns     :
##   Nothing.
##
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
189:
190:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing HTML (end of table, generation time, version
##   string, CVS link and validator badge) to the day's index file.
##
## Arguments   :
##   $date - reference to the date hash (dow, year2, mon2, day2)
##
## Returns     :
##   Nothing.  A warning is printed if the index file cannot be opened
##   for appending.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd        = substr( $days[ $date->{'dow'} ], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $sysDate   = `date`;
    chomp $sysDate;

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "ERROR: Can't append to index file $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    close( $index );

    return;
}
220:
#######################################################################
## Function    : checkDir
##
## Description :
##   Makes sure each of the given directories exists, creating any that
##   are missing (including parent directories) with mkpath.
##
## Arguments   :
##   A list of directory paths and/or array references holding
##   directory paths.  The main program passes a single array reference.
##
## Returns     :
##   Nothing.
##
#######################################################################
sub checkDir {
    ## Flatten array references so the -d test sees real path names
    ## rather than a stringified reference.
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath( $dir ) if ( ! -d $dir );
    }

    return;
}
230:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Creates (truncating any previous copy) the day's index file and
##   writes the HTML preamble: doctype, head, the opening of the main
##   table and a heading row holding the banner image and long date.
##
## Arguments   :
##   $date - reference to the date hash (dow, day, mon, year, year2,
##           mon2, day2)
##
## Returns     :
##   Nothing.  A warning is printed if the index file cannot be
##   created.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd        = substr( $days[ $date->{'dow'} ], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    ## Use the date being fetched (which honours --days) so the heading
    ## agrees with the page title instead of always showing today.
    my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "ERROR: Can't create index file $indexFile: $!\n";
        return;
    }

    ## The heading lives in its own inner table, which is closed here so
    ## the comic rows appended later land in the outer table.
    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td> </td></tr>
</table>
</td></tr>

EOF
    close( $index );

    return;
}
264:
#######################################################################
## Function    : directDownload
##
## Description :
##   Downloads a comic whose image URL can be built directly from the
##   'search' template (see parseComic) and converts it to a JPEG in
##   the current image directory.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   The status returned by system() for the download pipeline (0 on
##   success), or an "ERROR: ..." string when no URL could be built.
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    if ( ! defined $file || $file eq '' ) {
        return "ERROR: No download URL configured for $comics->{$comic}{'fullName'}";
    }

    ## Single-quote every value handed to the shell so characters such
    ## as '&', '?' or ';' in a URL are not interpreted by it.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ##
    ## Save the file to the appropriate directory.  $imageDir already
    ## includes the month/year sub-directory and is the directory the
    ## main program creates and later scans for images to resize.
    ##
    my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

    my $cmd = join( ' ',
        'wget', '-q', $quote->( $file ),
        '--referer='    . $quote->( $comics->{$comic}{'url'} ),
        '--user-agent=' . $quote->( $USER_AGENT ),
        '-O', '-',
        '|',
        '/usr/bin/convert', '-', $quote->( "jpeg:$target" ) );

    return system($cmd);
}
281:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Downloads the comic's web page, looks for the image URL using the
##   'search' regex from the configuration (its first capture group is
##   the URL; the last matching line wins), then downloads that image
##   and converts it to a JPEG in the current image directory.
##
## Arguments   :
##   $comics - reference to the comic configuration hash; the 'ext'
##             entry is updated when the image URL names a gif, jpg
##             or png
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an "ERROR: ..." string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $url        = $comics->{$comic}{'url'};
    my $search     = $comics->{$comic}{'search'};
    my $fullName   = $comics->{$comic}{'fullName'};
    my $comicIndex = "$indexDir/index.$comic";
    my $comicLine;

    ## An empty pattern would not mean "match nothing" to Perl, so
    ## refuse to go on without a usable search expression.
    if ( ! defined $search || $search eq '' ) {
        return "ERROR: Could not download comic $fullName";
    }

    ## List-form system() passes the URL straight to wget, so shell
    ## characters in it are harmless.
    system( 'wget', '-q', $url, '-O', $comicIndex );

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $fullName .
               " (" . $url . ")";
    }
    my @lines = <$fh>;
    close( $fh );

    unlink( $comicIndex );

    ## Scheme and host of the comic page, used to complete relative
    ## image paths.  Falls back to the full URL if it has no such form.
    my $mainURL = ( $url =~ m{\A(https?://[^/]+)}i ) ? $1 : $url;

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    foreach my $line ( @lines ) {
        if ( ( $line =~ m/$search/ ) && ( defined $1 ) ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    if ( ! $comicLine ) {
        return "ERROR: Could not download comic $fullName";
    }

    if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

    ## Protocol-relative links ("//host/path") borrow the page's scheme;
    ## anything else without "http" is treated as a path on the same host.
    my $comicURL;
    if ( $comicLine =~ m{\A//} ) {
        my ( $scheme ) = ( $mainURL =~ m{\A(https?:)}i );
        $comicURL = ( defined $scheme ? $scheme : "http:" ) . $comicLine;
    }
    elsif ( $comicLine =~ m/http/ ) {
        $comicURL = $comicLine;
    }
    else {
        $comicURL = $mainURL . $comicLine;
    }

    ## Single-quote every value handed to the shell.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ##
    ## Save the file to the appropriate directory.  $imageDir already
    ## includes the month/year sub-directory.
    ##
    my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

    my $cmd = join( ' ',
        'wget',
        '--user-agent=' . $quote->( $USER_AGENT ),
        '--referer='    . $quote->( $url ),
        '-q', $quote->( $comicURL ),
        '-O', '-',
        '|',
        '/usr/bin/convert', '-', $quote->( "jpeg:$target" ) );

    ## Report a failed download instead of letting the index page link
    ## to an image that was never written.
    if ( system( $cmd ) != 0 ) {
        return "ERROR: Could not download comic $fullName";
    }

    return 0;
}
332:
#######################################################################
## Function    : parseComic
##
## Description :
##   Expands the date and extension markers in a comic's 'search'
##   template:
##
##     __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##     are replaced with the entries of the same name in the date hash;
##     __ext__   is replaced with the comic's 'ext' entry.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic whose template is expanded
##   $date   - reference to the date hash
##
## Returns     :
##   The expanded string with a trailing newline removed, or undef if
##   the comic has no 'search' entry.
##
#######################################################################
sub parseComic {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};

    return $string if ( ! defined $string );

    my %value_for = (
        'year'  => $date->{'year'},
        'year2' => $date->{'year2'},
        'mon'   => $date->{'mon'},
        'mon2'  => $date->{'mon2'},
        'day'   => $date->{'day'},
        'day2'  => $date->{'day2'},
        'ext'   => $comics->{$comic}{'ext'},
    );

    ## One pass over the template; each marker is looked up by name.
    $string =~ s/__(year2?|mon2?|day2?|ext)__/$value_for{$1}/g;
    chomp $string;

    return $string;
}
350:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash for the day being fetched, which is the
##   current local time moved back by $days_ago days.
##
## Returns     :
##   A hash with the keys:
##     day, mon, year - day of month, month (1-12), four-digit year
##     dow            - weekday index as returned by localtime()
##     day2, mon2     - day and month padded to two digits
##     year2          - last two digits of the year
##
#######################################################################
sub fetchDates () {
    my $when = time - ( 86400 * $days_ago );
    my ( $mday, $month, $yr, $wday ) = ( localtime( $when ) )[3,4,5,6];

    ## localtime() counts years from 1900 and months from 0.
    $yr    += 1900;
    $month += 1;

    my %stamp = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $yr,
        'dow'   => $wday,
        'year2' => substr( $yr, 2, 2 ),
        'day2'  => sprintf( "%02d", $mday ),
        'mon2'  => sprintf( "%02d", $month ),
    );

    return %stamp;
}
1.8 nick 366:
###############################################################################
##
## &fetchOptions( );
##
## Parses the command line (--days [N], --help / -?, --man) and returns
## the results as a hash with the keys 'days', 'help' and 'man'.
##
## Prints the usage text and exits when the options cannot be parsed
## or when --help is given; prints the full manual when --man is given.
##
###############################################################################
sub fetchOptions {
    my ( $wantDays, $wantHelp, $wantMan );

    my $parsedOk = GetOptions(
        "days:i" => \$wantDays,
        "help|?" => \$wantHelp,
        "man"    => \$wantMan,
    );

    if ( ! $parsedOk ) {
        pod2usage( );
    }
    if ( defined $wantHelp ) {
        pod2usage( );
    }
    if ( defined $wantMan ) {
        pod2usage( { -verbose => 2, -input => \*DATA } );
    }

    my %parsed = (
        'days' => $wantDays,
        'help' => $wantHelp,
        'man'  => $wantMan,
    );

    return %parsed;
}
387:
388: __END__
389:
390: =head1 NAME
391:
392: fetch.pl - Fetches comics and places them all locally in a single html file.
393:
394: =head1 SYNOPSIS
395:
396: fetch.pl [options]
397:
398: Options:
399: --days,d Fetch comics from X days ago
400: --help,? Display the basic help menu
401: --man,m Display the detailed man page
402:
403: =head1 DESCRIPTION
404:
405: =head1 HISTORY
406:
407: =head1 AUTHOR
408:
409: Nicholas DeClario <nick@declario.com>
410:
411: =head1 BUGS
412:
413: This is a work in progress. Please report all bugs to the author.
414:
415: =head1 SEE ALSO
416:
417: =head1 COPYRIGHT
418:
419: =cut
420:
421:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>