Annotation of comics/fetch.pl.new, revision 1.14
1.1 nick 1: #!/usr/bin/perl -w
2:
3: use strict;
4: use File::Path;
5: use Data::Dumper;
1.8 nick 6: use Pod::Usage;
7: use Getopt::Long;
1.1 nick 8:
##
## Script-wide defaults.
##
## Everything below is a file-level lexical that the subroutines further
## down read directly, so the names must not change.  The configuration
## file is read first, then the command line, and only then are the
## date-dependent paths derived.
##
my $ver       = q/$Id: fetch.pl.new,v 1.13 2013-03-12 13:09:34 nick Exp $/;
my $comicFile = "comics.conf";
my %comics    = &readComicConfig( $comicFile );
my %opts      = &fetchOptions( );
my $days_ago  = $opts{'days'} ? $opts{'days'} : 0;
my %dates     = &fetchDates();

## Global settings live under the 'configs' key of the comic table.
my $settings = ( $comics{'configs'} ||= {} );

## <base>/<image dir>/<MMYY> holds the downloaded strips,
## <base>/<index dir> holds the generated HTML pages.
my $baseDir  = $settings->{'base_directory'} || ".";
my $imageDir = join( "/",
                     $baseDir,
                     $settings->{'image_directory'} || "images",
                     $dates{'mon2'} . $dates{'year2'} );
my $indexDir = join( "/",
                     $baseDir,
                     $settings->{'index_directory'} || "indexes" );

my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;
1.1 nick 24:
25:
my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## 'configs' is the settings entry of the table, not a comic.
    next if ( $comic eq 'configs' );

    ## $dates{'dow'} is the localtime() weekday index, 0 being Sunday.
    ## (The day-of-month string in 'day2' can never equal "Sunday".)
    if ( ( $dates{'dow'} == 0 ) &&
         ( ( $comics{$comic}{'sunday'} || 0 ) == 0 ) ) {
        print "Skipping.\n";
        next;
    }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

##
## Shrink every strip fetched for this day that is wider than 640 pixels.
## Both helper programs are run without a shell so that unusual file
## names cannot break (or inject into) the command line.
##
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";
foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) )
{
    my $info = '';
    if ( open( my $identify, '-|', '/usr/bin/identify', $file ) ) {
        local $/ = undef;
        my $out = <$identify>;
        $info = $out if ( defined $out );
        close( $identify );
    }
    else {
        warn "WARNING: Can't run /usr/bin/identify on $file: $!\n";
        next;
    }

    ## identify reports the geometry as WIDTHxHEIGHT; the greedy prefix
    ## picks the last such token on the line, as the old s/// did.
    my ( $width ) = $info =~ m/.*\s(\d+)x\d+/;
    next if ( ! defined $width );

    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $width > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
62:
#######################################################################
## Function    : downloadComic
##
## Description :
##   Determines the download method configured for a comic and calls
##   the appropriate function:
##     mode 1 -> indexDownload  (scrape the strip URL from a page)
##     mode 2 -> directDownload (build the strip URL from a template)
##
## Arguments   :
##   $comics - reference to the comic table
##   $comic  - key of the comic to fetch
##   $date   - reference to the date hash
##
## Returns     :
##   Whatever the selected downloader returns (false on success), or
##   an "ERROR: ..." string if the mode is not recognised.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );
    $mode =~ s/^\s+|\s+$//g;

    ## Pass the table we were handed, not the file-level %comics.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq '1' );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
89:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Two kinds of line are
##   recognised; comment lines (leading '#') and blank lines are
##   ignored:
##
##     name,url,search,mode,fullName[,ext[,sunday]]   a comic entry
##     key = value                                     a global setting
##
##   In comic entries __YEAR__, __MON__ and __DAY__ are replaced with
##   the current local date (this runs before the command line is
##   parsed, so any --days offset is NOT applied here).
##
## Arguments   :
##   $comicFile - path of the configuration file
##
## Returns     :
##   A hash keyed by comic name (each value a hash with the keys url,
##   search, mode, fullName, ext, sunday, error) plus a 'configs' entry
##   holding the global settings.  'sunday' defaults to 1 when the
##   field is absent.  An unreadable file yields a warning and an
##   empty hash.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ( $year, $mon, $day ) = ( localtime( time ) )[5,4,3];
    $year += 1900;
    $mon   = sprintf( "%02d", ( $mon + 1 ) );
    $day   = sprintf( "%02d", $day );

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field does not carry it.
        $line =~ s/[\r\n]+\z//;

        next if ( $line =~ m/^#/ );
        next if ( $line !~ m/\S/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res = split /,/, $line;
            $comicConfig{$res[0]}{'url'}      = $res[1];
            $comicConfig{$res[0]}{'search'}   = $res[2];
            $comicConfig{$res[0]}{'mode'}     = $res[3];
            $comicConfig{$res[0]}{'fullName'} = $res[4];
            $comicConfig{$res[0]}{'ext'}      = $res[5];

            ## An explicit 0 must survive, so test for presence rather
            ## than truth when applying the default.
            $comicConfig{$res[0]}{'sunday'}   =
                ( defined $res[6] && $res[6] ne '' ) ? $res[6] : 1;
            $comicConfig{$res[0]}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
126:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for a comic to the day's index page: the
##   strip image when the download succeeded, or the error text when
##   $comics->{$comic}{'error'} is set.
##
## Arguments   :
##   $comics - reference to the comic table
##   $comic  - key of the comic being written
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, 1 if the index page could not be opened.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $content;

    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
  <td align="left">
    <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
    <font size="-2">
      <a href="$comics->{$comic}{'url'}">
        $comics->{$comic}{'url'}
      </a>
    </font><br/>
    <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
    <br/><br/>
  </td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The row is opened here as well, so that the closing tags
        ## below have something to close.
        $content = <<EOF;
<tr>
  <td align="left">
    <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
    <font size="-2">
      <a href="$comics->{$comic}{'url'}">
        $comics->{$comic}{'url'}
      </a>
    </font><br/>
    <font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
  </td>
</tr>
EOF
    }

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return 1;
    }
    print {$index} $content;
    close( $index );

    return 0;
}
173:
174:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder for generating a top-level index page.  It is not
##   implemented yet, and the only call to it in the main program is
##   commented out.
##
## Arguments   :
##   $date - reference to the date hash (currently unused)
##
## Returns     :
##   Nothing.
##
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    ## TODO: not implemented.
    return;
}
181:
182:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing part of the day's index page: end of the
##   comic table, generation time, script version, CVS link and the
##   closing body/html tags.
##
## Arguments   :
##   $date - reference to the date hash
##
## Returns     :
##   True on success, false if the index page could not be opened.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    ## Same treatment as the timestamps printed by the main program.
    my $sysDate = `date`;
    chomp( $sysDate );

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return 0;
    }

    print {$index} <<EOF;
</table>
<center>
  <font size="2">
    Generated on: <font color="green">$sysDate</font><br/>
    Version: <font color="green">$ver</font><br />
    CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
    <p>
      <a href="http://validator.w3.org/check?uri=referer"><img
        src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
    </p>
</center>

</body>
</html>
EOF

    return close( $index );
}
212:
#######################################################################
## Function    : checkDir
##
## Description :
##   Makes sure every given directory exists, creating it (and any
##   missing parents) with File::Path::mkpath when it does not.
##
## Arguments   :
##   A list of directory names and/or references to arrays of
##   directory names.  The main program passes a single array ref.
##
## Returns     :
##   Nothing.  mkpath raises an exception if a directory cannot be
##   created.
##
#######################################################################
sub checkDir {
    ## Flatten array refs so each path is tested individually; -d on
    ## the reference itself would always be false.
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        next if ( -d $dir );
        mkpath( $dir );
    }

    return;
}
222:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Creates (truncating any previous copy) the day's index page and
##   writes its XHTML header: doctype, head, page title and the
##   opening tag of the comic table.
##
## Arguments   :
##   $date - reference to the date hash
##
## Returns     :
##   True on success, false if the index page could not be created.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "WARNING: Can't create $indexFile: $!\n";
        return 0;
    }

    ## The page declares XHTML 1.0, so empty elements such as <link>
    ## have to be self-closed.
    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
  <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
  <title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF

    return close( $index );
}
249:
#######################################################################
## Function    : directDownload
##
## Description :
##   Fetches a comic whose image URL can be built directly from the
##   'search' template (see parseComic), converts it to JPEG and
##   stores it as <image dir>/<comic>-<DD>.jpg.
##
## Arguments   :
##   $comics - reference to the comic table
##   $comic  - key of the comic to fetch
##   $date   - reference to the date hash
##
## Returns     :
##   The status returned by system() for the wget | convert pipeline
##   (0 on success).
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Single-quote a value for the shell; URLs routinely contain
    ## characters such as & and ? that the shell would act on.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ##
    ## Save the file to the appropriate directory.  $imageDir is the
    ## directory the main program creates and later scans for resizing;
    ## with the default configuration it is ./images/<MMYY>.
    ##
    my $target = "$imageDir/$comic-" . $date->{'day2'} . ".jpg";

    my $cmd = "wget -q " . $quote->( $file ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $quote->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - " . $quote->( "jpeg:$target" );

    return system( $cmd );
}
266:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Fetches the comic's web page, looks for the strip's image URL
##   using the regular expression in the 'search' field (which must
##   contain a capture group around the URL), then downloads that
##   image, converts it to JPEG and stores it as
##   <image dir>/<comic>-<DD>.jpg.
##
##   If several lines of the page match, the last match is used.
##   When the captured text contains gif, jpg or png, the comic's
##   'ext' field is updated accordingly.
##
## Arguments   :
##   $comics - reference to the comic table
##   $comic  - key of the comic to fetch
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an "ERROR: ..." string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $pageURL = $comics->{$comic}{'url'};
    my $search  = $comics->{$comic}{'search'};
    my $failure = "ERROR: Could not download comic $comics->{$comic}{'fullName'}";

    ## Single-quote a value for the shell.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ## An empty pattern would silently reuse the last successful regex
    ## and an invalid one would abort the whole run.
    return $failure if ( ! defined $search || $search eq '' );
    my $pattern = eval { qr/$search/ };
    return $failure if ( ! defined $pattern );

    ##
    ## Read the page straight from wget; no shell and no temporary
    ## file are involved.
    ##
    my $page;
    if ( ! open( $page, '-|', 'wget', '-q', '-O', '-', $pageURL ) ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $pageURL . ")";
    }
    my @lines = <$page>;
    close( $page );

    ## scheme://host[:port] of the page, used for site-relative links.
    my ( $mainURL ) = $pageURL =~ m{\A(https?://[^/]+)}i;
    $mainURL = $pageURL if ( ! defined $mainURL );

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $comicLine;
    foreach my $line ( @lines ) {
        if ( ( $line =~ $pattern ) && defined $1 ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }
    return $failure if ( ! $comicLine );

    if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

    my $comicURL = ( $comicLine =~ m{\Ahttps?://}i ) ? $comicLine
                                                     : $mainURL . $comicLine;

    ##
    ## Save the file to the appropriate directory.  $imageDir is the
    ## directory the main program creates and later scans for resizing;
    ## with the default configuration it is ./images/<MMYY>.
    ##
    my $target = "$imageDir/$comic-" . $date->{'day2'} . ".jpg";

    my $cmd = "wget --user-agent=" . $quote->( $USER_AGENT ) .
              " --referer=" . $quote->( $pageURL ) .
              " -q " . $quote->( $comicURL ) .
              " -O - | /usr/bin/convert - " . $quote->( "jpeg:$target" );

    ## Report a failed pipeline instead of claiming success.
    return $failure if ( system( $cmd ) != 0 );

    return 0;
}
317:
#######################################################################
## Function    : parseComic
##
## Description :
##   Expands the placeholders in a comic's 'search' template:
##
##     __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##         -> the entries of the same name in the date hash
##     __ext__
##         -> the comic's 'ext' field
##
##   A trailing newline is removed from the result.
##
## Arguments   :
##   $comics - reference to the comic table
##   $comic  - key of the comic
##   $date   - reference to the date hash
##
## Returns     :
##   The expanded string.
##
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;

    ## Placeholder / replacement pairs, applied in this order.
    my @expansions = (
        [ '__year__',  $date->{'year'}  ],
        [ '__year2__', $date->{'year2'} ],
        [ '__mon__',   $date->{'mon'}   ],
        [ '__mon2__',  $date->{'mon2'}  ],
        [ '__day__',   $date->{'day'}   ],
        [ '__day2__',  $date->{'day2'}  ],
        [ '__ext__',   $comics->{$comic}{'ext'} ],
    );

    my $expanded = $comics->{$comic}{'search'};
    foreach my $pair ( @expansions ) {
        my ( $token, $value ) = @{ $pair };
        $expanded =~ s/\Q$token\E/$value/g;
    }
    chomp $expanded;

    return $expanded;
}
335:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash for the day being fetched, i.e. the local
##   time $days_ago days (of 86400 seconds each) before now.
##
## Returns     :
##   A hash with the keys
##     day, mon, year  - day of month, month (1-12), four-digit year
##     day2, mon2      - day and month zero-padded to two digits
##     year2           - last two digits of the year
##     dow             - weekday index from localtime (0 = Sunday)
##
#######################################################################
sub fetchDates () {
    my $when   = time - ( 86400 * $days_ago );
    my @fields = localtime( $when );

    my %dates = (
        'day'  => $fields[3],
        'mon'  => $fields[4] + 1,
        'year' => $fields[5] + 1900,
        'dow'  => $fields[6],
    );

    $dates{'year2'} = substr( $dates{'year'}, 2, 2 );
    $dates{'day2'}  = sprintf( "%02d", $dates{'day'} );
    $dates{'mon2'}  = sprintf( "%02d", $dates{'mon'} );

    return %dates;
}
1.8 nick 351:
###############################################################################
##
##  &fetchOptions( );
##
##  Parses the command line with Getopt::Long and returns the result as
##  a hash with the keys 'days', 'help' and 'man' (undef when the option
##  was not given).  A parse error or --help prints the usage text via
##  pod2usage; --man prints the full manual read from the DATA handle.
##
###############################################################################
sub fetchOptions {
    my %opts = map { $_ => undef } qw( days help man );

    my $parsed = &GetOptions(
        "days:i" => \$opts{'days'},
        "help|?" => \$opts{'help'},
        "man"    => \$opts{'man'},
    );

    &pod2usage( ) unless ( $parsed );
    &pod2usage( ) if ( defined $opts{'help'} );

    if ( defined $opts{'man'} ) {
        &pod2usage( { -verbose => 2, -input => \*DATA } );
    }

    return %opts;
}
372:
373: __END__
374:
375: =head1 NAME
376:
377: fetch.pl - Fetches comics and places them all locally in a single html file.
378:
379: =head1 SYNOPSIS
380:
381: fetch.pl [options]
382:
383: Options:
384: --days,d Fetch comics from X days ago
385: --help,? Display the basic help menu
386: --man,m Display the detailed man page
387:
388: =head1 DESCRIPTION
389:
390: =head1 HISTORY
391:
392: =head1 AUTHOR
393:
394: Nicholas DeClario <nick@declario.com>
395:
396: =head1 BUGS
397:
398: This is a work in progress. Please report all bugs to the author.
399:
400: =head1 SEE ALSO
401:
402: =head1 COPYRIGHT
403:
404: =cut
405:
406:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>