Annotation of comics/fetch.pl.new, revision 1.15
1.1 nick 1: #!/usr/bin/perl -w
2:
1.15 ! nick 3: ###############################################################################
! 4: # $Log:$
! 5: ###############################################################################
! 6:
1.1 nick 7: use strict;
8: use File::Path;
9: use Data::Dumper;
1.8 nick 10: use Pod::Usage;
11: use Getopt::Long;
1.1 nick 12:
##
## Script-wide settings
##
## The order below matters: the configuration file and the command line
## are read first, because the date table and the output directories are
## derived from them.
##
my $ver        = '$Id: fetch.pl.new,v 1.14 2015/01/16 14:17:56 nick Exp $';
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

## Configuration file and command line options
my $comicFile = "comics.conf";
my %comics    = &readComicConfig ( $comicFile );
my %opts      = &fetchOptions( );

## fetchDates() reads $days_ago, so it has to be set before the call.
my $days_ago  = $opts{'days'} || 0;
my %dates     = &fetchDates();

## Output locations: images are grouped per month (MMYY), indexes are not.
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = join( "/",
                     $baseDir,
                     ( $comics{'configs'}{'image_directory'} || "images" ),
                     $dates{'mon2'} . $dates{'year2'} );
my $indexDir = join( "/",
                     $baseDir,
                     ( $comics{'configs'}{'index_directory'} || "indexes" ) );
1.1 nick 28:
29:
my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
## Flow: make sure the output directories exist, write the HTML header,
## fetch and record every configured comic, shrink oversized images,
## write the HTML footer and report the finish time.
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## 'configs' holds the "key = value" settings, not a comic definition.
    next if ( $comic eq 'configs' );

    ## 'dow' is the localtime() weekday index, where 0 is Sunday.  Comics
    ## configured with sunday = 0 are not fetched on that day.
    if ( ( $dates{'dow'} == 0 ) &&
         ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

##
## Shrink every image fetched for this day that is wider than 640 pixels.
## The external tools are started in list form so that file names never
## pass through a shell.
##
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";
foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) )
{
    my $info = '';
    if ( open( my $identify, '-|', '/usr/bin/identify', $file ) ) {
        local $/;
        $info = <$identify>;
        $info = '' if ( ! defined $info );
        close( $identify );
    }
    else {
        warn "Unable to run /usr/bin/identify on $file: $!\n";
        next;
    }

    ## Take the first "<width>x<height>" pair that identify printed; an
    ## image that could not be identified is left alone.
    next if ( $info !~ m/\s(\d+)x\d+/ );
    my $width = $1;

    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $width > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
66:
#######################################################################
## Function : downloadComic
##
## Description :
##  This function determines the download method being used to
##  retrieve the comic and calls the appropriate function.
##
##  If the mode is invalid an error will be returned.
##
## Parameters :
##  $comics - reference to the comic configuration hash
##  $comic  - key of the comic to download
##  $date   - reference to the date hash
##
## Returns :
##  The value returned by indexDownload (mode 1) or directDownload
##  (mode 2), or an "ERROR: ..." string for any other mode.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## A missing mode is treated like any other unknown mode.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## Hand the caller's hash reference on instead of reaching for a
    ## file-level variable.
    return indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
    return directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
93:
#######################################################################
## Function : readComicConfig
##
## Description :
##  Reads the comic configuration file.  Lines starting with '#' are
##  ignored.  A line containing at least two commas defines a comic:
##
##    name,url,search,mode,fullName,ext,sunday
##
##  __YEAR__, __MON__ and __DAY__ in such a line are replaced with
##  today's date.  Any other line of the form "key = value" is stored
##  under the 'configs' entry.
##
## Parameters :
##  $comicFile - path of the configuration file
##
## Returns :
##  A hash keyed by comic name (plus 'configs').  The hash is empty when
##  the file cannot be read; a warning is printed in that case.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my ( $year, $mon, $day ) = ( localtime( time ) )[5,4,3];
    $year += 1900;
    $mon = sprintf( "%02d", ( $mon + 1 ) );
    $day = sprintf( "%02d", $day );

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "Unable to read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field does not carry it.
        $line =~ s/\r?\n\z//;

        ## Comments are skipped for both kinds of line.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            $line =~ s/__YEAR__/$year/g;
            $line =~ s/__MON__/$mon/g;
            $line =~ s/__DAY__/$day/g;

            my @res = split /,/, $line;
            my $name = $res[0];

            $comicConfig{$name}{'url'}      = $res[1];
            $comicConfig{$name}{'search'}   = $res[2];
            $comicConfig{$name}{'mode'}     = $res[3];
            $comicConfig{$name}{'fullName'} = $res[4];
            $comicConfig{$name}{'ext'}      = $res[5];

            ## Default to 1 only when the field is absent, so that an
            ## explicit 0 survives.
            $comicConfig{$name}{'sunday'}   =
                ( defined $res[6] && length $res[6] ) ? $res[6] : 1;
            $comicConfig{$name}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
130:
#######################################################################
## Function : writeComic
##
## Description :
##  Appends one table row for a comic to the day's index file.  When
##  the comic's 'error' entry is false the row shows the image,
##  otherwise it shows the error text in red.
##
## Parameters :
##  $comics - reference to the comic configuration hash
##  $comic  - key of the comic to write
##  $date   - reference to the date hash
##
## Returns :
##  0 once the row is written, 1 when the index file cannot be opened.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $content;

    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr>/<td> so that the closing
        ## tags below are balanced.
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Unable to append to $indexFile: $!\n";
        return 1;
    }
    print {$index} $content;
    close( $index );

    return 0;
}
177:
178:
#######################################################################
## Function : writeMainIndex
##
## Description :
##  Placeholder; nothing is written yet.
##
## Parameters :
##  $date - reference to the date hash (currently unused)
##
## Returns :
##  Nothing (an empty list / undef).
##
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    ## Explicit return so the stub does not hand back the value of the
    ## argument assignment above.
    return;
}
185:
186:
#######################################################################
## Function : writeFooter
##
## Description :
##  Appends the closing HTML (end of table, generation time, version
##  string and validator badge) to the day's index file.
##
## Parameters :
##  $date - reference to the date hash
##
## Returns :
##  The result of closing the index file, or 0 when the file cannot be
##  opened.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    ## Strip the newline that `date` prints so it does not end up inside
    ## the <font> element.
    my $sysDate = `date`;
    chomp( $sysDate );

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "Unable to append footer to $indexFile: $!\n";
        return 0;
    }

    print {$index} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    return close( $index );
}
216:
#######################################################################
## Function : checkDir
##
## Description :
##  Creates every directory named in the arguments that does not exist
##  yet, including missing parent directories.
##
## Parameters :
##  A list of directory names and/or references to arrays of directory
##  names.  Undefined and empty names are ignored.
##
## Returns :
##  Nothing.
##
#######################################################################
sub checkDir {
    ## Callers pass an array reference; expand it so that the -d test
    ## below sees the directory names themselves.
    my @dirs = map { ( ref( $_ ) eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        next if ( -d $dir );
        mkpath( $dir );
    }

    return;
}
226:
#######################################################################
## Function : writeTitle
##
## Description :
##  Starts the day's index file (overwriting an existing one) with the
##  XHTML header, the page heading and the opening <table> tag.
##
## Parameters :
##  $date - reference to the date hash
##
## Returns :
##  The result of closing the index file, or 0 when the file cannot be
##  created.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "Unable to create $indexFile: $!\n";
        return 0;
    }

    ## The <link> element is self-closed: the page declares XHTML 1.0.
    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF
    return close( $index );
}
253:
#######################################################################
## Function : directDownload
##
## Description :
##  Builds the comic's image URL with parseComic, downloads it with
##  wget and pipes it through convert to store it as a JPEG in the
##  image directory.
##
## Parameters :
##  $comics - reference to the comic configuration hash
##  $comic  - key of the comic to download
##  $date   - reference to the date hash
##
## Returns :
##  The value returned by system() for the download pipeline (0 on
##  success).
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Wrap a value in single quotes for the shell; the URLs come from
    ## the configuration file and may contain characters such as '&'.
    my $quote = sub {
        my ( $text ) = @_;
        $text = '' if ( ! defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    ##
    ## Save the file to the configured image directory, the one the
    ## main program creates and scans, not a path relative to the
    ## current directory.
    ##
    my $cDate  = $date->{'day2'};
    my $target = "$imageDir/$comic-$cDate.jpg";

    my $cmd = "wget -q " . $quote->( $file ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $quote->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - " . $quote->( "jpeg:$target" );

    return system($cmd);
}
270:
#######################################################################
## Function : indexDownload
##
## Description :
##  Downloads the comic's web page, looks for the image location with
##  the comic's 'search' regular expression (its first capture group is
##  the location; the last matching line wins), then downloads the
##  image with wget and pipes it through convert to store it as a JPEG
##  in the image directory.
##
## Parameters :
##  $comics - reference to the comic configuration hash
##  $comic  - key of the comic to download
##  $date   - reference to the date hash
##
## Returns :
##  0 on success, otherwise an "ERROR: ..." string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $pageURL    = $comics->{$comic}{'url'};
    my $comicIndex = "$indexDir/index.$comic";
    my $comicLine;

    ## Wrap a value in single quotes for the shell.
    my $quote = sub {
        my ( $text ) = @_;
        $text = '' if ( ! defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    ## List form: the page URL never passes through a shell.
    system( 'wget', '-q', $pageURL, '-O', $comicIndex );

    my $fh;
    if ( ! open( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $comics->{$comic}{'url'} . ")";
    }
    my @lines = <$fh>;
    close( $fh );

    unlink( $comicIndex );

    ## Scheme and host of the page, used to complete relative locations.
    my $mainURL = ( $pageURL =~ m{^(\w+://[^/]+)} ) ? $1 : $pageURL;
    my $scheme  = ( $mainURL =~ m{^(\w+):} ) ? $1 : 'http';

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    foreach my $line ( @lines ) {
        if ( $line =~ m/$comics->{$comic}{'search'}/ ) {
            ## $#+ is the number of groups in the match that just
            ## succeeded; without a group $1 would be left over from an
            ## earlier match.
            next if ( $#+ < 1 || ! defined $1 );
            $comicLine = $1; chomp $comicLine;
        }
    }

    if ( $comicLine ) {
        if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

        my $comicURL;
        if    ( $comicLine =~ m{^https?://}i ) { $comicURL = $comicLine; }
        elsif ( $comicLine =~ m{^//} )         { $comicURL = $scheme . ":" . $comicLine; }
        elsif ( $comicLine =~ m{^/} )          { $comicURL = $mainURL . $comicLine; }
        else                                   { $comicURL = $mainURL . "/" . $comicLine; }

        ##
        ## Save the file to the configured image directory
        ##
        my $target = "$imageDir/$comic-" . $date->{'day2'} . ".jpg";
        my $cmd = "wget --user-agent=" . $quote->( $USER_AGENT ) .
                  " --referer=" . $quote->( $pageURL ) .
                  " -q " . $quote->( $comicURL ) .
                  " -O - | /usr/bin/convert - " . $quote->( "jpeg:$target" );

        ## A failed pipeline is reported instead of being recorded as a
        ## successful download.
        return 0 if ( system( $cmd ) == 0 );
    }

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
321:
#######################################################################
## Function : parseComic
##
## Description :
##  Expands the date and extension placeholders in the comic's
##  'search' string and removes a trailing newline.
##
## Parameters :
##  $comics - reference to the comic configuration hash
##  $comic  - key of the comic
##  $date   - reference to the date hash
##
## Returns :
##  The expanded string.
##
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};

    ## Placeholder / value pairs, applied in this order.
    my @expansions = (
        [ '__year__',  $date->{'year'} ],
        [ '__year2__', $date->{'year2'} ],
        [ '__mon__',   $date->{'mon'} ],
        [ '__mon2__',  $date->{'mon2'} ],
        [ '__day__',   $date->{'day'} ],
        [ '__day2__',  $date->{'day2'} ],
        [ '__ext__',   $comics->{$comic}{'ext'} ],
    );

    foreach my $pair ( @expansions ) {
        my ( $token, $value ) = @{ $pair };
        $string =~ s/$token/$value/g;
    }

    chomp $string;

    return $string;
}
339:
#######################################################################
## Function : fetchDates
##
## Description :
##  Builds the date table for the day that lies $days_ago days before
##  now (local time).
##
## Returns :
##  A hash with the keys
##    day, mon, year  - numeric day of month, month (1-12), 4 digit year
##    day2, mon2      - day and month padded to two digits
##    year2           - last two digits of the year
##    dow             - weekday index from localtime(), 0 is Sunday
##
#######################################################################
sub fetchDates () {
    my $when = time - (86400 * $days_ago );
    my ( $mday, $month, $year, $wday ) = ( localtime( $when ) )[3,4,5,6];

    ## localtime() counts years from 1900 and months from 0.
    $year  += 1900;
    $month += 1;

    my %dates = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $year,
        'dow'   => $wday,
        'year2' => substr( $year, 2, 2 ),
        'day2'  => sprintf( "%02d", $mday ),
        'mon2'  => sprintf( "%02d", $month ),
    );

    return %dates;
}
1.8 nick 355:
###############################################################################
##
## &fetchOptions( );
##
## Parses the command line (--days [N], --help / -?, --man) and returns
## the result as a hash with the keys 'days', 'help' and 'man'.  Options
## that were not given are undefined.  The usage text is shown for an
## invalid command line or --help, the full manual for --man.
##
###############################################################################
sub fetchOptions {
    ## Every key is present up front so the returned hash always has
    ## all three entries.
    my %opts = map { $_ => undef } qw( days help man );

    GetOptions( \%opts, "days:i", "help|?", "man" ) or pod2usage( );

    if ( defined $opts{'help'} ) {
        pod2usage( );
    }
    if ( defined $opts{'man'} ) {
        pod2usage( { -verbose => 2, -input => \*DATA } );
    }

    return %opts;
}
376:
377: __END__
378:
379: =head1 NAME
380:
381: fetch.pl - Fetches comics and places them all locally in a single html file.
382:
383: =head1 SYNOPSIS
384:
385: fetch.pl [options]
386:
387: Options:
388: --days,d Fetch comics from X days ago
389: --help,? Display the basic help menu
390: --man,m Display the detailed man page
391:
392: =head1 DESCRIPTION
393:
394: =head1 HISTORY
395:
396: =head1 AUTHOR
397:
398: Nicholas DeClario <nick@declario.com>
399:
400: =head1 BUGS
401:
402: This is a work in progress. Please report all bugs to the author.
403:
404: =head1 SEE ALSO
405:
406: =head1 COPYRIGHT
407:
408: =cut
409:
410:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>