Annotation of comics/fetch.pl.new, revision 1.13
1.1 nick 1: #!/usr/bin/perl -w
2:
3: use strict;
4: use File::Path;
5: use Data::Dumper;
1.8 nick 6: use Pod::Usage;
7: use Getopt::Long;
1.1 nick 8:
##
## Default settings: version tag, configuration file, command line
## options and the output locations derived from them.
##
my $ver       = q/$Id: fetch.pl.new,v 1.12 2013-03-01 14:38:47 nick Exp $/;
my $comicFile = "comics.conf";

## Comic definitions, plus the general settings kept under 'configs'.
my %comics = &readComicConfig( $comicFile );

## Command line options; --days selects how many days back to fetch.
my %opts     = &fetchOptions( );
my $days_ago = $opts{'days'} ? $opts{'days'} : 0;

## Date parts for the day being fetched (relies on $days_ago above).
my %dates = &fetchDates();

## Output directories, each falling back to a default when not configured.
## Images are grouped per month in a sub-directory named <mon2><year2>.
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = join( "/",
    $baseDir,
    ( $comics{'configs'}{'image_directory'} || "images" ),
    $dates{'mon2'} . $dates{'year2'},
);
my $indexDir = join( "/",
    $baseDir,
    ( $comics{'configs'}{'index_directory'} || "indexes" ),
);

## Browser identification sent with the comic downloads.
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

## Day names indexed by localtime's day-of-week value (0 = Sunday).
my @days = (
    'Sunday', 'Monday', 'Tuesday', 'Wednesday',
    'Thursday', 'Friday', 'Saturday',
);
1.1 nick 24:
25:
my $DATE=`date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##

## Make sure the image and index directories exist before writing to them.
&checkDir ( [ $imageDir, $indexDir ] );

## Start a fresh index page for the day being fetched.
&writeTitle ( \%dates );

## Download every configured comic and append its row to the index page.
## The 'configs' entry holds general settings, not a comic, so it is skipped.
foreach my $comic ( sort keys %comics ) {
    next if ( $comic =~ m/config/ );
    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

##
## Shrink any of today's images that are wider than 640 pixels.
##
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";
foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) )
{
    ## Run identify through a list-form pipe so the file name is never
    ## interpreted by a shell.
    my $info = '';
    if ( open my $identify, '-|', '/usr/bin/identify', $file ) {
        local $/ = undef;
        my $output = <$identify>;
        $info = $output if defined $output;
        close $identify;
    }

    ## The width is the first number of the last WxH geometry on the line.
    my ( $width ) = $info =~ m/.*\s(\d+)x\d+/;
    if ( ! defined $width ) {
        warn "WARNING: Could not determine the width of $file; not resizing\n";
        next;
    }

    next if ( $width <= 640 );

    ## List-form system: again no shell, so odd file names are safe.
    if ( system( '/usr/bin/convert', '-resize', '640', $file, $file ) != 0 ) {
        warn "WARNING: Could not resize $file\n";
    }
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
60:
#######################################################################
## Function : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function.
##
##   If the mode is invalid an error will be returned.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash for the day being fetched
##
## Returns :
##   Whatever the selected download function returns (a false value on
##   success, an error value otherwise), or an "ERROR: ..." string when
##   the comic's mode is neither 1 (index) nor 2 (direct).
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## Normalise the mode so a missing or padded value does not warn or
    ## fail to match.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );
    $mode =~ s/\A\s+//;
    $mode =~ s/\s+\z//;

    ## The handlers are called with & so that any prototype declared on
    ## them is ignored; they all take three arguments.  The caller's own
    ## $comics reference is handed on rather than the file-level hash.
    if ( $mode eq '1' ) {
        return &indexDownload ( $comics, $comic, $date );
    }
    if ( $mode eq '2' ) {
        return &directDownload ( $comics, $comic, $date );
    }

    ## Fall back to the comic key when no display name is configured.
    my $name = $comics->{$comic}{'fullName'};
    $name = $comic if ( ! defined $name );

    return "ERROR: Unknown download method specified for $name.";
}
87:
#######################################################################
## Function : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Lines starting with '#' are
##   comments.  A line containing at least two commas describes a comic:
##
##     name,url,search,mode,fullName,ext
##
##   Any other line of the form "key = value" is stored as a general
##   setting under the 'configs' key.
##
## Parameters :
##   $comicFile - path of the configuration file
##
## Returns :
##   A hash keyed by comic name (plus 'configs').  Each comic entry has
##   the keys url, search, mode, fullName, ext and error (initially 0).
##   An empty hash is returned, with a warning, when the file cannot be
##   opened.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my $fh;
    if ( ! open $fh, '<', $comicFile ) {
        warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field does not carry it.
        $line =~ s/\r?\n\z//;

        ## Comments are skipped outright; otherwise a commented-out
        ## "key = value" line would be stored as a setting.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            my @res = split /,/, $line;
            $comicConfig{$res[0]}{'url'}      = $res[1];
            $comicConfig{$res[0]}{'search'}   = $res[2];
            $comicConfig{$res[0]}{'mode'}     = $res[3];
            $comicConfig{$res[0]}{'fullName'} = $res[4];
            $comicConfig{$res[0]}{'ext'}      = $res[5];
            $comicConfig{$res[0]}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close $fh;

    return %comicConfig;
}
114:
#######################################################################
## Function : writeComic
##
## Description :
##   Appends one table row for a comic to the day's index page.  When
##   the comic's 'error' value is false the row shows the downloaded
##   image; otherwise it shows the error in red.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to write
##   $date   - reference to the date hash for the day being fetched
##
## Returns :
##   0 when the row was written, 1 when the index page could not be
##   opened (a warning is printed in that case).
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    my $content;
    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
  <td align="left">
    <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
    <font size="-2">
      <a href="$comics->{$comic}{'url'}">
        $comics->{$comic}{'url'}
      </a>
    </font><br/>
    <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
    <br/><br/>
  </td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td> so the table stays
        ## well formed.
        $content = <<EOF;
<tr>
  <td align="left">
    <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
    <font size="-2">
      <a href="$comics->{$comic}{'url'}">
        $comics->{$comic}{'url'}
      </a>
    </font><br/>
    <font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
  </td>
</tr>
EOF
    }

    my $index;
    if ( ! open $index, '>>', $indexFile ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return 1;
    }
    print {$index} $content;
    close( $index ) or warn "WARNING: Can't close $indexFile: $!\n";

    return 0;
}
161:
162:
#######################################################################
## Function : writeMainIndex
##
## Description :
##   Placeholder for writing a main index page.  It unpacks its date
##   argument and does nothing else.
##
#######################################################################
sub writeMainIndex ($$) {
    my ( $dateInfo ) = @_;

}
169:
170:
#######################################################################
## Function : writeFooter
##
## Description :
##   Appends the closing markup to the day's index page: the end of the
##   comic table, the generation time, the script version, a link to
##   the CVS repository and the XHTML validator badge.
##
## Parameters :
##   $date - reference to the date hash for the day being fetched
##
## Returns :
##   Nothing useful.  A warning is printed when the index page cannot
##   be opened.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";

    ## Strip the newline that `date` adds so it is not embedded in the
    ## markup.
    my $sysDate = `date`;
    $sysDate = '' if ( ! defined $sysDate );
    chomp $sysDate;

    my $index;
    if ( ! open $index, '>>', $indexFile ) {
        warn "WARNING: Can't append to $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
  <font size="2">
    Generated on: <font color="green">$sysDate</font><br/>
    Version: <font color="green">$ver</font><br />
    CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
  <p>
    <a href="http://validator.w3.org/check?uri=referer"><img
       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
  </p>
</center>

</body>
</html>
EOF
    close( $index ) or warn "WARNING: Can't close $indexFile: $!\n";

    return;
}
200:
#######################################################################
## Function : checkDir
##
## Description :
##   Creates every listed directory, including missing parent
##   directories, that does not exist yet.
##
## Parameters :
##   @_ - directory names; each argument may be a plain path or a
##        reference to an array of paths
##
## Returns :
##   Nothing.  mkpath reports its own failures.
##
#######################################################################
sub checkDir {
    my @dir = @_;

    foreach my $entry ( @dir ) {
        ## Expand array references so each path gets its own -d test.
        my @paths = ( ref( $entry ) eq 'ARRAY' ) ? @{ $entry } : ( $entry );

        foreach my $path ( @paths ) {
            next if ( ! defined $path || $path eq '' );
            next if ( -d $path );
            mkpath( $path );
        }
    }

    return;
}
210:
#######################################################################
## Function : writeTitle
##
## Description :
##   Creates (or truncates) the day's index page and writes the XHTML
##   header, the page title and the opening tag of the comic table.
##
## Parameters :
##   $date - reference to the date hash for the day being fetched
##
## Returns :
##   Nothing useful.  A warning is printed when the index page cannot
##   be created.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $sd = substr( $days[$date->{'dow'}], 0, 3 );
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
                    $date->{'mon2'} . $date->{'day2'} . "-" .
                    $sd . ".html";
    my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" .
                $date->{'day'} . "/" . $date->{'year'};

    my $index;
    if ( ! open $index, '>', $indexFile ) {
        warn "WARNING: Can't create $indexFile: $!\n";
        return;
    }

    ## The page declares XHTML 1.0, so the empty <link> element has to
    ## be self-closed like the <meta> element before it.
    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
  <link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
  <title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF
    close( $index ) or warn "WARNING: Can't close $indexFile: $!\n";

    return;
}
237:
#######################################################################
## Function : directDownload
##
## Description :
##   Downloads a comic whose image URL can be built directly from the
##   'search' template of its configuration (see parseComic), converts
##   it to JPEG and stores it as <comic>-<day2>.jpg in the image
##   directory.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash for the day being fetched
##
## Returns :
##   0 on success, an "ERROR: ..." string otherwise.
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    my $name = $comics->{$comic}{'fullName'};
    $name = $comic if ( ! defined $name );

    if ( ! defined $file || $file eq '' ) {
        return "ERROR: No download URL configured for $name";
    }

    ## Single-quote a value for the shell.  The download needs a pipe, so
    ## a shell is unavoidable, but URLs containing & or ? must not be
    ## interpreted by it.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'$arg'";
    };

    ##
    ## Save the file to the appropriate directory.  $imageDir is the same
    ## directory that is created at start-up and scanned for resizing.
    ##
    my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

    my $cmd = join( ' ',
        'wget', '-q', $quote->( $file ),
        '--referer=' . $quote->( $comics->{$comic}{'url'} ),
        '--user-agent=' . $quote->( $USER_AGENT ),
        '-O', '-',
        '|',
        '/usr/bin/convert', '-', $quote->( "jpeg:" . $target ),
    );

    return 0 if ( system( $cmd ) == 0 );

    return "ERROR: Could not download comic $name";
}
253:
#######################################################################
## Function : indexDownload
##
## Description :
##   Downloads the comic's web page, searches it for the image URL
##   using the regular expression in the comic's 'search' setting (the
##   first capture group is the URL; the last matching line wins),
##   then downloads that image, converts it to JPEG and stores it as
##   <comic>-<day2>.jpg in the image directory.
##
##   The comic's 'ext' setting is updated when the image URL contains
##   gif, jpg or png.
##
## Parameters :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash for the day being fetched
##
## Returns :
##   0 on success, an "ERROR: ..." string otherwise.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my ( @lines, $comicLine );

    my $url  = $comics->{$comic}{'url'};
    my $name = $comics->{$comic}{'fullName'};
    $url  = ''     if ( ! defined $url );
    $name = $comic if ( ! defined $name );

    ## Temporary copy of the comic's web page.
    my $comicIndex = $indexDir . "/index." . $comic;

    ## List-form system: no shell, so the URL needs no quoting.  The
    ## exit status is not checked here; an empty or missing page is
    ## reported below.
    system( 'wget', '-q', $url, '-O', $comicIndex );

    my $fh;
    if ( ! open $fh, '<', $comicIndex ) {
        return "ERROR: Can't open index file for " . $name .
               " (" . $url . ")";
    }
    @lines = <$fh>;
    close( $fh );

    unlink( $comicIndex );

    ## Scheme and host of the comic page, used for relative image paths.
    my $mainURL = $url;
    if ( $url =~ m{\A(https?://[^/]+)}i ) {
        $mainURL = $1;
    }

    ##
    ## Find the comic strip URL based on the specified regex in the search.
    ## A broken expression in the configuration must not end the whole run.
    ##
    my $search = $comics->{$comic}{'search'};
    if ( ! defined $search || $search eq '' ) {
        return "ERROR: No search pattern configured for $name";
    }
    my $pattern = eval { qr/$search/ };
    if ( ! defined $pattern ) {
        return "ERROR: Invalid search pattern for $name";
    }

    foreach my $line (@lines) {
        if ( $line =~ $pattern ) {
            next if ( ! defined $1 );
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    if ( $comicLine ) {
        if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

        my $comicURL;
        if ( $comicLine =~ m/http/ ) {
            $comicURL = $comicLine;
        }
        elsif ( $comicLine =~ m{\A//} ) {
            ## Protocol-relative URL: reuse the scheme of the comic page.
            my ( $scheme ) = $url =~ m{\A(https?):}i;
            $scheme = 'http' if ( ! defined $scheme );
            $comicURL = $scheme . ":" . $comicLine;
        }
        else {
            $comicURL = $mainURL . $comicLine;
        }

        ## Single-quote a value for the shell.  The download needs a pipe,
        ## so a shell is unavoidable, but the URL must not be interpreted
        ## by it.
        my $quote = sub {
            my ( $arg ) = @_;
            $arg = '' if ( ! defined $arg );
            $arg =~ s/'/'\\''/g;
            return "'$arg'";
        };

        ##
        ## Save the file to the appropriate directory.  $imageDir is the
        ## same directory that is created at start-up and scanned for
        ## resizing.
        ##
        my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

        my $cmd = join( ' ',
            'wget',
            '--user-agent=' . $quote->( $USER_AGENT ),
            '--referer=' . $quote->( $url ),
            '-q', $quote->( $comicURL ),
            '-O', '-',
            '|',
            '/usr/bin/convert', '-', $quote->( "jpeg:" . $target ),
        );

        return 0 if ( system( $cmd ) == 0 );
    }

    return "ERROR: Could not download comic $name";
}
304:
#######################################################################
## Function : parseComic
##
## Description :
##   Builds the download string for a comic from its 'search' template
##   by filling in the placeholders __year__, __year2__, __mon__,
##   __mon2__, __day__, __day2__ (taken from the date hash) and __ext__
##   (the comic's 'ext' setting).  A trailing newline is removed.
##
#######################################################################
sub parseComic ($$) {
    my ( $comics, $comic, $date ) = @_;
    my $template = $comics->{$comic}{'search'};

    ## Placeholder name => replacement value, applied in this order.
    my @fills = (
        [ 'year',  $date->{'year'}  ],
        [ 'year2', $date->{'year2'} ],
        [ 'mon',   $date->{'mon'}   ],
        [ 'mon2',  $date->{'mon2'}  ],
        [ 'day',   $date->{'day'}   ],
        [ 'day2',  $date->{'day2'}  ],
        [ 'ext',   $comics->{$comic}{'ext'} ],
    );

    foreach my $fill ( @fills ) {
        my ( $token, $value ) = @{ $fill };
        $template =~ s/__${token}__/$value/g;
    }

    chomp $template;

    return $template;
}
322:
#######################################################################
## Function : fetchDates
##
## Description :
##   Builds the date hash for the day being fetched, which is the
##   current local time minus $days_ago days of 86400 seconds.
##
##   Keys: day, mon (1-12), year (four digits), dow (0 = Sunday) and
##   the two-character forms day2, mon2 and year2.
##
#######################################################################
sub fetchDates () {
    my $when  = time - ( 86400 * $days_ago );
    my @parts = localtime( $when );

    my %dates = (
        'day'  => $parts[3],
        'mon'  => $parts[4] + 1,
        'year' => $parts[5] + 1900,
        'dow'  => $parts[6],
    );

    $dates{'year2'} = substr( $dates{'year'}, 2, 2 );
    $dates{'day2'}  = sprintf( "%02d", $dates{'day'} );
    $dates{'mon2'}  = sprintf( "%02d", $dates{'mon'} );

    return %dates;
}
1.8 nick 338:
###############################################################################
##
##  &fetchOptions( );
##
##  Parses the command line into a hash with the keys 'days', 'help' and
##  'man'.  Prints the usage message when parsing fails or --help is given,
##  and the full manual when --man is given.
##
###############################################################################
sub fetchOptions {
    my %opts;

    ## Option specification => where the parsed value is stored.
    my @spec = (
        "days:i" => \$opts{'days'},
        "help|?" => \$opts{'help'},
        "man"    => \$opts{'man'},
    );

    my $parsed = GetOptions( @spec );
    pod2usage( ) unless ( $parsed );

    if ( defined $opts{'help'} ) {
        pod2usage( );
    }
    if ( defined $opts{'man'} ) {
        pod2usage( { -verbose => 2, -input => \*DATA } );
    }

    return %opts;
}
359:
360: __END__
361:
362: =head1 NAME
363:
364: fetch.pl - Fetches comics and places them all locally in a single html file.
365:
366: =head1 SYNOPSIS
367:
368: fetch.pl [options]
369:
370: Options:
371: --days,d Fetch comics from X days ago
372: --help,? Display the basic help menu
373: --man,m Display the detailed man page
374:
375: =head1 DESCRIPTION
376:
377: =head1 HISTORY
378:
379: =head1 AUTHOR
380:
381: Nicholas DeClario <nick@declario.com>
382:
383: =head1 BUGS
384:
385: This is a work in progress. Please report all bugs to the author.
386:
387: =head1 SEE ALSO
388:
389: =head1 COPYRIGHT
390:
391: =cut
392:
393:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>