Annotation of comics/fetch.pl.new, revision 1.4
1.1 nick 1: #!/usr/bin/perl -w
2:
3: use strict;
4: use File::Path;
5: use Data::Dumper;
6:
##
## Script-wide settings.  The comic configuration file is read first;
## every directory below is derived from it and from today's date.
##
my $ver        = q/$Id: fetch.pl.new,v 1.3 2011-09-18 14:04:29 nick Exp $/;
my $comicFile  = "comics.conf";
my %comics     = &readComicConfig ( $comicFile );
my %dates      = &fetchDates();

## Configured locations fall back to ".", "images" and "indexes".
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = join "/", $baseDir,
                           $comics{'configs'}{'image_directory'} || "images",
                           "$dates{'mon2'}$dates{'year2'}";
my $indexDir   = join "/", $baseDir,
                           $comics{'configs'}{'index_directory'} || "indexes";

## Browser identification sent with the image downloads.
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

## Announce the start of the run using the system clock.
chomp( my $DATE = `date` );
print STDOUT "Starting comic fetch at $DATE\n";
23:
##
## Main program starts here
##
## 1. Make sure the image and index directories exist.
## 2. Download every configured comic and append its entry to the index.
## 3. Shrink today's images that are wider than 640 pixels.
## 4. Append the page footer to the index.
##
&checkDir ( [ $imageDir, $indexDir ] );

#&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## 'configs' holds the "key = value" settings, it is not a comic.
    next if ( $comic eq 'configs' );

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

## Use the same day stamp the downloads were saved under, so a manually
## adjusted day in fetchDates is honoured here as well.
my $D = $dates{'day2'};
print "Finding in $imageDir/*-$D.jpg\n";
foreach my $file ( glob( "$imageDir/*-$D.jpg" ) ) {
    ## Ask identify for the pixel width only.  The list form of open
    ## keeps the file name away from the shell.
    my $identify;
    if ( ! open( $identify, '-|', '/usr/bin/identify', '-format', '%w\n', $file ) ) {
        warn "WARNING: Could not run identify on $file: $!\n";
        next;
    }
    my $size = <$identify>;
    close( $identify );

    if ( ! defined $size || $size !~ m/^(\d+)/ ) {
        warn "WARNING: Could not determine the width of $file\n";
        next;
    }
    my $width = $1;

    if ( $width > 640 ) {
        system( '/usr/bin/convert', '-resize', '640', $file, $file ) == 0
            or warn "WARNING: Could not resize $file\n";
    }
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
56:
#######################################################################
## Function    : downloadComic
##
## Description :
##   This function determines the download method being used to
##   retrieve the comic and calls the appropriate function.
##
##     mode 1 - indexDownload  (find the image URL in an index page)
##     mode 2 - directDownload (build the image URL from the date)
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   Whatever the selected download function returns (false on
##   success).  If the mode is invalid an error string is returned.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## Code references are resolved when called, so the download
    ## functions may be defined further down the file.
    my %fetcherFor = (
        1 => \&indexDownload,
        2 => \&directDownload,
    );

    ## Tolerate a missing mode and stray whitespace around it.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );
    $mode =~ s/^\s+//;
    $mode =~ s/\s+$//;

    my $fetcher = $fetcherFor{$mode};
    return $fetcher->( $comics, $comic, $date ) if ( $fetcher );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
83:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Three kinds of lines are
##   recognised:
##
##     # ...                                 comment, ignored
##     name,url,search,mode,fullName,ext     comic definition
##     key = value                           general setting
##
##   Comic definitions are stored under their name with the keys
##   'url', 'search', 'mode', 'fullName', 'ext' and 'error' (set to
##   0).  Settings are stored under the 'configs' key.
##
## Arguments   :
##   $comicFile - path of the configuration file
##
## Returns     :
##   The configuration hash.  It is empty when the file cannot be
##   opened; a warning is printed in that case.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my $fh;
    if ( ! open( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't read comic configuration $comicFile: $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field does not carry it.
        $line =~ s/\r?\n\z//;

        ## Comments are skipped for both line formats.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            ## -1 keeps trailing empty fields instead of dropping them.
            my ( $name, @fields ) = split /,/, $line, -1;
            @{ $comicConfig{$name} }{ qw( url search mode fullName ext ) }
                = @fields[ 0 .. 4 ];
            $comicConfig{$name}{'error'} = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close( $fh );

    return %comicConfig;
}
110:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for a comic to today's index page.  When
##   the comic's 'error' value is false the row shows the downloaded
##   image, otherwise it shows the error in red.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to write
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, 1 when the index file could not be written (a
##   warning is printed).
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $content;

    if ( ! $comics->{$comic}{'error'} ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td> so the closing tags
        ## below are balanced.
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "ERROR: Can't append to $indexFile: $!\n";
        return 1;
    }
    print {$index} $content;
    if ( ! close( $index ) ) {
        warn "ERROR: Can't finish writing $indexFile: $!\n";
        return 1;
    }

    return 0;
}
155:
156:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder for writing a main index page.  It is not implemented
##   yet and does nothing.
##
## Arguments   :
##   $date - reference to the date hash (currently unused)
##
## Returns     :
##   Nothing.
##
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
163:
164:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing part of the page to today's index file: the
##   end of the table, the generation time, the script version and
##   the validator badge.
##
## Arguments   :
##   $date - reference to the date hash
##
## Returns     :
##   0 on success, 1 when the index file could not be written (a
##   warning is printed).
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $sysDate = `date`;

    my $index;
    if ( ! open( $index, '>>', $indexFile ) ) {
        warn "ERROR: Can't append to $indexFile: $!\n";
        return 1;
    }

    print {$index} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font></font>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF

    if ( ! close( $index ) ) {
        warn "ERROR: Can't finish writing $indexFile: $!\n";
        return 1;
    }

    return 0;
}
191:
#######################################################################
## Function    : checkDir
##
## Description :
##   Creates every directory that does not exist yet, including any
##   missing parent directories.
##
## Arguments   :
##   A list of directory names, array references of directory names,
##   or a mix of both.
##
## Returns     :
##   Nothing.
##
#######################################################################
sub checkDir {
    ## The main program passes a single array reference; flatten it so
    ## each directory is tested on its own.
    my @dirs = map { ref $_ eq 'ARRAY' ? @{$_} : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath( $dir ) if ( ! -d $dir );
    }

    return;
}
201:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Starts today's index file, replacing any existing content, with
##   the XHTML header, the page title and the opening table tag.
##
## Arguments   :
##   $date - reference to the date hash
##
## Returns     :
##   0 on success, 1 when the index file could not be written (a
##   warning is printed).
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $today = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    my $index;
    if ( ! open( $index, '>', $indexFile ) ) {
        warn "ERROR: Can't create $indexFile: $!\n";
        return 1;
    }

    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF

    if ( ! close( $index ) ) {
        warn "ERROR: Can't finish writing $indexFile: $!\n";
        return 1;
    }

    return 0;
}
225:
#######################################################################
## Function    : directDownload
##
## Description :
##   Downloads a comic whose image URL can be built directly from the
##   date.  The URL comes from parseComic; the image is fetched with
##   wget and piped through convert, which stores it as a JPEG.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an error string.
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Single-quote a value for the shell so characters such as & and ?
    ## in a URL are passed through literally.
    my $quote = sub {
        my ( $text ) = @_;
        $text = '' if ( ! defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    ##
    ## Save the file to the appropriate directory
    ##
    ## NOTE(review): this path is relative to the current directory and
    ## does not follow the base/image directory settings - confirm.
    my $cDir   = $date->{'mon2'} . $date->{'year2'};
    my $cDate  = $date->{'day2'};
    my $target = "jpeg:images/$cDir/$comic-$cDate.jpg";

    my $cmd = join ' ',
        'wget', '-q', $quote->( $file ),
        '--referer='    . $quote->( $comics->{$comic}{'url'} ),
        '--user-agent=' . $quote->( $USER_AGENT ),
        '-O', '-',
        '|', '/usr/bin/convert', '-', $quote->( $target );

    return 0 if ( system( $cmd ) == 0 );

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'} ($file)";
}
241:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Downloads a comic whose image URL has to be looked up on a web
##   page.  The page is fetched with wget, each line is matched
##   against the comic's 'search' regex, and the first capture group
##   of the last matching line is used as the image location.  The
##   image is then fetched and piped through convert, which stores it
##   as a JPEG.  A gif/jpg/png found in the location is recorded as
##   the comic's 'ext'.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic to download
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an error string.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $entry      = $comics->{$comic};
    my $comicIndex = "indexes/index.$comic";
    my $comicLine;

    ## Single-quote a value for the shell so characters such as & and ?
    ## in a URL are passed through literally.
    my $quote = sub {
        my ( $text ) = @_;
        $text = '' if ( ! defined $text );
        $text =~ s/'/'\\''/g;
        return "'" . $text . "'";
    };

    ## An empty pattern would not mean "match nothing" in Perl, so
    ## refuse to search without one.
    if ( ! defined $entry->{'search'} || $entry->{'search'} eq '' ) {
        return "ERROR: No search pattern configured for " . $entry->{'fullName'};
    }

    ## Fetch the page into a temporary file.  The list form of system
    ## keeps the URL away from the shell.
    system( 'wget', '-q', $entry->{'url'}, '-O', $comicIndex );

    my $index;
    if ( ! open( $index, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $entry->{'fullName'} .
               " (" . $entry->{'url'} . ")";
    }
    my @lines = <$index>;
    close( $index );

    unlink( $comicIndex );

    ## Scheme and host of the page, used to complete relative image
    ## locations.  The URL is kept whole if it has no recognisable host.
    my $mainURL = $entry->{'url'};
    if ( $mainURL =~ m{^(https?://[^/]+)}i ) {
        $mainURL = $1;
    }

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    foreach my $line ( @lines ) {
        if ( $line =~ m/$entry->{'search'}/ && defined $1 ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    if ( ! $comicLine ) {
        return "ERROR: Could not download comic $entry->{'fullName'}";
    }

    ##
    ## Save the file to the appropriate directory
    ##
    ## NOTE(review): this path is relative to the current directory and
    ## does not follow the base/image directory settings - confirm.
    my $cDir   = $date->{'mon2'} . $date->{'year2'};
    my $cDate  = $date->{'day2'};
    my $target = "jpeg:images/$cDir/$comic-$cDate.jpg";

    if ( $comicLine =~ m/(gif|jpg|png)/i ) { $entry->{'ext'} = $1; }
    my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;

    my $cmd = join ' ',
        'wget',
        '--user-agent=' . $quote->( $USER_AGENT ),
        '--referer='    . $quote->( $entry->{'url'} ),
        '-q', $quote->( $comicURL ),
        '-O', '-',
        '|', '/usr/bin/convert', '-', $quote->( $target );

    return 0 if ( system( $cmd ) == 0 );

    return "ERROR: Could not download comic $entry->{'fullName'} ($comicURL)";
}
292:
#######################################################################
## Function    : parseComic
##
## Description :
##   Builds the download URL for a comic by filling in the
##   placeholders of its 'search' string:
##
##     __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##         replaced by the value of the same name in the date hash
##     __ext__
##         replaced by the comic's 'ext' value
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic
##   $date   - reference to the date hash
##
## Returns     :
##   The expanded string without a trailing newline.  An empty string
##   is returned when the comic has no 'search' value.
##
#######################################################################
sub parseComic {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};

    return '' if ( ! defined $string );

    ## The extension may still carry the line ending of the config
    ## line it was read from; it must not end up inside the URL.
    my $ext = $comics->{$comic}{'ext'};
    $ext = '' if ( ! defined $ext );
    $ext =~ s/\s+\z//;

    my %valueFor = (
        'year'  => $date->{'year'},
        'year2' => $date->{'year2'},
        'mon'   => $date->{'mon'},
        'mon2'  => $date->{'mon2'},
        'day'   => $date->{'day'},
        'day2'  => $date->{'day2'},
        'ext'   => $ext,
    );

    $string =~ s/__(year2?|mon2?|day2?|ext)__/$valueFor{$1}/g;
    chomp $string;

    return $string;
}
310:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash used throughout the script from the local
##   time:
##
##     day, mon, year   - day of month, month (1-12), four digit year
##     day2, mon2       - day and month padded to two digits
##     year2            - last two digits of the year
##     dow              - day of week as reported by localtime
##
## Returns     :
##   The date hash.
##
#######################################################################
sub fetchDates () {
    my @now = localtime;

    my %dates = (
        'day'  => $now[3],
        'mon'  => $now[4] + 1,       ## localtime counts months from 0
        'year' => $now[5] + 1900,    ## localtime counts years from 1900
        'dow'  => $now[6],
    );

    ## If you missed a day or two, reflect it here:
    # $dates{'day'}-=1; ## <-- 5 days ago

    $dates{'year2'} = substr $dates{'year'}, 2, 2;
    $dates{'day2'}  = sprintf "%02d", $dates{'day'};
    $dates{'mon2'}  = sprintf "%02d", $dates{'mon'};

    return %dates;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>