Annotation of comics/fetch.pl.new, revision 1.3
1.1 nick 1: #!/usr/bin/perl -w
2:
3: use strict;
4: use File::Path;
5: use Data::Dumper;
6:
7: ##
8: ## Some default values
9: ##
## RCS/CVS $Id$ keyword string; printed in the page footer by writeFooter.
my $ver = q/$Id: fetch.pl.new,v 1.2 2011-08-18 12:50:06 nick Exp $/;

## Comic definitions, plus the "key = value" settings that readComicConfig
## stores under the reserved 'configs' key.
my $comicFile = "comics.conf";
my %comics    = &readComicConfig ( $comicFile );
my %dates     = &fetchDates();

## Output locations. Each setting falls back to a default when it is not
## present in the configuration file.
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
               "/$dates{'mon2'}$dates{'year2'}";
my $indexDir = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );

## User agent string handed to wget by the download functions.
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";


my $DATE = `date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
## The subs are defined further down in the file, so they are called with
## the & form, which does not depend on any prototype being known yet.
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## 'configs' is the settings entry, not a comic. Compare the key exactly
    ## so that a comic whose name merely contains "config" is still fetched.
    next if ( $comic eq 'configs' );

    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE = `date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End

45:
46: #######################################################################
47: ## Function : downloadComic
48: ##
49: ## Description :
50: ## This function determines the download method being used to
51: ## retrieve the comic and calls the appropriate function.
52: ##
53: ## If the mode is invalid an error will be returned.
54: ##
55: #######################################################################
## Arguments (downloadComic) :
##   $comics - reference to the hash of comic definitions, keyed by name
##   $comic  - key of the comic to download
##   $date   - reference to the hash of date fields; passed to the downloader
##
## Returns :
##   Mode 1 -> the result of indexDownload
##   Mode 2 -> the result of directDownload
##   Anything else (including a missing mode) -> an "ERROR: ..." string
##
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## A missing mode is treated like any other unknown mode.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## Hand the caller's own $comics reference on to the downloader rather
    ## than reaching for a file-level hash.
    return indexDownload ( $comics, $comic, $date )  if ( $mode eq '1' );
    return directDownload ( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
72:
73: #######################################################################
74: #######################################################################
## Function : readComicConfig
##
## Description :
##   Reads the comic configuration file and returns its contents as a hash.
##
##   Lines starting with '#' are comments and are ignored.
##   A line containing at least two commas defines a comic:
##       name,url,search,mode,fullName,ext
##   and is stored as $result{name}{url|search|mode|fullName|ext}, with
##   $result{name}{error} initialised to 0.
##   A line of the form "key = value" is stored as $result{configs}{key}.
##
## Arguments :
##   $comicFile - path of the configuration file
##
## Returns :
##   The configuration hash (as a list). If the file cannot be opened a
##   warning is printed and an empty hash is returned.
##
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my $fh;
    if ( ! open ( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't open comic configuration '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Strip the line ending so the last field does not carry it along.
        $line =~ s/\r?\n\z//;

        ## Comments are skipped outright; a commented-out "key = value" line
        ## must not end up as a setting.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            ## Limit -1 keeps trailing empty fields, so an empty last field
            ## becomes '' rather than undef.
            my @res  = split /,/, $line, -1;
            my $name = $res[0];

            @{ $comicConfig{$name} }{ qw(url search mode fullName ext) } = @res[1 .. 5];
            $comicConfig{$name}{'error'} = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close ( $fh );

    return %comicConfig;
}
99:
100: #######################################################################
101: #######################################################################
## Function : writeComic
##
## Description :
##   Appends one table row for a comic to the day's index page
##   ($indexDir/index-<year2><mon2><day2>.html). When the comic's 'error'
##   entry is false the row shows the downloaded image; otherwise it shows
##   the error value in red.
##
## Arguments :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic to write
##   $date   - reference to the hash of date fields
##
## Returns :
##   Always 0. A warning is printed if the index file cannot be written.
##
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $fullName = $comics->{$comic}{'fullName'};
    my $url      = $comics->{$comic}{'url'};
    my $error    = $comics->{$comic}{'error'};
    my $content;

    if ( ! $error ) {
        $content = <<EOF;

<!-- ********* Begin $comic ($fullName) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$fullName</b></font>
<font size="-2">
<a href="$url">
$url
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($fullName) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td> so the closing tags below
        ## are balanced and the page stays well-formed.
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$fullName</b></font>
<font size="-2">
<a href="$url">
$url
</a>
</font><br/>
<font color="red"><b>$comic : $error</b></font><br/>
</td>
</tr>
EOF
    }

    my $fh;
    if ( ! open ( $fh, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to index file '$indexFile': $!\n";
        return 0;
    }
    print {$fh} $content;
    close ( $fh ) or warn "WARNING: Error closing index file '$indexFile': $!\n";

    return 0;
}
144:
145:
146: #######################################################################
147: #######################################################################
## Function : writeMainIndex
##
## Description :
##   Not implemented yet: the body does nothing.
##
## Arguments :
##   $date - reference to the hash of date fields (currently unused)
##
## Returns :
##   Nothing.
##
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
152:
153:
154: #######################################################################
155: #######################################################################
## Function : writeFooter
##
## Description :
##   Appends the closing markup to the day's index page
##   ($indexDir/index-<year2><mon2><day2>.html): the end of the table, the
##   generation time, the script version and the validator badge.
##
## Arguments :
##   $date - reference to the hash of date fields
##
## Returns :
##   Nothing. A warning is printed if the index file cannot be written.
##
sub writeFooter {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";

    ## Drop the trailing newline so it does not land inside the <font> tag.
    my $sysDate = `date`;
    chomp $sysDate;

    my $fh;
    if ( ! open ( $fh, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to index file '$indexFile': $!\n";
        return;
    }
    print {$fh} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font></font>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    close ( $fh ) or warn "WARNING: Error closing index file '$indexFile': $!\n";

    return;
}
180:
181: #######################################################################
182: #######################################################################
## Function : checkDir
##
## Description :
##   Makes sure every given directory exists, creating it (and any missing
##   parent directories) with File::Path's mkpath when it does not.
##
## Arguments :
##   A list of directory paths, array references of directory paths, or a
##   mix of both. Array references are flattened so each path is tested
##   individually. Undefined and empty entries are ignored.
##
## Returns :
##   Nothing.
##
sub checkDir {
    my @dirs = map { ref ( $_ ) eq 'ARRAY' ? @{$_} : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath ( $dir ) if ( ! -d $dir );
    }

    return;
}
190:
191: #######################################################################
192: #######################################################################
## Function : writeTitle
##
## Description :
##   Creates (or truncates) the day's index page
##   ($indexDir/index-<year2><mon2><day2>.html) and writes the document
##   header, the page title and the opening tag of the comics table.
##
## Arguments :
##   $date - reference to the hash of date fields
##
## Returns :
##   Nothing. A warning is printed if the index file cannot be written.
##
sub writeTitle {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $today = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    my $fh;
    if ( ! open ( $fh, '>', $indexFile ) ) {
        warn "WARNING: Can't create index file '$indexFile': $!\n";
        return;
    }
    print {$fh} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF
    close ( $fh ) or warn "WARNING: Error closing index file '$indexFile': $!\n";

    return;
}
214:
215: #######################################################################
216: #######################################################################
## Function : directDownload
##
## Description :
##   Downloads a comic whose image URL is built directly from the 'search'
##   template (see parseComic), converts it to JPEG and stores it as
##   $imageDir/<comic>-<day2>.jpg.
##
## Arguments :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic to download
##   $date   - reference to the hash of date fields
##
## Returns :
##   The return value of system() for the wget | convert pipeline
##   (0 on success).
##
sub directDownload {
    my ( $comics, $comic, $date ) = @_;

    ## & form: parseComic is defined later in the file, so the call must not
    ## depend on a prototype being known here.
    my $file = &parseComic ( $comics, $comic, $date );

    ## Quote a value for the shell so characters such as & ? ; in a URL are
    ## passed through literally instead of being interpreted.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/^\s+//;
        $arg =~ s/\s+$//;
        $arg =~ s/'/'\\''/g;
        return "'$arg'";
    };

    ##
    ## Save the file to the image directory prepared by the main program
    ##
    my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

    my $cmd = "wget -q " . $quote->( $file ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $quote->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - " . $quote->( "jpeg:" . $target );
    return system ( $cmd );
}
230:
231: #######################################################################
232: #######################################################################
## Function : indexDownload
##
## Description :
##   Downloads the comic's index page, scans it with the comic's 'search'
##   regex (whose first capture group must yield the image URL), then
##   downloads that image, converts it to JPEG and stores it as
##   $imageDir/<comic>-<day2>.jpg. When several lines match, the last
##   match is used. A relative image URL is resolved against the
##   scheme://host part of the index page URL.
##
##   Side effect: when the image URL contains gif, jpg or png, that value
##   is stored in the comic's 'ext' entry.
##
## Arguments :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic to download
##   $date   - reference to the hash of date fields
##
## Returns :
##   0 on success, otherwise an "ERROR: ..." string.
##
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $comicIndex = $indexDir . "/index." . $comic;

    ## Quote a value for the shell so characters such as & ? ; in a URL are
    ## passed through literally instead of being interpreted.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'$arg'";
    };

    my $pageURL = $comics->{$comic}{'url'};
    $pageURL = '' if ( ! defined $pageURL );
    $pageURL =~ s/^\s+//;
    $pageURL =~ s/\s+$//;

    ## Fetch the index page into a scratch file. The list form of system()
    ## runs wget without a shell, so the URL needs no quoting here.
    system ( 'wget', '-q', $pageURL, '-O', $comicIndex );

    my $fh;
    if ( ! open ( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $comics->{$comic}{'url'} . ")";
    }
    my @lines = <$fh>;
    close ( $fh );

    unlink ( $comicIndex );

    ## Reduce the page URL to scheme://host[:port]; relative image paths are
    ## appended to this. Without a scheme, cut at the first slash instead.
    my $mainURL = $pageURL;
    if ( $pageURL =~ m{\A([a-z][a-z0-9+.\-]*://[^/]+)}i ) {
        $mainURL = $1;
    }
    else {
        $mainURL =~ s{/.*}{}s;
    }

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $pattern = $comics->{$comic}{'search'};
    my $comicLine;
    if ( defined $pattern && $pattern ne '' ) {
        foreach my $line ( @lines ) {
            ## Only accept a match that actually captured something.
            if ( $line =~ m/$pattern/ && defined $1 ) {
                $comicLine = $1;
            }
        }
    }
    if ( defined $comicLine ) {
        $comicLine =~ s/^\s+//;
        $comicLine =~ s/\s+$//;
    }

    if ( $comicLine ) {
        if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

        ## Absolute URLs are used as they are; anything else is joined to
        ## the site root with exactly one slash.
        my $comicURL;
        if ( $comicLine =~ m{\Ahttps?://}i ) {
            $comicURL = $comicLine;
        }
        else {
            $comicURL = $mainURL . ( $comicLine =~ m{\A/} ? "" : "/" ) . $comicLine;
        }

        ##
        ## Save the file to the image directory prepared by the main program
        ##
        my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

        my $cmd = "wget --user-agent=" . $quote->( $USER_AGENT ) .
                  " --referer=" . $quote->( $pageURL ) .
                  " -q " . $quote->( $comicURL ) .
                  " -O - | /usr/bin/convert - " . $quote->( "jpeg:" . $target );

        ## Report a failed pipeline instead of claiming success.
        if ( system ( $cmd ) != 0 ) {
            return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
        }
        return 0;
    }

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
281:
282: #######################################################################
283: #######################################################################
## Function : parseComic
##
## Description :
##   Expands the placeholders in the comic's 'search' template:
##       __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##   are replaced by the matching entries of $date, and __ext__ by the
##   comic's 'ext' entry. A trailing newline is removed from the result.
##   Undefined values expand to the empty string.
##
## Arguments :
##   $comics - reference to the hash of comic definitions
##   $comic  - key of the comic whose template is expanded
##   $date   - reference to the hash of date fields
##
## Returns :
##   The expanded string.
##
sub parseComic {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};
    $string = '' if ( ! defined $string );

    ## Placeholder / value pairs, applied in this order.
    my @fields = (
        [ '__year__',  $date->{'year'}  ],
        [ '__year2__', $date->{'year2'} ],
        [ '__mon__',   $date->{'mon'}   ],
        [ '__mon2__',  $date->{'mon2'}  ],
        [ '__day__',   $date->{'day'}   ],
        [ '__day2__',  $date->{'day2'}  ],
        [ '__ext__',   $comics->{$comic}{'ext'} ],
    );

    foreach my $field ( @fields ) {
        my ( $placeholder, $value ) = @{$field};
        $value = '' if ( ! defined $value );
        $string =~ s/\Q$placeholder\E/$value/g;
    }
    chomp $string;

    return $string;
}
299:
300: #######################################################################
301: #######################################################################
## Function : fetchDates
##
## Description :
##   Builds the hash of date fields for the current local time:
##       day, mon, year - day of month, month (1-12), four-digit year
##       dow            - day of week as reported by localtime
##       day2, mon2     - day and month padded to two digits
##       year2          - last two digits of the year
##
## Returns :
##   The date hash (as a list).
##
sub fetchDates () {
    my ( $mday, $month, $year, $wday ) = ( localtime )[3,4,5,6];

    ## If you missed a day or two, reflect it here:
    # $mday -= 1;   ## <-- yesterday

    ## localtime counts years from 1900 and months from 0.
    $year  += 1900;
    $month += 1;

    my %dates = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $year,
        'dow'   => $wday,
        'year2' => substr ( $year, 2, 2 ),
        'day2'  => sprintf ( "%02d", $mday ),
        'mon2'  => sprintf ( "%02d", $month ),
    );

    return %dates;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>