Annotation of comics/fetch.pl.new, revision 1.5
1.1 nick 1: #!/usr/bin/perl -w
2:
3: use strict;
4: use File::Path;
5: use Data::Dumper;
6:
##
## Some default values
##
my $ver       = q/$Id: fetch.pl.new,v 1.4 2011-09-23 01:36:54 nick Exp $/;
my $comicFile = "comics.conf";

## Set this to fetch comics from X days ago.  It has to be assigned BEFORE
## fetchDates() is called: fetchDates() reads this variable, and when the
## assignment came after the call the setting was silently ignored.
my $days_ago = 0;

my %comics   = &readComicConfig ( $comicFile );
my %dates    = &fetchDates();
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
               "/$dates{'mon2'}$dates{'year2'}";
my $indexDir = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";


my $DATE = `date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
## The subs are defined further down and some carry prototypes, so they are
## called with a leading '&' here, which bypasses prototype checking.
##
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## 'configs' holds the key = value settings, it is not a comic.
    next if ( $comic eq 'configs' );
    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

##
## Shrink every image fetched for this run that is wider than 640 pixels.
## The day comes from %dates (not from the system clock) so that it matches
## the file names written by the download functions when $days_ago is set.
##
my $D = $dates{'day2'};
print "Finding in $imageDir/*-$D.jpg\n";
foreach my $file ( glob( "$imageDir/*-$D.jpg" ) )
{
    ## List-form pipe open and list-form system: the file name never
    ## passes through a shell.
    my $width;
    if ( open my $identify, '-|', '/usr/bin/identify', '-format', '%w\n', $file ) {
        my @output = <$identify>;
        close $identify;
        ( $width ) = $output[0] =~ m/(\d+)/ if ( @output );
    }

    ## Skip files identify could not read.
    next unless ( defined $width );

    system( '/usr/bin/convert', '-resize', '640', $file, $file )
        if ( $width > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE = `date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
57:
#######################################################################
## Function    : downloadComic
##
## Description :
##   Looks at the 'mode' configured for a comic and calls the
##   appropriate download function:
##       mode 1 -> indexDownload
##       mode 2 -> directDownload
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash built by fetchDates
##
## Returns     :
##   Whatever the selected download function returns (a false value on
##   success).  If the mode is missing or invalid an error string is
##   returned instead.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## A missing mode is treated like an unknown one; surrounding
    ## whitespace (e.g. a trailing newline from the config file) is ignored.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' unless ( defined $mode );
    $mode =~ s/\A\s+|\s+\z//g;

    ## Hand on the hash reference we were given rather than the global hash.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
84:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Lines starting with '#' are
##   skipped.  A line containing at least two commas describes a comic:
##       id,url,search,mode,fullName,ext
##   Any other line of the form "key = value" is stored as a general
##   setting under the 'configs' key.
##
## Arguments   :
##   $comicFile - path of the configuration file
##
## Returns     :
##   A hash keyed by comic id (plus 'configs').  If the file cannot be
##   opened a warning is printed and an empty hash is returned.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my $fh;
    if ( ! open $fh, '<', $comicFile ) {
        warn "WARNING: Can't open comic config file $comicFile: $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending first so the last field of a comic line
        ## does not carry a newline around.
        $line =~ s/[\r\n]+\z//;

        ## Comments are skipped completely, so a commented-out
        ## "key = value" line is not picked up as a setting.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            my @res = split /,/, $line;
            $comicConfig{$res[0]}{'url'}      = $res[1];
            $comicConfig{$res[0]}{'search'}   = $res[2];
            $comicConfig{$res[0]}{'mode'}     = $res[3];
            $comicConfig{$res[0]}{'fullName'} = $res[4];
            $comicConfig{$res[0]}{'ext'}      = $res[5];
            $comicConfig{$res[0]}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close $fh;

    return %comicConfig;
}
111:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for a comic to the index page of the day.
##   If the comic's 'error' entry is false the row shows the image,
##   otherwise it shows the error text in red.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash built by fetchDates
##
## Returns     :
##   Always 0.  If the index file cannot be opened a warning is printed
##   and nothing is written.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";

    my $html;
    if ( ! $comics->{$comic}{'error'} ) {
        $html = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }
    else {
        ## The error row opens its own <tr><td>; without them the closing
        ## tags below had nothing to close and the page markup was broken.
        $html = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }

    my $index;
    if ( ! open $index, '>>', $indexFile ) {
        warn "WARNING: Can't append to index file $indexFile: $!\n";
        return 0;
    }
    print {$index} $html;
    close $index or warn "WARNING: Can't close index file $indexFile: $!\n";

    return 0;
}
156:
157:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder, not implemented: the body does nothing yet.
##
## Arguments   :
##   $date - reference to the date hash built by fetchDates (unused)
##
## Returns     :
##   Nothing (undef in scalar context, empty list in list context).
##
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    ## TODO: write the main index page.
    return;
}
164:
165:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing part of the page (end of the table, generation
##   date, script version, validator badge) to the index page of the day.
##
## Arguments   :
##   $date - reference to the date hash built by fetchDates
##
## Returns     :
##   Nothing.  If the index file cannot be opened a warning is printed
##   and nothing is written.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";

    ## Remove the newline `date` prints so it does not end up inside the
    ## <font> element.
    my $sysDate = `date`;
    $sysDate = '' unless ( defined $sysDate );
    chomp $sysDate;

    my $index;
    if ( ! open $index, '>>', $indexFile ) {
        warn "WARNING: Can't append to index file $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font></font>
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    close $index or warn "WARNING: Can't close index file $indexFile: $!\n";

    return;
}
192:
#######################################################################
## Function    : checkDir
##
## Description :
##   Makes sure every given directory exists, creating missing ones
##   (including parent directories) with mkpath.
##
## Arguments   :
##   A list of directory names and/or references to arrays of directory
##   names.  Array references are flattened first, so the existence test
##   is done on each real path and not on the reference itself.
##
## Returns     :
##   Nothing.  Errors raised by mkpath are not caught here.
##
#######################################################################
sub checkDir {
    my @dirs = map { ref $_ eq 'ARRAY' ? @{ $_ } : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        next if ( -d $dir );
        mkpath( $dir );
    }

    return;
}
202:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Starts a new index page for the day: the file is created (or
##   truncated) and the XHTML header, page title and the opening tag of
##   the comics table are written.
##
## Arguments   :
##   $date - reference to the date hash built by fetchDates
##
## Returns     :
##   Nothing.  If the index file cannot be opened a warning is printed
##   and nothing is written.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $today = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    my $index;
    if ( ! open $index, '>', $indexFile ) {
        warn "WARNING: Can't create index file $indexFile: $!\n";
        return;
    }

    print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF
    close $index or warn "WARNING: Can't close index file $indexFile: $!\n";

    return;
}
226:
#######################################################################
## Function    : directDownload
##
## Description :
##   Downloads a comic whose image URL can be built directly from the
##   'search' template of the comic (see parseComic).  The image is
##   fetched with wget and piped through convert, which stores it as
##   JPEG in the image directory of the run.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash built by fetchDates
##
## Returns     :
##   0 on success, an error string when the download pipeline fails.
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    ## Quote a value for the shell: wrap it in single quotes and escape
    ## embedded single quotes.  URLs often contain '&' or '?'.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' unless ( defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'$arg'";
    };

    ##
    ## Save the file to the appropriate directory.  $imageDir is the
    ## directory created at start-up, so a configured base or image
    ## directory is honoured here too.
    ##
    my $cDate  = $date->{'day2'};
    my $target = "jpeg:$imageDir/$comic-$cDate.jpg";

    my $cmd = "wget -q " . $quote->( $file ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " --user-agent=" . $quote->( $USER_AGENT ) .
              " -O - | /usr/bin/convert - " . $quote->( $target );

    ## Report a readable message instead of the raw wait status.
    return 0 if ( system( $cmd ) == 0 );
    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
242:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Downloads a comic whose image URL has to be looked up on a web
##   page.  The page given by the comic's 'url' is fetched, every line
##   is matched against the comic's 'search' regular expression and the
##   first capture group of the LAST matching line is taken as the
##   image location.  The image is then fetched with wget and piped
##   through convert, which stores it as JPEG in the image directory.
##
## Arguments   :
##   $comics - reference to the comic configuration hash; the 'ext'
##             entry of the comic is updated when the image location
##             contains gif, jpg or png
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash built by fetchDates
##
## Returns     :
##   0 on success, an error string otherwise.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my ( @lines, $comicLine, $mainURL );
    my $comicIndex = "$indexDir/index.$comic";

    ## Quote a value for the shell: wrap it in single quotes and escape
    ## embedded single quotes.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' unless ( defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'$arg'";
    };

    ## Compile the search expression once; a missing or broken
    ## expression is reported instead of killing the whole run.
    my $search = $comics->{$comic}{'search'};
    my $regex  = ( defined $search && $search ne '' ) ? eval { qr/$search/ } : undef;
    if ( ! defined $regex ) {
        return "ERROR: Invalid search expression for " . $comics->{$comic}{'fullName'};
    }

    ## List form: the URL is handed to wget without going through a shell.
    system( 'wget', '-q', $comics->{$comic}{'url'}, '-O', $comicIndex );

    my $page;
    if ( ! open $page, '<', $comicIndex ) {
        return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
               " (" . $comics->{$comic}{'url'} . ")";
    }
    @lines = <$page>;
    close $page;

    unlink ( $comicIndex );

    ## Reduce the page URL to scheme and host, e.g. http://www.example.com
    $mainURL = $comics->{$comic}{'url'};
    if ( $mainURL =~ m{\A(https?://[^/]+)}i ) {
        $mainURL = $1;
    }
    $mainURL =~ s{/+\z}{};

    ##
    ## Find the comic strip URL based on the specified regex in the search.
    ## $1 is only used when the match really set it.
    ##
    foreach my $line ( @lines ) {
        if ( $line =~ $regex && defined $1 ) {
            $comicLine = $1; chomp $comicLine;
        }
    }

    if ( ! $comicLine ) {
        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
    }

    if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }

    my $comicURL;
    if ( $comicLine =~ m/http/ ) {
        $comicURL = $comicLine;
    }
    elsif ( $comicLine =~ m{\A//} ) {
        ## Protocol-relative location.
        $comicURL = "http:" . $comicLine;
    }
    else {
        ## Location without host: join with exactly one '/'.
        ## NOTE(review): this resolves against the site root, which is
        ## what the old code did for locations starting with '/'.
        ( my $path = $comicLine ) =~ s{\A/+}{};
        $comicURL = $mainURL . "/" . $path;
    }

    ##
    ## Save the file to the appropriate directory.  $imageDir is the
    ## directory created at start-up.
    ##
    my $cDate  = $date->{'day2'};
    my $target = "jpeg:$imageDir/$comic-$cDate.jpg";

    my $cmd = "wget --user-agent=" . $quote->( $USER_AGENT ) .
              " --referer=" . $quote->( $comics->{$comic}{'url'} ) .
              " -q " . $quote->( $comicURL ) .
              " -O - | /usr/bin/convert - " . $quote->( $target );

    return 0 if ( system( $cmd ) == 0 );
    return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}
293:
#######################################################################
## Function    : parseComic
##
## Description :
##   Builds the download URL of a comic from its 'search' template by
##   replacing these placeholders:
##       __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##           -> the entries of the same name in the date hash
##       __ext__   -> the 'ext' entry of the comic
##   Missing values are replaced by an empty string.  Trailing
##   whitespace of a value (such as a newline left over from the
##   config file) is dropped so it cannot end up inside the URL.
##
## Arguments   :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash built by fetchDates
##
## Returns     :
##   The expanded string without trailing newline; an empty string if
##   the comic has no 'search' entry.
##
#######################################################################
sub parseComic {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};

    return '' unless ( defined $string );

    my %value_for = (
        'year'  => $date->{'year'},
        'year2' => $date->{'year2'},
        'mon'   => $date->{'mon'},
        'mon2'  => $date->{'mon2'},
        'day'   => $date->{'day'},
        'day2'  => $date->{'day2'},
        'ext'   => $comics->{$comic}{'ext'},
    );

    foreach my $name ( qw( year year2 mon mon2 day day2 ext ) ) {
        my $value = defined $value_for{$name} ? $value_for{$name} : '';
        $value =~ s/\s+\z//;
        $string =~ s/__${name}__/$value/g;
    }
    chomp $string;

    return $string;
}
311:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash used all over the script.  The date is the
##   current local date moved back by $days_ago days (an undefined
##   $days_ago counts as 0).  The shift is done on the epoch time, so
##   going back past the first of a month or year gives a real date
##   instead of day 0 or a negative day.
##
## Returns     :
##   A hash with these keys:
##       day   - day of the month (1-31)
##       day2  - day of the month, two digits
##       mon   - month (1-12)
##       mon2  - month, two digits
##       year  - year, four digits
##       year2 - last two digits of the year
##       dow   - day of the week as given by localtime (0 = Sunday)
##
#######################################################################
sub fetchDates () {
    my %dates = ();

    ## If you missed a day or two, it is reflected here.
    my $offset = defined $days_ago ? $days_ago : 0;

    ## Count back from noon of the current day, so a one hour daylight
    ## saving shift in between cannot push the result onto the wrong day.
    my $now   = time;
    my @local = localtime( $now );
    my $noon  = $now - ( $local[2] - 12 ) * 3600 - $local[1] * 60 - $local[0];

    ($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) =
        ( localtime( $noon - $offset * 86400 ) )[3,4,5,6];

    $dates{'year'} += 1900;
    $dates{'year2'} = substr $dates{'year'}, 2, 2;
    $dates{'day2'}  = sprintf "%02d", $dates{'day'};
    $dates{'mon'}++;
    $dates{'mon2'}  = sprintf "%02d", $dates{'mon'};

    return %dates;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>