Annotation of comics/fetch.pl.new, revision 1.1
1.1 ! nick 1: #!/usr/bin/perl -w
! 2:
! 3: use strict;
! 4: use File::Path;
! 5: use Data::Dumper;
! 6:
##
## Some default values
##
## The subs are called with a leading '&' because they are defined further
## down in the file; that keeps perl -w quiet about prototype checks.
##
my $ver       = q/$Id$/;
my $comicFile = "comics.conf";
my %comics    = &readComicConfig ( $comicFile );
my %dates     = &fetchDates();

## Directory layout, taken from the "configs" section of the config file
## with fall-backs when a setting is absent.
my $baseDir  = $comics{'configs'}{'base_directory'} || ".";
my $imageDir = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) .
               "/$dates{'mon2'}$dates{'year2'}";
my $indexDir = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" );

## Browser identification sent with every wget request.
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";


my $DATE = `date`; chomp $DATE;
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
&checkDir ( $imageDir, $indexDir );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
    ## "configs" holds the global settings, not a comic definition.
    next if ( $comic eq 'configs' );

    ## A true value stored in 'error' makes writeComic emit the error
    ## entry instead of the image entry.
    $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
    &writeComic ( \%comics, $comic, \%dates );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE = `date`; chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End
! 45:
#######################################################################
## Function    : downloadComic
##
## Description :
##   Determines the download method configured for a comic and calls
##   the appropriate function (mode 1 = indexDownload, mode 2 =
##   directDownload).
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash
##
## Returns     :
##   The result of the selected download function, or an "ERROR: ..."
##   string when the mode is missing or not recognised.
##
#######################################################################
sub downloadComic {
    my ( $comics, $comic, $date ) = @_;

    ## Treat a missing mode like an unknown one instead of warning.
    my $mode = $comics->{$comic}{'mode'};
    $mode = '' if ( ! defined $mode );

    ## Hand on the reference we were given, not the file-level %comics.
    return &indexDownload ( $comics, $comic, $date )  if ( $mode eq '1' );
    return &directDownload ( $comics, $comic, $date ) if ( $mode eq '2' );

    return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}
! 72:
#######################################################################
## Function    : readComicConfig
##
## Description :
##   Reads the comic configuration file.  Lines starting with '#' are
##   ignored.  A line containing at least two commas defines a comic:
##       name,url,search,mode,fullName,ext
##   A line of the form "key = value" is stored under the reserved
##   'configs' key.
##
## Parameters  :
##   $comicFile - path of the configuration file
##
## Returns     :
##   A hash keyed by comic name (plus 'configs').  When the file cannot
##   be opened a warning is printed and an empty hash is returned.
##
#######################################################################
sub readComicConfig {
    my ( $comicFile ) = @_;
    my %comicConfig = ( );

    my $fh;
    if ( ! open ( $fh, '<', $comicFile ) ) {
        warn "WARNING: Can't open comic config file '$comicFile': $!\n";
        return %comicConfig;
    }

    while ( my $line = <$fh> ) {
        ## Drop the line ending so the last field (ext) is clean.
        $line =~ s/\r?\n\z//;

        ## Comments never define a comic or a setting.
        next if ( $line =~ m/^#/ );

        if ( $line =~ m/,.*,/ ) {
            my @res = split /,/, $line;
            $comicConfig{$res[0]}{'url'}      = $res[1];
            $comicConfig{$res[0]}{'search'}   = $res[2];
            $comicConfig{$res[0]}{'mode'}     = $res[3];
            $comicConfig{$res[0]}{'fullName'} = $res[4];
            $comicConfig{$res[0]}{'ext'}      = $res[5];
            $comicConfig{$res[0]}{'error'}    = 0;
        }
        elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
            $comicConfig{'configs'}{$1} = $2;
        }
    }
    close ( $fh );

    return %comicConfig;
}
! 99:
#######################################################################
## Function    : writeComic
##
## Description :
##   Appends one table row for a comic to the day's index page.  When
##   the comic's 'error' entry is true the row shows the error text,
##   otherwise it shows the downloaded image.
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash
##
## Returns     :
##   0 after writing the row, 1 when the index file cannot be opened.
##
#######################################################################
sub writeComic {
    my ( $comics, $comic, $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $content;

    if ( $comics->{$comic}{'error'} ) {
        ## Error row: opens its own <tr><td> so the closing tags balance.
        $content = <<EOF;
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<font color="red"><b>$comic : $comics->{$comic}{'error'}</b></font><br/>
</td>
</tr>
EOF
    }
    else {
        $content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
<tr>
<td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font>
<font size="-2">
<a href="$comics->{$comic}{'url'}">
$comics->{$comic}{'url'}
</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
    }

    my $fh;
    if ( ! open ( $fh, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to index file '$indexFile': $!\n";
        return 1;
    }
    print {$fh} $content;
    close ( $fh ) or warn "WARNING: Can't close index file '$indexFile': $!\n";

    return 0;
}
! 144:
! 145:
#######################################################################
## Function    : writeMainIndex
##
## Description :
##   Placeholder; currently does nothing.  The only call to it in the
##   main program is commented out.
##
## Parameters  :
##   $date - reference to the date hash (unused so far)
##
## Returns     :
##   Nothing.
##
#######################################################################
sub writeMainIndex {
    my ( $date ) = @_;

    return;
}
! 152:
! 153:
#######################################################################
## Function    : writeFooter
##
## Description :
##   Appends the closing HTML (table end, generation time stamp, the
##   W3C validator badge and the body/html end tags) to the day's
##   index page.
##
## Parameters  :
##   $date - reference to the date hash; year2/mon2/day2 select the
##           index file name
##
## Returns     :
##   Nothing.  A warning is printed when the index file cannot be
##   opened or closed.
##
#######################################################################
sub writeFooter {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $sysDate = `date`;

    my $fh;
    if ( ! open ( $fh, '>>', $indexFile ) ) {
        warn "WARNING: Can't append to index file '$indexFile': $!\n";
        return;
    }
    print {$fh} <<EOF;
</table>
<center>Generated at $sysDate
<p>
<a href="http://validator.w3.org/check?uri=referer"><img
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
</p>
</center>

</body>
</html>
EOF
    close ( $fh ) or warn "WARNING: Can't close index file '$indexFile': $!\n";

    return;
}
! 177:
#######################################################################
## Function    : checkDir
##
## Description :
##   Makes sure every given directory exists, creating missing ones
##   (including intermediate directories) with mkpath.
##
## Parameters  :
##   A list of directory names.  Array references are accepted too and
##   are flattened, so both checkDir( $a, $b ) and checkDir( [ $a, $b ] )
##   work.
##
## Returns     :
##   Nothing.
##
#######################################################################
sub checkDir {
    ## Flatten array references so the -d test sees real path names.
    my @dirs = map { ref ( $_ ) eq 'ARRAY' ? @{$_} : $_ } @_;

    foreach my $dir ( @dirs ) {
        next if ( ! defined $dir || $dir eq '' );
        mkpath ( $dir ) if ( ! -d $dir );
    }

    return;
}
! 187:
#######################################################################
## Function    : writeTitle
##
## Description :
##   Creates (truncating any previous copy) the day's index page and
##   writes the HTML header, page title and opening table tag.
##
## Parameters  :
##   $date - reference to the date hash; year2/mon2/day2 select the
##           index file name, mon/day/year build the title
##
## Returns     :
##   Nothing.  A warning is printed when the index file cannot be
##   opened or closed.
##
#######################################################################
sub writeTitle {
    my ( $date ) = @_;
    my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} .
                    $date->{'day2'} . ".html";
    my $today = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

    my $fh;
    if ( ! open ( $fh, '>', $indexFile ) ) {
        warn "WARNING: Can't create index file '$indexFile': $!\n";
        return;
    }
    print {$fh} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Daily Comics for $today</title>
</head>
<body bgcolor="#FFFFFF">
<h1>Daily Comics for $today</h1>
<table align="center" cellpadding="5" cellspacing="0">
EOF
    close ( $fh ) or warn "WARNING: Can't close index file '$indexFile': $!\n";

    return;
}
! 211:
#######################################################################
## Function    : directDownload
##
## Description :
##   Builds the image URL from the comic's search template (via
##   parseComic), fetches it with wget and pipes it through convert,
##   which resizes it and stores it as a JPEG.
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash
##
## Returns     :
##   0 when the download pipeline exits successfully, otherwise an
##   "ERROR: ..." string.
##
## NOTE(review): the target path is relative ("images/...") and does not
## use the configured base/image directory -- confirm that is intended.
##
#######################################################################
sub directDownload {
    my ( $comics, $comic, $date ) = @_;
    my $file = &parseComic ( $comics, $comic, $date );

    if ( ! defined $file || $file eq '' ) {
        return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
    }

    ## Wrap a value in single quotes for the shell; URLs often contain
    ## characters such as '&' or '?' that the shell would interpret.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir  = $date->{'mon2'} . $date->{'year2'};
    my $cDate = $date->{'day2'};

    my $cmd = "wget -q " . $quote->( $file )
            . " --referer=" . $quote->( $comics->{$comic}{'url'} )
            . " --user-agent=" . $quote->( $USER_AGENT )
            . " -O - | /usr/bin/convert -resize 640 - "
            . $quote->( "jpeg:images/$cDir/$comic-$cDate.jpg" );

    my $status = system ( $cmd );
    return 0 if ( $status == 0 );

    return "ERROR: Could not download comic $comics->{$comic}{'fullName'} (exit status $status)";
}
! 227:
#######################################################################
## Function    : indexDownload
##
## Description :
##   Downloads the comic's index page, looks for the image location
##   with the comic's search regex (its first capture group), then
##   fetches that image with wget and pipes it through convert, which
##   resizes it and stores it as a JPEG.  When several lines match,
##   the last match is used.
##
## Parameters  :
##   $comics - reference to the comic configuration hash; the 'ext'
##             entry of the comic is updated when the image location
##             contains gif, jpg or png
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash
##
## Returns     :
##   0 on success, otherwise an "ERROR: ..." string.
##
## NOTE(review): the index and image paths are relative ("indexes/...",
## "images/...") and do not use the configured directories -- confirm
## that is intended.
##
#######################################################################
sub indexDownload {
    my ( $comics, $comic, $date ) = @_;
    my $entry      = $comics->{$comic};
    my $comicIndex = "indexes/index.$comic";
    my $failure    = "ERROR: Could not download comic $entry->{'fullName'}";

    ## Wrap a value in single quotes for the shell; URLs often contain
    ## characters such as '&' or '?' that the shell would interpret.
    my $quote = sub {
        my ( $arg ) = @_;
        $arg = '' if ( ! defined $arg );
        $arg =~ s/'/'\\''/g;
        return "'" . $arg . "'";
    };

    ## An empty pattern would make perl reuse the last successful regex.
    my $search = $entry->{'search'};
    return $failure if ( ! defined $search || $search eq '' );

    system ( "wget -q " . $quote->( $entry->{'url'} ) . " -O " . $quote->( $comicIndex ) );

    my $fh;
    if ( ! open ( $fh, '<', $comicIndex ) ) {
        return "ERROR: Can't open index file for " . $entry->{'fullName'} .
               " (" . $entry->{'url'} . ")";
    }
    my @lines = <$fh>;
    close ( $fh );

    unlink ( $comicIndex );

    ## Reduce the page URL to scheme and host; used for relative links.
    my $mainURL = $entry->{'url'};
    $mainURL =~ s{\A((?:[a-z][a-z0-9+.\-]*://)?[^/]+).*}{$1}is;

    ##
    ## Find the comic strip URL based on the specified regex in the search
    ##
    my $comicLine;
    foreach my $line ( @lines ) {
        if ( $line =~ m/$search/ && defined $1 ) {
            $comicLine = $1;
            chomp $comicLine;
        }
    }

    return $failure if ( ! $comicLine );

    ##
    ## Save the file to the appropriate directory
    ##
    my $cDir  = $date->{'mon2'} . $date->{'year2'};
    my $cDate = $date->{'day2'};

    if ( $comicLine =~ m/(gif|jpg|png)/i ) { $entry->{'ext'} = $1; }

    my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
    my $cmd = "wget --user-agent=" . $quote->( $USER_AGENT )
            . " --referer=" . $quote->( $entry->{'url'} )
            . " -q " . $quote->( $comicURL )
            . " -O - | /usr/bin/convert -resize 640 - "
            . $quote->( "jpeg:images/$cDir/$comic-$cDate.jpg" );

    ## Report a failed pipeline instead of claiming success.
    return 0 if ( system ( $cmd ) == 0 );

    return $failure;
}
! 278:
#######################################################################
## Function    : parseComic
##
## Description :
##   Expands the placeholders in the comic's search template:
##       __year__  __year2__  __mon__  __mon2__  __day__  __day2__
##   are replaced from the date hash and __ext__ from the comic's
##   'ext' entry.
##
## Parameters  :
##   $comics - reference to the comic configuration hash
##   $comic  - key of the comic inside that hash
##   $date   - reference to the date hash
##
## Returns     :
##   The expanded string without a trailing newline.
##
#######################################################################
sub parseComic {
    my ( $comics, $comic, $date ) = @_;
    my $string = $comics->{$comic}{'search'};

    ## Applied in this order, one placeholder after the other.
    my @fields = (
        [ '__year__',  $date->{'year'}  ],
        [ '__year2__', $date->{'year2'} ],
        [ '__mon__',   $date->{'mon'}   ],
        [ '__mon2__',  $date->{'mon2'}  ],
        [ '__day__',   $date->{'day'}   ],
        [ '__day2__',  $date->{'day2'}  ],
        [ '__ext__',   $comics->{$comic}{'ext'} ],
    );

    foreach my $field ( @fields ) {
        my ( $token, $value ) = @{$field};

        ## A missing value expands to nothing; a value read from the
        ## config file may still carry its line ending.
        $value = '' if ( ! defined $value );
        $value =~ s/\s+\z//;

        $string =~ s/\Q$token\E/$value/g;
    }
    chomp $string;

    return $string;
}
! 296:
#######################################################################
## Function    : fetchDates
##
## Description :
##   Builds the date hash for the current local time.
##
## Returns     :
##   A hash with the keys
##       day, mon, year  - day of month, month (1-12), four digit year
##       day2, mon2      - day and month padded to two digits
##       year2           - last two digits of the year
##       dow             - day of week as reported by localtime
##
#######################################################################
sub fetchDates () {
    my ( $mday, $month, $year, $wday ) = ( localtime )[ 3, 4, 5, 6 ];

    ## If you missed a day or two, reflect it here:
    # $mday -= 1;

    ## localtime counts years from 1900 and months from 0.
    $year  += 1900;
    $month += 1;

    my %dates = (
        'day'   => $mday,
        'mon'   => $month,
        'year'  => $year,
        'dow'   => $wday,
        'year2' => substr ( $year, 2, 2 ),
        'day2'  => sprintf ( "%02d", $mday ),
        'mon2'  => sprintf ( "%02d", $month ),
    );

    return %dates;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>