File:  [Local Repository] / comics / fetch.pl.new
Revision 1.16: download - view: text, annotated - select for diffs
Thu Feb 5 18:05:58 2015 UTC (9 years, 8 months ago) by nick
Branches: MAIN
CVS tags: HEAD
Changed the background and added a fancy title.

#!/usr/bin/perl -w

###############################################################################
# $Log: fetch.pl.new,v $
# Revision 1.16  2015/02/05 18:05:58  nick
# Changed the background and added a fancy title.
#
# Revision 1.15  2015/01/19 13:46:19  nick
# *** empty log message ***
#
###############################################################################

use strict;
use File::Path;
use Data::Dumper;
use Pod::Usage;
use Getopt::Long;

use Date::Calc qw/Date_to_Text_Long Today/;

## 
## Some default values
##
## CVS keyword string; it is printed in the footer of the generated page.
my $ver        = '$Id: fetch.pl.new,v 1.16 2015/02/05 18:05:58 nick Exp $';

## Comic definitions and "key = value" settings are read from this file.
my $comicFile  = "comics.conf";
my %comics     = &readComicConfig( $comicFile );

## Command line options; --days selects how many days back to fetch.
## $days_ago has to be set before fetchDates() runs because that sub reads it.
my %opts       = &fetchOptions();
my $days_ago   = $opts{'days'} || 0;
my %dates      = &fetchDates();

## Output locations.  Images are grouped in one directory per month, named
## <two digit month><two digit year>.
my $baseDir    = $comics{'configs'}{'base_directory'} || ".";
my $imageDir   = join( "/",
                       $baseDir,
                       $comics{'configs'}{'image_directory'} || "images",
                       $dates{'mon2'} . $dates{'year2'} );
my $indexDir   = join( "/",
                       $baseDir,
                       $comics{'configs'}{'index_directory'} || "indexes" );

## Browser identification sent with the image downloads.
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18";

## Weekday names indexed by the localtime weekday number (0 = Sunday).
my @days       = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /;

chomp( my $DATE = `date` );
print STDOUT "Starting comic fetch at $DATE\n";

##
## Main program starts here
##
## Make sure both output directories exist before anything is written.
&checkDir ( [ $imageDir, $indexDir ] );

&writeTitle ( \%dates );

foreach my $comic ( sort keys %comics ) {
  next if ( $comic =~ m/config/ );

  ## Honour the per-comic "sunday" flag.  The weekday has to be taken from
  ## 'dow' (the localtime weekday number, used as an index into @days);
  ## 'day2' is the zero padded day of the month and never equals "Sunday".
  if ( ( $days[ $dates{'dow'} ] eq "Sunday" ) && 
       ( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; }
  $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
  &writeComic ( \%comics, $comic, \%dates );
}

## Shrink every image fetched for this day that is wider than 640 pixels.
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n";
foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) ) 
{
	## identify and convert are started without a shell (list form), so
	## unusual characters in a file name are never read as shell syntax.
	my $info = '';
	if ( open my $identify, '-|', '/usr/bin/identify', $file ) {
		local $/;
		$info = <$identify>;
		$info = '' if ( ! defined $info );
		close $identify;
	}
	else {
		warn "WARNING: Can't run /usr/bin/identify on $file: $!\n";
	}

	## Pick the width out of the last WIDTHxHEIGHT pair on the first
	## matching line; skip the file when identify reported nothing usable.
	my ( $width ) = $info =~ m/.*\s(\d+)x\d+/;
	next if ( ! defined $width );

	system( '/usr/bin/convert', '-resize', '640', $file, $file )
		if ( $width > 640 );
}

## &writeMainIndex ( \%dates );

&writeFooter( \%dates );

$DATE=`date`;  chomp( $DATE );
print STDOUT "Completed comic fetch at $DATE\n";

## End

#######################################################################
## Function :  downloadComic
##
##   Description :
##	This function determines the download method being used to 
##      retrieve the comic and calls the appropriate function.
##
##      If the mode is invalid an error will be returned.
##
#######################################################################
## Arguments : $comics - reference to the comic configuration hash
##             $comic  - key of the comic to download
##             $date   - reference to the date hash, handed on unchanged
## Returns   : whatever the selected download function returns, or an
##             "ERROR: ..." string when the mode is neither 1 nor 2.
##
## No prototype is declared: every caller passes three arguments, which
## the former ($$) prototype did not describe.
sub downloadComic {
	my ( $comics, $comic, $date ) = @_;
	my $mode = $comics->{$comic}{'mode'};

	## Work on the hash reference that was passed in rather than on the
	## file level %comics.  The helpers are called with '&' so the call
	## does not depend on any prototype they may declare further down.
	if ( defined $mode ) {
		return &indexDownload ( $comics, $comic, $date )  if ( $mode eq 1 );
		return &directDownload ( $comics, $comic, $date ) if ( $mode eq 2 );
	}

	return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
}

#######################################################################
#######################################################################
## Function :  readComicConfig
##
##   Description :
##      Reads the comic configuration file and returns it as a hash.
##
##      Lines starting with '#' are ignored.  A line containing at least
##      two commas defines a comic:
##          name,url,search,mode,fullName,ext,sunday
##      Any other line of the form "key = value" is stored under
##      $config{'configs'}{key}.
##
##      __YEAR__, __MON__ and __DAY__ in a comic line are replaced with
##      the current local date (month and day zero padded).
##
##   Arguments : $comicFile - path of the configuration file
##   Returns   : the configuration hash (as a list); empty when the file
##               cannot be opened, in which case a warning is printed.
#######################################################################
sub readComicConfig {
	my ( $comicFile ) = @_;
	my %comicConfig   = ( );

	my ( $year, $mon, $day ) = ( localtime(time) )[5,4,3];
	$year += 1900;
	$mon = sprintf( "%02d", ( $mon + 1 ) );
	$day = sprintf( "%02d", $day );

	my $fh;
	if ( ! open $fh, '<', $comicFile ) {
		warn "WARNING: Can't read comic configuration '$comicFile': $!\n";
		return %comicConfig;
	}

	while ( my $line = <$fh> ) {
		## Drop the line ending first so it cannot end up inside the
		## last field (e.g. a mode of "1\n" on a four field line).
		$line =~ s/\r?\n\z//;
		next if ( $line =~ m/^#/ );

		if ( $line =~ m/,.*,/ ) {
			$line =~ s/__YEAR__/$year/g;
			$line =~ s/__MON__/$mon/g;
			$line =~ s/__DAY__/$day/g;

			my ( $name, $url, $search, $mode, $fullName, $ext, $sunday ) =
				split /,/, $line;

			## The sunday flag defaults to 1 only when the field is
			## missing or blank, so an explicit 0 is kept.
			if ( defined $sunday && $sunday =~ m/\S/ ) {
				$sunday =~ s/\s+//g;
			}
			else {
				$sunday = 1;
			}

			$comicConfig{$name}{'url'}      = $url;
			$comicConfig{$name}{'search'}   = $search;
			$comicConfig{$name}{'mode'}     = $mode;
			$comicConfig{$name}{'fullName'} = $fullName;
			$comicConfig{$name}{'ext'}      = $ext;
			$comicConfig{$name}{'sunday'}   = $sunday;
			$comicConfig{$name}{'error'}    = 0;
		}
		elsif ( $line =~ m/(.*)\s+=\s+(.*)/ ) {
			## Trim the padding around key and value so that aligned
			## "key    = value" lines are found under the plain key.
			my ( $key, $value ) = ( $1, $2 );
			$key   =~ s/^\s+|\s+$//g;
			$value =~ s/\s+$//;
			$comicConfig{'configs'}{$key} = $value;
		}
	}
	close ( $fh );

	return %comicConfig;
}

#######################################################################
#######################################################################
## Function :  writeComic
##
##   Description :
##      Appends one table row for a comic to the day's index page: the
##      image row when the download succeeded, or a row showing the
##      error text when $comics->{$comic}{'error'} is set.
##
##   Arguments : $comics - reference to the comic configuration hash
##               $comic  - key of the comic to write
##               $date   - reference to the date hash
##   Returns   : 0 on success, 1 when the index file cannot be opened.
#######################################################################
sub writeComic {
	my ( $comics, $comic, $date ) = @_;
	my $sd = substr( $days[ $date->{'dow'} ], 0, 3 );
	my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
			$date->{'mon2'} . $date->{'day2'} . "-" . 
			$sd . ".html";
	my $content;

	if ( $comics->{$comic}{'error'} ) {
		## The error row opens its own <tr><td>, so the closing tags
		## at the end are balanced.
		$content = <<EOF;
  <tr>
    <td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp;
<font size="-2">
        <a href="$comics->{$comic}{'url'}">
                $comics->{$comic}{'url'}
        </a>
</font><br/>
<font color="red"><b>$comic :  $comics->{$comic}{'error'}</b></font><br/>
  </td>
</tr>
EOF
	}
	else {
		$content = <<EOF;

<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
  <tr>
    <td align="left">
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
<font size="-2">
	<a href="$comics->{$comic}{'url'}">
		$comics->{$comic}{'url'}
	</a>
</font><br/>
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" />
<br/><br/>
</td></tr>
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->

EOF
	}

	my $index;
	if ( ! open $index, '>>', $indexFile ) {
		warn "WARNING: Can't append to $indexFile: $!\n";
		return 1;
	}
	print {$index} $content;
	close ( $index );

	return 0;
}


#######################################################################
#######################################################################
sub writeMainIndex ($$) {
	## Placeholder: the sub only unpacks its argument and writes nothing.
	## Its single call in the main program is commented out.
	my ( $date ) = @_;

}


#######################################################################
#######################################################################
## Function :  writeFooter
##
##   Description :
##      Appends the closing markup to the day's index page: it closes
##      the tables opened by writeTitle and adds the generation time,
##      the script version and the validator link.
##
##   Arguments : $date - reference to the date hash
##   Returns   : nothing useful; a warning is printed when the index
##               file cannot be opened.
#######################################################################
sub writeFooter {
	my ( $date ) = @_;
	my $sd = substr( $days[ $date->{'dow'} ], 0, 3 );
	my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
			$date->{'mon2'} . $date->{'day2'} . "-" . 
			$sd . ".html";
	chomp( my $sysDate = `date` );

	my $index;
	if ( ! open $index, '>>', $indexFile ) {
		warn "WARNING: Can't append to $indexFile: $!\n";
		return;
	}

	## writeTitle leaves an inner table open inside a cell of an outer
	## table; close the inner table, the cell, its row and the outer table.
	print {$index} <<EOF;
</table>
</td></tr>
</table>
<center>
<font size="2">
Generated on: <font color="green">$sysDate</font><br/>
Version: <font color="green">$ver</font><br />
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
  <p>
    <a href="http://validator.w3.org/check?uri=referer"><img
      src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
  </p>
</font>
</center>

</body>
</html>
EOF
	close( $index );
}

#######################################################################
#######################################################################
## Function :  checkDir
##
##   Description :
##      Creates every directory that does not exist yet, including any
##      missing parent directories.
##
##   Arguments : a list of directory paths and/or references to arrays
##               of directory paths
#######################################################################
sub checkDir {
	## Flatten array references first so the -d test below sees real
	## path names instead of a stringified reference.
	my @dirs = map { ( ref $_ eq 'ARRAY' ) ? @{ $_ } : $_ } @_;

	foreach my $dir ( @dirs ) {
		next if ( ! defined $dir || $dir eq '' );
		if ( ! -d $dir ) { mkpath( $dir ); }
	}

	return;
}

#######################################################################
#######################################################################
## Function :  writeTitle
##
##   Description :
##      Creates (or truncates) the day's index page and writes the HTML
##      head, the heading image and the long form of the date.  The
##      tables opened here are left open for the comic rows that are
##      appended afterwards.
##
##   Arguments : $date - reference to the date hash
##   Returns   : nothing useful; a warning is printed when the index
##               file cannot be created.
#######################################################################
sub writeTitle {
	my ( $date ) = @_;
	my $sd = substr( $days[ $date->{'dow'} ], 0, 3 );
	my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
			$date->{'mon2'} . $date->{'day2'} . "-" . 
			$sd . ".html";
	my $today     = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'};

	## Build the long date from the date being processed rather than
	## from Today(), so that it agrees with $today when an earlier day
	## was requested.  Expects 'mon' to be 1-12, as fetchDates stores it.
	my $today_long = Date_to_Text_Long( $date->{'year'}, $date->{'mon'}, $date->{'day'} );

	my $index;
	if ( ! open $index, '>', $indexFile ) {
		warn "WARNING: Can't create $indexFile: $!\n";
		return;
	}
	print {$index} <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
    <title>Daily Comics for $today</title>
  </head>
<body bgcolor="#FFFFFF">
<table align="center" cellpadding="5" cellspacing="0">
<tr><td>
<table cellpadding="0" cellspacing="0" border="0">
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Daily Comics" /></td></tr>
<tr><td align="left">$today_long</td></tr>
<tr><td>&nbsp;</td></tr>

EOF
	close ( $index );
}

#######################################################################
#######################################################################
## Function :  directDownload
##
##   Description :
##      Downloads a comic whose image URL is given directly by the
##      'search' template (after parseComic has filled in the date
##      tokens) and converts it to a JPEG in the image directory.
##
##   Arguments : $comics - reference to the comic configuration hash
##               $comic  - key of the comic to download
##               $date   - reference to the date hash
##   Returns   : 0 on success, otherwise an "ERROR: ..." string.
#######################################################################
sub directDownload {
	my ( $comics, $comic, $date ) = @_;
	my $file = &parseComic ( $comics, $comic, $date );

	## Quote a value for /bin/sh so that characters such as '&' or '?'
	## in a URL are passed to wget literally.
	my $shellQuote = sub {
		my ( $text ) = @_;
		$text = '' if ( ! defined $text );
		$text =~ s/'/'\\''/g;
		return "'" . $text . "'";
	};

	##
	## Save the file to the image directory that the main program
	## created and later scans for oversized images.
	##
	my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";

	my $cmd = "wget -q " . $shellQuote->( $file ) .
		  " --referer=" . $shellQuote->( $comics->{$comic}{'url'} ) .
		  " --user-agent=" . $shellQuote->( $USER_AGENT ) .
		  " -O - | /usr/bin/convert - " . $shellQuote->( "jpeg:" . $target );

	my $status = system( $cmd );
	return 0 if ( $status == 0 );

	return "ERROR: Could not download comic $comics->{$comic}{'fullName'}" .
	       " (exit status " . ( $status == -1 ? -1 : $status >> 8 ) . ")";
}

#######################################################################
#######################################################################
## Function :  indexDownload
##
##   Description :
##      Downloads the comic's index page, looks for the image URL with
##      the 'search' regular expression (its first capture group is the
##      URL; the last matching line wins) and converts the image to a
##      JPEG in the image directory.
##
##   Arguments : $comics - reference to the comic configuration hash
##               $comic  - key of the comic to download
##               $date   - reference to the date hash
##   Returns   : 0 on success, otherwise an "ERROR: ..." string.
#######################################################################
sub indexDownload {
	my ( $comics, $comic, $date ) = @_;
	my ( @lines, $comicLine, $mainURL );
	my $comicIndex = $indexDir . "/index." . $comic;

	## Quote a value for /bin/sh so that characters such as '&' or '?'
	## in a URL are passed to wget literally.
	my $shellQuote = sub {
		my ( $text ) = @_;
		$text = '' if ( ! defined $text );
		$text =~ s/'/'\\''/g;
		return "'" . $text . "'";
	};

	## List form: wget is started without a shell.
	system( 'wget', '-q', $comics->{$comic}{'url'}, '-O', $comicIndex );

	my $fh;
	if ( ! open $fh, '<', $comicIndex ) {  
		return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
		       " (" . $comics->{$comic}{'url'} . ")"; 
	} 
	@lines = <$fh>;
	close ( $fh );

	unlink ( $comicIndex );

	## Reduce the page URL to scheme and host; used as the prefix for
	## image paths that are not absolute URLs.
	$mainURL = $comics->{$comic}{'url'};
	$mainURL =~ s/(http:\/\/.*)(?:\/.*\/){1,}.*/$1/;
	$mainURL =~ s/([a-z])\/.*/$1/i;

	##
	## Find the comic strip URL based on the specified regex in the search.
	## Compiling the expression once also keeps an empty 'search' from
	## being treated as "repeat the last successful match".
	##
	my $search  = defined $comics->{$comic}{'search'} ? $comics->{$comic}{'search'} : '';
	my $pattern = qr/$search/;
	foreach my $line ( @lines ) {
		if ( $line =~ $pattern && defined $1 ) {
			$comicLine = $1; chomp $comicLine;
		}
	}

	if ( $comicLine ) {
		if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
		my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;

		##
		## Save the file to the image directory that the main program
		## created and later scans for oversized images.
		##
		my $target = $imageDir . "/" . $comic . "-" . $date->{'day2'} . ".jpg";
		my $cmd = "wget --user-agent=" . $shellQuote->( $USER_AGENT ) .
			  " --referer=" . $shellQuote->( $comics->{$comic}{'url'} ) .
			  " -q " . $shellQuote->( $comicURL ) .
			  " -O - | /usr/bin/convert - " . $shellQuote->( "jpeg:" . $target );

		## Report a failed download instead of claiming success.
		return 0 if ( system( $cmd ) == 0 );
		return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
	}

	## NOTE(review): this removes an index.html from the current
	## directory; it looks like a leftover from an earlier wget call -
	## confirm before removing.
	unlink "index.html";

	return "ERROR: Could not download comic $comics->{$comic}{'fullName'}";
}

#######################################################################
#######################################################################
## Function :  parseComic
##
##   Description :
##      Returns the comic's 'search' template with the date and
##      extension tokens filled in and one trailing newline removed.
##
##      Tokens: __year__ __year2__ __mon__ __mon2__ __day__ __day2__
##      (taken from the date hash) and __ext__ (the comic's 'ext').
##
##   Arguments : $comics - reference to the comic configuration hash
##               $comic  - key of the comic
##               $date   - reference to the date hash
#######################################################################
sub parseComic ($$) {
	my ( $comics, $comic, $date ) = @_;
	my $template = $comics->{$comic}{'search'};

	## Token / value pairs, applied one after the other in this order.
	my @fills = (
		[ '__year__',  $date->{'year'}  ],
		[ '__year2__', $date->{'year2'} ],
		[ '__mon__',   $date->{'mon'}   ],
		[ '__mon2__',  $date->{'mon2'}  ],
		[ '__day__',   $date->{'day'}   ],
		[ '__day2__',  $date->{'day2'}  ],
		[ '__ext__',   $comics->{$comic}{'ext'} ],
	);

	foreach my $fill ( @fills ) {
		my ( $token, $value ) = @{ $fill };
		$template =~ s/$token/$value/g;
	}
	chomp $template;

	return $template;
}

#######################################################################
#######################################################################
## Function :  fetchDates
##
##   Description :
##      Builds the date hash for the day that lies $days_ago days
##      (counted as 86400 second steps) before now, in local time.
##
##   Returns : a hash (as a list) with the keys
##               day, mon (1-12), year (four digits), dow (0 = Sunday),
##               day2, mon2 (zero padded to two digits) and
##               year2 (last two digits of the year).
#######################################################################
sub fetchDates () {
	my @when = localtime( time - ( 86400 * $days_ago ) );
	my ( $day, $month, $year, $dow ) = @when[ 3, 4, 5, 6 ];

	## localtime counts years from 1900 and months from 0.
	$year  += 1900;
	$month += 1;

	my %dates = (
		'day'   => $day,
		'mon'   => $month,
		'year'  => $year,
		'dow'   => $dow,
		'year2' => substr( $year, 2, 2 ),
		'day2'  => sprintf( "%02d", $day ),
		'mon2'  => sprintf( "%02d", $month ),
	);

	return %dates;
}

###############################################################################
##
## &fetchOptions( );
##
##      Grab our command line arguments and toss them in to a hash
##
###############################################################################
sub fetchOptions {
        ## One scalar per option; each stays undefined unless the option
        ## was given on the command line.
        my ( $days, $help, $man );

        my $parsed = GetOptions(
                        "days:i"        => \$days,
                        "help|?"        => \$help,
                        "man"           => \$man,
                     );

        ## Usage message for unknown options and for --help, the full
        ## manual (read from the DATA handle) for --man.
        pod2usage( ) if ( ! $parsed );
        pod2usage( ) if ( defined $help );
        pod2usage( { -verbose => 2, -input => \*DATA } ) if ( defined $man );

        my %opts = (
                        'days'  => $days,
                        'help'  => $help,
                        'man'   => $man,
                   );

        return %opts;
}

__END__

=head1 NAME

fetch.pl - Fetches comics and places them all locally in a single html file.

=head1 SYNOPSIS

fetch.pl [options]

Options:
        --days,d N      Fetch comics from N days ago (default: 0, today)
        --help,?        Display the basic help menu
        --man,m         Display the detailed man page

=head1 DESCRIPTION

=head1 HISTORY

=head1 AUTHOR

Nicholas DeClario <nick@declario.com>

=head1 BUGS

This is a work in progress.  Please report all bugs to the author.

=head1 SEE ALSO

=head1 COPYRIGHT

=cut



FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>