version 1.2, 2011/08/18 12:50:06
|
version 1.17, 2015/02/19 14:56:10
|
Line 1
|
Line 1
|
#!/usr/bin/perl -w |
#!/usr/bin/perl -w |
|
|
|
############################################################################### |
|
# $Log$ |
|
# Revision 1.17 2015/02/19 14:56:10 nick |
|
# Fixed a problem that forced everything to JPG. This would kill GIF animations, and would not display the GIFs either, because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility, as well as rewriting how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update. |
|
# |
|
# Revision 1.16 2015/02/05 18:05:58 nick |
|
# Changed the background and added a fancy title. |
|
# |
|
# Revision 1.15 2015/01/19 13:46:19 nick |
|
# *** empty log message *** |
|
# |
|
############################################################################### |
|
|
use strict; |
use strict; |
use File::Path; |
use File::Path; |
use Data::Dumper; |
use Data::Dumper; |
|
use Pod::Usage; |
|
use Getopt::Long; |
|
|
|
use Date::Calc qw/Date_to_Text_Long Today/; |
|
|
## |
## |
## Some default values |
## Some default values |
## |
## |
my $ver = q/$Id$/; |
my $ver = '$Id$'; |
my $comicFile = "comics.conf"; |
my $comicFile = "comics.conf"; |
my %comics = &readComicConfig ( $comicFile ); |
my %comics = &readComicConfig ( $comicFile ); |
|
my %opts = &fetchOptions( ); |
|
my $days_ago = $opts{'days'} || 0; |
my %dates = &fetchDates(); |
my %dates = &fetchDates(); |
my $baseDir = $comics{'configs'}{'base_directory'} || "."; |
my $baseDir = $comics{'configs'}{'base_directory'} || "."; |
my $imageDir = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) . |
my $imageDir = $baseDir . "/" . ( $comics{'configs'}{'image_directory'} || "images" ) . |
"/$dates{'mon2'}$dates{'year2'}"; |
"/$dates{'mon2'}$dates{'year2'}"; |
my $indexDir = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" ); |
my $indexDir = $baseDir . "/" . ( $comics{'configs'}{'index_directory'} || "indexes" ); |
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18"; |
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18"; |
|
my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /; |
|
|
my $DATE=`date`; chomp $DATE; |
my $DATE=`date`; chomp $DATE; |
print STDOUT "Starting comic fetch at $DATE\n"; |
print STDOUT "Starting comic fetch at $DATE\n"; |
Line 30 print STDOUT "Starting comic fetch at $D
|
Line 49 print STDOUT "Starting comic fetch at $D
|
|
|
foreach my $comic ( sort keys %comics ) { |
foreach my $comic ( sort keys %comics ) { |
next if ( $comic =~ m/config/ ); |
next if ( $comic =~ m/config/ ); |
|
if ( ( $dates{'day2'} eq "Sunday" ) && |
|
( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; } |
$comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
$comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
&writeComic ( \%comics, $comic, \%dates ); |
&writeComic ( \%comics, $comic, \%dates ); |
|
|
|
my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}"; |
|
my $size = 0; |
|
|
|
my $cmd = "/usr/bin/identify -verbose $file|"; |
|
open(IMG, $cmd) || die ("Can't open: $!\n"); |
|
while(<IMG>) { |
|
if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) { |
|
$size = $1 if ( $size == 0); |
|
} |
|
} |
|
close(IMG); |
|
|
|
system( "/usr/bin/convert -resize 640 $file $file" ) |
|
if ( $size > 640 ) |
} |
} |
|
|
## &writeMainIndex ( \%dates ); |
## &writeMainIndex ( \%dates ); |
Line 77 sub readComicConfig ($$) {
|
Line 113 sub readComicConfig ($$) {
|
my %comicConfig = ( ); |
my %comicConfig = ( ); |
my %config = ( ); |
my %config = ( ); |
|
|
|
my ($year, $mon, $day) =( localtime(time))[5,4,3]; |
|
$year += 1900; |
|
$mon = sprintf("%02d", ($mon + 1)); |
|
$day = sprintf("%02d", $day); |
|
|
open FILEN, "<$comicFile"; |
open FILEN, "<$comicFile"; |
while (<FILEN>) { |
while (<FILEN>) { |
if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ |
if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ |
|
$_ =~ s/__YEAR__/$year/g; |
|
$_ =~ s/__MON__/$mon/g; |
|
$_ =~ s/__DAY__/$day/g; |
|
|
my @res = split /,/, $_; |
my @res = split /,/, $_; |
$comicConfig{$res[0]}{'url'} = $res[1]; |
$comicConfig{$res[0]}{'url'} = $res[1]; |
$comicConfig{$res[0]}{'search'} = $res[2]; |
$comicConfig{$res[0]}{'search'} = $res[2]; |
$comicConfig{$res[0]}{'mode'} = $res[3]; |
$comicConfig{$res[0]}{'mode'} = $res[3]; |
$comicConfig{$res[0]}{'fullName'} = $res[4]; |
$comicConfig{$res[0]}{'fullName'} = $res[4]; |
$comicConfig{$res[0]}{'ext'} = $res[5]; |
$comicConfig{$res[0]}{'ext'} = $res[5]; |
|
$comicConfig{$res[0]}{'sunday'} = $res[6] || 1; |
$comicConfig{$res[0]}{'error'} = 0; |
$comicConfig{$res[0]}{'error'} = 0; |
} |
} |
elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { |
elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { |
Line 101 sub readComicConfig ($$) {
|
Line 147 sub readComicConfig ($$) {
|
####################################################################### |
####################################################################### |
sub writeComic ($$) { |
sub writeComic ($$) { |
my ( $comics, $comic, $date ) = @_; |
my ( $comics, $comic, $date ) = @_; |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} . |
my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
$date->{'day2'} . ".html"; |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
|
$date->{'mon2'} . $date->{'day2'} . "-" . |
|
$sd . ".html"; |
my $content = <<EOF; |
my $content = <<EOF; |
|
|
<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> |
<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> |
Line 114 sub writeComic ($$) {
|
Line 162 sub writeComic ($$) {
|
$comics->{$comic}{'url'} |
$comics->{$comic}{'url'} |
</a> |
</a> |
</font><br/> |
</font><br/> |
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" /> |
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" /> |
<br/><br/> |
<br/><br/> |
</td></tr> |
</td></tr> |
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> |
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> |
Line 155 sub writeMainIndex ($$) {
|
Line 203 sub writeMainIndex ($$) {
|
####################################################################### |
####################################################################### |
sub writeFooter { |
sub writeFooter { |
my ( $date ) = @_; |
my ( $date ) = @_; |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} . |
my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
$date->{'day2'} . ".html"; |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
|
$date->{'mon2'} . $date->{'day2'} . "-" . |
|
$sd . ".html"; |
my $sysDate = `date`; |
my $sysDate = `date`; |
|
|
open INDEX, ">>$indexFile"; |
open INDEX, ">>$indexFile"; |
print INDEX <<EOF; |
print INDEX <<EOF; |
</table> |
</table> |
<center>Generated at $sysDate</br> |
<center> |
Version: $ver |
<font size="2"> |
|
Generated on: <font color="green">$sysDate</font><br/> |
|
Version: <font color="green">$ver</font><br /> |
|
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a> |
<p> |
<p> |
<a href="http://validator.w3.org/check?uri=referer"><img |
<a href="http://validator.w3.org/check?uri=referer"><img |
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> |
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> |
Line 190 sub checkDir ($$) {
|
Line 243 sub checkDir ($$) {
|
####################################################################### |
####################################################################### |
sub writeTitle ($$) { |
sub writeTitle ($$) { |
my ( $date ) = @_; |
my ( $date ) = @_; |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . $date->{'mon2'} . |
my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
$date->{'day2'} . ".html"; |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
my $today = $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'}; |
$date->{'mon2'} . $date->{'day2'} . "-" . |
|
$sd . ".html"; |
|
my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'}; |
|
my $today_long = Date_to_Text_Long(Today()); |
|
|
open INDEX, ">$indexFile"; |
open INDEX, ">$indexFile"; |
print INDEX <<EOF; |
print INDEX <<EOF; |
Line 201 sub writeTitle ($$) {
|
Line 257 sub writeTitle ($$) {
|
<html xmlns="http://www.w3.org/1999/xhtml"> |
<html xmlns="http://www.w3.org/1999/xhtml"> |
<head> |
<head> |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> |
|
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen"> |
<title>Daily Comics for $today</title> |
<title>Daily Comics for $today</title> |
</head> |
</head> |
<body bgcolor="#FFFFFF"> |
<body bgcolor="#FFFFFF"> |
<h1>Daily Comics for $today</h1> |
|
<table align="center" cellpadding="5" cellspacing="0"> |
<table align="center" cellpadding="5" cellspacing="0"> |
|
<tr><td> |
|
<table cellpadding="0" cellspacing="0" border="0"> |
|
<tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr> |
|
<tr><td align="left">$today_long</td></tr> |
|
<tr><td> </td></tr> |
|
</td</tr> |
|
|
EOF |
EOF |
close (INDEX); |
close (INDEX); |
} |
} |
Line 222 sub directDownload ($$) {
|
Line 285 sub directDownload ($$) {
|
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDate = $date->{'day2'}; |
my $cDate = $date->{'day2'}; |
|
|
my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert -resize 640 - jpeg:images/$cDir/$comic-$cDate.jpg"; |
my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
|
|
return system($cmd); |
return system($cmd); |
} |
} |
|
|
Line 253 sub indexDownload ($$) {
|
Line 317 sub indexDownload ($$) {
|
## Find the comic strip URL based on the specified regex in the search |
## Find the comic strip URL based on the specified regex in the search |
## |
## |
foreach my $line (@lines) { |
foreach my $line (@lines) { |
if ( $line =~ m/$comics->{$comic}{'search'}/ ) { |
if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
$comicLine = $1; chomp $comicLine; |
$comicLine = $1; chomp $comicLine; |
} |
} |
} |
} |
|
|
## |
## |
## Save the file to the appropriate directory |
## Save the file to the appropriate directory |
Line 267 sub indexDownload ($$) {
|
Line 331 sub indexDownload ($$) {
|
if ( $comicLine ) { |
if ( $comicLine ) { |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O - | /usr/bin/convert -resize 640 - jpeg:images/$cDir/$comic-$cDate.jpg"; |
my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; |
system( $cmd ); |
system( $cmd ); |
return 0; |
return 0; |
} |
} |
Line 300 sub parseComic ($$) {
|
Line 364 sub parseComic ($$) {
|
sub fetchDates () { |
sub fetchDates () { |
my %dates = (); |
my %dates = (); |
|
|
($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime)[3,4,5,6]; |
($dates{'day'}, $dates{'mon'}, $dates{'year'}, $dates{'dow'}) = (localtime(time - (86400 * $days_ago )))[3,4,5,6]; |
|
|
## If you missed a day or two, reflect it here: |
|
# $dates{'day'}-=1; ## <-- 5 days ago |
|
|
|
$dates{'year'} += 1900; |
$dates{'year'} += 1900; |
$dates{'year2'} = substr $dates{'year'}, 2, 2; |
$dates{'year2'} = substr $dates{'year'}, 2, 2; |
Line 313 sub fetchDates () {
|
Line 374 sub fetchDates () {
|
|
|
return %dates; |
return %dates; |
} |
} |
|
|
|
############################################################################### |
|
## |
|
## &fetchOptions( ); |
|
## |
|
## Grab our command line arguments and toss them into a hash |
|
## |
|
############################################################################### |
|
# Parse the command line into a hash and return it as a flat key/value list.
#
# Recognised options:
#   --days, -d [N]   integer value; the value itself is optional (":i"), so a
#                    bare --days yields 0
#   --help, -?       print the usage summary and exit (via pod2usage)
#   --man,  -m       print the full POD read from the DATA handle and exit
#
# Returns: a hash with the keys 'days', 'help' and 'man'.  All three keys
# always exist; a key's value is undef when its option was not supplied.
#
# An unparseable command line also ends in pod2usage(), which terminates the
# program, so callers only ever see a successfully parsed option set.
sub fetchOptions {
    my %opts;

    # The short aliases "d" and "m" are spelled out so that the options
    # advertised in the POD work regardless of Getopt::Long's abbreviation
    # settings.
    GetOptions(
        "days|d:i" => \$opts{'days'},
        "help|?"   => \$opts{'help'},
        "man|m"    => \$opts{'man'},
    ) or pod2usage( );

    pod2usage( ) if defined $opts{'help'};
    pod2usage( { -verbose => 2, -input => \*DATA } ) if defined $opts{'man'};

    return %opts;
}
|
|
|
__END__ |
|
|
|
=head1 NAME |
|
|
|
fetch.pl - Fetches comics and places them all locally in a single html file. |
|
|
|
=head1 SYNOPSIS |
|
|
|
fetch.pl [options] |
|
|
|
Options: |
|
--days,d Fetch comics from X days ago |
|
--help,? Display the basic help menu |
|
--man,m Display the detailed man page |
|
|
|
=head1 DESCRIPTION |
|
|
|
=head1 HISTORY |
|
|
|
=head1 AUTHOR |
|
|
|
Nicholas DeClario <nick@declario.com> |
|
|
|
=head1 BUGS |
|
|
|
This is a work in progress. Please report all bugs to the author. |
|
|
|
=head1 SEE ALSO |
|
|
|
=head1 COPYRIGHT |
|
|
|
=cut |
|
|
|
|