version 1.15, 2015/01/19 13:46:19
|
version 1.28, 2020/06/10 21:14:31
|
Line 2
|
Line 2
|
|
|
############################################################################### |
############################################################################### |
# $Log$ |
# $Log$ |
|
# Revision 1.28 2020/06/10 21:14:31 nick |
|
# Updated for w3 validation. |
|
# |
|
# Revision 1.27 2019/04/15 12:50:23 nick |
|
# The script was unable to handle html '&' and convert it, so I added that. I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick. |
|
# |
|
# Revision 1.26 2018/04/22 14:03:54 nick |
|
# Changed the default for Sunday comics that was causing issues with some comics. |
|
# |
|
# Revision 1.25 2018/02/12 13:30:58 nick |
|
# Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't. |
|
# |
|
# Revision 1.24 2018/02/06 14:31:06 nick |
|
# A status report is now generated in JSON that can easily be scanned so that |
|
# I can be alerted when there are failures that I miss if I don't read the |
|
# comics that day. |
|
# |
|
# Revision 1.23 2018/01/26 13:05:27 nick |
|
# Added a new config option to remove all newline from the resulting index.html |
|
# file. This allows for easier parsing for certain comics. I then updated |
|
# the URLs to search for and enabled the newline removal for a handful |
|
# of uComics. |
|
# |
|
# I believe I've also properly fixed the Comic Config version displayed on |
|
# the webpage itself. |
|
# |
|
# Revision 1.22 2017/12/05 13:37:40 nick |
|
# Added the CVS config version to the output. |
|
# |
|
# Revision 1.21 2015/10/26 14:25:40 nick |
|
# Fixed a bug that was improperly including the day of week string, preventing the weekend comics from fetching properly. |
|
# |
|
# Revision 1.20 2015/10/22 12:58:44 nick |
|
# Added the ability for Sunday only comics. Stonesoup is no longer weekdays, this has been added to Sunday only. I also added Foxtrot Classics for weekdays and Foxtrot for Sundays. |
|
# |
|
# Revision 1.19 2015/07/13 12:56:58 nick |
|
# Added Sally Forth and Pearls Before Swine. Adding Sally Forth required a change in the 'wget' command for fetching the index file to include 'user-agent' and 'referer'. |
|
# |
|
# Revision 1.18 2015/05/07 12:31:43 nick |
|
# Added favicon |
|
# |
|
# Revision 1.17 2015/02/19 14:56:10 nick |
|
# Fixed a problem that forced everything to JPG. This would kill GIF animations, but would not display the GIFs either because 'convert' appends an index number to the end of the file name for each frame of the GIF animation. I fixed this to maintain GIF compatibility as well as rewriting how the script fetches the size of the file. Additionally, I updated the configuration for Questionable Content to search for GIF or JPG, which is what triggered this entire update. |
|
# |
|
# Revision 1.16 2015/02/05 18:05:58 nick |
|
# Changed the background and added a fancy title. |
|
# |
# Revision 1.15 2015/01/19 13:46:19 nick |
# Revision 1.15 2015/01/19 13:46:19 nick |
# *** empty log message *** |
# *** empty log message *** |
# |
# |
Line 12 use File::Path;
|
Line 59 use File::Path;
|
use Data::Dumper; |
use Data::Dumper; |
use Pod::Usage; |
use Pod::Usage; |
use Getopt::Long; |
use Getopt::Long; |
|
use JSON::Create 'create_json'; |
|
use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/; |
|
|
## |
## |
## Some default values |
## Some default values |
## |
## |
my $ver = '$Id$'; |
my $ver = '$Id$'; |
my $comicFile = "comics.conf"; |
my $comicFile = "comics.conf"; |
|
my $comicConfigVer = "Unknown"; |
|
my $reportFile = "/home/httpd/html/daily/comics/status_report.json"; |
my %comics = &readComicConfig ( $comicFile ); |
my %comics = &readComicConfig ( $comicFile ); |
my %opts = &fetchOptions( ); |
my %opts = &fetchOptions( ); |
my $days_ago = $opts{'days'} || 0; |
my $days_ago = $opts{'days'} || 0; |
Line 29 my $indexDir = $baseDir . "/" . ( $co
|
Line 80 my $indexDir = $baseDir . "/" . ( $co
|
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18"; |
my $USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick) Firefox/3.6.18"; |
my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /; |
my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /; |
|
|
|
|
my $DATE=`date`; chomp $DATE; |
my $DATE=`date`; chomp $DATE; |
print STDOUT "Starting comic fetch at $DATE\n"; |
print STDOUT "Starting comic fetch at $DATE\n"; |
|
|
Line 41 print STDOUT "Starting comic fetch at $D
|
Line 91 print STDOUT "Starting comic fetch at $D
|
&writeTitle ( \%dates ); |
&writeTitle ( \%dates ); |
|
|
foreach my $comic ( sort keys %comics ) { |
foreach my $comic ( sort keys %comics ) { |
|
|
|
## Skip if this is Sunday and the comic is weekdays only |
next if ( $comic =~ m/config/ ); |
next if ( $comic =~ m/config/ ); |
if ( ( $dates{'day2'} eq "Sunday" ) && |
if (($dates{'wday'} eq "Sunday") && |
( $comics{$comic}{'sunday'} == 0 ) ) { print "Skipping.\n"; next; } |
($comics{$comic}{'not_sunday'} == 1)) { |
|
print "Skipping '$comic'; Weekdays only.\n"; |
|
next; |
|
} |
|
|
|
## Skip if Sunday only comic and it's not Sunday. |
|
if (($dates{'wday'} ne "Sunday") && |
|
($comics{$comic}{'sunday_only'} == 1)) { |
|
print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; |
|
next |
|
} |
|
|
$comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
$comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
&writeComic ( \%comics, $comic, \%dates ); |
&writeComic ( \%comics, $comic, \%dates ); |
} |
|
|
|
print "Finding in $imageDir/*-$dates{'day2'}.jpg\n"; |
my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}"; |
foreach my $file ( glob( "$imageDir/*-$dates{'day2'}.jpg" ) ) |
my $size = 0; |
{ |
|
my $size = `/usr/bin/identify $file`; |
my $cmd = "/usr/bin/identify -verbose $file|"; |
$size =~ s/.*\s(\d+)x\d+.*/$1/; |
open(IMG, $cmd) || die ("Can't open: $!\n"); |
|
while(<IMG>) { |
|
if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) { |
|
$size = $1 if ( $size == 0); |
|
} |
|
} |
|
close(IMG); |
|
|
|
|
system( "/usr/bin/convert -resize 640 $file $file" ) |
system( "/usr/bin/convert -resize 640 $file $file" ) |
if ( $size > 640 ) |
if ( $size > 640 ) |
Line 62 foreach my $file ( glob( "$imageDir/*-$d
|
Line 131 foreach my $file ( glob( "$imageDir/*-$d
|
|
|
&writeFooter( \%dates ); |
&writeFooter( \%dates ); |
|
|
|
print STDOUT "Status written to $reportFile.\n" |
|
if (&writeStatusReportJSON(\%comics, $reportFile)); |
|
|
$DATE=`date`; chomp( $DATE ); |
$DATE=`date`; chomp( $DATE ); |
print STDOUT "Completed comic fetch at $DATE\n"; |
print STDOUT "Completed comic fetch at $DATE\n"; |
|
|
Line 108 sub readComicConfig ($$) {
|
Line 180 sub readComicConfig ($$) {
|
|
|
open FILEN, "<$comicFile"; |
open FILEN, "<$comicFile"; |
while (<FILEN>) { |
while (<FILEN>) { |
|
#if ($_ =~ m/^#.* \$[Ii][Dd]: fetch.pl.new,v 1.23 2018/01/26 13:05:27 nick Exp $/) { |
|
if ($_ =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/) { |
|
$comicConfigVer = $1; |
|
} |
if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ |
if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ |
$_ =~ s/__YEAR__/$year/g; |
$_ =~ s/__YEAR__/$year/g; |
$_ =~ s/__MON__/$mon/g; |
$_ =~ s/__MON__/$mon/g; |
Line 119 sub readComicConfig ($$) {
|
Line 195 sub readComicConfig ($$) {
|
$comicConfig{$res[0]}{'mode'} = $res[3]; |
$comicConfig{$res[0]}{'mode'} = $res[3]; |
$comicConfig{$res[0]}{'fullName'} = $res[4]; |
$comicConfig{$res[0]}{'fullName'} = $res[4]; |
$comicConfig{$res[0]}{'ext'} = $res[5]; |
$comicConfig{$res[0]}{'ext'} = $res[5]; |
$comicConfig{$res[0]}{'sunday'} = $res[6] || 1; |
$comicConfig{$res[0]}{'not_sunday'} = sprintf("%d", $res[6] || 0); |
|
$comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); |
|
$comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0); |
$comicConfig{$res[0]}{'error'} = 0; |
$comicConfig{$res[0]}{'error'} = 0; |
} |
} |
elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { |
elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { |
Line 133 sub readComicConfig ($$) {
|
Line 211 sub readComicConfig ($$) {
|
|
|
####################################################################### |
####################################################################### |
####################################################################### |
####################################################################### |
|
## writeStatusReportJSON( \%comics, $filename )
##
## Writes a JSON status report describing the outcome of the comic fetch.
##
## Parameters:
##   $comicsRef - reference to the comics hash; every entry that is a hash
##                with a true 'fullName' is reported, using its 'error' value
##                (false means the fetch succeeded).
##   $filename  - path of the report file to create or overwrite.
##
## Report layout:
##   date        - today's local date as YYYYMMDD
##   comics      - array of { comicName, error, status } records
##   totalErrors - number of records whose status is "Error"
##
## Returns the result of closing the report file (true on success); the
## caller only announces the report when this is true.
## Dies if the report file cannot be opened for writing.
sub writeStatusReportJSON ($$) {
    my ( $comicsRef, $filename ) = @_;

    # Take one localtime snapshot so the three date fields cannot straddle
    # midnight and produce an inconsistent date.
    my ( $mday, $mon, $year ) = (localtime)[ 3, 4, 5 ];
    my $shortDate = sprintf( "%d%02d%02d", $year + 1900, $mon + 1, $mday );

    # 'comics' has to be an array reference.  An empty list "()" flattens
    # away inside the hash constructor, leaving an odd-length list and an
    # undefined 'comics' value (JSON null) whenever no comic is reported.
    my %json = (
        'date'        => $shortDate,
        'comics'      => [],
        'totalErrors' => 0,
    );

    foreach my $comic ( sort keys %$comicsRef ) {
        my $entry = $comicsRef->{$comic};

        # Skip anything that is not a comic record, or has no display name.
        next unless ref($entry) eq 'HASH' && $entry->{'fullName'};

        my %record = ( 'comicName' => "$entry->{'fullName'}" );
        if ( $entry->{'error'} ) {
            $record{'error'}  = "$entry->{'error'}";
            $record{'status'} = "Error";
            $json{'totalErrors'} += 1;
        }
        else {
            $record{'error'} = 0;

            # Spelling kept as-is: anything scanning the report may match
            # on this exact string.
            $record{'status'} = "Successfull";
        }
        push @{ $json{'comics'} }, \%record;
    }

    # Three-argument open with a lexical handle: the mode cannot be altered
    # by the file name, and the handle cannot leak on an early exit.
    open( my $report, '>', $filename )
        or die("ERROR: Failed to create status report: $!\n");
    print {$report} create_json( \%json );

    # Buffered write errors surface at close; hand that result to the caller.
    return close($report);
}
|
|
|
####################################################################### |
|
####################################################################### |
sub writeComic ($$) { |
sub writeComic ($$) { |
my ( $comics, $comic, $date ) = @_; |
my ( $comics, $comic, $date ) = @_; |
my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
$date->{'mon2'} . $date->{'day2'} . "-" . |
$date->{'mon2'} . $date->{'day2'} . "-" . |
$sd . ".html"; |
$sd . ".html"; |
|
$comics->{$comic}{'fullName'} =~ s/&/&/g; |
my $content = <<EOF; |
my $content = <<EOF; |
|
|
<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> |
<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> |
Line 150 sub writeComic ($$) {
|
Line 262 sub writeComic ($$) {
|
$comics->{$comic}{'url'} |
$comics->{$comic}{'url'} |
</a> |
</a> |
</font><br/> |
</font><br/> |
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.jpg" alt="$comic-$date->{'day2'}" /> |
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" /> |
<br/><br/> |
<br/><br/> |
</td></tr> |
</td></tr> |
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> |
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> |
Line 201 sub writeFooter {
|
Line 313 sub writeFooter {
|
print INDEX <<EOF; |
print INDEX <<EOF; |
</table> |
</table> |
<center> |
<center> |
<font size="2"> |
Generated on: <font size="2" color="green">$sysDate</font><br/> |
Generated on: <font color="green">$sysDate</font><br/> |
Version: <font size="2" color="green">$ver</font><br /> |
Version: <font color="green">$ver</font><br /> |
Config Version: <font size="2" color="green">$comicConfigVer</font><br /> |
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a> |
CVS: <a href="http://demandred.dyndns.org:3000/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a> |
<p> |
<br /> |
<a href="http://validator.w3.org/check?uri=referer"><img |
<a href="http://validator.w3.org/check?uri=referer"><img |
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> |
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> |
</p> |
|
</center> |
</center> |
|
|
</body> |
</body> |
Line 236 sub writeTitle ($$) {
|
Line 347 sub writeTitle ($$) {
|
$date->{'mon2'} . $date->{'day2'} . "-" . |
$date->{'mon2'} . $date->{'day2'} . "-" . |
$sd . ".html"; |
$sd . ".html"; |
my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'}; |
my $today = $days[$date->{'dow'}] . " " . $date->{'mon'} . "/" . $date->{'day'} . "/" . $date->{'year'}; |
|
my $today_long = Date_to_Text_Long(Today()); |
|
|
open INDEX, ">$indexFile"; |
open INDEX, ">$indexFile"; |
print INDEX <<EOF; |
print INDEX <<EOF; |
Line 244 sub writeTitle ($$) {
|
Line 356 sub writeTitle ($$) {
|
<html xmlns="http://www.w3.org/1999/xhtml"> |
<html xmlns="http://www.w3.org/1999/xhtml"> |
<head> |
<head> |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> |
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen"> |
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" /> |
|
<link rel="shortcut icon" href="./favicon.ico" /> |
<title>Daily Comics for $today</title> |
<title>Daily Comics for $today</title> |
</head> |
</head> |
<body bgcolor="#FFFFFF"> |
<body bgcolor="#FFFFFF"> |
<h1>Daily Comics for $today</h1> |
<table cellpadding="0" cellspacing="0" border="0"> |
<table align="center" cellpadding="5" cellspacing="0"> |
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr> |
|
<tr><td align="left">$today_long</td></tr> |
|
<tr><td> </td></tr> |
EOF |
EOF |
close (INDEX); |
close (INDEX); |
} |
} |
Line 266 sub directDownload ($$) {
|
Line 381 sub directDownload ($$) {
|
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDate = $date->{'day2'}; |
my $cDate = $date->{'day2'}; |
|
|
my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
my $cmd = "wget -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
|
|
return system($cmd); |
return system($cmd); |
} |
} |
Line 278 sub indexDownload ($$) {
|
Line 393 sub indexDownload ($$) {
|
my ( @lines, $comicLine, $mainURL ); |
my ( @lines, $comicLine, $mainURL ); |
my $comicIndex = "indexes/index.$comic"; |
my $comicIndex = "indexes/index.$comic"; |
|
|
`wget -q $comics->{$comic}{'url'} -O $comicIndex`; |
my $wget_cmd = "wget -q --referer='$comics->{$comic}{'url'}' " . |
|
"--user-agent=\"$USER_AGENT\" " . |
|
"$comics->{$comic}{'url'} -O $comicIndex"; |
|
system($wget_cmd); |
|
|
if ( ! open FILEN, "<$comicIndex" ) { |
if ( ! open FILEN, "<$comicIndex" ) { |
return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . |
return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . |
" (" . $comics->{$comic}{'url'} . ")"; |
" (" . $comics->{$comic}{'url'} . ")"; |
} |
} |
@lines = <FILEN>; |
while (<FILEN>) { |
|
my $line = $_; |
|
$line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} ); |
|
push @lines, $line; |
|
} |
close (FILEN); |
close (FILEN); |
|
|
|
|
unlink ("$comicIndex"); |
unlink ("$comicIndex"); |
|
|
$mainURL = $comics->{$comic}{'url'}; |
$mainURL = $comics->{$comic}{'url'}; |
Line 297 sub indexDownload ($$) {
|
Line 420 sub indexDownload ($$) {
|
## |
## |
## Find the comic strip URL based on the specified regex in the search |
## Find the comic strip URL based on the specified regex in the search |
## |
## |
|
|
foreach my $line (@lines) { |
foreach my $line (@lines) { |
if ( $line =~ m/$comics->{$comic}{'search'}/ ) { |
if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
$comicLine = $1; chomp $comicLine; |
$comicLine = $1; chomp $comicLine; |
} |
} |
} |
} |
|
|
## |
## |
## Save the file to the appropriate directory |
## Save the file to the appropriate directory |
Line 312 sub indexDownload ($$) {
|
Line 436 sub indexDownload ($$) {
|
if ( $comicLine ) { |
if ( $comicLine ) { |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
# Strip & |
|
$comicURL =~ s/\&\;/&/g; |
|
my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; |
system( $cmd ); |
system( $cmd ); |
return 0; |
return 0; |
} |
} |
Line 352 sub fetchDates () {
|
Line 478 sub fetchDates () {
|
$dates{'day2'} = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; |
$dates{'day2'} = ( $dates{'day'} < 10 ) ? "0" . $dates{'day'} : $dates{'day'}; |
$dates{'mon'}++; |
$dates{'mon'}++; |
$dates{'mon2'} = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'}; |
$dates{'mon2'} = ( $dates{'mon'} < 10 ) ? "0".$dates{'mon'} : $dates{'mon'}; |
|
my @days = qw/ Sunday Monday Tuesday Wednesday Thursday Friday Saturday /; |
|
$dates{'wday'} = $days[$dates{'dow'}]; |
|
|
return %dates; |
return %dates; |
} |
} |