version 1.21, 2015/10/26 14:25:40
|
version 1.31, 2024/12/13 16:03:49
|
Line 2
|
Line 2
|
|
|
############################################################################### |
############################################################################### |
# $Log$ |
# $Log$ |
|
# Revision 1.31 2024/12/13 16:03:49 nick |
|
# This adds the ability to specify a comic as a link only with a default splash image. |
|
# |
|
# Revision 1.30 2022/10/04 12:02:03 nick |
|
# Added --no-check-certificate for wget calls as arcamax was failing its cert check. Meh, whatever. It's just comics. |
|
# |
|
# Revision 1.29 2020/06/10 21:32:52 nick |
|
# Centered page |
|
# |
|
# Revision 1.28 2020/06/10 21:14:31 nick |
|
# Updated for w3 validation. |
|
# |
|
# Revision 1.27 2019/04/15 12:50:23 nick |
|
# The script was unable to handle html '&' and convert it, so I added that. I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick. |
|
# |
|
# Revision 1.26 2018/04/22 14:03:54 nick |
|
# Changed the default for Sunday comics that was causing issues with some comics. |
|
# |
|
# Revision 1.25 2018/02/12 13:30:58 nick |
|
# Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't. |
|
# |
|
# Revision 1.24 2018/02/06 14:31:06 nick |
|
# A status report is now generated in JSON that can easily be scanned so that |
|
# I can be alerted when there are failures that I miss if I don't read the |
|
# comics that day. |
|
# |
|
# Revision 1.23 2018/01/26 13:05:27 nick |
|
# Added a new config option to remove all newline from the resulting index.html |
|
# file. This allows for easier parsing for certain comics. I then updated |
|
# the URLs to search for and enabled the newline removal for a handful |
|
# of uComics. |
|
# |
|
# I believe I've also properly fixed the Comic Config version displayed on |
|
# the webpage itself. |
|
# |
|
# Revision 1.22 2017/12/05 13:37:40 nick |
|
# Added the CVS config version to the output. |
|
# |
# Revision 1.21 2015/10/26 14:25:40 nick |
# Revision 1.21 2015/10/26 14:25:40 nick |
# Fixed a bug that was improperly including the day of week string preventing the weekend comics from fetching properly. |
# Fixed a bug that was improperly including the day of week string preventing the weekend comics from fetching properly. |
# |
# |
Line 30 use File::Path;
|
Line 68 use File::Path;
|
use Data::Dumper; |
use Data::Dumper; |
use Pod::Usage; |
use Pod::Usage; |
use Getopt::Long; |
use Getopt::Long; |
|
use JSON::Create 'create_json'; |
use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/; |
use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/; |
|
use Data::Dumper; |
|
|
|
print("Running"); |
|
|
## |
## |
## Some default values |
## Some default values |
## |
## |
my $ver = '$Id$'; |
my $ver = '$Id$'; |
my $comicFile = "comics.conf"; |
my $comicFile = "comics.conf"; |
|
my $comicConfigVer = "Unknown"; |
|
my $reportFile = "/home/httpd/html/daily/comics/status_report.json"; |
my %comics = &readComicConfig ( $comicFile ); |
my %comics = &readComicConfig ( $comicFile ); |
my %opts = &fetchOptions( ); |
my %opts = &fetchOptions( ); |
my $days_ago = $opts{'days'} || 0; |
my $days_ago = $opts{'days'} || 0; |
Line 61 print STDOUT "Starting comic fetch at $D
|
Line 104 print STDOUT "Starting comic fetch at $D
|
|
|
foreach my $comic ( sort keys %comics ) { |
foreach my $comic ( sort keys %comics ) { |
|
|
|
print("Checking Comic $comic\n"); |
|
|
## Skip if this is Sunday and the comic is weekdays only |
## Skip if this is Sunday and the comic is weekdays only |
next if ( $comic =~ m/config/ ); |
next if ( $comic =~ m/config/ ); |
if (($dates{'wday'} eq "Sunday") && |
if (($dates{'wday'} eq "Sunday") && |
($comics{$comic}{'sunday'} == 0)) { |
($comics{$comic}{'not_sunday'} == 1)) { |
print "Skipping '$comic'; Weekdays only.\n"; |
print "Skipping '$comic'; Weekdays only.\n"; |
next; |
next; |
} |
} |
Line 75 foreach my $comic ( sort keys %comics )
|
Line 120 foreach my $comic ( sort keys %comics )
|
print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; |
print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; |
next |
next |
} |
} |
|
|
$comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
$comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
&writeComic ( \%comics, $comic, \%dates ); |
&writeComic ( \%comics, $comic, \%dates ); |
|
|
my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}"; |
my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}"; |
my $size = 0; |
my $size = 0; |
|
|
my $cmd = "/usr/bin/identify -verbose $file|"; |
## Resize downloaded images |
open(IMG, $cmd) || die ("Can't open: $!\n"); |
if($comics{$comic}{'mode'} != 3) { |
while(<IMG>) { |
my $cmd = "/usr/bin/identify -verbose $file|"; |
if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) { |
open(IMG, $cmd) || die ("Can't open: $!\n"); |
$size = $1 if ( $size == 0); |
while(<IMG>) { |
} |
if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) { |
} |
$size = $1 if ( $size == 0); |
close(IMG); |
} |
|
} |
|
close(IMG); |
system( "/usr/bin/convert -resize 640 $file $file" ) |
|
if ( $size > 640 ) |
system( "/usr/bin/convert -resize 800 $file $file" ) |
|
if ( $size > 800 ) |
|
} |
} |
} |
|
|
## &writeMainIndex ( \%dates ); |
## &writeMainIndex ( \%dates ); |
|
|
&writeFooter( \%dates ); |
&writeFooter( \%dates ); |
|
|
|
print STDOUT "Status written to $reportFile.\n" |
|
if (&writeStatusReportJSON(\%comics, $reportFile)); |
|
|
$DATE=`date`; chomp( $DATE ); |
$DATE=`date`; chomp( $DATE ); |
print STDOUT "Completed comic fetch at $DATE\n"; |
print STDOUT "Completed comic fetch at $DATE\n"; |
|
|
Line 127 sub downloadComic ($$) {
|
Line 177 sub downloadComic ($$) {
|
return directDownload ( \%comics, $comic, $date ); |
return directDownload ( \%comics, $comic, $date ); |
last SWITCH; |
last SWITCH; |
} |
} |
|
if ( $comics->{$comic}{'mode'} eq 3 ) { |
|
return 0; |
|
last SWITCH; |
|
} |
} |
} |
|
|
return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}."; |
return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}."; |
Line 146 sub readComicConfig ($$) {
|
Line 200 sub readComicConfig ($$) {
|
|
|
open FILEN, "<$comicFile"; |
open FILEN, "<$comicFile"; |
while (<FILEN>) { |
while (<FILEN>) { |
|
#if ($_ =~ m/^#.* \$[Ii][Dd]: fetch.pl.new,v 1.23 2018/01/26 13:05:27 nick Exp $/) { |
|
if ($_ =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/) { |
|
$comicConfigVer = $1; |
|
} |
if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ |
if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){ |
$_ =~ s/__YEAR__/$year/g; |
$_ =~ s/__YEAR__/$year/g; |
$_ =~ s/__MON__/$mon/g; |
$_ =~ s/__MON__/$mon/g; |
Line 157 sub readComicConfig ($$) {
|
Line 215 sub readComicConfig ($$) {
|
$comicConfig{$res[0]}{'mode'} = $res[3]; |
$comicConfig{$res[0]}{'mode'} = $res[3]; |
$comicConfig{$res[0]}{'fullName'} = $res[4]; |
$comicConfig{$res[0]}{'fullName'} = $res[4]; |
$comicConfig{$res[0]}{'ext'} = $res[5]; |
$comicConfig{$res[0]}{'ext'} = $res[5]; |
$comicConfig{$res[0]}{'sunday'} = sprintf("%d", $res[6] || 1); |
$comicConfig{$res[0]}{'not_sunday'} = sprintf("%d", $res[6] || 0); |
$comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); |
$comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); |
|
$comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0); |
$comicConfig{$res[0]}{'error'} = 0; |
$comicConfig{$res[0]}{'error'} = 0; |
} |
} |
elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { |
elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) { |
Line 172 sub readComicConfig ($$) {
|
Line 231 sub readComicConfig ($$) {
|
|
|
####################################################################### |
####################################################################### |
####################################################################### |
####################################################################### |
|
##
## writeStatusReportJSON ( \%comics, $filename )
##
## Writes a JSON status report for the current run to $filename.
##
## Arguments:
##   \%comics  - reference to the comics hash; each comic entry is a hash
##               reference that is read for its 'fullName' and 'error' keys.
##   $filename - path of the report file; it is truncated and rewritten.
##
## The report is a hash with three keys:
##   date        - today's local date as YYYYMMDD
##   comics      - array with one record (comicName, error, status) per comic
##   totalErrors - number of comics whose 'error' value was true
##
## Returns the result of close() on the report file (true on success).
## Dies if the report file cannot be opened for writing.
##
sub writeStatusReportJSON ($$) {
    my ( $comicsRef, $filename ) = @_;

    ## Take a single localtime snapshot so that year, month and day all come
    ## from the same instant, even if the run crosses midnight.
    my ( $mday, $mon, $year ) = (localtime)[3, 4, 5];
    my $shortDate = sprintf("%d%02d%02d", $year + 1900, $mon + 1, $mday);

    ## 'comics' must be an array reference: an empty list "()" would vanish
    ## from the hash constructor and leave the key undefined (JSON null)
    ## whenever no comic gets reported.
    my %json = ('date' => $shortDate, 'comics' => []);
    my $totalErrors = 0;

    foreach my $comic (sort keys %$comicsRef) {
        my $entry = $comicsRef->{$comic};

        ## Skip anything that is not a comic record (plain scalar values)
        ## and records without a display name.
        next unless ref($entry) eq 'HASH';
        next unless $entry->{'fullName'};

        if ($entry->{'error'}) {
            push @{ $json{'comics'} }, {
                'comicName' => "$entry->{'fullName'}",
                'error'     => "$entry->{'error'}",
                'status'    => "Error",
            };
            $totalErrors += 1;
        }
        else {
            ## The spelling "Successfull" is kept as-is: anything scanning
            ## existing reports may be matching on this exact string.
            push @{ $json{'comics'} }, {
                'comicName' => "$entry->{'fullName'}",
                'error'     => 0,
                'status'    => "Successfull",
            };
        }
    }
    $json{'totalErrors'} = $totalErrors;

    ## Three-argument open with a lexical handle: the file name can never be
    ## interpreted as an open mode, and the handle cannot leak on early exit.
    open(my $report, '>', $filename)
        or die ("ERROR: Failed to create status report: $!\n");
    print {$report} create_json (\%json);

    ## Buffered write errors surface at close; hand that result to the caller,
    ## which only announces the report when this is true.
    return close($report);
}
|
|
|
####################################################################### |
|
####################################################################### |
sub writeComic ($$) { |
sub writeComic ($$) { |
my ( $comics, $comic, $date ) = @_; |
my ( $comics, $comic, $date ) = @_; |
my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 ); |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
my $indexFile = $indexDir . "/index-" . $date->{'year2'} . |
$date->{'mon2'} . $date->{'day2'} . "-" . |
$date->{'mon2'} . $date->{'day2'} . "-" . |
$sd . ".html"; |
$sd . ".html"; |
|
$comics->{$comic}{'fullName'} =~ s/&/&/g; |
|
|
my $content = <<EOF; |
my $content = <<EOF; |
|
|
<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> |
<!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* --> |
Line 185 sub writeComic ($$) {
|
Line 279 sub writeComic ($$) {
|
<td align="left"> |
<td align="left"> |
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> |
<font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> |
<font size="-2"> |
<font size="-2"> |
<a href="$comics->{$comic}{'url'}"> |
<a href="$comics->{$comic}{'url'}" target="_blank"> |
$comics->{$comic}{'url'} |
$comics->{$comic}{'url'} |
</a> |
</a> |
</font><br/> |
</font><br/> |
|
EOF |
|
if ( $comics->{$comic}{'mode'} == 3 ) { |
|
print("Mode 3\n"); |
|
$content .= <<EOF; |
|
<img src="$comics->{$comic}{'ext'}" alt="$comics->{$comic}{'fullName'}" /> |
|
EOF |
|
} else { |
|
$content .= <<EOF; |
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" /> |
<img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" /> |
|
EOF |
|
} |
|
$content .= <<EOF; |
<br/><br/> |
<br/><br/> |
</td></tr> |
</td></tr> |
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> |
<!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* --> |
Line 240 sub writeFooter {
|
Line 345 sub writeFooter {
|
print INDEX <<EOF; |
print INDEX <<EOF; |
</table> |
</table> |
<center> |
<center> |
<font size="2"> |
Generated on: <font size="2" color="green">$sysDate</font><br/> |
Generated on: <font color="green">$sysDate</font><br/> |
Version: <font size="2" color="green">$ver</font><br /> |
Version: <font color="green">$ver</font><br /> |
Config Version: <font size="2" color="green">$comicConfigVer</font><br /> |
CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a> |
CVS: <a href="http://demandred.dyndns.org:3000/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a> |
<p> |
<br /> |
<a href="http://validator.w3.org/check?uri=referer"><img |
<a href="http://validator.w3.org/check?uri=referer"><img |
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> |
src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a> |
</p> |
|
</center> |
</center> |
|
|
</body> |
</body> |
Line 284 sub writeTitle ($$) {
|
Line 388 sub writeTitle ($$) {
|
<html xmlns="http://www.w3.org/1999/xhtml"> |
<html xmlns="http://www.w3.org/1999/xhtml"> |
<head> |
<head> |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> |
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen"> |
<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" /> |
<link rel="shortcut icon" href="./favicon.ico"> |
<link rel="shortcut icon" href="./favicon.ico" /> |
<title>Daily Comics for $today</title> |
<title>Daily Comics for $today</title> |
</head> |
</head> |
<body bgcolor="#FFFFFF"> |
<body bgcolor="#FFFFFF"> |
<table align="center" cellpadding="5" cellspacing="0"> |
<table align="center" cellpadding="0" cellspacing="0" border="0"> |
<tr><td> |
<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr> |
<table cellpadding="0" cellspacing="0" border="0"> |
|
<tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr> |
|
<tr><td align="left">$today_long</td></tr> |
<tr><td align="left">$today_long</td></tr> |
<tr><td> </td></tr> |
<tr><td> </td></tr> |
</td</tr> |
|
|
|
EOF |
EOF |
close (INDEX); |
close (INDEX); |
} |
} |
Line 313 sub directDownload ($$) {
|
Line 413 sub directDownload ($$) {
|
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDate = $date->{'day2'}; |
my $cDate = $date->{'day2'}; |
|
|
my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
my $cmd = "wget --no-check-certificate -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
|
|
return system($cmd); |
return system($cmd); |
} |
} |
|
|
####################################################################### |
####################################################################### |
####################################################################### |
####################################################################### |
|
##
## linkOnly ( \%comics, $comic, \%dates )
##
## Handler for comics that are shown as a link only: nothing is fetched,
## so it always returns 0. The arguments are accepted and ignored.
##
## The former "($$)" prototype was dropped: it declared two arguments, so
## any parenthesised three-argument call compiled after this definition
## was rejected with "Too many arguments".
##
## NOTE(review): no caller is visible in this part of the file; the
## three-argument form follows the other download handlers - confirm.
##
sub linkOnly {
    return 0;
}
|
####################################################################### |
|
####################################################################### |
sub indexDownload ($$) { |
sub indexDownload ($$) { |
my ( $comics, $comic, $date ) = @_; |
my ( $comics, $comic, $date ) = @_; |
my ( @lines, $comicLine, $mainURL ); |
my ( @lines, $comicLine, $mainURL ); |
my $comicIndex = "indexes/index.$comic"; |
my $comicIndex = "indexes/index.$comic"; |
|
|
my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " . |
print("Getching Index $comicIndex.\n"); |
"--user-agent=\"$USER_AGENT\" " . |
print("comic url: $comics->{$comic}{'url'}\n"); |
|
|
|
print Dumper($comics->{$comic}); |
|
|
|
my $wget_cmd = "wget --referer='$comics->{$comic}{'url'}' " . |
|
"--no-check-certificate --user-agent=\"$USER_AGENT\" " . |
"$comics->{$comic}{'url'} -O $comicIndex"; |
"$comics->{$comic}{'url'} -O $comicIndex"; |
system($wget_cmd); |
print ("Using wget command:\n$wget_cmd\n"); |
|
|
|
my $status = system($wget_cmd); |
|
|
|
print ("Return status: $status\n"); |
|
|
if ( ! open FILEN, "<$comicIndex" ) { |
if ( ! open FILEN, "<$comicIndex" ) { |
return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . |
return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . |
" (" . $comics->{$comic}{'url'} . ")"; |
" (" . $comics->{$comic}{'url'} . ")"; |
} |
} |
@lines = <FILEN>; |
while (<FILEN>) { |
|
my $line = $_; |
|
$line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} ); |
|
push @lines, $line; |
|
} |
close (FILEN); |
close (FILEN); |
|
|
|
|
unlink ("$comicIndex"); |
unlink ("$comicIndex"); |
|
|
$mainURL = $comics->{$comic}{'url'}; |
$mainURL = $comics->{$comic}{'url'}; |
Line 347 sub indexDownload ($$) {
|
Line 468 sub indexDownload ($$) {
|
## |
## |
## Find the comic strip URL based on the specified regex in the search |
## Find the comic strip URL based on the specified regex in the search |
## |
## |
|
|
|
print "Using search $comics->{$comic}{'search'}\n"; |
|
|
foreach my $line (@lines) { |
foreach my $line (@lines) { |
if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
|
print "Found match:\n"; |
$comicLine = $1; chomp $comicLine; |
$comicLine = $1; chomp $comicLine; |
|
print "+ $comicLine\n"; |
} |
} |
} |
} |
|
|
Line 360 sub indexDownload ($$) {
|
Line 486 sub indexDownload ($$) {
|
my $cDate = $date->{'day2'}; |
my $cDate = $date->{'day2'}; |
|
|
if ( $comicLine ) { |
if ( $comicLine ) { |
|
print "Downloading Comic\n"; |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; |
print "Final URL: $comicURL\n"; |
|
# Strip & |
|
$comicURL =~ s/\&\;/&/g; |
|
my $cmd = "wget --no-check-certificate --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; |
system( $cmd ); |
system( $cmd ); |
return 0; |
return 0; |
} |
} |