--- comics/fetch.pl.new 2018/04/22 14:03:54 1.26
+++ comics/fetch.pl.new 2025/04/02 13:29:10 1.32
@@ -2,6 +2,26 @@
###############################################################################
# $Log: fetch.pl.new,v $
+# Revision 1.32 2025/04/02 13:29:10 nick
+# Updated FBOW to use direct download which also required fixing the direct
+# download function. Lastly, I removed the restriction for resizing images
+# so all comics are resized to a width of 800px.
+#
+# Revision 1.31 2024/12/13 16:03:49 nick
+# This adds the ability to specify a comic as a link only with a default splash image.
+#
+# Revision 1.30 2022/10/04 12:02:03 nick
+# Added --no-check-certificate for wget calls as arcamax was failing its cert check. Meh, whatever. It's just comics.
+#
+# Revision 1.29 2020/06/10 21:32:52 nick
+# Centered page
+#
+# Revision 1.28 2020/06/10 21:14:31 nick
+# Updated for w3 validation.
+#
+# Revision 1.27 2019/04/15 12:50:23 nick
+# The script was unable to handle html '&amp;' and convert it, so I added that. I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick.
+#
# Revision 1.26 2018/04/22 14:03:54 nick
# Changed the default for Sunday comics that was causing issues with some comics.
#
@@ -55,11 +75,14 @@ use Pod::Usage;
use Getopt::Long;
use JSON::Create 'create_json';
use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
+use Data::Dumper;
+
+print("Running");
##
## Some default values
##
-my $ver = '$Id: fetch.pl.new,v 1.26 2018/04/22 14:03:54 nick Exp $';
+my $ver = '$Id: fetch.pl.new,v 1.32 2025/04/02 13:29:10 nick Exp $';
my $comicFile = "comics.conf";
my $comicConfigVer = "Unknown";
my $reportFile = "/home/httpd/html/daily/comics/status_report.json";
@@ -86,6 +109,8 @@ print STDOUT "Starting comic fetch at $D
foreach my $comic ( sort keys %comics ) {
+ print("Checking Comic $comic\n");
+
## Skip if this is Sunday and the comic is weekdays only
next if ( $comic =~ m/config/ );
if (($dates{'wday'} eq "Sunday") &&
@@ -107,18 +132,20 @@ foreach my $comic ( sort keys %comics )
my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
my $size = 0;
- my $cmd = "/usr/bin/identify -verbose $file|";
- open(IMG, $cmd) || die ("Can't open: $!\n");
- while(
) {
- if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
- $size = $1 if ( $size == 0);
- }
- }
- close(IMG);
-
-
- system( "/usr/bin/convert -resize 640 $file $file" )
- if ( $size > 640 )
+ ## Resize downloaded images
+ if($comics{$comic}{'mode'} != 3) {
+ my $cmd = "/usr/bin/identify -verbose $file|";
+ open(IMG, $cmd) || die ("Can't open: $!\n");
+ while(
) {
+ if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
+ $size = $1 if ( $size == 0);
+ }
+ }
+ close(IMG);
+
+ system( "/usr/bin/convert -resize 800 $file $file" )
+ # if ( $size > 800 )
+ }
}
## &writeMainIndex ( \%dates );
@@ -155,6 +182,10 @@ sub downloadComic ($$) {
return directDownload ( \%comics, $comic, $date );
last SWITCH;
}
+ if ( $comics->{$comic}{'mode'} eq 3 ) {
+ return 0;
+ last SWITCH;
+ }
}
return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
@@ -171,6 +202,7 @@ sub readComicConfig ($$) {
$year += 1900;
$mon = sprintf("%02d", ($mon + 1));
$day = sprintf("%02d", $day);
+ my $year_short = substr($year, -2);
open FILEN, "<$comicFile";
while () {
@@ -180,6 +212,7 @@ sub readComicConfig ($$) {
}
if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
$_ =~ s/__YEAR__/$year/g;
+ $_ =~ s/__YR__/$year_short/g;
$_ =~ s/__MON__/$mon/g;
$_ =~ s/__DAY__/$day/g;
@@ -211,7 +244,7 @@ sub writeStatusReportJSON ($$) {
my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900,
(localtime)[4] + 1,
(localtime)[3]);
- my %json = ('date' => $shortDate, 'comics' => []);
+ my %json = ('date' => $shortDate, 'comics' => ());
my $totalErrors = 0;
foreach my $comic (sort keys %comics) {
@@ -220,13 +253,13 @@ sub writeStatusReportJSON ($$) {
my %error = ('comicName' => "$comics{$comic}{'fullName'}",
'error' => "$comics{$comic}{'error'}",
'status' => "Error");
- push $json{'comics'}, \%error;
+ push @{$json{'comics'}}, \%error;
$totalErrors += 1;
} else {
my %status = ('comicName' => "$comics{$comic}{'fullName'}",
'error' => 0,
'status' => "Successfull");
- push $json{'comics'}, \%status;
+ push @{$json{'comics'}}, \%status;
}
}
$json{'totalErrors'} = $totalErrors;
@@ -244,6 +277,8 @@ sub writeComic ($$) {
my $indexFile = $indexDir . "/index-" . $date->{'year2'} .
$date->{'mon2'} . $date->{'day2'} . "-" .
$sd . ".html";
+ $comics->{$comic}{'fullName'} =~ s/&/&/g;
+
my $content = <{$comic}{'fullName'}) ******* -->
@@ -251,11 +286,22 @@ sub writeComic ($$) {
$comics->{$comic}{'fullName'}
-
+
$comics->{$comic}{'url'}
+EOF
+ if ( $comics->{$comic}{'mode'} == 3 ) {
+ print("Mode 3\n");
+ $content .= <
+EOF
+ } else {
+ $content .= <
+EOF
+ }
+ $content .= <
|
@@ -306,15 +352,13 @@ sub writeFooter {
print INDEX <
-
-Generated on: $sysDate
-Version: $ver
-Config Version: $comicConfigVer
+Generated on: $sysDate
+Version: $ver
+Config Version: $comicConfigVer
CVS: http://demandred.dyndns.org/cgi-bin/cvsweb/comics/
-
+
-
-
-
-
- |
+
+ |
$today_long |
|
-
-
EOF
close (INDEX);
}
@@ -380,22 +420,40 @@ sub directDownload ($$) {
my $cDir = $date->{'mon2'} . $date->{'year2'};
my $cDate = $date->{'day2'};
- my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
+ my $cmd = "wget --no-check-certificate -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" $comics->{$comic}{'url'} -O - | /usr/bin/convert - images/$cDir/$comic-$cDate.$comics->{$comic}{ext}";
+
+ print("Command: $cmd\n");
return system($cmd);
}
#######################################################################
#######################################################################
+sub linkOnly ($$) {
+ my ( $comics, $comic, $date ) = @_;
+
+ return 0;
+}
+#######################################################################
+#######################################################################
sub indexDownload ($$) {
my ( $comics, $comic, $date ) = @_;
my ( @lines, $comicLine, $mainURL );
my $comicIndex = "indexes/index.$comic";
- my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .
- "--user-agent=\"$USER_AGENT\" " .
+ print("Getching Index $comicIndex.\n");
+ print("comic url: $comics->{$comic}{'url'}\n");
+
+ print Dumper($comics->{$comic});
+
+ my $wget_cmd = "wget --referer='$comics->{$comic}{'url'}' " .
+ "--no-check-certificate --user-agent=\"$USER_AGENT\" " .
"$comics->{$comic}{'url'} -O $comicIndex";
- system($wget_cmd);
+ print ("Using wget command:\n$wget_cmd\n");
+
+ my $status = system($wget_cmd);
+
+ print ("Return status: $status\n");
if ( ! open FILEN, "<$comicIndex" ) {
return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} .
@@ -403,11 +461,12 @@ sub indexDownload ($$) {
}
while () {
my $line = $_;
- $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newliens'} );
+ $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} );
push @lines, $line;
}
close (FILEN);
+
unlink ("$comicIndex");
$mainURL = $comics->{$comic}{'url'};
@@ -418,9 +477,14 @@ sub indexDownload ($$) {
##
## Find the comic strip URL based on the specified regex in the search
##
+
+ print "Using search $comics->{$comic}{'search'}\n";
+
foreach my $line (@lines) {
if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
+ print "Found match:\n";
$comicLine = $1; chomp $comicLine;
+ print "+ $comicLine\n";
}
}
@@ -431,9 +495,13 @@ sub indexDownload ($$) {
my $cDate = $date->{'day2'};
if ( $comicLine ) {
+ print "Downloading Comic\n";
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
- my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
+ print "Final URL: $comicURL\n";
+ # Strip &amp; (HTML-escaped ampersand) from the URL
+ $comicURL =~ s/\&\;/&/g;
+ my $cmd = "wget --no-check-certificate --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
system( $cmd );
return 0;
}
|