--- comics/fetch.pl.new	2017/12/05 13:37:40	1.22
+++ comics/fetch.pl.new	2025/04/02 13:29:10	1.32
@@ -2,6 +2,46 @@
 
 ###############################################################################
 # $Log: fetch.pl.new,v $
+# Revision 1.32  2025/04/02 13:29:10  nick
+# Updated FBOW to use direct download which also required fixing the direct
+# download function.  Lastly, I removed the restriction for resizing images
+# so all comics are resized to a width of 800px.
+#
+# Revision 1.31  2024/12/13 16:03:49  nick
+# This adds the ability to specify a comic as a link only with a default splash image.
+#
+# Revision 1.30  2022/10/04 12:02:03  nick
+# Added --no-check-certificate for wget calls as arcamax was failing its cert check.  Meh, whatever.  It's just comics.
+#
+# Revision 1.29  2020/06/10 21:32:52  nick
+# Centered page
+#
+# Revision 1.28  2020/06/10 21:14:31  nick
+# Updated for w3 validation.
+#
+# Revision 1.27  2019/04/15 12:50:23  nick
+# The script was unable to handle html '&' and convert it, so I added that.  I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick.
+#
+# Revision 1.26  2018/04/22 14:03:54  nick
+# Changed the default for Sunday comics that was causing issues with some comics.
+#
+# Revision 1.25  2018/02/12 13:30:58  nick
+# Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't.
+#
+# Revision 1.24  2018/02/06 14:31:06  nick
+# A status report is now generated in JSON that can easily be scanned so that
+# I can be alerted when there are failures that I miss if I don't read the
+# comics that day.
+#
+# Revision 1.23  2018/01/26 13:05:27  nick
+# Added a new config option to remove all newlines from the resulting index.html
+# file.  This allows for easier parsing for certain comics.  I then updated
+# the URLs to search for and enabled the newline removal for a handful
+# of uComics.
+#
+# I believe I've also properly fixed the Comic Config version displayed on
+# the webpage itself.
+#
 # Revision 1.22  2017/12/05 13:37:40  nick
 # Added the CVS config version to the outpuit.
 #
@@ -33,15 +73,19 @@ use File::Path;
 use Data::Dumper;
 use Pod::Usage;
 use Getopt::Long;
-
+use JSON::Create 'create_json';
 use Date::Calc qw/Date_to_Text_Long Today Day_of_Week Day_of_Week_to_Text/;
+use Data::Dumper;
+
+print("Running");
 
 ## 
 ## Some default values
 ##
-my $ver		= '$Id: fetch.pl.new,v 1.22 2017/12/05 13:37:40 nick Exp $';
+my $ver		= '$Id: fetch.pl.new,v 1.32 2025/04/02 13:29:10 nick Exp $';
 my $comicFile   = "comics.conf";
 my $comicConfigVer = "Unknown";
+my $reportFile = "/home/httpd/html/daily/comics/status_report.json";
 my %comics	= &readComicConfig ( $comicFile );
 my %opts        = &fetchOptions( );
 my $days_ago    = $opts{'days'} || 0;
@@ -65,10 +109,12 @@ print STDOUT "Starting comic fetch at $D
 
 foreach my $comic ( sort keys %comics ) {
 
+  print("Checking Comic $comic\n");
+
   ## Skip if this is Sunday and the comic is weekdays only
   next if ( $comic =~ m/config/ );
   if (($dates{'wday'} eq "Sunday") && 
-      ($comics{$comic}{'sunday'} == 0)) {
+      ($comics{$comic}{'not_sunday'} == 1)) {
     print "Skipping '$comic'; Weekdays only.\n";
     next;
   }
@@ -79,31 +125,36 @@ foreach my $comic ( sort keys %comics )
     print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n";
     next
   }
-
+  
   $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates );
   &writeComic ( \%comics, $comic, \%dates );
 
     my $file = "$imageDir/$comic-$dates{'day2'}.$comics{$comic}{'ext'}";
     my $size = 0;    
 
-    my $cmd = "/usr/bin/identify -verbose $file|";
-    open(IMG, $cmd) || die ("Can't open: $!\n");
-    while(<IMG>) {
-        if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
-            $size = $1 if ( $size == 0);
-        }
-    }
-    close(IMG);
-
-
-	system( "/usr/bin/convert -resize 640 $file $file" )
-		if ( $size > 640 ) 
+    ## Resize downloaded images
+    if($comics{$comic}{'mode'} != 3) {
+	    my $cmd = "/usr/bin/identify -verbose $file|";
+	    open(IMG, $cmd) || die ("Can't open: $!\n");
+	    while(<IMG>) {
+	        if ($_ =~ m/^\s+geometry:\s+(\d+)x\d+.*/i) {
+	            $size = $1 if ( $size == 0);
+	        }
+	    }
+	    close(IMG);
+
+	    system( "/usr/bin/convert -resize 800 $file $file" )
+   	    # if ( $size > 800 ) 
+     }
 }
 
 ## &writeMainIndex ( \%dates );
 
 &writeFooter( \%dates );
 
+print STDOUT "Status written to $reportFile.\n"
+    if (&writeStatusReportJSON(\%comics, $reportFile));
+
 $DATE=`date`;  chomp( $DATE );
 print STDOUT "Completed comic fetch at $DATE\n";
 
@@ -131,6 +182,10 @@ sub downloadComic ($$) {
 			return directDownload ( \%comics, $comic, $date );
 			last SWITCH;
 		}
+		if ( $comics->{$comic}{'mode'} eq 3 ) { 
+			return 0;
+			last SWITCH;
+		}
 	}
 	 
 	return "ERROR: Unknown download method specified for $comics->{$comic}{'fullName'}.";
@@ -147,14 +202,17 @@ sub readComicConfig ($$) {
     $year += 1900;
     $mon = sprintf("%02d", ($mon + 1));
     $day = sprintf("%02d", $day);
+    my $year_short = substr($year, -2);
 
 	open FILEN, "<$comicFile";
 		while (<FILEN>) {
-            if ($_ =~ m/^#.* \$Id: fetch.pl.new,v 1.22 2017/12/05 13:37:40 nick Exp $/) {
+            #if ($_ =~ m/^#.* \$[Ii][Dd]: fetch.pl.new,v 1.23 2018/01/26 13:05:27 nick Exp $/) {
+            if ($_ =~ m/^#.* \$[Ii][dD]: .*,v\ (.*)\ \d{4}\/.*\$$/) {
                 $comicConfigVer = $1;
             }
 			if ( ( $_ !~ m/^#/ ) && ( $_ =~ m/,.*,/) ){
                 $_ =~ s/__YEAR__/$year/g;
+                $_ =~ s/__YR__/$year_short/g;
                 $_ =~ s/__MON__/$mon/g;
                 $_ =~ s/__DAY__/$day/g;
                 
@@ -164,8 +222,9 @@ sub readComicConfig ($$) {
 				$comicConfig{$res[0]}{'mode'} 	  = $res[3];
 				$comicConfig{$res[0]}{'fullName'} = $res[4];
 				$comicConfig{$res[0]}{'ext'} 	  = $res[5];
-                $comicConfig{$res[0]}{'sunday'}   = sprintf("%d", $res[6] || 1);
+                $comicConfig{$res[0]}{'not_sunday'}   = sprintf("%d", $res[6] || 0);
                 $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0);
+                $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0);
 				$comicConfig{$res[0]}{'error'} 	  = 0;
 			}
 			elsif ( $_ =~ m/(.*)\s+=\s+(.*)/ ) {
@@ -179,12 +238,47 @@ sub readComicConfig ($$) {
 
 #######################################################################
 #######################################################################
+## Write a JSON status report (keys 'date', 'comics', 'totalErrors') to $filename.
+## $comicsRef is a hashref of per-comic records; records without a 'fullName'
+## are skipped.  Dies if the report file cannot be opened; otherwise returns
+## 1 when the file was written and closed cleanly, 0 if close() failed.
+sub writeStatusReportJSON ($$) {
+    my ( $comicsRef, $filename ) = @_;
+    ## Single localtime() call so the date parts cannot straddle midnight.
+    my ( $mday, $mon, $year ) = (localtime)[3, 4, 5];
+    my $shortDate = sprintf("%d%02d%02d", $year + 1900, $mon + 1, $mday);
+    ## 'comics' must be [] -- an empty list () vanishes from the hash constructor,
+    ## leaving an odd-length list and an undef value (JSON null) if nothing is pushed.
+    my %json = ('date' => $shortDate, 'comics' => [], 'totalErrors' => 0);
+
+    foreach my $comic (sort keys %$comicsRef) {
+        my $entry = $comicsRef->{$comic};
+        next unless ref($entry) eq 'HASH' && $entry->{'fullName'};
+        my $error = $entry->{'error'};
+        $json{'totalErrors'}++ if $error;
+        push @{$json{'comics'}}, {
+            'comicName' => "$entry->{'fullName'}",
+            'error'     => $error ? "$error" : 0,
+            'status'    => $error ? "Error" : "Successfull",
+        };
+    }
+
+    open(my $fh, '>', $filename) or die ("ERROR: Failed to create status report: $!\n");
+    print {$fh} create_json(\%json);
+    ## Buffered write errors surface at close(), so its result is the success flag.
+    return close($fh) ? 1 : 0;
+}
+
+#######################################################################
+#######################################################################
 sub writeComic ($$) {
 	my ( $comics, $comic, $date ) = @_;
 	my $sd = substr( join( '', $days[$date->{'dow'}] ), 0, 3 );
 	my $indexFile = $indexDir . "/index-" . $date->{'year2'} . 
 			$date->{'mon2'} . $date->{'day2'} . "-" . 
 			$sd . ".html";
+    	$comics->{$comic}{'fullName'} =~ s/&/&amp;/g;
+
 	my $content = <<EOF;
 
 <!-- ********* Begin $comic ($comics->{$comic}{'fullName'}) ******* -->
@@ -192,11 +286,22 @@ sub writeComic ($$) {
     <td align="left">
 <font color="blue"><b>$comics->{$comic}{'fullName'}</b></font> &nbsp; &nbsp; 
 <font size="-2">
-	<a href="$comics->{$comic}{'url'}">
+	<a href="$comics->{$comic}{'url'}" target="_blank">
 		$comics->{$comic}{'url'}
 	</a>
 </font><br/>
+EOF
+	if ( $comics->{$comic}{'mode'} == 3 ) {
+		print("Mode 3\n");
+		$content .= <<EOF;
+<img src="$comics->{$comic}{'ext'}" alt="$comics->{$comic}{'fullName'}" />
+EOF
+	} else {
+		$content .= <<EOF;
 <img src="../images/$date->{'mon2'}$date->{'year2'}/$comic-$date->{'day2'}.$comics->{$comic}{'ext'}" alt="$comic-$date->{'day2'}" />
+EOF
+	}
+	$content .= <<EOF;
 <br/><br/>
 </td></tr>
 <!-- ********* Finish $comic ($comics->{$comic}{'fullName'}) ******* -->
@@ -247,15 +352,13 @@ sub writeFooter {
 	print INDEX <<EOF;
 </table>
 <center>
-<font size="2">
-Generated on: <font color="green">$sysDate</font><br/>
-Version: <font color="green">$ver</font><br />
-Config Version: <font color="green">$comicConfigVer</font><br />
+Generated on: <font size="2" color="green">$sysDate</font><br/>
+Version: <font size="2" color="green">$ver</font><br />
+Config Version: <font size="2" color="green">$comicConfigVer</font><br />
 CVS: <a href="http://demandred.dyndns.org/cgi-bin/cvsweb/comics/">http://demandred.dyndns.org/cgi-bin/cvsweb/comics/</a>
-  <p>
+  <br />
     <a href="http://validator.w3.org/check?uri=referer"><img
       src="http://www.w3.org/Icons/valid-xhtml10-blue" alt="Valid XHTML 1.0 Transitional" height="31" width="88" border="0" /></a>
-  </p>
 </center>
 
 </body>
@@ -292,19 +395,15 @@ sub writeTitle ($$) {
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
-<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen">
-<link rel="shortcut icon" href="./favicon.ico">
+<link href="/daily/comics/comics.css" type="text/css" rel="stylesheet" media="screen" />
+<link rel="shortcut icon" href="./favicon.ico" />
     <title>Daily Comics for $today</title>
   </head>
 <body bgcolor="#FFFFFF">
-<table align="center" cellpadding="5" cellspacing="0">
-<tr><td>
-<table cellpadding="0" cellspacing="0" border="0">
-<tr><td align="Left"><img src="images/daily_comics_heading01.png"></td></tr>
+<table align="center" cellpadding="0" cellspacing="0" border="0">
+<tr><td align="left"><img src="images/daily_comics_heading01.png" alt="Comic Page Heading" /></td></tr>
 <tr><td align="left">$today_long</td></tr>
 <tr><td>&nbsp;</td></tr>
-</td</tr>
-
 EOF
 	close (INDEX);
 }
@@ -321,30 +420,53 @@ sub directDownload ($$) {
         my $cDir  = $date->{'mon2'} . $date->{'year2'};
         my $cDate = $date->{'day2'};
 
-    	my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg";
+    	my $cmd = "wget --no-check-certificate -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" $comics->{$comic}{'url'} -O - | /usr/bin/convert - images/$cDir/$comic-$cDate.$comics->{$comic}{ext}";
+
+        print("Command: $cmd\n");
 
         return system($cmd);
 }
 
 #######################################################################
 #######################################################################
+sub linkOnly ($$) {
+	## No-op handler: accepts ( $comics, $comic, $date ) like the other
+	## download subs but reads none of them, and always returns 0.
+	return 0;
+}
+#######################################################################
+#######################################################################
 sub indexDownload ($$) {
 	my ( $comics, $comic, $date ) = @_;
 	my ( @lines, $comicLine, $mainURL );
 	my $comicIndex = "indexes/index.$comic";
 
-    my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " .
-                   "--user-agent=\"$USER_AGENT\" " .
+    print("Getching Index $comicIndex.\n");
+    print("comic url: $comics->{$comic}{'url'}\n");
+
+    print Dumper($comics->{$comic});
+
+    my $wget_cmd = "wget --referer='$comics->{$comic}{'url'}' " .
+                   "--no-check-certificate --user-agent=\"$USER_AGENT\" " .
                    "$comics->{$comic}{'url'} -O $comicIndex";
-    system($wget_cmd);
+    print ("Using wget command:\n$wget_cmd\n");
+
+    my $status = system($wget_cmd);
+
+    print ("Return status: $status\n");
 
 	if ( ! open FILEN, "<$comicIndex" ) {  
         	return "ERROR: Can't open index file for " . $comics->{$comic}{'fullName'} . 
 		       " (" . $comics->{$comic}{'url'} . ")"; 
 	} 
-		@lines = <FILEN>;
+    while (<FILEN>) {
+        my $line = $_;
+        $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} );
+    	push @lines, $line;
+    }
 	close (FILEN);	
 
+
 	unlink ("$comicIndex");
 
 	$mainURL = $comics->{$comic}{'url'};
@@ -355,9 +477,14 @@ sub indexDownload ($$) {
 	##
 	## Find the comic strip URL based on the specified regex in the search
 	##
+
+    print "Using search $comics->{$comic}{'search'}\n";
+
 	foreach my $line (@lines) {
 		if ( $line =~ m/$comics->{$comic}{'search'}/i ) {
+            print "Found match:\n";
 			$comicLine = $1; chomp $comicLine;
+            print "+ $comicLine\n";
 		}
     }
 
@@ -368,9 +495,13 @@ sub indexDownload ($$) {
 	my $cDate   = $date->{'day2'};
 
 	if ( $comicLine ) {
+        print "Downloading Comic\n";
 		if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; }
 		my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine;
-		my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
+        print "Final URL: $comicURL\n";
+        # Strip &amp;
+        $comicURL =~ s/\&amp\;/&/g;
+		my $cmd = "wget --no-check-certificate --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}";
 		system( $cmd );
 		return 0;
 	}