|
|
| version 1.25, 2018/02/12 13:30:58 | version 1.27, 2019/04/15 12:50:23 |
|---|---|
| Line 2 | Line 2 |
| ############################################################################### | ############################################################################### |
| # $Log$ | # $Log$ |
| | # Revision 1.27 2019/04/15 12:50:23 nick |
| | # The script was unable to handle html '&amp;' and convert it, so I added that. I probably should see if there's a library or something that handles all those automagically but I just tossed a regex in there for now that does the trick. |
| | # |
| | # Revision 1.26 2018/04/22 14:03:54 nick |
| | # Changed the default for Sunday comics that was causing issues with some comics. |
| | # |
| # Revision 1.25 2018/02/12 13:30:58 nick | # Revision 1.25 2018/02/12 13:30:58 nick |
| # Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't. | # Added an easier to compare date string to determine if the status json file was updated today and report if it wasn't. |
| # | # |
| Line 86 foreach my $comic ( sort keys %comics ) | Line 92 foreach my $comic ( sort keys %comics ) |
| ## Skip if this is Sunday and the comic is weekdays only | ## Skip if this is Sunday and the comic is weekdays only |
| next if ( $comic =~ m/config/ ); | next if ( $comic =~ m/config/ ); |
| if (($dates{'wday'} eq "Sunday") && | if (($dates{'wday'} eq "Sunday") && |
| ($comics{$comic}{'sunday'} == 0)) { | ($comics{$comic}{'not_sunday'} == 1)) { |
| print "Skipping '$comic'; Weekdays only.\n"; | print "Skipping '$comic'; Weekdays only.\n"; |
| next; | next; |
| } | } |
| Line 97 foreach my $comic ( sort keys %comics ) | Line 103 foreach my $comic ( sort keys %comics ) |
| print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; | print "Skipping '$comic' ($comics{$comic}{'sunday_only'}); Sunday only.\n"; |
| next | next |
| } | } |
| $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); | $comics{$comic}{'error'} = &downloadComic ( \%comics, $comic, \%dates ); |
| &writeComic ( \%comics, $comic, \%dates ); | &writeComic ( \%comics, $comic, \%dates ); |
| Line 186 sub readComicConfig ($$) { | Line 192 sub readComicConfig ($$) { |
| $comicConfig{$res[0]}{'mode'} = $res[3]; | $comicConfig{$res[0]}{'mode'} = $res[3]; |
| $comicConfig{$res[0]}{'fullName'} = $res[4]; | $comicConfig{$res[0]}{'fullName'} = $res[4]; |
| $comicConfig{$res[0]}{'ext'} = $res[5]; | $comicConfig{$res[0]}{'ext'} = $res[5]; |
| $comicConfig{$res[0]}{'sunday'} = sprintf("%d", $res[6] || 1); | $comicConfig{$res[0]}{'not_sunday'} = sprintf("%d", $res[6] || 0); |
| $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); | $comicConfig{$res[0]}{'sunday_only'} = sprintf("%d", $res[7] || 0); |
| $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0); | $comicConfig{$res[0]}{'remove_newlines'} = sprintf("%d", $res[8] || 0); |
| $comicConfig{$res[0]}{'error'} = 0; | $comicConfig{$res[0]}{'error'} = 0; |
| Line 208 sub writeStatusReportJSON ($$) { | Line 214 sub writeStatusReportJSON ($$) { |
| my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900, | my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900, |
| (localtime)[4] + 1, | (localtime)[4] + 1, |
| (localtime)[3]); | (localtime)[3]); |
| my %json = ('date' => $shortDate, 'comics' => []); | my %json = ('date' => $shortDate, 'comics' => ()); |
| my $totalErrors = 0; | my $totalErrors = 0; |
| foreach my $comic (sort keys %comics) { | foreach my $comic (sort keys %comics) { |
| Line 217 sub writeStatusReportJSON ($$) { | Line 223 sub writeStatusReportJSON ($$) { |
| my %error = ('comicName' => "$comics{$comic}{'fullName'}", | my %error = ('comicName' => "$comics{$comic}{'fullName'}", |
| 'error' => "$comics{$comic}{'error'}", | 'error' => "$comics{$comic}{'error'}", |
| 'status' => "Error"); | 'status' => "Error"); |
| push $json{'comics'}, \%error; | push @{$json{'comics'}}, \%error; |
| $totalErrors += 1; | $totalErrors += 1; |
| } else { | } else { |
| my %status = ('comicName' => "$comics{$comic}{'fullName'}", | my %status = ('comicName' => "$comics{$comic}{'fullName'}", |
| 'error' => 0, | 'error' => 0, |
| 'status' => "Successfull"); | 'status' => "Successfull"); |
| push $json{'comics'}, \%status; | push @{$json{'comics'}}, \%status; |
| } | } |
| } | } |
| $json{'totalErrors'} = $totalErrors; | $json{'totalErrors'} = $totalErrors; |
| Line 377 sub directDownload ($$) { | Line 383 sub directDownload ($$) { |
| my $cDir = $date->{'mon2'} . $date->{'year2'}; | my $cDir = $date->{'mon2'} . $date->{'year2'}; |
| my $cDate = $date->{'day2'}; | my $cDate = $date->{'day2'}; |
| my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; | my $cmd = "wget -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
| return system($cmd); | return system($cmd); |
| } | } |
| Line 389 sub indexDownload ($$) { | Line 395 sub indexDownload ($$) { |
| my ( @lines, $comicLine, $mainURL ); | my ( @lines, $comicLine, $mainURL ); |
| my $comicIndex = "indexes/index.$comic"; | my $comicIndex = "indexes/index.$comic"; |
| my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " . | my $wget_cmd = "wget -q --referer='$comics->{$comic}{'url'}' " . |
| "--user-agent=\"$USER_AGENT\" " . | "--user-agent=\"$USER_AGENT\" " . |
| "$comics->{$comic}{'url'} -O $comicIndex"; | "$comics->{$comic}{'url'} -O $comicIndex"; |
| system($wget_cmd); | system($wget_cmd); |
| Line 400 sub indexDownload ($$) { | Line 406 sub indexDownload ($$) { |
| } | } |
| while (<FILEN>) { | while (<FILEN>) { |
| my $line = $_; | my $line = $_; |
| $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newliens'} ); | $line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} ); |
| push @lines, $line; | push @lines, $line; |
| } | } |
| close (FILEN); | close (FILEN); |
| unlink ("$comicIndex"); | unlink ("$comicIndex"); |
| $mainURL = $comics->{$comic}{'url'}; | $mainURL = $comics->{$comic}{'url'}; |
| Line 415 sub indexDownload ($$) { | Line 422 sub indexDownload ($$) { |
| ## | ## |
| ## Find the comic strip URL based on the specified regex in the search | ## Find the comic strip URL based on the specified regex in the search |
| ## | ## |
| foreach my $line (@lines) { | foreach my $line (@lines) { |
| if ( $line =~ m/$comics->{$comic}{'search'}/i ) { | if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
| $comicLine = $1; chomp $comicLine; | $comicLine = $1; chomp $comicLine; |
| Line 430 sub indexDownload ($$) { | Line 438 sub indexDownload ($$) { |
| if ( $comicLine ) { | if ( $comicLine ) { |
| if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } | if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
| my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; | my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
| my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; | # Strip &amp; |
| | $comicURL =~ s/\&amp\;/&/g; |
| | my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; |
| system( $cmd ); | system( $cmd ); |
| return 0; | return 0; |
| } | } |