version 1.26, 2018/04/22 14:03:54
|
version 1.27, 2019/04/15 12:50:23
|
Line 2
|
Line 2
|
|
|
############################################################################### |
############################################################################### |
# $Log$ |
# $Log$ |
|
# Revision 1.27 2019/04/15 12:50:23 nick |
|
# The script was unable to handle the HTML entity '&amp;' and convert it to '&', so I added that. I probably should see if there's a library or something that handles all those entities automagically, but I just tossed a regex in there for now that does the trick. |
|
# |
# Revision 1.26 2018/04/22 14:03:54 nick |
# Revision 1.26 2018/04/22 14:03:54 nick |
# Changed the default for Sunday comics that was causing issues with some comics. |
# Changed the default for Sunday comics that was causing issues with some comics. |
# |
# |
Line 211 sub writeStatusReportJSON ($$) {
|
Line 214 sub writeStatusReportJSON ($$) {
|
my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900, |
my $shortDate = sprintf("%d%02d%02d", (localtime)[5] + 1900, |
(localtime)[4] + 1, |
(localtime)[4] + 1, |
(localtime)[3]); |
(localtime)[3]); |
my %json = ('date' => $shortDate, 'comics' => []); |
my %json = ('date' => $shortDate, 'comics' => ()); |
my $totalErrors = 0; |
my $totalErrors = 0; |
|
|
foreach my $comic (sort keys %comics) { |
foreach my $comic (sort keys %comics) { |
Line 220 sub writeStatusReportJSON ($$) {
|
Line 223 sub writeStatusReportJSON ($$) {
|
my %error = ('comicName' => "$comics{$comic}{'fullName'}", |
my %error = ('comicName' => "$comics{$comic}{'fullName'}", |
'error' => "$comics{$comic}{'error'}", |
'error' => "$comics{$comic}{'error'}", |
'status' => "Error"); |
'status' => "Error"); |
push $json{'comics'}, \%error; |
push @{$json{'comics'}}, \%error; |
$totalErrors += 1; |
$totalErrors += 1; |
} else { |
} else { |
my %status = ('comicName' => "$comics{$comic}{'fullName'}", |
my %status = ('comicName' => "$comics{$comic}{'fullName'}", |
'error' => 0, |
'error' => 0, |
'status' => "Successfull"); |
'status' => "Successfull"); |
push $json{'comics'}, \%status; |
push @{$json{'comics'}}, \%status; |
} |
} |
} |
} |
$json{'totalErrors'} = $totalErrors; |
$json{'totalErrors'} = $totalErrors; |
Line 380 sub directDownload ($$) {
|
Line 383 sub directDownload ($$) {
|
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDir = $date->{'mon2'} . $date->{'year2'}; |
my $cDate = $date->{'day2'}; |
my $cDate = $date->{'day2'}; |
|
|
my $cmd = "wget -q $file --referer=\"" . $comics->{$comic}{'url'} ."\" --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
my $cmd = "wget -q $file --referer='" . $comics->{$comic}{'url'} ."' --user-agent=\"$USER_AGENT\" -O - | /usr/bin/convert - jpeg:images/$cDir/$comic-$cDate.jpg"; |
|
|
return system($cmd); |
return system($cmd); |
} |
} |
Line 392 sub indexDownload ($$) {
|
Line 395 sub indexDownload ($$) {
|
my ( @lines, $comicLine, $mainURL ); |
my ( @lines, $comicLine, $mainURL ); |
my $comicIndex = "indexes/index.$comic"; |
my $comicIndex = "indexes/index.$comic"; |
|
|
my $wget_cmd = "wget -q --referer=\"$comics->{$comic}{'url'}\" " . |
my $wget_cmd = "wget -q --referer='$comics->{$comic}{'url'}' " . |
"--user-agent=\"$USER_AGENT\" " . |
"--user-agent=\"$USER_AGENT\" " . |
"$comics->{$comic}{'url'} -O $comicIndex"; |
"$comics->{$comic}{'url'} -O $comicIndex"; |
system($wget_cmd); |
system($wget_cmd); |
Line 403 sub indexDownload ($$) {
|
Line 406 sub indexDownload ($$) {
|
} |
} |
while (<FILEN>) { |
while (<FILEN>) { |
my $line = $_; |
my $line = $_; |
$line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newliens'} ); |
$line =~ s/\R|\ \ +|\t//g if ( $comics->{$comic}{'remove_newlines'} ); |
push @lines, $line; |
push @lines, $line; |
} |
} |
close (FILEN); |
close (FILEN); |
|
|
|
|
unlink ("$comicIndex"); |
unlink ("$comicIndex"); |
|
|
$mainURL = $comics->{$comic}{'url'}; |
$mainURL = $comics->{$comic}{'url'}; |
Line 418 sub indexDownload ($$) {
|
Line 422 sub indexDownload ($$) {
|
## |
## |
## Find the comic strip URL based on the specified regex in the search |
## Find the comic strip URL based on the specified regex in the search |
## |
## |
|
|
foreach my $line (@lines) { |
foreach my $line (@lines) { |
if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
if ( $line =~ m/$comics->{$comic}{'search'}/i ) { |
$comicLine = $1; chomp $comicLine; |
$comicLine = $1; chomp $comicLine; |
Line 433 sub indexDownload ($$) {
|
Line 438 sub indexDownload ($$) {
|
if ( $comicLine ) { |
if ( $comicLine ) { |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
if ( $comicLine =~ m/(gif|jpg|png)/i ) { $comics->{$comic}{'ext'} = $1; } |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $comicURL = ( $comicLine =~ m/http/ ) ? $comicLine : $mainURL . $comicLine; |
my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer=\"" . $comics->{$comic}{'url'} . "\" -q $comicURL -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; |
# Strip &amp; (turn the HTML entity back into a plain '&')
|
$comicURL =~ s/\&\;/&/g; |
|
my $cmd = "wget --user-agent=\"$USER_AGENT\" --referer='" . $comics->{$comic}{'url'} . "' -q '$comicURL' -O images/$cDir/$comic-$cDate.$comics->{$comic}{'ext'}"; |
system( $cmd ); |
system( $cmd ); |
return 0; |
return 0; |
} |
} |