@ -1,143 +1,153 @@
#!/bin/env perl
#!/usr/bin/env perl
#use diagnostics;
# Multi-UA web scraper written in Perl. It scans the non-private IPv4 address space in reverse order and requests rDNS for every address that has a web-related port open.
# (C) Roy van Lunsen
# TODO: Add DB support for storing HTMLs, b64 screenshots, etc.
# TODO: Utilize the DB to randomize the scanning of IPv4 blocks while still supporting continuation.
# TODO: Add an RE-based querying system for scoped IPv4/domain scrapes.
# TODO: Reliably detect when being blocked by companies that own (too) much network space and delay requests appropriately (might not be necessary if various parts are sufficiently randomized).
use strict ;
use strict ;
use warnings ;
use warnings ;
use utf8 ;
use utf8 ;
use feature 'unicode_strings' ;
use File::Path qw( make_path ) ;
use File::Path qw( make_path ) ;
use Digest::SHA qw/sha1_hex/ ;
use Net::Ping ;
use Net::Ping ;
use Net::DNS ;
use Net::DNS ;
use Net::Curl::Easy qw/:constants/ ;
use Selenium::Firefox ;
use Selenium::Firefox ;
use Selenium::Firefox::Profile ;
use Selenium::Firefox::Profile ;
#use LWP::UserAgent;
# Curl handle shared by the fetch subs for the second, non-browser request.
# It advertises a fixed desktop Firefox user agent, follows at most ten
# redirects and gives up on a transfer after 15 seconds.
my $easy    = Net::Curl::Easy->new;
my $easy_ua = 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0';
$easy->setopt( CURLOPT_USERAGENT,      $easy_ua );
$easy->setopt( CURLOPT_FOLLOWLOCATION, 1 );
$easy->setopt( CURLOPT_MAXREDIRS,      10 );
$easy->setopt( CURLOPT_TIMEOUT,        15 );
# Script-wide scan state.
#   $continue_from, $ii1..$ii4 - resume address given on the command line and its four octets
#   $iii1..$iii3               - start values of the first-octet ranges in the scan loop
#   $alpha, $beta, $delta      - start time, end time and duration of one scan round
#   $gamma                     - progress counter derived from the current octets
#   $retry_i                   - set by connectivity_check when the round has to be redone
my ( $continue_from, $ii1, $ii2, $ii3, $ii4, $iii1, $iii2, $iii3, $initvar, $alpha, $beta, $delta, $gamma, $retry_i );

if ( $ARGV[0] ) {

    # Resume mode: the first argument must be a dotted-quad ipv4 address.
    chomp( $continue_from = $ARGV[0] );
    my @octets = $continue_from =~ m/\A([0-9]{1,3})[.]([0-9]{1,3})[.]([0-9]{1,3})[.]([0-9]{1,3})\z/;
    if ( @octets != 4 or grep { $_ > 255 } @octets ) {

        # No system call failed here, so $! is deliberately left out of the message.
        die "\"$continue_from\" is not a valid ipv4 address\n";
    }
    ( $ii1, $ii2, $ii3, $ii4 ) = @octets;
    print 'Continuing from ipv4 address ', $continue_from, "\n";
}
else {

    # Fresh scan: give the operator five seconds to abort.
    print "\n\n";
    foreach my $elapsed ( 0 .. 4 ) {
        print 'Starting a new tcp/udp 80/443 ipv4 scan in ', ( 5 - $elapsed ), "s.\n";
        sleep 1;
    }
    print 'Starting...', "\n";
}
# Shared scan containers.
#   %args  - current 'host', 'port' and 'proto'
#   %args2 - constructor arguments for the Firefox profile
#   %hoa   - port => list of hosts that acknowledged the SYN ping on that port
#   @host  - targets of the current scan round
my ( %args, %args2, %hoa, @host );

# Ghacks user.js is a good start.
$args2{'profile_dir'} = '/home/miami/.Mozilla3/Firefox/ud8j40yn.default/';
my $profile = Selenium::Firefox::Profile->new(%args2);

# Exactly one browser instance is started; constructing the driver twice
# would orphan the first Firefox process.
my $driver = Selenium::Firefox->new(
    firefox_profile    => $profile,
    marionette_enabled => 1,
);
$driver->debug_on;
$driver->set_timeout( 'script',    10000 );
$driver->set_timeout( 'implicit',  20000 );
$driver->set_timeout( 'page load', 15000 );

# SYN pinger with a 3 second timeout, reused for every port probe.
my $p = Net::Ping->new( "syn", 3 );

# NOTE(review): the next two statements look like leftover experimentation
# (custom user agent plus a fixed start page) - confirm they are still wanted.
$driver->set_user_agent('efjnvgjkdnl');    #window.navigator.userAgent
$driver->get("https://xn--eekf.net");
# Shut the scraper down in response to a signal.
#
#   $_[0] - ipv4 address that was being scanned; it is written to ./lastip
#           so a later run can be resumed from it.
#
# Closes the pinger, stops the browser binary and then terminates the
# process. Does not return.
sub interrupt {
    if ( @_ == 1 and defined $_[0] ) {
        if ( open my $TMP_TMP, '>', './lastip' ) {
            print $TMP_TMP $_[0], "\n";
            close $TMP_TMP;
        }
        else {
            warn "Cannot write ./lastip: $!\n";
        }
    }
    else {
        print "no args", "\n";
    }

    # Clean up first, exit last: leaving the process before these two calls
    # would keep the pinger and the browser binary alive.
    $p->close();
    $driver->shutdown_binary;
    exit;
}

# %SIG takes a code reference (or a bare sub name). A string containing a
# call expression is looked up as a sub name and never runs, so wrap the
# call in closures that read the current host at signal time.
$SIG{'INT'} = sub { interrupt( $args{'host'} ) };
$SIG{'HUP'} = sub { interrupt( $args{'host'} ) };
# Check that a wireless interface (wl*) has an ipv4 address and carrier.
#
#   $_[0] - optional data directory of the site currently being captured.
#
# When connectivity is missing, $retry_i is set to 1 so the main loop redoes
# the round, and - if a directory was passed - the capture files inside it
# and the directory itself are removed so the site is fetched again later.
sub connectivity_check {
    my ($datadir) = @_;

    chomp( my $connectivity = `ip a | grep -A 2 -Ei '^[0-9]+: wl[^:]+:' | grep -E '\\s+?inet\\b' | sed -E 's/^\\s+?inet\\s+?([^/]+).*\$/\\1/'` );    # Will do for now.

    # Carrier flag of the (lexicographically) first wireless interface. A
    # missing or unreadable file counts as "no carrier".
    my ($carrier_file) = glob '/sys/class/net/wl*/carrier';
    my $carrier_up = 0;
    if ( defined $carrier_file and open my $TMP_FH, '<', $carrier_file ) {
        my $flag = <$TMP_FH>;
        $carrier_up = 1 if defined $flag and $flag =~ m/\A\s*1\s*\z/;
        close $TMP_FH;
    }

    return if $connectivity =~ m/^[0-9]+([.][0-9]+){3}$/ and $carrier_up;

    $retry_i = 1;
    if ( defined $datadir ) {

        # Delete potentially incomplete items from the last ipv4 address/domain, for redoing.
        unlink(
            "$datadir/src_hea.txt",  "$datadir/src_hes.txt",  "$datadir/src.html",
            "$datadir/url.txt",      "$datadir/scs.png",      "$datadir/src2_hea.txt",
            "$datadir/src2_hes.txt", "$datadir/src2.html"
        );
        rmdir "$datadir/";
    }
    return;
}
# Capture one site over the given scheme with both the browser and curl.
#
#   $scheme - 'http' or 'https'
#   $domain - domain (or ip when no domain is known), without protocol prefix
#   $ip     - ipv4 address the domain was discovered on
#
# Output goes to ./data/<d1>/<d2>/<d3>/<d4>/<sha1 of $domain>/ :
#   src.html      page source as seen by the browser
#   scs.png       full-page screenshot
#   url.txt       ip, final browser location and requested URL (one per line)
#   src_hea.txt   user agent reported by the browser
#   src2.html     body fetched by curl (written to scs.png instead when the
#                 screenshot came out empty)
#   src2_hes.txt  response headers received by curl
#   src2_hea.txt  user agent sent by curl
#
# Returns 10 when make_path created nothing, 11 when an output file cannot
# be opened, 1 otherwise. The initial page load is not wrapped in eval, so
# a failure there propagates to the caller.
sub _fetch_src {
    my ( $scheme, $domain, $ip ) = @_;
    my $url = "$scheme://$domain";

    # Use the sha1sum of the domain (if unavailable ip) (w/o protocol prefix) to build directories.
    my $digest = sha1_hex($domain);
    my ( $l1, $l2, $l3, $l4 ) = split //, substr( $digest, 0, 4 );
    my $datadir = './data/' . "$l1/$l2/$l3/$l4/$digest/";

    # make_path reports zero created directories both on failure and when the
    # directory already exists; in either case this target is skipped.
    make_path($datadir) or return 10;

    # Explicit empty argument list: a bare "&connectivity_check;" would hand
    # over this sub's own @_ and make the check treat $domain as a directory.
    connectivity_check();

    $driver->get($url);

    my %out;
    for my $spec (
        [ 'src',  '>:encoding(UTF-8)', 'src.html' ],
        [ 'url',  '>',                 'url.txt' ],
        [ 'hea',  '>',                 'src_hea.txt' ],     # User-agent headers.
        [ 'hes2', '>',                 'src2_hes.txt' ],    # Server headers (only applicable for Curl).
        [ 'hea2', '>',                 'src2_hea.txt' ],
      )
    {
        my ( $key, $mode, $file ) = @{$spec};
        open $out{$key}, $mode, $datadir . $file
          or do { warn "Cannot open $datadir$file: $!\n"; return 11 };
    }

    eval { print { $out{'src'} } $driver->get_page_source() };
    connectivity_check($datadir);

    my $screenshot = $datadir . 'scs.png';
    eval { $driver->capture_screenshot( $screenshot, { 'full' => 1 } ) };

    # A zero-length screenshot happens with full-screen images. Assume non-html MIME
    # and let curl store the raw body in place of the screenshot.
    my $body_file = ( -z $screenshot ) ? $screenshot : $datadir . 'src2.html';
    open my $body_fh, '>', $body_file
      or do { warn "Cannot open $body_file: $!\n"; return 11 };

    $easy->setopt( CURLOPT_URL,        $url );
    $easy->setopt( CURLOPT_FILE,       $body_fh );
    $easy->setopt( CURLOPT_HEADERDATA, $out{'hes2'} );
    eval { $easy->perform() };

    print { $out{'url'} } ( defined $ip ? $ip : '' ), "\n";        # First, print the ip.
    eval { print { $out{'url'} } $driver->get_current_url() };    # Then print the (redirected to) browser location.
    print { $out{'url'} } "\n";
    print { $out{'url'} } $url, "\n";                              # Lastly, print visited domain/ip.

    eval { print { $out{'hea'} } $driver->get_user_agent() };
    print { $out{'hea'} } "\n";
    print { $out{'hea2'} } $easy_ua, "\n";

    # Close explicitly instead of relying on scope exit so everything is
    # flushed before the next site is processed.
    close $_ for $body_fh, values %out;
    return 1;
}

# Capture http://$_[0]; $_[1] is the ipv4 address it was found on.
sub get_80_src { return _fetch_src( 'http', @_[ 0, 1 ] ) }

# Capture https://$_[0]; $_[1] is the ipv4 address it was found on.
sub get_443_src { return _fetch_src( 'https', @_[ 0, 1 ] ) }

# Earlier entry point names, kept so call sites that still use them resolve.
# They took a single domain/ip argument, which is passed on as both values.
sub double_80_screenshot  { return get_80_src( $_[0], $_[0] ) }
sub double_443_screenshot { return get_443_src( $_[0], $_[0] ) }
sub reverse_dns_doms {
sub reverse_dns_doms {
my ( @ obj ) ;
my ( @ obj ) ;
my $ res = Net::DNS::Resolver - > new ;
my $ res = Net::DNS::Resolver - > new (
tcp_timeout = > 10 ,
udp_timeout = > 10
) ;
& connectivity_check ;
& connectivity_check ;
my $ reply = $ res - > search ( "$_[0]" , "PTR" ) ;
my $ reply = $ res - > search ( "$_[0]" , "PTR" ) ;
if ( $ reply ) {
if ( $ reply ) {
foreach my $ rr ( grep { $ _ - > type eq "PTR" } $ reply - > answer ) { # Do not assume rr-objects are of the same type as requested.
foreach my $ rr ( grep { $ _ - > type eq "PTR" } $ reply - > answer ) { # Do not assume rr-objects are of the same type as requested (use grep).
eval { push @ obj , $ rr - > ptrdname } ;
push @ obj , $ rr - > ptrdname ;
}
}
}
}
return @ obj ;
return @ obj ;
@ -147,10 +157,9 @@ sub syn_ping_elmn {
$ p - > port_number ( $ _ [ 0 ] ) ;
$ p - > port_number ( $ _ [ 0 ] ) ;
foreach my $ host ( @ host ) {
foreach my $ host ( @ host ) {
next unless defined ( $ host ) ;
next unless defined ( $ host ) ;
$ args { 'host' } = $ host ;
$ p - > ping ( $ host ) ;
$ p - > ping ( $ args { 'host' } ) ;
( my $ tmphost = $ host ) =~ s/[.][0-9.]+$// ;
$ host =~ s/[.][0-9.]+$// ;
$ host [ $ tmphost ] = $ host ;
$ host [ $ host ] = $ args { 'host' } ;
}
}
while ( my ( $ host ) = $ p - > ack ) { push @ { $ hoa { $ _ [ 0 ] } } , $ host ; $ host =~ s/[.][0-9.]+$// ; splice @ host , $ host }
while ( my ( $ host ) = $ p - > ack ) { push @ { $ hoa { $ _ [ 0 ] } } , $ host ; $ host =~ s/[.][0-9.]+$// ; splice @ host , $ host }
}
}
@ -176,10 +185,11 @@ $initvar=0;
$ alpha = time ;
$ alpha = time ;
splice @ host , 0 , $# host ;
splice @ host , 0 , $# host ;
undef % hoa ;
undef % hoa ;
my $ host_end = $ i2 . '.' . $ i3 . '.' . $ i4 ;
$ args { 'port' } = '80' ; $ args { 'proto' } = 'tcp' ;
$ args { 'port' } = '80' ; $ args { 'proto' } = 'tcp' ;
$ p - > port_number ( $ args { 'port' } ) ;
$ p - > port_number ( $ args { 'port' } ) ;
foreach my $ i1 ( $ iii1 .. 9 , $ iii2 .. 126 , $ iii3 .. 254 ) { # Skip large private blocks.
foreach my $ i1 ( $ iii1 .. 9 , $ iii2 .. 126 , $ iii3 .. 254 ) { # Skip large, private ipv4 blocks.
$ args { 'host' } = $ i1 . '.' . $ i2 . '.' . $ i3 . '.' . $ i4 ;
$ args { 'host' } = $ i1 . '.' . $ host_end ;
$ p - > ping ( $ args { 'host' } ) ;
$ p - > ping ( $ args { 'host' } ) ;
$ host [ $ i1 ] = $ args { 'host' } ;
$ host [ $ i1 ] = $ args { 'host' } ;
}
}
@ -191,32 +201,36 @@ while (my ($host) = $p->ack) {push @{$hoa{'80'}}, $host; $host =~ s/[.][0-9.]+$/
& syn_ping_elmn ( 443 , 'udp' ) ;
& syn_ping_elmn ( 443 , 'udp' ) ;
& syn_ping_elmn ( 443 , 'tcp' ) ;
& syn_ping_elmn ( 443 , 'tcp' ) ;
open my $ FILEH_A , '>>' , "./data/domains.txt" ; # Here, we'll store all the domains from the ipv4 addresses.
foreach my $ host ( @ { $ hoa { '80' } } ) {
foreach my $ host ( @ { $ hoa { '80' } } ) {
next unless defined ( $ host ) ;
next unless defined ( $ host ) ;
print $ FILEH_A $ host . ':80' . "\n" ;
utime time , time , './lastip' ;
my @ rray = & reverse_dns_doms ( $ host ) ;
my @ rray = & reverse_dns_doms ( $ host ) ;
if ( $# rray == 0 and $ rray [ 0 ] eq '' ) { & double_80_screenshot ( $ host ) } else {
if ( $# rray == 0 and $ rray [ 0 ] eq '' ) { eval { & get_80_src ( $ host , $ host ) } } else {
foreach ( @ rray ) { if ( ( - d "./data/$_/view-source:http:/$_/" or - d "./data/$_/view-source:https:/$_/" ) and ( - d "./data/$_/http:/$_/" or - d "./data/$_/https:/$_/" ) ) { print $ FILEH_A "\n" ; next } ;
foreach ( @ rray ) {
sleep rand ( 1 ) / ( rand ( 10 ) + 1 ) ;
sleep rand ( 1 ) / ( rand ( 10 ) + 1 ) ;
print $ FILEH_A $ _ . ',' ; & double_80_screenshot ( $ _ ) }
eval { & get_80_src ( $ _ , $ host ) } } } }
print $ FILEH_A "\n" }
print $ FILEH_A "\n" }
foreach my $ host ( @ { $ hoa { '443' } } ) {
foreach my $ host ( @ { $ hoa { '443' } } ) {
next unless defined ( $ host ) ;
next unless defined ( $ host ) ;
print $ FILEH_A $ host . ':443' . "\n" ;
utime time , time , './lastip' ;
my @ rray = & reverse_dns_doms ( $ host ) ;
my @ rray = & reverse_dns_doms ( $ host ) ;
if ( $# rray == 0 and $ rray [ 0 ] eq '' ) { & double_443_screenshot ( $ host ) } else {
if ( $# rray == 0 and $ rray [ 0 ] eq '' ) { eval { & get_443_src ( $ host , $ host ) } } else {
foreach ( @ rray ) { if ( - d "./data/$_/view-source:http:/$_/" or - d "./data/$_/view-source:https:/$_/" and ( - d "./data/$_/http:/$_/" or - d "./data/$_/https:/$_/" ) ) { print $ FILEH_A "\n" ; next } ;
foreach ( @ rray ) {
sleep rand ( 1 ) / ( rand ( 10 ) + 1 ) ;
sleep rand ( 1 ) / ( rand ( 10 ) + 1 ) ;
print $ FILEH_A $ _ . ',' ; & double_443_screenshot ( $ _ ) }
eval { & get_443_src ( $ _ , $ host ) } } } }
print $ FILEH_A "\n" }
utime time , time , './lastip' ;
print $ FILEH_A "\n" }
& connectivity_check ;
& connectivity_check ;
$ p - > close ( ) ;
$ p - > close ( ) ;
$ beta = time ;
$ beta = time ;
$ delta = $ beta - $ alpha ;
$ delta = $ beta - $ alpha ;
$ gamma = $ i4 * 255 ** 2 + $ i3 * 255 ** 1 + $ i2 ;
$ gamma = $ i4 * 255 ** 2 + $ i3 * 255 ** 1 + $ i2 ;
if ( int ( rand ( 1000 ) ) == 0 ) { # Memory usage may accumulate by visiting websites; restart once in a while.
open my $ TMP_TMP , '>' , './lastip' ;
print $ TMP_TMP $ args { 'host' } , "\n" ;
close $ TMP_TMP ;
$ p - > close ( ) ;
$ driver - > shutdown_binary ;
exit 0 ;
}
if ( 3 > $ delta or defined ( $ retry_i ) ) {
if ( 3 > $ delta or defined ( $ retry_i ) ) {
$ retry_i = undef ;
$ retry_i = undef ;
print "$delta < 3.\n" ;
print "$delta < 3.\n" ;
@ -227,15 +241,16 @@ sleep 2**$exp; $exp++; chomp($connectivity = `ip a | grep -A 2 -Ei '^[0-9]+: wl[
eval open my $ TMP_FH , '<' , ( glob '/sys/class/net/wl*/carrier' ) [ 0 ] ;
eval open my $ TMP_FH , '<' , ( glob '/sys/class/net/wl*/carrier' ) [ 0 ] ;
unless ( $ connectivity =~ m/^[0-9]+([.][0-9]+){3}$/ and <$TMP_FH> == 1 ) { warn "$!: No wireless connectivity on (lexicographically) first wireless network." ; if ( $ exp >= 10 ) { $ exp -= int ( rand ( 11 ) ) } } else { last } }
unless ( $ connectivity =~ m/^[0-9]+([.][0-9]+){3}$/ and <$TMP_FH> == 1 ) { warn "$!: No wireless connectivity on (lexicographically) first wireless network." ; if ( $ exp >= 10 ) { $ exp -= int ( rand ( 11 ) ) } } else { last } }
print "Retrying.\n" ;
print "Retrying.\n" ;
$ p - > close ( ) ;
$ p = Net::Ping - > new ( "syn" , 3 ) ;
$ p = Net::Ping - > new ( "syn" , 3 ) ;
redo LABEL2 ;
redo LABEL2 ;
}
}
$ p - > close ( ) ;
$ p = Net::Ping - > new ( "syn" , 3 ) ;
$ p = Net::Ping - > new ( "syn" , 3 ) ;
print 'Progress: ' , $ gamma / 255 ** 3 * 100 , "%\n" , 'ETA: ' , ( 255 ** 3 - $ gamma ) * $ delta , "s\n" ;
print 'Progress: ' , $ gamma / 255 ** 3 * 100 , "%\n" , 'ETA: ' , ( 255 ** 3 - $ gamma ) * $ delta , "s\n" ;
close $ FILEH_A ;
print $ args { 'host' } , "\n" ;
print "\n" , 'To continue after quitting, provide ' , '1.' . $ i2 . '.' . $ i3 . '.' . $ i4 , ' as first argument.' , "\n\n" ;
} } }
} } }
# Final cleanup after the scan loops have finished. Uses the same
# shutdown_binary call as the signal handler and the periodic restart, and
# prints 'Done!' unconditionally (it used to hang off the always-empty
# output of a backtick command and therefore never appeared).
print 'Cleaning up...' . "\n";
$driver->shutdown_binary;
print 'Done!' . "\n";