Print wat er gebeurt (anders gebeurt er niks).

main
Roy 2 years ago
parent 543bc2d44c
commit 95ebf0d965
  1. 61
      senicup.pl

@ -3,11 +3,6 @@
# Multi-UA web scraper written in Perl. Based on scanning the non-private IPv4 network in reverse order and requesting rDNS if a web-related port is open. # Multi-UA web scraper written in Perl. Based on scanning the non-private IPv4 network in reverse order and requesting rDNS if a web-related port is open.
# (C) Roy van Lunsen # (C) Roy van Lunsen
# TODO: Add DB support for storing HTMLs, b64 screenshots, etc..
# TODO: Utilize the DB for randomizing the scanning of IPv4 blocks supporting continuation.
# TODO: Add RE-based querying system for scoped IPv4/domain scrapes.
# TODO: Reliably detect when being blocked by companies that own (too) much network space and delay requests appropriately (might not be necessary if various parts are sufficiently randomized).
use strict; use strict;
use warnings; use warnings;
use utf8; use utf8;
@ -43,18 +38,18 @@ $driver->set_timeout('implicit', 20000);
$driver->set_timeout('page load', 15000); $driver->set_timeout('page load', 15000);
my $p = Net::Ping->new("syn", 3); my $p = Net::Ping->new("syn", 3);
sub interrupt { sub INTERRUPT {
if ($#_ == 0) { if (exists($args{'host'})) {
open my $TMP_TMP, '>', './lastip'; open my $TMP_TMP, '>', './lastip';
print $TMP_TMP $_[0], "\n"; print $TMP_TMP $args{'host'}, "\n";
close $TMP_TMP; close $TMP_TMP;
} else {print "no args", "\n"} } else {print "no args", "\n\n\n\n\n\n\n\n\n\n\n"}
$p->close(); $p->close();
$driver->shutdown_binary; $driver->shutdown_binary;
exit 0;
} }
$SIG{'INT'} = 'interrupt($args{"host"})'; $SIG{'INT'} = 'INTERRUPT';
$SIG{'HUP'} = 'interrupt($args{"host"})';
sub connectivity_check { sub connectivity_check {
chomp(my $connectivity = `ip a | grep -A 2 -Ei '^[0-9]+: wl[^:]+:' | grep -E '\\s+?inet\\b' | sed -E 's/^\\s+?inet\\s+?([^/]+).*\$/\\1/'`); # Will do for now. chomp(my $connectivity = `ip a | grep -A 2 -Ei '^[0-9]+: wl[^:]+:' | grep -E '\\s+?inet\\b' | sed -E 's/^\\s+?inet\\s+?([^/]+).*\$/\\1/'`); # Will do for now.
@ -72,8 +67,9 @@ my $l2 = substr($digest, 1, 1);
my $l3 = substr($digest, 2, 1); my $l3 = substr($digest, 2, 1);
my $l4 = substr($digest, 3, 1); my $l4 = substr($digest, 3, 1);
my $datadir = './data/'."$l1/$l2/$l3/$l4/$digest/"; my $datadir = './data/'."$l1/$l2/$l3/$l4/$digest/";
make_path($datadir) or return 10; make_path($datadir) or (print 'Skipping: Already visited URL ', $_[0], ' before.', "\n" and return 0);
&connectivity_check; &connectivity_check;
print 'Navigating to ', "http://$_[0]\n";
$driver->get("http://$_[0]"); $driver->get("http://$_[0]");
open my $FILEH_A, '>:encoding(UTF-8)', $datadir.'src.html'; open my $FILEH_A, '>:encoding(UTF-8)', $datadir.'src.html';
open my $FILEH_B, '>', $datadir.'url.txt'; open my $FILEH_B, '>', $datadir.'url.txt';
@ -81,35 +77,43 @@ my $FILEH_C;
open my $FILEH_D, '>', $datadir.'src_hea.txt'; # User-agent headers. open my $FILEH_D, '>', $datadir.'src_hea.txt'; # User-agent headers.
open my $FILEH_E, '>', $datadir.'src2_hes.txt'; # Server headers (only applicable for Curl). open my $FILEH_E, '>', $datadir.'src2_hes.txt'; # Server headers (only applicable for Curl).
open my $FILEH_F, '>', $datadir.'src2_hea.txt'; open my $FILEH_F, '>', $datadir.'src2_hea.txt';
print 'Writing Selenium-rendered page source to ', $datadir.'src.html', "\n";
eval {print $FILEH_A $driver->get_page_source("http://$_[0]")}; eval {print $FILEH_A $driver->get_page_source("http://$_[0]")};
&connectivity_check($datadir); &connectivity_check($datadir);
print 'Writing Selenium screenshot to ', $datadir.'scs.png', "\n";
eval {$driver->capture_screenshot($datadir.'scs.png', {'full' => 1})}; eval {$driver->capture_screenshot($datadir.'scs.png', {'full' => 1})};
$easy->setopt(CURLOPT_URL, "http://$_[0]"); $easy->setopt(CURLOPT_URL, "http://$_[0]");
if (-z $datadir.'scs.png') { # A zero-length screenshot happens with full-screen images. Assume non-html MIME. if (-z $datadir.'scs.png') { # A zero-length screenshot happens with full-screen images. Assume non-html MIME.
open $FILEH_C, '>', $datadir.'scs.png'} else {open $FILEH_C, '>', $datadir.'src2.html'} print 'Writing non-HTML Curl-rendered page source to ', $datadir.'scs.png', "\n"; open $FILEH_C, '>', $datadir.'scs.png'} else {print 'Writing Curl-rendered page source to ', $datadir.'src2.html', "\n"; open $FILEH_C, '>', $datadir.'src2.html'}
$easy->setopt(CURLOPT_FILE, $FILEH_C); $easy->setopt(CURLOPT_FILE, $FILEH_C);
print 'Writing server headers to ', $datadir.'src2_hes.txt', "\n";
$easy->setopt(CURLOPT_HEADERDATA, $FILEH_E); $easy->setopt(CURLOPT_HEADERDATA, $FILEH_E);
eval {$easy->perform()}; eval {$easy->perform()};
print $FILEH_B $_[1]; # First, print the ip. print $FILEH_B $_[1]; # First, print the ip.
print $FILEH_B "\n"; print $FILEH_B "\n";
eval {print $FILEH_B $driver->get_current_url()}; # Then print the (redirected to) browser location. eval {print $FILEH_B $driver->get_current_url()}; # Then print the (redirected to) browser location.
print 'Got redirected to ', $driver->get_current_url(), "\n" if "http://$_[0]" ne $driver->get_current_url();
print $FILEH_B "\n"; print $FILEH_B "\n";
print $FILEH_B 'http://', $_[0]; # Lastly, print visited domain/ip. print $FILEH_B 'http://', $_[0]; # Lastly, print visited domain/ip.
print $FILEH_B "\n"; print $FILEH_B "\n";
print 'Writing Selenium UA headers to ', $datadir.'src_hea.txt', "\n";
eval {print $FILEH_D $driver->get_user_agent()}; eval {print $FILEH_D $driver->get_user_agent()};
print $FILEH_D "\n"; print $FILEH_D "\n";
print 'Writing Curl UA headers to ', $datadir.'src2_hea.txt', "\n";
print $FILEH_F $easy_ua, "\n"; print $FILEH_F $easy_ua, "\n";
} }
sub get_443_src { sub get_443_src {
print 'i am retarded', "\n";
my $digest = sha1_hex($_[0]); # Use the sha1sum of the domain (if unavailable ip) (w/o protocol prefix) to build directories. my $digest = sha1_hex($_[0]); # Use the sha1sum of the domain (if unavailable ip) (w/o protocol prefix) to build directories.
my $l1 = substr($digest, 0, 1); my $l1 = substr($digest, 0, 1);
my $l2 = substr($digest, 1, 1); my $l2 = substr($digest, 1, 1);
my $l3 = substr($digest, 2, 1); my $l3 = substr($digest, 2, 1);
my $l4 = substr($digest, 3, 1); my $l4 = substr($digest, 3, 1);
my $datadir = './data/'."$l1/$l2/$l3/$l4/$digest/"; my $datadir = './data/'."$l1/$l2/$l3/$l4/$digest/";
make_path($datadir) or return 10; make_path($datadir) or (print 'Skipping: Already visited URL ', $_[0], ' before.', "\n" and return 0);
&connectivity_check; &connectivity_check;
print 'Navigating to ', "https://$_[0]\n";
$driver->get("https://$_[0]"); $driver->get("https://$_[0]");
open my $FILEH_A, '>:encoding(UTF-8)', $datadir.'src.html'; open my $FILEH_A, '>:encoding(UTF-8)', $datadir.'src.html';
open my $FILEH_B, '>', $datadir.'url.txt'; open my $FILEH_B, '>', $datadir.'url.txt';
@ -117,23 +121,29 @@ my $FILEH_C;
open my $FILEH_D, '>', $datadir.'src_hea.txt'; # User-agent headers. open my $FILEH_D, '>', $datadir.'src_hea.txt'; # User-agent headers.
open my $FILEH_E, '>', $datadir.'src2_hes.txt'; # Server headers (only applicable for Curl). open my $FILEH_E, '>', $datadir.'src2_hes.txt'; # Server headers (only applicable for Curl).
open my $FILEH_F, '>', $datadir.'src2_hea.txt'; open my $FILEH_F, '>', $datadir.'src2_hea.txt';
print 'Writing Selenium-rendered page source to ', $datadir.'src.html', "\n";
eval {print $FILEH_A $driver->get_page_source("https://$_[0]")}; eval {print $FILEH_A $driver->get_page_source("https://$_[0]")};
&connectivity_check($datadir); &connectivity_check($datadir);
print 'Writing Selenium screenshot to ', $datadir.'scs.png', "\n";
eval {$driver->capture_screenshot($datadir.'scs.png', {'full' => 1})}; eval {$driver->capture_screenshot($datadir.'scs.png', {'full' => 1})};
$easy->setopt(CURLOPT_URL, "https://$_[0]"); $easy->setopt(CURLOPT_URL, "https://$_[0]");
if (-z $datadir.'scs.png') { # A zero-length screenshot happens with full-screen images. Assume non-html MIME. if (-z $datadir.'scs.png') { # A zero-length screenshot happens with full-screen images. Assume non-html MIME.
open $FILEH_C, '>', $datadir.'scs.png'} else {open $FILEH_C, '>', $datadir.'src2.html'} print 'Writing non-HTML Curl-rendered page source to ', $datadir.'scs.png', "\n"; open $FILEH_C, '>', $datadir.'scs.png'} else {print 'Writing Curl-rendered page source to ', $datadir.'src2.html', "\n"; open $FILEH_C, '>', $datadir.'src2.html'}
$easy->setopt(CURLOPT_FILE, $FILEH_C); $easy->setopt(CURLOPT_FILE, $FILEH_C);
print 'Writing server headers to ', $datadir.'src2_hes.txt', "\n";
$easy->setopt(CURLOPT_HEADERDATA, $FILEH_E); $easy->setopt(CURLOPT_HEADERDATA, $FILEH_E);
eval {$easy->perform()}; eval {$easy->perform()};
print $FILEH_B $_[1]; # First, print the ip. print $FILEH_B $_[1]; # First, print the ip.
print $FILEH_B "\n"; print $FILEH_B "\n";
eval {print $FILEH_B $driver->get_current_url()}; # Then print the (redirected to) browser location. eval {print $FILEH_B $driver->get_current_url()}; # Then print the (redirected to) browser location.
print 'Got redirected to ', $driver->get_current_url(), "\n" if "https://$_[0]" ne $driver->get_current_url();
print $FILEH_B "\n"; print $FILEH_B "\n";
print $FILEH_B 'https://', $_[0]; # Lastly, print visited domain/ip. print $FILEH_B 'https://', $_[0]; # Lastly, print visited domain/ip.
print $FILEH_B "\n"; print $FILEH_B "\n";
print 'Writing Selenium UA headers to ', $datadir.'src_hea.txt', "\n";
eval {print $FILEH_D $driver->get_user_agent()}; eval {print $FILEH_D $driver->get_user_agent()};
print $FILEH_D "\n"; print $FILEH_D "\n";
print 'Writing Curl UA headers to ', $datadir.'src2_hea.txt', "\n";
print $FILEH_F $easy_ua, "\n"; print $FILEH_F $easy_ua, "\n";
} }
@ -147,6 +157,7 @@ udp_timeout => 10
my $reply = $res->search("$_[0]", "PTR"); my $reply = $res->search("$_[0]", "PTR");
if ($reply) { if ($reply) {
foreach my $rr (grep { $_->type eq "PTR" } $reply->answer) { # Do not assume rr-objects are of the same type as requested (use grep). foreach my $rr (grep { $_->type eq "PTR" } $reply->answer) { # Do not assume rr-objects are of the same type as requested (use grep).
print 'Got ', $rr->ptrdname, "\n";
push @obj, $rr->ptrdname; push @obj, $rr->ptrdname;
} }
} }
@ -161,20 +172,20 @@ $p->ping($host);
(my $tmphost = $host) =~ s/[.][0-9.]+$//; (my $tmphost = $host) =~ s/[.][0-9.]+$//;
$host[$tmphost] = $host; $host[$tmphost] = $host;
} }
while (my ($host) = $p->ack) {push @{$hoa{$_[0]}}, $host; $host =~ s/[.][0-9.]+$//; splice @host, $host} while (my ($host) = $p->ack) {print 'Queueing ', $host, ' because of ', $_[1], '-ACK on port ', $_[0], "\n"; push @{$hoa{$_[0]}}, $host; $host =~ s/[.][0-9.]+$//; splice @host, $host}
} }
$initvar=1; $initvar=1;
unless (defined($continue_from)) {($ii1, $ii2, $ii3, $ii4) = (1, 0, 0, 0)} unless (defined($continue_from)) {($ii1, $ii2, $ii3, $ii4) = (1, 0, 0, 0)}
LABEL4: foreach my $i4 (0..254) { LABEL4: foreach my $i4 (0..255) {
if ($initvar == 1) { if ($initvar == 1) {
until ($i4 >= $ii4) {next LABEL4} until ($i4 >= $ii4) {next LABEL4}
} }
LABEL3: foreach my $i3 (0..254) { LABEL3: foreach my $i3 (0..255) {
if ($initvar == 1) { if ($initvar == 1) {
until (int($i4*255**3+$i3*255**2) >= int($ii4*255**3+$ii3*255**2)) {next LABEL3} until (int($i4*255**3+$i3*255**2) >= int($ii4*255**3+$ii3*255**2)) {next LABEL3}
} }
LABEL2: foreach my $i2 (0..254) { LABEL2: foreach my $i2 (0..255) {
if ($initvar == 1) { if ($initvar == 1) {
until (int($i4*255**3+$i3*255**2+$i2*255) >= int($ii4*255**3+$ii3*255**2+$ii2*255)) {next LABEL2} until (int($i4*255**3+$i3*255**2+$i2*255) >= int($ii4*255**3+$ii3*255**2+$ii2*255)) {next LABEL2}
} }
@ -188,13 +199,13 @@ undef %hoa;
my $host_end = $i2.'.'.$i3.'.'.$i4; my $host_end = $i2.'.'.$i3.'.'.$i4;
$args{'port'} = '80'; $args{'proto'} = 'tcp'; $args{'port'} = '80'; $args{'proto'} = 'tcp';
$p->port_number($args{'port'}); $p->port_number($args{'port'});
foreach my $i1 ($iii1..9,$iii2..126,$iii3..254) { # Skip large, private ipv4 blocks. foreach my $i1 ($iii1..9,$iii2..126,$iii3..255) { # Skip large, private ipv4 blocks.
$args{'host'} = $i1.'.'.$host_end; $args{'host'} = $i1.'.'.$host_end;
$p->ping($args{'host'}); $p->ping($args{'host'});
$host[$i1] = $args{'host'}; $host[$i1] = $args{'host'};
} }
&connectivity_check; &connectivity_check;
while (my ($host) = $p->ack) {push @{$hoa{'80'}}, $host; $host =~ s/[.][0-9.]+$//; splice @host, $host} while (my ($host) = $p->ack) {print 'Queueing ', $host, ' because of ', $args{'proto'}, '-ACK on port ', $args{'port'}, "\n"; push @{$hoa{'80'}}, $host; $host =~ s/[.][0-9.]+$//; splice @host, $host}
&connectivity_check; &connectivity_check;
&syn_ping_elmn(80, 'udp'); &syn_ping_elmn(80, 'udp');
@ -204,19 +215,21 @@ while (my ($host) = $p->ack) {push @{$hoa{'80'}}, $host; $host =~ s/[.][0-9.]+$/
foreach my $host (@{$hoa{'80'}}) { foreach my $host (@{$hoa{'80'}}) {
next unless defined($host); next unless defined($host);
utime time, time, './lastip'; utime time, time, './lastip';
print 'Looking up PTR-records for ', $host, "\n";
my @rray = &reverse_dns_doms($host); my @rray = &reverse_dns_doms($host);
if ($#rray == 0 and $rray[0] eq '') {eval {&get_80_src($host, $host)}} else { if ($#rray == 0 and $rray[0] eq '') {eval {&get_80_src($host, $host)}} else {
foreach (@rray) { foreach (@rray) {
sleep rand(1)/(rand(10)+1); sleep rand(1)/(rand(10)+1);
eval {&get_80_src($_, $host)}}}} eval {&get_80_src($_, $host)}}}} # Catch non-fatal browser errors.
foreach my $host (@{$hoa{'443'}}) { foreach my $host (@{$hoa{'443'}}) {
next unless defined($host); next unless defined($host);
utime time, time, './lastip'; utime time, time, './lastip';
print 'Looking up PTR-records for ', $host, "\n";
my @rray = &reverse_dns_doms($host); my @rray = &reverse_dns_doms($host);
if ($#rray == 0 and $rray[0] eq '') {eval {&get_443_src($host, $host)}} else { if ($#rray == 0 and $rray[0] eq '') {&get_443_src($host, $host)} else {
foreach (@rray) { foreach (@rray) {
sleep rand(1)/(rand(10)+1); sleep rand(1)/(rand(10)+1);
eval {&get_443_src($_, $host)}}}} eval {&get_443_src($_, $host)}}}} # Catch non-fatal browser errors.
utime time, time, './lastip'; utime time, time, './lastip';
&connectivity_check; &connectivity_check;
$p->close(); $p->close();

Loading…
Cancel
Save