commit
2a0161f169
@ -0,0 +1,15 @@ |
|||||||
|
Multi-UA web scraper geschreven in Perl voor educatieve doeleinden. Gebaseerd op het scannen van niet-privé IPv4-netwerken in volgorde van Class A naar Class E en het maken van rDNS-requests als een web-gerelateerde poort open is. |
||||||
|
|
||||||
|
Dependencies: Selenium::Firefox, Net::DNS, Net::EmptyPort en Net::Curl::Easy. |
||||||
|
|
||||||
|
|
||||||
|
[Willekeurig voorbeeld in video-formaat](sample/senicup.mp4) |
||||||
|
|
||||||
|
|
||||||
|
TODO: |
||||||
|
|
||||||
|
- Add DB support for storing HTMLs, b64 screenshots, etc. |
||||||
|
- Utilize the DB for randomizing the scanning of IPv4 blocks and supporting seamless continuation. |
||||||
|
- Add RE-based querying system for scoped IPv4/domain scrapes. |
||||||
|
- Reliably detect when being blocked by companies that own (too) much network space and delay requests appropriately (might not be necessary if various parts are sufficiently randomized). |
||||||
|
- Windows support. |
Binary file not shown.
@ -0,0 +1,123 @@ |
|||||||
|
#!/bin/env perl |
||||||
|
#use diagnostics; |
||||||
|
use strict; |
||||||
|
use warnings; |
||||||
|
use File::Path qw(make_path); |
||||||
|
use Net::EmptyPort qw(check_port); |
||||||
|
use Net::DNS; |
||||||
|
#use Selenium::Remote::Driver; |
||||||
|
use Selenium::Firefox; |
||||||
|
use Selenium::Firefox::Profile; |
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# File-scoped state shared by the subs and the scan loop further down.
# ---------------------------------------------------------------------------

# Link state sampled once at startup: 'up' if any /sys/class/net/*/operstate
# file contains exactly "up", otherwise the empty string.
chomp(my $connectivity = `cat /sys/class/net/*/operstate|grep -m 1 '^up\$'`); # Will do for now.

# Counter used by the progress report of the scan loop.
my $ii = 0;

# %args  : host/port/proto hash handed to check_port().
# %args2 : constructor arguments for the Firefox profile.
my (%args, %args2);

# Prepare the output location BEFORE launching the browser, so that a
# failure here does not leave a stray Firefox/geckodriver behind.
# make_path() is a no-op when ./data already exists.
make_path('./data');
-d './data' or die "Cannot create ./data: $!";

# Here, we'll store all the domains from all ipv4 addresses.
open my $FILEH_A, '>>', "./data/domains.txt"
    or die "Cannot open ./data/domains.txt for appending: $!";

$args2{'profile_dir'} = '/home/miami/.Mozilla/Firefox/ud8j40yn.default'; # Ghacks user.js is a good start.
my $profile = Selenium::Firefox::Profile->new(%args2);
my $driver = Selenium::Firefox->new('firefox_profile' => $profile);
||||||
|
|
||||||
|
# Block until at least one network interface reports operstate "up".
#
# Takes no arguments; the return value is not meaningful.
#
# The link state is re-read from /sys/class/net/*/operstate on every pass
# and stored in the file-scoped $connectivity. Without that refresh the
# loop could never terminate once it had been entered, because the value
# would stay whatever it was at program start.
#
# While offline, the sub warns and sleeps 2**$exp seconds. Once $exp reaches
# 10 a random amount (0..10) is subtracted, which caps the delay and adds
# jitter.
sub connectivity_check {
    my $exp = 0;

    while (1) {
        $connectivity = '';
        foreach my $state_file (glob '/sys/class/net/*/operstate') {
            open my $fh, '<', $state_file or next;   # interface may have vanished
            my $state = <$fh>;
            close $fh;
            next unless defined $state;
            chomp $state;
            if ($state eq 'up') {
                $connectivity = 'up';
                last;
            }
        }
        last if $connectivity eq 'up';

        my $delay = 2**$exp;
        warn "No network connectivity; retrying in ${delay}s.\n";
        sleep $delay;
        $exp++;
        if ($exp >= 10) {$exp -= int(rand(11))}
    }

    return;
}
||||||
|
|
||||||
|
# Navigate the shared $driver to $url and store a full-page screenshot.
#
# Arguments:
#   $target        - host name or address; first directory level under ./data
#   $url           - URL handed to the browser
#   $dismiss_alert - when true, try to accept a pending alert after loading
#
# The screenshot is written to
#   ./data/<target>/<url reported by the browser>/screenshot-<epoch>.png.base64
# Note that the reported URL contains '/' characters, so it expands into
# several nested directories.
#
# Dies when the directory or the file cannot be created or written.
# Exceptions thrown by the driver itself propagate to the caller.
sub _save_screenshot {
    my ($target, $url, $dismiss_alert) = @_;

    connectivity_check();
    my $time_in_s = time;
    $driver->get($url);
    my $current_url = $driver->get_current_url();
    if ($dismiss_alert) {
        eval {$driver->accept_alert};   # best effort: usually there is no alert
    }

    # make_path() returns the number of directories it created, which is 0
    # when the directory already exists. Its return value is therefore not
    # a success indicator; test for the directory instead.
    my $dir = "./data/$target/$current_url/";
    make_path($dir);
    -d $dir or die "Cannot create $dir: $!";

    my $file = "${dir}screenshot-$time_in_s.png.base64";
    open my $fh, '>', $file or die "Cannot open $file: $!";
    print {$fh} $driver->screenshot({'full' => 1});
    close $fh or die "Cannot write $file: $!";

    return;
}

# Screenshot http://<target> and its view-source: page.
#
# Takes exactly one argument, the host name or address to visit.
# Dies when more than one argument is supplied.
sub double_80_screenshot {
    die "Too many arguments: \"$_[1]\"...\"$_[$#_]\"." if @_ > 1;
    my ($target) = @_;

    _save_screenshot($target, "http://$target", 1);              # Fetch the eye-candy.
    _save_screenshot($target, "view-source:http://$target", 0);  # Fetch the page source for (partly) reproduction.

    return;
}

# Screenshot https://<target> and its view-source: page.
#
# Takes exactly one argument, the host name or address to visit.
# Dies when more than one argument is supplied.
sub double_443_screenshot {
    die "Too many arguments: \"$_[1]\"...\"$_[$#_]\"." if @_ > 1;
    my ($target) = @_;

    _save_screenshot($target, "https://$target", 1);
    _save_screenshot($target, "view-source:https://$target", 0);

    return;
}
||||||
|
|
||||||
|
# Look up the PTR names of an address.
#
# Argument:
#   $_[0] - the address (or name) to query
#
# Returns the list of names found in the PTR records of the answer section.
# The list is empty when no argument is given, when the resolver returns no
# reply, or when the answer holds no PTR record.
sub reverse_dns_doms {
    my ($address) = @_;
    my @obj;

    # Nothing sensible can be asked for an empty name.
    return @obj unless defined $address && length $address;

    my $res = Net::DNS::Resolver->new;
    my $reply = $res->search("$address", "PTR");
    return @obj unless $reply;

    foreach my $rr ($reply->answer) {
        # The answer section may carry other record types (e.g. a CNAME);
        # only PTR records provide ptrdname.
        next unless $rr->type eq 'PTR';
        push @obj, $rr->ptrdname;
    }

    return @obj;
}
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Main scan: walk the IPv4 space one /24 block at a time.
# ---------------------------------------------------------------------------

# Probes tried, in this order, for every address. The first one that
# check_port() reports as in use wins and the remaining ones are skipped.
# Each entry: [ port, protocol, screenshot routine ].
my @probes = (
    [ '80',  'tcp', \&double_80_screenshot  ],
    [ '80',  'udp', \&double_80_screenshot  ],
    [ '443', 'udp', \&double_443_screenshot ],
    [ '443', 'tcp', \&double_443_screenshot ],
);

# Skip huge private blocks.
my @first_octets = (1..9, 11..126, 128..255);

# Number of /24 blocks the loops below visit in total; used for the
# progress and ETA figures.
my $total_blocks = @first_octets * 256 * 256;

foreach my $i1 (@first_octets) {
    foreach my $i2 (0..255) {
        foreach my $i3 (0..255) {
            my $alpha = time;   # start of this /24 block

            foreach my $i4 (0..255) {
                $args{'host'} = join '.', $i1, $i2, $i3, $i4;

                PROBE:
                foreach my $probe (@probes) {
                    my ($port, $proto, $screenshot) = @$probe;
                    $args{'port'}  = $port;
                    $args{'proto'} = $proto;

                    connectivity_check();
                    next PROBE unless check_port(\%args);

                    print $FILEH_A $args{'host'}.':'.$args{'port'}.' is in use:'."\n";

                    # reverse_dns_doms() returns an EMPTY list when there
                    # is no PTR record, so "no names" must be detected by
                    # emptiness; in that case the bare address is visited.
                    my @names = grep { defined && length } reverse_dns_doms($args{'host'});

                    if (@names) {
                        foreach my $name (@names) {
                            print $FILEH_A $name.',';
                            # One unreachable site must not abort the scan.
                            eval { $screenshot->($name); 1 }
                                or warn "Screenshot of $name failed: $@";
                        }
                    }
                    else {
                        eval { $screenshot->($args{'host'}); 1 }
                            or warn "Screenshot of $args{'host'} failed: $@";
                    }

                    print $FILEH_A "\n\n";
                    last PROBE;
                }
            }

            my $beta = time;
            my $delta = $beta-$alpha;   # seconds spent on this /24 block

            # ETA = time of the last block times the blocks still to do.
            $ii++;
            print 'Progress: ', 100*$ii/$total_blocks, "%\n",
                  'ETA: ', $delta*($total_blocks-$ii), "s\n";
        }
    }
}
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Shutdown: flush the domain log, stop the browser, reap leftover drivers.
# ---------------------------------------------------------------------------
print 'Cleaning up...'."\n";

# Buffered write errors only surface at close time, so report them.
close $FILEH_A or warn "Could not close ./data/domains.txt: $!";

$driver->quit();

# Best effort: remove any geckodriver that is still around. The exit status
# is deliberately ignored, because "nothing left to kill" is the normal
# outcome after a clean quit; -q keeps killall silent in that case.
# List-form system() avoids the shell. Backticks would capture killall's
# (empty) stdout, which is useless as a success test.
system('killall', '-q', 'geckodriver');

print 'Done!'."\n";
Loading…
Reference in new issue