commit
2a0161f169
@ -0,0 +1,15 @@ |
||||
Multi-UA webscraper geschreven in Perl voor educatieve doeleinden. Gebaseerd op het scannen van niet-privé IPv4-netwerken in volgorde van Class A naar Class E en het maken van rDNS-requests als een web-gerelateerde poort open is. |
||||
|
||||
Dependencies: Selenium::Firefox, Net::DNS, Net::EmptyPort en Net::Curl::Easy. |
||||
|
||||
|
||||
[Willekeurig voorbeeld in video-formaat](sample/senicup.mp4) |
||||
|
||||
|
||||
TODO: |
||||
|
||||
- Add DB support for storing HTMLs, b64 screenshots, etc. |
||||
- Utilize the DB for randomizing the scanning of IPv4 blocks and supporting seamless continuation. |
||||
- Add RE-based querying system for scoped IPv4/domain scrapes. |
||||
- Reliably detect when being blocked by companies that own (too) much network space and delay requests appropriately (might not be necessary if various parts are sufficiently randomized). |
||||
- Windows support. |
Binary file not shown.
@ -0,0 +1,123 @@ |
||||
#!/bin/env perl |
||||
#use diagnostics; |
||||
use strict; |
||||
use warnings; |
||||
use File::Path qw(make_path); |
||||
use Net::EmptyPort qw(check_port); |
||||
use Net::DNS; |
||||
#use Selenium::Remote::Driver; |
||||
use Selenium::Firefox; |
||||
use Selenium::Firefox::Profile; |
||||
|
||||
# ---------------------------------------------------------------------------
# Global state used by the subs and the scan loop further down.
# ---------------------------------------------------------------------------

# 'up' when at least one file under /sys/class/net/*/operstate contains the
# line "up" at start-up, otherwise the empty string.
chomp(my $connectivity = `cat /sys/class/net/*/operstate|grep -m 1 '^up\$'`); # Will do for now.

# Counter that the scan loop increments and uses for its progress report.
my $ii = 0;

# %args  : host/port/proto hash handed to check_port().
# %args2 : constructor arguments for the Firefox profile.
my (%args, %args2);
$args2{'profile_dir'} = '/home/miami/.Mozilla/Firefox/ud8j40yn.default'; # Ghacks user.js is a good start.
my $profile = Selenium::Firefox::Profile->new(%args2);
my $driver = Selenium::Firefox->new('firefox_profile' => $profile);

# Make sure the output directory exists before opening the log inside it;
# opening './data/domains.txt' fails when './data' is missing, and an
# unchecked failure would make every later print to the handle a no-op.
my $data_dir = './data';
make_path($data_dir, { error => \my $mkdir_failures });
if ($mkdir_failures && @{$mkdir_failures}) {
    my ($message) = values %{ $mkdir_failures->[0] };
    die "Cannot create $data_dir: $message";
}

# Here, we'll store all the domains from all ipv4 addresses.
open my $FILEH_A, '>>', "$data_dir/domains.txt"
    or die "Cannot open $data_dir/domains.txt for appending: $!";
||||
|
||||
# Block until at least one network interface reports operstate "up".
#
# Takes no arguments and deliberately ignores @_: the legacy call form
# `&connectivity_check;` forwards the caller's own @_ to this sub.
# Returns nothing.
#
# The interface state is re-read from /sys/class/net/*/operstate on every
# attempt, so an outage that begins or ends while the script is running is
# actually noticed (a value sampled once at start-up can never change).
#
# Between attempts it sleeps 2**$exp seconds. Once $exp reaches 10 it is
# reduced by a random 0..10, which keeps the delay bounded and jittered.
sub connectivity_check {
    my $exp = 0;

    ATTEMPT:
    while (1) {
        foreach my $state_file (glob '/sys/class/net/*/operstate') {
            open my $fh, '<', $state_file or next;
            my $state = <$fh>;
            close $fh;
            next unless defined $state;
            chomp $state;
            last ATTEMPT if $state eq 'up';
        }

        warn "No wireless connectivity.";
        sleep 2**$exp;
        $exp++;
        if ($exp >= 10) {
            $exp -= int(rand(11));
        }
    }

    return;
}
||||
|
||||
# Save two full-page screenshots of a host fetched over plain HTTP: one of
# the page itself and one of the browser's "view-source:" view of the same
# URL (for partial reproduction of the page).
#
# Arguments:
#   $_[0] - host name or IPv4 address, without scheme.
#
# Output:
#   ./data/<host>/<current URL>/screenshot-<epoch seconds>.png.base64
#   holding whatever $driver->screenshot({full => 1}) returns
#   (presumably base64-encoded PNG, going by the file name -- confirm).
#
# Dies when called with more than one argument, when the output directory
# cannot be created, or when the screenshot file cannot be written.
# Exceptions from $driver calls other than accept_alert are not caught here.
#
# NOTE(review): the URL reported by the browser is used verbatim as a
# directory path, so its slashes create nested directories; consider
# encoding it before using it on disk.
sub double_80_screenshot {
    if (@_ > 1) {
        die "Too much arguments: \"$_[1]\"...\"$_[$#_]\".";
    }
    my ($host) = @_;

    foreach my $prefix ('', 'view-source:') {
        connectivity_check();
        my $time_in_s = time;

        $driver->get("${prefix}http://$host");
        my $current_url = $driver->get_current_url();

        # Accept a pending alert on the rendered page; a failure here
        # (e.g. there is no alert) is intentionally ignored.
        eval { $driver->accept_alert } if $prefix eq '';

        # make_path() returns only the directories it newly created, so its
        # return value is empty for a directory that already exists. Use the
        # error option to distinguish a real failure from "already there".
        my $dir = "./data/$host/$current_url/";
        make_path($dir, { error => \my $failures });
        if ($failures && @{$failures}) {
            my ($message) = values %{ $failures->[0] };
            die "Cannot create $dir: $message";
        }

        my $file = "${dir}screenshot-$time_in_s.png.base64";
        open my $fh, '>', $file or die "Cannot open $file: $!";
        print {$fh} $driver->screenshot({'full' => 1})
            or die "Cannot write $file: $!";
        close $fh or die "Cannot close $file: $!";
    }

    return;
}
||||
|
||||
# Save two full-page screenshots of a host fetched over HTTPS: one of the
# page itself and one of the browser's "view-source:" view of the same URL.
#
# Arguments:
#   $_[0] - host name or IPv4 address, without scheme.
#
# Output:
#   ./data/<host>/<current URL>/screenshot-<epoch seconds>.png.base64
#   holding whatever $driver->screenshot({full => 1}) returns
#   (presumably base64-encoded PNG, going by the file name -- confirm).
#
# Dies when called with more than one argument, when the output directory
# cannot be created, or when the screenshot file cannot be written.
# Exceptions from $driver calls other than accept_alert are not caught here.
#
# NOTE(review): the URL reported by the browser is used verbatim as a
# directory path, so its slashes create nested directories; consider
# encoding it before using it on disk.
sub double_443_screenshot {
    if (@_ > 1) {
        die "Too much arguments: \"$_[1]\"...\"$_[$#_]\".";
    }
    my ($host) = @_;

    foreach my $prefix ('', 'view-source:') {
        connectivity_check();
        my $time_in_s = time;

        $driver->get("${prefix}https://$host");
        my $current_url = $driver->get_current_url();

        # Accept a pending alert on the rendered page; a failure here
        # (e.g. there is no alert) is intentionally ignored.
        eval { $driver->accept_alert } if $prefix eq '';

        # make_path() returns only the directories it newly created, so its
        # return value is empty for a directory that already exists. Use the
        # error option to distinguish a real failure from "already there".
        my $dir = "./data/$host/$current_url/";
        make_path($dir, { error => \my $failures });
        if ($failures && @{$failures}) {
            my ($message) = values %{ $failures->[0] };
            die "Cannot create $dir: $message";
        }

        my $file = "${dir}screenshot-$time_in_s.png.base64";
        open my $fh, '>', $file or die "Cannot open $file: $!";
        print {$fh} $driver->screenshot({'full' => 1})
            or die "Cannot write $file: $!";
        close $fh or die "Cannot close $file: $!";
    }

    return;
}
||||
|
||||
# Reverse-resolve an address and return the names from its PTR records.
#
# Arguments:
#   $_[0] - the address to look up (the scan loop passes a dotted-quad IPv4).
#
# Returns the list of PTR target names found in the answer section; the list
# is empty when the resolver returns no reply.
#
# Only records whose type is PTR are used: the answer section of a reverse
# lookup can also carry other record types (for instance CNAMEs), and those
# objects do not provide ptrdname().
sub reverse_dns_doms {
    my ($address) = @_;
    my @names;

    my $resolver = Net::DNS::Resolver->new;
    my $reply    = $resolver->search($address, 'PTR');

    if ($reply) {
        foreach my $rr ($reply->answer) {
            next unless $rr->type eq 'PTR';
            push @names, $rr->ptrdname;
        }
    }

    return @names;
}
||||
|
||||
# Sweep the IPv4 space one /24 block at a time and record every host that
# answers on a web port.
#
# For each address the probes run in the order 80/tcp, 80/udp, 443/udp,
# 443/tcp. The first probe that check_port() reports as in use is handled
# and the remaining probes for that address are skipped. Handling a hit
# means: log "<host>:<port> is in use:" to $FILEH_A, reverse-resolve the
# address, log each name followed by a comma, then screenshot every name --
# or the bare address when the lookup produced no usable name.
#
# After each /24 block a progress line and a rough ETA are printed; the ETA
# extrapolates the duration of the block just finished to the blocks that
# are still left.

# First octets to scan. Only 10.x.x.x and 127.x.x.x are left out.
my @first_octets = (1..9, 11..126, 128..255);    # Skip huge private blocks.
my $total_blocks = @first_octets * 256 * 256;    # number of /24 blocks

# [port, protocol, screenshot sub] in probing order.
my @probes = (
    ['80',  'tcp', \&double_80_screenshot],
    ['80',  'udp', \&double_80_screenshot],
    ['443', 'udp', \&double_443_screenshot],
    ['443', 'tcp', \&double_443_screenshot],
);

foreach my $i1 (@first_octets) {
    foreach my $i2 (0..255) {
        foreach my $i3 (0..255) {
            my $alpha = time;

            foreach my $i4 (0..255) {
                $args{'host'} = "$i1.$i2.$i3.$i4";

                foreach my $probe (@probes) {
                    my ($port, $proto, $screenshot) = @{$probe};
                    @args{'port', 'proto'} = ($port, $proto);

                    connectivity_check();
                    next unless check_port(\%args);

                    print {$FILEH_A} "$args{'host'}:$port is in use:\n";

                    # An empty result means "no reverse DNS"; testing for a
                    # single empty string would miss that case and skip the
                    # host entirely.
                    my @names = grep { defined($_) && length($_) }
                                reverse_dns_doms($args{'host'});
                    print {$FILEH_A} map { "$_," } @names;
                    print {$FILEH_A} "\n\n";

                    # One unreachable or misbehaving site must not abort the
                    # whole sweep: report the failure and move on.
                    foreach my $target (@names ? @names : ($args{'host'})) {
                        eval { $screenshot->($target); 1 }
                            or warn "Screenshot of $target failed: $@";
                    }

                    last;    # first responsive port wins for this address
                }
            }

            my $delta = time - $alpha;
            $ii++;
            print 'Progress: ', 100 * $ii / $total_blocks, "%\n",
                  'ETA: ', $delta * ($total_blocks - $ii), "s\n";
        }
    }
}
||||
|
||||
# Final clean-up: flush and close the domain log, shut the browser session
# down, and remove any geckodriver process that is still around.
print 'Cleaning up...'."\n";

# Buffered write errors only surface at close time, so report them.
close $FILEH_A or warn "Could not close ./data/domains.txt: $!";

$driver->quit();

# Best effort. killall prints nothing to stdout on success, so its captured
# output cannot serve as a success flag; run it for its side effect only.
# -q keeps it quiet when no geckodriver process is left to kill.
system('killall', '-q', 'geckodriver');

print 'Done!'."\n";
Loading…
Reference in new issue