commit 2a0161f169838d6af9608c93fb018ebb93b135dc Author: Roy Date: Thu Mar 30 00:39:46 2023 +0200 Dit is de eerste (major) versie. Natuurlijk had de eerste versie geen README en sample video. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f642f51 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +Multi-UA web scraper geschreven in Perl voor educatieve doeleinden. Gebaseerd op het scannen van niet-prive IPv4 netwerken in volgorde van Class A naar Class E en het maken van rDNS requests als een web-gerelateerde port is open. + +Dependencies: Selenium::Firefox, Net::DNS, Net::EmptyPort en Net::Curl::Easy. + + +[Willekeurig voorbeeld in video-formaat](sample/senicup.mp4) + + +TODO: + +- Add DB support for storing HTMLs, b64 screenshots, etc. +- Utilize the DB for randomizing the scanning of IPv4 blocks and supporting seamless continuation. +- Add RE-based querying system for scoped IPv4/domain scrapes. +- Reliably detect when being blocked by companies that own (too) much network space and delay requests appropriately (might not be necessary if various parts are sufficiently randomized). +- Windows support. diff --git a/sample/senicup.mp4 b/sample/senicup.mp4 new file mode 100644 index 0000000..7847562 Binary files /dev/null and b/sample/senicup.mp4 differ diff --git a/senicup.pl b/senicup.pl new file mode 100755 index 0000000..43f8431 --- /dev/null +++ b/senicup.pl @@ -0,0 +1,123 @@ +#!/bin/env perl +#use diagnostics; +use strict; +use warnings; +use File::Path qw(make_path); +use Net::EmptyPort qw(check_port); +use Net::DNS; +#use Selenium::Remote::Driver; +use Selenium::Firefox; +use Selenium::Firefox::Profile; + +chomp(my $connectivity = `cat /sys/class/net/*/operstate|grep -m 1 '^up\$'`); # Will do for now. +my $ii = 0; +my (%args, %args2); +$args2{'profile_dir'} = '/home/miami/.Mozilla/Firefox/ud8j40yn.default'; # Ghacks user.js is a good start.
+# Browser session: Firefox driven through geckodriver, started with the
+# profile directory configured in %args2.
+my $profile = Selenium::Firefox::Profile->new(%args2);
+my $driver = Selenium::Firefox->new('firefox_profile' => $profile);
+
+# Everything the scraper writes ends up below this directory.
+my $data_dir = './data';
+
+# make_path() returns the number of directories it created, which is 0 when
+# the directory already exists, so success is verified with -d instead.
+make_path($data_dir);
+die "Cannot create $data_dir: $!" unless -d $data_dir;
+
+# Here, we'll store all the domains from all ipv4 addresses.
+open my $FILEH_A, '>>', "$data_dir/domains.txt"
+    or die "Cannot open $data_dir/domains.txt for appending: $!";
+{
+    # Unbuffer the log so an interrupted scan keeps its most recent entries.
+    my $previous = select $FILEH_A;
+    $| = 1;
+    select $previous;
+}
+
+# Returns 1 when any /sys/class/net/*/operstate file reads "up", else 0.
+# Pure-Perl equivalent of the startup probe
+# (cat /sys/class/net/*/operstate | grep -m 1 '^up$').
+sub _link_is_up {
+    for my $state_file (glob '/sys/class/net/*/operstate') {
+        open my $fh, '<', $state_file or next;
+        my $state = <$fh>;
+        close $fh;
+        next unless defined $state;
+        chomp $state;
+        return 1 if $state eq 'up';
+    }
+    return 0;
+}
+
+# Blocks until a network interface is up.
+# The interface state is re-read on every iteration; the original consulted
+# the value captured once at startup, so it could never notice a change.
+# Waits 2**$exp seconds between probes; once $exp reaches 10 it is reduced by
+# a random 0..10 so the delay stays bounded and jittered.
+sub connectivity_check {
+    my $exp = 0;
+    while (1) {
+        $connectivity = _link_is_up() ? 'up' : '';
+        last if $connectivity eq 'up';
+        warn 'No wireless connectivity.';
+        sleep 2**$exp;
+        $exp++;
+        if ($exp >= 10) { $exp -= int(rand(11)) }
+    }
+    return;
+}
+
+# Turns arbitrary remote-controlled text (a PTR name, a redirect URL) into a
+# single directory name: every run of characters outside [A-Za-z0-9._-]
+# becomes "_", leading dots are replaced so "." and ".." cannot be produced,
+# and the result is capped at 200 characters.
+sub _path_component {
+    my ($text) = @_;
+    $text = '' unless defined $text;
+    $text =~ s{[^A-Za-z0-9._-]+}{_}g;
+    $text =~ s{\A\.+}{_};
+    $text = substr $text, 0, 200;
+    return length $text ? $text : '_';
+}
+
+# Loads $url in the browser and stores a full-page screenshot (base64 text as
+# returned by the driver) in
+#   ./data/<host>/<final url>/screenshot-<epoch>.png.base64
+# with <host> and <final url> passed through _path_component().
+# Dies when the page cannot be loaded or the file cannot be written.
+sub _capture {
+    my ($host, $url) = @_;
+    connectivity_check();
+    my $time_in_s = time;
+    $driver->get($url);
+    # Best effort: dismiss a pending alert before issuing further commands.
+    # NOTE(review): presumably an open alert makes later driver calls fail;
+    # the original accepted the alert only after reading the URL.
+    eval { $driver->accept_alert };
+    my $current_url = $driver->get_current_url();
+    my $dir = join '/', $data_dir, _path_component($host),
+        _path_component($current_url);
+    make_path($dir);
+    die "Cannot create $dir: $!" unless -d $dir;
+    my $file = "$dir/screenshot-$time_in_s.png.base64";
+    open my $fh, '>', $file or die "Cannot open $file for writing: $!";
+    print {$fh} $driver->screenshot({'full' => 1});
+    close $fh or die "Cannot close $file: $!";
+    return;
+}
+
+# Shared body of double_80_screenshot/double_443_screenshot: one capture of
+# the rendered page and one of its view-source: rendering.
+sub _double_screenshot {
+    my ($scheme, @targets) = @_;
+    if ($#targets > 0) {
+        die "Too many arguments: \"$targets[1]\"...\"$targets[$#targets]\".";
+    }
+    my $host = $targets[0];
+    die 'Missing host argument.' unless defined $host and length $host;
+    _capture($host, "$scheme://$host");             # Fetch the eye-candy.
+    _capture($host, "view-source:$scheme://$host"); # Fetch the page source for (partly) reproduction.
+    return;
+}
+
+# Screenshots http://<host> and its page source.
+# Takes exactly one argument, a host name or IPv4 address; dies otherwise.
+sub double_80_screenshot {
+    return _double_screenshot('http', @_);
+}
+
+# Screenshots https://<host> and its page source.
+# Takes exactly one argument, a host name or IPv4 address; dies otherwise.
+sub double_443_screenshot {
+    return _double_screenshot('https', @_);
+}
+
+# Returns the list of PTR names for the given IPv4 address, or an empty list
+# when the lookup yields no reply. Answer records of any other type are
+# skipped because only PTR records provide ptrdname().
+sub reverse_dns_doms {
+    my ($address) = @_;
+    my $res = Net::DNS::Resolver->new;
+    my $reply = $res->search("$address", "PTR") or return;
+    return map { $_->ptrdname } grep { $_->type eq 'PTR' } $reply->answer;
+}
+
+# True for /16 blocks that are private or link-local and are not already
+# excluded by the first-octet list below: 172.16.0.0/12, 192.168.0.0/16 and
+# 169.254.0.0/16.
+sub _is_private_block {
+    my ($i1, $i2) = @_;
+    return 1 if $i1 == 172 and $i2 >= 16 and $i2 <= 31;
+    return 1 if $i1 == 192 and $i2 == 168;
+    return 1 if $i1 == 169 and $i2 == 254;
+    return 0;
+}
+
+# Runs one screenshot routine and reports a failure instead of letting a
+# single unreachable site end the whole scan.
+sub _try_screenshot {
+    my ($shoot, $target) = @_;
+    eval { $shoot->($target); 1 }
+        or warn "Skipping $target: $@";
+    return;
+}
+
+# Probes are tried in this order and the first open one wins.
+my @probes = ([80, 'tcp'], [80, 'udp'], [443, 'udp'], [443, 'tcp']);
+my %shooter_for = (
+    80  => \&double_80_screenshot,
+    443 => \&double_443_screenshot,
+);
+
+my @first_octets = (1..9, 11..126, 128..255); # Skip huge private blocks.
+
+# Number of /24 blocks the scan will visit; drives progress and ETA.
+my $total_blocks = 0;
+foreach my $i1 (@first_octets) {
+    foreach my $i2 (0..255) {
+        $total_blocks += 256 unless _is_private_block($i1, $i2);
+    }
+}
+
+foreach my $i1 (@first_octets) {
+    foreach my $i2 (0..255) {
+        next if _is_private_block($i1, $i2);
+        foreach my $i3 (0..255) {
+            my $alpha = time;
+            foreach my $i4 (0..255) {
+                $args{'host'} = join '.', $i1, $i2, $i3, $i4;
+
+                PROBE:
+                foreach my $probe (@probes) {
+                    @args{'port', 'proto'} = @{$probe};
+                    connectivity_check();
+                    next PROBE unless check_port(\%args);
+
+                    print {$FILEH_A} $args{'host'}.':'.$args{'port'}.' is in use:'."\n";
+                    my $shoot = $shooter_for{$args{'port'}};
+                    my @names = grep { defined($_) && length($_) }
+                        reverse_dns_doms($args{'host'});
+                    if (@names) {
+                        foreach my $name (@names) {
+                            print {$FILEH_A} $name.',';
+                            _try_screenshot($shoot, $name);
+                        }
+                    }
+                    else {
+                        # No usable rDNS name: fall back to the address itself.
+                        _try_screenshot($shoot, $args{'host'});
+                    }
+                    print {$FILEH_A} "\n\n";
+                    last PROBE;
+                }
+            }
+            my $beta = time;
+            my $delta = $beta-$alpha;
+            $ii++;
+            # ETA assumes every remaining /24 takes as long as the last one.
+            print 'Progress: ', 100*$ii/$total_blocks, "%\n",
+                'ETA: ', $delta*($total_blocks-$ii), "s\n";
+        }
+    }
+}
+
+print 'Cleaning up...'."\n";
+close $FILEH_A or warn "Cannot close $data_dir/domains.txt: $!";
+$driver->quit();
+# Backticks return killall's (empty) output, so the original never printed
+# 'Done!'. -q keeps killall silent when no geckodriver is left to kill.
+system('killall', '-q', 'geckodriver');
+print 'Done!'."\n";