#!/usr/bin/perl

# WebSpider Version 1.1 by t-Omicr0n [omicr0n@themail.com]
# WebSpider will "follow" every link on a webpage and scan the HTML code for CGI's.
#
# Greets to: f0bic, The Incubus, R00T-dude, cicer0, vorlon, sentinel, oPr, Reggie, F_F, 
#		Shaolin_p, Segfau|t, NecrOmaN, Zym0t1c, l0r3, Preat0r, T0SH, zeroX, AreS, tips, 
#		Lacrima, GigaByte and everyone at #securax@irc.hexyn.be 

# Hmmm, why on earth would we need a socket ?
use Socket;

# preps - read the command line and initialise the spider's global state.
#
# Reads @ARGV (proxy host, proxy port, start URL). When the third argument
# is missing it prints the usage text and exits.
#
# Sets these package globals, which the other subs read directly:
#   $proxy, $port           - proxy to connect through
#   $layer                  - number of link layers the main loop walks
#   $maxcurrentlayerteller  - number of slots per layer the main loop visits
#   $noname, $logfile       - program label and log file name
#   $currentlayer[0]        - start URL, always stored with one http:// prefix
#   $server, $dir, $file    - first three '/'-separated parts of the start URL
#   @dontignore             - list of file extensions (slots 1..5)
sub preps() {
	if (!defined $ARGV[2] || $ARGV[2] eq '') {
		print "\n\nUsage: perl webspider_1.1.pl <proxy server> <proxy port> <URL>\n";
		print "Example: perl webspider_1.1.pl proxy.pandora.be 8080 http://www.microsoft.com/\n";
		exit;
	}

	$proxy   = $ARGV[0];
	$port    = $ARGV[1];
	$layer   = 10;
	$maxcurrentlayerteller = 100;
	$noname  = "WebSpider 1.1";
	$logfile = "WebSpider_Log.txt";

	# Make the slot index explicit instead of relying on undef acting as 0.
	$currentlayerteller = 0;

	# Remove the scheme only where it leads the URL.  A global, unanchored
	# strip would also eat an "http://" embedded later on, e.g. inside a
	# query string, and silently change the address being crawled.
	my $start = $ARGV[2];
	$start =~ s{^http://}{}i;
	($server, $dir, $file) = split(/\//, $start);
	$currentlayer[0] = "http://$start";

	# Slot 0 is intentionally left unset, as before.  parse() walks this list
	# from slot 0, stops at the first empty entry, and *skips* an href whose
	# line matches an entry - so filling slot 0 would switch that filter on.
	# NOTE(review): the filter direction looks inverted for a list with this
	# name; confirm the intended behaviour before re-basing the list at 0.
	$dontignore[1] = ".html";
	$dontignore[2] = ".xml";
	$dontignore[3] = ".asp";
	$dontignore[4] = ".php";
	$dontignore[5] = ".htm";

	# Report every configured extension.  Scanning from slot 0 until the
	# first empty entry would stop immediately and print nothing.
	foreach my $extension (grep { defined $_ && $_ ne '' } @dontignore) {
		print "Don't Ignore: $extension\n";
	}
}

# LogToFile - append one line describing the current CGI hit to $logfile.
#
# The line holds, space separated: $layerteller, $currentlayerteller, the
# entry of @foundcgi at $foundcgiteller, and "http://" followed by the entry
# of @currentlayer at $currentlayerteller.
#
# Returns 1 when the line was written, nothing when the log file could not
# be opened.  Logging is best effort: a failure is reported on STDERR and
# the crawl carries on.
sub LogToFile() {
	# Three-argument open: the file name is never parsed for a mode prefix
	# and surrounding whitespace in it is kept as-is.
	my $log;
	if (!open($log, '>>', $logfile)) {
		warn "Cannot append to $logfile: $!\n";
		return;
	}

	print {$log} "$layerteller $currentlayerteller $foundcgi[$foundcgiteller] http://$currentlayer[$currentlayerteller]\n";

	# Buffered write errors only surface when the handle is closed.
	close($log) or warn "Error while closing $logfile: $!\n";
	return 1;
}

# CheckCGIHistory - decide whether the CGI at $foundcgi[$foundcgiteller] is new.
#
# @cgihistory is scanned from slot 0 up to its first empty entry.
#   already listed: $foundcgiteller is decremented, so the caller's own
#                   increment leaves it pointing at the same slot.
#   not listed:     the CGI is stored in that first empty slot, echoed to
#                   STDOUT as "<layer>:<slot> <cgi>" and passed to LogToFile().
# $cgihistoryteller and $cgiwasinhistory are left holding the scan result.
sub CheckCGIHistory() {
	my $candidate = $foundcgi[$foundcgiteller];

	$cgiwasinhistory = 0;
	for ($cgihistoryteller = 0; $cgihistory[$cgihistoryteller] ne ''; $cgihistoryteller++) {
		$cgiwasinhistory = 1 if $cgihistory[$cgihistoryteller] eq $candidate;
	}

	if ($cgiwasinhistory != 0) {
		$foundcgiteller--;
		return;
	}

	# The scan index now sits on the first free slot.
	$cgihistory[$cgihistoryteller] = $candidate;
	print "$layerteller:$currentlayerteller $candidate\n";
	LogToFile();
}

# CheckHistory - drop the link at $nextlayer[$nextlayerteller] if it was
# queued before, otherwise remember it.
#
# @history is scanned from slot 0 up to its first empty entry.
#   seen before: the @nextlayer slot is blanked and $nextlayerteller is
#                decremented, so the caller's own increment re-uses the slot;
#                $placeinhistory holds the index of the last matching entry.
#   new link:    it is stored in the first empty @history slot.
# $historyteller and $wasinhistory are left holding the scan result.
sub CheckHistory() {
	my $candidate = $nextlayer[$nextlayerteller];

	$wasinhistory = 0;
	for ($historyteller = 0; $history[$historyteller] ne ''; $historyteller++) {
		next unless $history[$historyteller] eq $candidate;
		$wasinhistory   = 1;
		$placeinhistory = $historyteller;
	}

	if ($wasinhistory != 0) {
		$nextlayer[$nextlayerteller] = "";
		$nextlayerteller--;
	}
	else {
		# The scan index now sits on the first free slot.
		$history[$historyteller] = $candidate;
	}
}

# itcontainslocation - queue the target of a redirect header.
#
# Reads the response line at $response[$responseteller], takes the text
# after the first blank as the redirect target, turns it into an http:// URL
# and stores it at $nextlayer[$nextlayerteller].  CheckHistory() then drops
# it again if it was queued before, and the slot counter is advanced.
#
# Resolution uses the globals $server and $dir of the page being parsed:
#   http://...   kept as is
#   /path        http://$server/path
#   other        http://$server/$dir/other, or http://$server/other when
#                $dir is empty
# Nothing is queued when the line carries no target.
sub itcontainslocation() {
	my ($header, $link) = split(/ /, $response[$responseteller]);
	return if !defined $link;

	# Header lines end in CRLF and the caller's chomp only removes the LF;
	# a stray CR would otherwise travel along inside the next request.
	$link =~ s/\s+\z//;
	return if $link eq '';

	if ($link =~ m{^http://}) {
		# Absolute only when the scheme leads the value; an "http://" that
		# merely occurs inside a query string does not count.
		$nextlayer[$nextlayerteller] = $link;
	}
	elsif ($link =~ m{^/}) {
		# Root-relative: resolves against the host, not the directory.
		$nextlayer[$nextlayerteller] = "http://$server$link";
	}
	elsif (defined $dir && $dir ne '') {
		# Keep the link itself; queueing only the directory would lose it.
		$nextlayer[$nextlayerteller] = "http://$server/$dir/$link";
	}
	else {
		$nextlayer[$nextlayerteller] = "http://$server/$link";
	}

	CheckHistory();
	$nextlayerteller++;
}

# itcontainshref - queue the target of the first href="..." on a line.
#
# Reads the response line at $response[$responseteller], extracts the text
# between href=" and the next double quote, turns it into an http:// URL and
# stores it at $nextlayer[$nextlayerteller].  CheckHistory() then drops it
# again if it was queued before, and the slot counter is advanced.
#
# Resolution uses the globals $server and $dir of the page being parsed:
#   http://...   kept as is
#   /path        http://$server/path
#   other        http://$server/$dir/other, or http://$server/other when
#                $dir is empty
# Nothing is queued when the line has no double-quoted href value.
sub itcontainshref() {
	my (undef, $therest) = split(/href=\"/, $response[$responseteller]);
	# The caller triggers on any "href="; without an opening double quote
	# there is nothing to extract, and queueing the bare site root instead
	# would only add a bogus entry.
	return if !defined $therest;

	my ($link) = split(/\"/, $therest);
	$link = '' if !defined $link;

	if ($link =~ m{^http://}) {
		# Absolute only when the scheme leads the value; an "http://" that
		# merely occurs inside a query string does not count.
		$nextlayer[$nextlayerteller] = $link;
	}
	elsif ($link =~ m{^/}) {
		# Root-relative: resolves against the host, not the directory.
		$nextlayer[$nextlayerteller] = "http://$server$link";
	}
	elsif (defined $dir && $dir ne '') {
		# Keep the link itself; queueing only the directory would lose it.
		$nextlayer[$nextlayerteller] = "http://$server/$dir/$link";
	}
	else {
		$nextlayer[$nextlayerteller] = "http://$server/$link";
	}

	CheckHistory();
	$nextlayerteller++;
}

# itcontainsscr - queue the document named by a frame's src="..." attribute.
#
# Reads the response line at $response[$responseteller], extracts the src
# value of the first <frame>/<iframe> tag on it, turns it into an http:// URL
# and stores it at $nextlayer[$nextlayerteller].  CheckHistory() then drops
# it again if it was queued before, and the slot counter is advanced.
# Relative values are resolved with the globals $server and $dir.
# Nothing is queued when the line has no double-quoted frame src.
#
# (The sub keeps its historical name; the attribute it reads is "src".)
sub itcontainsscr() {
	my ($link) = $response[$responseteller] =~ /<i?frame\b[^>]*\bsrc=\"([^\"]*)/;
	return if !defined $link;

	if ($link =~ m{^http://}) {
		$nextlayer[$nextlayerteller] = $link;
	}
	elsif ($link =~ m{^/}) {
		# Root-relative: resolves against the host, not the directory.
		$nextlayer[$nextlayerteller] = "http://$server$link";
	}
	elsif (defined $dir && $dir ne '') {
		# Keep the link itself; queueing only the directory would lose it.
		$nextlayer[$nextlayerteller] = "http://$server/$dir/$link";
	}
	else {
		$nextlayer[$nextlayerteller] = "http://$server/$link";
	}

	CheckHistory();
	$nextlayerteller++;
}

# itcontainsaction - record the target of a form's action="..." attribute.
#
# Reads the response line at $response[$responseteller], extracts the text
# between action=" and the next double quote, turns it into an http:// URL
# and stores it at $foundcgi[$foundcgiteller].  CheckCGIHistory() then
# reports it if it is new, and the slot counter is advanced.
# Relative values are resolved with the globals $server and $dir.
# Nothing is recorded when the line has no double-quoted action value.
sub itcontainsaction() {
	my (undef, $therest) = split(/action=\"/, $response[$responseteller]);
	# The caller triggers on any "action="; without an opening double quote
	# there is no value, and reporting the bare site root as a CGI would be
	# a false hit.
	return if !defined $therest;

	my ($cgi) = split(/\"/, $therest);
	$cgi = '' if !defined $cgi;

	my $tempfoundcgi;
	if ($cgi =~ m{^http://}) {
		$tempfoundcgi = $cgi;
	}
	elsif ($cgi =~ m{^/}) {
		# Root-relative: resolves against the host, not the directory.
		$tempfoundcgi = "http://$server$cgi";
	}
	elsif (defined $dir && $dir ne '') {
		$tempfoundcgi = "http://$server/$dir/$cgi";
	}
	else {
		$tempfoundcgi = "http://$server/$cgi";
	}

	$foundcgi[$foundcgiteller] = $tempfoundcgi;
	CheckCGIHistory();
	$foundcgiteller++;
}

# parse - fetch the current page through the proxy and scan it line by line.
#
# Connects to $proxy:$port, requests "http://" plus the entry of
# @currentlayer at $currentlayerteller, and reads the whole reply into the
# global @response.  Each line is stripped of its line ending, lowercased
# in place, and handed to the matching extractor; the extractors find their
# line through the global index $responseteller.
#
# Prints a message and exits the program when the proxy cannot be resolved
# or connected to.
sub parse() {
	my $target = $currentlayer[$currentlayerteller];

	# inet_aton() yields undef for a host that cannot be resolved; passing
	# that on to sockaddr_in() would abort with an unrelated error message.
	my $serverIP = inet_aton($proxy);
	if (!defined $serverIP) {
		print "Could not connect, try another proxy server.\n";
		exit;
	}
	my $serverAddr = sockaddr_in($port, $serverIP);

	my $sock;
	if (!socket($sock, PF_INET, SOCK_STREAM, getprotobyname('tcp'))
		|| !connect($sock, $serverAddr)) {
		print "Could not connect, try another proxy server.\n";
		exit;
	}

	# Send the URL.  HTTP ends the request line and the header block with
	# CRLF, not a bare LF.
	print "Sending: GET http://$target HTTP/1.0\n";
	send($sock, "GET http://$target HTTP/1.0\r\n\r\n", 0);

	@response = <$sock>;
	close($sock);

	for ($responseteller = 0; $responseteller < @response; $responseteller++) {
		# chomp only removes the LF of a CRLF pair; drop the CR as well so
		# it cannot end up inside an extracted URL.
		chomp($response[$responseteller]);
		$response[$responseteller] =~ s/\r\z//;

		# Convert everything to lowercase...
		$response[$responseteller] = lc $response[$responseteller];
		my $line = $response[$responseteller];

		# If we get a 302...  The line is lowercase by now, so the header
		# name has to be matched in lowercase too.
		if ($line =~ /^location: \S/) { itcontainslocation(); }

		# If we get a 200...
		if ($line =~ /href=/) {
			# The list is walked from slot 0 up to its first empty entry;
			# a line matching an entry is NOT followed.
			# NOTE(review): skipping on a match reads oddly for a list
			# called @dontignore, and preps() fills it from slot 1 so this
			# walk presumably ends at once - confirm the intended direction
			# before relying on the filter.
			my $dontignoreit = 0;
			for (my $slot = 0; defined $dontignore[$slot] && $dontignore[$slot] ne ''; $slot++) {
				my $extension = $dontignore[$slot];
				# Quote the entry: the "." of an extension is literal.
				$dontignoreit = 1 if $line =~ /\Q$extension\E/;
			}

			if ($dontignoreit == 0) { itcontainshref(); }
		}

		# Site has frames...  ("src", restricted to frame tags so that
		# images and scripts are not fetched as pages.)
		if ($line =~ /<i?frame\b[^>]*\bsrc=\"/) { itcontainsscr(); }

		# CGI found...
		if ($line =~ /action=/) { itcontainsaction(); }
	}
}

################
# MAIN PROGGIE #
################

print "\nPreparing...";
preps();
print "Done.\n";

# Layer-by-layer crawl: every URL of the current layer is fetched by parse(),
# which collects the links it finds in @nextlayer; that list then becomes the
# next layer.  $layer bounds the number of layers, $maxcurrentlayerteller the
# number of slots visited per layer.
for ($layerteller = 0; $layerteller < $layer; $layerteller++) {

	for ($currentlayerteller = 0; $currentlayerteller < $maxcurrentlayerteller; $currentlayerteller++) {

		my $url = $currentlayer[$currentlayerteller];
		next if !defined $url;

		# parse() re-adds the scheme itself.  Strip it only where it leads
		# the URL so an "http://" inside a query string stays intact.
		$url =~ s{^http://}{}i;
		$currentlayer[$currentlayerteller] = $url;
		next if $url eq '';

		# Split into host, directory and document for the link extractors.
		# $dir is everything between the host and the last path segment, so
		# a top-level document such as "host/index.html" has an empty $dir
		# rather than being mistaken for a directory.
		my ($host, $path) = split(/\//, $url, 2);
		$path = '' if !defined $path;
		$path =~ s/[?#].*\z//s;		# a query or fragment is not part of the path
		$server = $host;
		if ($path =~ m{\A(.*)/([^/]*)\z}s) {
			($dir, $file) = ($1, $2);
		}
		else {
			($dir, $file) = ('', $path);
		}

		parse();
	}

	# Hand the collected links over and start the next layer with an empty
	# list; leftovers from a longer previous layer would otherwise be copied
	# along and crawled a second time.
	@currentlayer = @nextlayer;
	@nextlayer = ();
	$nextlayerteller = 0;
}

# -- t-Omicr0n @ http://t-Omicr0n.hexyn.be 
