#!/usr/bin/perl
#
# sortProxy - merge, de-duplicate and sort lists of web proxies that are
# stored in local files, and write the result to an output file.  When the
# Net::DNS module is available each entry is annotated with DNS names.
#
# Run perldoc on this file for the manual.

use strict;
use warnings;

use Getopt::Long;

my $progname = $0;
$progname =~ s,.*[/\\],,;    # use basename only
$progname =~ s/\.\w*$//;     # strip extension, if any

my $VERSION = sprintf("%d.%02d", q$Revision: 1.0 $ =~ /(\d+)\.(\d+)/);

# Run the program only when this file is executed directly; when it is
# loaded with require/do the helper subs below can be called on their own.
main() unless caller;

# main()
#
# Program entry point.  Parses the command line from @ARGV, reads every
# input list, resolves the hosts (if Net::DNS can be loaded) and writes the
# sorted table to the output file.
#
# Returns 0 on completion.  Dies with the usage text on bad arguments and
# with a diagnostic when an input or output file cannot be opened.
sub main {
    my %options;

    Getopt::Long::Configure('no_ignore_case', 'bundling');
    usage() unless GetOptions(
        \%options,
        'h',    # print usage
        'v',    # print version
        'q',    # no extra info
        'w',    # wait for Enter before exiting
    );

    if ($options{'v'}) {
        my $DISTNAME = 'sortProxy ' . $VERSION;
        die <<"EOT";
This is sortProxy $VERSION ($DISTNAME)

Author: wayne\@nym.alias.net

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
EOT
    }

    # Both the input list(s) and the output file are mandatory.
    usage() if $options{'h'} || @ARGV < 2;

    my @inFiles = split /\+/, $ARGV[0];    # file1+file2+... on the command line
    my $outFile = $ARGV[1];

    my $quiet = $options{'q'} ? 1 : 0;

    # This program writes to a file, so pausing at the end is off unless
    # -w is given (or this variable is set by hand).
    my $pauseOnCompletion = $options{'w'} ? 1 : 0;

    # Net::DNS is optional; block eval avoids compiling a string at run time.
    my $dnsInstalled = eval { require Net::DNS; 1 } ? 1 : 0;
    unless ($dnsInstalled) {
        print "Not using DNS name resolution because the Net::DNS module\n";
        print "is not installed\n";
        print "If you use ActiveState Perl, try \n";
        print "ppm install Net-DNS\n";
        print "in a command window\n";
    }

    # Collect the unique "host:port" entries found across all input files.
    my %tested;
    for my $proxyList (@inFiles) {
        my $list = read_list($proxyList);
        print "$list\n" unless $quiet;
        $tested{$_} = 1 for extract_proxies(clean_list_text($list));
    }

    my %hosts;    # "address:port" => comma separated DNS names

    print "\n*********************\n";
    print "sortProxy v$VERSION report\n";
    if (!%tested) {
        print "No proxies to sort in the specified list(s)\n";
        print "Maybe there are none there or maybe the format is one\n";
        print "I don't understand - tell wayne\n";
    }
    else {
        # One resolver serves every lookup.
        my $resolver = $dnsInstalled ? Net::DNS::Resolver->new : undef;

        for my $entry (sort keys %tested) {
            my ($host, $port) = split /:/, $entry;
            my ($address, $hostFqdn) = resolve_host($resolver, $host);
            if (!defined $address) {
                # A name that does not resolve to an address is left out
                # of the output file.  This can also happen simply because
                # the machine is not connected to the Internet.
                print "warning: can't resolve $host - removing \n";
                next;
            }
            $hosts{"$address:$port"} = $hostFqdn;
        }
    }

    # Now sort by address (plain string order) and print.  The header is
    # written even when no proxies were found.
    open(my $out, '>', $outFile)
        or die "can't open output file $outFile: $!";
    printf {$out} "%-16s %-6s%-30s\n", 'host', 'port', 'FQDN';
    for my $hp (sort keys %hosts) {
        my ($host, $port) = split /:/, $hp;
        printf {$out} "%-16s:%-6u%-30s\n", $host, $port, $hosts{$hp};
    }
    # Buffered write errors only surface at close time.
    close($out) or die "can't write output file $outFile: $!";

    print STDOUT "\7\7";    # two beeps to finish even if -q used :-)

    # Wait for the MS crowd ...
    if ($pauseOnCompletion) {
        print STDOUT "press enter to exit ";
        my $ignored = <STDIN>;
    }

    return 0;
}

# read_list($path)
#
# Returns the whole content of the file $path as one string, with a newline
# appended so that the last entry is always newline-terminated for the
# extraction regex.  Dies if the file cannot be opened.
sub read_list {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "can't open proxy list $path: $!";
    my $text = do { local $/; <$fh> };    # slurp; $/ is restored on scope exit
    close($fh);

    $text = '' unless defined $text;
    return $text . "\n";
}

# clean_list_text($text)
#
# Returns a copy of $text with the markup of the known proxy list pages
# removed, so that each entry is left as "host<separator>port" on its own
# line.  Note the order of the substitutions is important.
#
# NOTE(review): in the source as received, the tag-specific patterns were
# empty (e.g. s///ig) or had unbalanced delimiters, so the exact per-site
# patterns could not be recovered.  An empty pattern makes Perl reuse the
# last successfully matched pattern.  They are replaced here by one generic
# tag removal - confirm against an intact copy of the script if available.
sub clean_list_text {
    my ($text) = @_;

    # for http://proxycheck.spylog.ru/
    $text =~ s/&nbsp;/ /ig;

    # for proxys4all message board
    $text =~ s/\n(?:: )+/\n/g;

    # for http://www.hackzone.ru/member/nethack/proxies.htm
    $text =~ s/\r\n tppabs="http.*?\r\n/\r\n/ig;

    # Any HTML tag becomes a blank, so adjacent cells stay separated.  The
    # character after '<' must start a tag name, so a lone '<' in plain
    # text is not treated as markup.
    $text =~ s{</?[a-zA-Z!][^<>]*>}{ }g;

    # for some posted lists
    $text =~ s/> //g;

    return $text;
}

# extract_proxies($text)
#
# Scans $text for lines that start with a dotted host name or address
# followed by a port number.  Returns the list of unique "host:port"
# strings in order of first appearance.  Entries are skipped when the port
# is outside 1..65535 or the host contains characters other than letters,
# digits, '.' and '-'.
sub extract_proxies {
    my ($text) = @_;
    my (@found, %seen);

    while ($text =~ m|^\s*([^\s:\(]+\.[^\s:]+)[\s:]+[+-]?(\d+).*[\n\r]+|mg) {
        my ($host, $port) = ($1, $2);

        next if $port <= 0 or $port > 65535;
        next if $host !~ /^[a-zA-Z0-9\.\-]+$/;

        # Normalise the port so that "08080" and "8080" count as one entry.
        $port += 0;

        my $key = "$host:$port";
        next if $seen{$key}++;
        push @found, $key;
    }

    return @found;
}

# resolve_host($resolver, $host)
#
# $resolver is a Net::DNS::Resolver object, or undef when Net::DNS is not
# available.  Returns the list ($address, $names):
#   - without a resolver: ($host, '')
#   - lookup succeeded:   the address of the first A record (or $host
#                         itself when the answer holds none) and the names
#                         gathered from the answer, joined with ", "
#   - lookup failed for a dotted-quad address: ($host, 'none')
#   - lookup failed for a name: the empty list, meaning "drop this entry"
sub resolve_host {
    my ($resolver, $host) = @_;

    return ($host, '') unless $resolver;

    my $reply = $resolver->search($host);
    unless ($reply) {
        # We have an IP address, but can't print a fqdn.  No problems.
        return ($host, 'none') if $host =~ /^\d+\.\d+\.\d+\.\d+$/;
        return;
    }

    my $address = $host;
    my $haveAddress = 0;
    my @names;
    for my $rr ($reply->answer) {
        my $type = $rr->type;
        if ($type eq 'A') {
            # Only the first A record is used; otherwise an address from
            # an earlier record would end up in the list of names.
            next if $haveAddress;
            push @names, $host;
            $address = $rr->address;
            $haveAddress = 1;
        }
        elsif ($type eq 'PTR') {
            push @names, $rr->ptrdname;
        }
    }

    return ($address, join(', ', @names));
}

# usage()
#
# Prints the command line help on STDERR and terminates the program with a
# non-zero exit status (via die).
sub usage {
    die <<"EOT";
Usage: $progname [-options] <inFile>[+<inFile>...] <outFile>
    -v  Show program version
    -h  Print this message
    -q  Quiet - don't list the proxy list (default noisy)
    -w  Wait for the enter key before exiting

examples:
    sortproxy -q largeFile.txt+extraFile.txt outFile.txt
    perl sortproxy.pl -q largeFile.txt+extraFile.txt outFile.txt
EOT
}

1;

__END__

=head1 NAME

sortProxy - merge and sort lists of web proxies

=head1 SYNOPSIS

    sortProxy [-h] [-v] [-q] [-w] <inFile>[+<inFile>...] <outFile>

=head1 OPTIONS

=over 4

=item -v

Show program version

=item -h

Print this message

=item -q

Quiet - don't list the proxy list (default noisy).  The two beeps on
completion are still emitted.

=item -w

Wait for the enter key before exiting.

=back

=head1 DESCRIPTION

This program will merge and sort lists of web proxies in local files.
Proxies are sorted by IP address, dupes are removed. When run while
connected to the Internet, it will include domain names in the results.


=head1 REQUIREMENTS:

=over 4

=item *

a system with perl (http://www.activestate.com/ for win32 systems,
http://www.perl.org/ for linux)

=item *

perl module Net::DNS

=back

=head1 VERSION RELEASE HISTORY

=over 4

=item *

00/12/17 v1.0 initial release

=back

=head1 TO RUN IT

=over 4

=item *

start it from the command shell by typing:

    perl sortProxy.pl [options] <inFile>[+<inFile>]...[+<inFile>] <outFile>

Note that any spaces in your filename need to be escaped, or the filename
will get munged by the shell before sortProxy even sees it. An example of
such a character is the space character. You can enclose the filename in
double quotes in MS Windows or single quotes in Unix.

=item *

print this documentation in a pretty format by typing:

    perldoc sortProxy.pl

=back

=head1 TO DO

=over 4

=item *

option to specify that the input format is the findProxy output format, so
this program can parse the reference time and fqdn. That will allow sorting
by ref time, or save execution time by using the fqdn instead of DNS lookup.
Knowing the IP address and fqdn for *all* the proxies means that you can
sort properly without being connected to the Internet as well.

=back

=head1 KNOWN PROBLEMS

=over 4

=item *

none

=back

=head1 VARIABLES INSIDE THE CODE

$pauseOnCompletion: set this if you don't want the program to exit (mainly
so that people can double-click this thing and keep the command window open
at the end).

=head1 AUTHOR

wayne@nym.alias.net (http://www.angelfire.com/wy/1waynes/)

=cut