I tried to place this code in the first post, but the forum automatically combined the code blocks,
so I am putting the second Perl script here.
Perl script to create access.tmp from access.log:
For this example, just rename your access log to access.log in your practice directory so that this program can find it.
#!/usr/local/bin/perl
#
# Create access.tmp from access.log, leaving out the log lines that belong
# to unwanted sites.
#
# Files used (all in the current directory):
#   sitestoremove.lst - site names to filter out, one entry per line
#   access.log        - the access log to filter
#   access.tmp        - output: the log lines that were kept
#
$filea1 = 'sitestoremove.lst';
$fileb = 'access.log';
$filec = 'access.tmp';

# Output handle; it stays open until the filtering further down is finished.
# Three-argument open keeps the mode separate from the file name, and $! in
# the message tells the user WHY the open failed.
open(INFO2, '>', $filec) or die "Cannot open the needed file $filec : $!";

# Read the complete removal list into memory, one raw line per element
# (line endings are still attached here).
open(REMOVELIST, '<', $filea1) or die "Cannot open the needed file $filea1 : $!";
@removelines = <REMOVELIST>;
close(REMOVELIST);
# Start with every lookup bucket empty.  The buckets are the package arrays
#   @webnet                      - names ending in ".net"
#   @webcoma .. @webcomz         - names ending in "<letter>.com"
#   @webcomxxx                   - any other name ending in ".com"
#   @weba .. @webz               - other names, by their last letter
#   @webxxx                      - everything else
{
    no strict 'refs';    # the arrays are addressed by name below

    @webnet = ();
    foreach my $tail ('a' .. 'z', 'xxx')
    {
        @{"webcom$tail"} = ();
        @{"web$tail"}    = ();
    }
}
# Sort every entry of the removal list into a bucket chosen from the END of
# the (trimmed, lower-cased) entry.  First matching rule wins:
#
#   shorter than 5 characters        -> @webxxx
#   ends in ".net"                   -> @webnet
#   ends in "<a-z>.com"              -> @webcom<letter>
#   ends in ".com"                   -> @webcomxxx
#   last character is a-z            -> @web<letter>
#   anything else                    -> @webxxx
#
# Blank entries are ignored.  @removelines itself is left unmodified.
{
    no strict 'refs';    # the bucket arrays are addressed by name

    foreach my $entry (@removelines)
    {
        # trim() already removes the line terminator.  The chop() that used
        # to run first also removed the last real character of the final
        # entry whenever the list file did not end with a newline.
        my $site = lc(trim($entry));
        next if $site eq "";

        my $bucket;
        if (length($site) < 5)
        {
            $bucket = "webxxx";
        }
        elsif (substr($site, -4) eq ".net")
        {
            $bucket = "webnet";
        }
        elsif ($site =~ /([a-z])\.com\z/)
        {
            $bucket = "webcom$1";
        }
        elsif (substr($site, -4) eq ".com")
        {
            $bucket = "webcomxxx";
        }
        elsif ($site =~ /([a-z])\z/)
        {
            $bucket = "web$1";
        }
        else
        {
            $bucket = "webxxx";
        }

        push(@{$bucket}, $site);
    }
}
#print INFO2 "array size webcoma= ".@webcoma. ".\n";
#print INFO2 "array size webcomb= ".@webcomb. ".\n";
#print INFO2 "array size webcomc= ".@webcomc. ".\n";
#print INFO2 "array size webcomd= ".@webcomd. ".\n";
#print INFO2 "array size webcome= ".@webcome. ".\n";
#print INFO2 "array size webcomf= ".@webcomf. ".\n";
#print INFO2 "array size webcomg= ".@webcomg. ".\n";
#print INFO2 "array size webcomh= ".@webcomh. ".\n";
#print INFO2 "array size webcomi= ".@webcomi. ".\n";
#print INFO2 "array size webcomj= ".@webcomj. ".\n";
#print INFO2 "array size webcomk= ".@webcomk. ".\n";
#print INFO2 "array size webcoml= ".@webcoml. ".\n";
#print INFO2 "array size webcomm= ".@webcomm. ".\n";
#print INFO2 "array size webcomn= ".@webcomn. ".\n";
#print INFO2 "array size webcomo= ".@webcomo. ".\n";
#print INFO2 "array size webcomp= ".@webcomp. ".\n";
#print INFO2 "array size webcomq= ".@webcomq. ".\n";
#print INFO2 "array size webcomr= ".@webcomr. ".\n";
#print INFO2 "array size webcoms= ".@webcoms. ".\n";
#print INFO2 "array size webcomt= ".@webcomt. ".\n";
#print INFO2 "array size webcomu= ".@webcomu. ".\n";
#print INFO2 "array size webcomv= ".@webcomv. ".\n";
#print INFO2 "array size webcomw= ".@webcomw. ".\n";
#print INFO2 "array size webcomx= ".@webcomx. ".\n";
#print INFO2 "array size webcomy= ".@webcomy. ".\n";
#print INFO2 "array size webcomz= ".@webcomz. ".\n";
#print INFO2 "array size webcomxxx= ".@webcomxxx. ".\n";
#print INFO2 "array size webnet = ".@webnet . ".\n";
#print INFO2 "array size weba= ".@weba. ".\n";
#print INFO2 "array size webb= ".@webb. ".\n";
#print INFO2 "array size webc= ".@webc. ".\n";
#print INFO2 "array size webd= ".@webd. ".\n";
#print INFO2 "array size webe= ".@webe. ".\n";
#print INFO2 "array size webf= ".@webf. ".\n";
#print INFO2 "array size webg= ".@webg. ".\n";
#print INFO2 "array size webh= ".@webh. ".\n";
#print INFO2 "array size webi= ".@webi. ".\n";
#print INFO2 "array size webj= ".@webj. ".\n";
#print INFO2 "array size webk= ".@webk. ".\n";
#print INFO2 "array size webl= ".@webl. ".\n";
#print INFO2 "array size webm= ".@webm. ".\n";
#print INFO2 "array size webn= ".@webn. ".\n";
#print INFO2 "array size webo= ".@webo. ".\n";
#print INFO2 "array size webp= ".@webp. ".\n";
#print INFO2 "array size webq= ".@webq. ".\n";
#print INFO2 "array size webr= ".@webr. ".\n";
#print INFO2 "array size webs= ".@webs. ".\n";
#print INFO2 "array size webt= ".@webt. ".\n";
#print INFO2 "array size webu= ".@webu. ".\n";
#print INFO2 "array size webv= ".@webv. ".\n";
#print INFO2 "array size webw= ".@webw. ".\n";
#print INFO2 "array size webx= ".@webx. ".\n";
#print INFO2 "array size weby= ".@weby. ".\n";
#print INFO2 "array size webz= ".@webz. ".\n";
#print INFO2 "array size webxxx = ".@webxxx . ".\n";
# Filter the access log: write to INFO2 (opened earlier in this file) every
# log line that
#   - contains "http://",
#   - is not one of the unwanted content types / static file kinds below, and
#   - whose host name does not contain any entry of the removal-list bucket
#     returned by setarray() for that host.
# Lines are written lower-cased, trimmed, and with their first "?" removed.

# A missing log used to be ignored silently and produced an empty result.
open(my $log_fh, '<', $fileb) or die "Cannot open the needed file $fileb : $!";

# Content types that are never kept (note the leading space in the pattern).
my $skip_type = qr{ (?:application/|text/(?:javascript|xml|css|x-icon|x-cross-domain-policy|x-json|js)|img/gif|image/|video/)};

# Static files and /log, /img requests, recognised by what precedes
# " - direct/" or " - none/".  The dot is escaped so that it only matches a
# real file extension, not any character.
my $skip_file = qr{(?:\.(?:gif|js|jpg|ico|css|png|swf|xml)|/(?:log|img)) - (?:direct|none)/};

# Read line by line instead of loading the whole log into memory.
while (my $website = <$log_fh>)
{
    next if $website !~ m{http://};

    # Convert to lower case for testing, remove the question mark, and let
    # trim() strip the line ending (chop() would eat a real character when
    # the last line has no newline).
    my $lcwebsite = lc($website);
    $lcwebsite =~ s/\?//;
    $lcwebsite = trim($lcwebsite);
    next if $lcwebsite eq "";

    next if $lcwebsite =~ $skip_type;
    next if $lcwebsite =~ $skip_file;

    # Host part: everything after the first "http://" up to the next "/" or
    # whitespace.  Stopping at whitespace keeps a URL without a path from
    # running into the following log fields.
    my ($shortlcwebsite) = $lcwebsite =~ m{http://([^/\s]*)};

    # The line is dropped as soon as one removal entry occurs in the host.
    my $blocked = 0;
    foreach my $entry (setarray($shortlcwebsite))
    {
        next if $entry eq "";
        if (index($shortlcwebsite, $entry) >= 0)
        {
            $blocked = 1;
            last;
        }
    }

    print INFO2 $lcwebsite."\n" unless $blocked;
}
close($log_fh);

# Buffered write errors only show up when the output handle is closed.
close(INFO2) or die "Cannot close the needed file $filec : $!";
exit;
# Return the removal-list bucket (a list of site names) that applies to a
# host name.  The bucket is chosen from the END of $string; the first
# matching rule wins:
#
#   shorter than 5 characters        -> @webxxx
#   ends in ".net"                   -> @webnet
#   ends in "<a-z>.com"              -> @webcom<letter>
#   ends in ".com"                   -> @webcomxxx
#   last character is a-z            -> @web<letter>
#   anything else                    -> @webxxx
#
# Parameter: $string - the host name to classify (expected in lower case).
# Returns:   a copy of the selected package array.
#
# The last three rules used to inspect $_ instead of the argument, so
# @webcomxxx and the @web<letter> buckets were never selected.
sub setarray($)
{
    my $string = shift;

    no strict 'refs';    # the per-letter buckets are addressed by name

    return @webxxx           if length($string) < 5;
    return @webnet           if substr($string, -4) eq ".net";
    return @{"webcom$1"}     if $string =~ /([a-z])\.com\z/;
    return @webcomxxx        if substr($string, -4) eq ".com";
    return @{"web$1"}        if $string =~ /([a-z])\z/;
    return @webxxx;
}
# Trim function: returns a copy of the argument with leading and trailing
# whitespace removed.  The caller's variable is not changed.
sub trim($)
{
    my ($text) = @_;
    for ($text)
    {
        s/^\s+//;
        s/\s+$//;
    }
    return $text;
}
# Left trim function: returns a copy of the argument without its leading
# whitespace.  Trailing whitespace is kept.
sub ltrim($)
{
    my ($text) = @_;
    # $+[0] is the end offset of the leading whitespace run just matched.
    substr($text, 0, $+[0], '') if $text =~ /^\s+/;
    return $text;
}
# Right trim function: returns a copy of the argument without its trailing
# whitespace.  Leading whitespace is kept.
sub rtrim($)
{
    my ($text) = @_;
    # $-[0] is the start offset of the trailing whitespace run just matched.
    substr($text, $-[0]) = '' if $text =~ /\s+$/;
    return $text;
}
The subroutines ltrim and rtrim are not used, and they can be removed.
The code is pretty much inline because I have had a hard time learning Perl.
In this code, I had to split up the websites to remove into different arrays in order to speed up the program.
I was never able to get some GOTO statements working that could speed up this routine some more.