#!/usr/bin/perl
# If required, adjust line above to point to Perl 5.
######################################################################
#                      fantomas shadowMaker(TM)                      #
######################################################################
# $vers = "ver. 02.01.01"; # 2006-02-02                              #
######################################################################
# (c) Copyright 2002-2006 by fantomaster.com                        #
# All rights reserved.                                               #
# Copying, modification or distribution requires permission         #
# in writing by copyright holder.                                    #
# fantomas shadowMaker(TM) is the protected trade mark of           #
# fantomaster.com.                                                   #
# URL: < http://fantomaster.com >                                    #
######################################################################
######################################################################
##########   PLEASE ADJUST THE FOLLOWING VARIABLES!   ################
#                                                                    #
# "$standard"        = denotes the core domain you want to           #
#                      redirect your "normal" visitors to            #
#                      (i.e. NO searchbots, etc.)                    #
# "$keyword_flag"    = Set this variable to "1" if you want to       #
#                      include the keyword into $standard            #
# "LWP::Simple"      = Required module for RSS Feed Inclusion        #
# "XML::RSS"         = Required module for RSS Feed Inclusion        #
# "$rss_flag"        = Set this variable to "1" if you want to       #
#                      include an RSS feed                           #
# "$rss_items"       = Define the number of items of the RSS feed    #
# "$main_dir"        = Main directory (DocumentRoot) for your pages  #
# "$stats_dir"       = Directory for log files and admin files       #
# "$hits_log_file"   = Log file listing SD hits                      #
#                      (Default name is: "hits.log")                 #
# "$humans_log_file" = Log file listing SD hits from human visitors  #
#                      (Default name is: "human-hits.log")           #
# "$links_list_file" = Links list file name as generated in step 4   #
#                      (Default name is: "links.txt")                #
# "$selist_file"     = Search engine referrer parsing routines       #
#                      (Default name is: "selist.txt")               #
# "$botbase_dir"     = Directory of fantomas spiderSpy(TM) botBase   #
#                      (Default name is: "stats")                    #
# "$botbase_file"    = file containing spider robots list            #
#                      (Default name is: "spiderspy.txt")            #
######################################################################

$standard = "http://realestate.puravida.com";
#$keyword_flag = 0;

######################################################################
# RSS Feed Inclusion
#use LWP::Simple;
#use XML::RSS;
$rss_flag  = 1;
$rss_items = 5;

######################################################################
$main_dir        = "/home/coreb4/public_html/discover-costa-rica-realestate/";
$stats_dir       = "/home/coreb4/public_html/discover-costa-rica-realestate/cgi-bin/stats/";
$hits_log_file   = "hits.log";
$humans_log_file = "human-hits.log";
$links_list_file = "links.txt";
$selist_file     = "selist.txt";
$botbase_dir     = "/home/coreb4/public_html/discover-costa-rica-realestate/cgi-bin/stats/";
$botbase_file    = "spiderspy.txt";

######################################################################
#############   NO EDITING BEYOND THIS LINE!   #######################
######################################################################

# Snapshot of the CGI request environment.  The values are interpolated
# into strings on purpose: a variable the web server did not set becomes
# the empty string rather than undef.
$query_string = "$ENV{'QUERY_STRING'}";
$remote_host  = "$ENV{'REMOTE_HOST'}";
$remote_addr  = "$ENV{'REMOTE_ADDR'}";
$user_agent   = "$ENV{'HTTP_USER_AGENT'}";
$referer      = "$ENV{'HTTP_REFERER'}";
$method       = "$ENV{'REQUEST_METHOD'}";

######################################################################
# The query string (minus one leading slash) names the requested page.
# $switch_name is what gets logged and keyword-matched; $fantom_name is
# the file name looked up below $main_dir.
$switch_name = $query_string;
$switch_name =~ s/^\///;
$fantom_name = $switch_name;

######################################################################
# Main dispatch: visitors whose address is listed in the bot register
# receive the page itself, everybody else is redirected.
get_date();
process_register();
if ($search_engine) {
    fantom();
}
else {
    standard();
}
exit;

######################################################################
# get_date()
#   Stores the current local time in the global $date, formatted as
#   "YYYY-MM-DD, HH:MM:SS".  The globals $sec, $min, $hour, $mday and
#   $mon are left as zero-padded two-digit strings and $year as the
#   four-digit year.  Takes no arguments and returns nothing.
sub get_date {
    ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime();

    $mon++;            # localtime() counts months from 0
    $year += 1900;     # localtime() counts years from 1900; using this
                       # value keeps every field from one clock reading

    ($sec, $min, $hour, $mday, $mon) =
        map { sprintf("%02d", $_) } ($sec, $min, $hour, $mday, $mon);

    $date = "$year-$mon-$mday, $hour:$min:$sec";
    return;
}
######################################################################
# NOTE(review): several places below use the literal marker text "<>".
# It is reproduced exactly as found.  The marker looks as if it lost
# its name at some point (for example, include_rss reads $1 after
# matching a pattern that has no capture group) -- TODO confirm the
# intended marker strings against a pristine copy of the script.
######################################################################
# process_register()
#   Sets the global $search_engine to 1 when $remote_addr equals one of
#   the entries in "$botbase_dir/$botbase_file", otherwise leaves it
#   undefined.  Lines starting with "#" or "-" and blank lines in that
#   file are ignored.  An unreadable file is treated as an empty list.
sub process_register {
    $search_engine = undef;

    my $register_path = "$botbase_dir/$botbase_file";
    my $register;
    if (!open($register, '<', $register_path)) {
        warn "shadowMaker: cannot read $register_path: $!\n";
        return;
    }

    while (my $entry = <$register>) {
        next if ($entry =~ /^[#-]/);
        next if ($entry =~ /^\s*$/);
        $entry =~ s/\s+$//;    # newline plus any CR left by CRLF files
        if ($remote_addr eq $entry) {
            $search_engine = 1;
            last;
        }
    }
    close($register);
    return;
}

######################################################################
# standard()
#   Handles a visitor that is not in the bot register: works out a
#   keyword (from the referer, else from the requested page name),
#   prints a redirect to the matching entry of the links list or to
#   $standard, and writes the hit to both log files.
#   Reads the globals $referer, $keyword_flag, $switch_name, $standard;
#   sets the globals $keyword and %links.
sub standard {
    my $links_path = "$stats_dir/$links_list_file";
    my $have_links = -f $links_path;

    # A referer is only worth parsing when its keyword can be used:
    # either inside $standard or as a key into the links list.
    if ($referer && ($keyword_flag || $have_links)) {
        process_engine_list();
        $keyword = parse_referer($referer);
    }

    if ($have_links) {
        # Links list format: one "keyword:::target" pair per line.
        if (open(my $list, '<', $links_path)) {
            while (my $line = <$list>) {
                next if ($line =~ /^[#-]/);
                next if ($line =~ /^\s*$/);
                $line =~ s/[\r\n]+$//;
                my ($phrase, $url) = split(/:::/, $line);
                $links{$phrase} = $url;
            }
            close($list);
        }
        else {
            warn "shadowMaker: cannot read $links_path: $!\n";
        }

        my $target = $links{$keyword};
        if (!$target) {
            # No entry for the referer keyword: derive one from the
            # page name, e.g. "beach-house-01..." gives "beach house".
            # $1 is only read after a successful match.
            if ($switch_name =~ /^(.+?)\-\d{2}/) {
                $keyword = $1;
                $keyword =~ s/(\_|\-)/ /g;
            }
            else {
                $keyword = undef;
            }
            $target = $links{$keyword};
        }

        # A target of '<>' means "use the default destination".
        if ($target && $target ne '<>') {
            print_location($target);
        }
        else {
            print_location($standard);
        }
    }
    else {
        print_location($standard);
    }

    # Built after print_location() because that call may rewrite $keyword.
    my $entry = "$date -- $remote_host -- $remote_addr -- $switch_name -- $user_agent -- $referer -- $keyword -- $links{$keyword} -- $method\n";
    log_hits($entry);
    log_human_hits($entry);
    return;
}

######################################################################
# fantom()
#   Serves the requested page from $main_dir to a registered spider:
#   prints the HTML header, expands <!--#include virtual="..."-->
#   directives, optionally merges an RSS feed, prints the page and logs
#   the hit (log line prefixed with "!!").
sub fantom {
    print "Content-type: text/html\n\n";

    # $fantom_name comes from the query string.  Names containing a
    # ".." path segment or a NUL byte could reach files outside
    # $main_dir, so they are treated like a missing page.
    if (   $fantom_name =~ m{(?:^|/)\.\.(?:/|$)}
        || $fantom_name =~ /\0/
        || !-f "$main_dir$fantom_name")
    {
        $fantom_name = "index.html";
    }

    my $html_string = '';
    if (open(my $page, '<', "$main_dir$fantom_name")) {
        local $/;    # read the whole file in one go
        my $content = <$page>;
        $html_string = $content if defined $content;
        close($page);
    }
    else {
        warn "shadowMaker: cannot read $main_dir$fantom_name: $!\n";
    }

    $html_string =~ s|<\!--\#include virtual=\"(.+?)\"-->|include_file("$main_dir$1")|egi;

    if ($rss_flag) {
        # Best effort: if the feed cannot be merged for any reason, the
        # page is served without it and the marker is removed.
        eval { $html_string = include_rss($html_string); };
        if ($@) {
            $html_string =~ s|<>||s;
        }
    }

    print $html_string;
    log_hits("!!$date -- $remote_host -- $remote_addr -- $switch_name -- $user_agent -- $referer -- $method\n");
    return;
}

######################################################################
# include_file($path)
#   Returns the complete contents of $path, or undef when the file is
#   empty or cannot be opened.
sub include_file {
    my ($include_file) = @_;
    my $include_string;

    open(my $fh, '<', $include_file) or return $include_string;
    while (my $line = <$fh>) {
        $include_string .= $line;
    }
    close($fh);
    return $include_string;
}

######################################################################
# _append_log($path, @entries)
#   Appends @entries to the file $path.  A log that cannot be opened is
#   reported with warn() and otherwise ignored.
sub _append_log {
    my ($log_path, @entries) = @_;

    my $log;
    if (!open($log, '>>', $log_path)) {
        warn "shadowMaker: cannot append to $log_path: $!\n";
        return;
    }
    print {$log} @entries;
    close($log);
    return;
}

######################################################################
# log_hits(@entries)
#   Appends @entries to "$stats_dir/$hits_log_file".
sub log_hits {
    _append_log("$stats_dir/$hits_log_file", @_);
    return;
}

######################################################################
# log_human_hits(@entries)
#   Appends @entries to "$stats_dir/$humans_log_file".
sub log_human_hits {
    _append_log("$stats_dir/$humans_log_file", @_);
    return;
}

######################################################################
# parse_referer($referer)
#   Returns the search phrase found in $referer, still URL-encoded, or
#   undef when no engine pattern yields one.  Uses the tables filled by
#   process_engine_list(): @se (engine names), %domain (per engine, an
#   alternation of quoted domains) and %query (per engine, a list of
#   quoted query prefixes).  Only "http://" referers are recognised.
#   When several patterns match, the last non-empty phrase wins.
sub parse_referer {
    my ($referer) = @_;
    my $keyword;

    foreach my $engine (@se) {
        foreach my $query_re (@{ $query{$engine} }) {
            next unless $referer =~ m/^http:\/\/($domain{$engine})(.*?)($query_re)(.*?)(\&|$)/i;

            # An empty phrase from one pattern must not wipe out a
            # phrase already found by an earlier pattern.
            my $phrase = $4;
            next if !$phrase;

            $keyword = $phrase;
            # NOTE(review): the phrase captured above stops before the
            # first "&", so this cache: rewrite, which requires an "&",
            # looks unreachable -- confirm the intent.
            $keyword =~ s|^cache\:(.+)\+(.*?)\&|$2|;
        }
    }
    return $keyword;
}

######################################################################
# process_engine_list()
#   Loads "$botbase_dir/$selist_file" into the globals @se, %domain and
#   %query.  The format handled here is:
#       [engine name]
#       domain: <host>       (several lines are joined with "|")
#       query: <prefix>      (several lines are collected in a list)
#   Blank lines and lines starting with "#" are skipped.  Dies when the
#   file cannot be opened.
sub process_engine_list {
    my $selist_path = "$botbase_dir/$selist_file";
    open(my $engines, '<', $selist_path) || die "$selist_path: $!";

    # Start from clean tables so a second call cannot duplicate entries.
    @se     = ();
    %domain = ();
    %query  = ();
    # Not read by any code visible here; reset kept from the original.
    @engines        = ();
    @engine_referer = ();
    @engine_submit  = ();

    my $section;
    while (my $line = <$engines>) {
        next if $line =~ /^\s*$/;
        next if $line =~ /^\s*#/;
        $line =~ s/[\r\n]+$//;          # newline, and CR of CRLF files
        $line =~ s/^ *(.+?) *$/$1/g;    # trim surrounding spaces

        if ($line =~ /^\[(.+)\]$/) {
            $section = $1;
            push(@se, $section);
        }
        if ($line =~ /domain:\s*(.+)$/) {
            my $quoted = quotemeta $1;
            if ($domain{$section}) {
                $domain{$section} = join("|", $domain{$section}, $quoted);
            }
            else {
                $domain{$section} = $quoted;
            }
        }
        if ($line =~ /query:\s*(.+)$/) {
            push(@{ $query{$section} }, quotemeta $1);
        }
    }
    close($engines);
    return;
}

######################################################################
# decode_keyword($keyword)
#   Returns $keyword with "+" turned into a space and every %XX escape
#   replaced by the byte it encodes.
sub decode_keyword {
    my ($keyword) = @_;
    $keyword =~ s/\+/ /g;
    $keyword =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    return $keyword;
}

######################################################################
# print_location($url)
#   Prints a CGI "Location:" redirect to $url.  When $keyword_flag is
#   set, the first '<>' marker in $url is replaced by the global
#   $keyword (or removed when there is none); afterwards the global
#   $keyword is URL-decoded and whitespace-normalised, which is the
#   form the caller writes to the log.
sub print_location {
    my ($standard) = @_;

    if ($keyword_flag) {
        if (!$keyword) {
            $standard =~ s/<>//;
        }
        else {
            $standard =~ s/<>/$keyword/;
            $keyword = decode_keyword($keyword);
            $keyword =~ s|^\s+||;
            $keyword =~ s|\s+$||;
            $keyword =~ s|\s+| |g;
        }
    }

    # A line break inside the value would end the header early.
    $standard =~ tr/\r\n//d;

    print "Location: $standard\n\n";
    return;
}

######################################################################
# include_rss($html)
#   Returns $html with its feed marker replaced by up to $rss_items
#   items of an RSS feed (all items when $rss_items is false), or $html
#   unchanged when it contains no marker.  Relies on XML::RSS and on a
#   get() function (LWP::Simple) being loaded by the file; any failure
#   raises an exception that the caller is expected to trap.
sub include_rss {
    my ($html_string) = @_;

    # NOTE(review): the pattern has no capture group, so $1 cannot
    # deliver the feed URL here; the marker presumably once carried
    # the URL -- TODO restore the full marker pattern.
    my $rss_url;
    if ($html_string =~ /<>/) {
        $rss_url = $1;
    }
    else {
        return $html_string;
    }

    my $rss     = XML::RSS->new;
    my $content = get($rss_url);
    $rss->parse($content);

    my $rss_string = '';
    my $shown      = 0;
    foreach my $item (@{ $rss->{'items'} }) {
        $shown++;
        last if ($rss_items && $shown > $rss_items);

        # The link is written as an anchor around the title; the text
        # as found emitted the literal fragment "{'link'}>" instead.
        # NOTE(review): any further wrapper markup around an item or
        # around the list is not recoverable from here -- confirm.
        $rss_string .= "\n\n<a href=\"$item->{'link'}\">$item->{'title'}</a>\n$item->{'description'}\n";
    }

    $html_string =~ s|<>|\n$rss_string\n|s;
    return $html_string;
}
######################################################################