www.pudn.com > searchsite.zip > search.cgi, change:1998-09-15,size:17656b


#!/usr/bin/perl 
 
 
# ------------------------------------------------------------------
# Site configuration.  Everything in this section is meant to be
# edited for the local installation.
# ------------------------------------------------------------------

# Public URL of the site and the matching directory on disk.
$myurl="http://linux.cqi.com.cn/~elvis";  
$mypath="/home/httpd/elvis/public_html";  
 
$basedir = "$mypath/";  # Directory holding the files to search; set here to the whole site.
# The directory location of all your files.  Remember the trailing  
# slash. 
 
$baseurl = "$myurl/";   # The URL that corresponds to $basedir.
# The URL corresponding to the base directory. 
 
# Length of $basedir; return_html uses it in Windows mode to cut the
# base directory off the front of each listed path.
$docrootlen = length($basedir); 
$win='off';			# Set to 'on' when running on Windows 95/NT.
$bg="$myurl/bg1.jpg";		# URL of the page background image.
 
@files = ('*.html','*.htm');   # Files to search; '*' is a wildcard.
 
$summary_file ="$mypath/search2/summaries.html"; 
# This file records how the site is being searched: who searched, what
# they searched for, and how many documents were found.
# It must be writable by the web server (the original instructions say
# mode 777).
# Make this writable (chmod 777 summaries.html) and hide it well! 
# It holds the results of everybody's searches so you'll know what  
# people are really looking for when they come to your site.  We  
# have placed our summary file in a non-web directory so others  
# can't see it - you could put it in a hidden or secure directory. 
# NOTE(review): the default above is built from $mypath, which ends in
# public_html, so it looks web-reachable despite the advice - confirm
# and move it if that matters for your site.
 
$link_url = 'http://oh.yeah.net'; 
$link_title = 'Oh Yeah Net'; 
# Enter the URL and title of your main web page. 
# Both the link URL and its title can be changed to your own.
$java_toys = 'on'; 
# Set to 'on' if your visitors' browsers support JavaScript, otherwise
# set it to 'off'.  When 'on', the prompt page emits a small script that
# puts the keyboard focus in the search box.
 
$searchpict = "$myurl/mylogo.gif"; 
# The URL of the picture shown on the search prompt page.
 
$cgi_url = "$myurl/search2/search.cgi"; 
# Do not change this line unless you are comfortable with CGI.
# It must be the exact, full URL under which this script is invoked.
# Change this to the full URL only if your rename this script. 
 
# This array holds info on all the directories and filetypes you'd like  
# your visitors to search.  Visit the readme file for more customizing  
# information. 
# The search algorithm settings follow.
 
# Options for Weighted Search: 
# 
# All occurrences of a search term count as one point.  The occurrence  
# of a term in the filename, title, META keywords, or META description  
# can have added weight (equivalent to a multiplier per hit).  Enter  
# the multipliers in the array below - the defaults are (2,2,4,2).  If  
# this makes no sense to you, just ignore it and leave the defaults as  
# they are - they work pretty well.  Note that this will give extra  
# weight to those pages that have a properly formatted title and META  
# tags, even if they contain the same basic information. 
 
($name_x, $title_x, $keywords_x, $description_x) = (2,2,4,2); 
 
 
# No further editing is necessary, but feel free to play around... 
# Note that much of the code below is straight HTML, and very easy to  
# modify if you know a little about HTML programming. 
#  
# __________________________________________________________________ 
 
 
 
 
# ------------------------------------------------------------------
# Entry point: decode the submitted form into %FORM, then either run
# a search or show the prompt page.
# ------------------------------------------------------------------

# Fetch the raw urlencoded data.  POST bodies are read from STDIN (only
# when the server announced a length); GET requests use QUERY_STRING so
# that links such as search.cgi?terms=foo work as well.
$buffer = '';
if (defined($ENV{'REQUEST_METHOD'}) && ($ENV{'REQUEST_METHOD'} eq 'GET'))
	{
	$buffer = $ENV{'QUERY_STRING'} if defined($ENV{'QUERY_STRING'});
	}
elsif ($ENV{'CONTENT_LENGTH'})
	{
	read(STDIN,$buffer,$ENV{'CONTENT_LENGTH'});
	}

foreach $pair (split(/&/,$buffer))
	{
	# Limit the split to two fields so a literal '=' inside the value
	# is kept instead of silently truncating it.
	($name,$value) = split(/=/,$pair,2);
	next unless (defined($name) && ($name ne ''));
	$value = '' unless defined($value);

	# Both the field name and the value are urlencoded.
	foreach $field ($name,$value)
		{
		$field =~ tr/+/ /;
		$field =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
		}
	$FORM{$name} = $value;
	}


# A query counts as present when it contains at least one non-blank
# character; this lets a visitor search for "0" and sends blank
# submissions to the prompt page.
if (defined($FORM{'terms'}) && ($FORM{'terms'} =~ /\S/))
	{
	&get_files;
	&search;
	&return_html;
	}
else
	{
	&prompt;
	}
 
 
# Print the search prompt page: HTTP header, the query form, an optional
# JavaScript snippet that focuses the input box (when $java_toys is
# 'on'), and the table of search-syntax tips.
#
# Reads the globals $bg, $searchpict, $cgi_url, $java_toys, $link_url
# and $link_title.  Takes no arguments; writes to STDOUT.
sub prompt
{
print "Content-type: text/html\n\n";
print <<EOM;
<HTML>
<HEAD>
<TITLE>站内搜索</TITLE>
<META NAME="description" CONTENT="Try out our internal search engine 
	for the fastest way to find what you're looking for!">
</HEAD>
<BODY background="$bg"  bgcolor="#FFFFCC" text="#000099" link="#336600" vlink="#669966" alink="#FF9933">
<table BORDER="0" WIDTH="500" CELLPADDING="10" CELLSPACING="10">
  <tr>
    <td BGCOLOR="990033" ALIGN="LEFT"><p align="left"><font size="5" color="FFFFFF"><b>
    搜索结果:</b></font><big><font color="#FFFFFF"> 
         
<strong>是不是你什么也没有输入呀?</strong></font></big></td>
  </tr>
</table>
<BLOCKQUOTE>
<a href="http://oh.yeah.net">
<IMG SRC="$searchpict" ALIGN=RIGHT HSPACE=20 WIDTH=202 HEIGHT=115 
BORDER=0  ALT="站内搜索" ></a>
<b>请输入一个关键字来搜索本站。如果想要更快速的搜索,请阅读<a
 HREF="#tips">高级搜索技巧</a> </b></p>

<FORM METHOD=POST ACTION="$cgi_url" NAME="searchform">
<INPUT TYPE=TEXT NAME="terms" SIZE=30>
<INPUT TYPE=SUBMIT VALUE="搜索!"><BR>
</BLOCKQUOTE>
EOM

# Put the cursor in the search box for browsers that run JavaScript.
if ($java_toys eq 'on')
	{
	print "<SCRIPT LANGUAGE=\"JavaScript\">\n";
	print "<!-- script hiding...\n";
	print "document.searchform.terms.focus();\n";
	print "// End hiding -->\n";
	print "</SCRIPT>\n";
	}

# Search tips.  The example in the "not" row must actually use "not";
# it previously repeated the "or" example.
print <<EOM;
</FORM>
<BR>
<a href="javascript:history.go(-1);">返回上页</a>
<h2><a name="tips"><tt>搜索指南</tt></a></h2>

<blockquote>
  <table border="1" width="90%" height="93">
  <tr>
    <td width="10%" height="16">搜索命令</td>
    <td width="26%" height="16">作用</td>
    <td width="19%" height="16">举例</td>
    <td width="35%" height="16">结果</td>
  </tr>
  <tr>
    <td width="10%" height="19" align="center">and  </td>
    <td width="26%" height="19">与</td>
    <td width="19%" height="19">赚钱 and 网络</td>
    <td width="35%" height="19">查找所有既包含“赚钱”又包含“网络”文档。</td>
  </tr>
  <tr>
    <td width="10%" height="16" align="center">or</td>
    <td width="26%" height="16">或者</td>
    <td width="19%" height="16">赚钱 or 网络</td>
    <td width="35%" height="16">只要文档中包含“赚钱”、“网络”其中任一词语的文档。</td>
  </tr>
  <tr>
    <td width="10%" height="19" align="center">not</td>
    <td width="26%" height="19">不含</td>
    <td width="19%" height="19">赚钱 not 网络</td>
    <td width="35%" height="19">查找所有既包含“赚钱”,但不包含“网络”文档。</td>
  </tr>
  <tr>
    <td width="10%" height="19" align="center">\$ </td>
    <td width="26%" height="19">包含 某某 查找字符串</td>
    <td width="19%" height="19">\$oh.yeah.net</td>
    <td width="35%" height="19">查找所有包含oh.yeah.net之文档</td>
  </tr>
  <tr>
    <td width="10%" height="1" align="center">*</td>
    <td width="26%" height="1">显示所有文件目录</td>
    <td width="19%" height="1"></td>
    <td width="35%" height="1"></td>
  </tr>
</table>
  <p> 以上的搜索方法与著名搜索引擎<a HREF="http://www.altavista.digital.com">Altavista</a> 
  采用的搜索句法相似。 必要时可参考其<a
  HREF="http://www.altavista.digital.com/cgi-bin/query?pg=h"> 使用说明</a>。</p>
</blockquote>

<BR><H5 ALIGN=CENTER>
<A HREF="$link_url">$link_title</A>
<HR SIZE=1 NOSHADE WIDTH=50\%>
<p align="center">本程序提供: <a HREF="http://oh.yeah.net">Oh Yeah Net</a>. 
</p>
</H5></BODY></HTML>
EOM
}
 
 
 
 
# Fill the global @FILES with every text file selected by the entries
# of @files, after changing into $basedir.
#
# Each entry of @files is either a wildcard pattern (expanded with
# glob) or a directory name (every non-hidden file directly inside it
# is taken).  In Windows mode ($win eq 'on') the entry is handed to
# "dir /b /s" instead and the listed paths are used as they come.
#
# Only files that pass Perl's -T (looks like text) test are kept.
# Calls bad_base, which prints an error page and exits, when $basedir
# does not exist or cannot be entered.
sub get_files
{
if ($win ne 'on')
	{
	&bad_base unless (-e $basedir);
	}

# Every pattern below is relative to $basedir, so searching from any
# other directory would silently return the wrong files.
chdir($basedir) || &bad_base;

foreach my $pattern (@files)
	{
	my @listed;

	if ($win eq 'on')
		{
		# One path per output line; splitting on line ends (not on
		# blanks) keeps paths that contain spaces intact.
		@listed = grep { $_ ne '' } split(/[\r\n]+/, `dir /b /s $pattern`);
		}
	elsif (-d $pattern)
		{
		# A directory entry: list its files, skipping dot files, and
		# prefix each with the directory (adding the slash if the
		# configuration forgot it).
		my $dir = $pattern;
		$dir .= '/' unless ($dir =~ m{/$});
		if (opendir(my $dh, $dir))
			{
			@listed = map { "$dir$_" } sort grep { !/^\./ } readdir($dh);
			closedir($dh);
			}
		}
	else
		{
		# glob expands the wildcard without running a shell and returns
		# whole file names, so names containing spaces are not broken
		# up.  As in the shell, blanks inside the pattern itself
		# separate several patterns.
		@listed = glob($pattern);
		}

	foreach my $candidate (@listed)
		{
		push(@FILES,$candidate) if ((-f $candidate) && (-T $candidate));
		}
	}
}
 
# Build the regular expression used to look for one search term.
#
#   $term      - the visitor's term, with any leading +, - or $ removed
#   $substring - true for "$term" queries, which match anywhere in the
#                text; otherwise the term must have a non-word
#                character on each side
#
# The term is passed through quotemeta, so it is always matched
# literally and visitor input can neither break nor inject a pattern.
# A term without capital letters matches case-insensitively; a term
# with at least one capital letter matches case-sensitively.
sub _search_term_regex
{
my ($term, $substring) = @_;
my $source = quotemeta($term);
$source = '\W' . $source . '\W' unless $substring;
return ($term =~ /[A-Z]/) ? qr/$source/ : qr/$source/i;
}


# Parse $FORM{'terms'} and score every file listed in @FILES.
#
# Query syntax: "and" / "+" marks a required term, "not" / "-" a
# forbidden term, "or" is the default, "double quotes" group words into
# one term, a leading "$" asks for a substring match, and a "*" anywhere
# in the query lists every file.
#
# Results are left in globals read by return_html:
#   @optional, @required, @forbidden - the terms, for display.  Whole-
#       word terms are stored wrapped in a literal \W...\W marker,
#       substring terms are stored bare.
#   %titles, %description            - per-file title and summary text
#   %HITS                            - sort key => file; the key is the
#       zero-padded relevance followed by the file name
#   $hitcount                        - number of matching files
sub search
{
# Collapse runs of whitespace and pad with blanks so the operator words
# below are always delimited by single spaces:
$FORM{'terms'} =~ s/\s+/ /g;
$FORM{'terms'} = " $FORM{'terms'} ";

# Convert NOT statements to minus signs:
$FORM{'terms'} =~ s/ not / -/ig;

# Convert AND statements to plus signs:
$FORM{'terms'} =~ s/ and / \+/ig;

# Strip OR statements (OR is the default):
$FORM{'terms'} =~ s/ or / /ig;

# A "*" anywhere switches term matching off and lists every file:
$check = 'true' unless ($FORM{'terms'} =~ /\*/);

# Protect quoted groups: inside double quotes every blank is replaced by
# a placeholder so the group survives the split on whitespace below.
# The pieces produced by splitting on the quote character alternate
# between outside (first) and inside a quoted group.
my $placeholder = '%%%==%%%';
my $inside_quotes = 0;
my $masked = '';
foreach my $segment (split(/\"/,$FORM{'terms'}))
	{
	my $piece = $segment;
	$piece =~ s/ /$placeholder/g if $inside_quotes;
	$masked .= $piece;
	$inside_quotes = !$inside_quotes;
	}
$FORM{'terms'} = $masked;

# Classify each term and compile its pattern once, up front.
my (@optional_re, @required_re, @forbidden_re);
foreach my $raw (split(/\s+/,$FORM{'terms'}))
	{
	# Skip null entries (first and last)
	next if ($raw eq '');

	# Unmask grouped terms:
	my $term = $raw;
	$term =~ s/\Q$placeholder\E/ /g;

	my $kind = 'optional';
	if ($term =~ s/^\+//)
		{$kind = 'required';}
	elsif ($term =~ s/^-//)
		{$kind = 'forbidden';}

	my $substring = ($term =~ s/^\$//) ? 1 : 0;

	# A bare "+", "-" or "$" leaves nothing to search for.
	next if ($term eq '');

	# return_html expects whole-word terms wrapped in a literal \W
	# marker and substring terms bare, so store them that way.
	my $stored = $substring ? $term : '\W' . $term . '\W';
	my $regex = &_search_term_regex($term,$substring);

	if ($kind eq 'required')
		{
		push(@required,$stored);
		push(@required_re,$regex);
		$required_terms_present = "you bet";
		}
	elsif ($kind eq 'forbidden')
		{
		push(@forbidden,$stored);
		push(@forbidden_re,$regex);
		$forbidden_terms_present = "fraid so";
		}
	else
		{
		push(@optional,$stored);
		push(@optional_re,$regex);
		}
	}


foreach my $path (@FILES)
	{
	# Files that cannot be opened are skipped rather than scored as if
	# they were empty.
	my $fh;
	next unless (open($fh, '<', $path));
	my @lines = <$fh>;
	close($fh);

	my $string = join(' ',@lines);
	$string =~ s/\n//g;

	# Extra copies of the weighted fields are collected here and added
	# to the searchable text at the end.  Each copy is surrounded by
	# blanks so that whole-word terms can match inside it.
	my $weighted = '';

	# Extract the title, if there is one.  The title already occurs
	# once in the text, so only $title_x - 1 extra copies are added:
	if ($string =~ /<title>(.*)<\/title>/i)
		{
		$titles{$path} = $1;
		$weighted .= " $titles{$path} " x ($title_x - 1);
		}
	$titles{$path} = $path unless $titles{$path};

	# Extract the description, if there is one; otherwise use the first
	# 25 words of the page text:
	if ($string =~ /<meta\s+name="description"\s+content="([^"]*)"/i)
		{
		$description{$path} = $1;
		$weighted .= " $description{$path} " x $description_x;
		}
	else
		{
		my $plain = $string;
		$plain =~ s/<title>(.*)<\/title>//ig;
		$plain =~ s/<[^>]*>//g;
		my @words = grep { $_ ne '' } split(/\s+/,$plain);
		$#words = 24 if (@words > 25);
		$description{$path} = join(' ',@words) . ' ...';
		}

	# Extract the keywords, if they exist:
	if ($string =~ /<meta\s+name="keywords"\s+content="([^"]*)"/i)
		{
		my $keywords = $1;
		$weighted .= " $keywords " x $keywords_x;
		}

	# Weight the filename as needed:
	$weighted .= " $baseurl$path " x $name_x;

	# Now that we're done with the special HTML tags, strip HTML tags
	# from the text so that they aren't used in the search.  The text
	# is padded with blanks so a term at its very start or end still
	# has a non-word character next to it.
	$string .= $weighted;
	$string =~ s/<[^>]*>//g;
	$string = " $string ";

	if ($check)
		{
		# Optional terms: any hit includes the file; every hit adds to
		# its relevance.
		foreach my $regex (@optional_re)
			{
			my $hits = () = $string =~ /$regex/g;
			$include{$path} = 'yes' if $hits;
			$relevance{$path} += $hits;
			}

		# Required terms: all of them must be present.
		foreach my $regex (@required_re)
			{
			my $hits = () = $string =~ /$regex/g;
			unless ($hits)
				{
				$include{$path} = 'no';
				last;
				}
			$include{$path} = 'yes';
			$relevance{$path} += $hits;
			}

		# Forbidden terms: a single hit excludes the file.
		foreach my $regex (@forbidden_re)
			{
			if ($string =~ $regex)
				{
				$include{$path} = 'no';
				last;
				}
			}
		}
	else
		{
		# Allow for wildcard-triggered listing:
		$include{$path} = 'yes';
		}

	# Record the hit.  The relevance is zero-padded to a fixed width so
	# that the string sort done by return_html orders it numerically.
	if (defined($include{$path}) && ($include{$path} eq 'yes'))
		{
		my $rank = sprintf("%012.3f",(($relevance{$path} || 0)/1000));
		$HITS{"$rank$path"} = $path;
		$hitcount++;
		}

	} # End loop through all files.
} # End search procedure.
 
 
       
# Escape the characters that are special in HTML so that text typed by
# a visitor is displayed literally instead of being interpreted as
# markup.
sub _html_escape
{
my ($text) = @_;
$text = '' unless defined($text);
$text =~ s/&/&amp;/g;
$text =~ s/</&lt;/g;
$text =~ s/>/&gt;/g;
$text =~ s/"/&quot;/g;
return $text;
}


# Turn a list of terms, as stored by search, into a comma-separated,
# HTML-safe string.  Whole-word terms arrive wrapped in a literal \W
# marker, which is removed; substring terms arrive bare and are shown
# in italics.
sub _format_terms
{
my @shown;
foreach my $stored (@_)
	{
	my $term = $stored;
	my $italic = ($term !~ /^\\W/);
	$term =~ s/\\W//g;
	$term = &_html_escape($term);
	$term = "<I>$term</I>" if $italic;
	push(@shown,$term);
	}
return join(', ',@shown);
}


# Print the result page for the search just performed and append a
# record of the search to $summary_file.
#
# Reads the globals filled in by search (%HITS, $hitcount, %titles,
# %description, @optional, @required, @forbidden) and the configuration
# ($summary_file, $bg, $basedir, $baseurl, $docrootlen, $win, $cgi_url,
# $link_url, $link_title).  Calls Last_Modified for each hit.
sub return_html
{
# First we build a summary for the webmaster and the visitor:
my $docstring = "找到<font color=ff0080 size=4><strong><big>$hitcount</strong></big></font>个文件。";
$docstring = "找到了一个文件。" if ($hitcount == 1);
$docstring = "找不到一个文件。" unless ($hitcount);

# The terms come straight from the visitor, so they are HTML-escaped
# before being shown or logged.  The heading is closed once, after all
# term lists.
$summary ="<TABLE BORDER=0 WIDTH=500 CELLPADDING=10 CELLSPACING=10><TR><TD BGCOLOR=990033 ALIGN=LEFT>";
$summary .="<FONT SIZE=5 COLOR=FFFFFF><b>";
$summary .="  搜索结果</font></b></td></tr></table><h3>$docstring";
$summary .="搜索单词:" . &_format_terms(@optional) if (@optional);
$summary .= "\n     Required Terms:  " . &_format_terms(@required) if (@required);
$summary .= "\n    排除的单词:  " . &_format_terms(@forbidden) if (@forbidden);
$summary .= "</h3>\n<a href=javascript:history.go(-1);>返回上页</a>\n";

# Time stamp for the log, written in Chinese.
my @months = ('一月','二月','三月','四月','五月','六月',
              '七月','八月','九月','十月','十一月','十二月');
my @mdays = ('一日','二日','三日','四日','五日','六日','七日','八日','九日','十日',
             '十一日','十二日','十三日','十四日','十五日','十六日','十七日','十八日',
             '十九日','二十日','二十一日','二十二日','二十三日','二十四日','二十五日',
             '二十六日','二十七日','二十八日','二十九日','三十日','三十一日');
my @days = ('星期日','星期一','星期二','星期三','星期四',
            '星期五','星期六');
my @py = ('O','一','二','三','四','五','六','七','八','九');

my ($sec,$min,$hour,$mday,$mon,$year,$wday) = (localtime(time))[0,1,2,3,4,5,6];

# localtime counts years from 1900; spell the real four-digit year out
# digit by digit so the date is also right from 2000 onwards.
my $year_text = join('', map { $py[$_] } split(//, $year + 1900));
my $date = sprintf("%s年,%s%s, %s, %02d:%02d:%02d",
	$year_text, $months[$mon], $mdays[$mday-1], $days[$wday],
	$hour, $min, $sec);

# Logging is best effort: if the summary file cannot be opened the
# visitor still gets the results.
if (open(my $log, '>>', $summary_file))
	{
	my $visitor = &_html_escape($ENV{'REMOTE_HOST'} || $ENV{'REMOTE_ADDR'} || '');
	print $log $summary;
	print $log "系 $visitor 于$date 搜索。<BR>\n";
	close($log);
	}

# Now that the webmaster knows what's going on, we print the
# results for the visitor:


print "Content-type: text/html\n\n";
print <<EOM;
<HTML>
<HEAD><TITLE>搜索结果</TITLE></HEAD>
<BODY background="$bg" bgcolor="#FFFFCC" Text="#000099" link="#336600" vlink="#669966" alink="#FF9933">
$summary
<DL>
EOM

if ($hitcount > 0)
{
# Keys start with the relevance, so a reverse sort lists the best
# matches first.
foreach my $key (reverse sort keys %HITS)
	{
	my $found = $HITS{$key};

	# In Windows mode the file list holds full paths (output of
	# "dir /b /s"), so they are used directly on disk and only
	# shortened for the link.
	# NOTE(review): assumes "dir /b /s" always prints absolute paths
	# below $basedir - confirm on the target system.
	my $on_disk = ($win eq 'on') ? $found : "$basedir$found";
	my $link = $found;
	if ($win eq 'on')
		{
		$link = substr($found,$docrootlen);
		$link =~ s/\\/\//g;
		}

	my $size = -s $on_disk;
	$size = 0 unless defined($size);
	if ($size > 1500)
		{$size = int($size/1000) . " K";}
	else
		{$size = "$size bytes";}
	my $last = &Last_Modified($on_disk);

	# %titles and %description are keyed by the name search stored,
	# not by the shortened link.
	print "<P><DT><a href=\"$baseurl$link\"><STRONG>$titles{$found}</STRONG></a></DT>\n";
	print "<DD>$description{$found}<BR>\n";
	print "<CITE><A HREF=\"$baseurl$link\">$baseurl$link</A><FONT SIZE=-1>";
	print " - $size - $last</FONT></CITE></DD>\n";
	}
}
else
{
print <<EOM;
<BLOCKQUOTE><B>不好意思,我们找不到符合您的要求的信息。您可以参照一下 
<A HREF="$cgi_url?tips">搜索技巧</A> 来重新定义您的查询。</B></BLOCKQUOTE>
EOM
}
print <<EOM;
</DL>
<CENTER>
<BR><BR><FORM METHOD=POST ACTION="$cgi_url">
<INPUT TYPE=TEXT NAME="terms" SIZE=40>
<INPUT TYPE=SUBMIT VALUE="重新搜索"></FORM>
</CENTER>
<center><script>
document.write("<a href=http://best.nease.net/cgi-bin/view/viewbasic.cgi?elvis target=_blank><img src=http://best.nease.net/cgi-bin/log.cgi?user=elvis&refer="+escape(document.referrer)+"&cur="+escape(document.URL)+" border=0 alt=网易中文网页排行榜></a>");
</script></center>
<BR><H5 ALIGN=CENTER>
<A HREF="$cgi_url?tips">搜索技巧</A> - 
<A HREF="$link_url">$link_title</A>
<HR SIZE=1 NOSHADE WIDTH=50\%>
<p align="center">本程序提供: <a HREF="http://oh.yeah.net">Oh Yeah Net</a>. 
</H5></BODY></HTML>
EOM
}
 
 
sub Last_Modified
# Return the last-modification date of a file as "DAY MONTH YEAR",
# with the month name in Chinese and a four-digit year.
#
#   argument: path of the file to examine
#   returns:  the formatted date, or an empty string when the file
#             cannot be examined (stat fails)
#
# Based on a snippet by Jeff Carnahan of Terminal Productions
# (www.terminalp.com)
{
my ($path) = @_;

my $modified = (stat($path))[9];
return '' unless defined($modified);

my ($mday,$mon,$year) = (localtime($modified))[3,4,5];
my @month_names = ('一月','二月','三月','四月','五月','六月',
                   '七月','八月','九月','十月','十一月','十二月');

# localtime reports the year as an offset from 1900.
return join(' ', $mday, $month_names[$mon], $year + 1900);
}
 
 
sub bad_base
# Report that the configured search directory ($basedir) could not be
# found: send a small HTML error page to the browser and stop the
# script.
{
my @page = (
	"Content-type: text/html\n\n",
	"无法找到指定的搜索目录::\n",
	"<BLOCKQUOTE><PRE>$basedir</PRE></BLOCKQUOTE>\n",
	"请向管理员报告.\n",
);
print join('', @page);
exit;
}