|
Kaye and Geoff's web page documentation
Perl script to return a web page with links to web pages containing a phrase
For those who want to follow the nitty-gritty of the code, the comments explain what is happening. $maxnlevels controls the number of directory levels to search (it is also used as an error flag); this value may need to be increased if part of the site is expanded. If someone enters a search string containing one or more single quote characters, they will be removed, because the final shell command must have the search string enclosed in quotes. There must be a fix for this, but we cannot think of a straightforward and universal one; it seems that backslash-escaping does not provide a solution.
#! /usr/bin/perl
#
# package: substitute escaped characters
#
require 'Re_sub.pm';
#
# cgi to search web pages
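# simple html to use this cgi would be something like...
#   <form method="post" action="(URL of this script)">
#   <input type="text" name="sstring"> <input type="submit" value="Search">
#   </form>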
# Write the CGI response header and open the HTML page.
# The blank line after Content-type ends the HTTP header section.
# NOTE(review): the literals here held only the title text surrounded by
# spaces, which suggests the tags were lost at some point; minimal markup
# is reconstructed below - adjust to the site's house style.
#
print "Content-type: text/html\n\n";
print '<html><head><title>KGweb search results</title></head>';
print "\n";
print '<body>';
print "\n\n";
#
# Site-wide defaults.
#   $path        directory on disk under which the pages live
#   $ext         file name extension of the pages that are searched
#   $url         public URL corresponding to $path
#   $maxnlevels  number of directory levels to search (set negative
#                later on to flag an invalid search string)
#   $maxnmatches maximum number of matches to display
#
($path, $ext, $url) = ('/kgweb/www/', '.html', 'http://www.kgweb.org.au/');
($maxnlevels, $maxnmatches) = (5, 20);
#
# Read the submitted form data from standard input.
# It has the form name=value&name=value, with the values still escaped;
# resub() (presumably supplied by Re_sub.pm) converts escaped characters
# back to their originals.
# Only the 'sstring' field is used; it ends up in $sstring, which is the
# empty string when the field is absent.
#
$paramstr = <STDIN>;
$paramstr = '' unless defined $paramstr;    # request had no body at all
$paramstr =~ s/[\r\n]+\z//;                 # drop a trailing line ending
$sstring = '';
foreach $item (split(/&/, $paramstr))
{
    # the limit of 2 keeps any literal '=' inside the value
    ($name, $value) = split(/=/, $item, 2);
    next unless defined $name && defined $value;
    $value = resub($value);
    if ($name eq "sstring") { $sstring = $value; }
}
#
# Make the search string safe to place inside a single-quoted shell word:
#   - single quotes are removed, since they would end the quoted word
#   - NUL characters are removed and line breaks become a space, because
#     grep treats each line of a pattern as a separate pattern
# Check that the search string is substantial (non-empty and containing
# at least one word character); if not, flag it via $maxnlevels = -1.
# Set up a grep command to find files containing the search string
# switches: i = ignore case
# l = return filename (only if match)
# F = fixed string (no special characters)
# e = the next word is the pattern, even if it starts with '-'
# the 2>&1 assigns standard error to standard output
#
$sstring = '' unless defined $sstring;
$sstring =~ s/'//g;
$sstring =~ s/\0//g;
$sstring =~ s/[\r\n]+/ /g;
unless ((length($sstring) > 0) && ($sstring =~ /\w/)) { $maxnlevels = -1; }
$cmd = "grep -liF -e '$sstring' $path*$ext 2>&1";
#
# Announce what was searched for.
# The search string comes from the user, so an HTML-escaped copy is
# echoed into the page; $sstring itself is left untouched for the code
# that follows.
# NOTE(review): the tags around the heading are a minimal reconstruction
# (the literals held no markup) - adjust to the site's house style.
#
(my $sstring_html = defined $sstring ? $sstring : '')
    =~ s/([&<>"'])/'&#' . ord($1) . ';'/ge;
print "\n\n";
print "<h2>Results of search for '$sstring_html': </h2>\n";
print "<p>\n";
#
# Counters: directory levels searched so far, and links written so far.
#
$nlevels = 0;
$nmatches = 0;
#
# Search one directory level per pass, starting with the base directory,
# and stop as soon as $maxnmatches links have been written.
# Each line produced by $cmd is either the name of a matching file or an
# error message.
# grep or shell error messages start with 'grep: or bash:' (other Unix systems may be different)
# 'secret' is a directory that we do not want included in the search
# A negative $maxnlevels (invalid search string) means nothing is run.
#
while ($nlevels < $maxnlevels && $nmatches < $maxnmatches)
{
    foreach my $mfilename (`$cmd`)
    {
        last if $nmatches >= $maxnmatches;
        next if ($mfilename =~ /grep:|bash:/);
        next if ($mfilename =~ /secret/i);
        chomp $mfilename;
        #
        # get the title from the file if there is one: the text after the
        # first <title> tag, up to the closing tag or the end of that line
        #
        my $title = 'Untitled';
        if (open(my $mfile, '<', $mfilename))
        {
            while (defined(my $line = <$mfile>))
            {
                next unless $line =~ m{<title>(.*?)(?:<.?title>|[\r\n]|\z)}i;
                # copy the capture first: the test below resets $1
                my $found = $1;
                $title = $found if $found =~ /\S/;
                last;
            }
            close($mfile);
        }
        #
        # turn the path into a URL - preserves current search depth
        # (\Q..\E and the anchor make $path a literal prefix, not a regex)
        # output HTML for a link to the file
        #
        (my $link = $mfilename) =~ s/^\Q$path\E/$url/;
        $link =~ s/(["&<>])/'&#' . ord($1) . ';'/ge;
        print '<a href="' . $link . '">' . $title . '</a><br>' . "\n";
        $nmatches++;
    }
    #
    # set the grep command to work at the next level down
    # (only the '*' of the file glob at the end of the command is
    # extended, so a '*' typed in the search string is never touched)
    # count the level
    #
    $cmd =~ s#\*(\Q$ext\E 2>&1)\z#*/*$1#;
    $nlevels++;
}
#
# Finish off the html: report the outcome and close the page.
# A negative $maxnlevels is the flag for an invalid search string.
# NOTE(review): the closing tags are a minimal reconstruction (the
# literals held no markup) - adjust to the site's house style.
#
print "\n";
if ($maxnlevels < 0) { print "<p>Invalid search string</p>\n"; }
else
{
    my $noun = ($nmatches == 1) ? 'match' : 'matches';
    print "<p>$nmatches $noun found</p>\n";
}
print '</body></html>';
print "\n";
#
# Append one line describing this search to the log file.
# Logging is best effort: if the log cannot be opened the script simply
# exits without recording anything.
# Line layout: date, client host, user agent, [code], search string,
# number of matches (-9 is recorded for an invalid search string).
#
if (open(my $log, '>>', '/home/kg/docs/search2.log'))
{
    my $d = `date`;
    chomp($d);
    # NOTE(review): substr with no length returns everything from offset 4
    # ('travfoodkangdocsgame'); this looks like it was meant to pick one
    # four-letter section code - confirm the intended offset/length.
    my $s = substr('maintravfoodkangdocsgame', 4);
    if ($maxnlevels < 0) { $nmatches = -9; }
    # REMOTE_HOST may be unset by the server; use the address instead
    my $host = $ENV{'REMOTE_HOST'};
    $host = $ENV{'REMOTE_ADDR'} unless defined $host && length $host;
    $host = '' unless defined $host;
    my $agent = defined $ENV{'HTTP_USER_AGENT'} ? $ENV{'HTTP_USER_AGENT'} : '';
    my $logged = defined $sstring ? $sstring : '';
    # client-supplied text must not be able to start a new log line
    foreach my $field ($host, $agent, $logged)
    {
        $field =~ s/[\x00-\x1f\x7f]+/ /g;
    }
    print $log "$d $host $agent  \[$s\] $logged $nmatches\n";
    close($log);
}
exit;
|