// Source: www.pudn.com > 33_online_store.rar > HTMLIndex.java
package index;
import index.*;
import java.util.*;
import java.io.*;
import DebugLog;
/**
* HTMLIndex represents an index for a directory of HTML files.
* Once loaded the index is thread safe, but the loading and
* creation process should only happen from a single thread.
*/
public class HTMLIndex
{
/** Paths of the indexed files, stored as Strings in the order they were added during loading. */
protected Vector files;
/**
* Maps each indexed word to its data. While loading, the value is the
* raw index line (a String); cacheFilesForWord later replaces it with a
* Vector of IndexEntry objects.
*/
protected Hashtable theIndex;
/** The index file inside the directory; stays null when no usable directory was given. */
protected File indexFile;
/** The directory being indexed; stays null when the constructor argument was null or not a directory. */
protected File direc;
/** Set to true after the index file has been read without an exception, false if reading failed. */
protected boolean indexLoaded;
/** Relative path stored by setRelativePath; null by default. */
protected String useRel;
/** Name of the index file that is kept inside the indexed directory. */
public static final String INDEX_FILE_NAME=".htmlindex";
/**
* Looks for an index file and creates one if necessary.
*/
public HTMLIndex(File dir)
{
theIndex = new Hashtable();
files = new Vector();
if((dir != null)&&dir.isDirectory()) direc = dir;
if(direc != null) indexFile = new File(dir,INDEX_FILE_NAME);
useRel = null;
}
/**
* Stores the relative path to use for the indexed files.
* Per the original author's note, this selects whether file names are
* based on the supplied path or on the directory's absolute path, and
* it must be set BEFORE the index is loaded.
* NOTE(review): the code that consumes this value is not visible in
* this part of the file - confirm how it is applied.
*
* @param rel the path to store (the constructor leaves it as null)
*/
public void setRelativePath(String rel)
{
    this.useRel = rel;
}
/**
* Reports whether the index has been loaded.
* The flag is read while holding this object's lock.
*
* @return true if the index is loaded
*/
public synchronized boolean isIndexLoaded()
{
    final boolean loaded = indexLoaded;
    return loaded;
}
/**
* Reports whether an index file exists on disk for this directory.
*
* @return true if the index file location is known and the file exists
*/
public boolean isIndexBuilt()
{
    return (indexFile != null) && indexFile.exists();
}
/**
* Asks IndexBuilder whether the index for this directory needs rebuilding.
* The directory is passed through unchecked, so it may be null.
*
* @return the answer given by IndexBuilder.indexNeedsRebuilding
*/
public boolean indexNeedsRebuilding()
{
    final boolean stale = IndexBuilder.indexNeedsRebuilding(direc);
    return stale;
}
/**
* Builds the index through IndexBuilder when that is needed.
* Nothing happens if there is no directory, or if an index file
* already exists and does not need rebuilding.
*/
public void buildIndex()
{
    //No directory, nothing to build
    if(direc == null) return;
    //An existing, current index is left alone
    if(isIndexBuilt() && !indexNeedsRebuilding()) return;
    IndexBuilder.buildIndex(direc);
}
/**
* Loads the index, asking for it to be built first.
* Same as calling loadIndex(true).
*/
public void loadIndex()
{
    final boolean buildFirst = true;
    loadIndex(buildFirst);
}
/**
* Loads the index unless it is already loaded.
*
* @param build when true, buildIndex() is called before the index file is read
*/
public void loadIndex(boolean build)
{
    if(isIndexLoaded()) return;
    if(build)
    {
        buildIndex();
    }
    loadIndexFile();
}
/**
* Returns the entries of the file list.
*
* @return an Enumeration over the elements of the files Vector
*/
public Enumeration getFiles()
{
    Enumeration fileList = files.elements();
    return fileList;
}
/**
* Returns the words held in the index.
*
* @return an Enumeration over the keys of the index table
*/
public Enumeration getWords()
{
    Enumeration words = theIndex.keys();
    return words;
}
/**
* Returns the directory this index was created for.
*
* @return the directory, or null if the constructor was not given a directory
*/
public File getDirectory()
{
    return this.direc;
}
/**
* NOTE(review): this span looks damaged. The line starting with
* "for(i=0;i" is not valid Java; text seems to have been lost between
* the start of allFiles() and the tail of a separate method that reads
* the index file (presumably loadIndexFile(), which loadIndex calls -
* confirm). Restore the missing text from an intact copy before
* changing any code here.
*/
public Vector allFiles()
{
Vector results = new Vector();
int i,max;
max = files.size();
// NOTE(review): damaged line - the rest of allFiles() and the opening of the following method are missing here.
for(i=0;i0))
{
// Each file name read is stored with dirPath in front of it.
files.addElement(dirPath+curLine);
}
//Read the words
// Reading stops at end of input or at the first empty line.
while(((curLine = lineIn.readLine()) != null)
&&(curLine.length()>0))
{
index = curLine.indexOf("|");
// Lines without a "|", or with "|" as their first character, are ignored.
if(index > 0)
{
// The whole line is kept as the value; cacheFilesForWord parses it later.
word = curLine.substring(0,index);
theIndex.put(word,curLine);
}
}
lineIn.close();
fileIn.close();
// The flag is set under this object's lock, the same lock isIndexLoaded() takes.
synchronized(this)
{
indexLoaded = true;
}
}
// Any failure leaves the index marked as not loaded. The exception is
// discarded, entries already stored are kept, and lineIn/fileIn are not
// closed on this path.
catch(Exception exp)
{
indexLoaded = false;
}
}
/**
* Converts the raw index line stored for a word into a Vector of
* IndexEntry objects and stores that Vector back in the index.
* The word is lower-cased before the lookup. If the stored value is
* not a String (already converted, or the word is absent) it is
* returned as is, which may be null.
* A raw line is split on "|": the first field is the word and every
* later field holds two integers separated by a space, stored as the
* entry's file and occurences values. An entry is added only if the
* Vector does not already contain an equal one. If any part of the
* line fails to parse, the word is cached with an empty Vector.
*
* @param word the word to look up; must not be null
* @return the entries for the word, or null if nothing is stored for it
*/
protected synchronized Vector cacheFilesForWord(String word)
{
    String key = word.toLowerCase();
    Object stored = theIndex.get(key);
    //Already converted (or unknown word): nothing to parse
    if(!(stored instanceof String))
    {
        return (Vector) stored;
    }
    String rawLine = (String) stored;
    Vector entries = new Vector();
    try
    {
        StringTokenizer fields = new StringTokenizer(rawLine,"|");
        //The first field is the word itself
        fields.nextToken();
        while(fields.hasMoreTokens())
        {
            String field = fields.nextToken();
            IndexEntry entry = new IndexEntry();
            int gap = field.indexOf(" ");
            int fileNumber = Integer.parseInt(field.substring(0,gap));
            int count = Integer.parseInt(field.substring(gap+1));
            entry.file = fileNumber;
            entry.occurences = count;
            if(entries.contains(entry)) continue;
            entries.addElement(entry);
        }
    }
    catch(Exception exp)
    {
        //A malformed line discards everything parsed so far
        entries.removeAllElements();
    }
    theIndex.put(key,entries);
    return entries;
}
/**
* NOTE(review): this span looks damaged. The line starting with
* "for(i=0;i" is not valid Java; text seems to have been lost between
* the loop in filesForWord() and the tail of a separate method that
* combines words using and/or/not flags. Restore the missing text from
* an intact copy before changing any code here.
*
* The visible part of filesForWord() returns an empty Vector when the
* word is null or the index is not loaded; otherwise it looks the word
* up in lower case and has its raw index line parsed on first use.
*/
public Vector filesForWord(String word)
{
Vector dataForWord;
Object data;
Vector results = new Vector();
IndexEntry entry;
// Both early exits hand back the empty results Vector.
if(word == null) return results;
if(!isIndexLoaded()) return results;
data = theIndex.get(word.toLowerCase());
// A String value is a raw index line that has not been parsed yet.
if(data instanceof String)
{
dataForWord = cacheFilesForWord(word);
}
else
{
// Either an already parsed Vector, or null when nothing is stored for the word.
dataForWord = (Vector) data;
}
if(dataForWord != null)
{
int i,max;
max = dataForWord.size();
// NOTE(review): damaged line - the rest of filesForWord() and the opening of the following method are missing here.
for(i=0;i or
// The and/not flags choose which helper folds curWord into results.
if(and)
{
if(not)
{
addAndNotWordToResults(results,curWord);
}
else
{
addAndWordToResults(results,curWord);
}
}
else //either or or space
{
if(not)
{
addOrNotWordToResults(results,curWord);
}
else
{
addOrWordToResults(results,curWord);
}
}
}
//reset booleans
not = false;
and = false;
or = false;
}
}
return results;
}
/**
* Narrows the current results to those that also appear among the
* files for the given word. Elements of curResults that are not
* contained in filesForWord(word) are removed in place.
*
* @param curResults the results gathered so far; modified by this call
* @param word the word whose files must also match
*/
protected void addAndWordToResults(Vector curResults,String word)
{
    Vector matches = filesForWord(word);
    //Walk backwards so removals do not shift the positions still to visit
    int pos = curResults.size();
    while(--pos >= 0)
    {
        if(matches.contains(curResults.elementAt(pos))) continue;
        curResults.removeElementAt(pos);
    }
}
/**
* NOTE(review): this span looks damaged. The line starting with
* "for(i=0;i" is not valid Java; text seems to have been lost between
* the loop in addOrWordToResults() and the tail of a following method.
* Restore the missing text from an intact copy before changing any
* code here.
*/
protected void addOrWordToResults(Vector curResults,String word)
{
Vector wordFiles = filesForWord(word);
int i,max;
Object tmp;
max = wordFiles.size();
// NOTE(review): damaged line - the body of this loop and the opening of the following method are missing here.
for(i=0;i=0;i--)
{
tmp = curResults.elementAt(i);
// Drops the element at position i when wordFiles does not contain it.
if(!wordFiles.contains(tmp))
{
curResults.removeElementAt(i);
}
}
}
protected void addOrNotWordToResults(Vector curResults,String word)
{
Vector wordFiles = filesNotForWord(word);
int i,max;
Object tmp;
max = wordFiles.size();
for(i=0;i