// www.pudn.com > 33_online_store.rar > HTMLIndex.java



package index;

import index.*;
import java.util.*;
import java.io.*;
import DebugLog;

/**
 * HTMLIndex represents an index for a directory of HTML files.
 * Once loaded the index is thread safe, but the loading and
 * creation process should only happen from a single thread.
 */
public class HTMLIndex
{
	// File names read from the index file; each one is stored as a String.
	protected Vector files;
	// Maps a word to either the raw index line it came from (String) or,
	// once cacheFilesForWord has parsed that line, a Vector of IndexEntry.
	protected Hashtable theIndex;
	// The INDEX_FILE_NAME file inside direc; stays null when direc is null.
	protected File indexFile;
	// Directory being indexed; only set when the constructor was given an
	// existing directory.
	protected File direc;
	// Set to true after the index file has been read without an exception.
	protected boolean indexLoaded;
	// Value supplied through setRelativePath; null until then.
	protected String useRel;
	
	// Name of the index file that is looked up inside the indexed directory.
	public static final String INDEX_FILE_NAME=".htmlindex";
	
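    /**
	 * Prepares an index for the given directory. Only the directory and
	 * the location of its index file are recorded here; nothing is built
	 * or read until buildIndex() / loadIndex() is called.
	 */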
	public HTMLIndex(File dir)
	{
		theIndex = new Hashtable();
		files = new Vector();
        if((dir != null)&&dir.isDirectory()) direc = dir;
		if(direc != null) indexFile = new File(dir,INDEX_FILE_NAME);
        useRel = null;
	}
	
	/**
	 * Sets whether file names should be based on the
	 * provided path or on the directory's absolute path.
	 * Set this value BEFORE loading the index.
	 */
	public void setRelativePath(String rel)
	{
		// Stored as given; no validation is performed here.
		this.useRel = rel;
	}
	
	/**
	 * Return true if the index is loaded.
	 */
	public synchronized boolean isIndexLoaded()
	{
		// Read while holding this object's monitor.
		final boolean loaded = indexLoaded;
		return loaded;
	}
	
	/**
	 * Reports whether an index file is known for this directory and
	 * currently exists on disk.
	 *
	 * @return true only when indexFile is set and exists
	 */
	public boolean isIndexBuilt()
	{
		return (indexFile != null) && indexFile.exists();
	}
	
	/**
	 * Asks IndexBuilder whether the index for this directory is out of
	 * date. The decision itself is made entirely by
	 * IndexBuilder.indexNeedsRebuilding.
	 *
	 * @return whatever IndexBuilder reports for the current directory
	 */
	public boolean indexNeedsRebuilding()
	{
		final boolean stale = IndexBuilder.indexNeedsRebuilding(direc);
		return stale;
	}
	
	/**
	 * Builds the index through IndexBuilder when a directory is set and
	 * the index file is either missing or reported as needing a rebuild.
	 * Does nothing otherwise.
	 */
	public void buildIndex()
	{
		if(direc == null)
		{
			return;
		}

		// The rebuild check is only consulted when an index file exists.
		boolean mustBuild = !isIndexBuilt() || indexNeedsRebuilding();
		if(mustBuild)
		{
			IndexBuilder.buildIndex(direc);
		}
	}
	
	/**
	 * Loads the index, building it first when required.
	 * Shorthand for loadIndex(true).
	 */
	public void loadIndex()
	{
		final boolean buildFirst = true;
		loadIndex(buildFirst);
	}
	
	/**
	 * Loads the index unless it is already loaded.
	 *
	 * @param build when true, buildIndex() is called before the index
	 *              file is read
	 */
	public void loadIndex(boolean build)
	{
		if(isIndexLoaded())
		{
			return;
		}

		if(build)
		{
			buildIndex();
		}
		loadIndexFile();
	}
	
	/**
	 * Returns an enumeration over the file names currently held in the
	 * files vector.
	 */
	public Enumeration getFiles()
	{
		final Enumeration fileNames = files.elements();
		return fileNames;
	}
	
	/**
	 * Returns an enumeration over the keys of the index table, i.e. the
	 * words that have an entry.
	 */
	public Enumeration getWords()
	{
		final Enumeration words = theIndex.keys();
		return words;
	}
	
	/**
	 * Returns the directory this index was created for, or null when the
	 * constructor did not receive an existing directory.
	 */
	public File getDirectory()
	{
		return this.direc;
	}
	
	/**
	 * Begins by creating a fresh result Vector and taking the size of the
	 * files vector.
	 *
	 * NOTE(review): the source text is damaged at the loop header below.
	 * The rest of allFiles() and the opening of the routine that reads the
	 * index file (presumably loadIndexFile(), which loadIndex(boolean)
	 * calls) are missing, so two methods run together here and the code
	 * does not compile as written. Restore the lost text from a clean
	 * copy; the comments below describe only what is still visible.
	 */
	public Vector allFiles()
	{
		Vector results = new Vector();
		int i,max;
		
		max = files.size();
		
		// NOTE(review): truncated line - everything between "i" and "0))"
		// was lost. From here on the code belongs to the index-file reader.
		for(i=0;i0))
			{
				// Each line of the file section is stored with dirPath in front.
				files.addElement(dirPath+curLine);
			}
			
			//Read the words
			
			// Word lines are read until end of input or the first empty line.
			while(((curLine = lineIn.readLine()) != null)
					&&(curLine.length()>0))
			{
				index = curLine.indexOf("|");
				
				// Lines without a '|' or starting with one are ignored.
				if(index > 0)
				{
				    // The text before the first '|' is the key.
				    word = curLine.substring(0,index);
				
				    // The whole raw line is stored as the value.
				    theIndex.put(word,curLine);
				}
			}
			
			// NOTE(review): these close() calls sit inside the try body, so
			// they are skipped when an exception is thrown before this point.
			lineIn.close();
			fileIn.close();
			
			synchronized(this)
			{
			    indexLoaded = true;
		    }
		}
		catch(Exception exp)
		{
			// Any failure marks the index as not loaded; the exception is
			// discarded. Unlike the write above, this one is not made inside
			// synchronized(this).
			indexLoaded = false;
		}
	}
	
	/**
	 * Parses the raw index line stored for a word into a Vector of
	 * IndexEntry and stores that Vector back in the index in place of
	 * the line.
	 *
	 * The line is split on '|'. The first token (the word itself) is
	 * skipped; each following token is cut at its first space into a file
	 * number and an occurrence count, both parsed as ints. An entry is
	 * added only if the Vector does not already contain it.
	 *
	 * @param word the word to look up; it is lower-cased before the lookup
	 * @return the parsed entries; an empty Vector if any part of the line
	 *         could not be parsed; or the stored value unchanged (possibly
	 *         null) when it is not a raw String line
	 */
	protected synchronized Vector cacheFilesForWord(String word)
	{
	    String key = word.toLowerCase();
	    Object stored = theIndex.get(key);
	    
	    // Nothing to parse: either no entry at all, or already a Vector.
	    if(!(stored instanceof String))
	    {
	        return (Vector) stored;
	    }
	    
	    String rawLine = (String) stored;
	    Vector entries = new Vector();
	    
	    try
	    {
	        StringTokenizer tokens = new StringTokenizer(rawLine,"|");
	        
	        // The leading token is the word itself.
	        tokens.nextToken();
	        
	        while(tokens.hasMoreTokens())
	        {
	            String pair = tokens.nextToken();
	            IndexEntry parsed = new IndexEntry();
	            int space = pair.indexOf(" ");
	            
	            int fileNumber = Integer.parseInt(pair.substring(0,space));
	            int occurrenceCount = Integer.parseInt(pair.substring(space+1));
	            
	            parsed.file = fileNumber;
	            parsed.occurences = occurrenceCount;
	            
	            if(!entries.contains(parsed))
	            {
	                entries.addElement(parsed);
	            }
	        }
	    }
	    catch(Exception exp)
	    {
	        // A malformed line invalidates everything parsed so far.
	        entries.removeAllElements();
	    }
	    
	    theIndex.put(key,entries);
	    
	    return entries;
	}
	
	/**
	 * Looks up the data recorded for a single word. An empty Vector is
	 * returned when word is null or the index is not loaded. The lookup
	 * key is the lower-cased word; a raw String value is parsed through
	 * cacheFilesForWord, any other value is used as stored.
	 *
	 * NOTE(review): the source text is damaged at the loop over
	 * dataForWord below. The rest of this method and the opening of a
	 * following routine (apparently a search that combines words using
	 * and / or / not flags) are missing, so two methods run together and
	 * the code does not compile as written. Restore the lost text from a
	 * clean copy; the comments below describe only what is still visible.
	 */
	public Vector filesForWord(String word)
	{
		Vector dataForWord;
		Object data;
		Vector results = new Vector();		
		IndexEntry entry;
		
		if(word == null) return results;
		
		if(!isIndexLoaded()) return results;
		
		data = theIndex.get(word.toLowerCase());
		
		// A String value is a raw index line that has not been parsed yet.
		if(data instanceof String)
		{
		    dataForWord = cacheFilesForWord(word);
		}
		else
		{
		    dataForWord = (Vector) data;
		}
		
		if(dataForWord != null)
		{
			int i,max;
			
			max = dataForWord.size();
			
			// NOTE(review): truncated line - the text after "i" was lost.
			// From here on the code belongs to the search routine.
			for(i=0;i or
					
					// The and / not flags select one of four combiners for
					// the current word.
					if(and)
					{
						if(not)
						{
							addAndNotWordToResults(results,curWord);
						}
						else
						{
							addAndWordToResults(results,curWord);
						}
					}
					else //either or or space
					{
						if(not)
						{
							addOrNotWordToResults(results,curWord);
						}
						else
						{
							addOrWordToResults(results,curWord);
						}
					}
				}
				
				//reset booleans
				not = false;
				and = false;
				or = false;
			}
		}
		
		return results;
	}
	
	/**
	 * Applies a logical AND to the running results: every element of
	 * curResults that is not also in filesForWord(word) is removed in
	 * place.
	 *
	 * @param curResults the results so far; modified by this call
	 * @param word the word whose files must also match
	 */
	protected void addAndWordToResults(Vector curResults,String word)
	{
		Vector matches = filesForWord(word);
		
		// Walk from the end so removals do not shift unvisited elements.
		int pos = curResults.size();
		while(--pos >= 0)
		{
			Object candidate = curResults.elementAt(pos);
			if(!matches.contains(candidate))
			{
				curResults.removeElementAt(pos);
			}
		}
	}
	
	/**
	 * Begins by fetching filesForWord(word) and taking its size.
	 *
	 * NOTE(review): the source text is damaged at the loop header below.
	 * The rest of addOrWordToResults() and the opening of the following
	 * method (presumably addAndNotWordToResults, which is called elsewhere
	 * in this class but not defined in the visible text) are missing, so
	 * two methods run together and the code does not compile as written.
	 * Restore the lost text from a clean copy.
	 */
	protected void addOrWordToResults(Vector curResults,String word)
	{
		Vector wordFiles = filesForWord(word);
		int i,max;
		Object tmp;
		
		max = wordFiles.size();
		
		// NOTE(review): truncated line - the text between "i" and "=0;i--)"
		// was lost. The loop body below belongs to the following method; it
		// removes from curResults every element that its wordFiles does not
		// contain (that method's own wordFiles declaration is not visible).
		for(i=0;i=0;i--)
		{
			tmp = curResults.elementAt(i);
			if(!wordFiles.contains(tmp))
			{
				curResults.removeElementAt(i);
			}
		}
	}
	
	protected void addOrNotWordToResults(Vector curResults,String word)
	{
		Vector wordFiles = filesNotForWord(word);
		int i,max;
		Object tmp;
		
		max = wordFiles.size();
		
		for(i=0;i