// www.pudn.com > lianxi_2_textquery.rar > textQuery.h


#pragma once 
 
// NOTE(review): the header names were missing from this listing; the list
// below is reconstructed from the standard-library names the file uses.
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
 
//This system answers queries about which words occur in a text file.
//First, a vector stores every line of the text.
//Then a second vector stores the individual words of the text,
//and a third vector stores the location (line, word position) of each word.
//The word vector and the location vector are bound together as a pair.
//Finally, a map is built from each distinct word to the locations where it occurs.
 
// Shared type aliases for the text-query system.
// NOTE(review): the template arguments were missing from this listing. They
// are reconstructed from how the aliases are used in this file: locations are
// built with make_pair(line, word), `text` receives strings via push_back, and
// `text_loc` is constructed from a (text*, loc*) pair. `int` is used for both
// location fields so that large line numbers are not narrowed.
using namespace std; 
typedef pair<int, int>     location;   // (line index, word index within that line)
typedef vector<location>   loc;        // every location at which a word occurs
typedef vector<string>     text;       // lines of the file, or individual words
typedef pair<text*, loc*>  text_loc;   // words and their locations, index for index
 
//class definition 
class textQuery{ 
public: 
	textQuery(){memset(this, 0, sizeof(textQuery));} 
 
	static void FilterElements(string felems){filt_elems = felems;} 
	void QueryText(); 
	void DisplayMapText(); 
	void DisplayTextLocations(); 
	void DoRun() 
	{ 
		RetriveText(); 
		SeparateText(); 
		FilteText(); 
		SuffixText(); 
		StripCaps(); 
		BuildWordMap(); 
	} 
 
protected: 
	void RetriveText(); 
	void SeparateText(); 
	void FilteText(); 
	void SuffixText(); 
	void StripCaps(); 
	void BuildWordMap(); 
 
private: 
	text *linesOfText; 
	text_loc *textLocations; 
	map *wordMap; 
	static string filt_elems; 
}; 
 
// Default punctuation characters searched for by FilteText(); replaceable at
// run time through textQuery::FilterElements().
// NOTE(review): this is a non-inline definition inside a header, so including
// the header from more than one translation unit would define it twice.
string textQuery::filt_elems = "\",.;:!<<)(\\/"; 
 
 
 
// Read the text: ask the user for a file name and load that file, line by
// line, into linesOfText.
//
// Side effects: prompts on cout, reads one whitespace-delimited token from
// cin, and points linesOfText at a newly allocated vector of lines. If the
// file cannot be opened, an error is written to cerr and the process exits
// with status -1.
void textQuery::RetriveText() 
{ 
	// Read the name into a std::string: extracting into a fixed char[100]
	// overruns the buffer when the user types a longer name.
	std::string fileName; 
	std::cout << "Please input the name of the text:"; 
	std::cin >> fileName; 

	std::ifstream inputFile(fileName.c_str()); 
	if (!inputFile) 
	{ 
		// Trailing space keeps the file name from running into the message.
		std::cerr << "Oops! Unable to open file " << fileName << std::endl; 
		std::cerr << "Quit!"; 
		std::exit(-1); 
	} 
	std::cout << std::endl; 
 
	linesOfText = new text; 
	std::string sText; 
	while (std::getline(inputFile, sText)) 
	{ 
		linesOfText->push_back(sText); 
	} 
} 
 
 
//separate the words 
void textQuery::SeparateText() 
{ 
	text *separatedWords = new text; 
	loc *wordsLocatioin = new loc; 
 
	for (int textPos = 0; textPos < linesOfText->size(); textPos++) 
	{ 
		short wordPos = 0; 
		string textLine = (*linesOfText)[textPos]; 
		string::size_type eol = textLine.length(); 
		string::size_type pos = 0, pre_pos = 0; 
 
		while ((pos = textLine.find(' ', pos)) != string::npos) 
		{ 
			short wordPos = 0; 
			separatedWords->push_back( 
				textLine.substr(pre_pos, pos - pre_pos)); 
			wordsLocatioin->push_back( 
				make_pair(textPos, wordPos)); 
			wordPos++; 
			pos++; 
			pre_pos = pos; 
		} 
 
		separatedWords->push_back( 
			textLine.substr(pre_pos, pos - pre_pos)); 
		wordsLocatioin->push_back( 
			make_pair(textPos, wordPos)); 
	} 
 
	textLocations = new text_loc(separatedWords, wordsLocatioin); 
} 
 
// Erase unwanted symbols: remove every character listed in filt_elems from
// every word in textLocations->first. Does nothing when filt_elems is empty.
//
// Each offending character is removed individually, so a leading quote or an
// embedded comma no longer discards the rest of the word.
void textQuery::FilteText() 
{ 
	if (filt_elems.empty()) 
	{ 
		return; 
	} 
 
	text *words = textLocations->first; 
	for (text::iterator iter = words->begin(); iter != words->end(); ++iter) 
	{ 
		std::string::size_type pos = 0; 
		while ((pos = iter->find_first_of(filt_elems, pos)) != std::string::npos) 
		{ 
			// erase(pos, 1) drops one character; erase(pos) would drop
			// everything from pos to the end of the word.
			iter->erase(pos, 1); 
		} 
	} 
} 
 
void textQuery::SuffixText() 
{ 
	text *words = textLocations->first; 
	text::iterator iter = words->begin(); 
	text::iterator iter_end = words->end(); 
 
	while (iter != iter_end) 
	{ 
		if ((*iter).size() < 3) 
		{ 
			iter++; 
			continue; 
		} 
 
		//string::size_type pos = 0; 
		if ((*iter)[(*iter).size() - 1] == 's') 
		{ 
			//suffix_s(*iter); 
		} 
 
		iter++; 
	} 
} 
 
void textQuery::StripCaps() 
{ 
	text *words = textLocations->first; 
	text::iterator iter = words->begin(); 
	text::iterator iter_end = words->end(); 
 
	string caps("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 
	while (iter != iter_end) 
	{ 
		string::size_type pos = 0; 
		while ((pos = (*iter).find_first_of(caps, pos)) != string::npos) 
		{ 
			(*iter)[pos] = tolower((*iter)[pos]); 
		} 
		iter++; 
	} 
} 
 
void textQuery::BuildWordMap() 
{ 
	wordMap = new map; 
 
	set diffWords; 
 
	char excludeFileName[] = "exclude.txt"; 
	ifstream exCludeFile(excludeFileName, ios::in); 
	if (!exCludeFile) 
	{ 
		static string exWords[] = { 
			"the","and","but","that","then","are","been", 
			"can","can't","cannot","could","did","for", 
			"had","have","him","his","her","its","into", 
			"were","which","when","with","would" 
		}; 
 
		cerr << "Exclusion file not found, use default!" << endl; 
		copy(exWords, exWords + 25, inserter(diffWords, diffWords.begin())); 
	} 
	else 
	{ 
		//istream_iterator> inputSet(exCludeFile); 
		//copy(inputSet, inputSet::) 
		exit(0); 
	} 
	 
	text * words = textLocations->first; 
	loc * locations = textLocations->second; 
 
	register int elemCnt = words->size(); 
	for (int ix = 0; ix < elemCnt; ix++) 
	{ 
		string sWord((*words)[ix]); 
		if (sWord.size() < 3 || diffWords.count(sWord)) 
		{ 
			continue; 
		} 
 
		if (!wordMap->count(sWord)) 
		{ 
			loc *ploc = new loc; 
			ploc->push_back((*locations)[ix]); 
			wordMap->insert(map::value_type(sWord, ploc)); 
		} 
		else 
			(*wordMap)[sWord]->push_back((*locations)[ix]); 
	} 
} 
 
void textQuery::QueryText() 
{ 
	string qWord; 
	do  
	{ 
		cout << "Please input the word you want to query, or you input a single letter to quit"<> qWord; 
		cout << endl; 
 
		if (qWord.size() < 2) 
		{ 
			break; 
		} 
		string::size_type pos = 0; 
		string caps("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 
		while ((pos = qWord.find_first_of(caps, pos)) != string::npos) 
		{ 
			qWord[pos] = tolower(qWord[pos]); 
		} 
 
		if (!wordMap->count(qWord)) 
		{ 
			cout << "Sorry, "< occurrence_lines; 
		loc::iterator iter = ploc->begin(); 
		loc::iterator iter_end = ploc->end(); 
 
		while (iter != iter_end) 
		{ 
			occurrence_lines.insert(occurrence_lines.end(), (*iter).first); 
			iter++; 
		} 
 
		int size = occurrence_lines.size(); 
		cout << endl; 
		cout << qWord << " occurs"  
			 << size << (size == 1 ? "time" : "times"); 
		cout << endl; 
		cout << endl; 
 
		set::iterator it = occurrence_lines.begin(); 
 
		for (; it != occurrence_lines.end(); it++) 
		{ 
			int line = *it; 
			cout << "\t( line" 
				 << line + 1 << " ) " 
				 << (*linesOfText)[line] << endl; 
		} 
 
		cout << endl; 
 
	} while(!qWord.empty()); 
}