// Source: www.pudn.com > lianxi_2_textquery.rar > textQuery.h
#pragma once

// NOTE(review): the original include targets were lost when this file was
// extracted; the list below covers every standard name used in this header.
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Text query system: reports the lines of a text file on which a word occurs.
//
// Processing pipeline (see textQuery::DoRun):
//   1. the whole text is stored line by line in a vector;
//   2. every line is split into words, stored in a second vector;
//   3. a parallel vector stores the (line, word index) location of each word;
//   4. the two vectors are bound together as a pair;
//   5. a map from each distinct word to all of its locations is built.

// Kept at file scope because existing includers may rely on it.
using namespace std;

typedef pair<short, short> location;   // (line index, word index within the line)
typedef vector<location> loc;          // all locations of one word
typedef vector<string> text;           // lines of text, or individual words
typedef pair<text *, loc *> text_loc;  // words paired with their locations

// Interactive word-occurrence index over a text file.
//
// Typical use:
//     textQuery tq;
//     tq.DoRun();      // asks for a file name on stdin and builds the index
//     tq.QueryText();  // answers queries read from stdin
//
// The object owns everything it allocates and releases it in the destructor;
// it is therefore not copyable.
class textQuery {
public:
    textQuery() : linesOfText(0), textLocations(0), wordMap(0) {}
    ~textQuery() { Release(); }

    // Replaces the set of characters that FilteText() strips from words.
    static void FilterElements(string felems) { filt_elems = felems; }

    // Interactive query loop; see the definition below.
    void QueryText();
    void DisplayMapText();
    void DisplayTextLocations();

    // Builds the index from scratch. Safe to call more than once: data from a
    // previous run is released first.
    void DoRun()
    {
        Release();
        RetriveText();
        SeparateText();
        FilteText();
        SuffixText();
        StripCaps();
        BuildWordMap();
    }

protected:
    void RetriveText();
    void SeparateText();
    void FilteText();
    void SuffixText();
    void StripCaps();
    void BuildWordMap();

private:
    // Not copyable: the members are owning raw pointers (declared, never defined).
    textQuery(const textQuery &);
    textQuery &operator=(const textQuery &);

    // Frees everything allocated by a previous DoRun().
    void Release();
    // Converts the ASCII letters 'A'..'Z' of `word` to lower case in place.
    static void LowerCase(string &word);

    text *linesOfText;            // the text, one element per line
    text_loc *textLocations;      // all words and their parallel locations
    map<string, loc *> *wordMap;  // word -> every location where it occurs
    static string filt_elems;     // characters removed from words
};

// NOTE(review): this is a non-inline definition in a header, so the header can
// only be included from a single translation unit. Fixing that needs a .cpp
// file or a C++17 inline variable.
string textQuery::filt_elems("\",.;:!<<)(\\/");

inline void textQuery::Release()
{
    if (wordMap) {
        for (map<string, loc *>::iterator it = wordMap->begin();
             it != wordMap->end(); ++it) {
            delete it->second;
        }
        delete wordMap;
        wordMap = 0;
    }
    if (textLocations) {
        delete textLocations->first;
        delete textLocations->second;
        delete textLocations;
        textLocations = 0;
    }
    delete linesOfText;
    linesOfText = 0;
}

inline void textQuery::LowerCase(string &word)
{
    for (string::size_type i = 0; i < word.size(); ++i) {
        if (word[i] >= 'A' && word[i] <= 'Z') {
            word[i] = static_cast<char>(word[i] - 'A' + 'a');
        }
    }
}

// Reads the text: asks for a file name on stdin and loads the file line by
// line into linesOfText. Terminates the program with exit(-1) if the file
// cannot be opened.
inline void textQuery::RetriveText()
{
    // Read into a std::string: the former fixed char[100] buffer could overflow.
    string fileName;
    cout << "Please input the name of the text:";
    cin >> fileName;

    ifstream inputFile(fileName.c_str());
    if (!inputFile) {
        cerr << "Oops! Unable to open file " << fileName << endl;
        cerr << "Quit!";
        exit(-1);
    }
    cout << endl;

    linesOfText = new text;
    string line;
    while (getline(inputFile, line)) {
        linesOfText->push_back(line);
    }
}

// Separates the words: splits every line on single spaces and records, for
// each piece, its (line index, word index) location. Empty pieces (blank
// lines, consecutive spaces) are kept here; BuildWordMap() later drops every
// word shorter than three characters.
inline void textQuery::SeparateText()
{
    text *separatedWords = new text;
    loc *wordLocations = new loc;

    for (text::size_type lineNo = 0; lineNo < linesOfText->size(); ++lineNo) {
        const string &textLine = (*linesOfText)[lineNo];
        // One counter per line; it must not be re-declared inside the loop,
        // otherwise every word is recorded at position 0.
        short wordPos = 0;
        string::size_type start = 0;
        string::size_type space;
        while ((space = textLine.find(' ', start)) != string::npos) {
            separatedWords->push_back(textLine.substr(start, space - start));
            wordLocations->push_back(
                location(static_cast<short>(lineNo), wordPos));
            ++wordPos;
            start = space + 1;
        }
        // Last (or only) word of the line.
        separatedWords->push_back(textLine.substr(start));
        wordLocations->push_back(
            location(static_cast<short>(lineNo), wordPos));
    }

    textLocations = new text_loc(separatedWords, wordLocations);
}

// Erases unwanted symbols such as quotes or brackets: removes every character
// listed in filt_elems from every word. Does nothing when filt_elems is empty.
inline void textQuery::FilteText()
{
    if (filt_elems.empty()) {
        return;
    }

    text *words = textLocations->first;
    for (text::iterator word = words->begin(); word != words->end(); ++word) {
        string::size_type pos = 0;
        // erase(pos, 1) removes only the offending character; erase(pos)
        // would cut off the rest of the word as well.
        while ((pos = word->find_first_of(filt_elems, pos)) != string::npos) {
            word->erase(pos, 1);
        }
    }
}

// Placeholder for suffix handling (e.g. stripping a plural 's'). The original
// implementation only inspected words and never modified them, so this step
// intentionally does nothing.
inline void textQuery::SuffixText()
{
}

// Converts every word to lower case (ASCII letters only).
inline void textQuery::StripCaps()
{
    text *words = textLocations->first;
    for (text::iterator word = words->begin(); word != words->end(); ++word) {
        LowerCase(*word);
    }
}

// Builds wordMap: word -> all of its locations. Words shorter than three
// characters and words in the exclusion set are skipped. The exclusion set is
// read from "exclude.txt" (whitespace-separated words) when that file can be
// opened; otherwise a built-in default list is used.
inline void textQuery::BuildWordMap()
{
    wordMap = new map<string, loc *>;

    set<string> excludedWords;
    char excludeFileName[] = "exclude.txt";
    ifstream excludeFile(excludeFileName, ios::in);
    if (!excludeFile) {
        static const char *const defaultExcluded[] = {
            "the",  "and",   "but",   "that",   "then",  "are", "been",
            "can",  "can't", "cannot", "could", "did",   "for",
            "had",  "have",  "him",   "his",    "her",   "its", "into",
            "were", "which", "when",  "with",   "would"
        };
        const size_t defaultCount =
            sizeof(defaultExcluded) / sizeof(defaultExcluded[0]);
        cerr << "Exclusion file not found, use default!" << endl;
        excludedWords.insert(defaultExcluded, defaultExcluded + defaultCount);
    } else {
        // Previously this branch called exit(0) and the program ended silently.
        copy(istream_iterator<string>(excludeFile), istream_iterator<string>(),
             inserter(excludedWords, excludedWords.begin()));
    }

    const text &words = *textLocations->first;
    const loc &locations = *textLocations->second;
    for (text::size_type ix = 0; ix < words.size(); ++ix) {
        const string &word = words[ix];
        if (word.size() < 3 || excludedWords.count(word)) {
            continue;
        }
        // operator[] creates a null entry for a new word; allocate on demand.
        loc *&entry = (*wordMap)[word];
        if (!entry) {
            entry = new loc;
        }
        entry->push_back(locations[ix]);
    }
}

// Interactive query loop. Reads words from stdin and, for each one, prints
// the number of distinct lines it occurs on followed by those lines. The loop
// ends when a single-letter word is entered or when stdin is exhausted.
// Requires a prior DoRun().
inline void textQuery::QueryText()
{
    if (!wordMap || !linesOfText) {
        cerr << "No text has been loaded, call DoRun() first!" << endl;
        return;
    }

    string qWord;
    for (;;) {
        cout << "Please input the word you want to query, or you input a single letter to quit" << endl;
        // Stop on EOF or a read error: the former loop spun forever because a
        // failed extraction left the previous word in qWord.
        if (!(cin >> qWord)) {
            break;
        }
        cout << endl;
        if (qWord.size() < 2) {
            break;
        }

        LowerCase(qWord);

        const map<string, loc *>::const_iterator found = wordMap->find(qWord);
        if (found == wordMap->end()) {
            // NOTE(review): the tail of this message was lost when the file
            // was extracted; the wording after the word is a reconstruction.
            cout << "Sorry, " << qWord << " was not found in the text." << endl;
            continue;
        }

        // Collapse the locations to the distinct lines the word occurs on.
        const loc *ploc = found->second;
        set<short> occurrence_lines;
        for (loc::const_iterator it = ploc->begin(); it != ploc->end(); ++it) {
            occurrence_lines.insert(occurrence_lines.end(), it->first);
        }

        const set<short>::size_type lineCount = occurrence_lines.size();
        cout << endl;
        cout << qWord << " occurs " << lineCount
             << (lineCount == 1 ? " time" : " times");
        cout << endl;
        cout << endl;

        for (set<short>::const_iterator it = occurrence_lines.begin();
             it != occurrence_lines.end(); ++it) {
            const int line = *it;
            cout << "\t( line " << line + 1 << " ) "
                 << (*linesOfText)[line] << endl;
        }
        cout << endl;
    }
}