www.pudn.com > cppcc.rar > basic_dfa_generator.cc
/* * File: basic_dfa_generator.cc * $Id: basic_dfa_generator.cc,v 1.7 2002/05/27 02:58:12 alec Exp $ * * Author: Alec Panoviciu (alecu@email.com) * * Comments: * * Revision history: * * $Log: basic_dfa_generator.cc,v $ * Revision 1.7 2002/05/27 02:58:12 alec * doc update * * Revision 1.6 2002/05/22 01:31:42 alec * cleaned up firstpo/followpos/nullable computation * * Revision 1.5 2002/05/07 10:02:18 alec * fixed some bugs & mem leaks; added MORE tokens support * * Revision 1.4 2002/05/04 17:39:22 alec * the scanner works (slightly tested) * * Revision 1.3 2002/05/01 16:32:14 alec * dfa ok. huh ! * * Revision 1.2 2002/05/01 09:18:26 alec * - vBitset fixes * - FOLLOWPOS written (not tested yet) * * Revision 1.1 2002/04/30 16:26:38 alec * my big endian will eat your little endian * */ /* Copyright (C) 2002 Alexandru Panoviciu (alecu@email.com) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include#include #include "prop_registry.hh" #include "scanner_spec.hh" #include "basic_dfa_generator.hh" #include "basic_dfa_spec.hh" #include "dfa_source_re.hh" #include "re_node.hh" #include "re_node_algo.hh" #include "itoken_spec.hh" BasicDfaGenerator::BasicDfaGenerator (ITokenSpec &tokens_, PropRegistry ®istry_) : tokens(tokens_), registry(registry_) {} /** * Implements the regexp leaf nodes numbering algorithm. */ class LeafNumberGen : public ReNodeAlgo { public: LeafNumberGen () : counter(0) {} /** * Assigns each leaf node with unique consecutive ids. */ virtual bool operator () (ReNode &n) { ASSERT(&n != NULL, "Regexp tree corrupted !"); if (typeid(n) == typeid(ReCharNode)) { ReCharNode &nn = dynamic_cast (n); nn.pos = counter++; } else if (typeid(n) == typeid(ReEotNode)) { ReEotNode &nn = dynamic_cast (n); nn.pos = counter++; } else if (typeid(n) == typeid(ReLambdaNode)) { ReLambdaNode &nn = dynamic_cast (n); nn.pos = counter++; } return true; } unsigned int counter; }; /** * Implements the FOLLOWPOS table generation algorithm. */ class FollowPosGen : public ReNodeAlgo { public: FollowPosGen (vBitset *table_) : table(table_) {} virtual bool operator () (ReNode &n) { if (typeid(n) == typeid(ReCatNode)) { ReCatNode &nn = dynamic_cast (n); vBitset &preLast = nn.pre->lastPos; vBitset &postFirst = nn.post->firstPos; vBitset::reference ri = preLast.begin(); for (unsigned int i = 0; ri.neq(preLast.end()); ri.next(), i++) if (ri.get()) table[i] |= postFirst; } else if (typeid(n) == typeid(ReStarNode)) { ReStarNode &nn = dynamic_cast (n); vBitset &nnLast = nn.lastPos; vBitset &nnFirst = nn.firstPos; vBitset::reference ri = nnLast.begin(); for (unsigned int i = 0; ri.neq(nnLast.end()); ri.next(), i++) if (ri.get()) table[i] |= nnFirst; } return true; } vBitset *table; }; /** * Stores a pointers to each leaf node in a regexp tree into a given array of * pointers. The position in the array where the opinters are sored is given * by the leaf nodes' position fields. */ class LeafFinder : public ReNodeAlgo { public: LeafFinder (DfaSourceRe **leaves_) : leaves(leaves_) {} virtual bool operator () (ReNode &n) { ASSERT(&n != NULL, "Regexp tree corrupted !"); if (typeid(n) == typeid(ReCharNode)) { ReCharNode &nn = dynamic_cast (n); leaves[nn.pos] = &nn; } else if (typeid(n) == typeid(ReEotNode)) { ReEotNode &nn = dynamic_cast (n); leaves[nn.pos] = &nn; } else if (typeid(n) == typeid(ReLambdaNode)) { ReLambdaNode &nn = dynamic_cast (n); leaves[nn.pos] = &nn; } return true; } DfaSourceRe **leaves; }; /** * Computes the firstpos and lastpos sets for each node. The bitvectors that * represents the sets will have a 1 at position i if the leaf node with * position i belongs to the set. */ class AttributeGen : public ReNodeAlgo { public: virtual bool operator () (ReNode &n) { if (typeid(n) == typeid(ReCatNode)) { ReCatNode &nn = dynamic_cast (n); nn.nullable = nn.pre->nullable && nn.post->nullable; nn.firstPos = nn.pre->firstPos; if (nn.pre->nullable) nn.firstPos |= nn.post->firstPos; nn.lastPos = nn.post->lastPos; if (nn.post->nullable) nn.lastPos |= nn.pre->lastPos; } else if (typeid(n) == typeid(ReOrNode)) { ReOrNode &nn = dynamic_cast (n); nn.nullable = nn.pre->nullable || nn.post->nullable; nn.firstPos = nn.pre->firstPos; nn.firstPos |= nn.post->firstPos; nn.lastPos = nn.post->lastPos; nn.lastPos |= nn.pre->lastPos; } else if (typeid(n) == typeid(ReStarNode)) { ReStarNode &nn = dynamic_cast (n); nn.nullable = true; nn.firstPos = nn.in->firstPos; nn.lastPos = nn.in->lastPos; } else if (typeid(n) == typeid(ReCharNode)) { ReCharNode &nn = dynamic_cast (n); nn.nullable = false; nn.firstPos.resize(nn.pos + 1); nn.firstPos[nn.pos].set(); nn.lastPos.resize(nn.pos + 1); nn.lastPos[nn.pos].set(); } else if (typeid(n) == typeid(ReEotNode)) { ReEotNode &nn = dynamic_cast (n); nn.nullable = true; nn.firstPos.resize(nn.pos + 1); nn.firstPos[nn.pos].set(); nn.lastPos.resize(nn.pos + 1); nn.lastPos[nn.pos].set(); } else if (typeid(n) == typeid(ReLambdaNode)) { ReLambdaNode &nn = dynamic_cast (n); nn.nullable = true; } else ASSERT(0, "Bat typeid in AttributeGen::operator ()"); return true; } }; BasicDfaSpec& BasicDfaGenerator::createBasicDfa(const LexicalStateSpec &ls) { BasicDfaSpec &res = *new BasicDfaSpec(ls.name); DfaSourceRe *regexp = dynamic_cast (ls.regexp); // 1. do the numbering LeafNumberGen nr; regexp->dfTraverse(nr); // This vector tells us where leave nodes are. At index i, we have a a pointer // to leaf node with pos == i DfaSourceRe **leaves = new (DfaSourceRe*)[nr.counter]; LeafFinder lf(leaves); regexp->dfTraverse(lf); // 2. compute attributes AttributeGen aga; regexp->dfTraverse(aga); #ifdef DEBUG if (((string)registry["debug"]).find('f') != string::npos) { cerr << "-------------------------------------------------------------" << endl << "STATE NODES DUMP FOR STATE " << ls.name << endl; regexp->dump(cerr); } #endif // 3. compute the followpos vBitset *followPos = new vBitset[nr.counter]; FollowPosGen fpg(followPos); regexp->dfTraverse(fpg); #ifdef DEBUG if (((string)registry["debug"]).find('f') != string::npos) { cerr << "-------------------------------------------------------------" << endl << "FOLLOWPOS table:" << endl; for (unsigned int i = 0; i < nr.counter; i++) { cerr << i << ": "; vBitset::reference ri = followPos[i].begin(); for (unsigned int j = 0; ri.neq(followPos[i].end()); ri.next(), j++) if (ri.get()) cerr << j << " "; cerr << endl; } } #endif // the bitvector associated to each state vector statesDesc; // states for which transitions have not been computed yet. // the number stored is the index into statesDesc where the state is described queue q; // create the initial state statesDesc.push_back(new vBitset(regexp->firstPos)); res.states.push_back(BasicDfaSpec::State()); q.push(0); while (!q.empty()) { int Dnum = q.front(); q.pop(); vBitset &Dset = *statesDesc[Dnum]; for (unsigned int c = 0; c < 256; c++) { vBitset *Uset = new vBitset(); vBitset::reference pi = Dset.begin(); for (unsigned int p = 0; pi.neq(Dset.end()); p++, pi.next()) if (pi.get() && (typeid(*leaves[p]) == typeid(ReCharNode))) { ReCharNode &n = dynamic_cast (*leaves[p]); if (n.match == c) // node n at position p matches c, then add // followPos(n) into Udesc *Uset |= followPos[p]; } if (Uset->all0()) { delete Uset; continue; //if U is empty, continue with next c } int Unum = 0; for (; Unum < statesDesc.size(); Unum++) if (*statesDesc[Unum] == *Uset) break; if (Unum == statesDesc.size()) { // U is a new state statesDesc.push_back(Uset); res.states.push_back(BasicDfaSpec::State()); q.push(Unum); } else delete Uset; res.states[Dnum].addTransition(c, Unum); } } // while !w.empty() // determine and mark as such the accepting states // a state is accepting state if at least one of the position that describes // it is that of an EOT leaf node for (int Snum = 0; Snum < statesDesc.size(); Snum++) { vBitset::reference pi = statesDesc[Snum]->begin(); for (unsigned int i = 0; pi.neq(statesDesc[Snum]->end()); i++, pi.next()) if (pi.get() && (typeid(*leaves[i]) == typeid(ReEotNode))) { ReEotNode &n = dynamic_cast (*leaves[i]); // this warning is damn annoying ! if (res.states[Snum].isFinal) { /* cerr << formatWarning(leaves[i]->getPos(), string("Regexp for token ") + tokens[res.states[Snum].tokId].name() + " hides regexp for token " + tokens[n.tokId].name() + " which might not get matched while in lexical state " + ls.name) << endl;*/ continue; } res.states[Snum].isFinal = true; res.states[Snum].tokId = n.tokId; } } #ifdef DEBUG if (((string)registry["debug"]).find('f') != string::npos) { cerr << "----------------------------------------------------" << endl << "DFA dump:" << endl; res.dump(cerr); } #endif for (int i = 0; i < statesDesc.size(); i++) delete statesDesc[i]; delete[] followPos; delete[] leaves; return res; }