www.pudn.com > cppcc.rar > dfa_source_re.hh
/*
* File: dfa_source_re.hh
* $Id: dfa_source_re.hh,v 1.10 2002/06/05 21:32:14 alec Exp $
*
* Author: Alec Panoviciu (alecu@email.com)
*
* Comments:
*
* Revision history:
*
* $Log: dfa_source_re.hh,v $
* Revision 1.10 2002/06/05 21:32:14 alec
* g++ happy
*
* Revision 1.9 2002/05/27 02:59:39 alec
* doc update
*
* Revision 1.8 2002/05/22 01:32:05 alec
* cleaned up firstpo/followpos/nullable computation
*
* Revision 1.7 2002/05/16 21:30:44 alec
* firstPos/lastPos caching
*
* Revision 1.6 2002/05/01 16:32:14 alec
* dfa ok. huh !
*
* Revision 1.5 2002/05/01 09:18:26 alec
* - vBitset fixes
* - FOLLOWPOS written (not tested yet)
*
* Revision 1.4 2002/04/30 16:24:38 alec
* tested the scanner ptree & vBitVector implementation -> ok
*
* Revision 1.3 2002/04/30 09:22:32 alec
* bitset fixes
*
* Revision 1.2 2002/04/29 17:55:41 alec
* regexps almost done
*
* Revision 1.1 2002/04/29 09:40:01 alec
* *** empty log message ***
*
*/
/*
Copyright (C) 2002 Alexandru Panoviciu (alecu@email.com)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __DFA_SOURCE_RE_HH__
#define __DFA_SOURCE_RE_HH__
#include "vbitset.hh"
#include "re_node.hh"
/**
* The base class of the regexp parse tree nodes used for generating the
* scanner's sources.
*/
class DfaSourceRe : public ReNode
{
public:
DfaSourceRe (const Position &pos_) :
ReNode(pos_)
{}
~DfaSourceRe ()
{}
#ifdef DEBUG
static int indent_level;
static void indent()
{
indent_level++;
}
static void unindent ()
{
indent_level--;
}
static ostream& format (ostream &os)
{
for (int i = 0; i < indent_level; i++)
os << " ";
return os;
}
virtual void dump (ostream &os) const;
#endif
public:
/**
* True if the regexp represented by this node can match the empty string.
*/
bool nullable;
/**
* The FIRSTPOS set of this node.
*/
vBitset firstPos;
/**
* The LASTPOS set of this node,
*/
vBitset lastPos;
};
/**
* ReCatNode represents a regular expression that matches the successive
* occurences of two subexpression r1 and r2, in this order. The trees for r1
* and r2 are rooted into its pre and post fields.
*/
class ReCatNode : public DfaSourceRe
{
public:
ReCatNode (DfaSourceRe *pre_, DfaSourceRe *post_, const Position &pos_) :
pre(pre_), post(post_), DfaSourceRe(pos_)
{}
~ReCatNode ()
{
delete pre;
delete post;
}
virtual ReNode* clone ()
{
return new ReCatNode(dynamic_cast(pre->clone()),
dynamic_cast(post->clone()), getPos());
}
virtual int getChildCount ()
{
return 2;
}
virtual ReNode& operator [] (int index)
{
ASSERT((0 <= index) && (index <= 1),
"Bad index in ReCatNode::operator []");
return index ? *post : *pre;
}
#ifdef DEBUG
virtual void dump (ostream &os) const;
#endif
DfaSourceRe *pre, *post;
};
/**
* ReOrNode represents a regular expression that matches either one of two
* subexpressions, say r1 and r2. The trees for these two are rooted in the
* pre and post fields.
*/
class ReOrNode : public DfaSourceRe
{
public:
ReOrNode (DfaSourceRe *pre_, DfaSourceRe *post_, const Position &pos_) :
DfaSourceRe(pos_),
pre(pre_), post(post_)
{}
~ReOrNode ()
{
delete pre;
delete post;
}
virtual ReNode* clone ()
{
return new ReOrNode(dynamic_cast(pre->clone()),
dynamic_cast(post->clone()), getPos());
}
virtual int getChildCount ()
{
return 2;
}
virtual ReNode& operator [] (int index)
{
ASSERT((0 <= index) && (index <= 1),
"Bad index for ReOrNode::operator []");
return index ? *post : *pre;
}
#ifdef DEBUG
virtual void dump (ostream &os) const;
#endif
DfaSourceRe *pre, *post;
};
/**
* ReStarNode represents a regular expression that matches zero or more
* occurences of another regular expression r. The tree for r is rooted into
* the in field.
*/
class ReStarNode : public DfaSourceRe
{
public:
ReStarNode (DfaSourceRe *in_, const Position &pos_) :
DfaSourceRe(pos_),
in(in_)
{}
~ReStarNode ()
{
delete in;
}
virtual ReNode* clone ()
{
return new ReStarNode(dynamic_cast(in->clone()), getPos());
}
virtual int getChildCount ()
{
return 1;
}
virtual ReNode& operator [] (int index)
{
ASSERT(index == 0, "Bad index for ReStarNode::operator []");
return *in;
}
#ifdef DEBUG
virtual void dump (ostream &os) const;
#endif
DfaSourceRe *in;
};
/**
* ReCharNode represents a regular expression that matches a single
* character. The character is stored in its match field;
*/
class ReCharNode : public DfaSourceRe
{
public:
ReCharNode (unsigned char match_, const Position &pos_) :
DfaSourceRe(pos_),
match(match_)
{}
virtual ReNode* clone ()
{
return new ReCharNode(match, getPos());
}
virtual int getChildCount ()
{
return 0;
}
virtual ReNode& operator [] (int index)
{
ASSERT(0, "ReCharNode::operator [] called !");
return * (ReNode*) NULL;
}
#ifdef DEBUG
virtual void dump (ostream &os) const;
#endif
unsigned char match;
int pos;
};
/**
* ReEotNode is a special node that will be automatically concatenated (by a
* cat-node) to each token's regexp tree. The Eot in the class' name stands
* for ``end of token''. Eot nodes are actually the nodes that ``identify''
* each token in a lexical state, after all the regular expression that belong
* to a lexical state have been merged. For this reason, all the data about
* the token they represent (token name, token action, etc) will be stored in
* these nodes.
*/
class ReEotNode : public DfaSourceRe
{
public:
ReEotNode (int tokId_, const Position &pos_) :
DfaSourceRe(pos_),
tokId(tokId_)
{}
virtual ReNode* clone ()
{
return new ReEotNode(tokId, getPos());
}
virtual int getChildCount ()
{
return 0;
}
virtual ReNode& operator [] (int index)
{
ASSERT(0, "ReEotNode::operator [] called !");
return * (ReNode*) NULL;
}
#ifdef DEBUG
virtual void dump (ostream &os) const;
#endif
int tokId;
int pos;
};
/**
* ReLambdaNode represents a regular expression that matches the empty string;
*/
class ReLambdaNode : public DfaSourceRe
{
public:
ReLambdaNode (const Position &pos_) :
DfaSourceRe(pos_)
{}
virtual ReNode* clone ()
{
return new ReLambdaNode(getPos());
}
virtual int getChildCount ()
{
return 0;
}
virtual ReNode& operator [] (int index)
{
ASSERT(0, "ReLambdaNode::operator [] called !");
return * (ReNode*) NULL;
}
#ifdef DEBUG
virtual void dump (ostream &os) const;
#endif
int pos;
};
#endif /* #ifndef __DFA_SOURCE_RE_HH__ */