www.pudn.com > cppcc.rar > dfa_source_re.hh


/*
 *  File:       dfa_source_re.hh
 *              $Id: dfa_source_re.hh,v 1.10 2002/06/05 21:32:14 alec Exp $
 *
 *  Author:     Alec Panoviciu (alecu@email.com)
 *
 *  Comments:
 *
 *  Revision history:
 *
 *  $Log: dfa_source_re.hh,v $
 *  Revision 1.10  2002/06/05 21:32:14  alec
 *  g++ happy
 *
 *  Revision 1.9  2002/05/27 02:59:39  alec
 *  doc update
 *
 *  Revision 1.8  2002/05/22 01:32:05  alec
 *  cleaned up firstpo/followpos/nullable computation
 *
 *  Revision 1.7  2002/05/16 21:30:44  alec
 *  firstPos/lastPos caching
 *
 *  Revision 1.6  2002/05/01 16:32:14  alec
 *  dfa ok. huh !
 *
 *  Revision 1.5  2002/05/01 09:18:26  alec
 *  - vBitset fixes
 *  - FOLLOWPOS written (not tested yet)
 *
 *  Revision 1.4  2002/04/30 16:24:38  alec
 *  tested the scanner ptree & vBitVector implementation -> ok
 *
 *  Revision 1.3  2002/04/30 09:22:32  alec
 *  bitset fixes
 *
 *  Revision 1.2  2002/04/29 17:55:41  alec
 *  regexps almost done
 *
 *  Revision 1.1  2002/04/29 09:40:01  alec
 *  *** empty log message ***
 *
 */


/* 
  Copyright (C) 2002 Alexandru Panoviciu (alecu@email.com)

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */

#ifndef __DFA_SOURCE_RE_HH__
#define __DFA_SOURCE_RE_HH__

#include "vbitset.hh"
#include "re_node.hh"

/**
 * The base class of the regexp parse tree nodes used for generating the
 * scanner's sources.
 */
class DfaSourceRe : public ReNode
{
public:

  DfaSourceRe (const Position &pos_) :
    ReNode(pos_)
  {}

  ~DfaSourceRe ()
  {}

#ifdef DEBUG
  static int indent_level;

  static void indent()
  {
    indent_level++;
  }

  static void unindent ()
  {
    indent_level--;
  }

  static ostream& format (ostream &os)
  {
    for (int i = 0; i < indent_level; i++)
      os << " ";
    return os;
  }

  virtual void dump (ostream &os) const;

#endif
  
public:

  /**
   * True if the regexp represented by this node can match the empty string.
   */
  bool nullable;

  /**
   * The FIRSTPOS set of this node.
   */
  vBitset firstPos;

  /**
   * The LASTPOS set of this node,
   */
  vBitset lastPos;
};


/**
 * ReCatNode represents a regular expression that matches the successive
 * occurences of two subexpression r1 and r2, in this order. The trees for r1
 * and r2 are rooted into its pre and post fields.
 */
class ReCatNode : public DfaSourceRe
{
public:

  ReCatNode (DfaSourceRe *pre_, DfaSourceRe *post_, const Position &pos_) :
    pre(pre_), post(post_), DfaSourceRe(pos_)
  {}

  ~ReCatNode ()
  {
    delete pre;
    delete post;
  }

  virtual ReNode* clone ()
  {
    return new ReCatNode(dynamic_cast(pre->clone()),
                         dynamic_cast(post->clone()), getPos());
  }
  
  virtual int getChildCount ()
  {
    return 2;
  }

  virtual ReNode& operator [] (int index)
  {
    ASSERT((0 <= index) && (index <= 1),
           "Bad index in ReCatNode::operator []");
    return index ? *post : *pre;
  }

#ifdef DEBUG

  virtual void dump (ostream &os) const;
  
#endif

  DfaSourceRe *pre, *post;
};


/**
 * ReOrNode represents a regular expression that matches either one of two
 * subexpressions, say r1 and r2. The trees for these two are rooted in the
 * pre and post fields.
 */
class ReOrNode : public DfaSourceRe
{
public:

  ReOrNode (DfaSourceRe *pre_, DfaSourceRe *post_, const Position &pos_) :
    DfaSourceRe(pos_),
    pre(pre_), post(post_)
  {}

  ~ReOrNode ()
  {
    delete pre;
    delete post;
  }

  virtual ReNode* clone ()
  {
    return new ReOrNode(dynamic_cast(pre->clone()),
                        dynamic_cast(post->clone()), getPos());
  }
  
  virtual int getChildCount ()
  {
    return 2;
  }

  virtual ReNode& operator [] (int index)
  {
    ASSERT((0 <= index) && (index <= 1),
           "Bad index for ReOrNode::operator []");
    return index ? *post : *pre;
  }

#ifdef DEBUG

  virtual void dump (ostream &os) const;
  
#endif

  DfaSourceRe *pre, *post;
};


/**
 * ReStarNode represents a regular expression that matches zero or more
 * occurences of another regular expression r. The tree for r is rooted into
 * the in field.
 */
class ReStarNode : public DfaSourceRe
{
public:

  ReStarNode (DfaSourceRe *in_, const Position &pos_) :
    DfaSourceRe(pos_),
    in(in_)
  {}

  ~ReStarNode ()
  {
    delete in;
  }

  virtual ReNode* clone ()
  {
    return new ReStarNode(dynamic_cast(in->clone()), getPos());
  }
  
  virtual int getChildCount ()
  {
    return 1;
  }

  virtual ReNode& operator [] (int index)
  {
    ASSERT(index == 0, "Bad index for ReStarNode::operator []");
    return *in;
  }

#ifdef DEBUG

  virtual void dump (ostream &os) const;
  
#endif

  DfaSourceRe *in;
};


/**
 * ReCharNode represents a regular expression that matches a single
 * character. The character is stored in its match field;
 */
class ReCharNode : public DfaSourceRe
{
public:

  ReCharNode (unsigned char match_, const Position &pos_) :
    DfaSourceRe(pos_),
    match(match_)
  {}

  virtual ReNode* clone ()
  {
    return new ReCharNode(match, getPos());
  }
  
  virtual int getChildCount ()
  {
    return 0;
  }

  virtual ReNode& operator [] (int index)
  {
    ASSERT(0, "ReCharNode::operator [] called !");
    return * (ReNode*) NULL;
  }

#ifdef DEBUG

  virtual void dump (ostream &os) const;
  
#endif

  unsigned char match;

  int pos;
};


/**
 * ReEotNode is a special node that will be automatically concatenated (by a
 * cat-node) to each token's regexp tree. The Eot in the class' name stands
 * for ``end of token''. Eot nodes are actually the nodes that ``identify''
 * each token in a lexical state, after all the regular expression that belong
 * to a lexical state have been merged. For this reason, all the data about
 * the token they represent (token name, token action, etc) will be stored in
 * these nodes.
 */
class ReEotNode : public DfaSourceRe
{
public:

  ReEotNode (int tokId_, const Position &pos_) :
    DfaSourceRe(pos_),
    tokId(tokId_)
  {}

  virtual ReNode* clone ()
  {
    return new ReEotNode(tokId, getPos());
  }
  
  virtual int getChildCount ()
  {
    return 0;
  }

  virtual ReNode& operator [] (int index)
  {
    ASSERT(0, "ReEotNode::operator [] called !");
    return * (ReNode*) NULL;
  }

#ifdef DEBUG

  virtual void dump (ostream &os) const;
  
#endif

  int tokId;

  int pos;
};


/**
 * ReLambdaNode represents a regular expression that matches the empty string;
 */
class ReLambdaNode : public DfaSourceRe
{
public:

  ReLambdaNode (const Position &pos_) :
    DfaSourceRe(pos_)
  {}

  virtual ReNode* clone ()
  {
    return new ReLambdaNode(getPos());
  }
  
  virtual int getChildCount ()
  {
    return 0;
  }

  virtual ReNode& operator [] (int index)
  {
    ASSERT(0, "ReLambdaNode::operator [] called !");
    return * (ReNode*) NULL;
  }

#ifdef DEBUG

  virtual void dump (ostream &os) const;
  
#endif

  int pos;
};


#endif /* #ifndef __DFA_SOURCE_RE_HH__ */