www.pudn.com > LexYaccProgs.zip > PASLEX.L


 
%{ 
 
(* PASLEX.L: lexical analyzer for Pascal, adapted to TP Lex, 2-28-89 AG *) 
 
%} 
 
%{ 
(* 
 * lex input file for pascal scanner 
 * 
 * extensions: to ways to spell "external" and "->" ok for "^". 
 * 
 *) 
%} 
 
%{ 
 
(* Note: Keywords are determined by scanning a keyword table, rather 
   than including the keyword patterns in the Lex source which is done 
   in the original version of this file. I prefer this method, because 
   it makes the grammar itself more readable (handling case-insensitive 
   keywords in Lex is quite cumbersome, e.g., you will have to write 
   something like [Aa][Nn][Dd] to match the keyword `and'), and also 
   produces a more (space-) efficient analyzer (184 states and 375 
   transitions for the keyword pattern version, against only 40 states 
   and 68 transitions for the keyword table version). *) 
 
procedure commenteof; 
  begin 
    writeln('unexpected EOF inside comment at line ', yylineno); 
  end(*commenteof*); 
 
function upper(str : String) : String; 
  (* converts str to uppercase *) 
  var i : integer; 
  begin 
    for i := 1 to length(str) do 
      str[i] := upCase(str[i]); 
    upper := str 
  end(*upper*); 
 
function is_keyword(id : string; var token : integer) : boolean; 
  (* checks whether id is Pascal keyword; if so, returns corresponding 
     token number in token *) 
  const 
    id_len = 20; 
  type 
    Ident = string[id_len]; 
  const 
    (* table of Pascal keywords: *) 
    no_of_keywords = 39; 
    keyword : array [1..no_of_keywords] of Ident = ( 
      'AND',       'ARRAY',     'BEGIN',    'CASE', 
      'CONST',     'DIV',       'DO',       'DOWNTO', 
      'ELSE',      'END',       'EXTERNAL', 'EXTERN', 
      'FILE',      'FOR',       'FORWARD',  'FUNCTION', 
      'GOTO',      'IF',        'IN',       'LABEL', 
      'MOD',       'NIL',       'NOT',      'OF', 
      'OR',        'OTHERWISE', 'PACKED',   'PROCEDURE', 
      'PROGRAM',   'RECORD',    'REPEAT',   'SET', 
      'THEN',      'TO',        'TYPE',     'UNTIL', 
      'VAR',       'WHILE',     'WITH'); 
    keyword_token : array [1..no_of_keywords] of integer = ( 
      _AND,        _ARRAY,      _BEGIN,     _CASE, 
      _CONST,      _DIV,        _DO,        _DOWNTO, 
      _ELSE,       _END,        _EXTERNAL,  _EXTERNAL, 
                                (* EXTERNAL: 2 spellings (see above)! *) 
      _FILE,       _FOR,        _FORWARD,   _FUNCTION, 
      _GOTO,       _IF,         _IN,        _LABEL, 
      _MOD,        _NIL,        _NOT,       _OF, 
      _OR,         _OTHERWISE,  _PACKED,    _PROCEDURE, 
      _PROGRAM,    _RECORD,     _REPEAT,    _SET, 
      _THEN,       _TO,         _TYPE,      _UNTIL, 
      _VAR,        _WHILE,      _WITH); 
  var m, n, k : integer; 
  begin 
    id := upper(id); 
    (* binary search: *) 
    m := 1; n := no_of_keywords; 
    while m<=n do 
      begin 
        k := m+(n-m) div 2; 
        if id=keyword[k] then 
          begin 
            is_keyword := true; 
            token := keyword_token[k]; 
            exit 
          end 
        else if id>keyword[k] then 
          m := k+1 
        else 
          n := k-1 
      end; 
    is_keyword := false 
  end(*is_keyword*); 
 
%} 
 
NQUOTE    [^'] 
 
%% 
 
%{ 
 
var c  : char; 
    kw : integer; 
 
%} 
 
[a-zA-Z]([a-zA-Z0-9])*	if is_keyword(yytext, kw) then 
                          return(kw) 
                        else 
                          return(IDENTIFIER); 
 
":="			return(ASSIGNMENT); 
'({NQUOTE}|'')+'	return(CHARACTER_STRING); 
":"			return(COLON); 
","			return(COMMA); 
[0-9]+			return(DIGSEQ); 
"."			return(DOT); 
".."			return(DOTDOT); 
"="			return(EQUAL); 
">="			return(GE); 
">"			return(GT); 
"["			return(LBRAC); 
"<="			return(LE); 
"("			return(LPAREN); 
"<"			return(LT); 
"-"			return(MINUS); 
"<>"			return(NOTEQUAL); 
"+"			return(PLUS); 
"]"			return(RBRAC); 
[0-9]+"."[0-9]+		return(REALNUMBER); 
")"			return(RPAREN); 
";"			return(SEMICOLON); 
"/"			return(SLASH); 
"*"			return(STAR); 
"**"			return(STARSTAR); 
"->"            	| 
"^"			return(UPARROW); 
 
"(*"            	| 
"{"			begin 
                          repeat 
			    c := get_char; 
			    case c of 
			      '}' : ; 
			      '*' : begin 
				      c := get_char; 
				      if c=')' then exit else unget_char(c) 
				    end; 
                              #0 : begin 
                                     commenteof; 
                                     exit; 
                                   end; 
			    end; 
			  until false 
                        end; 
[ \n\t\f]		; 
 
.			return(ILLEGAL);