queryparser/lemon.c

Go to the documentation of this file.
00001 /*
00002 ** This file contains all sources (including headers) to the LEMON
00003 ** LALR(1) parser generator.  The sources have been combined into a
00004 ** single file to make it easy to include LEMON in the source tree
00005 ** and Makefile of another program.
00006 **
00007 ** The authors of this program disclaim copyright.
00008 **
00009 ** Modified to add "-o" and "-h" command line options.  Olly Betts 2005-02-14
00010 ** Modified to fix a number of compiler warnings.  Olly Betts 2007-02-20
00011 */
00012 #include <stdio.h>
00013 #include <stdarg.h>
00014 #include <string.h>
00015 #include <ctype.h>
00016 #include <stdlib.h>
00017 
00018 #ifndef __WIN32__
00019 #   if defined(_WIN32) || defined(WIN32)
00020 #       define __WIN32__
00021 #   endif
00022 #endif
00023 
00024 #ifndef __WIN32__
00025 #   include <unistd.h>
00026 #endif
00027 
00028 /* #define PRIVATE static */
00029 #define PRIVATE
00030 
00031 #ifdef TEST
00032 #define MAXRHS 5       /* Set low to exercise exception code */
00033 #else
00034 #define MAXRHS 1000
00035 #endif
00036 
00037 char *msort();
00038 /* ISO C prototypes malloc in stdlib.h: extern void *malloc(); */
00039 
00040 /******** From the file "action.h" *************************************/
00041 struct action *Action_new();
00042 struct action *Action_sort();
00043 
00044 /********* From the file "assert.h" ************************************/
00045 void myassert();
00046 #ifndef NDEBUG
00047 #  define assert(X) if(!(X))myassert(__FILE__,__LINE__)
00048 #else
00049 #  define assert(X)
00050 #endif
00051 
00052 /********** From the file "build.h" ************************************/
00053 void FindRulePrecedences();
00054 void FindFirstSets();
00055 void FindStates();
00056 void FindLinks();
00057 void FindFollowSets();
00058 void FindActions();
00059 
00060 /********* From the file "configlist.h" *********************************/
00061 void Configlist_init(/* void */);
00062 struct config *Configlist_add(/* struct rule *, int */);
00063 struct config *Configlist_addbasis(/* struct rule *, int */);
00064 void Configlist_closure(/* void */);
00065 void Configlist_sort(/* void */);
00066 void Configlist_sortbasis(/* void */);
00067 struct config *Configlist_return(/* void */);
00068 struct config *Configlist_basis(/* void */);
00069 void Configlist_eat(/* struct config * */);
00070 void Configlist_reset(/* void */);
00071 
00072 /********* From the file "error.h" ***************************************/
00073 void ErrorMsg(const char *, int,const char *, ...);
00074 
00075 /****** From the file "option.h" ******************************************/
00076 struct s_options {
00077   enum { OPT_FLAG=1,  OPT_INT,  OPT_DBL,  OPT_STR,
00078          OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type;
00079   char *label;
00080   void *arg;
00081   void(*func)();
00082 
00083   char *message;
00084 };
00085 int    OptInit(/* char**,struct s_options*,FILE* */);
00086 int    OptNArgs(/* void */);
00087 char  *OptArg(/* int */);
00088 void   OptErr(/* int */);
00089 void   OptPrint(/* void */);
00090 
00091 /******** From the file "parse.h" *****************************************/
00092 void Parse(/* struct lemon *lemp */);
00093 
00094 /********* From the file "plink.h" ***************************************/
00095 struct plink *Plink_new(/* void */);
00096 void Plink_add(/* struct plink **, struct config * */);
00097 void Plink_copy(/* struct plink **, struct plink * */);
00098 void Plink_delete(/* struct plink * */);
00099 
00100 /********** From the file "report.h" *************************************/
00101 void Reprint(/* struct lemon * */);
00102 void ReportOutput(/* struct lemon * */);
00103 void ReportTable(/* struct lemon * */);
00104 void ReportHeader(/* struct lemon * */);
00105 void CompressTables(/* struct lemon * */);
00106 
00107 /********** From the file "set.h" ****************************************/
00108 void  SetSize(/* int N */);             /* All sets will be of size N */
00109 char *SetNew(/* void */);               /* A new set for element 0..N */
00110 void  SetFree(/* char* */);             /* Deallocate a set */
00111 
00112 int SetAdd(/* char*,int */);            /* Add element to a set */
00113 int SetUnion(/* char *A,char *B */);    /* A <- A U B, thru element N */
00114 
00115 #define SetFind(X,Y) (X[Y])       /* True if Y is in set X */
00116 
00117 /********** From the file "struct.h" *************************************/
00118 /*
00119 ** Principal data structures for the LEMON parser generator.
00120 */
00121 
00122 typedef enum {B_FALSE=0, B_TRUE} Boolean;
00123 
00124 /* Symbols (terminals and nonterminals) of the grammar are stored
00125 ** in the following: */
00126 struct symbol {
00127   char *name;              /* Name of the symbol */
00128   int index;               /* Index number for this symbol */
00129   enum {
00130     TERMINAL,
00131     NONTERMINAL
00132   } type;                  /* Symbols are all either TERMINALS or NTs */
00133   struct rule *rule;       /* Linked list of rules of this (if an NT) */
00134   struct symbol *fallback; /* fallback token in case this token doesn't parse */
00135   int prec;                /* Precedence if defined (-1 otherwise) */
00136   enum e_assoc {
00137     LEFT,
00138     RIGHT,
00139     NONE,
00140     UNK
00141   } assoc;                 /* Associativity if predecence is defined */
00142   char *firstset;          /* First-set for all rules of this symbol */
00143   Boolean lambda;          /* True if NT and can generate an empty string */
00144   char *destructor;        /* Code which executes whenever this symbol is
00145                            ** popped from the stack during error processing */
00146   int destructorln;        /* Line number of destructor code */
00147   char *datatype;          /* The data type of information held by this
00148                            ** object. Only used if type==NONTERMINAL */
00149   int dtnum;               /* The data type number.  In the parser, the value
00150                            ** stack is a union.  The .yy%d element of this
00151                            ** union is the correct data type for this object */
00152 };
00153 
00154 /* Each production rule in the grammar is stored in the following
00155 ** structure.  */
00156 struct rule {
00157   struct symbol *lhs;      /* Left-hand side of the rule */
00158   char *lhsalias;          /* Alias for the LHS (NULL if none) */
00159   int ruleline;            /* Line number for the rule */
00160   int nrhs;                /* Number of RHS symbols */
00161   struct symbol **rhs;     /* The RHS symbols */
00162   char **rhsalias;         /* An alias for each RHS symbol (NULL if none) */
00163   int line;                /* Line number at which code begins */
00164   char *code;              /* The code executed when this rule is reduced */
00165   struct symbol *precsym;  /* Precedence symbol for this rule */
00166   int index;               /* An index number for this rule */
00167   Boolean canReduce;       /* True if this rule is ever reduced */
00168   struct rule *nextlhs;    /* Next rule with the same LHS */
00169   struct rule *next;       /* Next rule in the global list */
00170 };
00171 
00172 /* A configuration is a production rule of the grammar together with
00173 ** a mark (dot) showing how much of that rule has been processed so far.
00174 ** Configurations also contain a follow-set which is a list of terminal
00175 ** symbols which are allowed to immediately follow the end of the rule.
00176 ** Every configuration is recorded as an instance of the following: */
00177 struct config {
00178   struct rule *rp;         /* The rule upon which the configuration is based */
00179   int dot;                 /* The parse point */
00180   char *fws;               /* Follow-set for this configuration only */
00181   struct plink *fplp;      /* Follow-set forward propagation links */
00182   struct plink *bplp;      /* Follow-set backwards propagation links */
00183   struct state *stp;       /* Pointer to state which contains this */
00184   enum {
00185     COMPLETE,              /* The status is used during followset and */
00186     INCOMPLETE             /*    shift computations */
00187   } status;
00188   struct config *next;     /* Next configuration in the state */
00189   struct config *bp;       /* The next basis configuration */
00190 };
00191 
00192 /* Every shift or reduce operation is stored as one of the following */
00193 struct action {
00194   struct symbol *sp;       /* The look-ahead symbol */
00195   enum e_action {
00196     SHIFT,
00197     ACCEPT,
00198     REDUCE,
00199     ERROR,
00200     CONFLICT,                /* Was a reduce, but part of a conflict */
00201     SH_RESOLVED,             /* Was a shift.  Precedence resolved conflict */
00202     RD_RESOLVED,             /* Was reduce.  Precedence resolved conflict */
00203     NOT_USED                 /* Deleted by compression */
00204   } type;
00205   union {
00206     struct state *stp;     /* The new state, if a shift */
00207     struct rule *rp;       /* The rule, if a reduce */
00208   } x;
00209   struct action *next;     /* Next action for this state */
00210   struct action *collide;  /* Next action with the same hash */
00211 };
00212 
00213 /* Each state of the generated parser's finite state machine
00214 ** is encoded as an instance of the following structure. */
00215 struct state {
00216   struct config *bp;       /* The basis configurations for this state */
00217   struct config *cfp;      /* All configurations in this set */
00218   int index;               /* Sequencial number for this state */
00219   struct action *ap;       /* Array of actions for this state */
00220   int nTknAct, nNtAct;     /* Number of actions on terminals and nonterminals */
00221   int iTknOfst, iNtOfst;   /* yy_action[] offset for terminals and nonterms */
00222   int iDflt;               /* Default action */
00223 };
00224 #define NO_OFFSET (-2147483647)
00225 
00226 /* A followset propagation link indicates that the contents of one
00227 ** configuration followset should be propagated to another whenever
00228 ** the first changes. */
00229 struct plink {
00230   struct config *cfp;      /* The configuration to which linked */
00231   struct plink *next;      /* The next propagate link */
00232 };
00233 
00234 /* The state vector for the entire parser generator is recorded as
00235 ** follows.  (LEMON uses no global variables and makes little use of
00236 ** static variables.  Fields in the following structure can be thought
00237 ** of as begin global variables in the program.) */
00238 struct lemon {
00239   struct state **sorted;   /* Table of states sorted by state number */
00240   struct rule *rule;       /* List of all rules */
00241   int nstate;              /* Number of states */
00242   int nrule;               /* Number of rules */
00243   int nsymbol;             /* Number of terminal and nonterminal symbols */
00244   int nterminal;           /* Number of terminal symbols */
00245   struct symbol **symbols; /* Sorted array of pointers to symbols */
00246   int errorcnt;            /* Number of errors */
00247   struct symbol *errsym;   /* The error symbol */
00248   char *name;              /* Name of the generated parser */
00249   char *arg;               /* Declaration of the 3th argument to parser */
00250   char *tokentype;         /* Type of terminal symbols in the parser stack */
00251   char *vartype;           /* The default type of non-terminal symbols */
00252   char *start;             /* Name of the start symbol for the grammar */
00253   char *stacksize;         /* Size of the parser stack */
00254   char *include;           /* Code to put at the start of the C file */
00255   int  includeln;          /* Line number for start of include code */
00256   char *error;             /* Code to execute when an error is seen */
00257   int  errorln;            /* Line number for start of error code */
00258   char *overflow;          /* Code to execute on a stack overflow */
00259   int  overflowln;         /* Line number for start of overflow code */
00260   char *failure;           /* Code to execute on parser failure */
00261   int  failureln;          /* Line number for start of failure code */
00262   char *accept;            /* Code to execute when the parser excepts */
00263   int  acceptln;           /* Line number for the start of accept code */
00264   char *extracode;         /* Code appended to the generated file */
00265   int  extracodeln;        /* Line number for the start of the extra code */
00266   char *tokendest;         /* Code to execute to destroy token data */
00267   int  tokendestln;        /* Line number for token destroyer code */
00268   char *vardest;           /* Code for the default non-terminal destructor */
00269   int  vardestln;          /* Line number for default non-term destructor code*/
00270   char *filename;          /* Name of the input file */
00271   char *outname;           /* Name of the current output file */
00272   char *tokenprefix;       /* A prefix added to token names in the .h file */
00273   int nconflict;           /* Number of parsing conflicts */
00274   int tablesize;           /* Size of the parse tables */
00275   int basisflag;           /* Print only basis configurations */
00276   int has_fallback;        /* True if any %fallback is seen in the grammer */
00277   char *argv0;             /* Name of the program */
00278 };
00279 
00280 #define MemoryCheck(X) if((X)==0){ \
00281   extern void memory_error(); \
00282   memory_error(); \
00283 }
00284 
00285 /**************** From the file "table.h" *********************************/
00286 /*
00287 ** All code in this file has been automatically generated
00288 ** from a specification in the file
00289 **              "table.q"
00290 ** by the associative array code building program "aagen".
00291 ** Do not edit this file!  Instead, edit the specification
00292 ** file, then rerun aagen.
00293 */
00294 /*
00295 ** Code for processing tables in the LEMON parser generator.
00296 */
00297 
00298 /* Routines for handling a strings */
00299 
00300 char *Strsafe();
00301 
00302 void Strsafe_init(/* void */);
00303 int Strsafe_insert(/* char * */);
00304 char *Strsafe_find(/* char * */);
00305 
00306 /* Routines for handling symbols of the grammar */
00307 
00308 struct symbol *Symbol_new();
00309 int Symbolcmpp(const void *void_a, const void *void_b);
00310 void Symbol_init(/* void */);
00311 int Symbol_insert(/* struct symbol *, char * */);
00312 struct symbol *Symbol_find(/* char * */);
00313 struct symbol *Symbol_Nth(/* int */);
00314 int Symbol_count(/*  */);
00315 struct symbol **Symbol_arrayof(/*  */);
00316 
00317 /* Routines to manage the state table */
00318 
00319 int Configcmp(/* struct config *, struct config * */);
00320 struct state *State_new();
00321 void State_init(/* void */);
00322 int State_insert(/* struct state *, struct config * */);
00323 struct state *State_find(/* struct config * */);
00324 struct state **State_arrayof(/*  */);
00325 
00326 /* Routines used for efficiency in Configlist_add */
00327 
00328 void Configtable_init(/* void */);
00329 int Configtable_insert(/* struct config * */);
00330 struct config *Configtable_find(/* struct config * */);
00331 void Configtable_clear(/* int(*)(struct config *) */);
00332 /****************** From the file "action.c" *******************************/
00333 /*
00334 ** Routines processing parser actions in the LEMON parser generator.
00335 */
00336 
00337 /* Allocate a new parser action */
00338 struct action *Action_new(){
00339   static struct action *freelist = 0;
00340   struct action *new;
00341 
00342   if( freelist==0 ){
00343     int i;
00344     int amt = 100;
00345     freelist = (struct action *)malloc( sizeof(struct action)*amt );
00346     if( freelist==0 ){
00347       fprintf(stderr,"Unable to allocate memory for a new parser action.");
00348       exit(1);
00349     }
00350     for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
00351     freelist[amt-1].next = 0;
00352   }
00353   new = freelist;
00354   freelist = freelist->next;
00355   return new;
00356 }
00357 
00358 /* Compare two actions */
00359 static int actioncmp(ap1,ap2)
00360 struct action *ap1;
00361 struct action *ap2;
00362 {
00363   int rc;
00364   rc = ap1->sp->index - ap2->sp->index;
00365   if( rc==0 ) rc = (int)ap1->type - (int)ap2->type;
00366   if( rc==0 ){
00367     assert( ap1->type==REDUCE || ap1->type==RD_RESOLVED || ap1->type==CONFLICT);
00368     assert( ap2->type==REDUCE || ap2->type==RD_RESOLVED || ap2->type==CONFLICT);
00369     rc = ap1->x.rp->index - ap2->x.rp->index;
00370   }
00371   return rc;
00372 }
00373 
00374 /* Sort parser actions */
00375 struct action *Action_sort(ap)
00376 struct action *ap;
00377 {
00378   /* Cast to "char **" via "void *" to avoid aliasing problems. */
00379   ap = (struct action *)msort((char *)ap,(char **)(void *)&ap->next,actioncmp);
00380   return ap;
00381 }
00382 
00383 void Action_add(app,type,sp,arg)
00384 struct action **app;
00385 enum e_action type;
00386 struct symbol *sp;
00387 char *arg;
00388 {
00389   struct action *new;
00390   new = Action_new();
00391   new->next = *app;
00392   *app = new;
00393   new->type = type;
00394   new->sp = sp;
00395   if( type==SHIFT ){
00396     new->x.stp = (struct state *)arg;
00397   }else{
00398     new->x.rp = (struct rule *)arg;
00399   }
00400 }
00401 /********************** New code to implement the "acttab" module ***********/
00402 /*
00403 ** This module implements routines use to construct the yy_action[] table.
00404 */
00405 
00406 /*
00407 ** The state of the yy_action table under construction is an instance of
00408 ** the following structure
00409 */
00410 typedef struct acttab acttab;
00411 struct acttab {
00412   int nAction;                 /* Number of used slots in aAction[] */
00413   int nActionAlloc;            /* Slots allocated for aAction[] */
00414   struct {
00415     int lookahead;             /* Value of the lookahead token */
00416     int action;                /* Action to take on the given lookahead */
00417   } *aAction,                  /* The yy_action[] table under construction */
00418     *aLookahead;               /* A single new transaction set */
00419   int mnLookahead;             /* Minimum aLookahead[].lookahead */
00420   int mnAction;                /* Action associated with mnLookahead */
00421   int mxLookahead;             /* Maximum aLookahead[].lookahead */
00422   int nLookahead;              /* Used slots in aLookahead[] */
00423   int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
00424 };
00425 
00426 /* Return the number of entries in the yy_action table */
00427 #define acttab_size(X) ((X)->nAction)
00428 
00429 /* The value for the N-th entry in yy_action */
00430 #define acttab_yyaction(X,N)  ((X)->aAction[N].action)
00431 
00432 /* The value for the N-th entry in yy_lookahead */
00433 #define acttab_yylookahead(X,N)  ((X)->aAction[N].lookahead)
00434 
00435 /* Free all memory associated with the given acttab */
00436 void acttab_free(acttab *p){
00437   free( p->aAction );
00438   free( p->aLookahead );
00439   free( p );
00440 }
00441 
00442 /* Allocate a new acttab structure */
00443 acttab *acttab_alloc(void){
00444   acttab *p = malloc( sizeof(*p) );
00445   if( p==0 ){
00446     fprintf(stderr,"Unable to allocate memory for a new acttab.");
00447     exit(1);
00448   }
00449   memset(p, 0, sizeof(*p));
00450   return p;
00451 }
00452 
00453 /* Add a new action to the current transaction set
00454 */
00455 void acttab_action(acttab *p, int lookahead, int action){
00456   if( p->nLookahead>=p->nLookaheadAlloc ){
00457     p->nLookaheadAlloc += 25;
00458     p->aLookahead = realloc( p->aLookahead,
00459                              sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
00460     if( p->aLookahead==0 ){
00461       fprintf(stderr,"malloc failed\n");
00462       exit(1);
00463     }
00464   }
00465   if( p->nLookahead==0 ){
00466     p->mxLookahead = lookahead;
00467     p->mnLookahead = lookahead;
00468     p->mnAction = action;
00469   }else{
00470     if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
00471     if( p->mnLookahead>lookahead ){
00472       p->mnLookahead = lookahead;
00473       p->mnAction = action;
00474     }
00475   }
00476   p->aLookahead[p->nLookahead].lookahead = lookahead;
00477   p->aLookahead[p->nLookahead].action = action;
00478   p->nLookahead++;
00479 }
00480 
00481 /*
00482 ** Add the transaction set built up with prior calls to acttab_action()
00483 ** into the current action table.  Then reset the transaction set back
00484 ** to an empty set in preparation for a new round of acttab_action() calls.
00485 **
00486 ** Return the offset into the action table of the new transaction.
00487 */
00488 int acttab_insert(acttab *p){
00489   int i, j, k, n;
00490   assert( p->nLookahead>0 );
00491 
00492   /* Make sure we have enough space to hold the expanded action table
00493   ** in the worst case.  The worst case occurs if the transaction set
00494   ** must be appended to the current action table
00495   */
00496   n = p->mxLookahead + 1;
00497   if( p->nAction + n >= p->nActionAlloc ){
00498     int oldAlloc = p->nActionAlloc;
00499     p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
00500     p->aAction = realloc( p->aAction,
00501                           sizeof(p->aAction[0])*p->nActionAlloc);
00502     if( p->aAction==0 ){
00503       fprintf(stderr,"malloc failed\n");
00504       exit(1);
00505     }
00506     for(i=oldAlloc; i<p->nActionAlloc; i++){
00507       p->aAction[i].lookahead = -1;
00508       p->aAction[i].action = -1;
00509     }
00510   }
00511 
00512   /* Scan the existing action table looking for an offset where we can
00513   ** insert the current transaction set.  Fall out of the loop when that
00514   ** offset is found.  In the worst case, we fall out of the loop when
00515   ** i reaches p->nAction, which means we append the new transaction set.
00516   **
00517   ** i is the index in p->aAction[] where p->mnLookahead is inserted.
00518   */
00519   for(i=0; i<p->nAction+p->mnLookahead; i++){
00520     if( p->aAction[i].lookahead<0 ){
00521       for(j=0; j<p->nLookahead; j++){
00522         k = p->aLookahead[j].lookahead - p->mnLookahead + i;
00523         if( k<0 ) break;
00524         if( p->aAction[k].lookahead>=0 ) break;
00525       }
00526       if( j<p->nLookahead ) continue;
00527       for(j=0; j<p->nAction; j++){
00528         if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
00529       }
00530       if( j==p->nAction ){
00531         break;  /* Fits in empty slots */
00532       }
00533     }else if( p->aAction[i].lookahead==p->mnLookahead ){
00534       if( p->aAction[i].action!=p->mnAction ) continue;
00535       for(j=0; j<p->nLookahead; j++){
00536         k = p->aLookahead[j].lookahead - p->mnLookahead + i;
00537         if( k<0 || k>=p->nAction ) break;
00538         if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
00539         if( p->aLookahead[j].action!=p->aAction[k].action ) break;
00540       }
00541       if( j<p->nLookahead ) continue;
00542       n = 0;
00543       for(j=0; j<p->nAction; j++){
00544         if( p->aAction[j].lookahead<0 ) continue;
00545         if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
00546       }
00547       if( n==p->nLookahead ){
00548         break;  /* Same as a prior transaction set */
00549       }
00550     }
00551   }
00552   /* Insert transaction set at index i. */
00553   for(j=0; j<p->nLookahead; j++){
00554     k = p->aLookahead[j].lookahead - p->mnLookahead + i;
00555     p->aAction[k] = p->aLookahead[j];
00556     if( k>=p->nAction ) p->nAction = k+1;
00557   }
00558   p->nLookahead = 0;
00559 
00560   /* Return the offset that is added to the lookahead in order to get the
00561   ** index into yy_action of the action */
00562   return i - p->mnLookahead;
00563 }
00564 
00565 /********************** From the file "assert.c" ****************************/
00566 /*
00567 ** A more efficient way of handling assertions.
00568 */
00569 void myassert(file,line)
00570 char *file;
00571 int line;
00572 {
00573   fprintf(stderr,"Assertion failed on line %d of file \"%s\"\n",line,file);
00574   exit(1);
00575 }
00576 /********************** From the file "build.c" *****************************/
00577 /*
00578 ** Routines to construction the finite state machine for the LEMON
00579 ** parser generator.
00580 */
00581 
00582 /* Find a precedence symbol of every rule in the grammar.
00583 ** 
00584 ** Those rules which have a precedence symbol coded in the input
00585 ** grammar using the "[symbol]" construct will already have the
00586 ** rp->precsym field filled.  Other rules take as their precedence
00587 ** symbol the first RHS symbol with a defined precedence.  If there
00588 ** are not RHS symbols with a defined precedence, the precedence
00589 ** symbol field is left blank.
00590 */
00591 void FindRulePrecedences(xp)
00592 struct lemon *xp;
00593 {
00594   struct rule *rp;
00595   for(rp=xp->rule; rp; rp=rp->next){
00596     if( rp->precsym==0 ){
00597       int i;
00598       for(i=0; i<rp->nrhs; i++){
00599         if( rp->rhs[i]->prec>=0 ){
00600           rp->precsym = rp->rhs[i];
00601           break;
00602         }
00603       }
00604     }
00605   }
00606   return;
00607 }
00608 
00609 /* Find all nonterminals which will generate the empty string.
00610 ** Then go back and compute the first sets of every nonterminal.
00611 ** The first set is the set of all terminal symbols which can begin
00612 ** a string generated by that nonterminal.
00613 */
00614 void FindFirstSets(lemp)
00615 struct lemon *lemp;
00616 {
00617   int i;
00618   struct rule *rp;
00619   int progress;
00620 
00621   for(i=0; i<lemp->nsymbol; i++){
00622     lemp->symbols[i]->lambda = B_FALSE;
00623   }
00624   for(i=lemp->nterminal; i<lemp->nsymbol; i++){
00625     lemp->symbols[i]->firstset = SetNew();
00626   }
00627 
00628   /* First compute all lambdas */
00629   do{
00630     progress = 0;
00631     for(rp=lemp->rule; rp; rp=rp->next){
00632       if( rp->lhs->lambda ) continue;
00633       for(i=0; i<rp->nrhs; i++){
00634          if( rp->rhs[i]->lambda==B_FALSE ) break;
00635       }
00636       if( i==rp->nrhs ){
00637         rp->lhs->lambda = B_TRUE;
00638         progress = 1;
00639       }
00640     }
00641   }while( progress );
00642 
00643   /* Now compute all first sets */
00644   do{
00645     struct symbol *s1, *s2;
00646     progress = 0;
00647     for(rp=lemp->rule; rp; rp=rp->next){
00648       s1 = rp->lhs;
00649       for(i=0; i<rp->nrhs; i++){
00650         s2 = rp->rhs[i];
00651         if( s2->type==TERMINAL ){
00652           progress += SetAdd(s1->firstset,s2->index);
00653           break;
00654         }else if( s1==s2 ){
00655           if( s1->lambda==B_FALSE ) break;
00656         }else{
00657           progress += SetUnion(s1->firstset,s2->firstset);
00658           if( s2->lambda==B_FALSE ) break;
00659         }
00660       }
00661     }
00662   }while( progress );
00663   return;
00664 }
00665 
00666 /* Compute all LR(0) states for the grammar.  Links
00667 ** are added to between some states so that the LR(1) follow sets
00668 ** can be computed later.
00669 */
00670 PRIVATE struct state *getstate(/* struct lemon * */);  /* forward reference */
00671 void FindStates(lemp)
00672 struct lemon *lemp;
00673 {
00674   struct symbol *sp;
00675   struct rule *rp;
00676 
00677   Configlist_init();
00678 
00679   /* Find the start symbol */
00680   if( lemp->start ){
00681     sp = Symbol_find(lemp->start);
00682     if( sp==0 ){
00683       ErrorMsg(lemp->filename,0,
00684 "The specified start symbol \"%s\" is not "
00685 "in a nonterminal of the grammar.  \"%s\" will be used as the start "
00686 "symbol instead.",lemp->start,lemp->rule->lhs->name);
00687       lemp->errorcnt++;
00688       sp = lemp->rule->lhs;
00689     }
00690   }else{
00691     sp = lemp->rule->lhs;
00692   }
00693 
00694   /* Make sure the start symbol doesn't occur on the right-hand side of
00695   ** any rule.  Report an error if it does.  (YACC would generate a new
00696   ** start symbol in this case.) */
00697   for(rp=lemp->rule; rp; rp=rp->next){
00698     int i;
00699     for(i=0; i<rp->nrhs; i++){
00700       if( rp->rhs[i]==sp ){
00701         ErrorMsg(lemp->filename,0,
00702 "The start symbol \"%s\" occurs on the "
00703 "right-hand side of a rule. This will result in a parser which "
00704 "does not work properly.",sp->name);
00705         lemp->errorcnt++;
00706       }
00707     }
00708   }
00709 
00710   /* The basis configuration set for the first state
00711   ** is all rules which have the start symbol as their
00712   ** left-hand side */
00713   for(rp=sp->rule; rp; rp=rp->nextlhs){
00714     struct config *newcfp;
00715     newcfp = Configlist_addbasis(rp,0);
00716     SetAdd(newcfp->fws,0);
00717   }
00718 
00719   /* Compute the first state.  All other states will be
00720   ** computed automatically during the computation of the first one.
00721   ** The returned pointer to the first state is not used. */
00722   (void)getstate(lemp);
00723   return;
00724 }
00725 
00726 /* Return a pointer to a state which is described by the configuration
00727 ** list which has been built from calls to Configlist_add.
00728 */
00729 PRIVATE void buildshifts(/* struct lemon *, struct state * */); /* Forwd ref */
00730 PRIVATE struct state *getstate(lemp)
00731 struct lemon *lemp;
00732 {
00733   struct config *cfp, *bp;
00734   struct state *stp;
00735 
00736   /* Extract the sorted basis of the new state.  The basis was constructed
00737   ** by prior calls to "Configlist_addbasis()". */
00738   Configlist_sortbasis();
00739   bp = Configlist_basis();
00740 
00741   /* Get a state with the same basis */
00742   stp = State_find(bp);
00743   if( stp ){
00744     /* A state with the same basis already exists!  Copy all the follow-set
00745     ** propagation links from the state under construction into the
00746     ** preexisting state, then return a pointer to the preexisting state */
00747     struct config *x, *y;
00748     for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
00749       Plink_copy(&y->bplp,x->bplp);
00750       Plink_delete(x->fplp);
00751       x->fplp = x->bplp = 0;
00752     }
00753     cfp = Configlist_return();
00754     Configlist_eat(cfp);
00755   }else{
00756     /* This really is a new state.  Construct all the details */
00757     Configlist_closure(lemp);    /* Compute the configuration closure */
00758     Configlist_sort();           /* Sort the configuration closure */
00759     cfp = Configlist_return();   /* Get a pointer to the config list */
00760     stp = State_new();           /* A new state structure */
00761     MemoryCheck(stp);
00762     stp->bp = bp;                /* Remember the configuration basis */
00763     stp->cfp = cfp;              /* Remember the configuration closure */
00764     stp->index = lemp->nstate++; /* Every state gets a sequence number */
00765     stp->ap = 0;                 /* No actions, yet. */
00766     State_insert(stp,stp->bp);   /* Add to the state table */
00767     buildshifts(lemp,stp);       /* Recursively compute successor states */
00768   }
00769   return stp;
00770 }
00771 
00772 /* Construct all successor states to the given state.  A "successor"
00773 ** state is any state which can be reached by a shift action.
00774 */
00775 PRIVATE void buildshifts(lemp,stp)
00776 struct lemon *lemp;
00777 struct state *stp;     /* The state from which successors are computed */
00778 {
00779   struct config *cfp;  /* For looping thru the config closure of "stp" */
00780   struct config *bcfp; /* For the inner loop on config closure of "stp" */
00781   struct config *new;  /* */
00782   struct symbol *sp;   /* Symbol following the dot in configuration "cfp" */
00783   struct symbol *bsp;  /* Symbol following the dot in configuration "bcfp" */
00784   struct state *newstp; /* A pointer to a successor state */
00785 
00786   /* Each configuration becomes complete after it contibutes to a successor
00787   ** state.  Initially, all configurations are incomplete */
00788   for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
00789 
00790   /* Loop through all configurations of the state "stp" */
00791   for(cfp=stp->cfp; cfp; cfp=cfp->next){
00792     if( cfp->status==COMPLETE ) continue;    /* Already used by inner loop */
00793     if( cfp->dot>=cfp->rp->nrhs ) continue;  /* Can't shift this config */
00794     Configlist_reset();                      /* Reset the new config set */
00795     sp = cfp->rp->rhs[cfp->dot];             /* Symbol after the dot */
00796 
00797     /* For every configuration in the state "stp" which has the symbol "sp"
00798     ** following its dot, add the same configuration to the basis set under
00799     ** construction but with the dot shifted one symbol to the right. */
00800     for(bcfp=cfp; bcfp; bcfp=bcfp->next){
00801       if( bcfp->status==COMPLETE ) continue;    /* Already used */
00802       if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
00803       bsp = bcfp->rp->rhs[bcfp->dot];           /* Get symbol after dot */
00804       if( bsp!=sp ) continue;                   /* Must be same as for "cfp" */
00805       bcfp->status = COMPLETE;                  /* Mark this config as used */
00806       new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
00807       Plink_add(&new->bplp,bcfp);
00808     }
00809 
00810     /* Get a pointer to the state described by the basis configuration set
00811     ** constructed in the preceding loop */
00812     newstp = getstate(lemp);
00813 
00814     /* The state "newstp" is reached from the state "stp" by a shift action
00815     ** on the symbol "sp" */
00816     Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
00817   }
00818 }
00819 
00820 /*
00821 ** Construct the propagation links
00822 */
00823 void FindLinks(lemp)
00824 struct lemon *lemp;
00825 {
00826   int i;
00827   struct config *cfp, *other;
00828   struct state *stp;
00829   struct plink *plp;
00830 
00831   /* Housekeeping detail