00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <stdio.h>
00013 #include <stdarg.h>
00014 #include <string.h>
00015 #include <ctype.h>
00016 #include <stdlib.h>
00017
00018 #ifndef __WIN32__
00019 # if defined(_WIN32) || defined(WIN32)
00020 # define __WIN32__
00021 # endif
00022 #endif
00023
00024 #ifndef __WIN32__
00025 # include <unistd.h>
00026 #endif
00027
00028
00029 #define PRIVATE
00030
00031 #ifdef TEST
00032 #define MAXRHS 5
00033 #else
00034 #define MAXRHS 1000
00035 #endif
00036
00037 char *msort();
00038
00039
00040
00041 struct action *Action_new();
00042 struct action *Action_sort();
00043
00044
00045 void myassert();
00046 #ifndef NDEBUG
00047 # define assert(X) if(!(X))myassert(__FILE__,__LINE__)
00048 #else
00049 # define assert(X)
00050 #endif
00051
00052
00053 void FindRulePrecedences();
00054 void FindFirstSets();
00055 void FindStates();
00056 void FindLinks();
00057 void FindFollowSets();
00058 void FindActions();
00059
00060
00061 void Configlist_init();
00062 struct config *Configlist_add();
00063 struct config *Configlist_addbasis();
00064 void Configlist_closure();
00065 void Configlist_sort();
00066 void Configlist_sortbasis();
00067 struct config *Configlist_return();
00068 struct config *Configlist_basis();
00069 void Configlist_eat();
00070 void Configlist_reset();
00071
00072
00073 void ErrorMsg(const char *, int,const char *, ...);
00074
00075
00076 struct s_options {
00077 enum { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR,
00078 OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type;
00079 char *label;
00080 void *arg;
00081 void(*func)();
00082
00083 char *message;
00084 };
00085 int OptInit();
00086 int OptNArgs();
00087 char *OptArg();
00088 void OptErr();
00089 void OptPrint();
00090
00091
00092 void Parse();
00093
00094
00095 struct plink *Plink_new();
00096 void Plink_add();
00097 void Plink_copy();
00098 void Plink_delete();
00099
00100
00101 void Reprint();
00102 void ReportOutput();
00103 void ReportTable();
00104 void ReportHeader();
00105 void CompressTables();
00106
00107
00108 void SetSize();
00109 char *SetNew();
00110 void SetFree();
00111
00112 int SetAdd();
00113 int SetUnion();
00114
00115 #define SetFind(X,Y) (X[Y])
00116
00117
00118
00119
00120
00121
00122 typedef enum {B_FALSE=0, B_TRUE} Boolean;
00123
00124
00125
00126 struct symbol {
00127 char *name;
00128 int index;
00129 enum {
00130 TERMINAL,
00131 NONTERMINAL
00132 } type;
00133 struct rule *rule;
00134 struct symbol *fallback;
00135 int prec;
00136 enum e_assoc {
00137 LEFT,
00138 RIGHT,
00139 NONE,
00140 UNK
00141 } assoc;
00142 char *firstset;
00143 Boolean lambda;
00144 char *destructor;
00145
00146 int destructorln;
00147 char *datatype;
00148
00149 int dtnum;
00150
00151
00152 };
00153
00154
00155
00156 struct rule {
00157 struct symbol *lhs;
00158 char *lhsalias;
00159 int ruleline;
00160 int nrhs;
00161 struct symbol **rhs;
00162 char **rhsalias;
00163 int line;
00164 char *code;
00165 struct symbol *precsym;
00166 int index;
00167 Boolean canReduce;
00168 struct rule *nextlhs;
00169 struct rule *next;
00170 };
00171
00172
00173
00174
00175
00176
00177 struct config {
00178 struct rule *rp;
00179 int dot;
00180 char *fws;
00181 struct plink *fplp;
00182 struct plink *bplp;
00183 struct state *stp;
00184 enum {
00185 COMPLETE,
00186 INCOMPLETE
00187 } status;
00188 struct config *next;
00189 struct config *bp;
00190 };
00191
00192
00193 struct action {
00194 struct symbol *sp;
00195 enum e_action {
00196 SHIFT,
00197 ACCEPT,
00198 REDUCE,
00199 ERROR,
00200 CONFLICT,
00201 SH_RESOLVED,
00202 RD_RESOLVED,
00203 NOT_USED
00204 } type;
00205 union {
00206 struct state *stp;
00207 struct rule *rp;
00208 } x;
00209 struct action *next;
00210 struct action *collide;
00211 };
00212
00213
00214
00215 struct state {
00216 struct config *bp;
00217 struct config *cfp;
00218 int index;
00219 struct action *ap;
00220 int nTknAct, nNtAct;
00221 int iTknOfst, iNtOfst;
00222 int iDflt;
00223 };
00224 #define NO_OFFSET (-2147483647)
00225
00226
00227
00228
00229 struct plink {
00230 struct config *cfp;
00231 struct plink *next;
00232 };
00233
00234
00235
00236
00237
00238 struct lemon {
00239 struct state **sorted;
00240 struct rule *rule;
00241 int nstate;
00242 int nrule;
00243 int nsymbol;
00244 int nterminal;
00245 struct symbol **symbols;
00246 int errorcnt;
00247 struct symbol *errsym;
00248 char *name;
00249 char *arg;
00250 char *tokentype;
00251 char *vartype;
00252 char *start;
00253 char *stacksize;
00254 char *include;
00255 int includeln;
00256 char *error;
00257 int errorln;
00258 char *overflow;
00259 int overflowln;
00260 char *failure;
00261 int failureln;
00262 char *accept;
00263 int acceptln;
00264 char *extracode;
00265 int extracodeln;
00266 char *tokendest;
00267 int tokendestln;
00268 char *vardest;
00269 int vardestln;
00270 char *filename;
00271 char *outname;
00272 char *tokenprefix;
00273 int nconflict;
00274 int tablesize;
00275 int basisflag;
00276 int has_fallback;
00277 char *argv0;
00278 };
00279
00280 #define MemoryCheck(X) if((X)==0){ \
00281 extern void memory_error(); \
00282 memory_error(); \
00283 }
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 char *Strsafe();
00301
00302 void Strsafe_init();
00303 int Strsafe_insert();
00304 char *Strsafe_find();
00305
00306
00307
00308 struct symbol *Symbol_new();
00309 int Symbolcmpp(const void *void_a, const void *void_b);
00310 void Symbol_init();
00311 int Symbol_insert();
00312 struct symbol *Symbol_find();
00313 struct symbol *Symbol_Nth();
00314 int Symbol_count();
00315 struct symbol **Symbol_arrayof();
00316
00317
00318
00319 int Configcmp();
00320 struct state *State_new();
00321 void State_init();
00322 int State_insert();
00323 struct state *State_find();
00324 struct state **State_arrayof();
00325
00326
00327
00328 void Configtable_init();
00329 int Configtable_insert();
00330 struct config *Configtable_find();
00331 void Configtable_clear();
00332
00333
00334
00335
00336
00337
00338 struct action *Action_new(){
00339 static struct action *freelist = 0;
00340 struct action *new;
00341
00342 if( freelist==0 ){
00343 int i;
00344 int amt = 100;
00345 freelist = (struct action *)malloc( sizeof(struct action)*amt );
00346 if( freelist==0 ){
00347 fprintf(stderr,"Unable to allocate memory for a new parser action.");
00348 exit(1);
00349 }
00350 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
00351 freelist[amt-1].next = 0;
00352 }
00353 new = freelist;
00354 freelist = freelist->next;
00355 return new;
00356 }
00357
00358
00359 static int actioncmp(ap1,ap2)
00360 struct action *ap1;
00361 struct action *ap2;
00362 {
00363 int rc;
00364 rc = ap1->sp->index - ap2->sp->index;
00365 if( rc==0 ) rc = (int)ap1->type - (int)ap2->type;
00366 if( rc==0 ){
00367 assert( ap1->type==REDUCE || ap1->type==RD_RESOLVED || ap1->type==CONFLICT);
00368 assert( ap2->type==REDUCE || ap2->type==RD_RESOLVED || ap2->type==CONFLICT);
00369 rc = ap1->x.rp->index - ap2->x.rp->index;
00370 }
00371 return rc;
00372 }
00373
00374
00375 struct action *Action_sort(ap)
00376 struct action *ap;
00377 {
00378
00379 ap = (struct action *)msort((char *)ap,(char **)(void *)&ap->next,actioncmp);
00380 return ap;
00381 }
00382
00383 void Action_add(app,type,sp,arg)
00384 struct action **app;
00385 enum e_action type;
00386 struct symbol *sp;
00387 char *arg;
00388 {
00389 struct action *new;
00390 new = Action_new();
00391 new->next = *app;
00392 *app = new;
00393 new->type = type;
00394 new->sp = sp;
00395 if( type==SHIFT ){
00396 new->x.stp = (struct state *)arg;
00397 }else{
00398 new->x.rp = (struct rule *)arg;
00399 }
00400 }
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 typedef struct acttab acttab;
00411 struct acttab {
00412 int nAction;
00413 int nActionAlloc;
00414 struct {
00415 int lookahead;
00416 int action;
00417 } *aAction,
00418 *aLookahead;
00419 int mnLookahead;
00420 int mnAction;
00421 int mxLookahead;
00422 int nLookahead;
00423 int nLookaheadAlloc;
00424 };
00425
00426
00427 #define acttab_size(X) ((X)->nAction)
00428
00429
00430 #define acttab_yyaction(X,N) ((X)->aAction[N].action)
00431
00432
00433 #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead)
00434
00435
00436 void acttab_free(acttab *p){
00437 free( p->aAction );
00438 free( p->aLookahead );
00439 free( p );
00440 }
00441
00442
00443 acttab *acttab_alloc(void){
00444 acttab *p = malloc( sizeof(*p) );
00445 if( p==0 ){
00446 fprintf(stderr,"Unable to allocate memory for a new acttab.");
00447 exit(1);
00448 }
00449 memset(p, 0, sizeof(*p));
00450 return p;
00451 }
00452
00453
00454
00455 void acttab_action(acttab *p, int lookahead, int action){
00456 if( p->nLookahead>=p->nLookaheadAlloc ){
00457 p->nLookaheadAlloc += 25;
00458 p->aLookahead = realloc( p->aLookahead,
00459 sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
00460 if( p->aLookahead==0 ){
00461 fprintf(stderr,"malloc failed\n");
00462 exit(1);
00463 }
00464 }
00465 if( p->nLookahead==0 ){
00466 p->mxLookahead = lookahead;
00467 p->mnLookahead = lookahead;
00468 p->mnAction = action;
00469 }else{
00470 if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
00471 if( p->mnLookahead>lookahead ){
00472 p->mnLookahead = lookahead;
00473 p->mnAction = action;
00474 }
00475 }
00476 p->aLookahead[p->nLookahead].lookahead = lookahead;
00477 p->aLookahead[p->nLookahead].action = action;
00478 p->nLookahead++;
00479 }
00480
00481
00482
00483
00484
00485
00486
00487
00488 int acttab_insert(acttab *p){
00489 int i, j, k, n;
00490 assert( p->nLookahead>0 );
00491
00492
00493
00494
00495
00496 n = p->mxLookahead + 1;
00497 if( p->nAction + n >= p->nActionAlloc ){
00498 int oldAlloc = p->nActionAlloc;
00499 p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
00500 p->aAction = realloc( p->aAction,
00501 sizeof(p->aAction[0])*p->nActionAlloc);
00502 if( p->aAction==0 ){
00503 fprintf(stderr,"malloc failed\n");
00504 exit(1);
00505 }
00506 for(i=oldAlloc; i<p->nActionAlloc; i++){
00507 p->aAction[i].lookahead = -1;
00508 p->aAction[i].action = -1;
00509 }
00510 }
00511
00512
00513
00514
00515
00516
00517
00518
00519 for(i=0; i<p->nAction+p->mnLookahead; i++){
00520 if( p->aAction[i].lookahead<0 ){
00521 for(j=0; j<p->nLookahead; j++){
00522 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
00523 if( k<0 ) break;
00524 if( p->aAction[k].lookahead>=0 ) break;
00525 }
00526 if( j<p->nLookahead ) continue;
00527 for(j=0; j<p->nAction; j++){
00528 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break;
00529 }
00530 if( j==p->nAction ){
00531 break;
00532 }
00533 }else if( p->aAction[i].lookahead==p->mnLookahead ){
00534 if( p->aAction[i].action!=p->mnAction ) continue;
00535 for(j=0; j<p->nLookahead; j++){
00536 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
00537 if( k<0 || k>=p->nAction ) break;
00538 if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break;
00539 if( p->aLookahead[j].action!=p->aAction[k].action ) break;
00540 }
00541 if( j<p->nLookahead ) continue;
00542 n = 0;
00543 for(j=0; j<p->nAction; j++){
00544 if( p->aAction[j].lookahead<0 ) continue;
00545 if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++;
00546 }
00547 if( n==p->nLookahead ){
00548 break;
00549 }
00550 }
00551 }
00552
00553 for(j=0; j<p->nLookahead; j++){
00554 k = p->aLookahead[j].lookahead - p->mnLookahead + i;
00555 p->aAction[k] = p->aLookahead[j];
00556 if( k>=p->nAction ) p->nAction = k+1;
00557 }
00558 p->nLookahead = 0;
00559
00560
00561
00562 return i - p->mnLookahead;
00563 }
00564
00565
00566
00567
00568
00569 void myassert(file,line)
00570 char *file;
00571 int line;
00572 {
00573 fprintf(stderr,"Assertion failed on line %d of file \"%s\"\n",line,file);
00574 exit(1);
00575 }
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591 void FindRulePrecedences(xp)
00592 struct lemon *xp;
00593 {
00594 struct rule *rp;
00595 for(rp=xp->rule; rp; rp=rp->next){
00596 if( rp->precsym==0 ){
00597 int i;
00598 for(i=0; i<rp->nrhs; i++){
00599 if( rp->rhs[i]->prec>=0 ){
00600 rp->precsym = rp->rhs[i];
00601 break;
00602 }
00603 }
00604 }
00605 }
00606 return;
00607 }
00608
00609
00610
00611
00612
00613
00614 void FindFirstSets(lemp)
00615 struct lemon *lemp;
00616 {
00617 int i;
00618 struct rule *rp;
00619 int progress;
00620
00621 for(i=0; i<lemp->nsymbol; i++){
00622 lemp->symbols[i]->lambda = B_FALSE;
00623 }
00624 for(i=lemp->nterminal; i<lemp->nsymbol; i++){
00625 lemp->symbols[i]->firstset = SetNew();
00626 }
00627
00628
00629 do{
00630 progress = 0;
00631 for(rp=lemp->rule; rp; rp=rp->next){
00632 if( rp->lhs->lambda ) continue;
00633 for(i=0; i<rp->nrhs; i++){
00634 if( rp->rhs[i]->lambda==B_FALSE ) break;
00635 }
00636 if( i==rp->nrhs ){
00637 rp->lhs->lambda = B_TRUE;
00638 progress = 1;
00639 }
00640 }
00641 }while( progress );
00642
00643
00644 do{
00645 struct symbol *s1, *s2;
00646 progress = 0;
00647 for(rp=lemp->rule; rp; rp=rp->next){
00648 s1 = rp->lhs;
00649 for(i=0; i<rp->nrhs; i++){
00650 s2 = rp->rhs[i];
00651 if( s2->type==TERMINAL ){
00652 progress += SetAdd(s1->firstset,s2->index);
00653 break;
00654 }else if( s1==s2 ){
00655 if( s1->lambda==B_FALSE ) break;
00656 }else{
00657 progress += SetUnion(s1->firstset,s2->firstset);
00658 if( s2->lambda==B_FALSE ) break;
00659 }
00660 }
00661 }
00662 }while( progress );
00663 return;
00664 }
00665
00666
00667
00668
00669
00670 PRIVATE struct state *getstate();
00671 void FindStates(lemp)
00672 struct lemon *lemp;
00673 {
00674 struct symbol *sp;
00675 struct rule *rp;
00676
00677 Configlist_init();
00678
00679
00680 if( lemp->start ){
00681 sp = Symbol_find(lemp->start);
00682 if( sp==0 ){
00683 ErrorMsg(lemp->filename,0,
00684 "The specified start symbol \"%s\" is not "
00685 "in a nonterminal of the grammar. \"%s\" will be used as the start "
00686 "symbol instead.",lemp->start,lemp->rule->lhs->name);
00687 lemp->errorcnt++;
00688 sp = lemp->rule->lhs;
00689 }
00690 }else{
00691 sp = lemp->rule->lhs;
00692 }
00693
00694
00695
00696
00697 for(rp=lemp->rule; rp; rp=rp->next){
00698 int i;
00699 for(i=0; i<rp->nrhs; i++){
00700 if( rp->rhs[i]==sp ){
00701 ErrorMsg(lemp->filename,0,
00702 "The start symbol \"%s\" occurs on the "
00703 "right-hand side of a rule. This will result in a parser which "
00704 "does not work properly.",sp->name);
00705 lemp->errorcnt++;
00706 }
00707 }
00708 }
00709
00710
00711
00712
00713 for(rp=sp->rule; rp; rp=rp->nextlhs){
00714 struct config *newcfp;
00715 newcfp = Configlist_addbasis(rp,0);
00716 SetAdd(newcfp->fws,0);
00717 }
00718
00719
00720
00721
00722 (void)getstate(lemp);
00723 return;
00724 }
00725
00726
00727
00728
00729 PRIVATE void buildshifts();
00730 PRIVATE struct state *getstate(lemp)
00731 struct lemon *lemp;
00732 {
00733 struct config *cfp, *bp;
00734 struct state *stp;
00735
00736
00737
00738 Configlist_sortbasis();
00739 bp = Configlist_basis();
00740
00741
00742 stp = State_find(bp);
00743 if( stp ){
00744
00745
00746
00747 struct config *x, *y;
00748 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
00749 Plink_copy(&y->bplp,x->bplp);
00750 Plink_delete(x->fplp);
00751 x->fplp = x->bplp = 0;
00752 }
00753 cfp = Configlist_return();
00754 Configlist_eat(cfp);
00755 }else{
00756
00757 Configlist_closure(lemp);
00758 Configlist_sort();
00759 cfp = Configlist_return();
00760 stp = State_new();
00761 MemoryCheck(stp);
00762 stp->bp = bp;
00763 stp->cfp = cfp;
00764 stp->index = lemp->nstate++;
00765 stp->ap = 0;
00766 State_insert(stp,stp->bp);
00767 buildshifts(lemp,stp);
00768 }
00769 return stp;
00770 }
00771
00772
00773
00774
00775 PRIVATE void buildshifts(lemp,stp)
00776 struct lemon *lemp;
00777 struct state *stp;
00778 {
00779 struct config *cfp;
00780 struct config *bcfp;
00781 struct config *new;
00782 struct symbol *sp;
00783 struct symbol *bsp;
00784 struct state *newstp;
00785
00786
00787
00788 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
00789
00790
00791 for(cfp=stp->cfp; cfp; cfp=cfp->next){
00792 if( cfp->status==COMPLETE ) continue;
00793 if( cfp->dot>=cfp->rp->nrhs ) continue;
00794 Configlist_reset();
00795 sp = cfp->rp->rhs[cfp->dot];
00796
00797
00798
00799
00800 for(bcfp=cfp; bcfp; bcfp=bcfp->next){
00801 if( bcfp->status==COMPLETE ) continue;
00802 if( bcfp->dot>=bcfp->rp->nrhs ) continue;
00803 bsp = bcfp->rp->rhs[bcfp->dot];
00804 if( bsp!=sp ) continue;
00805 bcfp->status = COMPLETE;
00806 new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
00807 Plink_add(&new->bplp,bcfp);
00808 }
00809
00810
00811
00812 newstp = getstate(lemp);
00813
00814
00815
00816 Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
00817 }
00818 }
00819
00820
00821
00822
00823 void FindLinks(lemp)
00824 struct lemon *lemp;
00825 {
00826 int i;
00827 struct config *cfp, *other;
00828 struct state *stp;
00829 struct plink *plp;
00830
00831