00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 #include "basix/mmx_syntax.hpp"
00017 #include "basix/parse_tools.hpp"
00018 namespace mmx {
00019 
// ---------------------------------------------------------------------
// Lexer shorthands.  Every macro below expects a variable named `obj'
// (a parse_instance*) to be in scope; produce() additionally expects a
// variable named `lval' (a generic*).
// ---------------------------------------------------------------------

// Input buffer and its length.
#define s     (obj->lex_string)
#define n     (obj->lex_length)

// Offsets into s: start of the current token, position saved by
// SET_PREV, and the current scanning position.
#define start (obj->lex_start)
#define prev  (obj->lex_prev)
#define pos   (obj->lex_pos)

// Line counters that go with start / prev / pos.
#define start_line (obj->lex_start_line)
#define prev_line  (obj->lex_prev_line)
#define line       (obj->lex_line)

// Offset of the first character of the line containing start / prev / pos;
// produce() subtracts it from an offset to obtain a column.
#define start_begin_line (obj->lex_start_begin_line)
#define prev_begin_line  (obj->lex_prev_begin_line)
#define begin_line       (obj->lex_begin_line)

// Values passed through to source_location by produce().
#define file  (obj->lex_file_name)
#define input (obj->lex_input_number)

// Cursor manipulation.  Each of these expands to a braced block, not to
// a do/while(0) statement, so a call site may or may not follow it with
// a semicolon; keep that form unless every call site is reviewed.
#define INC_POS   { pos++; }
#define ADD_POS(z) { pos += (z); }
// Step over a line terminator: the next line starts right after it.
#define INC_LINE  { pos++; line++; begin_line = pos; }
#define SET_PREV  { prev = pos; prev_line = line; prev_begin_line = begin_line; }
#define SAVE_START { start = pos; start_line = line; start_begin_line = begin_line; }
#define RESTORE_START { pos = start; line = start_line; begin_line = start_begin_line; }

// Finish the current token: store the text s[start..pos) in *lval,
// attach its source location (start and end position, each with line
// and column) and return the token code from the enclosing function.
#define produce(code) { \
  *lval = lit(string (s + start, pos - start));         \
  source_insert (*lval, source_location (*lval, file, input,    \
                source_position(start, start_line, start - start_begin_line),\
                source_position(pos, line, pos - begin_line))); \
  return code; }

// If the next character is c, consume it and produce `code'.
#define test(c,code) \
  if ((pos<n) && (s[pos]==(c))) { INC_POS; produce (code); }

// If the next character is c and the keyword k matches from the token
// start, produce `code'.  The pos<n check keeps the probe inside the
// buffer: pos can already equal n here, e.g. after a previous failed
// test_keyword call ran into the end of the input.
#define keyword(c,k,code) \
  if ((pos<n) && (s[pos]==(c)) && test_keyword(k,obj)) produce (code);

// True for the characters that may continue an identifier: digits,
// ASCII letters, '_', '?' and '$'.  Evaluates c several times, so pass
// only side-effect free expressions.
#define is_alpha(c) \
  ((((c)>='0') && ((c)<='9')) || \
   (((c)>='A') && ((c)<='Z')) || \
   (((c)>='a') && ((c)<='z')) || \
   (((c)=='_') || ((c)=='?') || ((c)=='$')))
00063 
00064 static bool
00065 test_keyword (const char* k, parse_instance* obj) {
00066   RESTORE_START;
00067   while ((*k)!='\0') {
00068     if (pos>=n) return false;
00069     if (s[pos]!=(*k)) return false;
00070     INC_POS;
00071     k++;
00072   }
00073   if (pos>=n) return true;
00074   if (is_alpha (s[pos])) return false;
00075   return true;
00076 }
00077 
00078 int
00079 mmx_lex (generic *lval, parse_instance* obj) {
00080   if (pos == n) {
00081     s= NULL;
00082     *lval= generic ();
00083     return 0;
00084   }
00085 
00086   SET_PREV;
00087   char c= s[pos];
00088   while ((c<=' ') || (c>'~')) {
00089     if ((c == '\n') || (c == '\r')) 
00090       INC_LINE
00091     else
00092       INC_POS;
00093     if (pos == n) {
00094       s= NULL;
00095       *lval= generic ();
00096       return 0;
00097     }
00098     c= s[pos];
00099   }
00100 
00101   SAVE_START;
00102   INC_POS;
00103   switch (c) {
00104   case '!':
00105     test ('=', NOT_EQUAL);
00106     if ((pos<n) && (s[pos]=='<')) {
00107       INC_POS;
00108       test ('=', NOT_LEQ);
00109       produce (NOT_LESS);
00110     }
00111     if ((pos<n) && (s[pos]=='>')) {
00112       INC_POS;
00113       test ('=', NOT_GEQ);
00114       produce (NOT_GREATER);
00115     }
00116     produce (NOT);
00117   case '\042':
00118     while (pos<n) {
00119       if ((s[pos]=='\\') && ((pos+2)<n)) { ADD_POS(2); continue; }
00120       if (s[pos]=='\n' || (s[pos]=='\r')) { INC_LINE; continue; }
00121       if (s[pos]=='\042') { INC_POS; break; }
00122       INC_POS;
00123     }
00124     produce (STRING);
00125   case '#':
00126     produce (SIZE);
00127   case '$':
00128     goto identifier;
00129   case '%':
00130     produce (PERCENT);
00131   case '&':
00132     produce (AMPERSAND);
00133   case '\047':
00134     produce (QUOTE);
00135   case '(':
00136   case ')':
00137     produce (c);
00138   case '*':
00139     test ('=', TIMES_ASSIGN);
00140     produce (TIMES);
00141   case '+':
00142     test ('=', PLUS_ASSIGN);
00143     test ('+', INC);
00144     produce (PLUS);
00145   case ',':
00146     produce (c);
00147   case '-':
00148     test ('=', MINUS_ASSIGN);
00149     test ('-', DEC);
00150     test ('>', INTO);
00151     produce (MINUS);
00152   case '.':
00153     test ('.', RANGE);
00154     produce (ACCESS);
00155   case '/':
00156     test ('\\', AND);
00157     if (s[pos]=='/') {
00158       INC_POS;
00159       while ((pos<n) && (s[pos]!='\n')) {
00160         INC_POS;
00161       }
00162       if (pos<n) { INC_LINE; }
00163       return mmx_lex (lval, obj);
00164     }
00165     if (s[pos]=='{') {
00166       nat level= 1;
00167       INC_POS;
00168       while ((pos+1<n) && (level>0)) {
00169         if (s[pos]=='\n' || (s[pos]=='\r')) {
00170           INC_LINE; continue; }
00171         if ((s[pos]=='/') && (s[pos+1]=='{')) {
00172           level++; ADD_POS(2); continue; }
00173         if ((s[pos]=='}') && (s[pos+1]=='/')) {
00174           level--; ADD_POS(2); continue; }
00175         INC_POS;
00176       }
00177       if (level>0) pos=n;
00178       return mmx_lex (lval, obj);
00179     }
00180     if (s[pos] == '\"') {
00181       INC_POS;
00182       while ((pos+1<n) && ((s[pos]!='\"') || (s[pos+1]!='/'))) {
00183         if (s[pos]=='\n' || (s[pos]=='\r')) {
00184           INC_LINE; }
00185         else {
00186           INC_POS; }
00187       }
00188       if (pos+1 < n) {
00189         ADD_POS(2);
00190       }
00191       produce (STRING);
00192     }
00193     test ('=', OVER_ASSIGN);
00194     produce (OVER);
00195   case '0':
00196   case '1':
00197   case '2':
00198   case '3':
00199   case '4':
00200   case '5':
00201   case '6':
00202   case '7':
00203   case '8':
00204   case '9':
00205     goto identifier;
00206   case ':':
00207     if ((pos<n) && (s[pos]=='=')) {
00208       if ((pos+1<n) && (s[pos+1]=='>')) {
00209         ADD_POS(2); produce (ASSIGN_MACRO); }
00210       INC_POS; produce (ASSIGN);
00211     }
00212     test ('>', TRANSTYPE);
00213     if ((pos<n) && (s[pos]==':')) {
00214       if ((pos+1<n) && (s[pos+1]=='>')) {
00215         ADD_POS(2); produce (VARTRANSTYPE); }
00216       INC_POS; produce (VARTYPE);
00217     }
00218     if ((pos+1<n) && (s[pos]=='-') && (s[pos+1]=='>')) {
00219       ADD_POS(2); produce (MAPSTO);
00220     }
00221     produce (TYPE);
00222   case ';':
00223     produce (c);
00224   case '<':
00225     if ((pos<n) && (s[pos]=='<')) {
00226       if ((pos+1<n) && (s[pos+1]=='<')) {
00227         ADD_POS(2); produce (LEFT_FLUX_BIN); }
00228       if ((pos+1<n) && (s[pos+1]=='*')) {
00229         ADD_POS(2); produce (LEFT_FLUX_VAR); }
00230       if ((pos+1<n) && (s[pos+1]=='%')) {
00231         ADD_POS(2); produce (LEFT_FLUX_STR); }
00232       if ((pos+1<n) && (s[pos+1]=='=')) {
00233         ADD_POS(2); produce (LL_ASSIGN); }
00234       INC_POS; produce (LEFT_FLUX);
00235     }
00236     if ((pos+1<n) && (s[pos]=='=') && (s[pos+1]=='>')) {
00237       ADD_POS(2); produce (EQUIVALENT);
00238     }
00239     test ('=', LEQ);
00240     produce (LESS);
00241   case '=':
00242     if ((pos<n) && (s[pos]=='=')) {
00243       if ((pos+1<n) && (s[pos+1]=='>')) {
00244         ADD_POS(2); produce (DEFINE_MACRO); }
00245       INC_POS; produce (DEFINE);
00246     }
00247     test ('>', IMPLIES);
00248     produce (EQUAL);
00249   case '>':
00250     test ('<', APPEND);
00251     if ((pos<n) && (s[pos]=='>')) {
00252       if ((pos+1<n) && (s[pos+1]=='>')) {
00253         ADD_POS(2); produce (RIGHT_FLUX_BIN); }
00254       if ((pos+1<n) && (s[pos+1]=='=')) {
00255         ADD_POS(2); produce (GG_ASSIGN); }
00256       INC_POS; produce (RIGHT_FLUX);
00257     }
00258     test ('=', GEQ);
00259     produce (GREATER);
00260   case '?':
00261     goto identifier;
00262   case '@':
00263     test ('+', OPLUS);
00264     test ('-', OMINUS);
00265     test ('*', OTIMES);
00266     test ('/', OOVER);
00267     produce (COMPOSE);
00268   case 'A':
00269   case 'B':
00270   case 'C':
00271   case 'D':
00272   case 'E':
00273   case 'F':
00274   case 'G':
00275   case 'H':
00276   case 'I':
00277   case 'J':
00278   case 'K':
00279   case 'L':
00280   case 'M':
00281   case 'N':
00282   case 'O':
00283   case 'P':
00284   case 'Q':
00285   case 'R':
00286   case 'S':
00287   case 'T':
00288   case 'U':
00289   case 'V':
00290   case 'W':
00291   case 'X':
00292   case 'Y':
00293   case 'Z':
00294     goto identifier;
00295   case '[':
00296     produce (c);
00297   case '\\':
00298     test ('/', OR);
00299     produce (c);
00300   case ']':
00301     produce (c);
00302   case '^':
00303     test ('^', FILL);
00304     produce (POWER);
00305   case '_':
00306     goto identifier;
00307   case '`':
00308     produce (BACKQUOTE);
00309   case 'a':
00310     if (pos<n) {
00311       keyword ('b', "abstract", ABSTRACT);
00312       keyword ('n', "and", SEQAND);
00313       keyword ('s', "assume", ASSUME);
00314       keyword ('u', "autofold", AUTOFOLD);
00315     }
00316     goto identifier;
00317   case 'b':
00318     if (pos<n) keyword ('r', "break", BREAK);
00319     goto identifier;
00320   case 'c':
00321     if ((pos+3)<n) {
00322       if (s[pos]=='a') {
00323         INC_POS;
00324         keyword ('s', "case", CASE);
00325         if (s[pos] == 't') {
00326           INC_POS;
00327           keyword ('c', "catch", CATCH);
00328           keyword ('e', "category", CATEGORY);
00329         }
00330         goto identifier;
00331       }
00332       keyword ('l', "class", CLASS);
00333       if ((s[pos]=='o') && (s[pos+1]=='n')) {
00334         ADD_POS(2);
00335         keyword ('c', "concrete", CONCRETE);
00336         if (((pos+1)<n) && s[pos]=='s' && s[pos+1]=='t') {
00337           ADD_POS(2);
00338           keyword ('a', "constant", CONSTANT);
00339           keyword ('r', "constructor", CONSTRUCTOR);
00340           goto identifier;
00341         }
00342         keyword ('t', "continue", CONTINUE);
00343       }
00344     }
00345     goto identifier;
00346   case 'd':
00347     if (pos<n) {
00348       keyword ('e', "destructor", DESTRUCTOR);
00349       if (s[pos]=='i') {
00350         INC_POS;
00351         keyword ('r', "direct", DIRECT);
00352         if (((pos+1)<n) && s[pos]=='s') {
00353           INC_POS;
00354           keyword ('j', "disjunction", DISJUNCTION);
00355           keyword ('p', "dispatch", DISPATCH);
00356           goto identifier;
00357         }
00358         keyword ('v', "div", DIV);
00359         goto identifier;
00360       }
00361       if (s[pos]=='o') {
00362         INC_POS;
00363         if ((pos >= n) || (!is_alpha (s[pos]))) produce (DO);
00364         keyword ('w', "downto", DOWNTO);
00365       }
00366     }
00367     goto identifier;
00368   case 'e':
00369     if ((pos+1)<n) {
00370       keyword ('l', "else", ELSE);
00371       keyword ('v', "evolutive", EVOLUTIVE);
00372       if (s[pos]=='x') {
00373         INC_POS;
00374         keyword ('i', "exists", EXISTS);
00375         if (s[pos]=='p' && pos+1<n) {
00376           INC_POS;
00377           keyword ('l', "explicit", EXPLICIT);
00378           keyword ('o', "export", EXPORT);
00379         }
00380         else if (((pos+2)<n) && (s[pos]=='t') && (s[pos+1]=='e')) {
00381           ADD_POS(2);
00382           keyword ('n', "extend", EXTEND);
00383           keyword ('r', "extern", EXTERN);
00384         }
00385       }
00386     }
00387     goto identifier;
00388   case 'f':
00389     if (pos<n) {
00390       if ((pos+1<n) && (s[pos]=='o') && (s[pos+1]=='r')) {
00391         if ((pos+2<n) && (s[pos+2]=='a')) {
00392           keyword ('o', "forall", FORALL); }
00393         else if ((pos+2<n) && (s[pos+2]=='e')) {
00394           keyword ('o', "foreign", FOREIGN); }
00395         else {
00396           keyword ('o', "for", FOR); }
00397       }
00398       keyword ('r', "from", FROM);
00399     }
00400     goto identifier;
00401   case 'g':
00402     if (pos<n) keyword ('e', "generate", GENERATE);
00403     goto identifier;
00404   case 'h':
00405     if (pos<n) {
00406       keyword ('a', "has", HAS);
00407       keyword ('i', "hidden", HIDDEN);
00408       keyword ('o', "holds", HOLDS);
00409     }
00410     goto identifier;
00411   case 'i':
00412     if (pos<n) {
00413       keyword ('f', "if", IF);
00414       if (s[pos]=='m' && pos+2<n && s[pos+1]=='p') {
00415         ADD_POS(2);
00416         keyword ('o', "import", IMPORT);
00417         keyword ('l', "implicit", IMPLICIT);
00418       }
00419       else if (s[pos]=='n') {
00420         INC_POS;
00421         if ((pos >= n) || (!is_alpha (s[pos]))) produce (IN);
00422         keyword ('d', "indirect", INDIRECT);
00423         keyword ('f', "infix", INFIX);
00424         keyword ('h', "inherit", INHERIT);
00425         keyword ('l', "inline", INLINE);
00426         keyword ('p', "inplace", INPLACE);
00427         if ((pos+3<n) && (s[pos]=='t') && (s[pos+1]=='e') && (s[pos+2]=='r')) {
00428           ADD_POS(3);
00429           keyword ('a', "interactive", INTERACTIVE);
00430           keyword ('n', "intern", INTERN);
00431         }
00432       }
00433     }
00434     goto identifier;
00435   case 'j':
00436     if (pos<n) keyword ('o', "join", JOIN);
00437     goto identifier;
00438   case 'k':
00439     if (pos<n) keyword ('e', "keyword", KEYWORD);
00440     goto identifier;
00441   case 'l':
00442     if (pos<n) {
00443       keyword ('a', "lambda", LAMBDA);
00444       keyword ('i', "literal", LITERAL);
00445       if (((pos+1)<n) && (s[pos]=='o')) {
00446         INC_POS;
00447         keyword ('c', "locked", LOCKED);
00448         keyword ('o', "loop", LOOP);
00449       }
00450     }
00451     goto identifier;
00452   case 'm':
00453     if (pos<n) {
00454       if (((pos+1)<n) && (s[pos]=='a')) {
00455         INC_POS;
00456         keyword ('c', "macro", MACRO);
00457         keyword ('t', "match", MATCH);
00458         goto identifier;
00459       }
00460       keyword ('e', "method", METHOD);
00461       if (((pos+1)<n) && (s[pos]=='o') && (s[pos+1]=='d')) {
00462         if ((pos+2<n) && (s[pos+2]=='u')) {
00463           keyword ('o', "module", MODULE); }
00464         else {
00465           keyword ('o', "mod", MOD); }
00466       }
00467       keyword ('u', "mutable", MUTABLE);
00468     }
00469     goto identifier;
00470   case 'n':
00471     goto identifier;
00472   case 'o':
00473     if (pos<n) {
00474       keyword ('p', "operator", OPERATOR);
00475       keyword ('r', "or", SEQOR);
00476       keyword ('u', "outline", OUTLINE);
00477     }
00478     goto identifier;
00479   case 'p':
00480     if ((pos+1)<n) {
00481       if (s[pos]=='a') {
00482         INC_POS;
00483         keyword ('c', "packed", PACKED);
00484         keyword ('t', "pattern", PATTERN);
00485         goto identifier;
00486       }
00487       keyword ('e', "penalty", PENALTY);
00488       keyword ('o', "postfix", POSTFIX);
00489       if (s[pos]=='r') {
00490         INC_POS;
00491         if ((pos+3<n) && s[pos] == 'e' && s[pos+1] == 'f') {
00492           ADD_POS(2);
00493           keyword ('e', "prefer", PREFER);
00494           keyword ('i', "prefix", PREFIX);
00495           goto identifier;
00496         }
00497         keyword ('i', "private", PRIVATE);
00498         keyword ('o', "protected", PROTECTED);
00499         goto identifier;
00500       }
00501       keyword ('u', "public", PUBLIC);
00502     }
00503     goto identifier;
00504   case 'q':
00505     if (pos<n) keyword ('u', "quo", QUO);
00506     goto identifier;
00507   case 'r':
00508     if ((pos+1)<n) {
00509       keyword ('a', "raise", RAISE);
00510       if (s[pos]=='e') {
00511         INC_POS;
00512         keyword ('m', "rem", REM);
00513         keyword ('t', "return", RETURN);
00514       }
00515     }
00516     goto identifier;
00517   case 's':
00518     if (pos<n) {
00519       keyword ('e', "sequel", SEQUEL);
00520       keyword ('p', "split", SPLIT);
00521       if ((pos+1<n) && (s[pos]=='t')) {
00522         INC_POS;
00523         keyword ('e', "step", STEP);
00524         keyword ('r', "structure", STRUCTURE);
00525         goto identifier;
00526       }
00527     }
00528     goto identifier;
00529   case 't':
00530     if ((pos+1)<n) {
00531       if (s[pos]=='h') {
00532         INC_POS;
00533         keyword ('e', "then", THEN);
00534         keyword ('i', "this", THIS);
00535         goto identifier;
00536       }
00537       keyword ('r', "try", TRY);
00538       keyword ('o', "to", TO);
00539     }
00540     goto identifier;
00541   case 'u':
00542     if (((pos+1)<n) && (s[pos]=='n')) {
00543       INC_POS;
00544       keyword ('p', "unpacked", UNPACKED);
00545       keyword ('t', "until", UNTIL);
00546     }
00547     goto identifier;
00548   case 'v':
00549     if (pos<n) {
00550       keyword ('a', "value", VALUE);
00551       keyword ('i', "virtual", VIRTUAL);
00552     }
00553     goto identifier;
00554   case 'w':
00555     if (pos<n) {
00556       keyword ('h', "while", WHILE);
00557       keyword ('i', "with", WITH);
00558     }
00559     goto identifier;
00560   case 'x':
00561     if (pos<n) keyword ('o', "xor", XOR);
00562     goto identifier;
00563   case 'y':
00564     if (pos<n) keyword ('i', "yield", YIELD);
00565     goto identifier;
00566   case 'z':
00567     goto identifier;
00568   case '{':
00569     produce (c);
00570   case '|':
00571     test ('|', VWHERE);
00572     produce (WHERE);
00573   case '}':
00574     produce (c);
00575   case '~':
00576     test ('>', CONVERTS);
00577     produce (TILDA);
00578   default:
00579     return mmx_lex (lval, obj);
00580 
00581   identifier:
00582     while ((pos<n) &&
00583            (is_alpha (s[pos]) ||
00584             (s[pos] == '.' && pos>0 && (pos+1)<n &&
00585              s[pos-1] >= '0' && s[pos-1] <= '9' &&
00586              s[pos+1] >= '0' && s[pos+1] <= '9')))
00587       INC_POS;
00588     produce (IDENTIFIER);
00589   }
00590 }
00591 
00592 }