用状态机提取词语(lexer)

最后更新于:2022-04-02 04:07:49

[TOC] ## 概述 提取关键字和变量名 提取字符串 提取操作符 提取数字 ## 方式 ### 提取关键词或变量 **状态机描述:** ![](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/91/e6/91e67d23073c1dfce9cce73e43f56568_800x195.png)
token.java
```java
/**
 * Reads a maximal run of letters from {@code it} and classifies it as a keyword,
 * a boolean literal, or a variable name.
 *
 * <p>Characters are inspected with {@code peek()} and consumed with {@code next()} only
 * while {@code AlphabetHelper.isLetter} accepts them, so the first non-letter character
 * is left in the iterator for the next lexer step.
 *
 * @param it character source positioned at the first character of the word
 * @return a {@code KEYWORD} token when {@code KeyWords.isKeyWord} accepts the text,
 *     a {@code BOOLEAN} token for {@code "true"} / {@code "false"},
 *     otherwise a {@code VARIABLE} token
 */
public static Token makeVarOrKeyWord(PeekIterator it) {
    // StringBuilder avoids re-copying the accumulated text on every appended character.
    StringBuilder word = new StringBuilder();

    // Collect consecutive letters.
    while (it.hasNext()) {
        Character lookahead = it.peek();
        if (!AlphabetHelper.isLetter(lookahead)) {
            break;
        }
        word.append(lookahead);
        it.next();
    }
    String s = word.toString();

    // Keywords take precedence over boolean literals and plain identifiers.
    if (KeyWords.isKeyWord(s)) {
        return new Token(TokenType.KEYWORD, s);
    }
    if (s.equals("true") || s.equals("false")) {
        return new Token(TokenType.BOOLEAN, s);
    }
    return new Token(TokenType.VARIABLE, s);
}
```

### 提取字符串

**状态机描述:**

![](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/57/5f/575f430b105d529527df7c7ea5543047_400x187.png)

- 如果首尾两边同为单引号,或同为双引号(即起始引号与结束引号类型一致),则为字符串
token.java
```java
/**
 * Reads a quoted string literal from {@code it}.
 *
 * <p>States: 0 = nothing read yet, 1 = inside a single-quoted string,
 * 2 = inside a double-quoted string. The string ends at the first quote character of the
 * same kind that opened it; the other kind of quote is treated as ordinary content.
 * The returned token text includes both the opening and the closing quote.
 *
 * @param it character source positioned at the opening quote
 * @return a {@code STRING} token whose value is the literal including its quotes
 * @throws LexicalException if the input ends before the closing quote is found
 */
public static Token makeString(PeekIterator it) throws LexicalException {
    StringBuilder s = new StringBuilder();
    int state = 0;

    while (it.hasNext()) {
        char c = it.next();
        switch (state) {
            case 0:
                // The opening quote selects which closing quote terminates the literal.
                if (c == '\'') {
                    state = 1;
                } else if (c == '"') {
                    state = 2;
                }
                // The character is appended even when it is not a quote (state stays 0).
                s.append(c);
                break;
            case 1:
                if (c == '\'') {
                    return new Token(TokenType.STRING, s.toString() + c);
                }
                s.append(c);
                break;
            case 2:
                if (c == '"') {
                    return new Token(TokenType.STRING, s.toString() + c);
                }
                s.append(c);
                break;
        }
    } // end while

    // Reached when the iterator is exhausted without a closing quote; the throw also
    // satisfies the compiler's requirement that every path returns or throws.
    throw new LexicalException("Unexpected error");
}
```

### 提取操作符 **状态机描述:**
点击打开状态机描述 ![](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/56/02/5602757aea59d814fae73b46fc2d1544_1078x5260.png)

token.java ``` public static Token makeOp(PeekIterator it) throws LexicalException { int state = 0; while (it.hasNext()) { char lookahead = it.next(); switch (state) { case 0: switch (lookahead) { case '+': state = 1; break; case '-': state = 2; break; case '*': state = 3; break; case '/': state = 4; break; case '>': state = 5; break; case '<': state = 6; break; case '=': state = 7; break; case '!': state = 8; break; case '&': state = 9; break; case '|': state = 10; break; case '^': state = 11; break; case '%': state = 12; break; case ',': return new Token(TokenType.OPERATOR, ","); case ';': return new Token(TokenType.OPERATOR, ";"); }// while end break; case 1: if (lookahead=='+'){ return new Token(TokenType.OPERATOR,"++"); }else if (lookahead == '='){ return new Token(TokenType.OPERATOR,"+="); }else{ it.putBack(); return new Token(TokenType.OPERATOR,"+"); } case 2: if (lookahead=='-'){ return new Token(TokenType.OPERATOR,"--"); }else if (lookahead == '='){ return new Token(TokenType.OPERATOR,"-="); }else{ it.putBack(); return new Token(TokenType.OPERATOR,"+"); } case 3: if (lookahead == '='){ return new Token(TokenType.OPERATOR,"*="); }else{ it.putBack(); return new Token(TokenType.OPERATOR,"*"); } case 4: if (lookahead == '='){ return new Token(TokenType.OPERATOR,"/="); }else{ it.putBack(); return new Token(TokenType.OPERATOR,"/"); } case 5: if ( lookahead == '='){ return new Token(TokenType.OPERATOR,">="); }else if (lookahead == '>'){ return new Token(TokenType.OPERATOR,">>"); }else{ it.putBack(); return new Token(TokenType.OPERATOR,">"); } case 6: if ( lookahead == '='){ return new Token(TokenType.OPERATOR,"<="); }else if (lookahead == '<'){ return new Token(TokenType.OPERATOR,"<<"); }else{ it.putBack(); return new Token(TokenType.OPERATOR,">"); } case 7: if ( lookahead == '='){ return new Token(TokenType.OPERATOR,"=="); }else{ it.putBack(); return new Token(TokenType.OPERATOR,"="); } case 8: if ( lookahead == '='){ return new Token(TokenType.OPERATOR,"!="); 
}else{ it.putBack(); return new Token(TokenType.OPERATOR,"!"); } case 9: if ( lookahead == '&'){ return new Token(TokenType.OPERATOR,"&&"); }else if (lookahead=='='){ return new Token(TokenType.OPERATOR,"&="); }else{ it.putBack(); return new Token(TokenType.OPERATOR,"&"); } case 10: if(lookahead == '|') { return new Token(TokenType.OPERATOR, "||"); } else if (lookahead == '=') { return new Token(TokenType.OPERATOR, "|="); } else { it.putBack(); return new Token(TokenType.OPERATOR, "|"); } case 11: if(lookahead == '^') { return new Token(TokenType.OPERATOR, "^^"); } else if (lookahead == '=') { return new Token(TokenType.OPERATOR, "^="); } else { it.putBack(); return new Token(TokenType.OPERATOR, "^"); } case 12: if (lookahead == '=') { return new Token(TokenType.OPERATOR, "%="); } else { it.putBack(); return new Token(TokenType.OPERATOR, "%"); } } } throw new LexicalException("Unexpected error"); } ```

### 提取数字

**状态机描述:**

![](https://docs.gechiui.com/gc-content/uploads/sites/kancloud/c4/b0/c4b01b6bd40ed6b2b59f6cab96007569_800x268.png)

简单说明:

- 查看 "0"->"1" 这条状态转换:若用户一直输入 "0",而接下来输入的字符既不是 "1-9",也不是 ".",则说明这个整数为 0
token.java
```java
/**
 * Reads an integer or floating-point literal from {@code it}.
 *
 * <p>States of the machine:
 * <ul>
 *   <li>0 - start</li>
 *   <li>1 - only {@code '0'} characters read so far</li>
 *   <li>2 - reading the digits of the integer part</li>
 *   <li>3 - a sign ({@code +} or {@code -}) has been read</li>
 *   <li>4 - a {@code '.'} has been read after an integer part</li>
 *   <li>5 - a {@code '.'} has been read with no integer part; a digit must follow</li>
 *   <li>20 - reading the digits of the fractional part</li>
 * </ul>
 *
 * <p>Every character that keeps the machine running is appended to the token text.
 * The character that ends the number is not part of the literal, so it is handed back
 * with {@code putBack()} before the token is returned, mirroring how {@code makeOp}
 * treats its lookahead.
 *
 * @param it character source positioned at the first character of the number
 * @return an {@code INTEGER} or {@code FLOAT} token holding the literal text
 * @throws LexicalException if a character is not allowed in the current state (for example
 *     a second {@code '.'}), or if the input is exhausted before a terminating character
 */
public static Token makeNumber(PeekIterator it) throws LexicalException {
    StringBuilder s = new StringBuilder();
    int state = 0;

    while (it.hasNext()) {
        char lookahead = it.next();

        switch (state) {
            case 0:
                if (lookahead == '0') {
                    state = 1;
                } else if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '+' || lookahead == '-') {
                    state = 3;
                } else if (lookahead == '.') {
                    state = 5;
                }
                break;

            case 1:
                if (lookahead == '0') {
                    state = 1;
                } else if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '.') {
                    state = 4;
                } else {
                    // The terminator belongs to the next token: give it back.
                    it.putBack();
                    return new Token(TokenType.INTEGER, s.toString());
                }
                break;

            case 2:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '.') {
                    state = 4;
                } else {
                    it.putBack();
                    return new Token(TokenType.INTEGER, s.toString());
                }
                break;

            case 3:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 2;
                } else if (lookahead == '.') {
                    state = 5;
                } else {
                    throw new LexicalException(lookahead);
                }
                break;

            case 4:
                if (lookahead == '.') {
                    throw new LexicalException(lookahead);
                } else if (AlphabetHelper.isNumber(lookahead)) {
                    state = 20;
                } else {
                    it.putBack();
                    return new Token(TokenType.FLOAT, s.toString());
                }
                break;

            case 5:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 20;
                } else {
                    throw new LexicalException(lookahead);
                }
                break;

            case 20:
                if (AlphabetHelper.isNumber(lookahead)) {
                    state = 20;
                } else if (lookahead == '.') {
                    throw new LexicalException(lookahead);
                } else {
                    it.putBack();
                    return new Token(TokenType.FLOAT, s.toString());
                }
                break;
        } // end switch

        // Only reached when the state machine keeps going, so the character is part
        // of the literal.
        s.append(lookahead);
    } // end while

    // The iterator ran out before a terminating character was seen.
    throw new LexicalException("Unexpected err ");
}
```

';