词法分析
2017-01-16 19:07
190 查看
#include <limits.h>
#include <stdio.h>
#include <string.h>

#include <string>

// ---------------------------------------------------------------------------
// Token type codes.
//
// Codes 0..36 double as indices into `keywords` below, so the order of the two
// lists must stay in sync: 0..10 are reserved words, 11..36 are operators and
// punctuation.
// ---------------------------------------------------------------------------
const int INT = 0;
const int BOOL = 1;
const int VOID = 2;
const int TRUE = 3;
const int FALSE = 4;
const int IF = 5;
const int ELSE = 6;
const int FOR = 7;
const int WHILE = 8;
const int CONTINUE = 9;
const int BREAK = 10;
const int INC = 11;
const int DEC = 12;
const int ADD = 13;
const int SUBTRACT = 14;
const int MULTI = 15;
const int DIVIDE = 16;
const int GT = 17;
const int GE = 18;
const int LT = 19;
const int LE = 20;
const int EQ = 21;
const int NE = 22;
const int AND = 23;
const int OR = 24;
const int NOT = 25;
const int ASSIGN = 26;
const int ASSIGN_ADD = 27;
const int ASSIGN_SUBTRACT = 28;
const int ASSIGN_MULTI = 29;
const int ASSIGN_DIVIDE = 30;
const int PARENTHESIS_START = 31;
const int PARENTHESIS_END = 32;
const int BRACE_START = 33;
const int BRACE_END = 34;
const int COMMA = 35;
const int SEMICOLON = 36;
const int ID = 37;
const int CONST = 38;
const int POSITIVE = 39;
const int NEGATIVE = 40;
const int NONE = 41;

// Spelling of every token whose type code is in [INT, SEMICOLON].
const char* keywords[] = {
    "int", "bool", "void", "true", "false", "if", "else", "for", "while",
    "continue", "break",
    "++", "--", "+", "-", "*", "/", ">", ">=", "<", "<=", "==", "!=",
    "&&", "||", "!", "=", "+=", "-=", "*=", "/=",
    "(", ")", "{", "}", ",", ";"};

// A single lexical token.
//
// `type` is one of the constants above (or EOF for end of input), `value`
// carries the number for CONST tokens, and `p_name` carries the spelling for
// ID tokens. `p_name` is owned by the token: it must be NULL or point to
// storage obtained with `new char[]`, and it is released in the destructor.
class Token {
public:
    int type, value;
    char* p_name;

    // Takes ownership of `p_name` (see class comment).
    Token(int type, int value = -1, char* p_name = NULL);
    // Deep copies, so two tokens never share (and double-free) one name.
    Token(const Token& other);
    Token& operator=(const Token& other);
    ~Token();

    // Writes the token to stdout; tokens of an unprintable type (NONE, EOF,
    // ...) produce no output.
    void print() const;
};

// Line-buffered tokenizer reading from stdin.
class BufferedReader {
public:
    const static int BLOCKN = 1024;

    BufferedReader();

    // Returns the next token. Characters that start no token (whitespace,
    // unknown symbols) yield a NONE token; end of input yields a token whose
    // type is EOF.
    Token readToken();

private:
    char buffer[BLOCKN + 1];  // current chunk of input, NUL-terminated
    int buffer_off;           // index of the next unread character in buffer
    int pushed_char;          // one-character push-back slot (may hold EOF)
    bool has_pushed_char;     // whether pushed_char is currently occupied

    int readChar();
    void pushChar(int c) {
        pushed_char = c;
        has_pushed_char = true;
    }
    bool loadBuffer();
    Token readOperator();
    Token readInt();
    Token readIDOrKey();
};

// ASCII-only character classes (deliberately not locale dependent).
static bool isDigit(int ch) { return ch >= '0' && ch <= '9'; }

static bool isIdentStart(int ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
}

static bool isIdentChar(int ch) { return isIdentStart(ch) || isDigit(ch); }

// True for every character that can begin an operator or punctuation token.
static bool isOperatorStart(int ch) {
    switch (ch) {
        case '+': case '-': case '*': case '/':
        case '>': case '<': case '=': case '!':
        case '&': case '|':
        case '(': case ')': case '{': case '}':
        case ',': case ';':
            return true;
        default:
            return false;
    }
}

Token::Token(int type, int value, char* p_name)
    : type(type), value(value), p_name(p_name) {}

Token::Token(const Token& other)
    : type(other.type), value(other.value), p_name(NULL) {
    if (other.p_name != NULL) {
        const size_t bytes = strlen(other.p_name) + 1;
        p_name = new char[bytes];
        memcpy(p_name, other.p_name, bytes);
    }
}

Token& Token::operator=(const Token& other) {
    if (this != &other) {
        // Copy first, then swap the name in: if the allocation throws, *this
        // is left untouched, and the old name is freed by `copy`'s destructor.
        Token copy(other);
        type = copy.type;
        value = copy.value;
        char* old_name = p_name;
        p_name = copy.p_name;
        copy.p_name = old_name;
    }
    return *this;
}

Token::~Token() {
    delete[] p_name;  // delete[] on NULL is a no-op
}

void Token::print() const {
    if (type >= INT && type <= BREAK) {
        printf("<keyword, %s>\n", keywords[type]);
    } else if (type >= INC && type <= SEMICOLON) {
        printf("<operator, %s>\n", keywords[type]);
    } else if (type == ID) {
        // Guard against a nameless ID token; passing NULL to %s is undefined.
        printf("<ID, %s>\n", p_name != NULL ? p_name : "");
    } else if (type == CONST) {
        printf("<const, %d>\n", value);
    }
}

BufferedReader::BufferedReader()
    : buffer_off(0), pushed_char(0), has_pushed_char(false) {
    buffer[0] = 0;  // empty buffer forces a load on the first read
}

// Refills the buffer with the next chunk of stdin. Returns false at end of
// input (or on a read error).
bool BufferedReader::loadBuffer() {
    if (fgets(buffer, BufferedReader::BLOCKN, stdin) == NULL) return false;
    buffer_off = 0;
    return true;
}

// Returns the next character as an unsigned-char value, or EOF. The result is
// an int so that EOF can never be confused with a real input byte.
int BufferedReader::readChar() {
    if (has_pushed_char) {
        has_pushed_char = false;
        return pushed_char;
    }
    // Loop rather than test once: a chunk that starts with a NUL byte looks
    // empty, and we must not step past its terminator.
    while (buffer[buffer_off] == 0) {
        if (!loadBuffer()) return EOF;
    }
    return static_cast<unsigned char>(buffer[buffer_off++]);
}

// Reads an operator or punctuation token. For operators that may be one or two
// characters long, the look-ahead character is pushed back when it does not
// extend the operator, so it is tokenized on the next call.
Token BufferedReader::readOperator() {
    const int first = readChar();
    int next;
    switch (first) {
        case '+':
            next = readChar();
            if (next == '+') return Token(INC);
            if (next == '=') return Token(ASSIGN_ADD);
            pushChar(next);
            return Token(ADD);
        case '-':
            next = readChar();
            if (next == '-') return Token(DEC);
            if (next == '=') return Token(ASSIGN_SUBTRACT);
            pushChar(next);
            return Token(SUBTRACT);
        case '*':
            next = readChar();
            if (next == '=') return Token(ASSIGN_MULTI);
            pushChar(next);
            return Token(MULTI);
        case '/':
            next = readChar();
            if (next == '=') return Token(ASSIGN_DIVIDE);
            pushChar(next);
            return Token(DIVIDE);
        case '>':
            next = readChar();
            if (next == '=') return Token(GE);
            pushChar(next);
            return Token(GT);
        case '<':
            next = readChar();
            if (next == '=') return Token(LE);
            pushChar(next);
            return Token(LT);
        case '!':
            next = readChar();
            if (next == '=') return Token(NE);
            pushChar(next);
            return Token(NOT);
        case '=':
            next = readChar();
            if (next == '=') return Token(EQ);
            pushChar(next);
            return Token(ASSIGN);
        case '&':
            // A lone '&' is not a token of this language.
            next = readChar();
            if (next == '&') return Token(AND);
            pushChar(next);
            return Token(NONE);
        case '|':
            // A lone '|' is not a token of this language.
            next = readChar();
            if (next == '|') return Token(OR);
            pushChar(next);
            return Token(NONE);
        case '(': return Token(PARENTHESIS_START);
        case ')': return Token(PARENTHESIS_END);
        case '{': return Token(BRACE_START);
        case '}': return Token(BRACE_END);
        case ',': return Token(COMMA);
        case ';': return Token(SEMICOLON);
    }
    return Token(NONE);
}

Token BufferedReader::readToken() {
    const int ch = readChar();
    if (ch == EOF) return Token(EOF);

    // The specialised readers consume their own first character, so hand it
    // back before dispatching.
    if (isOperatorStart(ch)) {
        pushChar(ch);
        return readOperator();
    }
    if (isDigit(ch)) {
        pushChar(ch);
        return readInt();
    }
    if (isIdentStart(ch)) {
        pushChar(ch);
        return readIDOrKey();
    }
    return Token(NONE);
}

// Reads an identifier and returns either the matching keyword token or an ID
// token that owns a heap copy of the spelling.
Token BufferedReader::readIDOrKey() {
    // std::string grows as needed, so arbitrarily long identifiers are safe.
    std::string name;
    for (;;) {
        const int ch = readChar();
        if (!isIdentChar(ch)) {
            pushChar(ch);
            break;
        }
        name += static_cast<char>(ch);
    }

    for (int i = INT; i <= BREAK; i++) {
        if (name == keywords[i]) return Token(i);
    }

    char* p_name = new char[name.size() + 1];
    memcpy(p_name, name.c_str(), name.size() + 1);
    return Token(ID, -1, p_name);
}

// Reads a run of decimal digits as a CONST token. Literals too large for an
// int saturate at INT_MAX instead of overflowing (signed overflow is
// undefined behaviour).
Token BufferedReader::readInt() {
    int res = 0;
    for (;;) {
        const int ch = readChar();
        if (!isDigit(ch)) {
            pushChar(ch);
            return Token(CONST, res);
        }
        const int digit = ch - '0';
        if (res > (INT_MAX - digit) / 10) {
            res = INT_MAX;
        } else {
            res = res * 10 + digit;
        }
    }
}

int main() {
    // To lex a file instead of stdin, redirect it here, e.g.:
    // freopen("D:/Documents/Visual Studio 2010/Projects/proj2/Debug/in.txt", "r", stdin);
    BufferedReader reader;
    while (true) {
        Token t = reader.readToken();
        if (t.type == EOF) break;
        t.print();
    }
    return 0;
}