您的位置:首页 > 其它

词法分析

2017-01-16 19:07 190 查看
#include <limits.h>
#include <stdio.h>
#include <string.h>

const int INT=0;
const int BOOL = 1;
const int VOID = 2;
const int TRUE = 3;
const int FALSE = 4;
const int IF = 5;
const int ELSE = 6;
const int FOR = 7;
const int WHILE = 8;
const int CONTINUE = 9;
const int BREAK = 10;

const int INC = 11;
const int DEC = 12;

const int ADD = 13;
const int SUBTRACT = 14;
const int MULTI = 15;
const int DIVIDE = 16;

const int GT = 17;
const int GE = 18;
const int LT = 19;
const int LE = 20;
const int EQ = 21;
const int NE = 22;

const int AND = 23;
const int OR = 24;
const int NOT = 25;

const int ASSIGN = 26;
const int ASSIGN_ADD = 27;
const int ASSIGN_SUBTRACT = 28;
const int ASSIGN_MULTI = 29;
const int ASSIGN_DIVIDE = 30;

const int PARENTHESIS_START = 31;
const int PARENTHESIS_END = 32;
const int BRACE_START = 33;
const int BRACE_END = 34;
const int COMMA = 35;
const int SEMICOLON = 36;

const int ID = 37;
const int CONST = 38;

const int POSITIVE = 39;
const int NEGATIVE = 40;

const int NONE = 41;

char* keywords[]={"int", "bool", "void", "true", "false", "if", "else", "for", "while", "continue", "break",
"++", "--", "+", "-", "*", "/", ">", ">=", "<", "<=", "==", "!=", "&&", "||", "!",
"=", "+=", "-=", "*=", "/=", "(", ")", "{", "}", ",", ";"};

class Token{
public:
int type, value;
char* p_name;
Token(int NONE, int value=-1, char* p_name = NULL);
void print();
~Token();
};

class BufferedReader{
public:
const static int BLOCKN = 1024;

BufferedReader();
Token readToken();

private:
char buffer[BLOCKN+1];
int buffer_off;
char buffer_c;

char readChar();
void pushChar(char c){buffer_c = c;}
bool loadBuffer();
Token readOperator();
Token readInt();
Token readIDOrKey();
};

Token::Token(int type, int value, char* p_name)
{
this->type = type;
this->value = value;
this->p_name = p_name;
}

Token::~Token()
{
if(p_name != NULL) delete[] p_name;
}

void Token::print()
{
for(int i = 0; i <= 10; i++)
{
if(type == i)
{
printf("<keyword, %s>\n", keywords[i]);
return;
}
}

for(int i = 11; i <= 36; i++)
{
if(type == i)
{
printf("<operator, %s>\n", keywords[i]);
return;
}
}

if(type == ID)
{
printf("<ID, %s>\n", p_name);
}
else if(type == CONST)
{
printf("<const, %d>\n", value);
}

}

bool BufferedReader::loadBuffer()
{
if(fgets(buffer, BufferedReader::BLOCKN, stdin) == NULL)return false;
buffer_off = 0;
return true;
}

BufferedReader::BufferedReader()
{
buffer[buffer_off=0] = 0;
buffer_c = 0;
}

char BufferedReader::readChar()
{
if(buffer_c != 0)
{
char ch = buffer_c;
buffer_c = 0;
return ch;
}
if(buffer[buffer_off] == 0 && !loadBuffer())return EOF;
return buffer[buffer_off++];
}

Token BufferedReader::readOperator()
{
char ch1 = readChar();
char ch2;
switch(ch1)
{
case '+':
ch2 = readChar();
if(ch2 == '+')return Token(INC);
else if(ch2 == '=')return Token(ASSIGN_ADD);
else
{
putchar(ch2);
return Token(ADD);
}
case '-':
ch2 = readChar();
if(ch2 == '-')return Token(DEC);
else if(ch2 == '=')return Token(ASSIGN_SUBTRACT);
else
{
putchar(ch2);
return Token(SUBTRACT);
}
case '*':
ch2 = readChar();
if(ch2 == '=')return Token(ASSIGN_MULTI);
else
{
putchar(ch2);
return Token(MULTI);
}
case '/':
ch2 = readChar();
if(ch2 == '=')return Token(ASSIGN_DIVIDE);
else
{
putchar(ch2);
return Token(DIVIDE);
}
case '>':
ch2 = readChar();
if(ch2 == '=')return Token(GE);
else
{
putchar(ch2);
return Token(GT);
}
case '<':
ch2 = readChar();
if(ch2 == '=')return Token(LE);
else
{
putchar(ch2);
return Token(LT);
}
case '!':
ch2 = readChar();
if(ch2 == '=')return Token(NE);
else
{
putchar(ch2);
return Token(NOT);
}
case '=':
ch2 = readChar();
if(ch2 == '=')return Token(EQ);
else
{
putchar(ch2);
return Token(ASSIGN);
}
case '&':
ch2 = readChar();
if(ch2 == '&')return Token(AND);
else
{
putchar(ch2);
return Token(NONE);
}
case '|':
ch2 = readChar();
if(ch2 == '|')return Token(OR);
else
{
putchar(ch2);
return Token(NONE);
}
case '(':
return Token(PARENTHESIS_START);
case ')':
return Token(PARENTHESIS_END);
case '{':
return Token(BRACE_START);
case '}':
return Token(BRACE_END);
case ',':
return Token(COMMA);
case ';':
return Token(SEMICOLON);
}
return Token(NONE);
}

Token BufferedReader::readToken()
{
char ch = readChar();

if(ch == '+' || ch == '-' || ch == '*' || ch == '/' ||\
ch == '>' || ch == '<' || ch == '=' || ch == '!' ||\
ch == '&' || ch == '|'||\
ch == '(' || ch == ')' || ch == '{' || ch == '}' ||\
ch == ',' || ch == ';')
{
pushChar(ch);
return this->readOperator();
}
else if(ch >= '0' && ch <= '9')
{
pushChar(ch);
return this->readInt();
}
else if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch == '_')
{
pushChar(ch);
return this->readIDOrKey();
}
else if(ch == EOF)
{
return Token(EOF);
}
else
{
return Token(NONE);
}
}

Token BufferedReader::readIDOrKey()
{
char* p_name = new char[256];
int len = 0;
char ch;

while(true)
{
ch = readChar();
if(ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9' || ch == '_')
{
p_name[len++] = ch;
}
else
{
pushChar(ch);
p_name[len++] = 0;
break;
}
}

for(int i = 0; i <= 10; i++)
{
if(strcmp(p_name, keywords[i]) == 0)
{
delete[] p_name;
return Token(i);
}
}
return Token(ID, -1, p_name);
}

Token BufferedReader::readInt()
{
int res = 0;
char ch;
while(true)
{
ch = readChar();
if(ch >= '0' && ch <= '9')
{
res = res * 10 + ch - '0';
}
else
{
pushChar(ch);
return Token(CONST, res);
}

}
}

int main()
{
//freopen("D:/Documents/Visual Studio 2010/Projects/proj2/Debug/in.txt", "r", stdin);
BufferedReader reader = BufferedReader();
while(true)
{
Token t = reader.readToken();
if(t.type == EOF)break;
t.print();
}
return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: