您的位置:首页 > 其它

词法分析 与 LL(1)语法分析

2011-06-19 13:55 399 查看
词法分析

lex.h
#pragma once
#ifndef LEX_H
#define LEX_H
#include <fstream>
#include <string>
#include <cassert>
typedef long PreciseInteger;//Temporary: use long and assume it can hold any integer literal; refine later
typedef double PreciseFloat;//Temporary: use double and assume it can hold any floating-point literal; refine later
//Keyword enumeration - the enumerator value doubles as the keyword code.
enum Keyword
{
//not a keyword
KEYWORD_INVALID,
//data-type keywords
KEYWORD_CHAR,
KEYWORD_SHORT,
KEYWORD_INT,
KEYWORD_LONG,
KEYWORD_FLOAT,
KEYWORD_DOULBE,//NOTE(review): misspelling of DOUBLE; kept because other translation units reference this name
KEYWORD_SIGNED,
KEYWORD_UNSIGNED,
KEYWORD_ENUM,
KEYWORD_UNION,
KEYWORD_STRUCT,
KEYWORD_VOID,
//control-flow keywords
//loops
KEYWORD_FOR,
KEYWORD_DO,
KEYWORD_WHILE,
KEYWORD_BREAK,
KEYWORD_CONTINUE,
//conditionals
KEYWORD_IF,
KEYWORD_ELSE,
KEYWORD_GOTO,
//switch
KEYWORD_SWITCH,
KEYWORD_CASE,
KEYWORD_DEFAULT,
//return
KEYWORD_RETURN,
//storage classes
KEYWORD_AUTO,
KEYWORD_EXTERN,
KEYWORD_REGISTER,
KEYWORD_STATIC,
//others
KEYWORD_CONST,
KEYWORD_SIZEOF,
KEYWORD_TYPEDEF,
KEYWORD_VOLATILE
};
//Number of keyword codes (including KEYWORD_INVALID).
const int KeywordNumber = KEYWORD_VOLATILE + 1;
//Operator enumeration - the enumerator value doubles as the operator code.
enum Operator
{
OPERATOR_INVALID,
OPERATOR_OROR,	// ||
OPERATOR_ANDAND,	// &&
OPERATOR_EQEQ,	// ==
OPERATOR_NOTEQ,	// !=
OPERATOR_LT,		// <
OPERATOR_LE,		// <=
OPERATOR_GT,		// >
OPERATOR_GE,		// >=
OPERATOR_PLUS,	// +
OPERATOR_MINUS,	// -
OPERATOR_OR,		// |
OPERATOR_XOR,		// ^
OPERATOR_MULT,	// *
OPERATOR_DIV,		// /
OPERATOR_MOD,		// %
OPERATOR_LSHIFT,	// <<
OPERATOR_RSHIFT,	// >>
OPERATOR_AND,		// &
OPERATOR_NOT,		// !
OPERATOR_EQ,		// =
OPERATOR_PLUSEQ,	// +=
OPERATOR_MINUSEQ,	// -=
OPERATOR_OREQ,	// |=
OPERATOR_XOREQ,	// ^=
OPERATOR_MULTEQ,	// *=
OPERATOR_DIVEQ,	// /=
OPERATOR_MODEQ,	// %=
OPERATOR_LSHIFTEQ,	// <<=
OPERATOR_RSHIFTEQ,	// >>=
OPERATOR_ANDEQ,	// &=
OPERATOR_PLUSPLUS,	// ++
OPERATOR_MINUSMINUS,	// --
// "? :" is the ternary conditional; it is represented by its two punctuators
OPERATOR_CONDITON_BEGIN, //?
OPERATOR_CONDITON_END,  //:
OPERATOR_SEMICOLON,	// ;
OPERATOR_DOT,		// .
OPERATOR_RPOINTOR,	// ->
OPERATOR_COMMA,	// ,
OPERATOR_LPAREN,	// (
OPERATOR_RPAREN,	// )
OPERATOR_LCURLY,	// {
OPERATOR_RCURLY,	// }
OPERATOR_LSQUARE,	// [
OPERATOR_RSQUARE	// ]
};
//Number of operator codes (including OPERATOR_INVALID).
const int OperatorNumber = OPERATOR_RSQUARE + 1;
//A lexical token: a tagged union of the possible payloads (keyword code,
//operator code, literal value, identifier/string spelling) plus the
//token's location in the source file.
class Token
{
public:
//Token categories - the tag of the payload union below.
enum Classification
{
TOKEN_INVALID,//unrecognized input
TOKEN_EOF,//end of file
TOKEN_KEYWORD,//language keyword
TOKEN_IDENTIFIER,//identifier
TOKEN_OPERATOR,//operator / punctuator
TOKEN_STRING,//string literal
TOKEN_INTEGER,//integer literal
TOKEN_FLOAT,//floating-point literal
TOKEN_CHAR,//character literal
};
public:
Token(const Token& t);
Token& operator=(const Token& t);
bool operator==(const Token& t);
//Factory functions - the only way to create a Token, which guarantees
//every Token carries a valid Classification.
static Token make_invalid_token(int location)
{
return Token(TOKEN_INVALID, location);
}
static Token make_eof_token(int location)
{
return Token(TOKEN_EOF, location);
}
static Token make_keyword_token(Keyword keyword, int location)
{
Token token(TOKEN_KEYWORD, location);
token.value_.keyword_value = keyword;
return token;
}
//Fix: take the spelling by const reference - the factory only copies it,
//so const strings must be accepted too (backward compatible for callers).
static Token make_identifier_token(const std::string &value, int location)
{
Token token(TOKEN_IDENTIFIER, location);
token.value_.identifier_value = new std::string(value);
return token;
}
static Token make_operator_token(Operator op, int location)
{
Token token(TOKEN_OPERATOR, location);
token.value_.operator_value = op;
return token;
}
static Token make_string_token(const std::string &value, int location)
{
Token token(TOKEN_STRING, location);
token.value_.string_value = new std::string(value);
return token;
}
static Token make_integer_token(PreciseInteger value, int location)
{
Token token(TOKEN_INTEGER, location);
token.value_.integer_value = value;
return token;
}
//Fix: parameter was misspelled "vaule".
static Token make_float_token(PreciseFloat value, int location)
{
Token token(TOKEN_FLOAT, location);
token.value_.float_value = value;
return token;
}
static Token make_char_token(char value, int location)
{
Token token(TOKEN_CHAR, location);
token.value_.char_value = value;
return token;
}
//Accessors - each payload accessor asserts that the Token actually
//holds that kind of value.
Classification get_classification() const
{
return this->classification_;
}
int get_location() const
{
return this->source_location_;
}
Keyword get_keyword_value() const
{
assert(this->classification_ == TOKEN_KEYWORD);
return this->value_.keyword_value;
}
std::string get_identifier_value() const
{
assert(this->classification_ == TOKEN_IDENTIFIER);
return *(this->value_.identifier_value);
}
Operator get_operator_value() const
{
assert(this->classification_ == TOKEN_OPERATOR);
return this->value_.operator_value;
}
std::string get_string_value() const
{
assert(this->classification_ == TOKEN_STRING);
return *(this->value_.string_value);
}
PreciseInteger get_integer_value() const
{
assert(this->classification_ == TOKEN_INTEGER);
return this->value_.integer_value;
}
PreciseFloat get_float_value() const
{
assert(this->classification_ == TOKEN_FLOAT);
return this->value_.float_value;
}
char get_char_value() const
{
assert(this->classification_ == TOKEN_CHAR);
return this->value_.char_value;
}
bool is_eof() const
{
return this->classification_ == TOKEN_EOF;
}
~Token();
private:
//Tokens are produced by the make_*_token factories; this constructor is
//private so users cannot build a Token with an inconsistent tag/payload.
Token(Classification classification, int location)
:classification_(classification), source_location_(location)
{
}
//Release any heap-allocated payload (identifier/string spellings).
void clear();
//Deep-copy another Token's tag, location and payload.
void copy(const Token& t);
Classification classification_;//which member of the union is active
unsigned int source_location_;//offset of the token in the source file
union
{
Keyword keyword_value;//keyword code
Operator operator_value;//operator code
std::string* identifier_value;//identifier spelling (owned)
std::string* string_value;//string literal contents (owned)
PreciseInteger integer_value;//integer literal value
PreciseFloat float_value;//floating literal value
char char_value;//character literal value
}value_;
};

//The lexical analyzer: reads the input stream line by line and produces
//Tokens on demand via next_token().
class lex
{
public:
lex(const char* input_file_name, std::fstream &input_file_);
~lex(void);
Token next_token();//return the next token from the input
private:
bool require_line();	//fetch the next line when the current one is exhausted
void skip_c_comment();//skip a /* ... */ comment (may span lines)
void skip_cpp_comment();//skip a // comment (to end of line)
Token gather_identifier_or_keyword(); //scan an identifier and classify it as keyword or identifier
Token gather_number();//scan an integer or floating-point literal
Token gather_character();//scan a character literal
Token gather_string();//scan a string literal
Keyword string_to_keyword(const std::string &str);//map a spelling to its keyword code
private:
//source file name
const char* input_file_name_;
//input stream (owned by the caller - held by reference)
std::string linebuf_;
//NOTE: the two declarations above/below keep their original order; see ctor init list
//length of the current line
size_t line_size_;
//read position within the current line
size_t line_offset_;
//current line number (first line is 1)
size_t line_number_;
//absolute read position in the file, used for token locations
//NOTE(review): verify the constructor initializes this member - it is
//read and incremented before any explicit assignment
size_t location_;
};
#endif


lex.cpp
#include "lex.h"
#include <cstdlib>
//One row of the keyword table: a keyword code and its source spelling.
struct KeywordStringTable
{
Keyword keyword;
std::string name;
};
//Keyword-to-spelling table searched by lex::string_to_keyword.
//Nested braces added so each {code, spelling} pair is explicit.
KeywordStringTable keyword_string_mapping[] =
{
//data types
{ KEYWORD_CHAR, "char" },
{ KEYWORD_SHORT, "short" },
{ KEYWORD_INT, "int" },
{ KEYWORD_LONG, "long" },
{ KEYWORD_FLOAT, "float" },
{ KEYWORD_DOULBE, "double" },
{ KEYWORD_SIGNED, "signed" },
{ KEYWORD_UNSIGNED, "unsigned" },
{ KEYWORD_ENUM, "enum" },
{ KEYWORD_UNION, "union" },//fix: was "unio", so the "union" keyword was lexed as an identifier
{ KEYWORD_STRUCT, "struct" },
{ KEYWORD_VOID, "void" },
//control flow
//loops
{ KEYWORD_FOR, "for" },
{ KEYWORD_DO, "do" },
{ KEYWORD_WHILE, "while" },
{ KEYWORD_BREAK, "break" },
{ KEYWORD_CONTINUE, "continue" },
//conditionals
{ KEYWORD_IF, "if" },
{ KEYWORD_ELSE, "else" },
{ KEYWORD_GOTO, "goto" },
//switch
{ KEYWORD_SWITCH, "switch" },
{ KEYWORD_CASE, "case" },
{ KEYWORD_DEFAULT, "default" },
//return
{ KEYWORD_RETURN, "return" },
//storage classes
{ KEYWORD_AUTO, "auto" },
{ KEYWORD_EXTERN, "extern" },
{ KEYWORD_REGISTER, "register" },
{ KEYWORD_STATIC, "static" },
//others
{ KEYWORD_CONST, "const" },
{ KEYWORD_SIZEOF, "sizeof" },
{ KEYWORD_TYPEDEF, "typedef" },
{ KEYWORD_VOLATILE, "volatile" },
};
//Number of entries in the table above.
static size_t mapping_table_size = sizeof(keyword_string_mapping) / sizeof(KeywordStringTable);
//Copy constructor: deep-copies t's payload via copy().
Token::Token(const Token& t)
{
copy(t);
}
//Assignment: release any owned payload, then deep-copy the source.
Token& Token::operator=(const Token& t)
{
    // Self-assignment must not free the payload we are about to copy.
    if (this == &t)
    {
        return *this;
    }
    clear();
    copy(t);
    return *this;
}
//Compare two tokens: equal when they share a classification and (where
//applicable) the same payload; INVALID and EOF tokens compare equal on
//classification alone (the default branch).
bool Token::operator==(const Token& t)
{
if (classification_ == t.classification_)
{
switch (classification_)
{
case TOKEN_KEYWORD:
return value_.keyword_value == t.value_.keyword_value;
case TOKEN_IDENTIFIER:
return *value_.identifier_value == *t.value_.identifier_value;//compare spellings, not pointers
case TOKEN_OPERATOR:
return value_.operator_value == t.value_.operator_value;
case TOKEN_STRING:
return *value_.string_value == *t.value_.string_value;//compare contents, not pointers
case TOKEN_INTEGER:
return value_.integer_value == t.value_.integer_value;
case TOKEN_FLOAT:
return value_.float_value == t.value_.float_value;//NOTE(review): exact float comparison - confirm intended
case TOKEN_CHAR:
return value_.char_value == t.value_.char_value;
default:
return true;
}
}
return false;
}
//Deep-copy another token's classification, location and payload.
//Identifier/string payloads are cloned so each Token owns its own
//std::string.  Callers must ensure this token holds no live payload
//(operator= calls clear() first; the copy ctor starts from raw storage).
void Token::copy(const Token& t)
{
this->source_location_ = t.source_location_;
this->classification_ = t.classification_;
switch (this->classification_)
{
case TOKEN_INVALID:
case TOKEN_EOF:
break;//no payload
case TOKEN_KEYWORD:
this->value_.keyword_value = t.value_.keyword_value;
break;
case TOKEN_IDENTIFIER:
this->value_.identifier_value = new std::string(*(t.value_.identifier_value));
break;
case TOKEN_OPERATOR:
this->value_.operator_value = t.value_.operator_value;
break;
case TOKEN_STRING:
this->value_.string_value = new std::string(*(t.value_.string_value));
break;
case TOKEN_INTEGER:
this->value_.integer_value = t.value_.integer_value;
break;
case TOKEN_FLOAT:
this->value_.float_value = t.value_.float_value;
break;
case TOKEN_CHAR:
this->value_.char_value = t.value_.char_value;
break;
}
}
//Release the heap-allocated payload, if any.  Only identifier and
//string tokens own one.
void Token::clear()
{
    switch (this->classification_)
    {
    case TOKEN_IDENTIFIER:
        delete this->value_.identifier_value;
        break;
    case TOKEN_STRING:
        delete this->value_.string_value;
        break;
    default:
        break;
    }
}
//Destructor: releases any heap-allocated payload.
Token::~Token()
{
clear();
}

//Construct a lexer over an already-opened input stream.
//Fix: location_ was missing from the init list, so every token location
//was computed from an indeterminate value.
lex::lex(const char* input_file_name, std::fstream &input_file)
:input_file_name_(input_file_name),
input_file_(input_file),
line_size_(0),
line_offset_(0),
line_number_(0),
location_(0)
{

}

//Ensure there are unread characters: return true if the current line
//still has some, otherwise read the next line.  Returns false at EOF.
//NOTE(review, from original): location_ bookkeeping across lines may
//need another look.
bool lex::require_line()
{
	if (line_offset_ < line_size_)
	{
		return true;
	}
	else if (std::getline(input_file_, linebuf_))
	{
		line_offset_ = 0;
		++line_number_;
		//std::getline strips the newline; append it back so the lexer can
		//see the line boundary.  Fix: the original '/n' is a two-character
		//literal, not the newline character '\n'.
		linebuf_ += '\n';
		line_size_ = linebuf_.size();
		return true;
	}
	return false;
}
//Consume characters up to and including the closing "*/" of a C-style
//comment, reading further lines as needed.  If EOF arrives first, the
//comment simply extends to the end of input.
void lex::skip_c_comment()
{
	do
	{
		while (line_offset_ < line_size_)
		{
			//Fix: bounds-check the one-character look-ahead before reading it.
			if ((linebuf_[line_offset_] == '*')
				&& (line_offset_ + 1 < line_size_)
				&& (linebuf_[line_offset_ + 1] == '/'))
			{
				line_offset_ += 2;
				location_ += 2;
				return;
			}
			++line_offset_;
			++location_;
		}
	} while (require_line());
}
//A C++ "//" comment runs to the end of the line: advance the global
//location past the remaining characters and mark the line consumed.
void lex::skip_cpp_comment()
{
location_ += (line_size_ - line_offset_);
line_offset_ = line_size_;
}
//Scan a word ([A-Za-z0-9_]+) starting at the current offset and return
//either a keyword token or an identifier token.
Token lex::gather_identifier_or_keyword()
{
    size_t word_start = line_offset_;
    while (line_offset_ < line_size_)
    {
        char c = linebuf_[line_offset_];
        bool is_word_char = (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || (c == '_');
        if (!is_word_char)
        {
            break;
        }
        ++location_;
        ++line_offset_;
    }
    std::string word(linebuf_, word_start, line_offset_ - word_start);
    //The token's location is where the word began.
    Keyword keyword = string_to_keyword(word);
    if (keyword != KEYWORD_INVALID)
    {
        return Token::make_keyword_token(keyword, location_ - word.size());
    }
    return Token::make_identifier_token(word, location_ - word.size());
}
//Scan an integer or floating-point literal (scientific notation is not
//supported).  A single '.' switches the literal to floating point.
//Fix: std::atoi/std::atof were used without including <cstdlib> and have
//undefined behavior on overflow; std::strtol/std::strtod clamp instead.
Token lex::gather_number()
{
	enum Number { INT, FLOAT };
	Number number_type = INT;
	bool have_point = false;
	std::string::iterator number_begin = linebuf_.begin() + line_offset_;
	while (line_offset_ < line_size_)
	{
		if ((linebuf_[line_offset_] <= '9') && (linebuf_[line_offset_] >= '0'))
		{
			++line_offset_;
			++location_;
		}
		else if ((linebuf_[line_offset_] == '.') && (have_point == false))
		{
			//first decimal point: the literal becomes a float
			have_point = true;
			number_type = FLOAT;
			++line_offset_;
			++location_;
		}
		else
		{
			break;
		}
	}
	std::string::iterator number_end = linebuf_.begin() + line_offset_;
	std::string str_number(number_begin, number_end);
	if (number_type == INT)
	{
		PreciseInteger integer_value = std::strtol(str_number.c_str(), NULL, 10);
		return Token::make_integer_token(integer_value, location_ - str_number.size());
	}
	//number_type == FLOAT is the only other possibility
	PreciseFloat float_value = std::strtod(str_number.c_str(), NULL);
	return Token::make_float_token(float_value, location_ - str_number.size());
}
//Scan a character literal of the exact form 'c'.  Escape sequences are
//not supported (original limitation); anything else yields an invalid
//token.
Token lex::gather_character()
{
	//skip the opening quote
	++line_offset_;
	++location_;
	//Fixes: the closing-quote literal must be '\'' (the posted '/'' does
	//not compile), the look-ahead index is now bounds-checked, and the
	//tautological "<= 255" half of the range test is dropped ("<= 255" is
	//always true for char; ">= 0" rejects negative signed chars, i.e.
	//non-ASCII bytes).
	if ((line_offset_ + 1 < line_size_)
		&& (linebuf_[line_offset_] >= 0)
		&& (linebuf_[line_offset_ + 1] == '\''))
	{
		line_offset_ += 2;
		location_ += 2;
		return Token::make_char_token(linebuf_[line_offset_ - 2], location_ - 3);
	}
	else
	{
		return Token::make_invalid_token(location_);
	}
}
//Scan a double-quoted string on a single line.  Escapes and multi-line
//strings are not supported (original limitation); an unterminated
//string yields an invalid token.
Token lex::gather_string()
{
    //step over the opening quote
    ++line_offset_;
    ++location_;
    size_t content_start = line_offset_;
    //advance to the closing quote or the end of the line
    while (line_offset_ < line_size_ && linebuf_[line_offset_] != '"')
    {
        ++line_offset_;
        ++location_;
    }
    if (line_offset_ == line_size_)
    {
        //no closing quote on this line
        return Token::make_invalid_token(location_);
    }
    std::string string_value(linebuf_, content_start, line_offset_ - content_start);
    //step over the closing quote
    ++line_offset_;
    ++location_;
    //the token starts at the opening quote: content length plus both quotes
    return Token::make_string_token(string_value, location_ - string_value.size() - 2);
}
//Map an identifier spelling to its keyword code, or KEYWORD_INVALID if
//it is not a keyword.  Linear scan of the keyword table.
Keyword lex::string_to_keyword(const std::string &str)
{
	//Fix: size_t index to match mapping_table_size and avoid the
	//signed/unsigned comparison warning.
	for (size_t i = 0; i < mapping_table_size; ++i)
	{
		if (str == keyword_string_mapping[i].name)
		{
			return keyword_string_mapping[i].keyword;
		}
	}
	return KEYWORD_INVALID;
}
//Produce the next token from the input.  Skips whitespace and both
//comment styles; returns an EOF token at end of input and an invalid
//token for unrecognized characters.
//Fixes relative to the original:
//  - escape sequences were mangled: '/t', '/r', '/n' and '/'' are not
//    the control/quote characters; they must be '\t', '\r', '\n', '\'';
//  - "%" and "%=" produced each other's operator codes;
//  - "<<" was detected by testing the look-ahead for '>' instead of '<';
//  - the whitespace-skipping loop is now bounds-checked.
Token lex::next_token()
{
	do
	{
		if (!require_line())
		{
			return Token::make_eof_token(++location_);
		}
		while (line_offset_ < line_size_)
		{
			char cc = linebuf_[line_offset_];
			switch (cc)
			{
			case '\t': case '\r': case ' '://whitespace run
				++line_offset_;
				++location_;
				while ((line_offset_ < line_size_)
					&& ((linebuf_[line_offset_] == '\t')
					|| (linebuf_[line_offset_] == '\r')
					|| (linebuf_[line_offset_] == ' ')))
				{
					++line_offset_;
					++location_;
				}
				break;
			case '\n'://newline
				++line_offset_;
				++location_;
				break;
			case '/'://comment or division operator
				++line_offset_;
				++location_;
				if (linebuf_[line_offset_] == '/')
				{
					++line_offset_;
					++location_;
					skip_cpp_comment();
				}
				else if (linebuf_[line_offset_] == '*')
				{
					++line_offset_;
					++location_;
					skip_c_comment();
				}
				else if (linebuf_[line_offset_] == '=')
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_DIVEQ, location_ - 2);
				}
				else
				{
					return Token::make_operator_token(OPERATOR_DIV, location_ - 1);
				}
				break;
			case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
			case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
			case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
			case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
			case 'Y': case 'Z':
			case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
			case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
			case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
			case 's': case 't': case 'u': case 'v': case 'w': case 'x':
			case 'y': case 'z':
			case '_':
				return gather_identifier_or_keyword();
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				return gather_number();
			case '\''://character literal
				return gather_character();
			case '"'://string literal
				return gather_string();
			case '+':
				if (linebuf_[line_offset_ + 1] == '+')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_PLUSPLUS, location_ - 2);
				}
				else if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_PLUSEQ, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_PLUS, location_ - 1);
				}
			case '-':
				if (linebuf_[line_offset_ + 1] == '-')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_MINUSMINUS, location_ - 2);
				}
				else if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_MINUSEQ, location_ - 2);
				}
				else if (linebuf_[line_offset_ + 1] == '>')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_RPOINTOR, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_MINUS, location_ - 1);
				}
			case '*':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_MULTEQ, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_MULT, location_ - 1);
				}
			case '%':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					//fix: "%=" is the compound assignment operator
					return Token::make_operator_token(OPERATOR_MODEQ, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					//fix: a lone '%' is the modulo operator
					return Token::make_operator_token(OPERATOR_MOD, location_ - 1);
				}
			case '=':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_EQEQ, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_EQ, location_ - 1);
				}
			case '>':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_GE, location_ - 2);
				}
				else if (linebuf_[line_offset_ + 1] == '>')
				{
					if (((line_offset_ + 2) < line_size_) && (linebuf_[line_offset_ + 2] == '='))
					{
						line_offset_ += 3;
						location_ += 3;
						return Token::make_operator_token(OPERATOR_RSHIFTEQ, location_ - 3);
					}
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_RSHIFT, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_GT, location_ - 1);
				}
			case '<':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_LE, location_ - 2);
				}
				else if (linebuf_[line_offset_ + 1] == '<')//fix: was '>', so "<<" was never recognized
				{
					if (((line_offset_ + 2) < line_size_) && (linebuf_[line_offset_ + 2] == '='))
					{
						line_offset_ += 3;
						location_ += 3;
						return Token::make_operator_token(OPERATOR_LSHIFTEQ, location_ - 3);
					}
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_LSHIFT, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_LT, location_ - 1);
				}
			case '|':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_OREQ, location_ - 2);
				}
				else if (linebuf_[line_offset_ + 1] == '|')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_OROR, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_OR, location_ - 1);
				}
			case '&':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_ANDEQ, location_ - 2);
				}
				else if (linebuf_[line_offset_ + 1] == '&')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_ANDAND, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_AND, location_ - 1);
				}
			case '!':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_NOTEQ, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_NOT, location_ - 1);
				}
			case '^':
				if (linebuf_[line_offset_ + 1] == '=')
				{
					line_offset_ += 2;
					location_ += 2;
					return Token::make_operator_token(OPERATOR_XOREQ, location_ - 2);
				}
				else
				{
					++line_offset_;
					++location_;
					return Token::make_operator_token(OPERATOR_XOR, location_ - 1);
				}
			//single-character punctuators
			case ';':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_SEMICOLON, location_ - 1);
			case '(':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_LPAREN, location_ - 1);
			case ')':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_RPAREN, location_ - 1);
			case '}':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_RCURLY, location_ - 1);
			case '{':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_LCURLY, location_ - 1);
			case '[':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_LSQUARE, location_ - 1);
			case ']':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_RSQUARE, location_ - 1);
			case '.':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_DOT, location_ - 1);
			case '?':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_CONDITON_BEGIN, location_ - 1);
			case ':':
				++line_offset_;
				++location_;
				return Token::make_operator_token(OPERATOR_CONDITON_END, location_ - 1);
			default://unrecognized character
				++line_offset_;
				++location_;
				return Token::make_invalid_token(location_ - 1);
			}
		}
	} while (true);
}
//Nothing to release: the stream reference is owned by the caller.
lex::~lex()
{
}


语法分析

使用LL(1)算法

parse.h
#pragma once
#ifndef PARSE_H
#define PARSE_H
#include "lex.h"
#include "LL_ONE.h"
#include <fstream>
#include <string>
#include <stack>
#include <iostream>

//Syntax analyzer driven by an LL(1) parse table.
class Parse
{
public:
Parse(lex *lexer, std::fstream &file);//NOTE(review): when lexer is NULL the ctor heap-allocates one that is never freed - confirm ownership
void program();//run the table-driven parse over the whole input
private:
lex *lex_;//lexical analyzer (may be caller-supplied or allocated internally)
std::fstream &file_;//input file
std::stack<Symbol> symbol_stack_;//parse (symbol) stack
};
#endif


parse.cpp
#include "parse.h"
//Build the parser: seed the symbol stack with the bottom-of-stack marker
//and the grammar start symbol, then fill in the LL(1) table.
Parse::Parse(lex *lexer, std::fstream &file)
:lex_(lexer),
file_(file)
{
if (lexer == NULL)
{
//NOTE(review): this lex is heap-allocated but never deleted (Parse
//declares no destructor) - confirm ownership or add cleanup.
lex_ = new lex("", file_);
}
symbol_stack_.push(Symbol::make_special_symbol());//bottom-of-stack marker
symbol_stack_.push(Symbol::make_nonterminal_symbol(Nonterminal_E));//grammar start nonterminal
init_LL_ONE_TABLE();//initialize the LL(1) table
}
void print_symbol(Symbol &s);//debug helper, defined below
//Table-driven LL(1) parse loop: repeatedly pop the stack top, match
//terminals against the input, and expand nonterminals via the table.
//Stops when the bottom-of-stack marker is popped.
void Parse::program()
{
Token current_token_ = lex_->next_token();//read the first input token
Symbol pop_symbol;//holds the symbol popped from the stack top
do
{
pop_symbol = symbol_stack_.top();//examine the stack top
std::cout << "弹出-"; print_symbol(pop_symbol);

symbol_stack_.pop();//remove it from the stack
if (pop_symbol.is_terminal())//the popped symbol is a terminal
{
if (pop_symbol == Symbol::make_terminal_symbol(current_token_))
{
if (current_token_.get_classification() != Token::TOKEN_EOF)//advance until end of file
{
current_token_ = lex_->next_token();
}
else
{
//exit(-1);//syntax error
//NOTE(review): this branch fires when an EOF terminal matches the
//EOF token - confirm reporting "error" here is intended.
std::cout << "error" << std::endl;
}
}
//NOTE(review): a terminal that does NOT match the current token is
//silently discarded - confirm whether a syntax error should be
//reported instead.
}
else
{
if (!pop_symbol.is_special())
{
//look up the production for (nonterminal, current input) in the table
LL_one_element element = LL_ONE_TABLE[pop_symbol.get_nonterminal_value()][token_to_terminal_value(current_token_)];
if ( element != NULL)
{
std::cout << std::endl;
//push the production right-hand side in reverse so it pops left-to-right
for (int i = element->size() - 1; i >= 0 ; --i)
{
symbol_stack_.push(element->at(i));
std::cout << "插入-"; print_symbol(element->at(i));
}
std::cout << std::endl;
}
else
{
//exit(-1);//syntax error: empty table cell
std::cout << "error" << std::endl;
}
}
}
} while (!(pop_symbol.is_special()));//done once the bottom marker is popped
}

//Debug helper that dumps a symbol to stdout - used only for testing.
void print_symbol(Symbol &s)
{
    switch (s.get_symbol_type())
    {
    case Symbol::TERMINAL:
    {
        std::cout << "终结符";
        //copy the token once instead of re-fetching it per comparison
        Token token = s.get_token_value();
        if (token.get_classification() == Token::TOKEN_OPERATOR)
        {
            switch (token.get_operator_value())
            {
            case OPERATOR_PLUS:   std::cout << "+"; break;
            case OPERATOR_MULT:   std::cout << "*"; break;
            case OPERATOR_LPAREN: std::cout << "("; break;
            case OPERATOR_RPAREN: std::cout << ")"; break;
            default: break;
            }
        }
        if (token.get_classification() == Token::TOKEN_INTEGER)
        {
            std::cout << "整数";
        }
        std::cout << std::endl;
        break;
    }
    case Symbol::NONTERMINAL:
    {
        std::cout << "非终结符";
        switch (s.get_nonterminal_value())
        {
        case Nonterminal_E:  std::cout << "E";  break;
        case Nonterminal_EX: std::cout << "E'"; break;
        case Nonterminal_T:  std::cout << "T";  break;
        case Nonterminal_TX: std::cout << "T'"; break;
        case Nonterminal_F:  std::cout << "F";  break;
        }
        std::cout << std::endl;
        break;
    }
    default:
        break;
    }
}


LL_ONE表格的构造和相关数据结构

LL_ONE.h
#include "lex.h"
#include <string>
#include <vector>
//Forward declaration of the nonterminal enumeration (defined below).
//NOTE(review): forward-declaring an enum without an underlying type is a
//compiler extension, not standard C++ - verify this builds on the target
//toolchain (or move the enum definition above this class).
enum NonterminalSymbol;
//A grammar symbol: either a terminal (wrapping a Token), a nonterminal,
//or the special bottom-of-stack marker.
class Symbol
{
public:
enum SymbolType{TERMINAL, NONTERMINAL, SPECIAL_TERMINAL};//terminal / nonterminal / bottom-of-stack marker
public:
//Fix: the default constructor used to leave type_ uninitialized, so the
//destructor (and operator=) could observe a garbage TERMINAL tag and
//delete a wild Token pointer.  A default Symbol is now the special
//marker, which owns nothing.
Symbol() : type_(SPECIAL_TERMINAL) {}
~Symbol();
Symbol(SymbolType t);
Symbol(const Symbol& s);
Symbol& operator=(const Symbol& symbol);
Token get_token_value();//asserts the symbol is a terminal
NonterminalSymbol get_nonterminal_value();//asserts the symbol is a nonterminal
bool is_terminal() const { return type_ == TERMINAL; }
bool is_special() const { return type_ == SPECIAL_TERMINAL; }
bool is_nonterminal() const { return type_ == NONTERMINAL; }
bool operator==(Symbol &s);//structural equality (see LL_ONE.cpp)
SymbolType get_symbol_type(){return type_;}
static Symbol make_terminal_symbol(Token &t);
static Symbol make_nonterminal_symbol(NonterminalSymbol s);
static Symbol make_special_symbol();
private:
Symbol(Token &t)
{
type_ = TERMINAL;
value_.token_ = new Token(t);//owned; released by the destructor
}
Symbol(NonterminalSymbol s)
{
type_ = NONTERMINAL;
value_.nonterminal_value = s;
}
union
{
NonterminalSymbol nonterminal_value;//nonterminal code
Token *token_;//terminal payload (owned)
}value_;//payload
SymbolType type_;//which member of the union is active
};
//Map a Token to its column index in the LL(1) table.
int token_to_terminal_value(Token &t);
//Nonterminals of the expression grammar filled into the table
//by init_LL_ONE_TABLE().
enum NonterminalSymbol
{
Nonterminal_E,//grammar start symbol
Nonterminal_EX,//E'
Nonterminal_T,
Nonterminal_TX,//T'
Nonterminal_F
};
//Number of nonterminals (rows of the table).
unsigned const NonterminalNumber = Nonterminal_F + 1;
//A table cell: pointer to a production right-hand side (NULL = error entry).
typedef std::vector<Symbol>* LL_one_element;
typedef std::vector<Symbol> LL_one_element_entry;
extern LL_one_element LL_ONE_TABLE[NonterminalNumber][KeywordNumber + OperatorNumber + 2];
void init_LL_ONE_TABLE();


LL_ONE.cpp
#include "LL_ONE.h"

//Construct a payload-less symbol of the given type (used for the
//special bottom-of-stack marker).
Symbol::Symbol(SymbolType t)
{
type_ = t;
}
//Destructor: only terminal symbols own a heap-allocated Token.
Symbol::~Symbol()
{
if(is_terminal())
{
delete value_.token_;
}
}
//Copy constructor: deep-copies terminal payloads so every Symbol owns
//its own Token.
Symbol::Symbol(const Symbol& s)
{
    switch (s.type_)
    {
    case TERMINAL:
        type_ = TERMINAL;
        value_.token_ = new Token(*(s.value_.token_));
        break;
    case NONTERMINAL:
        type_ = NONTERMINAL;
        value_.nonterminal_value = s.value_.nonterminal_value;
        break;
    case SPECIAL_TERMINAL:
        type_ = SPECIAL_TERMINAL;
        break;
    }
}
//Wrap a Token as a terminal symbol (the Token is deep-copied).
Symbol Symbol::make_terminal_symbol(Token &t)
{
return Symbol(t);
}
//Wrap a nonterminal code as a symbol.
Symbol Symbol::make_nonterminal_symbol(NonterminalSymbol s)
{
return Symbol(s);
}
//Create the special bottom-of-stack marker symbol.
Symbol Symbol::make_special_symbol()
{
return Symbol(SPECIAL_TERMINAL);
}
//Structural equality.  Terminal symbols match when their tokens share a
//classification and, for operators/keywords, the same code; integer and
//EOF terminals match on classification alone (any integer literal
//matches the grammar's "int" terminal).  Nonterminals compare by code.
bool Symbol::operator==(Symbol &s)
{
if (s.is_terminal() && this->is_terminal())
{
Token::Classification classification = this->get_token_value().get_classification();
Token::Classification classification_s = s.get_token_value().get_classification();
if (classification == classification_s)
{
switch(classification)
{
case Token::TOKEN_INTEGER:
return true;//any integer literal matches the int terminal
case Token::TOKEN_OPERATOR:
return this->get_token_value().get_operator_value() == s.get_token_value().get_operator_value();
case Token::TOKEN_KEYWORD:
return this->get_token_value().get_keyword_value() == s.get_token_value().get_keyword_value();
case Token::TOKEN_EOF:
return true;
}
//NOTE(review): identifier/string/float/char terminals fall through to
//false even when identical - confirm the grammar never uses them.
return false;
}
return false;
}
if (s.is_nonterminal() && this->is_nonterminal())
{
return s.get_nonterminal_value() == this->get_nonterminal_value();
}
return false;
}
//Assignment: release any owned Token, then deep-copy the source symbol.
Symbol& Symbol::operator=(const Symbol& symbol)
{
if (&symbol != this)
{
if (this->is_terminal())
{
delete this->value_.token_;//drop the previously owned Token
}
if(symbol.is_terminal())
{
this->type_ = TERMINAL;
this->value_.token_ = new Token(*(symbol.value_.token_));//clone, do not share
}
else if(symbol.is_nonterminal())
{
this->type_ = NONTERMINAL;
this->value_.nonterminal_value = symbol.value_.nonterminal_value;
}
else
{
this->type_ = SPECIAL_TERMINAL;
}
}
return *this;
}
//Return a copy of the wrapped Token; only valid for terminal symbols.
Token Symbol::get_token_value()
{
assert(type_ == TERMINAL);
return *(value_.token_);
}
//Return the nonterminal code; only valid for nonterminal symbols.
NonterminalSymbol Symbol::get_nonterminal_value()
{
assert(type_ == NONTERMINAL);
return value_.nonterminal_value;
}
//Map a token onto its column in the LL(1) table: keyword columns come
//first, then operator columns, then one column for integer literals and
//one for end-of-input.  Any other token kind maps to column 0.
int token_to_terminal_value(Token &t)
{
    switch (t.get_classification())
    {
    case Token::TOKEN_KEYWORD:
        return t.get_keyword_value();
    case Token::TOKEN_OPERATOR:
        return KeywordNumber + t.get_operator_value();
    case Token::TOKEN_INTEGER:
        return KeywordNumber + OperatorNumber;
    case Token::TOKEN_EOF:
        return KeywordNumber + OperatorNumber + 1;
    default:
        return 0;
    }
}
//The LL(1) parse table.  The "+2" adds one column for integer literals
//and one for the end-of-input marker; NULL cells are syntax errors.
LL_one_element LL_ONE_TABLE[NonterminalNumber][KeywordNumber + OperatorNumber + 2] = {NULL};
//Build the LL(1) table for the expression grammar
//  E  -> T E'
//  E' -> + T E' | e
//  T  -> F T'
//  T' -> * F T' | e
//  F  -> ( E ) | int
//Fix: the production vectors are function-local statics, so a second
//call (e.g. constructing a second Parse) used to push every symbol
//again and corrupt the productions.  The function is now idempotent.
void init_LL_ONE_TABLE()
{
	static bool initialized = false;
	if (initialized)
	{
		return;
	}
	initialized = true;
	static LL_one_element_entry expr_empty;//the empty production (epsilon)
	//E -> T E'
	static LL_one_element_entry expr_e;
	expr_e.push_back(Symbol::make_nonterminal_symbol(Nonterminal_T));
	expr_e.push_back(Symbol::make_nonterminal_symbol(Nonterminal_EX));
	//E' -> + T E'
	static LL_one_element_entry expr_ex;
	expr_ex.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_PLUS, 0)));
	expr_ex.push_back(Symbol::make_nonterminal_symbol(Nonterminal_T));
	expr_ex.push_back(Symbol::make_nonterminal_symbol(Nonterminal_EX));
	//T -> F T'
	static LL_one_element_entry expr_t;
	expr_t.push_back(Symbol::make_nonterminal_symbol(Nonterminal_F));
	expr_t.push_back(Symbol::make_nonterminal_symbol(Nonterminal_TX));
	//T' -> * F T'
	static LL_one_element_entry expr_tx;
	expr_tx.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_MULT, 0)));
	expr_tx.push_back(Symbol::make_nonterminal_symbol(Nonterminal_F));
	expr_tx.push_back(Symbol::make_nonterminal_symbol(Nonterminal_TX));
	//F -> int
	static LL_one_element_entry expr_f_1;
	expr_f_1.push_back(Symbol::make_terminal_symbol(Token::make_integer_token(1, 0)));
	//F -> ( E )
	static LL_one_element_entry expr_f_2;
	expr_f_2.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_LPAREN, 0)));
	expr_f_2.push_back(Symbol::make_nonterminal_symbol(Nonterminal_E));
	expr_f_2.push_back(Symbol::make_terminal_symbol(Token::make_operator_token(OPERATOR_RPAREN, 0)));

	//fill the table: row = nonterminal, column = look-ahead terminal
	LL_ONE_TABLE[Nonterminal_E][token_to_terminal_value(Token::make_integer_token(1, 0))] = &expr_e;
	LL_ONE_TABLE[Nonterminal_E][token_to_terminal_value(Token::make_operator_token(OPERATOR_LPAREN, 0))] = &expr_e;
	LL_ONE_TABLE[Nonterminal_EX][token_to_terminal_value(Token::make_operator_token(OPERATOR_PLUS, 0))] = &expr_ex;
	LL_ONE_TABLE[Nonterminal_EX][token_to_terminal_value(Token::make_operator_token(OPERATOR_RPAREN, 0))] = &expr_empty;
	LL_ONE_TABLE[Nonterminal_EX][KeywordNumber + OperatorNumber + 1] = &expr_empty;
	LL_ONE_TABLE[Nonterminal_T][token_to_terminal_value(Token::make_integer_token(1, 0))] = &expr_t;
	LL_ONE_TABLE[Nonterminal_T][token_to_terminal_value(Token::make_operator_token(OPERATOR_LPAREN, 0))] = &expr_t;
	LL_ONE_TABLE[Nonterminal_TX][token_to_terminal_value(Token::make_operator_token(OPERATOR_PLUS, 0))] = &expr_empty;
	LL_ONE_TABLE[Nonterminal_TX][token_to_terminal_value(Token::make_operator_token(OPERATOR_MULT, 0))] = &expr_tx;
	LL_ONE_TABLE[Nonterminal_TX][token_to_terminal_value(Token::make_operator_token(OPERATOR_RPAREN, 0))] = &expr_empty;
	LL_ONE_TABLE[Nonterminal_TX][KeywordNumber + OperatorNumber + 1] = &expr_empty;
	LL_ONE_TABLE[Nonterminal_F][token_to_terminal_value(Token::make_integer_token(1, 0))] = &expr_f_1;
	LL_ONE_TABLE[Nonterminal_F][token_to_terminal_value(Token::make_operator_token(OPERATOR_LPAREN, 0))] = &expr_f_2;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: