您的位置:首页 > 编程语言 > Java开发

一个简单词法分析器的实现代码(Java 实现)

2010-09-21 19:10 831 查看
Main.java

/*
* 主程序
*/
import java.io.*;
import lexer.*;

/**
 * Program entry point: drives a {@link Lexer} over its input and then asks it
 * to save the collected token list and symbol table.
 */
public class Main {
    /**
     * Scans until the lexer reports that its input is exhausted, then saves
     * the results.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if scanning or saving fails
     */
    public static void main(String[] args) throws IOException {
        final Lexer lexer = new Lexer();

        // Each scan() call consumes one token; stop once end of input is flagged.
        while (!lexer.getReaderState()) {
            lexer.scan();
        }

        // Save what was gathered during scanning.
        lexer.saveTokens();
        lexer.saveSymbolsTable();
    }
}


Lexer.java

package lexer;

import java.io.*;
import java.util.*;

import symbols.*;

/**
 * Hand-written lexer that reads characters from the file "输入.txt" and groups
 * them into tokens.
 *
 * <p>Every call to {@link #scan()} returns one token and records its text in an
 * ordered token list; identifiers are additionally recorded in a symbol table.
 * {@link #saveTokens()} and {@link #saveSymbolsTable()} write those two
 * collections to text files.
 */
public class Lexer {
    /** Value held in {@code peek} once the input is exhausted (U+FFFF, i.e. {@code (char) -1}). */
    private static final char EOF = (char) 0xffff;

    /** Current line number; incremented each time a '\n' is skipped. */
    public static int line = 1;
    /** Look-ahead character: the next character to be examined by {@link #scan()}. */
    char peek = ' ';
    /** Reserved words, type names and already-seen identifiers, keyed by lexeme. */
    Hashtable<String, Word> words = new Hashtable<String, Word>();
    /** Symbol table: identifier token mapped to its description. */
    private Hashtable<Token, String> table = new Hashtable<Token, String>();
    /** Text of every recorded token, in the order it was scanned. */
    private List<String> tokens = new LinkedList<String>();
    /** Source of input characters; stays {@code null} if the input file could not be opened. */
    BufferedReader reader = null;
    /** Whether the end of the input has been reached. */
    private boolean isEnd = false;

    /**
     * Reports whether the input is exhausted.
     *
     * @return {@code true} once end of input was hit, or if the input file
     *         could not be opened in the constructor
     */
    public Boolean getReaderState() {
        return this.isEnd;
    }

    /**
     * Writes the symbol table to "符号表.txt", one "token, description" row per
     * entry after a two-line header.
     *
     * @throws IOException if the file cannot be created or written
     */
    public void saveSymbolsTable() throws IOException {
        FileWriter writer = new FileWriter("符号表.txt");
        try {
            writer.write("[符号]			[符号类型信息]\n");
            writer.write("\r\n");

            for (Map.Entry<Token, String> entry : table.entrySet()) {
                writer.write(entry.getKey() + "\t\t\t" + entry.getValue() + "\r\n");
            }

            writer.flush();
        } finally {
            // Always release the file handle, even when a write fails.
            writer.close();
        }
    }

    /**
     * Writes the recorded token texts to "Tokens表.txt", one per line after a
     * two-line header.
     *
     * @throws IOException if the file cannot be created or written
     */
    public void saveTokens() throws IOException {
        FileWriter writer = new FileWriter("Tokens表.txt");
        try {
            writer.write("[符号]	\n");
            writer.write("\r\n");

            // Iterate instead of indexing: tokens is a LinkedList, so get(i) is linear.
            for (String tok : tokens) {
                writer.write(tok + "\r\n");
            }

            writer.flush();
        } finally {
            // Always release the file handle, even when a write fails.
            writer.close();
        }
    }

    /** Registers {@code w} in the word table under its lexeme. */
    void reserve(Word w) {
        words.put(w.lexme, w);
    }

    /**
     * Opens the input file and pre-loads the word table with the keywords and
     * type names.
     *
     * <p>If the input file cannot be opened the error is printed and the lexer
     * is marked as exhausted, so a scanning loop guarded by
     * {@link #getReaderState()} ends immediately instead of failing on a
     * {@code null} reader.
     */
    public Lexer() {
        try {
            reader = new BufferedReader(new FileReader("输入.txt"));
        } catch (IOException e) {
            System.out.print(e);
            this.isEnd = true;
        }

        /* Keywords */
        this.reserve(new Word("if", Tag.IF));
        this.reserve(new Word("then", Tag.THEN));
        this.reserve(new Word("else", Tag.ELSE));
        this.reserve(new Word("while", Tag.WHILE));
        this.reserve(new Word("do", Tag.DO));

        /* Boolean constants and type names */
        this.reserve(Word.True);
        this.reserve(Word.False);
        this.reserve(Type.Int);
        this.reserve(Type.Char);
        this.reserve(Type.Bool);
        this.reserve(Type.Float);
    }

    /**
     * Reads the next input character into {@code peek}.
     *
     * <p>At end of input {@code peek} is set to U+FFFF, the end flag is raised
     * and the reader is closed; later calls leave the lexer in that state
     * without touching the closed reader.
     *
     * @throws IOException if reading or closing the input fails
     */
    public void readch() throws IOException {
        if (this.isEnd || reader == null) {
            this.isEnd = true;
            peek = EOF;
            return;
        }

        // Compare the int result with -1 before narrowing, so end of stream is
        // detected from read()'s return value rather than from the char cast.
        int c = reader.read();
        if (c == -1) {
            peek = EOF;
            this.isEnd = true;
            reader.close();
            return;
        }
        peek = (char) c;
    }

    /**
     * Reads the next character and tests it against {@code ch}.
     *
     * @param ch the character expected next
     * @return {@code true} if the character read equals {@code ch} (it is then
     *         consumed by resetting {@code peek} to a blank); {@code false}
     *         otherwise, leaving the character in {@code peek}
     * @throws IOException if reading the input fails
     */
    public Boolean readch(char ch) throws IOException {
        readch();
        if (this.peek != ch) {
            return false;
        }

        this.peek = ' ';
        return true;
    }

    /**
     * Scans and returns the next token, recording its text in the token list.
     *
     * <p>Recognises {@code ==}, {@code >=}, {@code <=}, {@code !=} and their
     * one-character prefixes, unsigned integer literals, keywords/type names
     * and identifiers; any other character becomes a one-character token. At
     * end of input a token with tag 0xffff is returned and nothing is recorded.
     *
     * @return the token that was scanned
     * @throws IOException if reading the input fails
     */
    public Token scan() throws IOException {
        /* Skip white space; '\r' is included so CRLF line ends do not yield tokens. */
        for ( ; ; readch()) {
            if (peek == ' ' || peek == '\t' || peek == '\r') {
                continue;
            } else if (peek == '\n') {
                line = line + 1;
            } else {
                break;
            }
        }

        /* Operators that may be followed by '=' */
        switch (peek) {
            case '=':
                return scanOperator('=', Word.eq);
            case '>':
                return scanOperator('>', Word.ge);
            case '<':
                return scanOperator('<', Word.le);
            case '!':
                return scanOperator('!', Word.ne);
            default:
                break;
        }

        /* Integer literals */
        if (Character.isDigit(peek)) {
            // NOTE(review): value wraps silently for literals beyond the int range.
            int value = 0;
            do {
                value = 10 * value + Character.digit(peek, 10);
                readch();
            } while (Character.isDigit(peek));

            Num n = new Num(value);
            tokens.add(n.toString());
            return n;
        }

        /* Keywords, type names and identifiers */
        if (Character.isLetter(peek)) {
            StringBuilder sb = new StringBuilder();
            do {
                sb.append(peek);
                readch();
            } while (Character.isLetterOrDigit(peek));

            String s = sb.toString();
            Word w = words.get(s);

            /* Already known: a keyword, a type name or an identifier seen before */
            if (w != null) {
                tokens.add(w.toString());
                return w;
            }

            /* First occurrence of an identifier: enter it into both tables */
            w = new Word(s, Tag.ID);
            tokens.add(w.toString());
            table.put(w, "id");
            words.put(s, w);
            return w;
        }

        /* Any other character is returned as a token of its own */
        Token tok = new Token(peek);
        if (peek != EOF) {
            tokens.add(tok.toString());
        }
        peek = ' ';

        return tok;
    }

    /**
     * Finishes scanning an operator whose first character is already in
     * {@code peek}: yields {@code compound} when the next character is '=',
     * otherwise a one-character token for {@code first}.
     */
    private Token scanOperator(char first, Word compound) throws IOException {
        if (readch('=')) {
            tokens.add(compound.toString());
            return compound;
        }
        tokens.add(String.valueOf(first));
        return new Token(first);
    }
}


Num.java

package lexer;

public class Num extends Token{
public final int value;

public Num(int v) {
super(Tag.NUM);
this.value = v;
}

public String toString() {
return  "" + value;
}
}


Tag.java

package lexer;

/**
 * Integer tag constants for tokens that are not a single character.
 *
 * <p>The values start at 256 - presumably so they cannot collide with tags
 * that are plain character codes; confirm against the token producers.
 */
public class Tag {
    public static final int AND = 256;
    public static final int BASIC = 257;
    public static final int BREAK = 258;
    public static final int DO = 259;
    public static final int ELSE = 260;
    /** The {@code ==} operator. */
    public static final int EQ = 261;
    public static final int FALSE = 262;
    public static final int GE = 263;
    public static final int ID = 264;
    public static final int IF = 265;
    public static final int INDEX = 266;
    public static final int LE = 267;
    public static final int MINUS = 268;
    public static final int NE = 269;
    public static final int NUM = 270;
    public static final int OR = 271;
    public static final int REAL = 272;
    public static final int TEMP = 273;
    public static final int TRUE = 274;
    public static final int WHILE = 275;
    /** Added after the original list. */
    public static final int THEN = 276;
}


Token.java

package lexer;

/**
 * A lexical unit identified by an integer tag.
 */
public class Token {
    /** Integer code identifying the token. */
    public final int tag;

    /**
     * @param t the tag to store
     */
    public Token(int t) {
        tag = t;
    }

    /** Returns the tag interpreted as a single character. */
    @Override
    public String toString() {
        return String.valueOf((char) tag);
    }

    /** Small manual check: prints the token built from the character 'a'. */
    public static void main(String[] args) {
        System.out.println(new Token('a'));
    }
}


Word.java

/*
* 类word用于管理保留字,标识符以及像&&这样的复合单词元素 。
*/
package lexer;

public class Word extends Token {
public String lexme = "";

public Word (String s, int t) {
super(t);
this.lexme = s;
}

public String toString() {
return this.lexme;
}

public static final Word
and = new Word("&&", Tag.AND),
or = new Word("||", Tag.OR),
eq = new Word ("==", Tag.EQ),
ne = new Word("!=", Tag.NE),
le = new Word("<=", Tag.LE),
ge = new Word(">=", Tag.GE),
minus = new Word("minus", Tag.MINUS),
True = new Word("true", Tag.TRUE),
False = new Word("false", Tag.FALSE),
temp = new Word("t", Tag.TEMP);
}


Type.java

/*
* 说明数据类型
*/
package symbols;

import lexer.*;

/**
 * A word that names a data type. The four predefined types all carry the tag
 * {@code Tag.BASIC}.
 */
public class Type extends Word {
    /**
     * @param s   the type's name
     * @param tag the token tag
     */
    public Type(String s, int tag) {
        super(s, tag);
    }

    /* Predefined basic types. */
    public static final Type Int = new Type("int", Tag.BASIC);
    public static final Type Float = new Type("float", Tag.BASIC);
    public static final Type Char = new Type("char", Tag.BASIC);
    public static final Type Bool = new Type("bool", Tag.BASIC);
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: