您的位置:首页 > 编程语言 > Java开发

哈工大编译原理第一次实验--词法分析(Java版本)

2012-10-27 13:50 591 查看
1.在判断空行的时候,Java 里面用 line == "" 不好使(== 比较的是对象引用,而不是字符串内容),调试发现进不去 if,改用 line.equals("") 就好使了。

2.Java 的格式化输出可以这样写:System.out.printf("%-10s\t<ERROR:标识符重复!>\n",token); 没错,就是 printf!但是 BufferedWriter 没有 printf 方法,不能用这种写法直接输出到文件中。不过我们可以这么写:

output.write(String.format("%-10s\t<%s,-->",token,token));  

String.format 救了我们哦~~

3.输出到文件中怎么换行呢? output.write("空行~\r\n"); 呵呵,win下是\r\n哦,linux下\n。。。

4.传说中的符号表的C语言代码实现:http://blog.163.com/ppt_compiler/blog/static/20281300720125120041966/

===================================================================================

如何解读这个看起来很糟糕的基本没啥注释的代码呢?

1.看清楚结构,结构如下:

(1)读入一行line,把line转成char[] 的strLine数组,然后每次处理一个字符ch(看红色代码,所有的处理都在for里面)。

        (2)然后对每个ch进行分类:if else if else if 。。。建议每次看一个if{}就不会头晕啦

2.看清楚算法,这个是基于很精巧的“状态转移图”的程序,我拿个数字处理的代码讲解下:



那么我们就建立个二维数组来实现这个状态的转移:

   123456

 1 d.#e##

 2 ##d###

 3 ##de##

 4 ####-d

 5 #####d

 6 #####d

我们忽略0状态,因为我们已经进入了。

状态1到状态1有矢量连接,所以数组d[1][1] = 'd'

状态1到状态2有矢量连接,所以数组d[1][2] = '.'

依次类推,没有矢量的就标为'#',然后关键代码如下:

// Excerpt of the numeric-constant scanner. ch, i, strLine, token, digitDFA and
// in_digitDFA come from the full listing; this fragment does not compile alone.
// s is the current DFA state; scanning starts in state 1.
int s = 1;
// Set as soon as a '.' or an 'e' is seen.
Boolean isfloat = false;
// Keep going only while ch is one of the characters a number may contain.
while (ch != '\0'&& (isDigit(ch) || ch == '.' || ch == 'e' || ch == '-')) {
if (ch == '.' || ch == 'e')
isfloat = true;

int k;
// Try every target state k: digitDFA[s] is the row for state s and its k-th
// character is the label of the edge s -> k ('#' means "no edge").
for (k = 1; k <= 6; k++) {
char tmpstr[] = digitDFA[s].toCharArray();
if (ch != '#'&& 1 == in_digitDFA(ch, tmpstr[k])) {
// Edge found: consume ch and move to state k.
token += ch;
s = k;
break;
}
}
// k ran past 6 without a match: no edge accepts ch, so stop scanning.
if (k > 6)
break;
// NOTE(review): no bounds check here; the full listing below guards i against
// strLine.length before reading the next character.
ch = strLine[++i];
}
当循环退出的时候(for 循环里 k > 6,即当前状态下没有可走的转移;或者遇到了不可能属于数字的字符),s 就是最终状态:当状态为 1 , 3 ,6 的时候是正常退出

为 2 ,4 ,5的时候是有错误地退出。

=====================================================================

我的code.txt:

int a="a;
main()
{
int b =99A1;
int a= 999;
int c='a';
int abc = "hahah";
/*你妹啊*/
//你好啊

print("Hello World!\n");//你又好了
return 0;/*你妹啊*/
}


我的输出:

line : 1
int       	<int,-->
a         	<标识符,(a,入口:0)>
=         	<=,-->
"a        	ERROR:字符串常量引号不封闭
;         	<;,-->

line : 2
main      	<标识符,(main,入口:1)>
(         	<(,-->
)         	<),-->

line : 3
{         	<{,-->

line : 4
int       	<int,-->
b         	<标识符,(b,入口:2)>
=         	<=,-->
99A1      	ERROR:请确保实常数输入正确
;         	<;,-->

line : 5
int       	<int,-->
a         	<ERROR:标识符重复!>
=         	<=,-->
999       	<实型常量,999>
;         	<;,-->

line : 6
int       	<int,-->
c         	<标识符,(c,入口:3)>
=         	<=,-->
'a'       	<字符常量,a>
;         	<;,-->

line : 7
int       	<int,-->
abc       	<标识符,(abc,入口:4)>
=         	<=,-->
"hahah"   	<字符串常量,hahah>
;         	<;,-->

line : 8
/*你妹啊*/   	(注释:/*你妹啊*/)

line : 9
//你好啊     	(注释://你好啊)

line : 10
空行~

line : 11
print     	<标识符,(print,入口:5)>
(         	<(,-->
"Hello World!\n"	<字符串常量,Hello World!\n>
)         	<),-->
;         	<;,-->
//你又好了    	(注释://你又好了)

line : 12
return    	<return,-->
0         	<实型常量,0>
;         	<;,-->
/*你妹啊*/   	(注释:/*你妹啊*/)

line : 13
}         	<},-->


这是我那个很糟糕的源代码:
package ouyang;

import java.io.*;
import java.util.*;

public class AnalysisCodeToWord {
/**
 * Tokenizes {@code code.txt} line by line and reports every token, with the same
 * text, to standard output and to {@code out.txt} (file lines end in CR LF).
 *
 * <p>Each input line is scanned independently, so a block comment, string or
 * character constant that is not closed on its own line is reported as an error.
 * Recognized categories: keywords, identifiers (registered in {@link #symbol};
 * a second occurrence is reported as a duplicate), numeric constants driven by
 * {@link #digitDFA}, character and string constants driven by {@link #stConDFA},
 * operators/delimiters from {@link #oper}, the division operator, and comments
 * driven by {@link #noteDFA}. Anything else except blanks and tabs is reported as
 * an illegal character.
 *
 * <p>I/O failures are printed with their stack trace; both files are closed on
 * every exit path.
 *
 * @param args ignored
 */
public static void main(String args[]) {
    String infile = "code.txt";
    String outfile = "out.txt";
    BufferedReader dr = null;
    BufferedWriter output = null;
    try {
        dr = new BufferedReader(new InputStreamReader(new FileInputStream(infile)));
        output = new BufferedWriter(new FileWriter(outfile));

        String line;
        int cnt = 0;
        while ((line = dr.readLine()) != null) {
            cnt++;
            if (cnt > 1) {
                // Two blank lines separate consecutive "line : N" sections.
                emit(output, "");
                emit(output, "");
            }
            emit(output, "line : " + cnt);
            if (line.equals("")) {
                emit(output, "空行~");
                continue;
            }

            char[] strLine = line.toCharArray();
            for (int i = 0; i < strLine.length; i++) {
                char ch = strLine[i];
                String token = "";

                if (isAlpha(ch)) {
                    // Keyword or identifier: extend over letters, digits and '_'.
                    int start = i;
                    while (i + 1 < strLine.length
                            && (isAlpha(strLine[i + 1]) || isDigit(strLine[i + 1]))) {
                        i++;
                    }
                    token = new String(strLine, start, i - start + 1);

                    if (isMatchKeyword(token)) {
                        emit(output, String.format("%-10s\t<%s,-->", token, token));
                    } else if (!symbol.containsKey(token)) {
                        symbol.put(token, symbol_pos);
                        emit(output, String.format("%-10s\t<标识符,(%s,入口:%d)>",
                                token, token, symbol_pos));
                        symbol_pos++;
                    } else {
                        emit(output, String.format("%-10s\t<ERROR:标识符重复!>", token));
                    }
                } else if (isDigit(ch)) {
                    // Numeric constant: walk digitDFA starting in state 1.
                    int s = 1;
                    boolean isfloat = false;
                    boolean atEnd = false; // true once the scan has run past the line
                    while (isDigit(ch) || ch == '.' || ch == 'e' || ch == '-') {
                        char[] row = digitDFA[s].toCharArray();
                        int k;
                        for (k = 1; k <= 6; k++) {
                            if (1 == in_digitDFA(ch, row[k])) {
                                break;
                            }
                        }
                        if (k > 6) {
                            break; // no edge accepts ch in state s
                        }
                        if (ch == '.' || ch == 'e') {
                            isfloat = true;
                        }
                        token += ch;
                        s = k;
                        i++;
                        if (i >= strLine.length) {
                            atEnd = true;
                            break;
                        }
                        ch = strLine[i];
                    }

                    // States 1, 3 and 6 are accepting; in addition the number must be
                    // followed by end of line, white space, '/', or an operator other
                    // than '.'.
                    boolean delimiter = atEnd || ch == ' ' || ch == '\t' || ch == '/'
                            || (isOp(ch) && ch != '.');
                    boolean haveMistake = s == 2 || s == 4 || s == 5 || !delimiter;

                    if (haveMistake) {
                        // Swallow the rest of the malformed literal up to a separator.
                        while (!atEnd && ch != '\0' && ch != ',' && ch != ';' && ch != ' ') {
                            token += ch;
                            i++;
                            if (i >= strLine.length) {
                                atEnd = true;
                            } else {
                                ch = strLine[i];
                            }
                        }
                        emit(output, String.format("%-10s\tERROR:请确保实常数输入正确!", token));
                    } else if (isfloat) {
                        emit(output, String.format("%-10s\t<实型常量,%s>", token, token));
                    } else {
                        emit(output, String.format("%-10s\t<整型常量,%s>", token, token));
                    }
                    --i; // step back so the for-loop re-reads the first unconsumed char
                } else if (ch == '\'') {
                    // Character constant. token1 is the raw text, token the value.
                    int s = 0;
                    boolean haveMistake = false;
                    String token1 = "" + ch;
                    while (s != 3) {
                        i++;
                        if (i >= strLine.length) {
                            haveMistake = true; // line ended before the closing quote
                            break;
                        }
                        ch = strLine[i];
                        if (ch == '\0') {
                            haveMistake = true;
                            break;
                        }
                        char[] row = stConDFA[s].toCharArray();
                        for (int k = 0; k < 4; k++) {
                            if (in_sinStConDFA(ch, row[k])) {
                                token1 += ch;
                                if (k == 2 && s == 1) {
                                    // Character right after a backslash.
                                    if (isEsSt(ch)) {
                                        token = token + '\\' + ch;
                                    } else {
                                        token += ch;
                                    }
                                } else if (k != 3 && k != 1) {
                                    token += ch;
                                }
                                s = k;
                                break;
                            }
                        }
                    }
                    if (haveMistake) {
                        emit(output, String.format("%-10s\tERROR:字符常量引号不封闭", token1));
                        --i;
                    } else if (token.length() == 1
                            || (token.length() == 2 && token.charAt(0) == '\\'
                                    && isEsSt(token.charAt(1)))) {
                        emit(output, String.format("%-10s\t<字符常量,%s>", token1, token));
                    } else {
                        // Closed, but not exactly one character or one escape.
                        emit(output, String.format("%-10s\tERROR:字符常量不合法", token1));
                    }
                } else if (ch == '"') {
                    // String constant. token1 is the raw text, token the value.
                    String token1 = "" + ch;
                    int s = 0;
                    boolean haveMistake = false;
                    while (s != 3) {
                        i++;
                        if (i >= strLine.length) {
                            haveMistake = true;
                            break;
                        }
                        ch = strLine[i];
                        char[] row = stConDFA[s].toCharArray();
                        // The last character of the line is consumed only when it
                        // closes the string; otherwise the string is unclosed and
                        // that character is left to be scanned as its own token
                        // (so the ';' of  int a="a;  is still reported).
                        if (i == strLine.length - 1 && !in_stConDFA(ch, row[3])) {
                            haveMistake = true;
                            break;
                        }
                        if (ch == '\0') {
                            haveMistake = true;
                            break;
                        }
                        for (int k = 0; k < 4; k++) {
                            if (in_stConDFA(ch, row[k])) {
                                token1 += ch;
                                if (k == 2 && s == 1) {
                                    // Character right after a backslash.
                                    if (isEsSt(ch)) {
                                        token = token + '\\' + ch;
                                    } else {
                                        token += ch;
                                    }
                                } else if (k != 3 && k != 1) {
                                    token += ch;
                                }
                                s = k;
                                break;
                            }
                        }
                    }
                    if (haveMistake) {
                        emit(output, String.format("%-10s\tERROR:字符串常量引号不封闭", token1));
                        --i;
                    } else {
                        emit(output, String.format("%-10s\t<字符串常量,%s>", token1, token));
                    }
                } else if (isOp(ch)) {
                    // Operator or delimiter, optionally followed by '=' or doubled.
                    token += ch;
                    if (isPlusEqu(ch) && i + 1 < strLine.length) {
                        char next = strLine[i + 1];
                        if (next == '=' || (isPlusSame(ch) && next == ch)) {
                            token += next;
                            i++;
                        }
                    }
                    emit(output, String.format("%-10s\t<%s,-->", token, token));
                } else if (ch == '/') {
                    // '/' is not in oper: it starts a comment, "/=", or plain division.
                    token += ch;
                    boolean hasNext = i + 1 < strLine.length;
                    if (hasNext && strLine[i + 1] == '*') {
                        // Block comment: walk noteDFA from state 2 until state 4.
                        i++;
                        token += '*';
                        int s = 2;
                        boolean haveMistake = false;
                        while (s != 4) {
                            i++;
                            if (i >= strLine.length) {
                                haveMistake = true; // line ended before "*/"
                                break;
                            }
                            ch = strLine[i];
                            if (ch == '\0') {
                                haveMistake = true;
                                break;
                            }
                            char[] row = noteDFA[s].toCharArray();
                            for (int k = 2; k <= 4; k++) {
                                if (1 == in_noteDFA(ch, row[k], s)) {
                                    token += ch;
                                    s = k;
                                    break;
                                }
                            }
                        }
                        if (haveMistake) {
                            emit(output, String.format("%-10s\t", token) + "ERROR:注释没有封闭");
                            --i;
                        } else {
                            emit(output, String.format("%-10s\t(注释:%s)", token, token));
                        }
                    } else if (hasNext && strLine[i + 1] == '/') {
                        // Line comment: everything from this "//" to the end of line.
                        token = line.substring(i);
                        i = strLine.length;
                        emit(output, String.format("%-10s\t(注释:%s)", token, token));
                    } else {
                        if (hasNext && strLine[i + 1] == '=') {
                            token += '=';
                            i++;
                        }
                        emit(output, String.format("%-10s\t<%s,-->", token, token));
                    }
                } else if (ch != ' ' && ch != '\t') {
                    // Anything else that is not white space is illegal.
                    emit(output, String.format("%-10c ERROR:存在不合法字符", ch));
                }
            }
        }

        // Close explicitly on the success path so a failing flush is reported.
        dr.close();
        output.close();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Closing an already closed stream has no effect.
        closeQuietly(dr);
        closeQuietly(output);
    }
}

/** Prints one report line to the console and writes it, CR LF terminated, to the file. */
private static void emit(BufferedWriter output, String text) throws IOException {
    System.out.println(text);
    output.write(text);
    output.write("\r\n");
}

/** Closes a resource during cleanup; a failure here must not mask the original error. */
private static void closeQuietly(Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (IOException ignored) {
        // Best-effort cleanup only: the success path closes explicitly and reports errors.
    }
}

/**
 * Tells whether a character may start an identifier.
 *
 * @param ch character to classify
 * @return true for an ASCII letter or an underscore
 */
public static Boolean isAlpha(char ch) {
    if (ch == '_') {
        return true;
    }
    boolean lower = 'a' <= ch && ch <= 'z';
    boolean upper = 'A' <= ch && ch <= 'Z';
    return lower || upper;
}

/**
 * Tells whether a character is an ASCII decimal digit.
 *
 * @param ch character to classify
 * @return true for '0' through '9'
 */
public static Boolean isDigit(char ch) {
    int offset = ch - '0';
    return offset >= 0 && offset <= 9;
}

/**
 * Tells whether a word is one of the reserved words in {@link #keywords}.
 *
 * <p>Iterates over the whole table rather than a fixed count, so entries added
 * to or removed from the table are honoured. A null argument matches nothing.
 *
 * @param str candidate word
 * @return true if {@code str} equals a keyword, false otherwise
 */
public static Boolean isMatchKeyword(String str) {
    for (String keyword : keywords) {
        if (keyword.equals(str)) {
            return true;
        }
    }
    return false;
}

/**
 * Tells whether a character is listed in {@link #oper}, the table of
 * single-character operators and delimiters.
 *
 * <p>Iterates over the whole table rather than a fixed count, so the result
 * stays correct when the table is edited.
 *
 * @param ch character to classify
 * @return true if {@code ch} appears in the table
 */
public static Boolean isOp(char ch) {
    for (char candidate : oper) {
        if (candidate == ch) {
            return true;
        }
    }
    return false;
}

/**
 * Tests whether a character is accepted by one edge label of the number automaton.
 *
 * @param ch input character
 * @param dD edge label: 'd' accepts any decimal digit, any other label accepts
 *           only the identical character
 * @return 1 if the edge accepts {@code ch}, otherwise 0
 */
public static int in_digitDFA(char ch, char dD) {
    boolean accepted = (dD == 'd') ? isDigit(ch) : (ch == dD);
    return accepted ? 1 : 0;
}

/**
 * Tests whether a character is accepted by one edge label of the string-constant
 * automaton.
 *
 * @param ch  input character
 * @param key edge label: 'a' accepts anything, a backslash or a double quote
 *            accepts only itself, 'd' accepts anything except a backslash or a
 *            double quote; every other label accepts nothing
 * @return true if the edge accepts {@code ch}
 */
public static Boolean in_stConDFA(char ch, char key) {
    switch (key) {
    case 'a':
        return true;
    case '\\':
    case '"':
        return ch == key;
    case 'd':
        return !(ch == '\\' || ch == '"');
    default:
        return false;
    }
}

/**
 * Tests whether a character is accepted by one edge label of the automaton when
 * it is used for character constants (the closing delimiter is a single quote).
 *
 * @param ch  input character
 * @param key edge label: 'a' accepts anything, a backslash accepts only a
 *            backslash, the double-quote label accepts a single quote, 'd'
 *            accepts anything except a backslash or a single quote; every other
 *            label accepts nothing
 * @return true if the edge accepts {@code ch}
 */
public static Boolean in_sinStConDFA(char ch, char key) {
    switch (key) {
    case 'a':
        return true;
    case '\\':
        return ch == '\\';
    case '"':
        // The table stores '"' as the closing-delimiter label; here it stands for '\''.
        return ch == '\'';
    case 'd':
        return !(ch == '\\' || ch == '\'');
    default:
        return false;
    }
}

/**
 * Tells whether an operator character may be followed by '=' to form a
 * two-character operator.
 *
 * @param ch operator character
 * @return true for one of + - * / = &gt; &lt; &amp; | ^
 */
public static Boolean isPlusEqu(char ch) {
    return "+-*/=><&|^".indexOf(ch) >= 0;
}

/**
 * Tells whether an operator character may be doubled to form a two-character
 * operator.
 *
 * @param ch operator character
 * @return true for one of + - &amp; |
 */
public static Boolean isPlusSame(char ch) {
    switch (ch) {
    case '+':
    case '-':
    case '&':
    case '|':
        return true;
    default:
        return false;
    }
}

/**
 * Tells whether a character, when it follows a backslash, is treated as an
 * escape-sequence letter.
 *
 * @param ch character that followed the backslash
 * @return true for one of a b f n r t v ? 0
 */
public static Boolean isEsSt(char ch) {
    final String escapeLetters = "abfnrtv?0";
    return escapeLetters.indexOf(ch) != -1;
}

/**
 * Tests whether a character is accepted by one edge label of the block-comment
 * automaton.
 *
 * @param ch input character
 * @param nD edge label; 'c' is a wildcard in states 2 and 3, any other label
 *           (and 'c' in any other state) accepts only the identical character
 * @param s  current state: in state 2 the wildcard accepts anything but '*',
 *           in state 3 anything but '*' and '/'
 * @return 1 if the edge accepts {@code ch}, otherwise 0
 */
public static int in_noteDFA(char ch, char nD, int s) {
    boolean wildcard = nD == 'c' && (s == 2 || s == 3);
    if (!wildcard) {
        return ch == nD ? 1 : 0;
    }
    boolean accepted = ch != '*' && (s == 2 || ch != '/');
    return accepted ? 1 : 0;
}

// Not read or written by any code visible in this file.
public static String code = "";

// Symbol table: maps each identifier seen so far to its entry index.
public static Map<String, Integer> symbol = new HashMap<String, Integer>();

// Entry index given to the next new identifier; main increments it after each insert.
public static int symbol_pos = 0;

// Reserved words (32 entries); a scanned word found here is reported as a keyword.
public static String keywords[] = { "auto", "double", "int", "struct",
"break", "else", "long", "switch", "case", "enum", "register",
"typedef", "char", "extern", "return", "union", "const", "float",
"short", "unsigned", "continue", "for", "signed", "void",
"default", "goto", "sizeof", "volatile", "do", "if", "while",
"static" };

// Transition table of the numeric-constant automaton (7 rows, states 0..6).
// Row s, column k holds the label of the edge s -> k: 'd' is any digit, '#' is
// "no edge", anything else is that literal character. Row 0 and column 0 are
// unused padding; main starts in state 1 and accepts in states 1, 3 and 6.
public static String digitDFA[] = { "#", "#d.#e##", "###d###", "###de##",
"#####-d", "######d", "######d" };

// Single-character operators and delimiters (22 entries). '/' is not listed:
// main handles it in a separate branch together with comments.
public static char oper[] = { '+', '-', '*', '=', '<', '>', '&', '|', '~',
'^', '!', '(', ')', '[', ']', '{', '}', '%', ';', ',', '#', '.' };

// Transition table shared by string and character constants (4 rows, states 0..3).
// Labels are interpreted by in_stConDFA / in_sinStConDFA: '\\' is a backslash,
// 'd' an ordinary character, 'a' any character, '"' the closing quote, '#' no edge.
// State 1 follows a backslash; state 3 is the accepting state.
public static String stConDFA[] = { "#\\d#", "##a#", "#\\d\"", "####" };

// Transition table of the block-comment automaton (5 rows, states 0..4).
// Labels are interpreted by in_noteDFA ('c' is a wildcard in states 2 and 3).
// main starts in state 2, after "/*", and stops on reaching state 4.
public static String noteDFA[] = { "#", "##*##", "##c*#", "##c*/", "#####" };

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: