您的位置:首页 > 编程语言 > C语言/C++

编译原理——Tiny词法分析器c++实现

2016-04-28 14:01 309 查看
Tiny语言和C-相比更为简单。在实现的时候,对文本的读取,发现回车换行是一个头疼的问题:fgetc()函数和fseek()函数对字符的计数并不是一一对应的。以文本模式打开文件时(例如在Windows下),fseek()会把回车换行("\r\n")算作两个字节,而fgetc()读到回车换行时只返回一个'\n',也就是说把回车换行符当作一个字符。因此在文本流上用fseek()回退一个字符并不可靠。

代码如下:

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define sfName "source.tiny"
#define tfName "target.tiny"
using namespace std;
// Shared scanner state. Every recognizer below reads and writes these globals.
FILE *source;       // input stream being tokenized
FILE *target;       // output stream that receives the token listing
int sequence=0;     // running index printed in front of each emitted token
int s[5]={0};       // automaton state slots; each recognizer is handed the index of its own slot
char str[200]="";   // lexeme of the token currently being built
char op[10]={'+','-','*','/',':','=','<',';','(',')'};  // operator characters (exactly 10, no NUL terminator)
char line[200]="";  // buffer for one line of input
// String literals are const in C++, so the tables hold pointers to const char.
const char *resw[]={"if","then","else","end","repeat","until","read","write"};  // reserved words
const char *token[]={"ID","NUM","RESW","OP","ERROR","COM"};                     // printable names, indexed by TokenType
typedef enum{ID=0,NUM,RESW,OP,ERROR,COM}TokenType;

// Reset the per-token scratch data: wipe the lexeme buffer `str` and put
// every automaton state slot in `s` back to 0.
void clearState()
{
    memset(str, '\0', sizeof(str));

    const size_t slotCount = sizeof(s) / sizeof(s[0]);
    for (size_t slot = 0; slot < slotCount; ++slot)
    {
        s[slot] = 0;
    }
}

// Write the current lexeme (`str`) to `target`, prefixed with the running
// sequence number, then advance the sequence number.
// Comments are written as the raw text; every other token kind is written
// as "<KIND,lexeme>".
void printStr(TokenType t)
{
    if (t != COM)
    {
        fprintf(target,"%d:<%s,%s>\n",sequence,token[t],str);
    }
    else
    {
        fprintf(target,"%d:%s\n",sequence,str);
    }
    ++sequence;
}

void comment(char ch,int i)
{
bool isExit = false;
int count =0;
while(!feof(source))
{
switch( s[i] )
{
case 0:
if(ch=='{')
{
s[i]=1;
str[count]=ch;
count++;
}
break;
case 1:
if(ch=='}')
{
s[i]=2;
isExit = true;
str[count]=ch;
printStr(COM);
}else
{
s[i]=1;
str[count]=ch;
count++;
}
break;
default:cout<<"data error\n";
}
if(isExit) return;
ch = fgetc(source);
}
}

// Recognize an unsigned integer literal and emit it as a NUM token.
//
// ch - first character of the literal (the scanner passes a digit).
// i  - index of this recognizer's slot in the global state array `s`.
//
// Digits are collected in the global `str`, which must have been zeroed
// beforehand because no terminator is written here. Digits beyond the
// capacity of `str` are consumed but not stored.
// The first non-digit character is pushed back with ungetc() so the caller
// reads it again; unlike a relative fseek() this is well defined on text
// streams. A literal that runs up to end of input is still emitted.
void digit(char ch,int i)
{
    const size_t capacity = sizeof(str) - 1;   // last byte stays '\0'
    size_t len = 0;
    // Work with int so that EOF can never be confused with a data byte.
    int c = static_cast<unsigned char>(ch);

    while (c != EOF && c >= '0' && c <= '9')
    {
        s[i] = 1;
        if (len < capacity) str[len++] = static_cast<char>(c);
        c = fgetc(source);
    }

    s[i] = 0;
    if (c != EOF)
    {
        ungetc(c, source);   // the terminating character belongs to the next token
    }
    printStr(NUM);
}

// Return true when `ch` equals one of the 10 entries of the global operator
// table `op`, false otherwise.
bool isOperator(char ch)
{
    const char *const tableEnd = op + 10;
    const char *cursor = op;
    while (cursor != tableEnd)
    {
        if (*cursor == ch)
        {
            return true;
        }
        ++cursor;
    }
    return false;
}

// Recognize an operator starting at `ch` and emit it.
//
// ch - first character of the operator.
// i  - index of this recognizer's slot in the global state array `s`
//      (0 = start, 1 = ':' seen, 2 = ":=" complete).
//
// Single-character operators (+ - * / < ( ) ; =) are emitted as OP.
// ':' is only valid as the start of ":=". If the next character is not '=',
// that character is pushed back for the caller and the lone ':' is emitted as
// an ERROR token, rather than being glued onto whatever follows.
// Any other character is emitted as ERROR.
// The lexeme is stored in the global `str`, which must have been zeroed
// beforehand because no terminator is written here.
void myOperator(char ch,int i)
{
    str[0] = ch;

    if (ch == ':')
    {
        s[i] = 1;
        const int next = fgetc(source);
        if (next == '=')
        {
            s[i] = 2;
            str[1] = '=';
            printStr(OP);
        }
        else
        {
            s[i] = 0;
            if (next != EOF)
            {
                ungetc(next, source);   // not part of this token
            }
            printStr(ERROR);
        }
        return;
    }

    switch (ch)
    {
    case '+': case '-': case '*': case '/':
    case '<': case '(': case ')': case ';':
    case '=':
        s[i] = 0;
        printStr(OP);
        break;
    default:
        printStr(ERROR);
        break;
    }
}

// Return true when the current lexeme in the global `str` is exactly one of
// the 8 reserved words listed in the global table `resw`.
bool isResw()
{
    const int reservedCount = 8;
    int index = 0;
    while (index < reservedCount)
    {
        if (strcmp(str, resw[index]) == 0)
        {
            return true;
        }
        ++index;
    }
    return false;
}

// Recognize a run of ASCII letters and emit it as RESW (when it matches a
// reserved word) or ID.
//
// ch - first character of the word (the scanner passes a letter).
// i  - index of this recognizer's slot in the global state array `s`.
//
// Letters are collected in the global `str`, which must have been zeroed
// beforehand because no terminator is written here. Letters beyond the
// capacity of `str` are consumed but not stored.
// The first non-letter character is pushed back with ungetc() so the caller
// reads it again; unlike a relative fseek() this is well defined on text
// streams. End of input is detected on the int returned by fgetc(), so a
// word that ends the file is emitted regardless of whether plain `char` is
// signed.
void identifier(char ch,int i)
{
    const size_t capacity = sizeof(str) - 1;   // last byte stays '\0'
    size_t len = 0;
    int c = static_cast<unsigned char>(ch);

    while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
    {
        s[i] = 1;
        if (len < capacity) str[len++] = static_cast<char>(c);
        c = fgetc(source);   // EOF is not a letter, so it ends the loop too
    }

    s[i] = 0;
    if (c != EOF)
    {
        ungetc(c, source);   // the terminating character belongs to the next token
    }

    if (isResw())
        printStr(RESW);
    else
        printStr(ID);
}

// Tokenize the whole `source` stream.
//
// Reads one character at a time and dispatches on it:
//   whitespace      -> skipped
//   '{'             -> comment()    (state slot 0)
//   digit           -> digit()      (state slot 1)
//   operator char   -> myOperator() (state slot 2)
//   ASCII letter    -> identifier() (state slot 3)
//   anything else   -> emitted as a one-character ERROR token
// After every character the shared state is reset with clearState().
//
// The previous version tried to pre-read a line with a loop whose condition
// (ch!='\n' || ch!=EOF) is always true, so it never terminated and overran
// `line`; that pre-read served no purpose and has been removed.
void scanner()
{
    int c;   // int, so EOF is distinguishable from every data byte
    while ((c = fgetc(source)) != EOF)
    {
        const char ch = static_cast<char>(c);

        if (ch=='\n' || ch=='\r' || ch==' ' || ch=='\t')
        {
            // skip whitespace ('\r' included so CRLF files read in binary-like
            // environments do not produce ERROR tokens)
        }
        else if (ch=='{')
        {
            comment(ch,0);
        }
        else if (ch>='0' && ch<='9')
        {
            digit(ch,1);
        }
        else if (isOperator(ch))
        {
            myOperator(ch,2);
        }
        else if ((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z'))
        {
            identifier(ch,3);
        }
        else
        {
            str[0] = ch;
            printStr(ERROR);
        }

        clearState();   // reset state slots and lexeme buffer for the next token
    }
}

int main()
{
FILE *fp;
char s;
if((fp=fopen(sfName,"r"))==NULL)
{
printf("file open error!\n");
exit(0);
}else
{
s = fgetc(fp);
while(!feof(fp))
{
printf("%c",s); // diaplay source.tiny
s=fgetc(fp);
}
printf("\n\n");
}
fclose(fp);

if((source=fopen(sfName,"r"))==NULL)
{
printf("文件打开错误!\n");
exit(0);
}
if((target=fopen(tfName,"w"))==NULL)
{
printf("文件打开错误!\n");
exit(0);
}
scanner();        //********************entrance******************************//
fclose(source);
fclose(target);
cout<<"词法分析如下"<<endl;
if((fp=fopen(tfName,"r"))==NULL)
{
printf("文件打开错误!\n");
exit(0);
}else
{
s = fgetc(fp);
while(!feof(fp))
{
printf("%c",s); // diaplay target.tiny
s=fgetc(fp);
}
printf("\n\n");
}
fclose(fp);
return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息