您的位置:首页 > 运维架构 > Linux

Linux下用lex/yacc实现的一个小汇编器,用于体系实习2,是实习中唯一拿得出手的东西

2007-07-04 00:29 465 查看
上学期在编译实习课上,我在Windows环境中用Lex/Yacc完成了一个不大不小的Cm(C minus,呵呵,比C小多了)编译器。而今天做体系实习2,要求设计一个新的指令系统,然后写汇编,再翻译成二进制,最后放到simplescalar上执行。汇编到二进制要是手工翻译就很麻烦了。

由于汇编到二进制基本上是直接翻译即可,所以可以使用awk来完成。但我花了一个通宵的时间(也不算,还做了别的事情),在Linux环境下使用GNU的lex(flex)和yacc(bison)写了一个简单的汇编器,贴出来让初学者看看;如果你是高手,就帮忙找找毛病啦。我的程序没有进行错误处理,默认从stdin读取输入,并且假定输入是正确的。

首先先看一个写好的简单汇编代码.


# Sample input for the assembler: a data segment followed by an insertion sort.
DATA SEG:


10 # first data word; the code below loads it into $3 and passes it to swi 2 as the length


10


9


8


7


6


5


4


3


2


1


DATA END




CODE SEG:


addi $4, $0, 512 # could equally be written as addi $4, $0, 0x200


lw $3, $4, 0 # may also be written as lw $3, 0($4)


add $2, $3, $0


addi $1, $4, 4


swi 2


addi $5, $0, 1


add $11, $3, $3


add $11, $11, $11


add $11, $11, $4


addi $11, $11, 4


addi $6, $4, 4






L4: slt $9, $6, $11


beq $9, $0, L1


lw $8, $6, 0 # $8 = key


addi $7, $6, -4 # A+j = A+i - 4


L3: slt $9, $4, $7


beq $9, $0, L2


lw $10, $7, 0


slt $9, $8, $10


beq $9, $0, L2 # leave the inner loop when key < $10 does not hold


sw $10, $7, 4 # presumably stores $10 one word above $7 (shifts the element up) - confirm sw semantics


addi $7, $7, -4


beq $0, $0, L3


L2: sw $8, $7, 4


addi $6, $6, 4


beq $0, $0, L4


L1: swi 2 # swi 2 is a system call that displays a range of memory: $1 is the base address, $2 the length


swi 1 # swi 1 is a system call that exits the program (custom-defined by the authors)


CODE END





呵呵,是一个Insert Sort排序,对上面的十个数排序.

要生成二进制代码和二进制文件.我的Lex/Yacc的程序是在控制台里运行,标准输入输出.文件只需使用管道就好.

下面首先是lex文件:

%{
/*
 * Scanner for the toy assembler.
 *
 * Turns the assembly text on stdin into the tokens declared in myp.tab.h.
 * Keywords and punctuation map to their own tokens, decimal and hexadecimal
 * literals become NUMBER (value in yylval.ival), label names become ID
 * (text in yylval.imm).  '#' starts a comment that runs to the end of line.
 */
#include "myp.tab.h"
extern "C"{
int yywrap(void);
int yylex(void);
}
%}
%option yylineno
%x cc
%%
0[xX][0-9a-fA-F]+ {
    /* Complete hexadecimal literal (digits a-f included), e.g. 0x200. */
    yylval.ival = (int)strtol(yytext, NULL, 16);
    return NUMBER;
}
"0x" { /* bare prefix, kept for the grammar's HEX NUMBER form */ return HEX; }
"CODE SEG" { return CODEBEG; }
"CODE END" { return CODEEND; }
"DATA SEG" { return DATABEG; }
"DATA END" { return DATAEND; }
"lw" { return LW; }
"sw" { return SW; }
"add" { return ADD; }
"addi" { return ADDI; }
"sub" { return SUB; }
"subi" { return SUBI; }
"mult" { return MULT; }
"multi" { return MULTI; }
"swi" { return SWI; }
"jmp" { return JMP; }
"slt" { return SLT; }
"beq" { return BEQ; }
"bne" { return BNE; }
"swp" { return SWP; }
[0-9]+ {
    yylval.ival = atoi(yytext);
    return NUMBER;
}
[a-zA-Z_][0-9a-zA-Z_]* {
    /* yylval.imm is a fixed-size buffer: copy at most what fits and
       always terminate, so an over-long label cannot overflow it. */
    strncpy(yylval.imm, yytext, sizeof(yylval.imm) - 1);
    yylval.imm[sizeof(yylval.imm) - 1] = '\0';
    return ID;
}
"(" { return '('; }
")" { return ')'; }
":" { return ':'; }
"," { return ','; }
"$" { return '$'; }
"-" { return '-'; }
[ \t\r\n]+ { /* skip blanks, tabs and line ends */ }
"#" { BEGIN(cc); }
<cc>[^\n]* { /* discard the comment text */ }
<cc>\n { /* the comment ends with the line */ BEGIN(INITIAL); }
. { /* anything else goes to the parser, which reports a syntax error
       instead of the default rule echoing the character into the output */
    return yytext[0]; }

%%
/* Called by the scanner at end of input; returning 1 means there is no further input. */
int yywrap(void)
{
    return 1;
}

呵呵,下面是Yacc文件

%{
/*
 * Global state of the assembler, shared with fillback() in src/func.cpp
 * through extern declarations there.
 */
#include<cstring>   /* strcpy / strcat used by the grammar actions */
#include<iostream>
#include<string>
#include<fstream>
/* Keep func.h after the standard headers: it defines a macro named `max`. */
#include "src/func.h"
using namespace std;
extern "C"{
void yyerror(const char *s);
extern int yylex(void);
}

char bin[MAX][33]; // generated output: one string of '0'/'1' characters per word
int binNum = 0; // number of entries of bin[] in use
int data[MAX]; // values collected from the data segment
int dataNum = 0; // number of data segment values
int curPC = 0; // PC value of the instruction being processed
int dataInd = 0; // not referenced by the grammar in this file
char pindex[max][33]; // label names
int indPo[max]; // PC value recorded for each label (PC is 4x the instruction position)
int indexNum = 0; // number of labels
// conditional branches
char backId[max][33]; // labels that need back-patching
int backPo[max]; // index in bin[] of the instruction to patch
int backPC[max]; // PC value of the instruction to patch
int backNum = 0; // number of pending back-patches

// Unconditional jumps: the current instruction set (built only to run the
// insertion sort sample) has no unconditional jump, but the design allows for it.

char jbackId[max][33]; // JMP labels that need back-patching
int jbackPo[max]; // index in bin[] of the JMP instruction to patch
int jbackNum = 0; // number of pending JMP back-patches
%}

/* Semantic values: label text, opcode bit string, or integer. */
%union {
char imm[33];   /* identifier / label text copied by the scanner */
char opName[7]; /* six opcode digits written by int2bin6 plus the terminator */
int ival;       /* numeric literal */
}
%start PROGRAM
%token ADDI ADD SUB SUBI LW SWI SW MULT MULTI DATABEG DATAEND CODEBEG CODEEND BEQ BNE SLT JMP SWP HEX
%token<imm> ID
%type<opName> I_OP R_OP J_OP
%token<ival> NUMBER
%type<ival>IMM
%%

/* PROGRAM: a data segment followed by a code segment.  After both are parsed
   the pending label references are patched and every word of bin[] is
   printed, one per line.  If patching fails (fillback() has already reported
   the missing label) the parse is aborted so that no incomplete image is
   emitted; yyparse() then returns non-zero. */
PROGRAM: DATASEG CODESEG{
if(!fillback()){
YYABORT;
}
for(int i = 0;i< binNum;i++)
cout<<bin[i]<<endl;
}
;
/* DATASEG: once every data value has been collected, render each as a
   32-character bit string into bin[2], bin[3], ...  Slots 0 and 1 are left
   free here; fillback() writes them.  binNum then points just past the data. */
DATASEG: DATABEG ':' DATA DATAEND{
int i = 0;
char temp[33];
for(i = 0;i < dataNum; i++){
int2bin32(data[i],temp);
strcpy(bin[i+2],temp);
}
binNum = dataNum + 2;
}
;
/* DATA: a possibly empty list of immediates; the mid-rule action appends
   each value to data[] as soon as it is read. */
DATA: IMM {data[dataNum++] = $1;} DATA
|
;
/* CODESEG: the instruction list between the code segment markers. */
CODESEG:CODEBEG ':' CODE CODEEND{
}
;
/* CODE: a possibly empty sequence of statements. */
CODE: STATE CODE
|
;
/* STATE: one instruction, optionally preceded by "label:".  The mid-rule
   action records the label name together with the current PC before the
   instruction is parsed.  After each instruction the PC advances by 4 and
   binNum moves to the next output slot. */
STATE: ID {
strcpy(pindex[indexNum],$1);
indPo[indexNum++] = curPC;
} ':' COMMAND
{curPC += 4;binNum++;}
| COMMAND {curPC += 4;binNum++;}
;
/* COMMAND: one instruction of any of the three formats. */
COMMAND:I_COM
|R_COM
|J_COM
;
/* I_COM: immediate-format instructions.  Each alternative writes into
   bin[binNum] the 6-digit opcode, two 5-digit register numbers and a 16-digit
   immediate field.
     op $a, imm($b)   - immediate with base register in parentheses
     op $a, $b, imm   - immediate as third operand
     op $a, $b, label - the 16-digit field is appended later by fillback()
     op $a, $b        - the 16-digit field is all zeros
   Every alternative starts the word with strcpy so that it never depends on
   the previous content of bin[binNum]; temp starts out empty so that a value
   rejected by a converter appends nothing instead of uninitialised bytes. */
I_COM:I_OP '$' NUMBER ',' IMM '(' '$' NUMBER ')'
{
char temp[17] = "";
strcpy(bin[binNum],$1);
int2bin5($3,temp);
strcat(bin[binNum],temp);
int2bin5($8,temp);
strcat(bin[binNum],temp);
int2bin16($5,temp);
strcat(bin[binNum],temp);
}
|I_OP '$' NUMBER ',' '$' NUMBER ',' IMM
{
char temp[17] = "";
strcpy(bin[binNum], $1);
int2bin5($3,temp);
strcat(bin[binNum],temp);
int2bin5($6,temp);
strcat(bin[binNum],temp);
int2bin16($8, temp);
strcat(bin[binNum],temp);
}
| I_OP '$' NUMBER ',' '$' NUMBER ',' ID
{
char temp[17] = "";

strcpy(bin[binNum],$1);
int2bin5($3,temp);
strcat(bin[binNum],temp);
int2bin5($6,temp);
strcat(bin[binNum],temp);
/* remember label, PC and output slot for back-patching */
strcpy(backId[backNum],$8);
backPC[backNum] = curPC;
backPo[backNum++] = binNum;

}
| I_OP '$' NUMBER ',' '$' NUMBER
{
char temp[17] = "";
strcpy(bin[binNum],$1);
int2bin5($3,temp);
strcat(bin[binNum], temp);
int2bin5($6,temp);
strcat(bin[binNum],temp);
strcat(bin[binNum],"0000000000000000");
}
;
/* R_COM: register-format instruction "op $a, $b, $c".  Writes the 6-digit
   opcode, three 5-digit register numbers in source order, then eleven '0'
   digits into bin[binNum]. */
R_COM: R_OP '$' NUMBER ',' '$' NUMBER ',' '$' NUMBER
{
char temp[6];
strcpy(bin[binNum],$1);
int2bin5($3,temp);
strcat(bin[binNum],temp);
int2bin5($6,temp);
strcat(bin[binNum],temp);
int2bin5($9,temp);
strcat(bin[binNum],temp);
strcat(bin[binNum],"00000000000");
}
;
/* J_COM: jump-format instructions.
     op label  - only the opcode is written now; the reference is queued in
                 jbackId/jbackPo so fillback() can append the 26-digit target.
     op number - the 26-digit field is produced immediately. */
J_COM: J_OP ID{
strcpy(bin[binNum],$1);
strcpy(jbackId[jbackNum],$2);
jbackPo[jbackNum] = binNum;
jbackNum++; /* without this the queued entry is never seen by fillback() */
}
| J_OP NUMBER{
strcpy(bin[binNum],$1);
char temp[27] = ""; /* stays empty if the value is rejected */
if(!int2bin26($2,temp))
cout<<"J_op Number:failed"<<endl;
strcat(bin[binNum],temp);
}
;
/* I_OP / R_OP / J_OP: map each mnemonic token to its opcode constant from
   func.h and store the 6-digit bit string produced by int2bin6 in $$. */
I_OP: ADDI { int2bin6(opAddi,$$);}
| SUBI { int2bin6(opSubi,$$);}
| MULTI { int2bin6(opMulti,$$);}
| LW { int2bin6(opLw,$$);}
| SW { int2bin6(opSw,$$);}
| BEQ { int2bin6(opBeq,$$);}
| BNE { int2bin6(opBne,$$);}
| SWP { int2bin6(opSwp,$$);}
;
R_OP: ADD { int2bin6(opAdd,$$);}
| SUB { int2bin6(opSub,$$);}
| MULT { int2bin6(opMult,$$);}
| SLT { int2bin6(opSlt,$$);}
;
J_OP: JMP { int2bin6(opJmp,$$);}
| SWI { int2bin6(opSwi,$$);}
;
/* IMM: an integer operand - plain decimal, HEX followed by a number whose
   decimal digits are reinterpreted as hexadecimal by hex2dec, or a negated
   decimal. */
IMM: NUMBER { $$ = $1;}
| HEX NUMBER { $$ = hex2dec($2); }
| '-' NUMBER { $$ = 0-$2;}
;
%%
void yyerror(const char*s)
{
extern int yylineno;
cout<<"error: "<<s<<": line"<<yylineno<<endl;
}
/* Entry point: assembles the program read from standard input and writes the
   result to standard output.  The command-line arguments are not used.
   Returns 0 when the parse succeeded and 1 when yyparse() reported a failure,
   so callers and scripts can detect a failed assembly. */
int main(int args,char *argv[])
{
(void)args;
(void)argv;
return yyparse() == 0 ? 0 : 1;
}

然后是func.cpp和func.h文件,两个文件放在src子目录里面

func.h


/* func.h - constants and helper prototypes shared by the parser and func.cpp. */
#ifndef FUNC_H


#define FUNC_H


/* Capacity of the output word table and of the data value table. */
#define MAX 1000


/* Capacity of the label table and of the back-patch tables. */
#define max 100




/* Opcode numbers; the grammar passes them to int2bin6. */
#define opAdd 1


#define opAddi 2


#define opSlt 3


#define opBeq 4


#define opLw 5


#define opSw 6


#define opSwi 7


#define opSwp 8




/* NOTE(review): the six mnemonics below all share the value 0 - presumably
   placeholders for instructions that have no opcode assigned yet; confirm. */
#define opBne 0


#define opJmp 0


#define opSub 0


#define opSubi 0


#define opMult 0


#define opMulti 0








/* Write the low 5 bits of a into b as '0'/'1' characters (register number). */
bool int2bin5(int a, char *b);


/* Write a into b as 16 '0'/'1' characters (I-format immediate). */
bool int2bin16(int a, char *b);


/* Write a into b as 26 '0'/'1' characters (J-format immediate). */
bool int2bin26(int a,char *b);


/* Write the low 6 bits of a into b as '0'/'1' characters (opcode). */
bool int2bin6(int a,char *b);


/* Write a into b as 32 '0'/'1' characters (data word). */
bool int2bin32(int a,char *b);


/* Patch the queued label references and write the two header words;
   returns false when a referenced label is not defined. */
bool fillback();


/* Reinterpret the decimal digits of a as hexadecimal digits. */
int hex2dec(int a);


#endif



func.cpp


#include<cstring>
#include<iostream>
// func.h defines a macro named `max`; include it after the standard headers.
#include "func.h"
using namespace std;
#define MAX 1000
#define max 100
/*
 * Convert a register number to a 5-character string of '0'/'1'.
 *
 * a: value to convert; only its low 5 bits are used.
 * b: destination, must have room for 6 bytes (5 digits + terminator).
 * Returns true always.
 */
bool int2bin5(int a,char *b)
{
    const unsigned int bits = (unsigned int)a;
    for (int i = 0; i < 5; i++) {
        /* most significant of the five bits first */
        b[i] = ((bits >> (4 - i)) & 1u) ? '1' : '0';
    }
    b[5] = '\0'; /* a real NUL terminator, not the character '0' */
    return true;
}
/*
 * Convert an opcode to a 6-character string of '0'/'1'.
 *
 * a: value to convert; only its low 6 bits are used.
 * b: destination, must have room for 7 bytes (6 digits + terminator).
 * Returns true always.
 */
bool int2bin6(int a,char *b)
{
    const unsigned int bits = (unsigned int)a;
    for (int i = 0; i < 6; i++) {
        /* most significant of the six bits first */
        b[i] = ((bits >> (5 - i)) & 1u) ? '1' : '0';
    }
    b[6] = '\0'; /* a real NUL terminator, not the character '0' */
    return true;
}
/*
 * Convert the immediate of a J-format instruction to a 26-character string
 * of '0'/'1' (two's complement).
 *
 * a: value to convert; must fit in a signed 26-bit field.
 * b: destination, must have room for 27 bytes (26 digits + terminator).
 * Returns true on success.  If a does not fit, returns false and leaves an
 * empty string in b so that a caller which appends b regardless adds nothing.
 */
bool int2bin26(int a,char *b)
{
    const int limit = 1 << 25;
    /* Compare against the whole field range; testing bit 25 alone would let
       values such as 1<<26 pass and be silently truncated. */
    if (a < -limit || a >= limit) {
        b[0] = '\0';
        return false;
    }
    const unsigned int bits = (unsigned int)a;
    for (int i = 0; i < 26; i++) {
        b[i] = ((bits >> (25 - i)) & 1u) ? '1' : '0';
    }
    b[26] = '\0'; /* a real NUL terminator, not the character '0' */
    return true;
}
/*
 * Convert the immediate of an I-format instruction to a 16-character string
 * of '0'/'1' (two's complement).
 *
 * a: value to convert; must fit in a signed 16-bit field (-32768..32767).
 * b: destination, must have room for 17 bytes (16 digits + terminator).
 * Returns true on success.  If a does not fit, returns false and leaves an
 * empty string in b so that a caller which appends b regardless adds nothing.
 */
bool int2bin16(int a,char *b)
{
    const int limit = 1 << 15;
    /* Compare against the whole field range; testing bit 15 alone would let
       values such as 65536 pass and be silently truncated. */
    if (a < -limit || a >= limit) {
        b[0] = '\0';
        return false;
    }
    const unsigned int bits = (unsigned int)a;
    for (int i = 0; i < 16; i++) {
        b[i] = ((bits >> (15 - i)) & 1u) ? '1' : '0';
    }
    b[16] = '\0'; /* a real NUL terminator, not the character '0' */
    return true;  /* success must be reported as true */
}
/*
 * Convert a data-segment value to a 32-character string of '0'/'1'
 * (two's complement of the low 32 bits).
 *
 * a: value to convert.
 * b: destination, must have room for 33 bytes (32 digits + terminator).
 * Returns true always: assuming int is no wider than 32 bits, every value
 * fits, so there is nothing to reject.
 */
bool int2bin32(int a,char *b)
{
    /* Work on the unsigned representation: shifting 1 into the sign bit of a
       signed int (1<<31) is undefined behaviour. */
    const unsigned int bits = (unsigned int)a;
    for (int i = 0; i < 32; i++) {
        b[i] = ((bits >> (31 - i)) & 1u) ? '1' : '0';
    }
    b[32] = '\0'; /* a real NUL terminator, not the character '0' */
    return true;  /* success must be reported as true */
}

/*
 * Back-patching pass, run after the whole program has been parsed.
 *
 * 1. For every queued conditional-branch reference, look the label up in
 *    pindex[] and append the 16-digit field holding
 *    (label PC - branch PC) / 4 to the instruction word.
 * 2. For every queued jump reference, append the 26-digit field holding the
 *    label's PC.
 * 3. Write the two header words: bin[0] = binNum - 2 and
 *    bin[1] = 0x200 + dataNum * 4.
 *
 * Returns false (after printing "<label> not found") as soon as a referenced
 * label is missing; in that case the header words are not written.
 */
bool fillback()
{
    extern char pindex[max][33],bin[MAX][33],
    backId[max][33],jbackId[max][33];
    extern int backPo[max], backNum,backPC[max];
    extern int jbackPo[max],jbackNum;
    extern int indPo[max],indexNum;
    extern int dataNum,binNum;

    char field[33];

    /* conditional branches: PC-relative offset counted in instructions */
    for (int ref = 0; ref < backNum; ++ref) {
        int label = 0;
        while (label < indexNum && strcmp(backId[ref], pindex[label]) != 0) {
            ++label;
        }
        if (label == indexNum) {
            cout<<backId[ref]<<" not found"<<endl;
            return false;
        }
        const int offset = (indPo[label] - backPC[ref]) / 4;
        int2bin16(offset, field);
        strcat(bin[backPo[ref]], field);
    }

    /* unconditional jumps: the label's PC value */
    for (int ref = 0; ref < jbackNum; ++ref) {
        int label = 0;
        while (label < indexNum && strcmp(jbackId[ref], pindex[label]) != 0) {
            ++label;
        }
        if (label == indexNum) {
            cout<<jbackId[ref]<<"not found"<<endl;
            return false;
        }
        int2bin26(indPo[label], field);
        strcat(bin[jbackPo[ref]], field);
    }

    /* header at the start of the image: first a count, then the start address */
    int2bin32(binNum - 2, field);
    strcpy(bin[0], field);
    int2bin32(0x200 + dataNum * 4, field);
    strcpy(bin[1], field);
    return true;
}
/*
 * Reinterpret the decimal digits of a as hexadecimal digits.
 *
 * The scanner reads the digits after "0x" as a decimal number, so 200 stands
 * for 0x200; this returns the value those digits have in base 16 (512).
 */
int hex2dec(int a)
{
    int value = 0;
    /* peel off decimal digits from the right; each is worth 16x the previous */
    for (int weight = 1; a != 0; weight *= 16, a /= 10) {
        value += (a % 10) * weight;
    }
    return value;
}

接着就是Makefile了

# Builds the assembler `myasm` from the flex scanner (myassm.l), the bison
# parser (myp.y) and the helpers in src/.  Recipe lines start with a tab.
LEX=flex
YACC=bison
CPP=g++
CC=gcc

# `all` and `clean` are commands, not files.
.PHONY: all clean

all:myasm

myasm:lex.yy.o myp.tab.o func.o
	$(CPP) lex.yy.o myp.tab.o func.o -o myasm

func.o:src/func.cpp src/func.h
	$(CPP) -c src/func.cpp -o func.o

lex.yy.o:lex.yy.c myp.tab.h
	$(CPP) -c lex.yy.c

# The parser includes src/func.h, so it must be rebuilt when the header changes.
myp.tab.o:myp.tab.c src/func.h
	$(CPP) -c myp.tab.c

# bison -d writes both the parser source and the token header.
myp.tab.c myp.tab.h:myp.y
	$(YACC) -d myp.y

lex.yy.c:myassm.l myp.tab.h
	$(LEX) myassm.l

# Remove only generated files by name; a blanket *.c / *.h would also delete
# any hand-written source placed in this directory.
clean:
	rm -f *.o lex.yy.c myp.tab.c myp.tab.h myasm
	rm -f *.bin *.txt

嘿嘿,这就是整个代码,在debian 3.1r5,gcc g++版本为4.1.2,flex,bison 环境下运行很正常.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐