您的位置:首页 > 编程语言 > Lua

Lua虚拟机之语法分析(二)

2014-05-29 23:19 246 查看
      最近实在是忙,项目就快要发布了,加班加点在所难免。继上一篇关于lua基本数据类型的简单分析后,我将继续写阅读lua虚拟机的源代码笔记,这是第一次接触虚拟机工作原理,lua虚拟机代码量不大,是个很好的学习的例子,应当坚持下去。关于语法分析,我觉得只需要弄清楚从哪分析,怎么分析以及最终的生成结果就差不多了。

      当通过调用lstate.c:lua_newstate()方法生成一个新的lua_State时,其内部又会调用llex.c:luaX_init方法,该方法的作用是生成lua的关键字(保留字)到global_State的字符串表中(stringtable,以哈希表的形式保存),关于lua的保留字,在llex.c中可看到下面这段:

/* ORDER RESERVED */
static const char *const luaX_tokens [] = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function", "goto", "if",
"in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while",
"..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
"<number>", "<name>", "<string>"
};      而真正让虚拟机开始分析源代码,则是通过lapi.c:lua_load函数,lauxlib.c中则封装了对lua_load的调用,其中包括从文件加载源码的lauxlib.c:luaL_loadfilex,以及从内存缓冲区加载源码的lauxlib.c:luaL_loadbufferx。在lua_load中,除了必要的初始化,比如生成一个ZIO对象(流对象)负责处理文件或字符串的输入,还调用了ldo.c:luaD_protectedparser函数,而在这个函数中,又最终调用了lparser.c:luaY_parser函数,好了,虚拟机开始进入源码的分析阶段了,代码如下:
/*
** Parser entry point: builds the main closure for one chunk of source.
** Creates a Lua closure and a fresh Proto, pushes the closure on the stack,
** wires `buff' and `dyd' into a local LexState, hands the input stream `z'
** (together with `firstchar') to luaX_setinput, and then runs mainfunc.
** `name' becomes the `source' string of the new Proto.
** Returns the closure, which is also left on the stack.
*/
Closure *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff,
Dyndata *dyd, const char *name, int firstchar) {
LexState lexstate; // lexer state: token reading, advancing, and so on
FuncState funcstate; // function state; one is set up here for each source file or source string that is parsed
Closure *cl = luaF_newLclosure(L, 1); /* create main closure */
/* anchor closure (to avoid being collected) */
setclLvalue(L, L->top, cl);
incr_top(L);
funcstate.f = cl->l.p = luaF_newproto(L);
funcstate.f->source = luaS_new(L, name); /* create and anchor TString */
lexstate.buff = buff;
lexstate.dyd = dyd;
/* start with empty active-variable, goto and label lists */
dyd->actvar.n = dyd->gt.n = dyd->label.n = 0;
luaX_setinput(L, &lexstate, z, funcstate.f->source, firstchar);
mainfunc(&lexstate, &funcstate);
lua_assert(!funcstate.prev && funcstate.nups == 1 && !lexstate.fs);
/* all scopes should be correctly finished */
lua_assert(dyd->actvar.n == 0 && dyd->gt.n == 0 && dyd->label.n == 0);
return cl; /* it's on the stack too */ // the returned closure was pushed at L->top above
}      关于LexState对象,其责任就是记录当前的行号、符号、期望的下一个符号、读取字符串或者数字,当然还有很多其他的信息,具体可以参看llex.h中的定义。而FuncState中,则保存了函数的原型(Proto),包含的元素,所在的块,下一个字节码的地址,局部变量个数等,具体定义如下:(lparser.h)
/*
** state needed to generate code for a given function
** (`prev' chains to the state of the enclosing function; `f' is the
** Proto being filled in)
*/
typedef struct FuncState {
Proto *f; /* current function header */
Table *h; /* table to find (and reuse) elements in `k' */
struct FuncState *prev; /* enclosing function */
struct LexState *ls; /* lexical state */
struct BlockCnt *bl; /* chain of current blocks */
int pc; /* next position to code (equivalent to `ncode') */
int lasttarget; /* 'label' of last 'jump label' */
int jpc; /* list of pending jumps to `pc' */
int nk; /* number of elements in `k' */
int np; /* number of elements in `p' */
int firstlocal; /* index of first local var (in Dyndata array) */
short nlocvars; /* number of elements in 'f->locvars' */
lu_byte nactvar; /* number of active local variables */
lu_byte nups; /* number of upvalues */
lu_byte freereg; /* first free register */
} FuncState;      还有几个比较重要的结构,分别是闭包与原型,定义分别如下:
/*
** Function Prototypes
** (holds the arrays produced for one function -- constants, bytecode,
** nested prototypes, debug tables -- each paired with a size field)
*/
typedef struct Proto {
CommonHeader;
TValue *k; /* constants used by the function */
Instruction *code; /* bytecode array */
struct Proto **p; /* functions defined inside the function */
int *lineinfo; /* map from opcodes to source lines (debug information) */
LocVar *locvars; /* information about local variables (debug information) */
Upvaldesc *upvalues; /* upvalue information */
union Closure *cache; /* last created closure with this prototype */
TString *source; /* used for debug information */
int sizeupvalues; /* size of 'upvalues' */
int sizek; /* size of `k' */
int sizecode; /* size of the bytecode array */
int sizelineinfo; /* presumably size of `lineinfo' -- confirm */
int sizep; /* size of `p' */
int sizelocvars; /* presumably size of `locvars' -- confirm */
int linedefined;
int lastlinedefined;
GCObject *gclist;
lu_byte numparams; /* number of fixed parameters */
lu_byte is_vararg;
lu_byte maxstacksize; /* maximum stack used by this function */
} Proto;

/*
** Lua Upvalues
** (`v' is the pointer used to reach the value; the union `u' holds
** either the value itself or the links of a doubly linked list)
*/
typedef struct UpVal {
CommonHeader;
TValue *v; /* points to stack or to its own value */
union {
TValue value; /* the value (when closed) */
struct { /* double linked list (when open) */
struct UpVal *prev;
struct UpVal *next;
} l;
} u;
} UpVal;

/*
** Closures
*/

/*
** Fields placed at the start of both closure structs: the common
** header, `nupvalues' (presumably the number of upvalues -- confirm)
** and a GCObject link.
*/
#define ClosureHeader \
CommonHeader; lu_byte nupvalues; GCObject *gclist

/*
** Closure around a C function: `f' is the lua_CFunction and the
** upvalues are stored directly as TValue entries.
** NOTE(review): `upvalue' is declared with one element; presumably the
** struct is allocated with room for `nupvalues' entries -- confirm.
*/
typedef struct CClosure {
ClosureHeader;
lua_CFunction f;
TValue upvalue[1]; /* list of upvalues */
} CClosure;

/*
** Closure around a function prototype: `p' points to the Proto and the
** upvalues are held as pointers to UpVal objects.
** NOTE(review): `upvals' is declared with one element; presumably the
** struct is allocated with room for `nupvalues' entries -- confirm.
*/
typedef struct LClosure {
ClosureHeader;
struct Proto *p;
UpVal *upvals[1]; /* list of upvalues */
} LClosure;

/*
** A closure is either a CClosure (member `c') or an LClosure
** (member `l'); both begin with the same ClosureHeader fields.
*/
typedef union Closure {
CClosure c;
LClosure l;
} Closure;      UpValue可以简单的理解为函数外部定义的局部变量,如下所示的a就是upvalue
-- f returns an anonymous function that reads f's local variable `a`;
-- seen from that inner function, `a` is an upvalue.
function f()
local a = 0;
return function() return a+1 end;
end      还有个概念需要稍微关注,那就是block,直接翻译成代码块好了,什么是一个代码块以及怎么表示一个代码块,如下:
if xxxx then
abcd.... // 这部分可以算代码块
end

// Definition of a code block.
// Blocks form a chain through `previous'; each one records the label and
// pending-goto indices it started at and how many locals were active outside it.
typedef struct BlockCnt {
struct BlockCnt *previous; /* chain */
short firstlabel; /* index of first label in this block */
short firstgoto; /* index of first pending goto in this block */
lu_byte nactvar; /* # active locals outside the block */
lu_byte upval; /* true if some variable in the block is an upvalue */
lu_byte isloop; /* true if `block' is a loop */
} BlockCnt;      阅读lparser.c时,还发现有两种东西需要去理解,一个称为exp即表达式,一个是statement,表达式可能是简单的true或false,也可能是~a这种带一元操作符的,还有可能是a+b这种,也还有形如{a=x,b=x}被称为构造(constructor)的表达式,复杂的表达式,也是由简单的表达式所组成的,以下是lua中简单表达式的定义:
/*
** Parse one simple expression, dispatching on the current token, and
** describe the result in `v'.
** Cases that end in `break' handle a single token and rely on the
** luaX_next call at the bottom to skip it. Cases that end in `return'
** delegate to another parsing routine and bypass that final luaX_next.
*/
static void simpleexp (LexState *ls, expdesc *v) {
/* simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
constructor | FUNCTION body | suffixedexp */
switch (ls->t.token) {
case TK_NUMBER: {
init_exp(v, VKNUM, 0);
v->u.nval = ls->t.seminfo.r; /* numeric value carried by the token */
break;
}
case TK_STRING: {
codestring(ls, v, ls->t.seminfo.ts);
break;
}
case TK_NIL: {
init_exp(v, VNIL, 0);
break;
}
case TK_TRUE: {
init_exp(v, VTRUE, 0);
break;
}
case TK_FALSE: {
init_exp(v, VFALSE, 0);
break;
}
case TK_DOTS: { /* vararg */ /* the `...' expression, not a dot operator */
FuncState *fs = ls->fs;
/* rejected unless the current function's prototype is marked vararg */
check_condition(ls, fs->f->is_vararg,
"cannot use " LUA_QL("...") " outside a vararg function");
init_exp(v, VVARARG, luaK_codeABC(fs, OP_VARARG, 0, 1, 0));
break;
}
case '{': { /* constructor */
constructor(ls, v);
return;
}
case TK_FUNCTION: {
luaX_next(ls); /* skip the `function' token before parsing the body */
body(ls, v, 0, ls->linenumber);
return;
}
default: {
suffixedexp(ls, v);
return;
}
}
luaX_next(ls); /* skip the single token handled by a `break' case above */
}      而statement则可以看lparser.c:statement()函数,通过该函数反过来又可以对lua的语法有个大概的了解与印象加深,如下:
/*
** Parse one statement, dispatching on the current token.
** The work is bracketed by enterlevel/leavelevel. Once the statement has
** been parsed, the first free register is reset to the number of active
** local variables.
*/
static void statement (LexState *ls) {
int line = ls->linenumber; /* may be needed for error messages */
enterlevel(ls);
switch (ls->t.token) {
case ';': { /* stat -> ';' (empty statement) */
luaX_next(ls); /* skip ';' */
break;
}
case TK_IF: { /* stat -> ifstat */
ifstat(ls, line);
break;
}
case TK_WHILE: { /* stat -> whilestat */
whilestat(ls, line);
break;
}
case TK_DO: { /* stat -> DO block END */
luaX_next(ls); /* skip DO */
block(ls);
check_match(ls, TK_END, TK_DO, line);
break;
}
case TK_FOR: { /* stat -> forstat */
forstat(ls, line);
break;
}
case TK_REPEAT: { /* stat -> repeatstat */
repeatstat(ls, line);
break;
}
case TK_FUNCTION: { /* stat -> funcstat */
funcstat(ls, line);
break;
}
case TK_LOCAL: { /* stat -> localstat */
luaX_next(ls); /* skip LOCAL */
if (testnext(ls, TK_FUNCTION)) /* local function? */
localfunc(ls);
else
localstat(ls);
break;
}
case TK_DBCOLON: { /* stat -> label */
luaX_next(ls); /* skip double colon */
labelstat(ls, str_checkname(ls), line);
break;
}
case TK_RETURN: { /* stat -> retstat */
luaX_next(ls); /* skip RETURN */
retstat(ls);
break;
}
/* `break' deliberately shares the `goto' handler below */
case TK_BREAK: /* stat -> breakstat */
case TK_GOTO: { /* stat -> 'goto' NAME */
gotostat(ls, luaK_jump(ls->fs));
break;
}
default: { /* stat -> func | assignment */
exprstat(ls);
break;
}
}
lua_assert(ls->fs->f->maxstacksize >= ls->fs->freereg &&
ls->fs->freereg >= ls->fs->nactvar);
ls->fs->freereg = ls->fs->nactvar; /* free registers */
leavelevel(ls);
}      exp也好,statement也好,这种代码的组织形式是非常容易掌握的,就不再对case里的各种exp或者stat进一步分析了,可能还忽略了一点,那就是luaX_next是怎么保证工作正确的呢,如下:llex.c
/*
** Advance the lexer so that `ls->t' holds the next token.
** Records the current line number in `lastline' first. If a look-ahead
** token is stored (its token field is not TK_EOS), that token becomes
** the current one and the look-ahead slot is reset to TK_EOS; otherwise
** a new token is read with llex.
*/
void luaX_next (LexState *ls) {
ls->lastline = ls->linenumber;
if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
ls->t = ls->lookahead; /* use this one */
ls->lookahead.token = TK_EOS; /* and discharge it */
}
else
ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
}      llex.c:llex()函数就不再列出了,一个大循环加一个大switch,阅读没什么难度。还有最后一个问题,这样分析的最后保存的结果是什么,其实在lpaerse.c:luaY_parser函数中就已经知道了,那就是FuncState,表达式被翻成变量与字节码,存入Proto中。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: