您的位置:首页 > 其它

RT-Thread finsh源码分析: finsh_token.c

2015-08-18 23:19 351 查看
/*
*  token lex for finsh shell.
*
* COPYRIGHT (C) 2006 - 2013, RT-Thread Development Team
*
*  This file is part of RT-Thread (http://www.rt-thread.org)
*  Maintainer: bernard.xiong <bernard.xiong at gmail.com>
*
*  All rights reserved.
*
*  This program is free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  This program is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  You should have received a copy of the GNU General Public License along
*  with this program; if not, write to the Free Software Foundation, Inc.,
*  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Change Logs:
* Date           Author       Notes
* 2010-03-22     Bernard      first version
* 2013-04-03     Bernard      strip more characters.
*/
#include <finsh.h>
#include <stdlib.h>

#include "finsh_token.h"
#include "finsh_error.h"

//是否是字母
#define is_alpha(ch)    ((ch | 0x20) - 'a') < 26u
//是否是数字
#define is_digit(ch)    ((ch) >= '0' && (ch) <= '9')
//其他符号
#define is_separator(ch) !(((ch) >= 'a' && (ch) <= 'z') \
|| ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_'))
//eof 文件结束符,置1表示已经是结尾
#define is_eof(self) (self)->eof

//name作为标识符,用于查找
struct name_table
{
char* name;
enum finsh_token_type type;
};

/* keyword */
static const struct name_table finsh_name_table[] =
{
{"void",        finsh_token_type_void},
{"char",        finsh_token_type_char},
{"short",       finsh_token_type_short},
{"int",         finsh_token_type_int},
{"long",        finsh_token_type_long},
{"unsigned",    finsh_token_type_unsigned},

{"NULL",        finsh_token_type_value_null},
{"null",        finsh_token_type_value_null}
};

static char token_next_char(struct finsh_token* self);
static void token_prev_char(struct finsh_token* self);
static long token_spec_number(char* string, int length, int b);
static void token_run(struct finsh_token* self);
static int  token_match_name(struct finsh_token* self, const char* str);
static void token_proc_number(struct finsh_token* self);
static u_char* token_proc_string(struct finsh_token* self);
static void token_trim_space(struct finsh_token* self);
static char token_proc_char(struct finsh_token* self);
static int token_proc_escape(struct finsh_token* self);

void finsh_token_init(struct finsh_token* self, u_char* line)
{
memset(self, 0, sizeof(struct finsh_token));

self->line = line;
}

enum finsh_token_type finsh_token_token(struct finsh_token* self)
{
if ( self->replay ) self->replay = 0;
else token_run(self);

return (enum finsh_token_type)self->current_token;
}

void finsh_token_get_token(struct finsh_token* self, u_char* token)
{
strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
}

int token_get_string(struct finsh_token* self, u_char* str)
{
unsigned char *p=str;
char ch;

ch = token_next_char(self);
if (is_eof(self)) return -1;

str[0] = '\0';

if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
{
token_prev_char(self);
return -1;
}

while (!is_separator(ch) && !is_eof(self))
{
*p++ = ch;

ch = token_next_char(self);
}
self->eof = 0;

token_prev_char(self);
*p = '\0';

return 0;
}

/*
get next character.
*/
static char token_next_char(struct finsh_token* self)
{
//如果结束符为真,返回'\0'
if (self->eof) return '\0';

//如果到达字符串尾部或者遇到换行符
if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
{
self->eof = 1;
self->position = 0;
return '\0';
}

//line处于结构体结尾,类似栈指针,向高地址生长
//此处使用技巧类似Linux内核thread->info
return self->line[self->position++];
}

static void token_prev_char(struct finsh_token* self)
{
if ( self->eof ) return;

if ( self->position == 0 ) return;
else self->position--;
}

static void token_run(struct finsh_token* self)
{
char ch;

token_trim_space(self); /* first trim space and tab. */
token_get_string(self, &(self->string[0]));

if ( is_eof(self) ) /*if it is eof, break;*/
{
self->current_token = finsh_token_type_eof;
return ;
}

if (self->string[0] != '\0') /*It is a key word or a identifier.*/
{
if ( !token_match_name(self, (char*)self->string) )
{
self->current_token = finsh_token_type_identifier;
}
}
else/*It is a operator character.*/
{
ch = token_next_char(self);

switch ( ch )
{
case '(':
self->current_token = finsh_token_type_left_paren;
break;

case ')':
self->current_token = finsh_token_type_right_paren;
break;

case ',':
self->current_token = finsh_token_type_comma;
break;

case ';':
self->current_token = finsh_token_type_semicolon;
break;

case '&':
self->current_token = finsh_token_type_and;
break;

case '*':
self->current_token = finsh_token_type_mul;
break;

case '+':
ch = token_next_char(self);

if ( ch == '+' )
{
self->current_token = finsh_token_type_inc;
}
else
{
token_prev_char(self);
self->current_token = finsh_token_type_add;
}
break;

case '-':
ch = token_next_char(self);

if ( ch == '-' )
{
self->current_token = finsh_token_type_dec;
}
else
{
token_prev_char(self);
self->current_token = finsh_token_type_sub;
}
break;

case '/':
ch = token_next_char(self);
if (ch == '/')
{
/* line comments, set to end of file */
self->current_token = finsh_token_type_eof;
}
else
{
token_prev_char(self);
self->current_token = finsh_token_type_div;
}
break;

case '<':
ch = token_next_char(self);

if ( ch == '<' )
{
self->current_token = finsh_token_type_shl;
}
else
{
token_prev_char(self);
self->current_token = finsh_token_type_bad;
}
break;

case '>':
ch = token_next_char(self);

if ( ch == '>' )
{
self->current_token = finsh_token_type_shr;
}
else
{
token_prev_char(self);
self->current_token = finsh_token_type_bad;
}
break;

case '|':
self->current_token = finsh_token_type_or;
break;

case '%':
self->current_token = finsh_token_type_mod;
break;

case '~':
self->current_token = finsh_token_type_bitwise;
break;

case '^':
self->current_token = finsh_token_type_xor;
break;

case '=':
self->current_token = finsh_token_type_assign;
break;

case '\'':
self->value.char_value = token_proc_char(self);
self->current_token = finsh_token_type_value_char;
break;

case '"':
token_proc_string(self);
self->current_token = finsh_token_type_value_string;
break;

default:
if ( is_digit(ch) )
{
token_prev_char(self);
token_proc_number(self);
break;
}

finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
self->current_token = finsh_token_type_bad;

break;
}
}
}

//name查找匹配
static int token_match_name(struct finsh_token* self, const char* str)
{
int i;

for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
{
if ( strcmp(finsh_name_table[i].name, str)==0 )
{
self->current_token = finsh_name_table[i].type;
return 1;
}
}

return 0;
}

//空格检测
static void token_trim_space(struct finsh_token* self)
{
char ch;
while ( (ch = token_next_char(self)) ==' ' ||
ch == '\t' ||
ch == '\r');

token_prev_char(self);
}

//char类型处理
static char token_proc_char(struct finsh_token* self)
{
char ch;
char buf[4], *p;

p = buf;
ch = token_next_char(self);

if ( ch == '\\' )
{
ch = token_next_char(self);
switch ( ch )
{
case 'n': ch = '\n'; break;
case 't': ch = '\t'; break;
case 'v': ch = '\v'; break;
case 'b': ch = '\b'; break;
case 'r': ch = '\r'; break;
case '\\': ch = '\\';  break;
case '\'': ch = '\'';  break;
default :
while ( is_digit(ch) )/*for '\113' char*/
{
ch = token_next_char(self);
*p++ = ch;
}

token_prev_char(self);
*p = '\0';
ch = atoi(p);
break;
}
}

if ( token_next_char(self) != '\'' )
{
token_prev_char(self);
finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
return ch;
}

return ch;
}

//字符串类型处理
static u_char* token_proc_string(struct finsh_token* self)
{
u_char* p;

for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
{
char ch = token_next_char(self);

if ( is_eof(self) )
{
finsh_error_set(FINSH_ERROR_UNEXPECT_END);
return NULL;;
}
if ( ch == '\\' )
{
ch = token_proc_escape(self);
}
else if ( ch == '"' )/*end of string.*/
{
*p = '\0';
return self->string;
}

*p++ = ch;
}

return NULL;
}

static int token_proc_escape(struct finsh_token* self)
{
char ch;
int result=0;

ch = token_next_char(self);
switch (ch)
{
case 'n':
result = '\n';
break;
case 't':
result = '\t';
break;
case 'v':
result = '\v';
break;
case 'b':
result = '\b';
break;
case 'r':
result = '\r';
break;
case 'f':
result = '\f';
break;
case 'a':
result = '\007';
break;
case 'x':
result = 0;
ch  = token_next_char(self);
while ( (ch - '0')<16u )
{
result = result*16 + ch - '0';
ch = token_next_char(self);
}
token_prev_char(self);
break;
default:
if ( (ch - '0') < 8u)
{
result = 0;
while ( (ch - '0') < 8u )
{
result = result*8 + ch - '0';
ch = token_next_char(self);
}

token_prev_char(self);
}
break;
}

return result;
}

/*
(0|0x|0X|0b|0B)number+(l|L)
*/
static void token_proc_number(struct finsh_token* self)
{
char ch;
char *p, buf[128];
long value;

value = 0;
p = buf;

ch  = token_next_char(self);
if ( ch == '0' )
{
int b;  //用于保存进制
ch = token_next_char(self);
if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
{
b = 16;
ch = token_next_char(self);
while ( is_digit(ch) || is_alpha(ch) )
{
*p++ = ch;
ch = token_next_char(self);
}
//防止字符读错误
*p = '\0';
}
else if ( ch == 'b' || ch == 'B' )
{
b = 2;
ch = token_next_char(self);
while ( (ch=='0')||(ch=='1') )
{
*p++ = ch;
ch = token_next_char(self);
}

*p = '\0';
}
else if ( '0' <= ch && ch <= '7' )
{
b = 8;
while ( '0' <= ch && ch <= '7' )
{
*p++ = ch;
ch = token_next_char(self);
}

*p = '\0';
}
else
{
token_prev_char(self);

/* made as 0 value */
self->value.int_value = 0;
self->current_token = finsh_token_type_value_int;
return;
}

self->value.int_value = token_spec_number(buf, strlen(buf), b);
self->current_token = finsh_token_type_value_int;
}
else
{
while ( is_digit(ch) )
{
value = value*10 + ( ch - '0' );
ch = token_next_char(self);
}

self->value.int_value = value;
self->current_token = finsh_token_type_value_int;
}

switch ( ch )
{
case 'l':
case 'L':
self->current_token = finsh_token_type_value_long;
break;

default:
token_prev_char(self);
break;
}
}

/*use 64 bit number*/
#define BN_SIZE 2

static long token_spec_number(char* string, int length, int b)
{
char* p;
int t;
int i, j, shift=1;
unsigned int bn[BN_SIZE], v;
long d;

p = string;
i = 0;

switch ( b )
{
case 16: shift = 4;
break;
case 8:  shift = 3;
break;
case 2:  shift = 1;
break;
default: break;
}

for ( j=0; j<BN_SIZE ; j++) bn[j] = 0;

while ( i<length )
{
t = *p++;
if ( t>='a' && t <='f' )
{
t = t - 'a' +10;
}
else if ( t >='A' && t <='F' )
{
t = t - 'A' +10;
}
else t = t - '0';

for ( j=0; j<BN_SIZE ; j++)
{
v = bn[j];
bn[j] = (v<<shift) | t;
t = v >> (32 - shift);
}
i++;
}

d = (long)bn[0];

return d;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: