您的位置:首页 > 编程语言 > Python开发

python实现词法分析

2013-09-18 18:59 120 查看
# Requires PLY (Python Lex-Yacc): install it first with `pip install ply`.

# -*- coding: utf-8 -*-
#--------------------------------------------------------------------------
#Author:Jmdebugger
#email: pengkailb@gmail.com
#date: 2013-9-17
#--------------------------------------------------------------------------
import ply.lex as lex

# Token type names handed to PLY.  Order is irrelevant here; the
# reserved-word types are appended at the bottom of this section.
tokens = [
    "TOKEN_IDENT",
    "TOKEN_INT",
    "TOKEN_FLOAT",
    "TOKEN_STRING",
    "TOKEN_OP",
    "TOKEN_DELIM_COMMA",          # ,
    "TOKEN_DELIM_OPEN_PAREN",     # (
    "TOKEN_DELIM_CLOSE_PAREN",    # )
    "TOKEN_DELIM_OPEN_BRACKET",   # [
    "TOKEN_DELIM_CLOSE_BRACKET",  # ]
    "TOKEN_DELIM_OPEN_BRACE",     # {
    "TOKEN_DELIM_CLOSE_BRACE",    # }
    "TOKEN_DELIM_SEMICOLON",      # ;
]

# Keyword lexeme -> token type.  t_TOKEN_IDENT consults this table so
# that keywords are not emitted as plain identifiers.
reserved = {
    'if':       'TOKEN_RSRVD_IF',
    'else':     'TOKEN_RSRVD_ELSE',
    'true':     'TOKEN_RSRVD_TRUE',
    'false':    'TOKEN_RSRVD_FALSE',
    'while':    'TOKEN_RSRVD_WHILE',
    'break':    'TOKEN_RSRVD_BREAK',
    'continue': 'TOKEN_RSRVD_CONTINUE',
    'goto':     'TOKEN_RSRVD_GOTO',
    'func':     'TOKEN_RSRVD_FUNC',
    'var':      'TOKEN_RSRVD_VAR',
    'for':      'TOKEN_RSRVD_FOR',
    'return':   'TOKEN_RSRVD_RETURN',
}

# PLY must also know every reserved-word token type.
tokens = tokens + list(reserved.values())

# Characters PLY skips between tokens.  This must be a PLAIN string:
# the original raw string r' \t\r' contained a literal backslash plus
# the letters 't' and 'r' instead of the TAB and CR characters, so
# tabs/CRs were not ignored (and '\' wrongly was).
t_ignore = ' \t\r'
def t_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(\/\/.*)'
    # C-style block comments and // line comments.  Returning nothing
    # (implicit None) makes PLY discard the matched text.

def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)

def t_error(t):
    # Report the bad input (t.value is the remaining input at the error
    # point) and advance one character — without skip(1) PLY would stay
    # on the same position and loop forever on illegal input.
    # print(...) with a single parenthesized argument works on both
    # Python 2 and Python 3.
    print("LaunchScript error: " + repr(t.value))
    t.lexer.skip(1)

def t_TOKEN_IDENT(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # Identifier.  Keywords match this same pattern, so promote the
    # token type via the reserved-word table when the lexeme is one.
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'TOKEN_IDENT'
    return t

def t_TOKEN_INT(t):
    r'(0x[a-fA-F0-9]+)|([0-9]+(?![0-9]|\.))'
    # Integer literal, hex or decimal.  PLY tries function rules (this
    # one) before string rules (t_TOKEN_FLOAT), so without the
    # (?![0-9]|\.) lookahead an input like "3.14" lexed as INT(3)
    # followed by FLOAT(.14).  The lookahead makes this rule refuse a
    # digit run that is immediately followed by '.', letting the float
    # rule match the whole literal.
    return t

# String-based token rules.  PLY tries these after all function rules,
# sorted by decreasing regex length.  Each raw string below IS the
# token's regular expression — do not edit the pattern text without
# re-checking the grammar.
t_TOKEN_FLOAT =  r'[0-9]*\.[0-9]+'
t_TOKEN_STRING = r'(\"([^\\\r]|(\\.))*?\")' #|(\"([^\\\n]|(\\.))*?\")' only for windows
t_TOKEN_DELIM_COMMA = r'\,'
t_TOKEN_DELIM_OPEN_PAREN = r'\('
t_TOKEN_DELIM_CLOSE_PAREN = r'\)'
t_TOKEN_DELIM_OPEN_BRACKET = r'\['
t_TOKEN_DELIM_CLOSE_BRACKET = r'\]'
t_TOKEN_DELIM_OPEN_BRACE = r'\{'
t_TOKEN_DELIM_CLOSE_BRACE = r'\}'
t_TOKEN_DELIM_SEMICOLON = r'\;'

def t_TOKEN_OP(t):
    r'(\<\<\=)|(\>\>\=)|([\+\-\*\/\%\&\|\^\=\!\>\<]\=)|(\|\|)|(\&\&)|(\+\+)|(\-\-)|[\+\-\*\/\%\^\=\&\|\>\<\!\~]'
    # Operator token.  The alternatives are ordered longest-first so
    # compound operators (<<=, >>=, op=, ||, &&, ++, --) win over their
    # single-character prefixes in the final [...] alternative.
    return t

if __name__ == "__main__":
    # Driver: build the lexer from this module's rules and dump every
    # token found in ./test.txt.
    lexer = lex.lex()
    # Read as bytes (so platform newline translation never touches the
    # input), then decode once at the boundary: on Python 3 the lexer
    # needs text, not bytes.  Using `with` guarantees the file is closed.
    with open("./test.txt", "rb") as f:
        data = f.read()
    if isinstance(data, bytes):
        data = data.decode("utf-8")
    lexer.input(data)

    while True:
        tok = lexer.token()
        if not tok:
            break  # No more input
        # Parenthesized single-argument print works on Python 2 and 3.
        print(tok.value + "\t---->\t" + tok.type)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: