您的位置：首页 > 编程语言 > Python开发

mako源码解读(2)——文档解析

2014-12-28 15:12 232 查看

mako的生成模板首先需要把文本编译成可执行的Python代码，然后再从外部添加变量，进行执行，输出文本

编译过程也要分为两部分，先是根据文档结构进行解析，然后根据解析好的节点生成Python代码（好像编译原理中的中间代码生成啊，后悔上课不认真听那门课T^T）

这篇文章我们先看如何把文本解析成节点

老规矩，先上测试案例

def test_integration(self):
# Integration test: lex one template that mixes a namespace tag, a comment
# line, an inherit tag, two component tags, nested "% for" control lines and
# a "${...}" expression, then compare repr() of the resulting node tree.
# NOTE(review): the expected repr below contains leading whitespace inside
# several Text nodes (e.g. '    <tr>\n') that the template literal as shown
# here does not have -- presumably the indentation of this listing was lost;
# verify against the original test file before running it.
template = """<%namespace name="foo" file="somefile.html"/>
# inherit from foobar.html
<%inherit file="foobar.html"/>

<%component name="header">
<div>header</div>
</%component>
<%component name="footer">
<div> footer</div>
</%component>

<table>
% for j in data():
<tr>
% for x in j:
<td>Hello ${x| h}</td>
% endfor
</tr>
% endfor
</table>
"""
# Run the lexer over the template; parse() returns the root TemplateNode.
nodes = Lexer(template).parse()
#print nodes
# Each node's repr ends with its (lineno, pos) tuple; children of a tag are
# shown as a list of repr strings.
assert repr(nodes) == r"""TemplateNode({}, [NamespaceTag('namespace', {'name': '"foo"', 'file': '"somefile.html"'}, (1, 1), []), Text('\n', (1, 46)), Comment('inherit from foobar.html', (2, 1)), InheritTag('inherit', {'file': '"foobar.html"'}, (3, 1), []), Text('\n\n', (3, 31)), ComponentTag('component', {'name': '"header"'}, (5, 1), ["Text('\\n     <div>header</div>\\n', (5, 27))"]), Text('\n', (7, 14)), ComponentTag('component', {'name': '"footer"'}, (8, 1), ["Text('\\n    <div> footer</div>\\n', (8, 27))"]), Text('\n\n<table>\n', (10, 14)), ControlLine('for', 'for j in data():', False, (13, 1)), Text('    <tr>\n', (14, 1)), ControlLine('for', 'for x in j:', False, (15, 1)), Text('            <td>Hello ', (16, 1)), Expression('x', ['h'], (16, 23)), Text('</td>\n', (16, 30)), ControlLine('for', 'endfor', True, (17, 1)), Text('    </tr>\n', (18, 1)), ControlLine('for', 'endfor', True, (19, 1)), Text('</table>\n', (20, 1))])"""

注意！，这里的节点并不是指html的节点，指的是mako当中的标签，如<%block>，<%def>这种，mako是文本模板引擎，而不是html模板引擎

上面测试代码的template是一段混合着mako各种元素的文本，让我们看看mako是如何对这段文本进行操作的

上面的测试代码用到了 Lexer 的两个方法：构造函数 __init__ 和 parse

class Lexer(object):
    """Scan template text and build a tree of parsetree nodes.

    Attributes set by the constructor:
      text            -- the template source being scanned
      template        -- root ``parsetree.TemplateNode`` returned by parse()
      matched_lineno  -- line number recorded for the most recent match
      matched_charpos -- column recorded for the most recent match
      lineno          -- running line counter
      match_position  -- offset into ``text`` where the next match starts
      tag             -- stack of tags that are open and not yet closed
      control_line    -- stack of control lines that are open
    """

    def __init__(self, text):
        self.text = text
        self.template = parsetree.TemplateNode()
        self.matched_lineno = 1
        self.matched_charpos = 0
        self.lineno = 1
        self.match_position = 0
        self.tag = []
        self.control_line = []

    def parse(self):
        """Consume the whole text and return the root template node.

        The matchers are tried in a fixed order at the current position;
        the first one that succeeds wins and the loop starts over.

        Raises:
            exceptions.CompileException: no matcher accepted the input at
                the current position.
            exceptions.SyntaxException: a tag is still open when the end
                of the text is reached.
        """
        length = len(self.text)
        while True:
            if self.match_position > length:
                break

            if self.match_end():
                break
            if self.match_expression():
                continue
            if self.match_control_line():
                continue
            if self.match_tag_start():
                continue
            if self.match_tag_end():
                continue
            if self.match_python_block():
                continue
            if self.match_text():
                continue

            # Nothing matched.  The original consulted ``self.current``,
            # an attribute this class never sets; the lexer's own position
            # and text length are the values that exist here.
            if self.match_position > length:
                break
            # A bare string is not a valid exception object; raise a real
            # exception carrying the last recorded position instead.
            raise exceptions.CompileException("assertion failed", self.matched_lineno, self.matched_charpos)

        if self.tag:
            raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, self.matched_lineno, self.matched_charpos)
        return self.template

我们来一个个解释各个属性的含义

self.template
文本的根节点，含有nodes属性字段，是一个节点数组
self.tag
当前文档分析过程中的Mako标签
self.control_line
当前文档分析过程中的单行命令

parse() 这个函数从当前匹配位置开始，按顺序尝试下面各个匹配函数，决定当前内容应该如何处理

match_end()匹配文档结束
match_expression()匹配表达式（例：${123+321}）
match_control_line()匹配Python指令（例：% if a == True:）
match_tag_start()匹配Mako标签的开始部分（例：<%block>）
match_tag_end()匹配Mako标签的结束部分（例：</%block>）
match_python_block()匹配Python代码块（例：<% %>或者<%! %>）
match_text()匹配正常的文本（不止一行）

都是通过正则表达式去判别，看完我才发现正则真的不是一般的强大啊！！

其中有个上述函数都调用的部分

def match(self, regexp, flags=None):
    """Try ``regexp`` at the current position and advance past it on success.

    On a successful match this updates ``match_position``,
    ``matched_lineno``, ``matched_charpos`` and ``lineno``.  When nothing
    matches, no state is touched.

    Returns the match object, or None when the pattern does not match.
    """
    origin = self.match_position
    if flags:
        pattern = re.compile(regexp, flags)
    else:
        pattern = re.compile(regexp)

    found = pattern.match(self.text, origin)
    if not found:
        return found

    begin, finish = found.span()
    # An empty match still moves the position forward by one character.
    self.match_position = finish + 1 if finish == begin else finish
    self.matched_lineno = self.lineno
    consumed_newlines = self.text[origin:self.match_position].count("\n")

    # Walk back from the character before the match start until a newline
    # (or the start of the text) to work out the column of the match.
    cursor = origin - 1
    while 0 <= cursor < len(self.text) and self.text[cursor] != '\n':
        cursor -= 1
    self.matched_charpos = origin - cursor
    self.lineno += consumed_newlines
    return found

这个函数会更新匹配的起始位置（match_position）、当前所在的行号（lineno），以及本次匹配的行号和列号（matched_lineno、matched_charpos），后面两个主要是在发生异常的时候报错定位用的

下面看各个检测方法的正则
match_tag_start：

(r'''\<%(\w+)\s+(.+?["'])?\s*(/)?>''', re.I | re.S )

match_tag_end：

(r'\</%\s*' + self.tag[-1].keyword + '\s*>')

match_end：

(r'\Z', re.S)

match_text：

(r"""
(.*?)         # anything, followed by:
(
(?<=\n)(?=\s*[%#]) # an eval or comment line, preceded by a consumed \n and whitespace
|
(?=\${)   # an expression
|
(?=</?[%&])  # a substitution or block or call start or end
# - don't consume
|
(\\\n)         # an escaped newline  - throw away
|
\Z           # end of string
)""", re.X | re.S)

match_python_block：

(r"<%(!)?(.*?)%>", re.S)

match_expression：

(r"\${(.+?)(?:\|\s*(.+?)\s*)?}", re.S)

match_control_line：

(r"(?<=^)\s*([%#])\s*([^\n]*)(?:\n|\Z)", re.M)

对于正则，我也不知道说什么，看自己的理解能力吧。 http://www.cnblogs.com/huxi/archive/2010/07/04/1771073.html 这篇文档介绍正则挺不错的，还讲了正则里面那些特殊构造。我之前看Mako里的正则看懵了，没见过这些正则

下面分开叙述这些匹配函数

def match_tag_start(self):
    """Match an opening ``<%keyword attr="..." ...>`` tag at the current position.

    On a match the lower-cased keyword is stored on ``self.keyword``, the
    quoted ``name=value`` pairs are collected into a dict, and a
    ``parsetree.Tag`` node is appended via ``append_node``.  If the tag
    ends with ``/`` the tag stack is popped again straight away.

    Returns True when a tag start was matched, False otherwise.
    """
    found = self.match(r'''\<%(\w+)\s+(.+?["'])?\s*(/)?>''', re.I | re.S )
    if not found:
        return False

    keyword = found.group(1).lower()
    attr_text = found.group(2)
    self_closing = found.group(3)
    self.keyword = keyword

    attributes = {}
    if attr_text:
        pairs = re.findall(r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr_text)
        # Only one of the two value groups is non-empty, depending on
        # which quote character the attribute used.
        for key, single_quoted, double_quoted in pairs:
            attributes[key] = single_quoted or double_quoted

    self.append_node(parsetree.Tag, keyword, attributes)
    if self_closing:
        self.tag.pop()
    return True

其中有个函数
self.append_node

def append_node(self, nodecls, *args, **kwargs):
    """Build a node of type ``nodecls`` and attach it to the tree.

    The last matched line and column are passed to the node as the
    ``lineno`` and ``pos`` keyword arguments.  The node is appended to the
    innermost open tag if there is one, otherwise to the root template.
    Tag nodes are then pushed on the tag stack; control lines push, pop or
    validate against the control-line stack.

    Raises:
        exceptions.SyntaxException: a control line that is neither an end
            nor a primary keyword is rejected by ``is_ternary`` of the
            innermost open control line.
    """
    kwargs.update(lineno=self.matched_lineno, pos=self.matched_charpos)
    node = nodecls(*args, **kwargs)

    parent = self.tag[-1] if self.tag else self.template
    parent.nodes.append(node)

    if isinstance(node, parsetree.Tag):
        self.tag.append(node)
        return
    if not isinstance(node, parsetree.ControlLine):
        return

    if node.isend:
        self.control_line.pop()
        return
    if node.is_primary:
        self.control_line.append(node)
        return
    if self.control_line and not self.control_line[-1].is_ternary(node.keyword):
        raise exceptions.SyntaxException("Keyword '%s' not a legal ternary for keyword '%s'" % (node.keyword, self.control_line[-1].keyword), self.matched_lineno, self.matched_charpos)

nodecls是一个类对象，用来创建node的类，
让我们来看看有哪些node的子类

"""object model defining a Mako template."""

from mako import exceptions, ast, util
import re

class Node(object):
    """base class for a Node in the parse tree.

    Every node records the line number and position it was created with.
    """

    def __init__(self, lineno, pos):
        # A stray "b84f" token (extraction residue) used to sit here; as an
        # expression statement it would raise NameError, so it is removed.
        self.lineno = lineno
        self.pos = pos

    def get_children(self):
        """Return the child nodes; the base class has none."""
        return []

    def accept_visitor(self, visitor):
        """Dispatch this node to ``visitor``.

        Calls ``visitor.visit<ClassName>(self)`` when the visitor defines
        such a method; otherwise each child is asked to accept the visitor
        in turn.
        """
        def traverse(node):
            for child in node.get_children():
                child.accept_visitor(visitor)

        handler = getattr(visitor, "visit" + self.__class__.__name__, traverse)
        handler(self)

class TemplateNode(Node):
    """Top-level container node holding the template's collection of nodes."""

    def __init__(self):
        # The container itself is created with line 0, position 0.
        super(TemplateNode, self).__init__(0, 0)
        self.page_attributes = {}
        self.nodes = []

    def get_children(self):
        """Return the list of nodes stored on this container."""
        return self.nodes

    def __repr__(self):
        shown = (repr(self.page_attributes), repr(self.nodes))
        return "TemplateNode(%s, %s)" % shown

class ControlLine(Node):
    """A line-oriented Python control statement or its end marker.

    % if foo:
    (markup)
    % endif
    """

    def __init__(self, keyword, isend, text, **kwargs):
        super(ControlLine, self).__init__(**kwargs)
        self.text = text
        self.keyword = keyword
        self.isend = isend
        self.is_primary = keyword in ('for', 'if', 'while', 'try')

        # End lines carry no identifiers; every other line has its text
        # analysed as a Python fragment.
        declared, undeclared = [], []
        if not self.isend:
            fragment = ast.PythonFragment(text, self.lineno, self.pos)
            declared = fragment.declared_identifiers
            undeclared = fragment.undeclared_identifiers
        self._declared_identifiers = declared
        self._undeclared_identifiers = undeclared

    def declared_identifiers(self):
        """Return the identifiers this line declares."""
        return self._declared_identifiers

    def undeclared_identifiers(self):
        """Return the identifiers this line uses without declaring."""
        return self._undeclared_identifiers

    def is_ternary(self, keyword):
        """Return true if ``keyword`` may continue this control line's block."""
        continuations = {
            'if': util.Set(['else', 'elif']),
            'try': util.Set(['except', 'finally']),
            'for': util.Set(['else']),
        }
        return keyword in continuations.get(self.keyword, [])

    def __repr__(self):
        location = (self.lineno, self.pos)
        shown = (repr(self.keyword), repr(self.text), repr(self.isend), repr(location))
        return "ControlLine(%s, %s, %s, %s)" % shown

class Text(Node):
    """A run of plain template text."""

    def __init__(self, content, **kwargs):
        super(Text, self).__init__(**kwargs)
        self.content = content

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "Text(%s, %s)" % (repr(self.content), repr(location))

class Code(Node):
    """A block of Python code, either inline or module level.

    inline:
    <%
    x = 12
    %>

    module level:
    <%!
    import logger
    %>

    """

    def __init__(self, text, ismodule, **kwargs):
        super(Code, self).__init__(**kwargs)
        self.text = text
        self.ismodule = ismodule
        # The source text is handed to ast.PythonCode, whose identifier
        # lists are exposed by the two accessors below.
        self.code = ast.PythonCode(text, self.lineno, self.pos)

    def declared_identifiers(self):
        """Return the identifiers declared by the code block."""
        return self.code.declared_identifiers

    def undeclared_identifiers(self):
        """Return the identifiers the code block uses without declaring."""
        return self.code.undeclared_identifiers

    def __repr__(self):
        location = (self.lineno, self.pos)
        shown = (repr(self.text), repr(self.ismodule), repr(location))
        return "Code(%s, %s, %s)" % shown

class Comment(Node):
    """A comment line in the template.

    # this is a comment

    """

    def __init__(self, text, **kwargs):
        super(Comment, self).__init__(**kwargs)
        self.text = text

    def __repr__(self):
        location = (self.lineno, self.pos)
        return "Comment(%s, %s)" % (repr(self.text), repr(location))

class Expression(Node):
    """An inline expression together with its escape filters.

    ${x+y}

    """

    def __init__(self, text, escapes, **kwargs):
        super(Expression, self).__init__(**kwargs)
        self.text = text
        self.escapes = escapes
        self.code = ast.PythonCode(text, self.lineno, self.pos)

    def declared_identifiers(self):
        """Return an empty list; an expression declares nothing."""
        return []

    def undeclared_identifiers(self):
        """Return the expression's undeclared names followed by its escapes."""
        names = list(self.code.undeclared_identifiers)
        names.extend(self.escapes)
        return names

    def __repr__(self):
        location = (self.lineno, self.pos)
        shown = (repr(self.text), repr(self.escapes), repr(location))
        return "Expression(%s, %s, %s)" % shown

class _TagMeta(type):
"""metaclass to allow Tag to produce a subclass according to its keyword"""
_classmap = {}
def __init__(cls, clsname, bases, dict):
if cls.__keyword__ is not None:
cls._classmap[cls.__keyword__] = cls
super(_TagMeta, cls).__init__(clsname, bases, dict)
def __call__(cls, keyword, attributes, **kwargs):
try:
cls = _TagMeta._classmap[keyword]
except KeyError:
raise exceptions.CompileException("No such tag: '%s'" % keyword, kwargs['lineno'], kwargs['pos'])
return type.__call__(cls, keyword, attributes, **kwargs)

class Tag(Node):
    """Base class for tags.

    <%sometag/>

    <%someothertag>
    stuff
    </%someothertag>
    """

    # NOTE(review): ``__metaclass__`` is the Python 2 spelling; Python 3
    # ignores this attribute, so the keyword dispatch in _TagMeta would not
    # apply there -- confirm the target interpreter.
    __metaclass__ = _TagMeta
    __keyword__ = None

    def __init__(self, keyword, attributes, **kwargs):
        super(Tag, self).__init__(**kwargs)
        self.keyword = keyword
        self.attributes = attributes
        self.nodes = []

    def get_children(self):
        """Return the nodes nested inside this tag."""
        return self.nodes

    def __repr__(self):
        # Children are shown as a list of their repr strings.
        child_reprs = [repr(child) for child in self.nodes]
        location = (self.lineno, self.pos)
        shown = (self.__class__.__name__, repr(self.keyword), repr(self.attributes), repr(location), repr(child_reprs))
        return "%s(%s, %s, %s, %s)" % shown

class IncludeTag(Tag):
    """Tag class registered for the ``include`` keyword."""
    __keyword__ = 'include'


class NamespaceTag(Tag):
    """Tag class registered for the ``namespace`` keyword."""
    __keyword__ = 'namespace'


class ComponentTag(Tag):
    """Tag class registered for the ``component`` keyword.

    The ``name`` attribute is turned into a function declaration.
    """
    __keyword__ = 'component'

    def __init__(self, keyword, attributes, **kwargs):
        super(ComponentTag, self).__init__(keyword, attributes, **kwargs)
        # Wrap the name in a stub "def ...:pass" statement so it can be
        # handed to ast.FunctionDecl.
        declaration = "def " + attributes['name'] + ":pass"
        self.function_decl = ast.FunctionDecl(declaration, self.lineno, self.pos)

    def declared_identifiers(self):
        # TODO: args in the function decl
        funcname = self.function_decl.funcname
        return [funcname]

    def undeclared_identifiers(self):
        # TODO: args in the function decl
        # NOTE(review): returns the same name as declared_identifiers();
        # confirm whether that is intended.
        funcname = self.function_decl.funcname
        return [funcname]


class CallTag(Tag):
    """Tag class registered for the ``call`` keyword."""
    __keyword__ = 'call'


class InheritTag(Tag):
    """Tag class registered for the ``inherit`` keyword."""
    __keyword__ = 'inherit'


class PageTag(Tag):
    """Tag class registered for the ``page`` keyword."""
    __keyword__ = 'page'

可以看到基本分为了几大类：TemplateNode、Tag、ControlLine 等等
在append_node做了if分类讨论来做操作，
值得注意的就是当 self.tag存在对象的时候，就代表并不是处于文档的最外层，已被别的Tag包裹，需要添加到那个Tag下的nodes里面，
当全部分析完毕之后，文本便转换为节点对象了

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： python mako

相关文章推荐

新的分享

章节导航