您的位置:首页 > 编程语言 > Python开发

python判断一个字符是否是xml合法字符

2012-05-08 17:08 597 查看
项目中碰到的问题,记录如下,期望能对他人有用。

def valid_XML_char_ordinal(c):
    """
    @summary:
        check if the char is a valid xml character
    @param c: the character to be checked, given either as an integer
        code point or as a one-character string
    @see: http://www.w3.org/TR/2008/REC-xml-20081126/#charsets
    @result: True/False
    """
    if not isinstance(c, int):
        # The ranges below are integer code points.  Comparing a string
        # against them is always wrong (TypeError on Python 3, a silent
        # "invalid" on Python 2), so convert a character to its code point.
        c = ord(c)
    return (  # conditions ordered by presumed frequency
        0x20 <= c <= 0xD7FF
        or c in (0x09, 0x0A, 0x0D)
        or 0xE000 <= c <= 0xFFFD
        or 0x10000 <= c <= 0x10FFFF
    )


对于包含非法XML字符的数据,可以考虑将整个字符串通过base64编码处理(注意:base64是一种编码方式,而不是加密),具体代码如下:

try:
import xml.sax.saxutils
except ImportError:
raise ImportError("requires xml.sax.saxutils package, pleas check if xml.sax.saxutils is installed!")
import base64
import logging

logger = logging.getLogger(__name__)

__all__ = ["escape", "unescape"]

def escape(data):
    """
    @summary:
        Escape '&', '<', and '>' in a string of data.
        If the data contains a character that is not a legal XML character,
        the whole string is base64-encoded instead of being escaped.
    @param data: the text to be processed; None is treated as ""
    @return:
        {"base64": True | False,
         "data": data}
        "base64" tells whether "data" holds the base64 form.  Processing is
        best-effort: if it fails, the error is logged and "data" is "".
    """
    is_base64 = False
    escaped_data = ""
    try:
        if data is None:
            data = ""

        # valid_XML_char_ordinal() tests integer code points, so every
        # character is converted with ord() before it is checked.
        is_base64 = not all(valid_XML_char_ordinal(ord(c)) for c in data)

        if is_base64:
            logger.debug("%s is not ascii-encoded string, so i will encode it in base64", data)
            # b64encode() needs bytes: text is serialised as UTF-8 first,
            # while a byte string is passed through unchanged.
            raw = data if isinstance(data, bytes) else data.encode("utf-8")
            encoded = base64.b64encode(raw)
            if not isinstance(encoded, str):
                # b64encode() returned bytes that are not the native str
                # type; base64 output is pure ASCII, so this cannot fail.
                encoded = encoded.decode("ascii")
            escaped_data = encoded
        else:
            # only the XML markup characters have to be escaped
            escaped_data = xml.sax.saxutils.escape(data)

    except Exception as e:
        # Deliberately broad: the caller always gets a result dict back.
        logger.exception(e)

    return {"base64": is_base64,
            "data": escaped_data}

def unescape(data, is_base64 = False):
    """
    @summary:
        Unescape '&', '<', and '>' in a string of data.
        If is_base64 is True, the data is base64-decoded instead and no
        XML unescaping is applied.
    @param data: the data to be processed
    @param is_base64: specify if the data is encoded by base64
    @result: the unescaped text, or whatever base64.b64decode() returns
        for base64 input.  If base64 decoding fails, the error is logged
        and the input is returned unchanged.
    """
    if not is_base64:
        # plain XML text: just undo the entity escaping
        return xml.sax.saxutils.unescape(data)

    try:
        return base64.b64decode(data)
    except Exception as ex:
        # Best effort: report the failure through the logger and fall back
        # to the raw input rather than raising to the caller.
        logger.debug("some excpetion occured when invoke b64decode")
        logger.error(ex)
        return data
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: