python使用codecs模块进行文件操作-读写中英文字符
2017-07-19 09:45
811 查看
摘自:
python使用codecs模块进行文件操作。以下是将源文件转成带BOM的UTF-8编码的python脚本：
"""
Convert source files from legacy code pages into UTF-8 with a BOM.

Usage:
   ConvertCp.py SrcDir DstDir
e.g.
   if your source folder is "D:/Test/NonUnicode", destination folder is
   "D:/Test/utf8", just run command as follow :
       python ConvertCp.py "D:/Test/NonUnicode" "D:/Test/utf8"
"""
import sys
import os
import codecs

# Candidate source encodings, tried in this order; the first one that decodes
# the whole file without error is used.
# NOTE(review): single-byte code pages accept almost any byte sequence, so the
# entries near the top effectively shadow the later ones. Reorder this tuple
# to match the encodings you actually expect.
codePageList = (
    "utf-8",
    "cp1251",
    "cp855",
    "cp1252",
    "cp1250",
    "cp1254",
    "cp936",
    "cp950",
    "cp932",
    "cp949",
    "cp874",
    "cp1253",
)

# Only files with one of these (lower-cased) extensions are converted.
fileExtFilter = (".cpp", ".c", ".cxx", ".h", ".hpp", ".hxx", ".cc", ".inl")


def FileIsBomUtf8Encoding(filePath):
    """
    Tell whether the file starts with the UTF-8 byte order mark.

    @param filePath path of the file to inspect.
    @return True if the first bytes equal codecs.BOM_UTF8; False otherwise,
            including when the file cannot be opened (a message is printed).
    """
    try:
        with open(filePath, "rb") as f:
            # Only the leading bytes matter; do not read the whole file.
            head = f.read(len(codecs.BOM_UTF8))
    except IOError:
        print("open file %s failed." % (os.path.basename(filePath)))
        return False
    # Compare bytes with bytes so the check works on Python 2 and Python 3.
    return head == codecs.BOM_UTF8


#----------------------------------------------------------------------
def ConvertFileEncoding(sourceFilePath, targetFilePath, targetEncoding="utf_8"):
    """
    Convert a text file from a legacy encoding into 'targetEncoding'(utf_8).

    The source is decoded with the first entry of codePageList that accepts
    it. When the target encoding is plain UTF-8, a BOM is written in front of
    the converted content.

    @param sourceFilePath source file path.
    @param targetFilePath target file path; missing parent directories are
                          created.
    @param targetEncoding target file encoding.
    @return True if the target file was written. False if the file was
            skipped (extension not in fileExtFilter, or already UTF-8 with
            BOM) or if reading, decoding, encoding or writing failed.
    """
    #
    # filter file ext.
    #
    filePathExt = os.path.splitext(sourceFilePath)[1]
    if filePathExt.lower() not in fileExtFilter:
        return False

    #
    # If the file is Bom utf_8 just skip.
    #
    if FileIsBomUtf8Encoding(sourceFilePath):
        return False

    #
    # Get the source content: read the raw bytes once, then try each
    # candidate code page in memory (no file handle is left open).
    #
    try:
        with open(sourceFilePath, "rb") as sourceFile:
            rawBytes = sourceFile.read()
    except IOError:
        print("open file %s failed." % (os.path.basename(sourceFilePath)))
        return False

    content = None
    for cp in codePageList:
        try:
            content = rawBytes.decode(cp)
            break
        except UnicodeDecodeError:
            continue

    if content is None:
        print("File \"%s\" is not valid encoding." % (sourceFilePath))
        return False

    #
    # convert the file content. Encoding happens before the target is opened,
    # so a failure never leaves a partial file behind.
    #
    try:
        encoded = content.encode(targetEncoding)
    except UnicodeError:
        print("convert file: \"%s\" failure" % (sourceFilePath))
        return False

    # Prepend the BOM only for plain UTF-8; codecs.lookup normalizes spellings
    # such as "utf_8", "UTF-8" and "utf8" to the same codec name.
    if codecs.lookup(targetEncoding).name == "utf-8":
        encoded = codecs.BOM_UTF8 + encoded

    #
    # ensure the target directory exist. An empty dirname means the current
    # directory, which needs no creation.
    #
    targetPathDir = os.path.dirname(targetFilePath)
    if targetPathDir and not os.path.isdir(targetPathDir):
        os.makedirs(targetPathDir)

    try:
        with open(targetFilePath, "wb") as targetFile:
            targetFile.write(encoded)
    except IOError:
        print("open file %s failed." % (targetFilePath))
        return False

    return True


if __name__ == '__main__':
    if len(sys.argv) <= 2:
        # Hard-coded fallback directories used when no arguments are given.
        sSourceDir = r"D:\\trunk"
        sTargetDir = r"D:\\\trunk_new"
    else:
        sSourceDir = sys.argv[1]
        sTargetDir = sys.argv[2]

    for root, dirs, files in os.walk(sSourceDir):
        for fileName in files:
            sourcePath = os.path.join(root, fileName)
            # Swap only the leading source-dir prefix; a later occurrence of
            # the same text inside the path must stay untouched.
            targetPath = sourcePath.replace(sSourceDir, sTargetDir, 1)
            ConvertFileEncoding(sourcePath, targetPath)
相关文章推荐
- python使用codecs模块进行文件操作-读写中英文字符
- python使用codecs模块进行文件操作-读写中英文字符
- 分别使用(字符流)和(字节流)对文件进行读写操作
- 使用字符/字节流对文件进行读写操作
- 使用 Python 进行稳定可靠的文件操作
- 使用 Python 进行稳定可靠的文件操作
- python对文件进行读写操作
- python对文件进行读写操作
- python对文件进行读写操作
- python对文件进行读写操作
- 使用 Python 进行稳定可靠的文件操作
- Python简单对文件进行读写操作
- 使用Python进行二进制文件读写(转)
- 使用 Python 进行稳定可靠的文件操作
- python对文件进行读写操作
- 使用Python进行稳定可靠的文件操作详解
- 使用 Python 进行稳定可靠的文件操作
- python 中对配置文件(如.ini文件)进行读写等操作ConfigParser模块
- python对文件进行读写操作
- 使用 Python 进行稳定可靠的文件操作