您的位置:首页 > 编程语言 > Python开发

Python正则表达式

2015-09-09 21:16 736 查看
代码实例:

# -*- coding: utf-8 -*-

#Author:qiang

import re

import os

import csv

class HTML_Process(object):
    """Extract tabular records from an HTML page and save them as CSV.

    Constructing an instance runs the whole pipeline: the page is parsed by
    ``deal_page``, the accepted rows are written to ``output_file.txt`` by
    ``save_data`` and a completion message is printed.

    The code runs unchanged on Python 2 and Python 3.

    Attributes:
        page: the HTML text handed to the constructor.
        record_dict: list holding one dict per accepted record, keyed by
            the names in ``FIELD_NAMES``.
        record_arr: list holding one list of field values per accepted
            record, in column order.
    """

    # Dict keys for one record, in the same order as the CSV columns.
    FIELD_NAMES = (
        "user_number",         # user number
        "user_type",           # user type
        "meter_point_name",    # metering point name
        "asset_number",        # asset number
        "factory_number",      # factory (serial) number
        "display_type",        # reading type
        "last_display",        # previous reading
        "current_display",     # current reading
        "comprehensive_rate",  # combined multiplier
        "last_power",          # previously recorded energy
        "current_power",       # current energy
        "status",              # meter-reading status
        "abnormal_assort",     # meter-reading anomaly category
        "data_from",           # meter-reading data source
        "user_address",        # service address
    )

    # One record: everything between the "queryConsInfo" link and the next
    # "<a".  "." does not match a newline, so a record must sit on one line.
    _PAGE_PATT = re.compile(r"green..href=.javascript:queryConsInfo(.*?)<a")
    # A table cell whose content does not start with "<" (i.e. plain text).
    _CELL_PATT = re.compile(r"<td noWrap>([^<].*?)</td>")
    # The link text that follows the closing ")" of the javascript call.
    _USER_NUMBER_PATT = re.compile(r"\).>(.*?)</a>")

    def __init__(self, page):
        """Parse ``page``, write the CSV output file and print "finish".

        Args:
            page: HTML text to search for records.
        """
        self.page = page
        self.record_dict, self.record_arr = self.deal_page()
        self.save_data()
        print("\nfinish")

    # save data
    def save_data(self):
        """Write the header row and every row of ``self.record_arr`` as CSV.

        The output goes to ``output_file.txt`` in the current directory; an
        existing file is overwritten.
        """
        import sys  # local import: the module does not import sys at the top

        header = ["用户编号", "用户类型", "计量点名称", "资产编号", "出厂编号", "示数类型", "上次示数", "本次示数", "综合倍率",
                  "上次抄见电量", "本次电量", "抄表状态", "抄表异常分类", "抄表数据来源", "用电地址"]

        # csv.writer needs a binary file on Python 2, but a text file opened
        # with newline="" on Python 3.
        if sys.version_info[0] < 3:
            output_file_object = open("output_file.txt", "wb")
        else:
            output_file_object = open("output_file.txt", "w", newline="",
                                      encoding="utf-8")

        # The with-block closes the file even when a write fails.
        with output_file_object:
            writer = csv.writer(output_file_object)
            writer.writerow(header)
            writer.writerows(self.record_arr)

    # deal page and return segment data
    def deal_page(self):
        """Split ``self.page`` into records and collect the complete ones.

        For every record the user number (link text) is placed in front of
        the plain-text table cells.  Only records that yield exactly
        ``len(FIELD_NAMES)`` values (15) are kept; each raw record, each
        accepted row and the final count are printed.

        Returns:
            A ``(record_dict, record_arr)`` tuple: a list of dicts keyed by
            ``FIELD_NAMES`` and a parallel list of value lists.
        """
        expected_fields = len(self.FIELD_NAMES)
        record_dict = []
        record_arr = []

        for record in self._PAGE_PATT.findall(self.page):
            print(record)

            # user number first, then the table cells
            result_arr = (self._USER_NUMBER_PATT.findall(record)
                          + self._CELL_PATT.findall(record))

            # keep only records with the full set of 15 fields
            if len(result_arr) != expected_fields:
                continue

            record_dict.append(dict(zip(self.FIELD_NAMES, result_arr)))
            record_arr.append(result_arr)

            # print the data to the screen, every value followed by "-"
            print("-".join(result_arr) + "-")

        count = len(record_arr)
        print("total records :" + str(count))
        if count == 0:
            print("maybe will modify regular expression or check the page is ok")
        return record_dict, record_arr

    

    

def get_Page():
    """Return the contents of ``monInfo.txt`` from the current directory.

    Returns:
        The text of the file, or ``None`` when the file cannot be opened or
        read; in that case a message is printed first.

    Only I/O failures are reported this way.  Any other exception (for
    example a decoding error) propagates to the caller instead of being
    mislabelled as a missing file.
    """
    try:
        # The with-block closes the file even if read() fails.
        with open("monInfo.txt") as file_object:
            return file_object.read()
    except IOError:
        print("no [monInfo.txt] file in current folder")
        return None

if __name__ == "__main__":
    # Script entry point: read monInfo.txt, extract the records and write
    # the CSV output file.
    try:
        page = get_Page()
        # get_Page() returns None after reporting that the file is missing;
        # there is nothing to process in that case.
        if page is not None:
            HTML_Process(page)
    except Exception as exc:
        # Best effort: report the failure instead of hiding it, and still
        # reach the pause below.
        print("processing failed: %r" % (exc,))

    # Runs the shell's "pause" command -- presumably to keep a Windows
    # console window open until a key is pressed.
    os.system("pause")
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: