Python 数据可视化编程(一)
2018-03-28 19:18
921 查看
读取CSV文件
# Method one: read the CSV file with the standard-library csv module.
import csv
import sys

filename = 'ch02-data.csv'
header = None  # stays None when the file is empty
data = []
try:
    with open(filename) as f:
        reader = csv.reader(f)
        # next(reader, None) works on Python 2 and 3 and returns None for an
        # empty file instead of raising StopIteration.
        header = next(reader, None)
        data = list(reader)
except csv.Error as e:
    # csv.Error can only be raised once the reader exists, so line_num is
    # available here.
    print("error reading csv file at line %s:%s" % (reader.line_num, e))
    sys.exit(-1)

if header:
    print(header)
    print("=========================")

for datarow in data:
    print(datarow)

# Method two: load the same file with numpy.
import numpy

# dtype=str keeps every cell as text. loadtxt is not told to skip any rows,
# so the input is presumably expected to carry no header line -- confirm.
data = numpy.loadtxt('ch02-data.csv', dtype=str, delimiter=',')
读取Excel文件
# Read every cell of one worksheet into a list of rows using xlrd.
import xlrd
from pprint import pprint

# Path of the workbook to open (renamed from `file`, which shadowed the
# Python 2 builtin of the same name).
workbook_path = 'data'

wb = xlrd.open_workbook(filename=workbook_path)
ws = wb.sheet_by_name('sheet1')

# dataset[r][c] is the value of the cell at row r, column c.
dataset = []
for r in range(ws.nrows):
    row_values = [ws.cell(r, c).value for c in range(ws.ncols)]
    dataset.append(row_values)

pprint(dataset)
从定宽数据文件导入数据
# Parse a fixed-width data file line by line with struct.
import struct
import string

datafile = 'ch02-data-width-1M.data'
# Record layout: three byte-string fields of 9, 14 and 5 bytes.
mask = '9s14s5s'
# Compile the format once instead of once per line.
record = struct.Struct(mask)

# Open in binary mode: struct unpacks from bytes, and on Python 3 a text-mode
# line cannot be passed to unpack_from. On Python 3 the printed fields are
# therefore bytes objects.
with open(datafile, 'rb') as f:
    for line in f:
        # unpack_from() reads the leading record.size bytes of the line and
        # splits them according to the mask; a shorter line raises
        # struct.error.
        fields = record.unpack_from(line)
        # strip() removes the padding whitespace around each field.
        print('fields: %s' % [field.strip() for field in fields])
从制表符分隔的文件读取数据
# Read a tab-separated file with the csv module's excel_tab dialect.
import csv
import sys

filename = 'ch02-data.tab'
header = None  # stays None when the file is empty
data = []
try:
    with open(filename) as f:
        reader = csv.reader(f, dialect=csv.excel_tab)
        # next(reader, None) works on Python 2 and 3 and returns None for an
        # empty file instead of raising StopIteration.
        header = next(reader, None)
        data = list(reader)
except csv.Error as e:
    print("error reading csv file at line %s:%s" % (reader.line_num, e))
    sys.exit(-1)

if header:
    print(header)
    print("=========================")

for datarow in data:
    print(datarow)

# Cleaning dirty data: strip surrounding whitespace from each line before
# splitting it on tabs.
# NOTE(review): strip() also removes leading/trailing tabs, so empty first or
# last fields are dropped -- presumably intended for this data set; confirm.
datafile = 'data-1.tab'
with open(datafile, 'r') as f:
    for line in f:
        line = line.strip()
        print(line.split('\t'))
从JSON数据源导入数据
""" 从JSON数据源导入数据 pip install requests """ import requests url = 'https://github.com/timeline.json' r = requests.get(url)#获取远程资源 json_obj = r.json() repos = set() for entry in json_obj: try: repos.add(entry['repository']['url']) except KeyError as e: print "no key %s.skipping......"%(e) from pprint import pprint pprint(repos)
导出数据到JSON、CSV、Excel
""" 导出数据到JSON、CSV、Excel """ #first,import the moudle we need import os import sys import argparse try: import cStringIO as StringIO except: import StringIO import struct import json import csv #second,define the method to 读写数据 def import_data(import_file): mask = '9s14s5s' data = [] with open(import_file,'r') as f: for line in f: fields = struct.Struct(mask).unpack_from(line) data.append(list([f.strip() for f in fields])) return data def write_data(data,export_format): if export_format == 'csv': return write_csv(data) elif export_format == 'json': return write_json(data) elif export_format == 'xlsx': return write_xlsx(data) else: raise Exception('Illegal format defined') #third,为每种数据格式实现各自的方法 def write_csv(data): f = StringIO.StringIO() writer = csv.writer(f) for row in data: writer.writerow(row) return f.getvalue() def write_json(data): j = json.dumps(data) return j def write_xlsx(data): from xlwt import Workbook book = Workbook() sheet1 = book.add_sheet('Sheet 1') row = 0 for line in data: col = 0 for datum in line: print datum sheet1.write(row,col,datum) col += 1 row += 1 if row > 65535: print >> sys.stderr break f = StringIO.StringIO() book.save(f) return f.getvalue() #forth,main入口 if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('import-file',help = 'Path to a fixed-width data file.') parser.add_argument('export_format',help = 'Export format: json, csv, xlsx') args = parser.parse_args() if args.import_file is None: print >> sys.stderr sys.exit(1) if args.export_format not in ('csv', 'xlsx', 'json'): print >> sys.stderr sys.exit(1) if not os.path.isfile(args.import_file): print >> sys.stderr sys.exit(1) datda = import_data(args.import_file) print write_data(data,args.export_format)
从HTML中导入数据
# Import data from HTML.
from contextlib import closing
from pprint import pprint

from lxml.html import parse
try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

# closing() releases the HTTP response once the document has been parsed.
with closing(urlopen("https://finance.yahoo.com/q/op?s=AAPL+Options")) as response:
    parsed = parse(response)

# Find the tables in the document.
doc = parsed.getroot()
table = doc.findall(".//table")

# Pick one table to experiment with.
put = table[1]


# A table has a header and data: in HTML, <th> cells form the header row and
# <td> cells form the data rows.
def _unpack(row, kind="td"):
    """Return the text content of every *kind* cell found under *row*."""
    elts = row.findall(".//%s" % kind)
    return [val.text_content() for val in elts]


# Cell text should be converted to proper types on import; pandas'
# TextParser performs that conversion automatically.
from pandas.io.parsers import TextParser


def parse_options_data(table):
    """Convert an HTML table element into the chunk TextParser returns.

    The first <tr> supplies the column names (its <th> cells); every
    following <tr> supplies one data row (its <td> cells).
    """
    rows = table.findall(".//tr")
    header = _unpack(rows[0], kind="th")
    data = [_unpack(r) for r in rows[1:]]
    return TextParser(data, names=header).get_chunk()


# Finally, run the parser on the selected table.
put_data = parse_options_data(put)
print(put_data[:10])

# All the URLs in the document can be collected as well; links are <a> tags.
links = doc.findall(".//a")

# get("href") returns a link's URL and text_content() returns its text.
urls = [lnk.get("href") for lnk in links]
text = [lnk.text_content() for lnk in links]

pprint(urls[:10])
print("============")
从数据库导入数据
""" 从数据库导入数据 pip install sqlite3 """ #first,吧SQL文件导入到SQLite数据库中 import sqlite3 import sys if len(sys.argv) < 2: print'error: you must supply at least SQL script' print'usage: %s tabel.db ./sql-dump.sql'%(sys.argv[0]) sys.exit(1) script_path = sys.argv[1] if len(sys.argv) == 3: db = sys.argv[2] else: db = ':memory:' try: con = sqlite3.connect(db) with con: cur = con.cursor() with open(script_path,'rb') as f: cur.executescript(f.read()) except sqlite3.Error as err: print'Error occured: %s'%err #从数据库文件读取数据的代码 import sqlite3 import sys if len(sys.argv) != 2: print"please specify database file.' sys.exit(1) db = sys.argv[1] try: con = sqlite3.connect(db) with con: cur = con.cursor() query = 'SELECT ID,Name,Population FROM City ORDER BY Population DESC LIMIT 1000' con.text_factory = str cur.execute(query) resultset = cur.fetchall() col_names = [cn[0] for cn in cur.description] print'%10s %30s a254 %10s'%tuple(col_names) print'='*(10+1+30+1+10) for row in resultset: print'%10s %30s %10s'%row except sqlite3.Error as err: print'[ERROR]:',err
相关文章推荐
- Python数据可视化编程通过Matplotlib创建散点图代码示例
- 利用Python绘制MySQL数据图实现数据可视化
- Python数据可视化之Matplotlib学习笔记
- python数据采集及可视化
- python—matplotlib数据可视化实例注解系列-----之柱状图
- Caffe学习系列(11):数据可视化环境(python接口)配置
- python 数据可视化练习(2)
- python天天进步(5)--网络编程之数据传输UDP
- “R语言机器学习与大数据可视化”暨“Python文本挖掘与自然语言处理”核心技术高级研修班的通知
- 【转】Python数据可视化利器Matplotlib,colors系列,颜色的指定形式
- Python数据可视化之数据密度分布
- 数据可视化:python画散点图scatter
- Python学习笔记(一)——编程0基础数据分析进阶之路
- python数据可视化(五) seaborn
- 使用PyQtGraph进行Python数据可视化:绘制精美折线图(以 上证指数走势为例) 推荐
- python—matplotlib数据可视化实例注解系列-----之plot图线型设置
- 利用Python进行数据可视化常见的9种方法!超实用!
- Python黑帽编程 4.1 Sniffer(嗅探器)之数据捕获(上)
- python 数据可视化PYECHARTS
- python网络编程之数据传输UDP实例分析