您的位置:首页 > 编程语言 > Python开发

Python数据分析案例1-2

2018-02-05 11:14 429 查看
import pandas as pd
import matplotlib.pyplot as plt
from pylab import *
from matplotlib.ticker import  MultipleLocator
from matplotlib.ticker import  FormatStrFormatter

# Directory that holds the per-year input tables (2013.txt, 2014.txt, ...).
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# because later top-level statements build their paths from it.
dir = './数据/'
# Load the 2013-2015 tables with pandas, using its pure-Python parser engine.
# NOTE(review): train_2013 / train_2014 / train_2015 are not referenced again
# in the visible source (the later pipeline uses train2013 etc., without the
# underscore) - confirm whether these reads are still needed.
train_2013 = pd.read_table(dir + '2013.txt',engine='python')
train_2014 = pd.read_table(dir + '2014.txt',engine='python')
train_2015 = pd.read_table(dir + '2015.txt',engine='python')

def dataRead(fileName):
    """Read a whitespace-delimited text table into a list of rows.

    The first line of the file is treated as a header and discarded. Every
    following line is split on whitespace and each token is evaluated as a
    Python expression, so ``"3"`` becomes ``3`` and ``"1.5"`` becomes ``1.5``.
    A blank line produces an empty row.

    Args:
        fileName: Path of the UTF-8 encoded text file to read.

    Returns:
        A list containing one list of evaluated values per data line.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever ``eval`` raises for a token that is not a valid
            Python expression.
    """
    print('read {name} context!'.format(name=fileName))
    dataList = []
    # The context manager guarantees the handle is closed, even when a token
    # fails to evaluate half-way through the file.
    with open(fileName, encoding='UTF-8') as f:
        f.readline()  # Skip the header line.
        for line in f:
            # SECURITY: eval() executes arbitrary expressions. This is only
            # acceptable for trusted, locally produced data files; switch to
            # int()/float()/ast.literal_eval() before reading untrusted input.
            dataList.append([eval(token) for token in line.split()])
    return dataList

# Write rows to a tab-separated text file.
def writeTxt(dataList, fileName, strHead):
    """Append a header line and data rows to a tab-separated text file.

    The file is opened in append mode, so calling this twice with the same
    path writes the header twice. Output is encoded as UTF-8, which is the
    encoding ``dataRead`` uses when reading.

    Args:
        dataList: Sequence of rows; each row is a sequence of values that are
            converted with ``str``. The number of columns written for every
            row is the length of the first row (longer rows are truncated,
            shorter rows are written as-is).
        fileName: Path of the file to append to.
        strHead: Sequence of column-name strings written as the header line.

    Returns:
        None.
    """
    # The context manager flushes and closes the file; the original left the
    # handle open, so buffered output was only written at interpreter exit.
    with open(fileName, 'a', encoding='UTF-8') as f:
        f.write('\t'.join(strHead) + '\n')
        if not dataList:
            # Nothing to write besides the header; avoids indexing dataList[0].
            return
        width = len(dataList[0])
        f.writelines(
            '\t'.join(str(value) for value in row[:width]) + '\n'
            for row in dataList
        )

def generateData(year, n_days, start_week):
    """Build a zero-filled calendar skeleton with five rows per day.

    For every generated day, one row is emitted for each brand 1..5 in the
    layout ``[day_of_year, day_of_week, brand, 0, year, month, day_of_month]``.
    The fourth field is a placeholder count of 0.

    Args:
        year: Calendar year to generate.
        n_days: For non-leap years, the cumulative number of days from
            2013-01-01 through the last day to generate (for example 730 for
            all of 2014). For leap years, the number of days of ``year``
            itself to generate.
            NOTE(review): the leap-year meaning differs from the non-leap
            one; it is preserved from the original implementation so that
            existing calls keep producing the same rows.
        start_week: Day of week of January 1st. Values advance by one per day
            and wrap from 7 back to 1.

    Returns:
        A list of 7-element lists, ordered by day and then by brand. At most
        one full year is produced; a non-positive day count yields ``[]``.
    """
    import calendar
    from datetime import date

    if calendar.isleap(year):
        remaining = n_days
    else:
        # Days elapsed between 2013-01-01 and January 1st of `year`. This
        # replaces a lookup that only covered 2013-2017 and left the offset
        # unbound (UnboundLocalError) for any other non-leap year.
        remaining = n_days - (date(year, 1, 1) - date(2013, 1, 1)).days

    dataList = []
    day_of_year = 0
    weekday = start_week
    for month in range(1, 13):
        days_in_month = calendar.monthrange(year, month)[1]
        for day in range(1, days_in_month + 1):
            if day_of_year >= remaining:
                return dataList
            day_of_year += 1
            for brand in range(1, 6):
                dataList.append(
                    [day_of_year, weekday, brand, 0, year, month, day]
                )
            # Equivalent to (weekday + 1) % 7 with 0 mapped to 7.
            weekday = weekday % 7 + 1
    return dataList

def generateDict(data):
    """Index rows by a ``year-month-day-brand`` string key.

    The key is built from row positions 4, 5, 6 and 2 joined with ``-``
    (for example ``'2013-1-1-3'``). When two rows produce the same key, the
    later row replaces the earlier one.

    Args:
        data: Iterable of rows, each with at least seven elements.

    Returns:
        A dict mapping each key to the row object itself (not a copy).
    """
    dataDict = {}
    for row in data:
        key = '-'.join(str(row[column]) for column in (4, 5, 6, 2))
        dataDict[key] = row
    return dataDict

# The first argument is the shorter (incomplete) dictionary.
def combineData(dictOne, dictTwo):
    """Overlay the rows of ``dictOne`` onto the matching rows of ``dictTwo``.

    For every key present in both dicts, everything from index 1 onward in
    the ``dictTwo`` row is replaced by the same slice of the ``dictOne`` row;
    element 0 of the ``dictTwo`` row is kept. Keys found only in ``dictOne``
    are ignored.

    Args:
        dictOne: Dict of partial rows (key -> list).
        dictTwo: Dict of complete rows (key -> list); its rows are modified
            in place.

    Returns:
        ``dictTwo``, the same object that was passed in.
    """
    for key, observed_row in dictOne.items():
        if key in dictTwo:
            dictTwo[key][1:] = observed_row[1:]
    return dictTwo

def selectData(dict):
    """Return the values of a mapping as a list, in the mapping's own order.

    Args:
        dict: Mapping whose values are collected. The parameter name shadows
            the builtin ``dict``; it is kept so existing keyword callers
            continue to work.

    Returns:
        A new list holding the mapping's values (the values themselves are
        not copied).
    """
    return list(dict.values())

# 1. Read the source data (one whitespace-delimited text file per year).
train2013 = dataRead(dir+'2013.txt')
train2014 = dataRead(dir+'2014.txt')
train2015 = dataRead(dir+'2015.txt')
train2016 = dataRead(dir+'2016.txt')
# print(train2013)
# print(train2014)
# print(train2015)

# 2. Convert the source data into dicts so the raw data can later be merged
#    with the complete (gap-free) data.
train2013 = generateDict(train2013)
train2014 = generateDict(train2014)
train2015 = generateDict(train2015)
train2016 = generateDict(train2016)
# print(train2013)

# 3. Generate the complete data for each year. The second argument is the
#    cumulative day count since the start of 2013; the third is the value
#    passed as start_week for January 1st of that year.
# NOTE(review): data2017 is generated here but is not converted, merged or
# written by any of the later steps - confirm whether it is still needed.
data2013 = generateData(2013,365,2)
data2014 = generateData(2014,730,3)
data2015 = generateData(2015,1095,4)
data2016 = generateData(2016,1461,5)
data2017 = generateData(2017,1826,7)
# print(data2013)
# print(data2014)
# print(data2015)
# print(data2016)
# print(data2017)

# 4. Convert the complete data into dicts so it can be merged below.
data2013 = generateDict(data2013)
data2014 = generateDict(data2014)
data2015 = generateDict(data2015)
data2016 = generateDict(data2016)

# 5. Merge the raw data into the complete data (this operates on the dicts).
data2013 = combineData(train2013, data2013)
data2014 = combineData(train2014, data2014)
data2015 = combineData(train2015, data2015)
data2016 = combineData(train2016, data2016)
# print(data2013)

# 6. Extract each year's rows to use as model training data.
data2013 = selectData(data2013)
data2014 = selectData(data2014)
data2015 = selectData(data2015)
data2016 = selectData(data2016)

# 7. Write the rows obtained in step 6 to text files.
# NOTE(review): output goes to './data/' whereas the inputs are read from
# `dir` ('./数据/') - confirm the two directories are meant to differ.
writeTxt(data2013, './data/data2013.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
writeTxt(data2014, './data/data2014.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
writeTxt(data2015, './data/data2015.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
writeTxt(data2016, './data/data2016.txt', ['date', 'day_of_week', 'brand', 'cnt', 'year', 'month', 'day'])
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息