您的位置:首页 > 编程语言 > Python开发

用Python将统计数据不存在的记录按维度对应指标补齐(续:日数据情形)

2017-02-09 15:54 459 查看
接【Python多线程跑Hive批一例】-->http://blog.csdn.net/babyfish13/article/details/54906405

及【用Python将统计数据不存在的记录按维度对应指标补齐】-->http://blog.csdn.net/babyfish13/article/details/54632508

本文主要是将数据按日存储的结果表有些维度为空的记录给补起来,与按周的又有所不同。
1、用到的日期临时表
show create table bitmp_date_list;
CREATE TABLE `bitmp_date_list` (
`str_date` varchar(10) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

2、改进及更新后的日期处理脚本
/Users/nisj/PycharmProjects/EsDataProc/RemainProcByDay/DayProc.py
# -*- coding=utf-8 -*-
import warnings
import datetime

warnings.filterwarnings("ignore")

def getNowDay():
DayNow = datetime.datetime.today().strftime('%Y-%m-%d')
return DayNow

def getFristDay():
FristDay=datetime.datetime.strptime('2015-06-29', '%Y-%m-%d').strftime('%Y-%m-%d')
return FristDay

def getDayAllList(run_day):
DayAllList=[]
begin_date = datetime.datetime.strptime(getFristDay(), "%Y-%m-%d")

if run_day != '':
end_date = datetime.datetime.strptime(run_day, '%Y-%m-%d')
else:
end_date = datetime.datetime.strptime(getNowDay(), "%Y-%m-%d")

while begin_date <= end_date:
date_str = begin_date.strftime("%Y-%m-%d")
DayAllList.append(date_str)
begin_date += datetime.timedelta(days=1)
return DayAllList

def getDayRunList(run_day):
DayRunList=[]
if run_day != '':
end_date = datetime.datetime.strptime(run_day, '%Y-%m-%d')
else:
end_date = datetime.datetime.strptime(getNowDay(), "%Y-%m-%d")

if end_date - datetime.timedelta(days=60) <= datetime.datetime.strptime(getFristDay(), "%Y-%m-%d"):
begin_date = datetime.datetime.strptime(getFristDay(), "%Y-%m-%d")
else:
begin_date = end_date - datetime.timedelta(days=60)

while begin_date <= end_date:
date_str = begin_date.strftime("%Y-%m-%d")
DayRunList.append(date_str)
begin_date += datetime.timedelta(days=1)

return DayRunList

def getDayRunListReverse(run_day):
DayRunList=[]
begin_date = datetime.datetime.strptime(run_day, '%Y-%m-%d') + datetime.timedelta(days=1)

if begin_date + datetime.timedelta(days=60) >= datetime.datetime.strptime(getNowDay(), "%Y-%m-%d") - datetime.timedelta(days=1):
end_date = datetime.datetime.strptime(getNowDay(), "%Y-%m-%d") - datetime.timedelta(days=1)
else:
end_date = begin_date + datetime.timedelta(days=60)

while begin_date <= end_date:
date_str = begin_date.strftime("%Y-%m-%d")
DayRunList.append(date_str)
begin_date += datetime.timedelta(days=1)

return DayRunList

def getDayRemainRunIntervalList(str_begin_date, str_end_date):
DayList=[]
begin_date = datetime.datetime.strptime(str_begin_date, "%Y-%m-%d")
end_date = datetime.datetime.strptime(str_end_date, "%Y-%m-%d")

while begin_date <= end_date:
date_str = begin_date.strftime("%Y-%m-%d")
DayList.append(date_str)
begin_date += datetime.timedelta(days=1)
return DayList

# Batch Test
# run_day = '2016-07-10'
# for remain_day in getDayAllList(run_day):
#     for his_day in getDayRunList(run_day=remain_day):
#         print remain_day, his_day

# one day
# for his_day in getDayRunList(run_day=run_day):
#     print run_day, his_day

# run_day = '2017-01-01'
# for remain_day in getDayRunList(run_day):
#     print remain_day,run_day
# for his_day in getDayRunList(run_day=remain_day):
#     print remain_day, his_day

# 计算2017年的数据信息
# str_begin_date = '2017-01-01'
# str_end_date = '2017-02-05'
# for remain_day in getDayRemainRunIntervalList(str_begin_date, str_end_date):
#     for his_day in getDayRunList(run_day=remain_day):
#         print remain_day,his_day

3、空记录查找及插入
/Users/nisj/PycharmProjects/EsDataProc/RemainProcByDay/Daily_result_null_proc.py
# -*- coding=utf-8 -*-
import os
import re
from DayProc import *

def Insert_TmpDate_Data(run_day):
os.system("""mysql -hMysqlHost -PMysqlPort -uMysqlUser -pMysqlPass -N -e "use funnyai_data; truncate table bitmp_date_list; " """ )
for date_str in getDayRunListReverse(run_day):
os.system("""mysql -hMysqlHost -PMysqlPort -uMysqlUser -pMysqlPass -N -e "use funnyai_data; \
insert into bitmp_date_list(str_date) \
select '%s'; \
" """ % (date_str))

def resultNullProc(run_day):
result_data = os.popen("""mysql -hMysqlHost -PMysqlPort -uMysqlUser -pMysqlPass -N -e "use funnyai_data; \
select appkey,app_source,date \
from chushou_user_compare_daily_stat \
where date='%s' and app_source <>'' \
group by appkey,app_source,date; \
" """ % (run_day)).readlines();

nrpd_list = []
for nrp_list in result_data:
nrp = re.split('\t', nrp_list.replace('\n', ''))
nrpd_list.append(nrp)
for nrpd in nrpd_list:
appkey = nrpd[0]
appsource = nrpd[1]
date = nrpd[2]

result_data1 = os.popen("""mysql -hMysqlHost -PMysqlPort -uMysqlUser -pMysqlPass -N -e "use funnyai_data; \
select '%s' date,a1.str_date compare_date,'%s' appkey,'%s' app_source \
from bitmp_date_list a1 \
left join ( \
select appkey,app_source,date,compare_date \
from chushou_user_compare_daily_stat \
where date='%s' and appkey='%s' and app_source='%s' \
) a2 on a1.str_date=a2.compare_date \
where a2.appkey is null \
order by a2.date,a1.str_date;" """ % (date, appkey, appsource, date, appkey, appsource)).readlines();
nrpd_list = []
for nrp_list in result_data1:
nrp = re.split('\t', nrp_list.replace('\n', ''))
nrpd_list.append(nrp)
for nrpd in nrpd_list:
date = nrpd[0]
compare_date = nrpd[1]
appkey = nrpd[2]
appsource = nrpd[3]
print date,compare_date,appkey,appsource
os.system("""mysql -hMysqlHost -PMysqlPort -uMysqlUser -pMysqlPass -N -e "use funnyai_data; \
insert into chushou_user_compare_daily_stat(appkey,app_source,date,compare_date,compare_identify_count,compare_register_user_count,compare_user_count,compare_pay_amount,compare_pay_user_count,compare_pay_count) \
select '%s','%s','%s','%s','%d','%d','%d','%d','%d','%d'; \
" """ % (appkey, appsource, date, compare_date, 0, 0, 0, 0, 0, 0))

# run_day = '2017-01-01'
# Insert_TmpDate_Data(run_day)
# resultNullProc(run_day)

for run_day in getDayRemainRunIntervalList('2017-01-02', '2017-01-03'):
print run_day
Insert_TmpDate_Data(run_day)
resultNullProc(run_day)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: