Python脚本进行主播招募相关数据统计的案例
2017-10-20 16:52
281 查看
脚本中主要关注点:
1、从不同源中取数据(两个mysql源及Hive源)
2、两大List根据关键字进行左连接
其实也可以先将match_apply表中数据取到之后先插入到目标表中,然后根据关键字uid再将Hive计算好的指标数据更新到目标表中;在这里是左连接之后再进行一次性插入;并没有考虑mysql多条记录插入的效率,有需要的话可以参考以往脚本进行完善。
/Users/nisj/PycharmProjects/BiDataProc/Demand/hadoopStat/anchorRecruit.py
1、从不同源中取数据(两个mysql源及Hive源)
2、两大List根据关键字进行左连接
其实也可以先将match_apply表中数据取到之后先插入到目标表中,然后根据关键字uid再将Hive计算好的指标数据更新到目标表中;在这里是左连接之后再进行一次性插入;并没有考虑mysql多条记录插入的效率,有需要的话可以参考以往脚本进行完善。
/Users/nisj/PycharmProjects/BiDataProc/Demand/hadoopStat/anchorRecruit.py
# -*- coding=utf-8 -*-
import datetime
import time
import os
import warnings
import sys
import re

# Python 2 only: force utf-8 as the default codec so the Chinese text coming
# back from mysql/hive round-trips through str without UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf8')
warnings.filterwarnings("ignore")


def anchorRecruitProc():
    """Compute anchor-recruitment stats for every pending stat range.

    For each row of jellyfish_hadoop_stat.match_apply_stat_range that has no
    result rows yet (anti-join on range_id), this:
      1. looks up the match's game_id in the jellyfish_event mysql source,
      2. pulls the applicant list (match_apply) for that match,
      3. runs a Hive query producing per-room live-duration / popularity
         indicators for the range's time window,
      4. left-joins applicants to the Hive indicators on uid in Python and
         inserts one result row per applicant into match_apply_stat_result
         (indicator columns NULL-defaulted when the applicant never went live).

    No parameters, no return value; all effects are mysql INSERT/DELETEs.

    NOTE(review): all queries are built by string interpolation into a shell
    command. The interpolated values come from internal databases, not user
    input, but free-text fields (name/qq/area) could still break the quoting —
    consider a parameterized client (e.g. MySQLdb) instead of os.system.
    """
    # Pending ranges: ranges with no rows yet in the result table.
    matchRange = os.popen("""source /etc/profile; \
        /usr/bin/mysql -hMysqlHost -P6605 -uMysqlUser -pMysqlPass --default-character-set=utf8 -N -e " \
        select a1.match_id,a1.start_time,a1.end_time,a1.id range_id \
        from jellyfish_hadoop_stat.match_apply_stat_range a1 \
        left join jellyfish_hadoop_stat.match_apply_stat_result a2 on a1.id=a2.range_id \
        where a2.range_id is null; \
        " """).readlines()

    # mysql -N output is tab-separated; strip characters that would break the
    # later shell-embedded SQL (backticks and both quote styles).
    matchRange_list = []
    for matchRangeList in matchRange:
        matchR = re.split('\t', matchRangeList.replace('\n', '').replace('`', '').replace('\'', '').replace('"', ''))
        matchRange_list.append(matchR)

    for matchRangeData in matchRange_list:
        match_id = matchRangeData[0]
        start_time = matchRangeData[1]
        end_time = matchRangeData[2]
        range_id = matchRangeData[3]

        # The event template holds the game this match belongs to.
        gameId_Data = os.popen("""source /etc/profile; \
            /usr/bin/mysql -hMysqlHost -P50512 -uMysqlUser -pMysqlPass --default-character-set=utf8 -N -e " \
            select game_id from jellyfish_event.event_apply_template where id={match_id}; \
            " """.format(match_id=match_id)).readlines()
        gameId = re.split('\t', gameId_Data[0].replace('\n', '').replace('`', '').replace('\'', '').replace('"', ''))[0]

        # Applicant roster for this match (left side of the join).
        matchApply_Data = os.popen("""source /etc/profile; \
            /usr/bin/mysql -hMysqlHost -P50512 -uMysqlUser -pMysqlPass --default-character-set=utf8 -N -e " \
            select uid,\`name\`,phone_num,qq,area,is_leader from jellyfish_event.match_apply where match_id={match_id}; \
            " """.format(match_id=match_id)).readlines()
        matchApply_list = []
        for matchApplyList in matchApply_Data:
            matchA = re.split('\t', matchApplyList.replace('\n', '').replace('`', '').replace('\'', '').replace('"', ''))
            matchApply_list.append(matchA)

        # Hive indicators per room/uid for the range window (right side of the
        # join): live minutes, composite & highest popularity ranks, live days.
        indexData = os.popen("""source /etc/profile; \
            /usr/lib/hive-current/bin/hive -e "with tab_live_long as( \
            select pt_day,room_id,sum(live_minute) anthor_live_time \
            from (select room_id,pt_day,hour(time_interval)*60+minute(time_interval)+second(time_interval)/60 live_minute from ( \
            select room_id,cast(updated_time as timestamp)-cast(switch_time as timestamp) time_interval,pt_day \
            from honeycomb_all_live_history_status \
            where pt_day between '{start_time}' and '{end_time}' and game_id={gameId}) x) xx \
            group by pt_day,room_id \
            ), \
            tab_room_composite_popularity as( \
            select roomid room_id,sum(view_time) composite_popularity,row_number()over(order by sum(view_time)) composite_popularity_rank \
            from recommend_data_view \
            where pt_day between '{start_time}' and '{end_time}' and gameid={gameId} \
            group by roomid \
            ), \
            tab_room_highest_popularity as( \
            select room_id,max(online_count) highest_popularity,row_number()over(order by max(online_count)) highest_popularity_rank \
            from oss_room_online_stat \
            where pt_day between '{start_time}' and '{end_time}' \
            group by room_id \
            ), \
            tab_user_info as ( \
            select a2.nickname,a1.id room_id,a2.uid \
            from oss_room_v2 a1 \
            left join oss_chushou_user_profile a2 on a1.creator_uid=a2.uid \
            where a1.pt_day='{yesterday}' and a2.pt_day='{yesterday}') \
            select a1.room_id,a4.uid,a4.nickname,a2.composite_popularity_rank composite_popularity,a3.highest_popularity_rank highest_popularity,sum(a1.anthor_live_time) live_duration,count(distinct a1.pt_day) live_days \
            from tab_live_long a1 \
            left join tab_room_composite_popularity a2 on a1.room_id=a2.room_id \
            left join tab_room_highest_popularity a3 on a1.room_id=a3.room_id \
            left join tab_user_info a4 on a1.room_id=a4.room_id group by a1.room_id,a4.uid,a4.nickname,a2.composite_popularity_rank,a3.highest_popularity_rank \
            ; \
            " """.format(start_time=start_time, end_time=end_time, gameId=gameId,
                         yesterday=(datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d'))).readlines()
        index_list = []
        for indexList in indexData:
            indexV = re.split('\t', indexList.replace('\n', '').replace('`', '').replace('\'', '').replace('"', ''))
            index_list.append(indexV)

        # delete data for rollback: makes a rerun of this range idempotent.
        os.system("""source /etc/profile; \
            /usr/bin/mysql -hMysqlHost -P6605 -uMysqlUser -pMysqlPass --default-character-set=utf8 -e " \
            delete from jellyfish_hadoop_stat.match_apply_stat_result where range_id='{range_id}'; \
            " """.format(range_id=range_id))

        # FIX: the uid key list is invariant per range, so build it once here
        # instead of re-appending inside every applicant iteration (the
        # original rebuilt it per applicant — O(applicants * index rows)).
        indexValueDataKey = [indexValueData[1] for indexValueData in index_list]

        for matchApplyData in matchApply_list:
            uid = matchApplyData[0]
            name = matchApplyData[1]
            phone_num = matchApplyData[2]
            qq = matchApplyData[3]
            area = matchApplyData[4]
            is_leader = matchApplyData[5]

            # Left join on uid: one insert per matching indicator row.
            for indexValueData in index_list:
                if matchApplyData[0] == indexValueData[1]:
                    room_id = indexValueData[0]
                    nickname = indexValueData[2]
                    composite_popularity = indexValueData[3]
                    highest_popularity = indexValueData[4]
                    live_duration = indexValueData[5]
                    live_days = indexValueData[6]
                    os.system("""source /etc/profile; \
                        /usr/bin/mysql -hMysqlHost -P6605 -uMysqlUser -pMysqlPass --default-character-set=utf8 -e " \
                        insert into jellyfish_hadoop_stat.match_apply_stat_result(range_id,uid,room_id,\`name\`,phone_num,qq,area,is_leader,composite_popularity,highest_popularity,live_duration,live_days,created_time,updated_time) \
                        values ({range_id},{uid},{room_id},ifnull('{name}',' '),ifnull('{phone_num}',' '),ifnull('{qq}',' '),ifnull('{area}',' '),ifnull({is_leader},0),ifnull({composite_popularity},0),ifnull({highest_popularity},0),ifnull({live_duration},0),ifnull({live_days},0),'{created_time}','{updated_time}') \
                        ; \
                        " """.format(range_id=range_id, uid=uid, room_id=room_id, name=name,
                                     phone_num=phone_num, qq=qq, area=area, is_leader=is_leader,
                                     composite_popularity=composite_popularity,
                                     highest_popularity=highest_popularity,
                                     live_duration=live_duration, live_days=live_days,
                                     created_time=time.strftime('%Y-%m-%d %X', time.localtime()),
                                     updated_time=time.strftime('%Y-%m-%d %X', time.localtime())))

            # Applicant never appeared in the Hive indicators: insert with
            # NULL indicator columns (ifnull() in the SQL supplies defaults).
            if matchApplyData[0] not in indexValueDataKey:
                os.system("""source /etc/profile; \
                    /usr/bin/mysql -hMysqlHost -P6605 -uMysqlUser -pMysqlPass --default-character-set=utf8 -e " \
                    insert into jellyfish_hadoop_stat.match_apply_stat_result(range_id,uid,room_id,\`name\`,phone_num,qq,area,is_leader,composite_popularity,highest_popularity,live_duration,live_days,created_time,updated_time) \
                    values ({range_id},{uid},ifnull({room_id},-1),ifnull('{name}',' '),ifnull('{phone_num}',' '),ifnull('{qq}',' '),ifnull('{area}',' '),ifnull({is_leader},0),ifnull({composite_popularity},0),ifnull({highest_popularity},0),ifnull({live_duration},0),ifnull({live_days},0),'{created_time}','{updated_time}') \
                    ; \
                    " """.format(range_id=range_id, uid=uid, room_id='NULL', name=name,
                                 phone_num=phone_num, qq=qq, area=area, is_leader=is_leader,
                                 composite_popularity='NULL', highest_popularity='NULL',
                                 live_duration='NULL', live_days='NULL',
                                 created_time=time.strftime('%Y-%m-%d %X', time.localtime()),
                                 updated_time=time.strftime('%Y-%m-%d %X', time.localtime())))


# Batch Test
anchorRecruitProc()
相关文章推荐
- Python进行主播拉新相关数据统计的脚本
- Python进行主播收入统计的脚本
- 用python进行科学统计及数据挖掘--便捷工具环境搭建
- python数据统计脚本实例mysql,redis
- 用python对文本格式的数据进行统计处理
- Python中的数据类型转换举例及脚本统计服务器内存实例
- Python从阿里云Oss拉数据写入Hive表并进行相关处理
- 使用rdb文件进行redis数据迁移--python脚本
- 利用Python进行数据分析(9) pandas基础: 汇总统计和计算
- python把csv数据做成列表、字典类型的数据进行存储脚本(readDataToDic_V2.2)
- 利用Python进行数据分析_python3实现_pandas入门_相关系数与协方差
- Mysql分表数据通过Python进行汇总统计
- Python连麦相关信息统计的脚本
- 利用Python进行数据导入、变化、统计和假设检验等基本数据分析
- python把csv数据做成列表、字典类型的数据进行存储脚本(readDataToDic_V2.2)
- hive 的日志处理统计网站的 PV 、UV案例 与 给合 python的数据清洗数据案例
- 利用 Python 进行数据分析(九)pandas 汇总统计和计算
- python进行数据分析------相关分析
- Python脚本进行游戏专区拉新方面的计算案例
- Python利用groupby模块进行Mysql分表数据的汇总统计