您的位置:首页 > 移动开发

【HiveETL】互联网行业 -- APP统计分析、业绩分析

2016-06-28 21:53 369 查看


--========== rates ==========--

app0	1
app1	2
app2	2
app3	3
app4	3
app5	3
app6	5
app7	5
app8	5
app9	5

CREATE EXTERNAL TABLE rates (
app_name    STRING
, star_rates  STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/tmp/db/rates';

create table app_ranks as
select app_name as app
, star_rates as stars
, <strong>NTILE(3) </strong>OVER (ORDER BY star_rates DESC) as nt
, <strong>row_number()</strong> OVER (ORDER BY star_rates DESC) as rn
, <strong>rank()</strong> OVER (ORDER BY star_rates DESC) as rk
, <strong>dense_rank()</strong> OVER (ORDER BY star_rates DESC) as drk
,<strong> CUME_DIST()</strong> OVER (ORDER BY star_rates) as cd
,<strong> PERCENT_RANK() OVER (ORDER BY star_rates) as pr</strong>
from rates
order by stars desc
;

select app, stars, cd, <strong>sum(cd) </strong>OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) from app_ranks;

select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) from app_ranks;

select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) from app_ranks;

select app, stars, rn, <strong>lead(rn) OVER (</strong>PARTITION BY stars ORDER BY rn), <strong>lag(rn) OVER (</strong>PARTITION BY stars ORDER BY rn) from app_ranks;
--========== visitors ==========--

d001	201301	101
d002	201301	102
d003	201301	103
d001	201302	111
d002	201302	112
d003	201302	113
d001	201303	121
d002	201303	122
d003	201303	123
d001	201304	131
d002	201304	132
d003	201304	133
d001	201305	141
d002	201305	142
d003	201305	143
d001	201306	151
d002	201306	152
d003	201306	153
d001	201307	201
d002	201307	202
d003	201307	203
d001	201308	211
d002	201308	212
d003	201308	213
d001	201309	221
d002	201309	222
d003	201309	223
d001	201310	231
d002	201310	232
d003	201310	233
d001	201311	241
d002	201311	242
d003	201311	243
d001	201312	301
d002	201312	302
d003	201312	303
d001	201401	301
d002	201401	302
d003	201401	303
d001	201402	211
d002	201402	212
d003	201402	213
d001	201403	271
d002	201403	272
d003	201403	273
d001	201404	331
d002	201404	332
d003	201404	333
d001	201405	351
d002	201405	352
d003	201405	353

CREATE EXTERNAL TABLE visitors (
domain  STRING
, month   STRING
, visitor STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/tmp/db/visitors';

select * from visitors where domain = 'd001';

select domain
, month
, visitor
, <strong>first_value(visitor) OVER (</strong>PARTITION BY domain ORDER BY month DESC)
, <strong>last_value(visitor) OVER (</strong>PARTITION BY domain ORDER BY month DESC)
, <strong>lead(visitor) OVER (</strong>PARTITION BY domain ORDER BY month DESC)
, <strong>lag(visitor) OVER (</strong>PARTITION BY domain ORDER BY month DESC)
from visitors
where domain = 'd001';

select domain
, month
, visitor     , first_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
, last_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC)
, lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)
, lag(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)
,<strong> lead(visitor, 12, 0) OVER (</strong>PARTITION BY domain ORDER BY month DESC)
, <strong>lag(visitor, 12, 0) OVER (</strong>PARTITION BY domain ORDER BY month DESC)
from visitors
where domain = 'd001';

create table visitors_report as
select domain
, month
, visitor
, lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)            as last_mon
, visitor - lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC)  as delta_mon
, lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)           as last_year
, <strong>visitor - lead(visitor, 12, 0) OVER (</strong>PARTITION BY domain ORDER BY month DESC) as delta_year
from visitors
;

select * from visitors_report where domain = 'd001' and month > '2014';

select month
, domain
, visitor
, last_mon
, last_year
from visitors_report
where (domain = 'd001' or domain = 'd002') and month > '2014'
order by month desc, domain asc
limit 100;

select month
, domain
, visitor
,<strong> max(visitor) OVER (</strong>PARTITION BY month) as max_visitors
, <strong>min(visitor) OVER (</strong>PARTITION BY month) as min_visitors
from visitors
where month > '2014'
order by month desc, domain asc;

select *
from (
select month
, domain
, visitor
, max(visitor) OVER (PARTITION BY domain ORDER BY month DESC <strong>ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING)</strong> as max_visitors_last_12_mon
, min(visitor) OVER (PARTITION BY domain ORDER BY month DESC <strong>ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING)</strong> as min_visitors_last_12_mon
from visitors
) v
where month > '20131'
order by month desc, domain asc;




----打印版:





内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: