大数据Spark “蘑菇云”行动第96课:基于Hive对电商数据案例分析
2016-12-12 21:19
661 查看
大数据Spark “蘑菇云”行动第96课:基于Hive对电商数据案例分析
tbStock.txt
订单号, 交易位置 ,交易日期
BYSL00000893,ZHAO,2007-8-23
BYSL00000897,ZHAO,2007-8-24
BYSL00000898,ZHAO,2007-8-25
tbStockDetail.txt
订单号,行号,订单产品,数量,单价,金额
BYSL00000893,0,FS527258160501,-1,268,-268
BYSL00000893,1,FS527258169701,1,268,268
tbDate.txt
日期,年月,年,月,日,星期几,第几周,季度,旬,半月
2003-1-1,200301,2003,1,1,3,1,1,1,1
2003-1-2,200301,2003,1,2,4,1,1,1,1
2003-1-3,200301,2003,1,3,5,1,1,1,1
2003-1-4,200301,2003,1,4,6,1,1,1,1
-- Calendar dimension table, populated from tbDate.txt (comma-delimited, one record per line).
-- Columns follow the file's field order: date key, year-month, year, month, day,
-- day of week, week number, quarter, ten-day period, half-month.
-- NOTE(review): the table/column names say "data" where the source file says "date";
-- they are kept unchanged because the queries reference tbdata and dataid.
-- IF NOT EXISTS keeps the statement re-runnable when the table already exists.
CREATE TABLE IF NOT EXISTS tbData (
    dataID STRING,
    theyearmonth STRING,
    theyear STRING,
    themonth STRING,
    thedate STRING,
    theweek STRING,
    theweeks STRING,
    thequot STRING,
    thetenday STRING,
    thehalfmonth STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Order header table, populated from tbStock.txt (comma-delimited, one record per line):
-- order number, trading location, and order date. The queries join dataID to tbData.dataID.
-- IF NOT EXISTS keeps the statement re-runnable when the table already exists.
CREATE TABLE IF NOT EXISTS tbStock (
    ordernumber STRING,
    locatitionid STRING,
    dataID STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Order line-item table, populated from tbStockDetail.txt (comma-delimited, one record per line):
-- order number, line number, item id, quantity, unit price, and line amount.
-- NOTE(review): the sample rows suggest amout = qty * price (e.g. -1 * 268 = -268) - confirm.
-- IF NOT EXISTS keeps the statement re-runnable when the table already exists.
CREATE TABLE IF NOT EXISTS tbStockDetail (
    ordernumber STRING,
    rownum INT,
    itemid STRING,
    qty INT,
    price INT,
    amout INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Load the three comma-delimited source files into their tables.
-- The first statement uses LOCAL, so its path is read from the client's local filesystem;
-- the other two omit LOCAL, so their paths are resolved on the cluster filesystem
-- (per Hive LOAD DATA semantics). None of them uses OVERWRITE, so rows are appended.
LOAD DATA LOCAL INPATH '/usr/local/IMFdatatest/HiveTBdata/tbDate.txt' INTO TABLE tbdata;
LOAD DATA INPATH '/library/HiveTBdata/tbStock.txt' INTO TABLE tbStock;
LOAD DATA INPATH '/library/HiveTBdata/tbStockDetail.txt' INTO TABLE tbstockdetail;
-- Orders whose total sales amount exceeds 50000.
-- Returns one row per order number with the summed line amounts as totalfees.
-- The order number is qualified with t1 because both joined tables expose an
-- ordernumber column, and the aggregate is repeated in HAVING so the filter
-- does not depend on alias resolution.
SELECT
    t1.ordernumber,
    SUM(t2.amout) AS totalfees
FROM tbStock AS t1
INNER JOIN tbStockDetail AS t2
    ON t1.ordernumber = t2.ordernumber
GROUP BY t1.ordernumber
HAVING SUM(t2.amout) > 50000;
-- Top 10 (year, quarter) periods ranked by total sales amount, highest first.
-- NOTE(review): the original heading described "top 10 products per year and quarter",
-- but the query aggregates by year and quarter only (no itemid) - confirm the intended report.
SELECT
    c.theyear,
    c.thequot,
    SUM(b.amout) AS sumofamount
FROM tbstock AS a
INNER JOIN tbstockdetail AS b
    ON a.ordernumber = b.ordernumber
INNER JOIN tbdata AS c
    ON a.dataid = c.dataid
GROUP BY c.theyear, c.thequot
ORDER BY sumofamount DESC
LIMIT 10;
-- Best-selling item of each year across all orders.
-- yearly_item_sales: total sales amount per (year, item).
-- yearly_max_sales: the highest per-item total within each year.
-- The final join keeps the item(s) whose yearly total equals that maximum, so ties
-- within a year all appear. Both inputs are already unique on their grouping keys,
-- so no DISTINCT is needed on the result.
WITH yearly_item_sales AS (
    SELECT
        c.theyear,
        b.itemid,
        SUM(b.amout) AS sumofamount
    FROM tbstock AS a
    INNER JOIN tbstockdetail AS b
        ON a.ordernumber = b.ordernumber
    INNER JOIN tbdata AS c
        ON a.dataid = c.dataid
    GROUP BY c.theyear, b.itemid
),
yearly_max_sales AS (
    SELECT
        theyear,
        MAX(sumofamount) AS maxofmount
    FROM yearly_item_sales
    GROUP BY theyear
)
SELECT
    e.theyear,
    e.itemid,
    f.maxofmount
FROM yearly_item_sales AS e
INNER JOIN yearly_max_sales AS f
    ON e.theyear = f.theyear
    AND e.sumofamount = f.maxofmount
ORDER BY e.theyear;
//每年所有订单中最大金额订单的全部销售额
tbStock.txt
订单号, 交易位置 ,交易日期
BYSL00000893,ZHAO,2007-8-23
BYSL00000897,ZHAO,2007-8-24
BYSL00000898,ZHAO,2007-8-25
tbStockDetail.txt
订单号,行号,订单产品,数量,单价,金额
BYSL00000893,0,FS527258160501,-1,268,-268
BYSL00000893,1,FS527258169701,1,268,268
tbDate.txt
日期,年月,年,月,日,星期几,第几周,季度,旬,半月
2003-1-1,200301,2003,1,1,3,1,1,1,1
2003-1-2,200301,2003,1,2,4,1,1,1,1
2003-1-3,200301,2003,1,3,5,1,1,1,1
2003-1-4,200301,2003,1,4,6,1,1,1,1
-- Calendar dimension table, populated from tbDate.txt (comma-delimited, one record per line).
-- Columns follow the file's field order: date key, year-month, year, month, day,
-- day of week, week number, quarter, ten-day period, half-month.
-- NOTE(review): the table/column names say "data" where the source file says "date";
-- they are kept unchanged because the queries reference tbdata and dataid.
-- IF NOT EXISTS keeps the statement re-runnable when the table already exists.
CREATE TABLE IF NOT EXISTS tbData (
    dataID STRING,
    theyearmonth STRING,
    theyear STRING,
    themonth STRING,
    thedate STRING,
    theweek STRING,
    theweeks STRING,
    thequot STRING,
    thetenday STRING,
    thehalfmonth STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Order header table, populated from tbStock.txt (comma-delimited, one record per line):
-- order number, trading location, and order date. The queries join dataID to tbData.dataID.
-- IF NOT EXISTS keeps the statement re-runnable when the table already exists.
CREATE TABLE IF NOT EXISTS tbStock (
    ordernumber STRING,
    locatitionid STRING,
    dataID STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Order line-item table, populated from tbStockDetail.txt (comma-delimited, one record per line):
-- order number, line number, item id, quantity, unit price, and line amount.
-- NOTE(review): the sample rows suggest amout = qty * price (e.g. -1 * 268 = -268) - confirm.
-- IF NOT EXISTS keeps the statement re-runnable when the table already exists.
CREATE TABLE IF NOT EXISTS tbStockDetail (
    ordernumber STRING,
    rownum INT,
    itemid STRING,
    qty INT,
    price INT,
    amout INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Load the three comma-delimited source files into their tables.
-- The first statement uses LOCAL, so its path is read from the client's local filesystem;
-- the other two omit LOCAL, so their paths are resolved on the cluster filesystem
-- (per Hive LOAD DATA semantics). None of them uses OVERWRITE, so rows are appended.
LOAD DATA LOCAL INPATH '/usr/local/IMFdatatest/HiveTBdata/tbDate.txt' INTO TABLE tbdata;
LOAD DATA INPATH '/library/HiveTBdata/tbStock.txt' INTO TABLE tbStock;
LOAD DATA INPATH '/library/HiveTBdata/tbStockDetail.txt' INTO TABLE tbstockdetail;
-- Orders whose total sales amount exceeds 50000.
-- Returns one row per order number with the summed line amounts as totalfees.
-- The order number is qualified with t1 because both joined tables expose an
-- ordernumber column, and the aggregate is repeated in HAVING so the filter
-- does not depend on alias resolution.
SELECT
    t1.ordernumber,
    SUM(t2.amout) AS totalfees
FROM tbStock AS t1
INNER JOIN tbStockDetail AS t2
    ON t1.ordernumber = t2.ordernumber
GROUP BY t1.ordernumber
HAVING SUM(t2.amout) > 50000;
-- Top 10 (year, quarter) periods ranked by total sales amount, highest first.
-- NOTE(review): the original heading described "top 10 products per year and quarter",
-- but the query aggregates by year and quarter only (no itemid) - confirm the intended report.
SELECT
    c.theyear,
    c.thequot,
    SUM(b.amout) AS sumofamount
FROM tbstock AS a
INNER JOIN tbstockdetail AS b
    ON a.ordernumber = b.ordernumber
INNER JOIN tbdata AS c
    ON a.dataid = c.dataid
GROUP BY c.theyear, c.thequot
ORDER BY sumofamount DESC
LIMIT 10;
-- Best-selling item of each year across all orders.
-- yearly_item_sales: total sales amount per (year, item).
-- yearly_max_sales: the highest per-item total within each year.
-- The final join keeps the item(s) whose yearly total equals that maximum, so ties
-- within a year all appear. Both inputs are already unique on their grouping keys,
-- so no DISTINCT is needed on the result.
WITH yearly_item_sales AS (
    SELECT
        c.theyear,
        b.itemid,
        SUM(b.amout) AS sumofamount
    FROM tbstock AS a
    INNER JOIN tbstockdetail AS b
        ON a.ordernumber = b.ordernumber
    INNER JOIN tbdata AS c
        ON a.dataid = c.dataid
    GROUP BY c.theyear, b.itemid
),
yearly_max_sales AS (
    SELECT
        theyear,
        MAX(sumofamount) AS maxofmount
    FROM yearly_item_sales
    GROUP BY theyear
)
SELECT
    e.theyear,
    e.itemid,
    f.maxofmount
FROM yearly_item_sales AS e
INNER JOIN yearly_max_sales AS f
    ON e.theyear = f.theyear
    AND e.sumofamount = f.maxofmount
ORDER BY e.theyear;
//每年所有订单中最大金额订单的全部销售额
相关文章推荐
- 大数据Spark “蘑菇云”行动第71课: 基于Spark 2.0.1项目开发分析与实战
- 大数据Spark “蘑菇云”行动第91课:Hive中Index和Bucket案例实战及存储类型rcfile实战详解
- 大数据Spark “蘑菇云”行动第104课:Hive源码大师之路第二步:Hive真正的入口、词法分析和语法分析
- 大数据Spark “蘑菇云”行动补充内容第69课: Spark SQL案例之分析电影评分系统.
- 大数据Spark “蘑菇云”行动第80课:Spark GraphX 综合案例分析与实战
- 大数据spark“蘑菇云”行动超大型项目实战第68课:spark RDD案例和spark sql案例对比实战 看电影的例子分析 某门热门电影的年龄、性别分析
- 大数据Spark “蘑菇云”行动第87课:Hive嵌套查询与Case、When、Then案例实战
- 大数据Spark “蘑菇云”行动第92课:HIVE中的array、map、struct及自定义数据类型案例实战
- 大数据Spark “蘑菇云”行动第90课:Hive中Join电影店铺系统案例和性能优化、Index和Bucket案例实战
- 大数据Spark “蘑菇云”行动第82课:Spark机器学习本质思考及案例初体验
- 2016年大数据Spark“蘑菇云”行动代码学习之AdClickedStreamingStats模块分析
- 大数据Spark“蘑菇云”行动第55课:在线广告点击流处理代码的分析和实现
- 大数据Spark“蘑菇云”行动第56课:在线广告点击黑名单分析和实现
- 大数据Spark “蘑菇云”行动第88课:Hive脚本、常用命令、having查询及变种实战
- 第107课: Spark Streaming电商广告点击综合案例底层数据层的建模和编码实现(基于MySQL)
- 大数据Spark “蘑菇云”行动第86课:Hive分区表剖析与代码实战
- 大数据Spark “蘑菇云”行动第64课: 页面跳转功能分析与架构 A、B、C页面之间跳转率的计算方法
- 大数据Spark “蘑菇云”行动第83课:Hive架构剖析与初体验
- 大数据Spark “蘑菇云”行动第59课: 广告点击系统TopN热门广告分析与实现
- 大数据Spark “蘑菇云”行动第72课: 基于Spark 2.0.1项目实现之二. 实战 各种小bug修复及性能调优 200并行度调整为2个task