您的位置:首页 > 数据库 > Oracle

oracle 开发 第08章 分析数据

2016-01-12 16:14 555 查看
2016-02-02

目录

一、评级函数
  1.RANK()和DENSE_RANK()函数
  2.CUME_DIST()和PERCENT_RANK()函数
  3.NTILE()函数
  4.ROW_NUMBER()函数
  5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数
二、窗口函数
  1.计算累积和
  2.计算移动平均值
  3.计算中心平均值
  4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行
  5.用NTH_VALUE()函数获取第n行
三、报表函数
  1.总计报表
  2.RATIO_TO_REPORT()函数
  3.LISTAGG()函数
  4.LAG()和LEAD()函数
  5.FIRST()和LAST()函数
四、线性回归函数
五、MODEL子句
  1.位置标记访问数据单元
  2.符号标记访问数据单元
  3.用between和and返回特定范围的数据单元
  4.用any和is any 访问所有数据单元
  5.用currentv()函数获取某个维度当前值
  6.用for循环访问数据单元
  7.使用is present
  8.使用presentv()函数
  9.使用presentnnv()函数
  10.使用ignore nav和keep nav
  11.使用rules udpate更新已有单元
六、PIVOT和UNPIVOT子句
  1.pivot简单示例
  2.转换多列
  3.使用多个聚合函数
  4.使用UNPIVOT子句

一、评级函数
1.RANK()和DENSE_RANK()函数

--返回数据项在分组中的排名
select * from all_sales where rownum <= 12;

select prd_type_id,
sum(amount),
rank() over(order by sum(amount) desc) as rank,
dense_rank() over(order by sum(amount) desc) as dense_rank
from all_sales
where year = 2003
and amount is not NULL
group by prd_type_id
order by prd_type_id;

select prd_type_id,
sum(amount),
rank() over(order by sum(amount) desc) as rank,
dense_rank() over(order by sum(amount) desc) as dense_rank
from all_sales
where year = 2003
/*   and amount is not NULL*/
group by prd_type_id
order by prd_type_id;

--NULLS FIRST和NULLS LAST子句控制空值排名
select prd_type_id,
sum(amount),
rank() over(order by sum(amount) desc nulls last) as rank,
dense_rank() over(order by sum(amount) desc nulls last) as dense_rank
from all_sales
where year = 2003
/*   and amount is not NULL*/
group by prd_type_id
order by prd_type_id;

--RANK()和PARTITION BY
select prd_type_id,
month,
sum(amount),
rank() over(partition by month order by sum(amount) desc) as rank
from all_sales
where year = 2003
and amount is not NULL
group by prd_type_id, month
order by prd_type_id, month;

--RANK()和ROLLUP()
select prd_type_id,
sum(amount),
rank() over(order by sum(amount) desc) as rank
from all_sales
where year = 2003
group by rollup(prd_type_id)
order by prd_type_id;
--RANK()和CUBE()
select prd_type_id,
emp_id,
sum(amount),
rank() over(order by sum(amount) desc) as rank
from all_sales
where year = 2003
group by cube(prd_type_id, emp_id)
order by prd_type_id, emp_id;
--RANK()和GROUPTING SETS
select prd_type_id,
emp_id,
sum(amount),
rank() over(order by sum(amount) desc) as rank
from all_sales
where year = 2003
group by cube(prd_type_id, emp_id)
order by prd_type_id, emp_id;


2.CUME_DIST()和PERCENT_RANK()函数

--CUME_DIST()计算某个特定值在一组值中的累积分布
--PERCENT_RANK计算某个值相对一组值的百分比排名
select prd_type_id,
sum(amount),
cume_dist() over(order by sum(amount) desc) as cume_dist,
percent_rank() over(order by sum(amount) desc) as percent_rank
from all_sales
where year = 2003
group by prd_type_id, emp_id
order by prd_type_id, emp_id;


3.NTILE()函数

--计算n分片
select prd_type_id,
sum(amount),
ntile(4) over(order by sum(amount) desc) as ntile
from all_sales
where year = 2003
and amount is not null
group by prd_type_id
order by prd_type_id;


4.ROW_NUMBER()函数

--从1开始,为分组中的每行返回一个数字
select prd_type_id,
sum(amount),
row_number() over(order by sum(amount) desc) as row_number
from all_sales
where year = 2003
and amount is not null
group by prd_type_id
order by prd_type_id;


5.PERCENILE_DISC(x)和PERCENTILE_CONT(x)反百分位函数

select percentile_cont(0.6) within group(order by sum(amount) desc) as percentile_cont,
percentile_disc(0.6) within group(order by sum(amount) desc) as percentile_disc
from all_sales
where year = 2003
group by prd_type_id;


二、窗口函数
1.计算累积和

select month,
sum(amount) as month_amount,
sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount
from all_sales
where year = 2003
group by month
order by month;

select month,
sum(amount) as month_amount,
sum(sum(amount)) over(order by month rows between unbounded preceding and current row) as cumulative_amount
from all_sales
where year = 2003
and month between 6 and 12
group by month
order by month;


2.计算移动平均值

select month,
sum(amount) as month_amount,
avg(sum(amount)) over(order by month rows between 3 preceding and current row) as moving_average
from all_sales
where year = 2003
group by month
order by month;


3.计算中心平均值

select month,
sum(amount) as month_amount,
avg(sum(amount)) over(order by month rows between 1 preceding and 1 following) as moving_average
from all_sales
where year = 2003
group by month
order by month;


4.用FIRST_VALUE()和LAST_VALUE()获取第一行和最后一行

select month,
sum(amount) as month_amount,
first_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as previous_month_amount,
last_value(sum(amount)) over(order by month rows between 1 preceding and 1 following) as next_month_amount
from all_sales
where year = 2003
group by month
order by month;


5.用NTH_VALUE()函数获取第n行

select month,
sum(amount) as month_amount,
nth_value(sum(amount), 2) over(order by month rows between unbounded preceding and unbounded following) as nth_value
from all_sales
where year = 2003
group by month
order by month;

select prd_type_id,
emp_id,
max(amount),
nth_value(max(amount), 4) over(partition by prd_type_id order by emp_id rows between unbounded preceding and unbounded following) as nth_value
from all_sales
where prd_type_id between 1 and 3
group by prd_type_id, emp_id
order by prd_type_id, emp_id;


三、报表函数
1.总计报表

select month,
prd_type_id,
sum(sum(amount)) over(partition by month) as total_month,
sum(sum(amount)) over(partition by prd_type_id) as total_product_type_amount
from all_sales
where year = 2003
and month <= 3
group by month, prd_type_id
order by month, prd_type_id;


2.RATIO_TO_REPORT()函数

--计算某个值在一组值的总和中所占的比率
select month,
prd_type_id,
sum(amount) as prd_type_amount,
ratio_to_report(sum(amount)) over(partition by month) as prd_type_ratio
from all_sales
where year = 2003
and month <= 3
group by month, prd_type_id
order by month, prd_type_id;


3.LISTAGG()函数

--对分组内的各行排序并将分组的值集连接起来
select * from products order by product_id;

select listagg(name, ', ') within group(order by price, name) as "Product List",
max(price) as "Most Expensive"
from products
where product_id <= 5;

select product_id,
product_type_id,
name,
listagg(name, ', ') within group(order by name) over(partition by product_type_id) as "Product List"
from products
where product_id <= 5
order by product_id, product_type_id;


4.LAG()和LEAD()函数

--获得位于距当前行指定距离的那一行数据
select month,
sum(amount) as month_amount,
lag(sum(amount), 1) over(order by month) as previous_month_amount,
lead(sum(amount), 1) over(order by month) as next_month_amount
from all_sales
where year = 2003
group by month
order by month;


5.FIRST()和LAST()函数

--获取排序分组中第一个值和最后一个值
select min(month) keep (dense_rank first order by sum(amount)) as highest_sales_month,
min(month) keep (dense_rank last order by sum(amount)) as lowest_sales_month
from all_sales
where year = 2003
group by month
order by month;


四、线性回归函数

select prd_type_id,
regr_avgx(amount, month) as avgx,
regr_avgy(amount, month) as avgy,
regr_count(amount, month) as count,
regr_intercept(amount, month) as inter,
regr_r2(amount, month) as r2,
regr_slope(amount, month) as slope,
regr_sxx(amount, month) as sxx,
regr_sxy(amount, month) as sxy,
regr_syy(amount, month) as syy
from all_sales
where year = 2003
group by prd_type_id
order by prd_type_id;

select prd_type_id,
sum(amount),
rank() over(order by sum(amount) desc) as rank,
percent_rank() over(order by sum(amount) desc) as percent_rank
from all_sales
where year = 2003
and amount is not null
group by prd_type_id
order by prd_type_id;

select rank(500000) within group(order by sum(amount) desc) as rank,
percent_rank(500000) within group(order by sum(amount) desc) as percent_rank
from all_sales
where year = 2003
and amount is not null
group by prd_type_id
order by prd_type_id;


五、MODEL子句
1.位置标记访问数据单元

select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount [ 1, 2004 ] = sales_amount [ 1, 2003 ],
sales_amount [ 2, 2004 ] = sales_amount [ 2, 2003 ] + sales_amount [ 3, 2003 ],
sales_amount [ 3, 2004 ] = round(sales_amount [ 3, 2003 ] * 1.25, 2))
order by prd_type_id, year, month;


2.符号标记访问数据单元

select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount [ month = 1, year = 2004 ] = sales_amount [ month = 1,year = 2003 ],
sales_amount [ month = 2, year = 2004] = sales_amount [ month = 2, year = 2003 ] + sales_amount [month = 3, year = 2003 ],
sales_amount [ month = 3, year = 2004] = round(sales_amount [ month = 3, year = 2003 ] * 1.25, 2))
order by prd_type_id, year, month;


3.用between和and返回特定范围的数据单元

select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount[ 1, 2004] = round(avg(sales_amount) [ month between 1 and 3 ,2003], 2))
order by prd_type_id, year, month;


4.用any和is any 访问所有数据单元

select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount[ 1, 2004] = round(sum(sales_amount) [ any, year is any], 2))
order by prd_type_id, year, month;


5.用currentv()函数获取某个维度当前值

select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount[ 1, 2004] = round(sales_amount[ currentv(),2003] * 1.25, 2))
order by prd_type_id, year, month;


6.用for循环访问数据单元

select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount[ for month from 1 to 3 increment 1,2004] = round(sales_amount[currentv(),2003] * 1.25,2))
order by prd_type_id, year, month;


7.使用is present

--如果cell指定的行在model子句执行之前存在,那么is present返回true
select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount [ for month from 1 to 3 increment 1, 2004 ] =
case when sales_amount [ currentv(), 2003 ] is present
then
round(sales_amount [ currentv(), 2003 ] * 1.25, 2)
else
0
end)
order by prd_type_id, year, month;


8.使用presentv()函数

--如果cell引用的行在model子句执行前存在,那么presentv(cell,expr1,expr2)返回表达式expr1;如果不存在,返回expr2
select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount [ for month from 1 to 3 increment 1, 2004 ] =
presentv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0)
)
order by prd_type_id, year, month;


9.使用presentnnv()函数

--如果cell引用的行在model子句执行前存在,并且该单元值不为空,那么presentv(cell,expr1,expr2)返回表达式expr1;如果行不存在或单元值为空,返回expr2
select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount [ for month from 1 to 3 increment 1, 2004 ] =
presentnnv(sales_amount[currentv(),2003],round(sales_amount[currentv(),2003] * 1.25,2),0)
)
order by prd_type_id, year, month;


10.使用ignore nav和keep nav

select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model ignore nav
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
(sales_amount [ for month from 1 to 3 increment 1, 2004 ] =
round(sales_amount[currentv(),2003] * 1.25,2)
)
order by prd_type_id, year, month;


11.使用rules udpate更新已有单元

--在单元不存在的情况下不创建新行
select prd_type_id, year, month, sales_amount
from all_sales
where prd_type_id between 1 and 2
and emp_id = 21
model
partition by(prd_type_id)
dimension by(month, year)
measures(amount sales_amount)
rules update (sales_amount [ for month from 1 to 3 increment 1, 2004 ] =
round(sales_amount[currentv(),2003] * 1.25,2)
)
order by prd_type_id, year, month;


六、PIVOT和UNPIVOT子句
1.pivot简单示例

select *
from (select month, prd_type_id, amount
from all_sales
where year = 2003
and prd_type_id in (1, 2, 3));

select *
from (select month, prd_type_id, amount
from all_sales
where year = 2003
and prd_type_id in (1, 2, 3))
pivot(sum(amount) for month in(1 as jan,
2 as feb,
3 as mar,
4 as apr))
order by prd_type_id;


2.转换多列

SELECT
*
FROM
(SELECT
MONTH,
prd_type_id,
amount
FROM
all_sales
WHERE YEAR = 2003
AND prd_type_id IN (1, 2, 3)) pivot (
SUM(amount) FOR (MONTH, prd_type_id) IN (
(1, 2) AS jan_prdtype2,
(2, 3) AS feb_prdtype3,
(3, 1) AS mar_prdtype1,
(4, 2) AS apr_prdtype2
)
) ;


3.使用多个聚合函数

SELECT
*
FROM
(SELECT
MONTH,
prd_type_id,
amount
FROM
all_sales
WHERE YEAR = 2003
AND prd_type_id IN (1, 2, 3)
)
PIVOT (
SUM(amount) AS sum_amount,
AVG(amount) AS avg_amount
FOR (month) IN (1 AS JAN,2 AS FEB
)
)
ORDER BY prd_type_id ;


4.使用UNPIVOT子句

SELECT *
FROM pivot_sales_data
UNPIVOT (
amount FOR month IN (JAN,FEB,MAR,APR)
)
ORDER BY prd_type_id;


【参考资料】

[1] Jason Price.精通Oracle Database 12c SQL&PLSQL编程(第3版).[M].北京:清华大学出版社,2014
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: