【SQL解惑】谜题10:社会保险号的工资
2017-11-19 23:02
302 查看
解惑一:
1、创建表
create table Pensions
(
sinn char(10) not null,
pen_year integer not null,
month_cnt integer default 0 not null
check (month_cnt between 0 and 12),
earnings decimal (8,2) default 0.00 not null
)
(1)sinn 社会保险号
(2)pen_year退休金的日历年份
(3)month_cnt一年工作的总月数
(4)earnings一年工作的总收入
2、插入数据
insert into Pensions(sinn,pen_year,month_cnt,earnings)
values
('0001',2015,12,60000),
('0001',2016,12,60000),
('0001',2017,12,60000),
('0001',2018,12,60000),
('0001',2019,12,60000),
('0001',2020,2,60000),
('0001',2021,0,0.00),
('0001',2022,12,60000),
('0001',2023,12,60000),
('0001',2024,12,60000),
('0001',2025,12,60000),
('0001',2026,12,60000),
('0001',2027,0,0.00),
('0001',2028,12,60000),
('0001',2029,12,60000),
('0001',2030,12,60000),
('0001',2031,12,60000),
('0001',2032,12,60000),
('0001',2033,12,60000)
go
3、创建视图
(1)from语句获取pensions表分别为表1和表2
(2)where语句根据条件筛选数据
a.筛选表1和表2中社会保险号相等的;
b.筛选表2的中退休金年份大于等于四年的表1的退休金年份;
c.筛选表1和表2的对应的连续年数的每年工作月数是否每一个都大于0(其中子查询表3中筛选出社会保险号等于表1的社会保险号,再筛选出退休金年份在表1和表2的退休金年份之中);
d.筛选表1和表2的对应的连续年数的总工作月数之和是否大于60(其中子查询表4中筛选出社会保险号等于表1的社会保险号,再筛选出退休金年份在表1和表2的退休金年份之中的总工作月数之和)。
(3)select语句根据符合的条件筛选社会保险号、退休金开始年份、退休金结束年份、退休金开始年份和结束年份之间的总收入(其中子查询表2中筛选出社会保险号等于表1的社会保险号,再筛选出退休金年份在表1和表2的退休金年份之中的总收入之和)
create view PenPeriods(sinn,start_year,end_year,earnings_tot)
as
select p0.sinn,p0.pen_year,p1.pen_year,
(select SUM(earnings)
from Pensions as p2
where p2.sinn = p0.sinn
and p2.pen_year between p0.pen_year and p1.pen_year)
from Pensions as p0,Pensions as p1
where p0.sinn = p1.sinn
and p1.pen_year >= (p0.pen_year - 4)
and 0 < all(select month_cnt
from Pensions as p3
where p3.sinn = p0.sinn
and p3.pen_year between p0.pen_year and p1.pen_year)
and 60 <= (select SUM(month_cnt)
from Pensions as p4
where p4.sinn = p0.sinn
and p4.pen_year between p0.pen_year and p1.pen_year)
go
4、取得最近的一次连续工作的数据
(改进了语句)因为题目要求是找到每一个雇员在连续年份的、最近60个月的总收入。因此,选择开始年份和结束年份为最近的一次退休金年份即可。
select *
from PenPeriods as p0
where end_year = (select MAX(end_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
and start_year = (select max(start_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
解惑二:
1、创建表和插入数据
create table Pensions
(
sinn char(10) not null,
pen_year integer not null,
month_cnt integer default 0 not null
check (month_cnt between 0 and 12),
earnings decimal (8,2) default 0.00 not null
)
go
insert into Pensions(sinn,pen_year,month_cnt,earnings)
values
('0001',2015,12,60000),
('0001',2016,12,60000),
('0001',2017,12,60000),
('0001',2018,12,60000),
('0001',2019,12,60000),
('0001',2020,2,60000),
('0001',2021,0,0.00),
('0001',2022,12,60000),
('0001',2023,12,60000),
('0001',2024,12,60000),
('0001',2025,12,60000),
('0001',2026,12,60000),
('0001',2027,0,0.00),
('0001',2028,12,60000),
('0001',2029,12,60000),
('0001',2030,12,60000),
('0001',2031,12,60000),
('0001',2032,12,60000),
('0001',2033,12,60000)
go
2、筛选符合的数据
select *
from (select p0.sinn as sinn,
p0.pen_year as start_year,
p2.pen_year as end_year,
SUM(p1.earnings) as earnings_tot
from Pensions as p0,
Pensions as p1,
Pensions as p2
where p0.month_cnt > 0
and p1.month_cnt > 0
and p2.month_cnt > 0
and p0.sinn = p1.sinn
and p0.sinn = p2.sinn
and p0.pen_year between p2.pen_year - 59 and (p2.pen_year - 4) --p0代表了开始年份要大于小于
and p1.pen_year between p0.pen_year and p2.pen_year
group by p0.sinn,p0.pen_year,p2.pen_year
having sum(p1.month_cnt) >= 60 and (p2.pen_year - p0.pen_year) = (count(*) - 1)) as p0
where end_year = (select MAX(end_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
and start_year = (select max(start_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
分解步骤:
(1)from子句里面的p0子查询,作用:筛选连续工作60个月以上的退休金年份开始年数和结束年数和总工作收入。
a.from子句自联结三张Pensions表p0,p1,p2
b.where子句筛选
b.1先筛选掉三表的month_cnt每年工作月数等于0的数
b.2再连接sinn社会保险号相等的
b.3筛选表p0的退休金年份中,大于p2的年份5年,小于60年的退休金年份
b.4筛选出p1的退休金年份中在p0的退休金年份和p2的退休金年份,通过b.3的步骤即可了解p0的退休金年份和p2的退休金年份分别代表开始年份和结束年份
c.groupby子句将表按社会保险号、开始年份和结束年份分组
d.having子句
d.1筛选分组内的总工作月数之和大于60的
d.2筛选分组内的(结束年份 - 开始年份的计数)= 分组内统计数 - 1,这样做的原因是,一旦这个结束年份与开始年份之间的数不连续,即存在某个年份的总工作月数小于0的情况,将不会连续。
例如:以下是某人的社会保险号数据,其中2021年和2027年的总工作年数为0
根据以下的语句来过滤数据检查d.2的条件的作用
select p0.sinn,p0.pen_year,p1.pen_year,p2.pen_year
from Pensions as p0,Pensions as p1,Pensions as p2
where p0.month_cnt > 0
and p1.month_cnt > 0
and p2.month_cnt > 0
and p0.sinn = p1.sinn
and p0.sinn = p2.sinn
and p0.pen_year between p2.pen_year - 59 and (p2.pen_year - 4)
and p1.pen_year between p0.pen_year and p2.pen_year
以下取部分数据为,由于groupby子句按社会保险号、开始年份和结束年份分组,因此假设取部分数据为p0.sinn=1和p0.pen_year=2015和p2.pen_year=2025,以此来统计p1.pen_year中是否存在为0被筛选出的年份,根据条件计算为p2.pen_year - p0.pen_year = count(p1.pen_year) - 1的结果为2025-2015=10-1即10=9,实际两边结果并不相等,存在该原因为2015到2025年之间存在2021年的工作总月数为0因此没有统计到。
以下是该子查询的最终语句:
select p0.sinn as sinn,
p0.pen_year as start_year,
p2.pen_year as end_year,
SUM(p1.earnings) as earnings_tot
from Pensions as p0,Pensions as p1,Pensions as p2
where p0.month_cnt > 0
and p1.month_cnt > 0
and p2.month_cnt > 0
and p0.sinn = p1.sinn
and p0.sinn = p2.sinn
and p0.pen_year between p2.pen_year - 59 and (p2.pen_year - 4)
and p1.pen_year between p0.pen_year and p2.pen_year
group by p0.sinn,p0.pen_year,p2.pen_year
having sum(p1.month_cnt) >= 60 and (p2.pen_year - p0.pen_year) = (count(*) - 1)
(3)根据以上的统计结果再筛选出最近的一次连续工作的数据
(改进了语句)因为题目要求是找到每一个雇员在连续年份的、最近60个月的总收入。因此,选择开始年份和结束年份为最近的一次退休金年份即可。
select *
from (...) as p0
where end_year = (select MAX(end_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
and start_year = (select max(start_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
解惑三:
1、创建表
create
view
PensionsView(sinn,start_year,end_year,sumofearnings,sumofmonth_cnt)
as
select
p0.sinn
as
sinn,
p0.pen_year
as
start_year,
p2.pen_year
as
end_year,
SUM(p1.earnings)
as
sumofearnings,
sum(p1.month_cnt)
as
sumofmonth_cnt
from
Pensions
as
p0,Pensions
as
p1,Pensions
as
p2
where
p0.month_cnt
>
0
and
p1.month_cnt
>
0
and
p2.month_cnt
>
0
and
p0.sinn
=
p1.sinn
and
p0.sinn
=
p2.sinn
and
p0.pen_year
between
p2.pen_year
-
59
and
(p2.pen_year
-
4)
and
p1.pen_year
between
p0.pen_year
and
p2.pen_year
group
by
p0.sinn,p0.pen_year,p2.pen_year
having
sum(p1.month_cnt)
>=
60
and
(p2.pen_year
-
p0.pen_year)
=
(count(*)
-
1)
go
2、语句
select
p0.sinn,p0.end_year,
max(p0.start_year)
as
laststart_year,
min(p0.sumofearnings)
as
minearnings,
min(p0.sumofmonth_cnt)
as
minmonth_cnt,
min(p0.start_year)
as
firstart_year,
max(p0.sumofearnings)
as
maxearnings,
max(p0.sumofmonth_cnt)
as
maxmonth_cnt
from
PensionsView
as
p0
where
end_year
=
(select
max(end_year)
from
Pensions
as
p1
where
p1.sinn
=
p0.sinn)
group
by
p0.sinn,p0.end_year
3、报错:
聚合不应出现在 WHERE 子句中,除非该聚合位于 HAVING 子句或选择列表所包含的子查询中,并且要对其进行聚合的列是外部引用。
改错正后的语句为:
select
p0.sinn,p0.end_year,
max(p0.start_year)
as
laststart_year,
min(p0.sumofearnings)
as
minearnings,
min(p0.sumofmonth_cnt)
as
minmonth_cnt,
min(p0.start_year)
as
firstart_year,
max(p0.sumofearnings)
as
maxearnings,
max(p0.sumofmonth_cnt)
as
maxmonth_cnt
from
PensionsView
as
p0
where
end_year
=
(select
max(end_year)
from
PensionsView
as
p1
where
p0.sinn
=
p1.sinn)
group
by
p0.sinn,p0.end_year
4、分解步骤:
(1)from语句获取PensionsView视图
(2)where子句的end_year字段与子查询进行过滤。按照社会保险号来区分,获取其中最大的end_year,即2033年
(3)再按照sinn和end_year来进行分组
(4)在sinn和end_year分组内来获取各个数据
a.获取社会保险号、获取最接近现在的退休金年份
b.按照sinn和end_year的分组中,start_year最大的即是最近的、连续总工作月份为60个月起始退休金年份
c.按照sinn和end_year的分组中,最小的总工作收入即30000,也即最近的、连续总工作月份
d.按照sinn和end_year的分组中,最小的月份数即60,也即最近的、连续总工作月份
e.按照sinn和end_year的分组中,start_year最小的即是最大的end_year中从最开始连续的到现在的退休金开始年份
f.按照sinn和end_year的分组中,最大的总工作收入即36000,也即最大的end_year中从最开始连续的到现在的退休金开始年份中这段时间的总工作收入
g.按照sinn和end_year的分组中,最大的总工作月份即72,也即最大的end_year中从最开始连续的到现在的退休金开始年份中这段时间的总工作月份
解惑四:
1、创建表并插入数据
create
table
Pensions
(
sinn
char(10)
not
null,
pen_year
integer
not
null,
month_cnt
integer
default
0
not
null
check
(month_cnt
between
0
and
12),
earnings
decimal
(8,2)
default
0.00
not
null
)
go
insert
into
Pensions(sinn,pen_year,month_cnt,earnings)
values
('0001',2015,12,60000),
('0001',2016,12,60000),
('0001',2017,12,60000),
('0001',2018,12,60000),
('0001',2019,12,60000),
('0001',2020,12,60000),
('0001',2021,12,60000),
('0001',2022,12,60000),
('0001',2023,12,60000),
('0001',2024,12,60000),
('0001',2025,6,30000)
2、谜题的要求是需要取到一个雇员最近的、连续60个月的month_cnt的收入。假设现在有一雇员,从2015年1月份开始连续工作到2024年总共10年,在第11年即2025年工作了6个月。那么最近的、连续工作60个月的时间应该从2020年的7月份开始算起。但是如果采用前面两种方法得出来的总收入则会将2020年到2025年的66个月的工资都算进去。
分解步骤:
(1)
select
p1.sinn,p1.pen_year
as
first_year,
p2.pen_year
as
last_year,
p1.month_cnt
as
first_year_month_cnt,
p1.earnings
as
first_year_earnings,
count(p3.pen_year)
as
year_cnt,
sum(p3.month_cnt)
as
total_month_cnt,
sum(p3.earnings)
as
earnings_tot
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt,p1.earnings
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1
a.from语句从表Pensions取数
b.inner join将Pensions与另外两个Pensions内连接。
c.where语句筛选出表3的pen_year在表1的pen_year和表2的pen_year中间,即表1的pen_year作为开始时间,表2的pen_year作为结束时间;且表3的pen_year对应的month_cnt大于0,即要获取所有有工作的年份。
d.groupby语句将筛选出来的数据根据p1的社会保险号、p1的pen_year(开始时间)、p2的pen_year(结束时间)、p1的month_cnt(开始工作当年的总工作月份)、p1的earnings(开始工作当年的总工作收入)进行分组
e.1在分组内进行筛选数据,如果位于表1的pen_year和表2的pen_year中间的表3的pen_year计数等于表2的pen_year减表1的pen_year+1则证明表3的pen_year是从开始时间到结束时间的连续数据,中间不存在没有工作的年份;
e.2再筛选位于表1的pen_year和表2的pen_year中间的表3的month_cnt的总工作月份之和是否大于60到60+首年工作总月份数-1,由于题目要求是最近连续的60个月,因为总月份数肯定要大于60个月,但是统计总收入是按年累加,且并不是所有年份都是每年12个月的工作月份,因此统计总工作月数要加上限为60个月加上第一年工作月数-1。
f.select语句再选择p1的sinn、p1的pen_year(即开始时间)、p2的pen_year(即结束时间)、p1的month_cnt(开始工作当年的总工作月份)、p1的earnings(开始工作当年的总工作收入),统计p3的pen_year的年份数(即连续工作的总年数)、p3的month_cnt的总工作月数(即连续工作的总月份数)、p3的earnings的总工作收入(即连续工作的总工作收入)。
(2)
分解步骤:
a.from语句从表a取数
b.where子句筛选表a的last_year
b.1 子查询筛选最大的last_year
b.1.1 子查询的from语句从表Pensions取数
b.1.2 innerjoin子句内联两张表Pensions
b.1.3 where子句查询位于表1的pen_year和表2的pen_year中
且总工作月数month_cnt大于0的
且表1的社会保险号等于表a的社会保险号(主要是根据a表的结果可以直接在b表筛选掉不符合数据的社会保险号)
的表3的pen_year
b.1.4 groupby语句将筛选出来的数据根据p1的社会保险号、p1的pen_year(开始时间)、p2的pen_year(结束时间)、p1的month_cnt(开始工作当年的总工作月份)进行分组
b.1.5 在分组内进行筛选数据,如果位于表1的pen_year和表2的pen_year中间的表3的pen_year计数等于表2的pen_year减表1的pen_year+1则证明表3的pen_year是从开始时间到结束时间的连续数据,中间不存在没有工作的年份
b.1.6 再统计位于表1的pen_year和表2的pen_year中间的表3的month_cnt的总工作月份之和是否大于60到60+首年工作总月份数-1,由于题目要求是最近连续的60个月,因为总月份数肯定要大于60个月,但是统计总收入是按年累加,且并不是所有年份都是每年12个月的工作月份,因此统计总工作月数要加上限为60个月加上第一年工作月数-1。
c. 从数据中筛选出最大的last_year
select...
from
(...)
as
a
where
a.last_year
= select
max(last_year)
from (select
p2.pen_year
as
last_year
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
and
p1.sinn
=
a.sinn
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
(3)
分解步骤:
a.select语句将选择a表中的p1的sinn、p1的pen_year(即开始时间)、p2的pen_year(即结束时间)、p1的month_cnt(开始工作当年的总工作月份)、p1的earnings(开始工作当年的总工作收入),统计p3的pen_year的年份数(即连续工作的总年数)、p3的month_cnt的总工作月数(即连续工作的总月份数)、p3的earnings的总工作收入(即连续工作的总工作收入)。
b.1 同时通过连续工作的总月份数除以12取余数获得第一年需要扣减的月份数;
b.2 通过连续工作的总月份数除以12取余数再除以第一个月份的总工作月份数再乘以第一年的总收入,即计算总收入中不属于60个月的需要扣减掉的收入;
b.3 通过表a计算的总工作收入减去b.2算出来的需调整的不属于60个月的需要扣减掉的收入得到真实收入。
select
*,
total_month_cnt
%
12
as
nonutillzed_first_year_month_cnt,
total_month_cnt
%
12
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
first_year_adjustment,
earnings_tot
-
(total_month_cnt
%
12)
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
adjusted_eaernings_tot
from
(...)
as
a
where
a.last_year
=
(select
max(last_year)
from
(...)
as
b)
3、总查询语句
select
*,
total_month_cnt
%
12
as
nonutillzed_first_year_month_cnt,
total_month_cnt
%
12
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
first_year_adjustment,
earnings_tot
-
(total_month_cnt
%
12)
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
adjusted_eaernings_tot
from
(
select
p1.sinn,p1.pen_year
as
first_year,
p2.pen_year
as
last_year,
p1.month_cnt
as
first_year_month_cnt,
p1.earnings
as
first_year_earnings,
count(p3.pen_year)
as
year_cnt,
sum(p3.month_cnt)
as
total_month_cnt,
sum(p3.earnings)
as
earnings_tot
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt,p1.earnings
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
as
a
where
a.last_year
=
(select
max(last_year)
from
(select
p2.pen_year
as
last_year
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
and
p1.sinn
=
a.sinn
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
as
b)
解惑五:分别两种公用表达式的方法
1、用公共表表达式得出来的结果
With
A(sinn,row_number,first_year,last_year,first_year_month_cnt,first_year_earnings,year_cnt,
total_month_cnt,earnings_tot)
as
(select
p1.sinn,ROW_NUMBER()
over
(order
by
p1.sinn),
p1.pen_year,p2.pen_year,
p1.month_cnt,p1.earnings,
COUNT(p3.pen_year),
SUM(p3.month_cnt),sum(p3.earnings)
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt,p1.earnings
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
SUM(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
select
sinn,earnings_tot
from
A
as
Parent
where
not
exists
(select
*
from
A
where
sinn
=
Parent.sinn
and
row_number
>
parent.row_number)
2、用公共表表达式得出来的结果
With
Ranges(sinn,first_year,last_year,earnings_tot)
as
(select
p1.sinn,
p1.pen_year,p2.pen_year,
sum(p3.earnings)
as
earnings_tot
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
SUM(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1),
lastRange(sinn,last_year)
as
(select
sinn,max(last_year)
from
Ranges
group
by
sinn)
select
R.*
from
Ranges
as
R
inner
join
LastRange
as
L
on
R.sinn
=
L.sinn
and
R.last_year
=
L.last_year
1、创建表
create table Pensions
(
sinn char(10) not null,
pen_year integer not null,
month_cnt integer default 0 not null
check (month_cnt between 0 and 12),
earnings decimal (8,2) default 0.00 not null
)
(1)sinn 社会保险号
(2)pen_year退休金的日历年份
(3)month_cnt一年工作的总月数
(4)earnings一年工作的总收入
2、插入数据
insert into Pensions(sinn,pen_year,month_cnt,earnings)
values
('0001',2015,12,60000),
('0001',2016,12,60000),
('0001',2017,12,60000),
('0001',2018,12,60000),
('0001',2019,12,60000),
('0001',2020,2,60000),
('0001',2021,0,0.00),
('0001',2022,12,60000),
('0001',2023,12,60000),
('0001',2024,12,60000),
('0001',2025,12,60000),
('0001',2026,12,60000),
('0001',2027,0,0.00),
('0001',2028,12,60000),
('0001',2029,12,60000),
('0001',2030,12,60000),
('0001',2031,12,60000),
('0001',2032,12,60000),
('0001',2033,12,60000)
go
3、创建视图
(1)from语句获取pensions表分别为表1和表2
(2)where语句根据条件筛选数据
a.筛选表1和表2中社会保险号相等的;
b.筛选表2的中退休金年份大于等于四年的表1的退休金年份;
c.筛选表1和表2的对应的连续年数的每年工作月数是否每一个都大于0(其中子查询表3中筛选出社会保险号等于表1的社会保险号,再筛选出退休金年份在表1和表2的退休金年份之中);
d.筛选表1和表2的对应的连续年数的总工作月数之和是否大于60(其中子查询表4中筛选出社会保险号等于表1的社会保险号,再筛选出退休金年份在表1和表2的退休金年份之中的总工作月数之和)。
(3)select语句根据符合的条件筛选社会保险号、退休金开始年份、退休金结束年份、退休金开始年份和结束年份之间的总收入(其中子查询表2中筛选出社会保险号等于表1的社会保险号,再筛选出退休金年份在表1和表2的退休金年份之中的总收入之和)
create view PenPeriods(sinn,start_year,end_year,earnings_tot)
as
select p0.sinn,p0.pen_year,p1.pen_year,
(select SUM(earnings)
from Pensions as p2
where p2.sinn = p0.sinn
and p2.pen_year between p0.pen_year and p1.pen_year)
from Pensions as p0,Pensions as p1
where p0.sinn = p1.sinn
and p1.pen_year >= (p0.pen_year - 4)
and 0 < all(select month_cnt
from Pensions as p3
where p3.sinn = p0.sinn
and p3.pen_year between p0.pen_year and p1.pen_year)
and 60 <= (select SUM(month_cnt)
from Pensions as p4
where p4.sinn = p0.sinn
and p4.pen_year between p0.pen_year and p1.pen_year)
go
4、取得最近的一次连续工作的数据
(改进了语句)因为题目要求是找到每一个雇员在连续年份的、最近60个月的总收入。因此,选择开始年份和结束年份为最近的一次退休金年份即可。
select *
from PenPeriods as p0
where end_year = (select MAX(end_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
and start_year = (select max(start_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
解惑二:
1、创建表和插入数据
create table Pensions
(
sinn char(10) not null,
pen_year integer not null,
month_cnt integer default 0 not null
check (month_cnt between 0 and 12),
earnings decimal (8,2) default 0.00 not null
)
go
insert into Pensions(sinn,pen_year,month_cnt,earnings)
values
('0001',2015,12,60000),
('0001',2016,12,60000),
('0001',2017,12,60000),
('0001',2018,12,60000),
('0001',2019,12,60000),
('0001',2020,2,60000),
('0001',2021,0,0.00),
('0001',2022,12,60000),
('0001',2023,12,60000),
('0001',2024,12,60000),
('0001',2025,12,60000),
('0001',2026,12,60000),
('0001',2027,0,0.00),
('0001',2028,12,60000),
('0001',2029,12,60000),
('0001',2030,12,60000),
('0001',2031,12,60000),
('0001',2032,12,60000),
('0001',2033,12,60000)
go
2、筛选符合的数据
select *
from (select p0.sinn as sinn,
p0.pen_year as start_year,
p2.pen_year as end_year,
SUM(p1.earnings) as earnings_tot
from Pensions as p0,
Pensions as p1,
Pensions as p2
where p0.month_cnt > 0
and p1.month_cnt > 0
and p2.month_cnt > 0
and p0.sinn = p1.sinn
and p0.sinn = p2.sinn
and p0.pen_year between p2.pen_year - 59 and (p2.pen_year - 4) --p0代表了开始年份要大于小于
and p1.pen_year between p0.pen_year and p2.pen_year
group by p0.sinn,p0.pen_year,p2.pen_year
having sum(p1.month_cnt) >= 60 and (p2.pen_year - p0.pen_year) = (count(*) - 1)) as p0
where end_year = (select MAX(end_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
and start_year = (select max(start_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
分解步骤:
(1)from子句里面的p0子查询,作用:筛选连续工作60个月以上的退休金年份开始年数和结束年数和总工作收入。
a.from子句自联结三张Pensions表p0,p1,p2
b.where子句筛选
b.1先筛选掉三表的month_cnt每年工作月数等于0的数
b.2再连接sinn社会保险号相等的
b.3筛选表p0的退休金年份中,大于p2的年份5年,小于60年的退休金年份
b.4筛选出p1的退休金年份中在p0的退休金年份和p2的退休金年份,通过b.3的步骤即可了解p0的退休金年份和p2的退休金年份分别代表开始年份和结束年份
c.groupby子句将表按社会保险号、开始年份和结束年份分组
d.having子句
d.1筛选分组内的总工作月数之和大于60的
d.2筛选分组内的(结束年份 - 开始年份的计数)= 分组内统计数 - 1,这样做的原因是,一旦这个结束年份与开始年份之间的数不连续,即存在某个年份的总工作月数小于0的情况,将不会连续。
例如:以下是某人的社会保险号数据,其中2021年和2027年的总工作年数为0
sinn | pen_year | month_cnt | earnings |
1 | 2015 | 12 | 60000 |
1 | 2016 | 12 | 60000 |
1 | 2017 | 12 | 60000 |
1 | 2018 | 12 | 60000 |
1 | 2019 | 12 | 60000 |
1 | 2020 | 2 | 60000 |
1 | 2021 | 0 | 0 |
1 | 2022 | 12 | 60000 |
1 | 2023 | 12 | 60000 |
1 | 2024 | 12 | 60000 |
1 | 2025 | 12 | 60000 |
1 | 2026 | 12 | 60000 |
1 | 2027 | 0 | 0 |
1 | 2028 | 12 | 60000 |
1 | 2029 | 12 | 60000 |
1 | 2030 | 12 | 60000 |
1 | 2031 | 12 | 60000 |
1 | 2032 | 12 | 60000 |
1 | 2033 | 12 | 60000 |
select p0.sinn,p0.pen_year,p1.pen_year,p2.pen_year
from Pensions as p0,Pensions as p1,Pensions as p2
where p0.month_cnt > 0
and p1.month_cnt > 0
and p2.month_cnt > 0
and p0.sinn = p1.sinn
and p0.sinn = p2.sinn
and p0.pen_year between p2.pen_year - 59 and (p2.pen_year - 4)
and p1.pen_year between p0.pen_year and p2.pen_year
以下取部分数据为,由于groupby子句按社会保险号、开始年份和结束年份分组,因此假设取部分数据为p0.sinn=1和p0.pen_year=2015和p2.pen_year=2025,以此来统计p1.pen_year中是否存在为0被筛选出的年份,根据条件计算为p2.pen_year - p0.pen_year = count(p1.pen_year) - 1的结果为2025-2015=10-1即10=9,实际两边结果并不相等,存在该原因为2015到2025年之间存在2021年的工作总月数为0因此没有统计到。
sinn | pen_year | pen_year | pen_year |
1 | 2015 | 2015 | 2025 |
1 | 2015 | 2016 | 2025 |
1 | 2015 | 2017 | 2025 |
1 | 2015 | 2018 | 2025 |
1 | 2015 | 2019 | 2025 |
1 | 2015 | 2020 | 2025 |
1 | 2015 | 2022 | 2025 |
1 | 2015 | 2023 | 2025 |
1 | 2015 | 2024 | 2025 |
1 | 2015 | 2025 | 2025 |
select p0.sinn as sinn,
p0.pen_year as start_year,
p2.pen_year as end_year,
SUM(p1.earnings) as earnings_tot
from Pensions as p0,Pensions as p1,Pensions as p2
where p0.month_cnt > 0
and p1.month_cnt > 0
and p2.month_cnt > 0
and p0.sinn = p1.sinn
and p0.sinn = p2.sinn
and p0.pen_year between p2.pen_year - 59 and (p2.pen_year - 4)
and p1.pen_year between p0.pen_year and p2.pen_year
group by p0.sinn,p0.pen_year,p2.pen_year
having sum(p1.month_cnt) >= 60 and (p2.pen_year - p0.pen_year) = (count(*) - 1)
(3)根据以上的统计结果再筛选出最近的一次连续工作的数据
(改进了语句)因为题目要求是找到每一个雇员在连续年份的、最近60个月的总收入。因此,选择开始年份和结束年份为最近的一次退休金年份即可。
select *
from (...) as p0
where end_year = (select MAX(end_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
and start_year = (select max(start_year)
from PenPeriods as p1
where p1.sinn = p0.sinn)
解惑三:
1、创建表
create
view
PensionsView(sinn,start_year,end_year,sumofearnings,sumofmonth_cnt)
as
select
p0.sinn
as
sinn,
p0.pen_year
as
start_year,
p2.pen_year
as
end_year,
SUM(p1.earnings)
as
sumofearnings,
sum(p1.month_cnt)
as
sumofmonth_cnt
from
Pensions
as
p0,Pensions
as
p1,Pensions
as
p2
where
p0.month_cnt
>
0
and
p1.month_cnt
>
0
and
p2.month_cnt
>
0
and
p0.sinn
=
p1.sinn
and
p0.sinn
=
p2.sinn
and
p0.pen_year
between
p2.pen_year
-
59
and
(p2.pen_year
-
4)
and
p1.pen_year
between
p0.pen_year
and
p2.pen_year
group
by
p0.sinn,p0.pen_year,p2.pen_year
having
sum(p1.month_cnt)
>=
60
and
(p2.pen_year
-
p0.pen_year)
=
(count(*)
-
1)
go
2、语句
select
p0.sinn,p0.end_year,
max(p0.start_year)
as
laststart_year,
min(p0.sumofearnings)
as
minearnings,
min(p0.sumofmonth_cnt)
as
minmonth_cnt,
min(p0.start_year)
as
firstart_year,
max(p0.sumofearnings)
as
maxearnings,
max(p0.sumofmonth_cnt)
as
maxmonth_cnt
from
PensionsView
as
p0
where
end_year
=
(select
max(end_year)
from
Pensions
as
p1
where
p1.sinn
=
p0.sinn)
group
by
p0.sinn,p0.end_year
3、报错:
聚合不应出现在 WHERE 子句中,除非该聚合位于 HAVING 子句或选择列表所包含的子查询中,并且要对其进行聚合的列是外部引用。
改错正后的语句为:
select
p0.sinn,p0.end_year,
max(p0.start_year)
as
laststart_year,
min(p0.sumofearnings)
as
minearnings,
min(p0.sumofmonth_cnt)
as
minmonth_cnt,
min(p0.start_year)
as
firstart_year,
max(p0.sumofearnings)
as
maxearnings,
max(p0.sumofmonth_cnt)
as
maxmonth_cnt
from
PensionsView
as
p0
where
end_year
=
(select
max(end_year)
from
PensionsView
as
p1
where
p0.sinn
=
p1.sinn)
group
by
p0.sinn,p0.end_year
4、分解步骤:
(1)from语句获取PensionsView视图
(2)where子句的end_year字段与子查询进行过滤。按照社会保险号来区分,获取其中最大的end_year,即2033年
(3)再按照sinn和end_year来进行分组
(4)在sinn和end_year分组内来获取各个数据
a.获取社会保险号、获取最接近现在的退休金年份
b.按照sinn和end_year的分组中,start_year最大的即是最近的、连续总工作月份为60个月起始退休金年份
c.按照sinn和end_year的分组中,最小的总工作收入即30000,也即最近的、连续总工作月份
d.按照sinn和end_year的分组中,最小的月份数即60,也即最近的、连续总工作月份
e.按照sinn和end_year的分组中,start_year最小的即是最大的end_year中从最开始连续的到现在的退休金开始年份
f.按照sinn和end_year的分组中,最大的总工作收入即36000,也即最大的end_year中从最开始连续的到现在的退休金开始年份中这段时间的总工作收入
g.按照sinn和end_year的分组中,最大的总工作月份即72,也即最大的end_year中从最开始连续的到现在的退休金开始年份中这段时间的总工作月份
解惑四:
1、创建表并插入数据
create
table
Pensions
(
sinn
char(10)
not
null,
pen_year
integer
not
null,
month_cnt
integer
default
0
not
null
check
(month_cnt
between
0
and
12),
earnings
decimal
(8,2)
default
0.00
not
null
)
go
insert
into
Pensions(sinn,pen_year,month_cnt,earnings)
values
('0001',2015,12,60000),
('0001',2016,12,60000),
('0001',2017,12,60000),
('0001',2018,12,60000),
('0001',2019,12,60000),
('0001',2020,12,60000),
('0001',2021,12,60000),
('0001',2022,12,60000),
('0001',2023,12,60000),
('0001',2024,12,60000),
('0001',2025,6,30000)
2、谜题的要求是需要取到一个雇员最近的、连续60个月的month_cnt的收入。假设现在有一雇员,从2015年1月份开始连续工作到2024年总共10年,在第11年即2025年工作了6个月。那么最近的、连续工作60个月的时间应该从2020年的7月份开始算起。但是如果采用前面两种方法得出来的总收入则会将2020年到2025年的66个月的工资都算进去。
分解步骤:
(1)
select
p1.sinn,p1.pen_year
as
first_year,
p2.pen_year
as
last_year,
p1.month_cnt
as
first_year_month_cnt,
p1.earnings
as
first_year_earnings,
count(p3.pen_year)
as
year_cnt,
sum(p3.month_cnt)
as
total_month_cnt,
sum(p3.earnings)
as
earnings_tot
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt,p1.earnings
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1
a.from语句从表Pensions取数
b.inner join将Pensions与另外两个Pensions内连接。
c.where语句筛选出表3的pen_year在表1的pen_year和表2的pen_year中间,即表1的pen_year作为开始时间,表2的pen_year作为结束时间;且表3的pen_year对应的month_cnt大于0,即要获取所有有工作的年份。
d.groupby语句将筛选出来的数据根据p1的社会保险号、p1的pen_year(开始时间)、p2的pen_year(结束时间)、p1的month_cnt(开始工作当年的总工作月份)、p1的earnings(开始工作当年的总工作收入)进行分组
e.1在分组内进行筛选数据,如果位于表1的pen_year和表2的pen_year中间的表3的pen_year计数等于表2的pen_year减表1的pen_year+1则证明表3的pen_year是从开始时间到结束时间的连续数据,中间不存在没有工作的年份;
e.2再筛选位于表1的pen_year和表2的pen_year中间的表3的month_cnt的总工作月份之和是否大于60到60+首年工作总月份数-1,由于题目要求是最近连续的60个月,因为总月份数肯定要大于60个月,但是统计总收入是按年累加,且并不是所有年份都是每年12个月的工作月份,因此统计总工作月数要加上限为60个月加上第一年工作月数-1。
f.select语句再选择p1的sinn、p1的pen_year(即开始时间)、p2的pen_year(即结束时间)、p1的month_cnt(开始工作当年的总工作月份)、p1的earnings(开始工作当年的总工作收入),统计p3的pen_year的年份数(即连续工作的总年数)、p3的month_cnt的总工作月数(即连续工作的总月份数)、p3的earnings的总工作收入(即连续工作的总工作收入)。
(2)
分解步骤:
a.from语句从表a取数
b.where子句筛选表a的last_year
b.1 子查询筛选最大的last_year
b.1.1 子查询的from语句从表Pensions取数
b.1.2 innerjoin子句内联两张表Pensions
b.1.3 where子句查询位于表1的pen_year和表2的pen_year中
且总工作月数month_cnt大于0的
且表1的社会保险号等于表a的社会保险号(主要是根据a表的结果可以直接在b表筛选掉不符合数据的社会保险号)
的表3的pen_year
b.1.4 groupby语句将筛选出来的数据根据p1的社会保险号、p1的pen_year(开始时间)、p2的pen_year(结束时间)、p1的month_cnt(开始工作当年的总工作月份)进行分组
b.1.5 在分组内进行筛选数据,如果位于表1的pen_year和表2的pen_year中间的表3的pen_year计数等于表2的pen_year减表1的pen_year+1则证明表3的pen_year是从开始时间到结束时间的连续数据,中间不存在没有工作的年份
b.1.6 再统计位于表1的pen_year和表2的pen_year中间的表3的month_cnt的总工作月份之和是否大于60到60+首年工作总月份数-1,由于题目要求是最近连续的60个月,因为总月份数肯定要大于60个月,但是统计总收入是按年累加,且并不是所有年份都是每年12个月的工作月份,因此统计总工作月数要加上限为60个月加上第一年工作月数-1。
c. 从数据中筛选出最大的last_year
select...
from
(...)
as
a
where
a.last_year
= select
max(last_year)
from (select
p2.pen_year
as
last_year
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
and
p1.sinn
=
a.sinn
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
(3)
分解步骤:
a.select语句将选择a表中的p1的sinn、p1的pen_year(即开始时间)、p2的pen_year(即结束时间)、p1的month_cnt(开始工作当年的总工作月份)、p1的earnings(开始工作当年的总工作收入),统计p3的pen_year的年份数(即连续工作的总年数)、p3的month_cnt的总工作月数(即连续工作的总月份数)、p3的earnings的总工作收入(即连续工作的总工作收入)。
b.1 同时通过连续工作的总月份数除以12取余数获得第一年需要扣减的月份数;
b.2 通过连续工作的总月份数除以12取余数再除以第一个月份的总工作月份数再乘以第一年的总收入,即计算总收入中不属于60个月的需要扣减掉的收入;
b.3 通过表a计算的总工作收入减去b.2算出来的需调整的不属于60个月的需要扣减掉的收入得到真实收入。
select
*,
total_month_cnt
%
12
as
nonutillzed_first_year_month_cnt,
total_month_cnt
%
12
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
first_year_adjustment,
earnings_tot
-
(total_month_cnt
%
12)
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
adjusted_eaernings_tot
from
(...)
as
a
where
a.last_year
=
(select
max(last_year)
from
(...)
as
b)
3、总查询语句
select
*,
total_month_cnt
%
12
as
nonutillzed_first_year_month_cnt,
total_month_cnt
%
12
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
first_year_adjustment,
earnings_tot
-
(total_month_cnt
%
12)
/
(first_year_month_cnt
*
1.0)
*
first_year_earnings
as
adjusted_eaernings_tot
from
(
select
p1.sinn,p1.pen_year
as
first_year,
p2.pen_year
as
last_year,
p1.month_cnt
as
first_year_month_cnt,
p1.earnings
as
first_year_earnings,
count(p3.pen_year)
as
year_cnt,
sum(p3.month_cnt)
as
total_month_cnt,
sum(p3.earnings)
as
earnings_tot
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt,p1.earnings
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
as
a
where
a.last_year
=
(select
max(last_year)
from
(select
p2.pen_year
as
last_year
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
and
p1.sinn
=
a.sinn
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
sum(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
as
b)
解惑五:分别两种公用表达式的方法
1、用公共表表达式得出来的结果
With
A(sinn,row_number,first_year,last_year,first_year_month_cnt,first_year_earnings,year_cnt,
total_month_cnt,earnings_tot)
as
(select
p1.sinn,ROW_NUMBER()
over
(order
by
p1.sinn),
p1.pen_year,p2.pen_year,
p1.month_cnt,p1.earnings,
COUNT(p3.pen_year),
SUM(p3.month_cnt),sum(p3.earnings)
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt,p1.earnings
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
SUM(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1)
select
sinn,earnings_tot
from
A
as
Parent
where
not
exists
(select
*
from
A
where
sinn
=
Parent.sinn
and
row_number
>
parent.row_number)
2、用公共表表达式得出来的结果
With
Ranges(sinn,first_year,last_year,earnings_tot)
as
(select
p1.sinn,
p1.pen_year,p2.pen_year,
sum(p3.earnings)
as
earnings_tot
from
Pensions
as
p1
inner
join
Pensions
as
p2
on
p1.sinn
=
p2.sinn
inner
join
Pensions
as
p3
on
p1.sinn
=
p3.sinn
where
p3.pen_year
between
p1.pen_year
and
p2.pen_year
and
p3.month_cnt
>
0
group
by
p1.sinn,p1.pen_year,p2.pen_year,p1.month_cnt
having
COUNT(p3.pen_year)
=
p2.pen_year
-
p1.pen_year
+
1
and
SUM(p3.month_cnt)
between
60
and
60
+
p1.month_cnt
-
1),
lastRange(sinn,last_year)
as
(select
sinn,max(last_year)
from
Ranges
group
by
sinn)
select
R.*
from
Ranges
as
R
inner
join
LastRange
as
L
on
R.sinn
=
L.sinn
and
R.last_year
=
L.last_year
相关文章推荐
- 【SQL解惑】谜题15:找出最近两次工资
- 【SQL解惑】谜题11:工作顺序
- 使用光标查询10部门的员工姓名和工资,并打印__使用光标查询员工姓名和工资,并打印
- 谜题10:八两
- C++初学者的足迹10 作业报告9 计算周工资
- 【SQL解惑】谜题12:索赔状态
- 《Java解惑》系列——01表达式之谜——谜题10:八两
- 【SQL解惑】谜题13:教师
- 5-10 计算工资
- 【SQL解惑】谜题14:电话
- 【SQL解惑】谜题18:广告信件
- 5-10 计算工资 (15分)
- 【SQL解惑】谜题19:销售冠军
- 【SQL解惑】谜题20:测验结果
- 工资和社会保险计算
- 定义一个存储过程,以员工工号为参数,修改该员工的工资,若该员工属于10部门,则工资增加150 若属于20号部门,工资加200,若属于30部门,工资加250 若其它部门,则加300
- 字符谜题10:replace方法1
- java解惑之表达式之谜(谜题10)
- 【SQL解惑】谜题21:飞机与飞行员
- 济南市2011年社会平均工资及2012年社会保险缴纳基数