您的位置:首页 > 职场人生

聚类分析(K-Means)程序实现及展现(二)

2009-03-15 22:46 281 查看
说明:
这里只对研究的表,进行数据提取,分析因子,数据转化,数据标准化;
下面是相应的建表和处理的存储过程:

-----------------------数据准备-------------------------------------------
--------------------------原数据库表--------------------------------------------------
-- Source table holding the raw customer records that the analysis reads.
-- The extraction step only uses khxx_id, khxx_qyxz, khxx_qylx, khxx_qyzx
-- and khxx_khjb; the remaining columns are carried by the source system.
-- NOTE(review): the other columns look like pinyin abbreviations of customer
-- master data (name, address, bank, phone, ...) -- confirm before relying on it.
CREATE TABLE tb_khxx
(
    khxx_id     VARCHAR(50),    -- customer number
    khxx_mc     VARCHAR(50),
    khxx_qyxz   VARCHAR(30),    -- enterprise nature
    khxx_qylx   VARCHAR(30),    -- enterprise type
    khxx_qyzx   VARCHAR(30),    -- enterprise credit standing
    khxx_qydz   VARCHAR(50),
    khxx_szsf   VARCHAR(30),
    khxx_szcs   VARCHAR(30),
    khxx_gsyb   VARCHAR(6),
    khxx_frdb   VARCHAR(20),
    khxx_khyh   VARCHAR(50),
    khxx_yhzh   VARCHAR(50),
    khxx_nsh    VARCHAR(50),
    khxx_ICcard VARCHAR(50),
    khxx_gswz   VARCHAR(50),
    khxx_gsdh   VARCHAR(20),
    khxx_gscz   VARCHAR(20),
    khxx_lxr    VARCHAR(30),
    khxx_lxrdh  VARCHAR(20),
    khxx_khjb   VARCHAR(20),    -- customer level
    khxx_bz     VARCHAR(500),
    id          INT IDENTITY(1, 1) NOT NULL  -- auto-incrementing row number
)

-----------------------新建的数据库表-------------------------------------

-- Staging table: the attributes selected from the source table for analysis,
-- still in their original textual form.
CREATE TABLE collectCustomer
(
    ID        VARCHAR(50),  -- customer number
    khxx_qyxz VARCHAR(50),  -- enterprise nature
    khxx_qylx VARCHAR(50),  -- enterprise type
    khxx_qyzx VARCHAR(50),  -- enterprise credit standing
    khxx_khjb VARCHAR(50)   -- customer level
)

-- Data conversion
-----------------------------------------------------
-- First normalization target: the extracted attributes stored as integer codes.
CREATE TABLE standCustomer
(
    ID        VARCHAR(50),  -- customer number
    khxx_qyxz INT,          -- enterprise nature (integer code)
    khxx_qylx INT,          -- enterprise type (integer code)
    khxx_qyzx INT,          -- enterprise credit standing (integer code)
    khxx_khjb INT           -- customer level (integer code)
)
-------------------------------------------------------

-- Second normalization target: dimensionless values scaled into 0..1 using
-- x' = (x - min) / (max - min); empty values are replaced by the average.
CREATE TABLE lastStandCustomer
(
    ID        VARCHAR(50),     -- customer number
    khxx_qyxz DECIMAL(10, 2),  -- enterprise nature (scaled)
    khxx_qylx DECIMAL(10, 2),  -- enterprise type (scaled)
    khxx_qyzx DECIMAL(10, 2),  -- enterprise credit standing (scaled)
    khxx_khjb DECIMAL(10, 2)   -- customer level (scaled)
)
-- Result table: stores the outcome of the classification once it has run.
-- The table is dropped first (if it already exists) and then re-created, so
-- the script can be re-run and always ends with an empty KMCLASS in place.
-- Previously the DROP came after the CREATE, which left no result table at all.
IF OBJECT_ID('KMCLASS') IS NOT NULL
    DROP TABLE KMCLASS

CREATE TABLE KMCLASS
(
    ID    VARCHAR(50),  -- presumably the customer number, matching the other tables -- confirm
    CLASS INT           -- presumably the cluster assigned to that customer -- confirm
)
---------------------------------------------------------------------------------

-------------------------------------------------------
-- Step 1: pull the attributes to analyse out of the source table.
EXEC p_collectCustomers
SELECT ID, khxx_qyxz, khxx_qylx, khxx_qyzx, khxx_khjb
FROM collectCustomer

-- Step 2: first normalization (text values -> integer codes).
EXEC p_standCustomers
SELECT ID, khxx_qyxz, khxx_qylx, khxx_qyzx, khxx_khjb
FROM standCustomer

-- Step 3: second normalization (dimensionless, scaled into 0..1).
EXEC p_lastStandCustomers
SELECT ID, khxx_qyxz, khxx_qylx, khxx_qyzx, khxx_khjb
FROM lastStandCustomer

-- Same result again, this time sorted by customer number.
SELECT ID, khxx_qyxz, khxx_qylx, khxx_qyzx, khxx_khjb
FROM lastStandCustomer
ORDER BY ID ASC
------------------------标准化存储过程(一),初次提取数据-----------------------
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
-- Author: tiger_yu
-- Created: 2009-03-12
-- Purpose: initial data extraction -- reloads collectCustomer with the
--          columns of tb_khxx that take part in the analysis.
-- Input parameters: none
-- Output parameters: none
-- Return value: none
-- (ALTER requires the procedure to exist already; use CREATE on first deployment.)
ALTER PROCEDURE p_collectCustomers
AS
BEGIN
    -- Empty the staging table so repeated runs do not accumulate rows.
    TRUNCATE TABLE collectCustomer

    -- Copy the analysed attributes. Target columns are named explicitly so the
    -- statement keeps working if collectCustomer gains or reorders columns.
    -- No ORDER BY: row order of an INSERT ... SELECT into this table carries no meaning.
    INSERT INTO collectCustomer (ID, khxx_qyxz, khxx_qylx, khxx_qyzx, khxx_khjb)
    SELECT
        khxx_id,
        khxx_qyxz,
        khxx_qylx,
        khxx_qyzx,
        khxx_khjb
    FROM tb_khxx
END
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
-----------------------------------------------------
------------------标准化存储过程(二),第一次标准化----------------------
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
-- Author: tiger_yu
-- Created: 2009-03-12
-- Purpose: first normalization pass -- reloads standCustomer with the rows of
--          collectCustomer, translating each textual attribute to an integer code.
--          Any value that is not listed (including NULL) is coded as 0.
-- Input parameters: none
-- Output parameters: none
-- Return value: none
-- (ALTER requires the procedure to exist already; use CREATE on first deployment.)
ALTER PROCEDURE p_standCustomers
AS
BEGIN
    -- Empty the target table so repeated runs do not accumulate rows.
    TRUNCATE TABLE standCustomer

    -- One set-based statement replaces the former row-by-row cursor. Comparing
    -- the columns directly also avoids the old varchar(20) variable that could
    -- truncate khxx_qyzx (a varchar(50) column) before it was matched.
    -- Some literals carry a trailing space; SQL Server ignores trailing spaces
    -- in '=' comparisons, so they are kept exactly as they were.
    INSERT INTO standCustomer (ID, khxx_qyxz, khxx_qylx, khxx_qyzx, khxx_khjb)
    SELECT
        ID,
        -- enterprise nature
        CASE khxx_qyxz
            WHEN '国有企业' THEN 1
            WHEN '股份制企业' THEN 2
            WHEN '外资企业' THEN 3
            WHEN '私营企业' THEN 4
            WHEN '其他 ' THEN 5
            ELSE 0
        END,
        -- enterprise type
        CASE khxx_qylx
            WHEN '贸易型' THEN 1
            WHEN '服务型' THEN 2
            WHEN '生产型' THEN 3
            WHEN '其他' THEN 4
            ELSE 0
        END,
        -- enterprise credit standing
        CASE khxx_qyzx
            WHEN '特级' THEN 1
            WHEN '一级' THEN 2
            WHEN '二级' THEN 3
            WHEN '三级' THEN 4
            WHEN '其他 ' THEN 5
            ELSE 0
        END,
        -- customer level
        CASE khxx_khjb
            WHEN '一般客户' THEN 1
            WHEN 'VIP客户' THEN 2
            WHEN '顶级客户' THEN 3
            WHEN '其他 ' THEN 4
            ELSE 0
        END
    FROM collectCustomer
END
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO

---------------------------------------------------------

-----------------标准化存储过程(三),第二次标准化-----------------------
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
-- Author: tiger_yu
-- Created: 2009-03-12
-- Purpose: second normalization pass -- reloads lastStandCustomer with the
--          rows of standCustomer, each attribute rescaled as
--          (value - min) / range and rounded to DECIMAL(10,2).
-- Side effect: standCustomer itself is modified first -- p_updateAverage
--          overwrites the 0 codes of each attribute with the column average.
-- Input parameters: none
-- Output parameters: none
-- Return value: none
-- (ALTER requires the procedure to exist already; use CREATE on first deployment.)
ALTER PROCEDURE p_lastStandCustomers
AS
BEGIN
    DECLARE @qyxz_range FLOAT, @qyxz_min INT,
            @qylx_range FLOAT, @qylx_min INT,
            @qyzx_range FLOAT, @qyzx_min INT,
            @khjb_range FLOAT, @khjb_min INT

    -- Empty the target table so repeated runs do not accumulate rows.
    TRUNCATE TABLE lastStandCustomer

    -- Fill the 0 codes with the column average ONCE, before any row is read.
    -- The old version did this inside a cursor loop, after the first row had
    -- already been fetched, so stale 0 values could be normalized.
    EXEC p_updateAverage 'standCustomer', 'khxx_qyxz'
    EXEC p_updateAverage 'standCustomer', 'khxx_qylx'
    EXEC p_updateAverage 'standCustomer', 'khxx_qyzx'
    EXEC p_updateAverage 'standCustomer', 'khxx_khjb'

    -- Minimum and range of each attribute are the same for every row, so they
    -- are computed once here instead of four dynamic queries per row.
    EXEC p_standfloat 'standCustomer', 'khxx_qyxz', @qyxz_range OUTPUT, @qyxz_min OUTPUT
    EXEC p_standfloat 'standCustomer', 'khxx_qylx', @qylx_range OUTPUT, @qylx_min OUTPUT
    EXEC p_standfloat 'standCustomer', 'khxx_qyzx', @qyzx_range OUTPUT, @qyzx_min OUTPUT
    EXEC p_standfloat 'standCustomer', 'khxx_khjb', @khjb_range OUTPUT, @khjb_min OUTPUT

    -- Rescale every row in one statement. The range is FLOAT, so the division
    -- is floating-point; NULLIF turns an unexpected zero range into NULL
    -- rather than a divide-by-zero error.
    INSERT INTO lastStandCustomer (ID, khxx_qyxz, khxx_qylx, khxx_qyzx, khxx_khjb)
    SELECT
        ID,
        CONVERT(DECIMAL(10, 2), (khxx_qyxz - @qyxz_min) / NULLIF(@qyxz_range, 0)),
        CONVERT(DECIMAL(10, 2), (khxx_qylx - @qylx_min) / NULLIF(@qylx_range, 0)),
        CONVERT(DECIMAL(10, 2), (khxx_qyzx - @qyzx_min) / NULLIF(@qyzx_range, 0)),
        CONVERT(DECIMAL(10, 2), (khxx_khjb - @khjb_min) / NULLIF(@khjb_range, 0))
    FROM standCustomer
END
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
-----------------------------------------------------------------
-------------功能存储过程(一),对表中空的字段填充平均数------------------
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
-- Author: tiger_yu
-- Created: 2009-03-12
-- Purpose: in the given table, overwrite every 0 in the given column with the
--          column's average (AVG over all rows of that column).
-- Input parameters:
--   @tablename  table to update; a one-part name or schema.table
--   @fieldname  column to update
-- Output parameters: none
-- Return value: none; raises an error (severity 16) and changes nothing when
--               the table or column cannot be found.
-- NOTE(review): the average is taken over all rows, including the 0 rows being
--               replaced -- confirm that this is the intended average.
-- (ALTER requires the procedure to exist already; use CREATE on first deployment.)
ALTER PROCEDURE p_updateAverage
    @tablename VARCHAR(50),
    @fieldname VARCHAR(50)
AS
BEGIN
    DECLARE @sql         NVARCHAR(2000),
            @safe_table  NVARCHAR(520),
            @column_name SYSNAME,
            @safe_field  NVARCHAR(260)

    -- Identifiers cannot be bound as parameters, so they are taken apart with
    -- PARSENAME and re-quoted with QUOTENAME before being placed in the SQL text.
    SET @safe_table = QUOTENAME(PARSENAME(@tablename, 1))
    IF PARSENAME(@tablename, 2) IS NOT NULL
        SET @safe_table = QUOTENAME(PARSENAME(@tablename, 2)) + N'.' + @safe_table

    SET @column_name = PARSENAME(@fieldname, 1)
    SET @safe_field = QUOTENAME(@column_name)

    -- Refuse anything that does not resolve to an existing table and column.
    IF @safe_table IS NULL
       OR @safe_field IS NULL
       OR OBJECT_ID(@safe_table) IS NULL
       OR COL_LENGTH(@safe_table, @column_name) IS NULL
    BEGIN
        RAISERROR('p_updateAverage: table %s or column %s does not exist.', 16, 1, @tablename, @fieldname)
        RETURN
    END

    SET @sql = N'UPDATE ' + @safe_table
             + N' SET ' + @safe_field
             + N' = (SELECT AVG(' + @safe_field + N') FROM ' + @safe_table + N')'
             + N' WHERE ' + @safe_field + N' = 0'

    EXEC sp_executesql @sql
END
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
-------------------功能存储过程(二),返回一个字段的极差(最大与最小的差),最小值-----------------
--------------------------
set quoted_identifier on
go
set ansi_nulls on
go
-- Author: tiger_yu
-- Created: 2009-03-12
-- Purpose: supplies the two constants for 0..1 (dimensionless) scaling,
--          x' = (x - min) / (max - min), of one column.
--          Special cases: when max equals min, min is taken as 0;
--          when both are 0, max is additionally taken as 1.
-- Input parameters:
--   @tablename  table to inspect; a one-part name or schema.table
--   @fieldname  column to inspect
-- Output parameters:
--   @rs    range of the column (max - min, after the special cases above)
--   @rmin  minimum of the column (after the special cases above)
-- Return value: none; raises an error (severity 16) and leaves the outputs
--               untouched when the table or column cannot be found.
-- (ALTER requires the procedure to exist already; use CREATE on first deployment.)
ALTER PROCEDURE p_standfloat
    @tablename VARCHAR(50),
    @fieldname VARCHAR(50),
    @rs FLOAT OUTPUT,
    @rmin INT OUTPUT
AS
BEGIN
    DECLARE @sql         NVARCHAR(2000),
            @safe_table  NVARCHAR(520),
            @column_name SYSNAME,
            @safe_field  NVARCHAR(260),
            @lo          INT,
            @hi          INT

    -- Identifiers cannot be bound as parameters, so they are taken apart with
    -- PARSENAME and re-quoted with QUOTENAME before being placed in the SQL text.
    SET @safe_table = QUOTENAME(PARSENAME(@tablename, 1))
    IF PARSENAME(@tablename, 2) IS NOT NULL
        SET @safe_table = QUOTENAME(PARSENAME(@tablename, 2)) + N'.' + @safe_table

    SET @column_name = PARSENAME(@fieldname, 1)
    SET @safe_field = QUOTENAME(@column_name)

    -- Refuse anything that does not resolve to an existing table and column.
    IF @safe_table IS NULL
       OR @safe_field IS NULL
       OR OBJECT_ID(@safe_table) IS NULL
       OR COL_LENGTH(@safe_table, @column_name) IS NULL
    BEGIN
        RAISERROR('p_standfloat: table %s or column %s does not exist.', 16, 1, @tablename, @fieldname)
        RETURN
    END

    -- Fetch minimum and maximum in a single pass over the table.
    SET @sql = N'SELECT @lo = MIN(' + @safe_field + N'), @hi = MAX(' + @safe_field + N')'
             + N' FROM ' + @safe_table
    EXEC sp_executesql @sql,
                       N'@lo INT OUTPUT, @hi INT OUTPUT',
                       @lo OUTPUT,
                       @hi OUTPUT

    -- Constant column: measure from 0 so the range is not 0.
    IF @lo = @hi
        SET @lo = 0
    -- Constant column of zeros: use a range of 1 to avoid dividing by zero.
    IF @lo = @hi AND @hi = 0
        SET @hi = 1

    SET @rmin = @lo
    -- Subtract in FLOAT; the former detour through DECIMAL(10,2) overflowed
    -- for ranges of 100,000,000 or more.
    SET @rs = CAST(@hi AS FLOAT) - @lo
END
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
-------------------------------------------
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息