您的位置:首页 > 大数据

记录自己的大数据学习之路

2017-10-18 21:02 30 查看
function [cid,nr,centers] = mykmeans(x,k,weight,C)
%MYKMEANS Feature-weighted k-means with per-iteration weight re-estimation.
%
%   [cid,nr,centers] = mykmeans(x,k,weight)
%   [cid,nr,centers] = mykmeans(x,k,weight,C)
%
%   Inputs
%     x      - n-by-d data matrix, one observation per row.
%     k      - number of clusters.
%     weight - 1-by-d row vector of initial feature weights used in the
%              weighted squared Euclidean distance.
%     C      - (optional) d-by-k matrix of initial centers, one center per
%              column. When omitted, the first seed is drawn uniformly at
%              random from the rows of x and each further seed is drawn
%              with probability proportional to its squared distance to
%              the nearest seed chosen so far.
%
%   Outputs
%     cid     - 1-by-n vector of cluster labels in 1..k.
%     nr      - 1-by-k vector with the number of observations per cluster.
%     centers - k-by-d matrix of final cluster centers, one per row.
%
%   After every assignment/update step the feature weights are replaced by
%   the result of EntropyWeight (defined outside this file).

% Silence warnings for the duration of the call, and restore the caller's
% warning state on every exit path (including errors).
warnState = warning('off','all');
restoreWarnings = onCleanup(@() warning(warnState)); %#ok<NASGU>

[n,d] = size(x);

% --------------------- 1. Choose the initial seeds ---------------------
if nargin < 4
    S = RandStream.getGlobalStream;
    C = zeros(d,k);
    % First seed: uniform sample over the observations (columns of x').
    C(:,1) = datasample(S,x',1,2);
    minDist = inf(n,1);
    % Remaining seeds: sample proportionally to the squared distance to the
    % closest seed selected so far.
    for ii = 2:k
        minDist = min(minDist,distfun(x,C(:,ii-1),'sqeuclidean'));
        denominator = sum(minDist);
        if denominator==0 || isinf(denominator) || isnan(denominator)
            % Degenerate distances: fall back to uniform sampling without
            % replacement for all remaining seeds.
            C(:,ii:k) = datasample(S,x',k-ii+1,2,'Replace',false);
            break;
        end
        sampleProbability = minDist/denominator;
        C(:,ii) = datasample(S,x',1,2,'Replace',false,...
            'Weights',sampleProbability);
    end
end
C = C'; % transpose to k-by-d so each row is a center

% --------------------- 2. Main k-means iteration ---------------------
% cid(i) holds the cluster (1..k) that observation i belongs to.
cid = zeros(1,n);
% Deliberately different from cid so the loop body runs at least once.
oldcid = ones(1,n);
% Number of observations in each cluster.
nr = zeros(1,k);
% Upper bound on the number of iterations.
maxiter = 200;
iter = 1;

while ~isequal(cid,oldcid) && iter < maxiter
    % Remember the previous assignment; without this the convergence test
    % above could never succeed.
    oldcid = cid;

    % Assignment step: weighted squared Euclidean distance from each
    % observation to every center, pick the closest.
    for i = 1:n
        dist = sum(repmat(weight,k,1).*((repmat(x(i,:),k,1)-C).^2),2);
        [~,ind] = min(dist);
        cid(i) = ind;
    end

    % Update step: each center becomes the mean of its members.
    for i = 1:k
        ind = find(cid==i);
        nr(i) = length(ind);
        if nr(i) > 0
            % Explicit dimension: a single-member cluster must keep its
            % 1-by-d row instead of collapsing to a scalar.
            C(i,:) = mean(x(ind,:),1);
        end
        % An empty cluster keeps its previous center rather than becoming
        % NaN.
    end

    % Evaluation matrix fed to the entropy-weight method (smaller is
    % better according to the original author).
    % NOTE(review): this subtracts the centers from the current weights;
    % the author states the program still has problems, so confirm this
    % is the intended formula.
    A = (repmat(weight,k,1)-C).^2;
    % Normalize every column by its own minimum; min along dim 1 keeps the
    % result 1-by-d even when k == 1.
    feature1 = bsxfun(@rdivide,A,min(A,[],1));

    % New feature weights from the entropy-weight method.
    weight = EntropyWeight(feature1);

    iter = iter + 1;
end

% --------------------- 3. Single refinement pass ---------------------
% Check each observation once more to see whether moving it lowers the
% adjusted distance.
maxiter = 2;
iter = 1;
move = 1;
while iter < maxiter && move ~= 0
    move = 0;
    for i = 1:n
        dist = sum(repmat(weight,k,1).*((repmat(x(i,:),k,1)-C).^2),2);
        r = cid(i); % cluster the observation currently belongs to
        dadj = nr./(nr+1).*dist'; % size-adjusted distances
        [~,ind] = min(dadj); % the minimum should be the current cluster
        if ind ~= r % otherwise move the observation
            cid(i) = ind;
            C(ind,:) = mean(x(cid==ind,:),1);
            % Keep counts and the donor center consistent with cid.
            nr(ind) = nr(ind)+1;
            nr(r) = nr(r)-1;
            if nr(r) > 0
                C(r,:) = mean(x(cid==r,:),1);
            end
            move = 1;
        end
    end
    iter = iter+1;
end
centers = C;
if move == 0
    disp('No points were moved after the initial clustering procedure.')
else
    disp('Some points were moved after the initial clustering procedure.')
end
end

%------------------------------------------------------------------

function D = distfun(X, C, dist, iter, rep, reps)
%DISTFUN Calculate point to cluster centroid distances.
%   X is n-by-d (one point per row) and C holds the centroid(s) as columns.
%   For 'sqeuclidean' the caller passes a single d-by-1 centroid and D is
%   the n-by-1 vector of squared Euclidean distances.
%   iter, rep and reps are only read when a zero-norm centroid is reported
%   in the 'cosine'/'correlation' branch; mykmeans never takes that branch.

switch dist
    case 'sqeuclidean'
        % Distance to the one centroid directly; no need to replicate it n
        % times and build an n-by-n matrix.
        D = pdist2(X,C','squaredeuclidean');
    case {'cosine','correlation'}
        % The points are normalized, centroids are not, so normalize them
        normC = sqrt(sum(C.^2, 1));
        if any(normC < eps(class(normC))) % small relative to unit-length data points
            if reps==1
                error(message('stats:kmeans:ZeroCentroid', iter));
            else
                error(message('stats:kmeans:ZeroCentroidRep', iter, rep));
            end
        end
        C = bsxfun(@rdivide,C,normC);
        % NOTE(review): pdist2mex is not defined in this file; presumably
        % the toolbox-internal helper - confirm it is reachable.
        D = pdist2mex(X,C,'cos',[],[],[]);
end
end
自己写的带熵权法的Kmeans聚类,不过程序还有问题,现在改起来吧!
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  Kmeans 聚类