您的位置:首页 > 编程语言 > MATLAB

基于Fisher线性判别分析的手写数字识别

2015-03-04 22:28 501 查看

基于Fisher线性判别分析的手写数字识别

<1>Fisher算法简介:

为简单起见,我们以两类问题1和2的分类来说明Fisher判别法的原理,看下面的图,为识别w1类和w2类,通过选择适当的投影方向可以比较好的分开这两类,Fisher线性判别的思想就是选择投影方向,使投影后的两类相隔尽可能的远,而同时每一类内部的样本又尽可能聚集。关键是找出那个最佳的投影方向。



我们假设w1共有N1个样本,w2共有N2个样本,N= N1+N2。两个类别在输入空间的均值向量为:

$$m_i = \frac{1}{N_i}\sum_{x \in w_i} x,\qquad i = 1, 2$$

各类的类内离散度矩阵为:

$$S_i = \sum_{x \in w_i} (x - m_i)(x - m_i)^T,\qquad i = 1, 2$$

总类内离散度矩阵为:

$$S_w = S_1 + S_2$$

根据Fisher算法的思想,要使两类投影后尽可能分开,而各类内部又尽可能聚集,最优的投影方向即为:

$$w^* = S_w^{-1}(m_1 - m_2)$$

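确定投影方向之后,再确定一个分类阈值Wo,并采取决策规则:

$$g(x) = w^{*T}x + w_0 \;\begin{cases} > 0 & \Rightarrow\ x \in w_1 \\ < 0 & \Rightarrow\ x \in w_2 \end{cases}$$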

在自己的实验中,样本的数量肯定是有限的,可以将分类阈值定为两类均值投影的中点:

$$w_0 = -\frac{1}{2}\,w^{*T}(m_1 + m_2)$$

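判别的决策就改为:

$$g(x) = w^{*T}\left(x - \frac{1}{2}(m_1 + m_2)\right) \;\begin{cases} > 0 & \Rightarrow\ x \in w_1 \\ < 0 & \Rightarrow\ x \in w_2 \end{cases}$$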

<2>基于Fisher线性判别实现数字识别的想法:

根据Fisher算法可以实现两类的识别,数字识别为多类识别,我们可以通过多个两类分类器来实现。具体的算法是先从1开始逐个与比它大的数字比较,当遇到比1更加匹配输入的数字时则将1排除,而改为从2开始逐个与比它大的数比较,由此进行到找出最匹配的数字为止。



<3>数字特征的提取

利用Fisher线性判别时,每一类都对应着一个特征线性向量,不同的类之间对应的向量也是有差别的,Fisher就是找出这些特征向量的最优投影方向,使之在投影方向上能够有最大间距。

特征有很多不同的提取方法,本实验采取的是将手写数字图片二值化,再将二值化图片缩放到70×70并分割为7×7块(每块10×10像素),计算每块内值为0的像素点(即笔画像素)所占的比例,这样就可以构成一个49维的特征向量。

本人使用了Matlab作为研究平台,设计了一个gui界面(可以手写输入数字),采用Fisher线性识别,样本是已经采集好的。



部分Matlab代码如下:
<1>         *********手写数字matlab实现(部分)*************
%%手写输入程序

% Mouse button pressed: start a new stroke on the drawing canvas.
function figure_patten_WindowButtonDownFcn(hObject, eventdata, handles)
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Switches drawing mode on and stores the click position as the start
% point of the stroke in the shared globals x(1), y(1). The mouse-move
% callback reads these globals to draw the following line segments.
global draw_enable     % drawing flag: 1 = drawing, 0 = drawing stopped
global x;
global y;
imSize = 10;
draw_enable = 1;

% Address the canvas (axes1) explicitly rather than through gca: other
% callbacks make axes2/axes3 current, and relying on gca here could set the
% limits of, and read the click position from, the wrong axes.
axis(handles.axes1, [1 imSize 1 imSize]);   % fix the coordinate range of axes1

% Mouse pressed: record the current pointer position as the stroke start.
p = get(handles.axes1, 'currentpoint');
x(1) = p(1);
y(1) = p(3);

% Mouse moved: while drawing is enabled, extend the stroke to the pointer.
function figure_patten_WindowButtonMotionFcn(hObject, eventdata, handles)
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Makes axes1 the current axes on every call. When the global flag
% draw_enable equals 1, a red segment is drawn from the previously stored
% point (x(1), y(1)) to the current pointer position, the sampled
% coordinates are appended to IXT.txt / IYT.txt, and the current position
% becomes the new start point.
axes(handles.axes1);
global draw_enable;
global x;
global y;
global h1;
cursor = get(gca, 'currentpoint');
if draw_enable == 1
    % Pointer position after this move becomes the segment end point.
    x(2) = cursor(1);
    y(2) = cursor(3);

    % Sampling increment along each axis, signed towards the end point.
    gap = 0.1;
    step_x = -gap;
    if x(2) > x(1)
        step_x = gap;
    end
    step_y = -gap;
    if y(2) > y(1)
        step_y = gap;
    end

    if abs(x(2) - x(1)) < 0.01
        % (Nearly) vertical segment: the slope is undefined, so sample
        % along y and keep x constant.
        iy = y(1):step_y:y(2);
        ix = x(2) .* ones(1, size(iy, 2));
    else
        % Sample along x and evaluate the line through both points.
        ix = x(1):step_x:x(2);
        slope = (y(2) - y(1)) / (x(2) - x(1));
        iy = slope .* (ix - x(1)) + y(1);
    end

    % Always finish exactly on the end point.
    ImageX = cat(2, ix, x(2));
    ImageY = [iy, y(2)];

    h1 = line(ImageX, ImageY, 'marker', '.', 'markerSize', 8, ...
        'LineStyle', '-', 'LineWidth', 4, 'Color', 'Red');
    dlmwrite('IXT.txt', ImageX, '-append', 'delimiter', '\t', 'precision', 6);
    dlmwrite('IYT.txt', ImageY, '-append', 'delimiter', '\t', 'precision', 6);

    % Remember the current point as the start of the next segment.
    x(1) = x(2);
    y(1) = y(2);
end

% Mouse button released: end the current stroke.
function figure_patten_WindowButtonUpFcn(hObject, eventdata, handles)
% hObject    handle to figure_patten (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Clears the shared drawing flag; the mouse-move callback only draws while
% the flag equals 1.
global draw_enable
draw_enable = 0;
<2>         **********Fisher识别按钮程序************
%% "Fisher recognition" button
function pushbutton6_Callback(hObject, eventdata, handles)
% hObject    handle to pushbutton6 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Captures the drawing in axes1, binarises it, crops it to the bounding box
% of the stroke, saves the cropped image as a numbered sample, extracts a
% 49-element block feature vector, shows a thresholded preview of the
% features in axes3 and displays the digit returned by Fisher() in
% text_result.
global i;   % running index used to number the saved sample images

frame = getframe(handles.axes1);    % grab the handwritten digit shown in axes1

gray = rgb2gray(frame.cdata);       % convert to grayscale
level = graythresh(gray);
bw = im2bw(gray, level);            % binarise
imwrite(bw,'outfile.jpg','jpg');    % save the binarised image as JPG
axes(handles.axes2);

% Crop the image: find the bounding box of all pixels that are not
% background. Background pixels have the value 1, so a row/column counts as
% blank when every pixel in it is 1.
inkCols = find(any(~bw, 1));
inkRows = find(any(~bw, 2));
if isempty(inkCols)
    % Nothing has been drawn: there is no bounding box to crop to (scanning
    % for a non-blank row/column would run past the image edge), so clear
    % the result field and stop.
    set(handles.text_result,'String','');
    return;
end
x1 = inkCols(1);      % first non-blank column (left edge)
x2 = inkCols(end);    % last non-blank column (right edge)
y1 = inkRows(1);      % first non-blank row (top edge)
y2 = inkRows(end);    % last non-blank row (bottom edge)

% Crop the image to the edge
bw1 = imcrop(bw,[x1,y1,(x2-x1),(y2-y1)]);
imshow(bw1);

setappdata(handles.figure_patten,'bw1',bw1);    % publish bw1 for other callbacks

% Save the cropped image under a consecutive number so that samples are
% easy to store and retrieve. Start at 1 if the counter was never set,
% otherwise every image would be written to the same unnumbered file.
if isempty(i)
    i = 1;
end
name=strcat('D:\matlab\matlabinstall\bin\image\num',num2str(i),'.jpg');
imwrite(bw1,name,'jpg');
i=i+1;

axes(handles.axes3);    % the feature preview is shown in axes3

% Feature extraction: resize to 70x70 and split into 7x7 tiles of 10x10
% pixels; lett((r-1)*7+c) holds the number of 1-valued pixels of tile (r,c).
bw_7050=imresize(bw1,[70,70]);
lett = zeros(1,49);
for r=1:7
    for c=1:7
        tile = bw_7050((r*10-9:r*10),(c*10-9:c*10));
        lett((r-1)*7+c) = sum(tile(:));
    end
end

% Feature vector of the drawing: fraction of 0-valued pixels in each tile.
lett=((100-lett)/100);

% Preview image: a tile is painted 0 when its feature exceeds 0.13 and 1
% otherwise. lett is stored row by row, hence the transpose after reshape.
inkTiles = reshape(lett,7,7).' > 0.13;
A = kron(double(~inkTiles), ones(10));

imshow(A);

Rchar=Fisher(lett);     % run the recognition routine (Fisher discriminant)
set(handles.text_result,'String',num2str(Rchar));  % show the recognition result
<3>             *********Fisher()实现************
function  [Rchar]=Fisher(lett)
% FISHER  Recognise a digit with pairwise Fisher linear discriminants.
%
%   Rchar = Fisher(lett)
%
%   lett   feature vector of the digit to classify (row or column); it must
%          have the same number of elements as the stored samples.
%   Rchar  recognised digit as a character, '0' ... '9'.
%
% The training samples are read from NUM0.mat ... NUM9.mat in the current
% folder / on the path. File NUMd.mat must contain a cell array named NUMd
% whose cells are the feature vectors of digit d.
%
% Classes are numbered 1..10 (class c is digit c-1). Starting with class 1,
% the current class is compared with every higher-numbered class; at the
% first comparison it loses, the search moves on to the next class. The
% first class that wins all of its comparisons is the result; if classes
% 1..9 all lose, the result is class 10.

numClasses = 10;
lett = lett(:).';           % work with a row vector
dim = numel(lett);

% Per-class mean vector M{c} and within-class scatter matrix S{c}.
M = cell(1, numClasses);
S = cell(1, numClasses);
for c = 1:numClasses
    varName = sprintf('NUM%d', c-1);
    stored = load([varName '.mat']);
    samples = stored.(varName);
    if isempty(samples)
        error('Fisher:noSamples', 'No training samples found in %s.mat.', varName);
    end

    % Convert the cell array into a matrix with one sample per row.
    X = zeros(numel(samples), dim);
    for n = 1:numel(samples)
        X(n,:) = samples{n}(:).';
    end

    % Mean along dimension 1, so that a class with a single sample still
    % yields a mean vector instead of a scalar.
    M{c} = mean(X, 1);

    % Within-class scatter: sum over samples of (x - m)' * (x - m), with x
    % being a whole sample row.
    Xc = bsxfun(@minus, X, M{c});
    S{c} = Xc' * Xc;
end

% Pairwise comparison of the classes.
winner = numClasses;        % result when classes 1..9 each lose a comparison
for a = 1:numClasses-1
    lostComparison = false;
    for b = a+1:numClasses
        % Total within-class scatter of the pair, slightly regularised so
        % that the linear system below is always solvable.
        Sw = S{a} + S{b} + 0.0001*eye(dim);
        w = Sw \ (M{a} - M{b})';                    % optimal projection direction
        g = w' * (lett - 0.5*(M{a} + M{b}))';       % discriminant value
        if g < 0            % lett is closer to class b: class a is ruled out
            lostComparison = true;
            break;
        end
    end
    if ~lostComparison
        winner = a;
        break;
    end
end

% Map the class number to the digit character.
Rchar = num2str(winner - 1);

<4>         ************添加为样本部分程序*************
% Add the most recently drawn digit as a new training sample for the digit
% typed into edit1.
dd=str2double(get(handles.edit1,'String'));
h_patten=getappdata(handles.Rightnum,'h_patten');
bw1=getappdata(h_patten.figure_patten,'bw1');

% Same feature extraction as in the recognition callback: resize to 70x70,
% split into 7x7 tiles of 10x10 pixels and count the 1-valued pixels of
% each tile, stored row by row.
bw_7050=imresize(bw1,[70,70]);
example1 = zeros(1,49);
for cnt=1:7
    for cnt2=1:7
        tile = bw_7050((cnt*10-9:cnt*10),(cnt2*10-9:cnt2*10));
        example1((cnt-1)*7+cnt2) = sum(tile(:));
    end
end

% Feature vector: fraction of 0-valued pixels in each tile.
example2=((100-example1)/100);

% Only the digits 0..9 have a sample file; any other input (including a
% non-numeric entry, for which str2double returns NaN) is ignored.
if any(dd == 0:9)
    varName = sprintf('NUM%d', dd);     % variable NUMd is stored in file NUMd.mat
    fileName = [varName '.mat'];

    stored = load(fileName);
    samples = stored.(varName);
    samples{end+1} = example2;          % append the new sample

    % Write the file back with NUMd as its only variable.
    updated = struct(varName, {samples});
    save(fileName, '-struct', 'updated');
end
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息