您的位置:首页 > 编程语言 > Java开发

DBSCAN算法的java实现

2010-05-30 15:35 169 查看
DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a data clustering algorithm proposed by Martin Ester, Hans-Peter Kriegel, Jörg Sander and Xiaowei Xu in 1996. It is a density based clustering algorithm because it finds a number of clusters starting from the estimated density distribution of corresponding nodes. DBSCAN is one of the most common clustering algorithms and also most cited in scientific literature.

DBSCAN是一种基于密度的聚类算法,它的基本原理就是给定两个参数,ξ和minp,其中 ξ可以理解为半径,算法将在这个半径内查找样本,minp是一个以ξ为半径查找到的样本个数n的限制条件,只要n>=minp,查找到的样本点就是核心样本点,算法的具体描述见参考文件1,下边是这个算法的java实现:

首先定义一个Point类,代表样本点:

1: package com.sunzhenxing;
2:
[/code]
3: public class Point {
4:   private int x;
5:   private int y;
6:   private boolean isKey;
7:   private boolean isClassed;
8:
9:   public boolean isKey() {
10:     return isKey;
11:   }
12:   public void setKey(boolean isKey) {
13:     this.isKey = isKey;
14:     this.isClassed=true;
15:   }
16:   public boolean isClassed() {
17:     return isClassed;
18:   }
19:   public void setClassed(boolean isClassed) {
20:     this.isClassed = isClassed;
21:   }
22:   public int getX() {
23:     return x;
24:   }
25:   public void setX(int x) {
26:     this.x = x;
27:   }
28:   public int getY() {
29:     return y;
30:   }
31:   public void setY(int y) {
32:     this.y = y;
33:   }
34:
35:   public Point(){
36:     x=0;
37:     y=0;
38:   }
39:   public Point(int x,int y){
40:     this.x=x;
41:     this.y=y;
42:   }
43:   public Point(String str){
44:     String[] p=str.split(",");
45:     this.x=Integer.parseInt(p[0]);
46:     this.y=Integer.parseInt(p[1]);
47:   }
48:   public String print(){
49:     return "<"+this.x+","+this.y+">";
50:   }
51: }
52:


然后定义一个工具类,为算法的实现服务:

1: package com.sunzhenxing;
2:
[/code]
3: import java.io.BufferedReader;
4: import java.io.FileReader;
5: import java.io.IOException;
6: import java.util.*;
7:
8: public final class Utility {
9:   //计算两点之间的距离
10:   public static double getDistance(Point p,Point q){
11:     int dx=p.getX()-q.getX();
12:     int dy=p.getY()-q.getY();
13:     double distance=Math.sqrt(dx*dx+dy*dy);
14:     return distance;
15:   }
16:   //检测p点是不是核心点,tmpLst存储核心点的直达点
17:   public static List<Point> isKeyPoint(List<Point> lst,Point p,int e,int minp){
18:     int count=0;
19:     List<Point> tmpLst=new ArrayList<Point>();
20:     for(Iterator<Point> it=lst.iterator();it.hasNext();){
21:       Point q=it.next();
22:       if(getDistance(p,q)<=e){
23:         ++count;
24:         if(!tmpLst.contains(q)){
25:           tmpLst.add(q);
26:         }
27:       }
28:     }
29:     if(count>=minp){
30:       p.setKey(true);
31:       return tmpLst;
32:     }
33:     return null;
34:   }
35:   //合并两个链表,前提是b中的核心点包含在a中
36:   public static boolean mergeList(List<Point> a,List<Point> b){
37:     boolean merge=false;
38:     if(a==null || b==null){
39:       return false;
40:     }
41:     for(int index=0;index<b.size();++index){
42:       Point p=b.get(index);
43:       if(p.isKey() && a.contains(p)){
44:         merge=true;
45:         break;
46:       }
47:     }
48:     if(merge){
49:       for(int index=0;index<b.size();++index){
50:         if(!a.contains(b.get(index))){
51:           a.add(b.get(index));
52:        }
53:       }
54:     }
55:     return merge;
56:   }
57:   //获取文本中的样本点集合
58:   public static List<Point> getPointsList() throws IOException{
59:     List<Point> lst=new ArrayList<Point>();
60:     String txtPath="src\\com\\sunzhenxing\\points.txt";
61:     BufferedReader br=new BufferedReader(new FileReader(txtPath));
62:     String str="";
63:     while((str=br.readLine())!=null && str!=""){
64:       lst.add(new Point(str));
65:     }
66:     br.close();
67:     return lst;
68:   }
69:   //显示聚类的结果
70:   public static void display(List<List<Point>> resultList){
71:     int index=1;
72:     for(Iterator<List<Point>> it=resultList.iterator();it.hasNext();){
73:       List<Point> lst=it.next();
74:       if(lst.isEmpty()){
75:         continue;
76:       }
77:       System.out.println("-----第"+index+"个聚类-----");
78:       for(Iterator<Point> it1=lst.iterator();it1.hasNext();){
79:         Point p=it1.next();
80:         System.out.println(p.print());
81:       }
82:       index++;
83:     }
84:   }
85: }
86:

最后在主程序中实现算法,如下所示:

1: package com.sunzhenxing;
2:
[/code]
3: import java.io.IOException;
4: import java.util.*;
5:
6: public class Dbscan {
7:   private final static int e=2;//ε半径
8:   private final static int minp=4;//密度阈值
9:   private static List<Point> pointsList=new ArrayList<Point>();//存储原始样本点
10:   private static List<List<Point>> resultList=new ArrayList<List<Point>>();//存储最后的聚类结果
11:
12:   private static void applyDbscan() throws IOException{
13:     pointsList=Utility.getPointsList();
14:     for(int index=0;index<pointsList.size();++index){
15:       List<Point> tmpLst=new ArrayList<Point>();
16:       Point p=pointsList.get(index);
17:       if(p.isClassed())
18:         continue;
19:       tmpLst=Utility.isKeyPoint(pointsList, p, e, minp);
20:       if(tmpLst!=null){
21:         resultList.add(tmpLst);
22:       }
23:     }
24:     int length=resultList.size();
25:     for(int i=0;i<length;++i){
26:       for(int j=0;j<length;++j){
27:         if(i!=j){
28:           if(Utility.mergeList(resultList.get(i), resultList.get(j))){
29:             resultList.get(j).clear();
30:           }
31:         }
32:       }
33:     }
34:   }
35:   public static void main(String[] args) {
36:     try {
37:       //调用DBSCAN的实现算法
38:       applyDbscan();
39:       Utility.display(resultList);
40:     } catch (IOException e) {
41:       // TODO Auto-generated catch block
42:       e.printStackTrace();
43:     }
44:
45:   }
46:
47: }
48:

下边是一个小测试, 即使用src\\com\\sunzhenxing\\points.txt文件的内容进行测试,points.txt的文件内容是:

0,0
0,1
0,2
0,3
0,4
0,5
12,1
12,2
12,3
12,4
12,5
12,6
0,6
0,7
12,7
0,8
0,9
1,1

最后算法的结果是:

-----第1个聚类-----
<0,0>
<0,1>
<0,2>
<1,1>
<0,3>
<0,4>
<0,5>
<0,6>
<0,7>
<0,8>
<0,9>
-----第2个聚类-----
<12,1>
<12,2>
<12,3>
<12,4>
<12,5>
<12,6>
<12,7>
大家画一下坐标就可以理解实验的结论了。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: