您的位置：首页 > 编程语言 > Java开发

DBSCAN算法的java实现

2010-05-30 15:35 169 查看

DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a data clustering algorithm proposed by Martin Ester, Hans-Peter Kriegel, Jörg Sander and Xiaowei Xu in 1996. It is a density based clustering algorithm because it finds a number of clusters starting from the estimated density distribution of corresponding nodes. DBSCAN is one of the most common clustering algorithms and also most cited in scientific literature.

DBSCAN是一种基于密度的聚类算法，它的基本原理是给定两个参数ε和minp：ε可以理解为半径，算法以每个样本点为圆心、在这个半径内查找样本；minp是对以ε为半径查找到的样本个数n的限制条件，只要n>=minp，作为圆心的那个样本点就是核心样本点。算法的具体描述见参考文件1，下边是这个算法的java实现：

首先定义一个Point类，代表样本点：

1: package com.sunzhenxing;

2:
[/code]

/**
 * A two-dimensional sample point with integer coordinates, plus the two
 * bookkeeping flags used by the clustering code: whether the point is a
 * core ("key") point and whether it has already been classified.
 *
 * <p>The class deliberately does not override {@code equals}/{@code hashCode},
 * so two points with the same coordinates remain distinct objects and
 * collection lookups such as {@code List.contains} compare by identity.
 */
public class Point {

  private int x;
  private int y;
  /** True once the point has been marked as a core point. */
  private boolean isKey;
  /** True once the point has been classified. */
  private boolean isClassed;

  /** Creates the point {@code (0, 0)}. */
  public Point() {
    this(0, 0);
  }

  /**
   * Creates a point from explicit coordinates.
   *
   * @param x the x coordinate
   * @param y the y coordinate
   */
  public Point(int x, int y) {
    this.x = x;
    this.y = y;
  }

  /**
   * Parses a point from text of the form {@code "x,y"}.
   *
   * <p>Whitespace around either number is ignored, so {@code "3, 4"} and
   * {@code " 3 ,4 "} are both accepted. Fields after the second one are
   * ignored.
   *
   * @param str the text to parse
   * @throws IllegalArgumentException if fewer than two comma-separated fields
   *     are present
   * @throws NumberFormatException if either field is not a valid integer
   */
  public Point(String str) {
    String[] fields = str.split(",");
    if (fields.length < 2) {
      // Report the offending text instead of failing with a bare array index error.
      throw new IllegalArgumentException("expected \"x,y\" but got: \"" + str + "\"");
    }
    // Integer.parseInt rejects surrounding blanks, so strip them first.
    this.x = Integer.parseInt(fields[0].trim());
    this.y = Integer.parseInt(fields[1].trim());
  }

  /** @return true if this point has been marked as a core point */
  public boolean isKey() {
    return isKey;
  }

  /**
   * Sets the core-point flag. As a side effect the point is always marked as
   * classified, whatever value is passed.
   *
   * @param isKey the new value of the core-point flag
   */
  public void setKey(boolean isKey) {
    this.isKey = isKey;
    this.isClassed = true;
  }

  /** @return true if this point has been classified */
  public boolean isClassed() {
    return isClassed;
  }

  /** @param isClassed the new value of the classified flag */
  public void setClassed(boolean isClassed) {
    this.isClassed = isClassed;
  }

  /** @return the x coordinate */
  public int getX() {
    return x;
  }

  /** @param x the new x coordinate */
  public void setX(int x) {
    this.x = x;
  }

  /** @return the y coordinate */
  public int getY() {
    return y;
  }

  /** @param y the new y coordinate */
  public void setY(int y) {
    this.y = y;
  }

  /** @return the point formatted as {@code <x,y>} */
  public String print() {
    return "<" + this.x + "," + this.y + ">";
  }
}

52:

然后定义一个工具类，为算法的实现服务：

1: package com.sunzhenxing;

2:
[/code]

3: import java.io.BufferedReader;

4: import java.io.FileReader;

5: import java.io.IOException;

6: import java.util.*;

7:

8: public final class Utility {

9:   //计算两点之间的距离

10:   public static double getDistance(Point p,Point q){

11:     int dx=p.getX()-q.getX();

12:     int dy=p.getY()-q.getY();

13:     double distance=Math.sqrt(dx*dx+dy*dy);

14:     return distance;

15:   }

16:   //检测p点是不是核心点，tmpLst存储核心点的直达点

17:   public static List<Point> isKeyPoint(List<Point> lst,Point p,int e,int minp){

18:     int count=0;

19:     List<Point> tmpLst=new ArrayList<Point>();

20:     for(Iterator<Point> it=lst.iterator();it.hasNext();){

21:       Point q=it.next();

22:       if(getDistance(p,q)<=e){

23:         ++count;

24:         if(!tmpLst.contains(q)){

25:           tmpLst.add(q);

26:         }

27:       }

28:     }

29:     if(count>=minp){

30:       p.setKey(true);

31:       return tmpLst;

32:     }

33:     return null;

34:   }

35:   //合并两个链表，前提是b中的核心点包含在a中

36:   public static boolean mergeList(List<Point> a,List<Point> b){

37:     boolean merge=false;

38:     if(a==null || b==null){

39:       return false;

40:     }

41:     for(int index=0;index<b.size();++index){

42:       Point p=b.get(index);

43:       if(p.isKey() && a.contains(p)){

44:         merge=true;

45:         break;

46:       }

47:     }

48:     if(merge){

49:       for(int index=0;index<b.size();++index){

50:         if(!a.contains(b.get(index))){

51:           a.add(b.get(index));

52:        }

53:       }

54:     }

55:     return merge;

56:   }

57:   //获取文本中的样本点集合

58:   public static List<Point> getPointsList() throws IOException{

59:     List<Point> lst=new ArrayList<Point>();

60:     String txtPath="src\\com\\sunzhenxing\\points.txt";

61:     BufferedReader br=new BufferedReader(new FileReader(txtPath));

62:     String str="";

63:     while((str=br.readLine())!=null && str!=""){

64:       lst.add(new Point(str));

65:     }

66:     br.close();

67:     return lst;

68:   }

69:   //显示聚类的结果

70:   public static void display(List<List<Point>> resultList){

71:     int index=1;

72:     for(Iterator<List<Point>> it=resultList.iterator();it.hasNext();){

73:       List<Point> lst=it.next();

74:       if(lst.isEmpty()){

75:         continue;

76:       }

77:       System.out.println("-----第"+index+"个聚类-----");

78:       for(Iterator<Point> it1=lst.iterator();it1.hasNext();){

79:         Point p=it1.next();

80:         System.out.println(p.print());

81:       }

82:       index++;

83:     }

84:   }

85: }

86:

最后在主程序中实现算法，如下所示：

1: package com.sunzhenxing;

2:
[/code]

3: import java.io.IOException;

4: import java.util.*;

5:

6: public class Dbscan {

7:   private final static int e=2;//ε半径

8:   private final static int minp=4;//密度阈值

9:   private static List<Point> pointsList=new ArrayList<Point>();//存储原始样本点

10:   private static List<List<Point>> resultList=new ArrayList<List<Point>>();//存储最后的聚类结果

11:

12:   private static void applyDbscan() throws IOException{

13:     pointsList=Utility.getPointsList();

14:     for(int index=0;index<pointsList.size();++index){

15:       List<Point> tmpLst=new ArrayList<Point>();

16:       Point p=pointsList.get(index);

17:       if(p.isClassed())

18:         continue;

19:       tmpLst=Utility.isKeyPoint(pointsList, p, e, minp);

20:       if(tmpLst!=null){

21:         resultList.add(tmpLst);

22:       }

23:     }

24:     int length=resultList.size();

25:     for(int i=0;i<length;++i){

26:       for(int j=0;j<length;++j){

27:         if(i!=j){

28:           if(Utility.mergeList(resultList.get(i), resultList.get(j))){

29:             resultList.get(j).clear();

30:           }

31:         }

32:       }

33:     }

34:   }

35:   public static void main(String[] args) {

36:     try {

37:       //调用DBSCAN的实现算法

38:       applyDbscan();

39:       Utility.display(resultList);

40:     } catch (IOException e) {

41:       // TODO Auto-generated catch block

42:       e.printStackTrace();

43:     }

44:

45:   }

46:

47: }

48:

下边是一个小测试，即使用src\\com\\sunzhenxing\\points.txt文件的内容进行测试，points.txt的文件内容是：

0,0
0,1
0,2
0,3
0,4
0,5
12,1
12,2
12,3
12,4
12,5
12,6
0,6
0,7
12,7
0,8
0,9
1,1

最后算法的结果是：

-----第1个聚类-----
<0,0>
<0,1>
<0,2>
<1,1>
<0,3>
<0,4>
<0,5>
<0,6>
<0,7>
<0,8>
<0,9>
-----第2个聚类-----
<12,1>
<12,2>
<12,3>
<12,4>
<12,5>
<12,6>
<12,7>
大家画一下坐标就可以理解实验的结论了。

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航