层次聚类方法对给定的数据集进行层次的分解,直到某种条件满足为止。具体又可分为凝聚的和分裂的两种方案。
1. 凝聚的层次聚类是一种自底向上的策略,首先将每个对象作为一个簇,然后合并这些原子簇为越来越大的簇,直到所有的对象都在一个簇中,或者某个终止条件被满足。绝大多数层次聚类方法属于这一类,它们只是在簇间相似度的定义上有所不同。
2. 分裂的层次聚类与凝聚的层次聚类相反,采用自顶向下的策略,它首先将所有对象置于同一个簇中,然后逐渐细分为越来越小的簇,直到每个对象自成一簇,或者达到了某个终止条件。
层次凝聚的代表是AGNES(AGglomerative NESting)算法。
Java代码:
Node.java
/**
 * A named sample point with a coordinate vector and a reference to the
 * cluster it is currently assigned to.
 *
 * <p>The class is a plain mutable holder: no field is validated, and the
 * coordinate array is stored and returned by reference (no copy is made).
 */
public class Node {

    /** Coordinates of the sample point, one array entry per dimension. */
    private double[] dimension;

    /** Name of the sample point. */
    String nodeName;

    /** Cluster this sample point belongs to. */
    Cluster cluster;

    /** Creates a node whose name, coordinates and cluster are all unset. */
    public Node() {
    }

    /**
     * Creates a node with the given coordinates and name; the cluster is left unset.
     *
     * @param dimension coordinates of the sample point (kept by reference)
     * @param nodeName  name of the sample point
     */
    public Node(double[] dimension, String nodeName) {
        this.dimension = dimension;
        this.nodeName = nodeName;
    }

    /** @return the name of the sample point */
    public String getNodeName() {
        return this.nodeName;
    }

    /** @param nodeName the new name of the sample point */
    public void setNodeName(String nodeName) {
        this.nodeName = nodeName;
    }

    /** @return the coordinate array held by this node (not a copy) */
    public double[] getDimension() {
        return this.dimension;
    }

    /** @param dimension the new coordinate array (kept by reference) */
    public void setDimension(double[] dimension) {
        this.dimension = dimension;
    }

    /** @return the cluster this sample point belongs to */
    public Cluster getCluster() {
        return this.cluster;
    }

    /** @param cluster the cluster this sample point now belongs to */
    public void setCluster(Cluster cluster) {
        this.cluster = cluster;
    }
}
类簇Cluster.java
public class Cluster {
private List nodes = new ArrayList(); // 类簇中的样本点
private String clusterName;
public List getNodes() {
return nodes;
}
public void setNodes(List nodes) {
this.nodes = nodes;
}
public String getClusterName() {
return clusterName;
}
public void setClusterName(String clusterName) {
this.clusterName = clusterName;
}
}
算法主程序hierCluster.java
public class hierCluster {
public List startAnalysis(List nodes,int ClusterNum){
List finalClusters=new ArrayList();
List originalClusters=InitialCluster(nodes);
finalClusters=originalClusters;
while(finalClusters.size()>ClusterNum){
double min=Double.MAX_VALUE;
int mergeIndexA=0;
int mergeIndexB=0;
for(int i=0;i
for(int j=0;j
if(i!=j){
Cluster clusterA=finalClusters.get(i);
Cluster clusterB=finalClusters.get(j);
List nodesA=clusterA.getNodes();
List nodesB=clusterB.getNodes();
for(int m=0;m
for(int n=0;n
double tempDis=dist(nodesA.get(m),nodesB.get(n));
if(tempDis
min=tempDis;
mergeIndexA=i;
mergeIndexB=j;
}
}
}
}
} //end for j
}// end for i
//合并cluster[mergeIndexA]和cluster[mergeIndexB]
finalClusters=mergeCluster(finalClusters,mergeIndexA,mergeIndexB);
}//end while
return finalClusters;
}
private List mergeCluster(List finalCluster, int mergeIndexA, int mergeIndexB) {
if(mergeIndexA!=mergeIndexB)
{
Cluster clusterA=finalCluster.get(mergeIndexA);
Cluster clusterB=finalCluster.get(mergeIndexB);
List nodesA=clusterA.getNodes();
ListnodesB=clusterB.getNodes();
for (Node dp : nodesB) {
Node tempDp = new Node();
tempDp.setNodeName(dp.getNodeName());
tempDp.setDimension(dp.getDimension());
tempDp.setCluster(clusterA);
nodesA.add(tempDp);
}
clusterA.setNodes(nodesA);
finalCluster.remove(mergeIndexB);
//System.out.println(" remove mergeIndexA"+mergeIndexA+";"+mergeIndexB);
}
return finalCluster;
}
public double dist(Node a,Node b)
{
double[] dimensionA=a.getDimension();
double[] dimensionB=b.getDimension();
double distance=0;
if(dimensionA.length==dimensionB.length)//注意易出错为://if(
dimensionA==dimensionB)
{
for(int j=0;j
{
double temp=Math.pow(dimensionA[j]-dimensionB[j], 2);
distance=distance+temp;
}
distance=Math.pow(distance, 0.5);
}
return distance;
}
public List InitialCluster(Listnodes)
{
ListoriginalCluster=new ArrayList();
for(int i=0;i
{
Node tempNode=nodes.get(i);
ListtempNodes=new ArrayList();
tempNodes.add(tempNode);
Cluster tempCluster=new Cluster();
tempCluster.setClusterName("cluster"+String.valueOf(i));
tempCluster.setNodes(tempNodes);
tempNode.setCluster(tempCluster);
originalCluster.add(tempCluster);
}
return originalCluster;
}
public static void main(String[] args) {
ArrayList nodes = new ArrayList();
for(int i=0;i<20;i++){//随机产生点
float tempx=(float) Math.random();
float tempy=(float) Math.random();
double[]a={tempx,tempy};
Node tempNode=new Node(a,String.valueOf(i));
nodes.add(tempNode);
}
int clusterNum=5; //类簇数
hierCluster hc=new hierCluster();
List clusters=hc.startAnalysis(nodes, clusterNum);
for(Cluster cl:clusters){
System.out.println("------"+cl.getClusterName()+"------");
List tempDps=cl.getNodes();
for(Node tempdp:tempDps){
System.out.println(tempdp.getNodeName());
}
}
}
}