使用POI数据挖掘区域功能并在网页端显示

xiaoxiao2021-02-28  102

使用POI数据挖掘区域功能并在网页端显示

    最近在做一个创新项目,其中包含区域功能挖掘的部分。前期我们使用了路网数据对上海市进行了区域的划分,并为每个POI加上了所属的区域标签。之后便使用此数据进行区域功能挖掘部分的展示。

一。使用TF_IDF算法挖掘出每个区域对应的功能,此处我们分了六大功能,分别是住宅,工作,教育,商业,公共服务,景点

   有如下几个子步骤   (1)根据poi的三级目录将poi数据划分到六种poi类别   (2)统计TF_IDF算法使用到的中间结果   (3) 使用TF_IDF挖掘出每个区域的功能,并记录相关结果 代码如下 import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; /* * poi.csv 原始经处理的poi,而且已经加上了区域编号,式例如下图 * "241076","永生餐饮","121.603927","31.235058","餐饮服务;中餐厅;中餐厅","275" * * 将操作合并起来,包括向poi中添加类,保留统计中间结果到result.txt * 和计算TF_IDF大小,并最终得到每个区域的功能, * * poiRange.csv 提取有用的信息(名称,经度,纬度,poi类别,所属区域) * result.txt(各个poi类别的数量,每个区域含有的poi数量) * TF_IDF1/2.txt 记录两种IDF算法产生的TF_IDF大小 * function1/2.txt 记录两种算法分别对应的每种功能区的区域数量 * 每个功能区包含的区域编号存储在poi_label.txt文件中 */ public class POI_Func { public static String [][] CC;//代表class的数量 public static String [][] CC2;//代表SecondClass的数量 public static int ccc=0; public static int getPoiClass(String type) { int num=-1; int flag=0; for (int i = 0; i < CC.length; i++) { if (flag==1) { flag=0; break; } for (int j = 0; j < CC[i].length; j++) { if (CC[i][j]==null) { break; } else if(CC[i][j].equals(type)){ num=i; flag=1; break; } } } return num; } public static int getSecondPoi(String type) { int num=-1; int flag=0; for (int i = 0; i < CC2.length; i++) { if (flag==1) { flag=0; break; } for (int j = 1; j < CC2[i].length; j++) { if (CC2[i][j]==null) { break; } else if(CC2[i][j].equals(type)){ num=Integer.parseInt(CC2[i][0]); ccc++; flag=1; break; } } } return num; } public static void main(String[] args) throws IOException{ // TODO Auto-generated method stub /*第一步:对原始数据poi.csv添加class.txt 中的poi类别,提取有用信息, 并将结果保存在poiRange.csv文件中*/ String path1 = "class5.txt"; String path2 = "poi.csv"; String path3 = "poiRange.csv"; String path4="result.txt"; int classLength=6;//记录poi类别的数量 int secondLength=3;//记录第二级目录的分类 int bigClass=19;//大类的数量 int secondClass=11; int rangeNum=542; String[] d1=null; String[] d2=null; String line=null; String line2=""; CC=new String[classLength][bigClass]; CC2=new String[secondLength][secondClass]; int countP[]=new int[classLength];//countP[0]-countP[5]分别对应六种类别的poi数量 int countR[]=new int[rangeNum];//countR[0]-countR[]分别对应落在50个区域的 BufferedReader br = new BufferedReader(new InputStreamReader( new FileInputStream(path1), "utf-8")); BufferedReader br2 = new BufferedReader(new InputStreamReader( new FileInputStream(path2), "utf-8")); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path3), "utf-8")); BufferedWriter bw2 = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path4), "utf-8")); for (int i = 0; i <classLength; i++) { line=br.readLine(); d1=line.split(" "); for (int j = 1; j < d1.length; j++) { CC[i][j-1]=d1[j]; } } for (int i = 0; i < secondLength; i++) { line=br.readLine(); d1=line.split(" "); for (int j = 0; j < d1.length; j++) { CC2[i][j]=d1[j]; } } int poiC=-1; int count=0; int range=-1; while((line=br2.readLine())!=null){ d1=line.split(","); d2=d1[4].split(";"); //System.out.println(d2[0].substring(1)); range=Integer.parseInt(d1[5].substring(1, d1[5].length()-1)); poiC=getPoiClass(d2[0].substring(1)); if (poiC==-1) { //System.out.println(d1[4].substring(1,d1[4].length()-1)); if (!(d1[4].substring(1,d1[4].length()-1)).equals("NULL")) { poiC=getSecondPoi(d2[0].substring(1)+";"+d2[1]); } if(poiC==-1) { count++; } } if (range!=-1&&poiC!=-1) { countR[range]++; } if(poiC!=-1&&range!=-1){ countP[poiC]++; } //统计 姓名+经度+纬度+poi分类(六类其中之一)+所属区域 line2=d1[1]+","+d1[2]+","+d1[3]+","+poiC+","+range; bw.write(line2); bw.newLine(); bw.flush(); } bw.close(); br.close(); br2.close(); CC=null; System.out.println("在区域外的poi数据:"+count); System.out.println("通过第二级目录得到的数据:"+ccc); System.out.println("----------------------------------------------"); line=""; System.out.print("poi:"); for (int i = 0; i < countP.length; i++) { System.out.print(countP[i]+" "); line+=countP[i]+" "; } System.out.println(); bw2.write(line.trim()); bw2.newLine(); bw2.flush(); line=""; System.out.print("Range:"); for (int i = 0; i < countR.length; i++) { System.out.print(countR[i]+","); line+=countR[i]+" "; } System.out.println(); bw2.write(line.trim()); bw2.flush(); bw2.close(); System.out.println("----------------------------------------------"); /*第二步:进行TF_IDF的计算,要使用countR[] 和 countP[]中的统计数据 */ path1 = "poiRange.csv"; path2="TF_IDF1.txt"; path3="function1.txt"; path4="poi_label1.txt"; d1=null; d2=null; int R=542; int countPoi=0;//统计有效的poi数量 br = new BufferedReader(new InputStreamReader( new FileInputStream(path1), "utf-8")); bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path2), "utf-8")); bw2 = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path3), "utf-8")); line=""; int TF[][]=new int[R][6];//记录各个区域每个POI类别的数量 int F[]=new int[6];//代表含有poi类别的区域数 float IDF[][]=new float[R][6]; float TF_F[][]=new float[R][6]; float TF_IDF[][]=new float[R][7]; int numFunc[]=new int [6];//与功能划分相关的变量 String lable[]=new String[6]; for (int i = 0; i < lable.length; i++) { lable[i]=""; } double max=0l; double temp=0l; int cc=0;//如果为均为0的归属问题 int cc2=0;//均为0的个数 int flag=0; count=0; for (int i = 0; i < TF.length; i++) { for (int j = 0; j < TF[1].length; j++) { TF[i][j]=0; TF_F[i][j]=0; } } int r=-1,p=-1; while ((line = br.readLine()) != null) {//从文件中获得数据 d1=line.split(","); r=Integer.parseInt(d1[4]); p=Integer.parseInt(d1[3]); if (r!=-1&&p!=-1) { TF[r][p]++; countPoi++; } } br.close(); for (int i = 0; i < TF.length; i++) { for (int j = 0; j < TF[1].length; j++) { if (countR[i]==0) { TF_F[i][j]=0l; } else { TF_F[i][j]=TF[i][j]*1.0f/countR[i]; } } } for (int i = 0; i < TF.length; i++) { for (int j = 0; j < TF[1].length; j++) { if (TF[i][j]!=0) { F[j]++; } } } //第一种类型的TF_IDF for (int i = 0; i < IDF.length; i++) { for (int j = 0; j < IDF[1].length; j++) { IDF[i][j]=(float) Math.log(R*1.0/(F[j]+1)); if (IDF[i][j]<0) { IDF[i][j]=0; } } } for (int i = 0; i < IDF.length; i++) { for (int j = 0; j < IDF[1].length; j++) { TF_IDF[i][j]=TF_F[i][j]*IDF[i][j]; temp=TF_IDF[i][j]; if (temp>max) { flag=j; max=temp; } } if (max==0l) { cc=cc%6; numFunc[cc++]++; cc2++; } else { numFunc[flag]++; lable[flag]+=count+" "; } max=0l; flag=0; count++; } line=""; for (int i = 0; i < TF.length; i++) { for (int j = 0; j < TF[1].length; j++) { line+=TF_IDF[i][j]+" "; } bw.write(line.trim()); bw.newLine(); bw.flush(); line=""; } bw.close(); System.out.print("各个功能区含有的区域数(第一种):"); for (int i = 0; i < numFunc.length; i++) { System.out.print(numFunc[i]+" "); bw2.write(numFunc[i]+""); bw2.newLine(); bw2.flush(); } System.out.println(); bw2.close(); bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path4), "utf-8")); for (int i = 0; i < lable.length-1; i++) { bw.write(lable[i].trim()); bw.newLine(); bw.flush(); } bw.write(lable[lable.length-1].trim()); bw.flush(); bw.close(); //第二种类型的DF_IDF path2="TF_IDF2.txt"; path3="function2.txt"; path4="poi_label2.txt"; bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path2), "utf-8")); bw2 = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path3), "utf-8")); count=0; for (int i = 0; i < lable.length; i++) { lable[i]=""; } for (int i = 0; i < numFunc.length; i++) { numFunc[i]=0; } cc=0; for (int i = 0; i < IDF.length; i++) { for (int j = 0; j < IDF[1].length; j++) { IDF[i][j]=(float) (100f/Math.log(countP[j]*1.0/TF[i][j])*Math.log(R*1.0/(F[j]+1))); if (IDF[i][j]<0) { IDF[i][j]=0; } } } for (int i = 0; i < IDF.length; i++) { for (int j = 0; j < IDF[1].length; j++) { TF_IDF[i][j]=TF_F[i][j]*IDF[i][j]; temp=TF_IDF[i][j]; if (temp>max) { flag=j; max=temp; } } if (max==0l) { cc=cc%6; numFunc[cc++]++; } else { numFunc[flag]++; lable[flag]+=count+" "; } max=0l; flag=0; count++; } line=""; for (int i = 0; i < TF.length; i++) { for (int j = 0; j < TF[1].length; j++) { line+=TF_IDF[i][j]+" "; } bw.write(line.trim()); bw.newLine(); bw.flush(); line=""; } bw.close(); System.out.print("各个功能区含有的区域数(第二种):"); for (int i = 0; i < numFunc.length; i++) { System.out.print(numFunc[i]+" "); bw2.write(numFunc[i]+""); bw2.newLine(); bw2.flush(); } System.out.println(); bw2.close(); bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path4), "utf-8")); for (int i = 0; i < lable.length-1; i++) { bw.write(lable[i].trim()); bw.newLine(); bw.flush(); } bw.write(lable[lable.length-1].trim()); bw.flush(); bw.close(); System.out.println("----------------------------------------------"); System.out.println("使用到poi数:"+countPoi); } }

二。将每个poi的有用信息提取出来,只包含经纬度和所属功能区编号(0-5)

代码如下 import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; /* * 此程序以poiRange.csv为输入,将结果保存在poiF1,poiF2.Json中 * * poiF1: 包含class5的六个不同的功能区的poi数据(每个区域只含有对应功能区的poi),包含经纬度和所属功能编号 * poiF2: 包含class5的六个不同的功能区的poi数据(每个区域含有所有的poi,只不过poi功能号码相同),包含经纬度和所属功能编号 */ public class Func_Json { public static int CC[][]; public static int getFunction(int label){ int num=-1; int flag=0; for (int i = 0; i < CC.length; i++) { if (flag==1) { break; } for (int j = 0; j < CC[0].length; j++) { if (CC[i][j]==-1) { break; } else { if (label==CC[i][j]) { num=i; flag=1; break; } } } } return num; } public static void main(String[] args) throws IOException{ // TODO Auto-generated method stub String path1 = "poiRange.csv"; String path2="poi_label1.txt"; String path3 = "poiF1.json"; String path4 = "poiF2.json"; String[] d1=null; String line=null; String line2=""; String line3=""; int classPoi=6; int classLength=260; CC=new int[classPoi][classLength]; for (int i = 0; i < CC.length; i++) { for (int j = 0; j < CC[i].length; j++) { CC[i][j]=-1; } } BufferedReader br = new BufferedReader(new InputStreamReader( new FileInputStream(path1), "utf-8")); BufferedReader br2 = new BufferedReader(new InputStreamReader( new FileInputStream(path2), "utf-8")); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path3), "utf-8")); BufferedWriter bw2 = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(path4), "utf-8")); int poi=-1,fun=-1,label=-1; for (int i = 0; i < classPoi; i++) { line=br2.readLine(); d1=line.split(" "); for (int j = 0; j < d1.length; j++) { CC[i][j]=Integer.parseInt(d1[j]); } } int count=0; int flag=0; while((line=br.readLine())!=null){ d1=line.split(","); poi=Integer.parseInt(d1[3]); label=Integer.parseInt(d1[4]); if (poi!=-1&&label!=-1) { fun=getFunction(label); if (fun==poi) { line2+="{\"lng\":"+d1[1].substring(1,d1[1].length()-1)+",\"lat\":" +d1[2].substring(1, d1[2].length()-1)+",\"fun\":"+fun+"},"; bw.write(line2); bw.newLine(); bw.flush(); } line3+="{\"lng\":"+d1[1].substring(1,d1[1].length()-1)+",\"lat\":" +d1[2].substring(1, d1[2].length()-1)+",\"fun\":"+fun+"},"; bw2.write(line3); bw2.newLine(); bw2.flush(); } poi=-1; label=-1; line2=""; line3=""; } line2="]"; bw.write(line2); bw.flush(); bw2.write(line2); bw2.flush(); bw.close(); bw2.close(); br.close(); br2.close(); System.out.println("Tranform end"); } }

三。在网页中展示效果

利用上一步得到的json文件(要稍微处理一下,把最后一项{}之后的“,”去掉),在使用leaflet.js d3.js库,便能清晰的在地图上展示每个区域的功能区分布。 效果如下: 四。代码链接   (1) java代码和源文件   (2)网页端显示地址
转载请注明原文地址: https://www.6miu.com/read-51105.html

最新回复(0)