海量IP地址排序统计出现次数最多的K个地址

xiaoxiao2021-02-28  66

由于海量IP地址无法一次性装入内存进行排序,本文采用如下步骤:(1)对每个IP做hash,将其分散到N(这里取1000)个小文件中,相同的IP必然落入同一个文件;(2)分别统计每个小文件中出现次数最多的K个地址;(3)对得到的N*K个候选地址再次统计(最小堆/归并/快排),取出最终出现次数最多的K个地址。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;

/**
 * Finds the most frequent IP addresses in a file that is too large to count in memory.
 *
 * <p>The work is done in three stages, all operating on files next to the source file:
 * <ol>
 *   <li>{@link #generateIp(int, String)} writes random space-separated addresses.</li>
 *   <li>{@link #hash(String, int)} scatters the addresses over {@code fileNum} bucket
 *       files in a {@code hash} sub-directory; equal addresses always share a bucket.</li>
 *   <li>{@link #hashAndSort(String, int, int)} counts each bucket separately, keeps the
 *       {@code top} most frequent addresses of every bucket, and ranks those candidates
 *       to produce {@code result.txt}.</li>
 * </ol>
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and the failing stage
 * returns early; no checked exception reaches the caller.
 */
public class IP {

    /**
     * Writes {@code num} random addresses of the form {@code 192.a.b.c}, each followed by a
     * single space, to {@code path}. An existing file is overwritten and missing parent
     * directories are created.
     *
     * @param num  number of addresses to write
     * @param path target file
     */
    public static void generateIp(int num, String path) {
        Random random = new Random();
        File file = new File(path);
        // Resolve via the absolute path so a bare file name does not yield a null parent.
        File parent = parentDir(path);
        if (parent != null) {
            parent.mkdirs();
        }
        try (BufferedWriter writer =
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)))) {
            for (int i = 0; i < num; i++) {
                StringBuilder ip = new StringBuilder("192.");
                ip.append(random.nextInt(256)).append('.')
                  .append(random.nextInt(256)).append('.')
                  .append(random.nextInt(256)).append(' ');
                writer.append(ip);
            }
            writer.flush();
            System.out.println("ip生成完毕");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Splits the space-separated addresses in {@code path} into {@code fileNum} bucket files
     * named {@code hash/0.txt .. hash/(fileNum-1).txt} beside the source file.
     *
     * <p>Bucket files are truncated first, so running this method again does not
     * accumulate stale data. Empty tokens (for example the one after the trailing
     * separator) are skipped.
     *
     * @param path    source file of space-separated addresses
     * @param fileNum number of bucket files; must be positive
     * @throws IllegalArgumentException if {@code fileNum} is not positive
     */
    public static void hash(String path, int fileNum) {
        if (fileNum <= 0) {
            throw new IllegalArgumentException("fileNum must be positive: " + fileNum);
        }
        File source = new File(path);
        File bucketDir = new File(parentDir(path), "hash");
        OutputStreamWriter[] writers = new OutputStreamWriter[fileNum];
        try {
            try {
                bucketDir.mkdirs();
                for (int i = 0; i < fileNum; i++) {
                    File bucket = new File(bucketDir, i + ".txt");
                    // No append flag: every run starts from empty buckets.
                    writers[i] = new OutputStreamWriter(new FileOutputStream(bucket));
                }
                try (BufferedReader reader =
                        new BufferedReader(new InputStreamReader(new FileInputStream(source)))) {
                    String ip;
                    while ((ip = nextToken(reader)) != null) {
                        int h = ip.hashCode();
                        int mixed = h ^ (h >>> 16);
                        // Take the remainder BEFORE abs(): Math.abs(Integer.MIN_VALUE) is
                        // still negative and would index outside the array.
                        int bucketIndex = Math.abs(mixed % fileNum);
                        writers[bucketIndex].write(ip + ' ');
                    }
                }
            } finally {
                closeAll(writers);
            }
            System.out.println("hash完毕");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Counts the addresses in every bucket file produced by {@link #hash(String, int)},
     * keeps the {@code top} most frequent of each bucket as candidates, ranks the
     * candidates, and writes the overall {@code top} addresses to {@code result.txt}
     * beside the source file, most frequent first. The same entries are printed to
     * standard output.
     *
     * @param path    source file that was passed to {@link #hash(String, int)}
     * @param fileNum number of bucket files to read
     * @param top     number of addresses to report
     */
    public static void hashAndSort(String path, int fileNum, int top) {
        File parent = parentDir(path);
        File bucketDir = new File(parent, "hash");
        try {
            Map<String, Integer> candidates = new HashMap<>();
            for (int i = 0; i < fileNum; i++) {
                File bucket = new File(bucketDir, i + ".txt");
                Map<String, Integer> counts = new HashMap<>(1024);
                try (BufferedReader reader =
                        new BufferedReader(new InputStreamReader(new FileInputStream(bucket)))) {
                    String ip;
                    while ((ip = nextToken(reader)) != null) {
                        Integer seen = counts.get(ip);
                        counts.put(ip, seen == null ? 1 : seen + 1);
                    }
                }
                // An address lives in exactly one bucket, so its per-bucket count is final.
                sortMap(counts, top, candidates);
            }

            // Insertion-ordered map so the ranking produced by sortMap survives iteration.
            Map<String, Integer> resultMap = new LinkedHashMap<>();
            sortMap(candidates, top, resultMap);

            File result = new File(parent, "result.txt");
            try (OutputStreamWriter writer =
                    new OutputStreamWriter(new FileOutputStream(result))) {
                for (Entry<String, Integer> entry : resultMap.entrySet()) {
                    writer.write(entry.getKey() + " 出现次数:" + entry.getValue());
                    writer.write("\r\n");
                }
                writer.flush();
            }
            for (Entry<String, Integer> entry : resultMap.entrySet()) {
                System.out.println(entry.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the {@code top} entries with the largest values from {@code map} into
     * {@code resultMap}, inserting them in descending order of value. Entries with equal
     * values keep the relative order in which {@code map} iterates them.
     *
     * <p>The ranking is only observable afterwards if {@code resultMap} preserves
     * insertion order (for example a {@code LinkedHashMap}).
     *
     * @param map       counts to rank; not modified
     * @param top       maximum number of entries to copy
     * @param resultMap receives the selected entries; existing keys are overwritten
     */
    public static void sortMap(Map<String, Integer> map, int top, Map<String, Integer> resultMap) {
        List<Entry<String, Integer>> entries = new ArrayList<>(map.entrySet());
        Collections.sort(entries, new Comparator<Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                // Reversed arguments give descending order without subtraction overflow.
                return Integer.compare(o2.getValue(), o1.getValue());
            }
        });
        int limit = Math.min(top, entries.size());
        for (int j = 0; j < limit; j++) {
            Entry<String, Integer> entry = entries.get(j);
            resultMap.put(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Demo driver: generates 10^9 addresses, splits them into 1000 buckets and reports the
     * 10 most frequent ones.
     */
    public static void main(String[] args) {
        String path = "F:/ip/source2.txt";
        int top = 10;
        int filenum = 1000;
        generateIp((int) Math.pow(10, 9), path);
        hash(path, filenum);
        hashAndSort(path, filenum, top);
    }

    /** Returns the directory containing {@code path}, resolved against the working directory. */
    private static File parentDir(String path) {
        return new File(path).getAbsoluteFile().getParentFile();
    }

    /**
     * Reads the next non-empty token delimited by a single space character.
     *
     * @return the token, or {@code null} once the reader is exhausted
     */
    private static String nextToken(BufferedReader reader) throws IOException {
        StringBuilder token = new StringBuilder();
        int c;
        while ((c = reader.read()) != -1) {
            if (c != ' ') {
                token.append((char) c);
            } else if (token.length() > 0) {
                return token.toString();
            }
            // A separator with nothing before it is an empty token: skip it.
        }
        return token.length() > 0 ? token.toString() : null;
    }

    /** Closes every opened writer, rethrowing the first failure with the rest suppressed. */
    private static void closeAll(OutputStreamWriter[] writers) throws IOException {
        IOException failure = null;
        for (OutputStreamWriter writer : writers) {
            if (writer == null) {
                continue;
            }
            try {
                writer.close();
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}

转载请注明原文地址: https://www.6miu.com/read-55274.html

最新回复(0)