应用场景一:统计单词总长度,以及类似场景
// Use case 1: sum the lengths of all lines in a text file (map + reduce).
public class HelloWorldMap implements Serializable {
    public static void main(String[] args) {
        SparkConf sparkConf =
                new SparkConf().setAppName("HelloWorldMap").setMaster("local");
        // try-with-resources stops the context even on failure; the original
        // never closed the JavaSparkContext (it implements Closeable) and leaked it.
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            JavaRDD<String> lines = sc.textFile("/Users/ccc/Documents/data.txt");
            // Map each line to its character count.
            JavaRDD<Integer> lineLengths = lines.map(String::length);
            // Cache: this RDD is consumed twice (foreach below, then reduce).
            lineLengths.persist(StorageLevel.MEMORY_ONLY());
            lineLengths.foreach(lineLength -> System.out.println(lineLength));
            // Total length = sum of all per-line lengths.
            Integer totalLength = lineLengths.reduce(Integer::sum);
            System.out.println(totalLength);
        }
    }
}
应用场景二:统计所有单词以及对应个数,获取统计结果,以及类似场景
// Use case 2: count occurrences per key (word-count style) and collect the
// sorted result back to the driver.
public class HelloWorldMapToPair implements Serializable {
    public static void main(String[] args) {
        SparkConf sparkConf =
                new SparkConf().setAppName("HelloWorldMapToPair").setMaster("local");
        // try-with-resources closes the SparkContext; the original leaked it.
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            JavaRDD<String> lines = sc.textFile("/Users/ccc/Documents/data.txt");
            // Pair each line (treated as one word) with an initial count of 1.
            JavaPairRDD<String, Integer> pairs =
                    lines.mapToPair(line -> new Tuple2<>(line, 1));
            // Sum the counts for identical keys.
            JavaPairRDD<String, Integer> counts = pairs.reduceByKey(Integer::sum);
            counts.foreach(count -> System.out.println(count._1() + ":" + count._2()));
            // Collect the key-sorted result to the driver and print it.
            List<Tuple2<String, Integer>> tuple2List = counts.sortByKey().collect();
            tuple2List.forEach(tuple2 ->
                    System.out.println(tuple2._1() + ":" + tuple2._2()));
        }
    }
}
应用场景三:统计所有单词,以及类似场景
// Use case 3: split every line into words (flatMap) and print them sorted.
public class HelloWorldFlatMap implements Serializable {
    public static void main(String[] args) {
        SparkConf sparkConf =
                new SparkConf().setAppName("HelloWorldFlatMap").setMaster("local");
        // try-with-resources closes the SparkContext; the original leaked it.
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            JavaRDD<String> lines = sc.textFile("/Users/ccc/Documents/data2.txt");
            // Split each line on spaces and flatten into one RDD of words.
            JavaRDD<String> flatMapRDD = lines.flatMap(
                    (FlatMapFunction<String, String>) line ->
                            Arrays.asList(line.split(" ")).iterator());
            // Sort words ascending, using 3 partitions.
            JavaRDD<String> sortRDD = flatMapRDD.sortBy(
                    (Function<String, String>) word -> word, true, 3);
            sortRDD.foreach((VoidFunction<String>) line -> System.out.println(line));
        }
    }
}
应用场景四:统计所有单词,获取统计结果,以及类似场景
// Use case 4: word count via flatMapToPair — emit (word, 1) pairs, sum per
// word, and collect the sorted result back to the driver.
public class HelloWorldFlatMapToPair implements Serializable {
    public static void main(String[] args) {
        SparkConf sparkConf =
                new SparkConf().setAppName("HelloWorldFlatMapToPair").setMaster("local");
        // try-with-resources closes the SparkContext; the original leaked it.
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            JavaRDD<String> lines = sc.textFile("/Users/ccc/Documents/data1.txt");
            // Split each line into words and emit one (word, 1) pair per word.
            JavaPairRDD<String, Integer> pairs = lines.flatMapToPair(
                    (PairFlatMapFunction<String, String, Integer>) line -> {
                        List<Tuple2<String, Integer>> wordPairs = new ArrayList<>();
                        for (String word : line.split(" ")) {
                            wordPairs.add(new Tuple2<>(word, 1));
                        }
                        return wordPairs.iterator();
                    });
            // Sum the counts for identical words.
            JavaPairRDD<String, Integer> counts = pairs.reduceByKey(Integer::sum);
            counts.foreach(count -> System.out.println(count._1() + ":" + count._2()));
            // Collect the key-sorted result to the driver and print it.
            // (Original mixed the `._1` field with the `._1()` accessor — same
            // value either way; the accessor form is used consistently here.)
            List<Tuple2<String, Integer>> tuple2List = counts.sortByKey().collect();
            tuple2List.forEach(tuple2 ->
                    System.out.println(tuple2._1() + ":" + tuple2._2()));
        }
    }
}
更多内容请关注微信公众号: “大数据开发与学习茶馆”
转载请注明原文地址: https://www.6miu.com/read-5040356.html