// Secondary sort: ordering records by multiple fields in succession (primary key first, then secondary key).
package test1

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Composite key for a secondary sort: orders by `first`, breaking ties with `second`.
 *
 * Must be [[Serializable]] because Spark ships key instances between the driver
 * and executors during the shuffle performed by `sortByKey`.
 *
 * @param first  primary sort field
 * @param second secondary sort field, used only when `first` values are equal
 */
class secondSort(val first: Int, val second: Int) extends Ordered[secondSort] with Serializable {
  /**
   * Compares this key with `that`: negative if this sorts before, positive if after, 0 if equal.
   *
   * Uses `Integer.compare` rather than subtraction (`this.first - that.first`),
   * because subtraction overflows for extreme Int values (e.g. comparing
   * Int.MinValue against a positive number) and then reports the wrong sign.
   */
  override def compare(that: secondSort): Int = {
    if (this.first != that.first) {
      Integer.compare(this.first, that.first)
    } else {
      Integer.compare(this.second, that.second)
    }
  }
}

object test {
  /**
   * Demonstrates a secondary sort on an in-memory array of (Int, Int) pairs:
   * each pair becomes a `secondSort` key, `sortByKey` orders by (first, second),
   * and the original pairs are printed in that order.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("second").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val arr = Array((2, 1), (2, 4), (2, 3), (5, 2), (5, 4), (5, 2), (4, 3), (4, 2), (4, 1))
      sc.makeRDD(arr)
        .map(a => (new secondSort(a._1, a._2), a))
        .sortByKey()
        .foreach(a => println(a._2 + "\t"))
      // File-based variant kept from the original for reference:
      // sc.textFile("D:\\a\\a.txt")
      //   .map(a => (new secondSort(a.split(",")(0).toInt, a.split(",")(1).toInt), a))
      //   .sortByKey()
      //   .foreach(a => println(a._2 + "\t"))
    } finally {
      sc.stop() // release the SparkContext even if the job throws
    }
  }
}
// Analogous to the grouping + sorting implementation in MapReduce.
