forked from xubo245/SparkLearning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path WordCountPartitions.scala
29 lines (27 loc) · 1.21 KB
/
WordCountPartitions.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/**
* @author xubo
* You can change the number of output files by changing partitions
*/
package org.apache.spark.examples
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
object WordCountPartitions {
  /**
   * Counts words in the input files and writes the sorted counts to a
   * timestamped output directory.
   *
   * The number of output part-files is controlled by the numPartitions
   * arguments passed to reduceByKey (1) and sortBy (2) below.
   */
  def main(args: Array[String]): Unit = {
    // val conf = new SparkConf().setAppName("WordCountPartitions").setMaster("local")
    val conf = new SparkConf().setAppName("WordCountPartitions").setMaster("local[4]")
    val sc = new SparkContext(conf)
    try {
      // Split on whitespace, count each word; reduceByKey(_ + _, 1) coalesces
      // the counts into a single partition before sorting.
      val counts = sc.textFile("file/data/examples/input/wordCount/*")
        .flatMap(_.split("\\s+"))
        .map(word => (word, 1))
        .reduceByKey(_ + _, 1)
        .sortBy(_._2, ascending = true, 2) // sort ascending by count; 2 output partitions

      // Timestamp in the output path avoids "output directory already exists"
      // failures when the job is re-run.
      val iString = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
      counts.saveAsTextFile("file/data/examples/output/wordCount" + iString)
      counts.foreach(println)
      println("WordCountPartitions Success")
    } finally {
      // Always release the SparkContext, even if the job above throws.
      sc.stop()
    }
  }
}