Word count program code
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._
import StreamingContext._
/** Spark Streaming word count.
  *
  * Reads lines of text from a TCP socket on localhost:54321, splits them into
  * words and prints the word counts of every 30-second batch.
  */
object SparkStreamWC {

  /** Entry point: builds the streaming pipeline and blocks until the
    * streaming context terminates.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Values set directly on SparkConf override spark-submit flags, so only
    // fall back to local[4] when no master was supplied externally.
    val conf = new SparkConf()
      .setAppName("Spark Streaming Word Count")
      .setIfMissing("spark.master", "local[4]")

    // One micro-batch every 30 seconds.
    val ssc = new StreamingContext(conf, Seconds(30))

    val lines = ssc.socketTextStream("localhost", 54321)

    // Split on runs of whitespace and drop empty tokens. Splitting on a single
    // space produced "" for consecutive spaces and blank lines, which then
    // showed up in the output as a word with its own count, e.g. (,5).
    val words = lines
      .flatMap(_.split("\\s+"))
      .filter(_.nonEmpty)

    val pairs = words.map(word => (word, 1))
    val wordCounts = pairs.reduceByKey(_ + _)

    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
Spark submit command
spark-submit --class "sparkstream.spark.scala.SparkStreamWC" retailanother-0.0.1-SNAPSHOT.jar
Sample word count data from the logs
(further,1)
(are,1)
(after,1)
(few,1)
(counts,1)
(transformation),1)
(have,1)
(only,1)
(been,1)
(frequency,1)
(further,4)
(are,4)
(after,4)
(few,4)
(counts,4)
(transformation),4)
(have,4)
(only,4)
(,5)
(frequency,4)