spark wordcount
2018-11-09 本文已影响0人
wncbbnk
sbt
// Build settings shared by the Spark project(s) in this build:
// coordinates, Scala version, and the Spark 2.3.2 dependencies.
lazy val sparkSettings = Seq(
  organization := "org",
  version := "0.1",
  scalaVersion := "2.11.8",
  // `++=` appends to the dependency list instead of replacing it, so
  // dependencies contributed by plugins or other settings are not discarded.
  // `%%` adds the Scala binary-version suffix (`_2.11` for scalaVersion 2.11.8)
  // to the artifact name, keeping it in sync with `scalaVersion`.
  libraryDependencies ++= Seq(
    "org.apache.spark" %% "spark-sql" % "2.3.2",
    "org.apache.spark" %% "spark-streaming" % "2.3.2",
    "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.3.2",
    "org.apache.spark" %% "spark-core" % "2.3.2"
  )
)
// Root project: applies the shared Spark settings and configures sbt-assembly
// (main class, jar name, and how duplicate files are merged into the fat jar).
lazy val root = project
  .in(file("."))
  .settings(sparkSettings)
  .settings(
    name := "spark-assembly",
    // Placeholder values: replace with the real main class and jar file name.
    mainClass in assembly := Some("类名"),
    assemblyJarName in assembly := "jar包名",
    assemblyMergeStrategy in assembly := {
      // Duplicate entries under these package prefixes are resolved with MergeStrategy.last.
      case PathList("javax", "servlet" | "inject" | "activation", _*) =>
        MergeStrategy.last
      case PathList("org", "apache" | "aopalliance", _*) =>
        MergeStrategy.last
      case PathList("net", "jpountz", _*) =>
        MergeStrategy.last
      case PathList("com", "google" | "esotericsoftware" | "codahale" | "yammer", _*) =>
        MergeStrategy.last
      // about.html is the only entry handled with MergeStrategy.rename.
      case "about.html" =>
        MergeStrategy.rename
      // Individual resource files that are also resolved with MergeStrategy.last.
      case "META-INF/mailcap" | "META-INF/mimetypes.default" | "plugin.properties" |
          "git.properties" | "log4j.properties" =>
        MergeStrategy.last
      // Anything else is delegated to the previously configured strategy.
      case other =>
        val fallback = (assemblyMergeStrategy in assembly).value
        fallback(other)
    }
  )
scala
package 类名
import org.apache.spark.sql.SparkSession
/** Word-count demo that runs a local Spark job over `tmp.txt`. */
object SparkPlayground {

  /**
   * Reads `tmp.txt`, splits every line on single spaces, drops empty tokens,
   * counts the occurrences of each word and prints the `(word, count)` pairs.
   *
   * The reduce function also prints each pair of values it combines, to show
   * how `reduceByKey` folds the values of one key.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    println("Start---")
    val spark = SparkSession.builder()
      .appName("tmp_work")
      .master("local")
      .getOrCreate()

    // Stop the session even if the job throws, so the underlying SparkContext
    // is always released.
    try {
      spark.sparkContext
        .textFile("tmp.txt")
        .flatMap(_.split(" "))
        .filter(_.nonEmpty)
        .map((_, 1))
        .reduceByKey { (x, y) =>
          // x and y are both values (partial counts) belonging to the same key
          println("x, y:")
          println(x)
          println(y)
          x + y
        }
        .foreach(println)
    } finally {
      spark.stop()
    }
  }
}