Spark WordCount

2018-11-09  wncbbnk

build.sbt

// Shared Spark settings. Use ++= so the defaults are appended to rather than
// replaced, and %% so sbt adds the Scala binary version (_2.11) automatically.
lazy val sparkSettings = Seq(
  organization := "org",
  version := "0.1",
  scalaVersion := "2.11.8",
  libraryDependencies ++= Seq(
    "org.apache.spark" %% "spark-sql" % "2.3.2",
    "org.apache.spark" %% "spark-streaming" % "2.3.2",
    "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.3.2",
    "org.apache.spark" %% "spark-core" % "2.3.2"
  )
)

lazy val root = (project in file("."))
  .settings(
    sparkSettings,
    name := "spark-assembly",
    mainClass in assembly := Some("your.main.ClassName"), // fully-qualified main class (placeholder)
    assemblyJarName in assembly := "your-app.jar",        // output jar name (placeholder)
    // Resolve duplicate files pulled in by overlapping dependencies when building the fat jar
    assemblyMergeStrategy in assembly := {
      case PathList("javax", "servlet", xs @ _*) => MergeStrategy.last
      case PathList("javax", "inject", xs @ _*) => MergeStrategy.last
      case PathList("javax", "activation", xs @ _*) => MergeStrategy.last
      case PathList("org", "apache", xs @ _*) => MergeStrategy.last
      case PathList("org", "aopalliance", xs @ _*) => MergeStrategy.last
      case PathList("net", "jpountz", xs @ _*) => MergeStrategy.last
      case PathList("com", "google", xs @ _*) => MergeStrategy.last
      case PathList("com", "esotericsoftware", xs @ _*) => MergeStrategy.last
      case PathList("com", "codahale", xs @ _*) => MergeStrategy.last
      case PathList("com", "yammer", xs @ _*) => MergeStrategy.last
      case "about.html" => MergeStrategy.rename
      case "META-INF/mailcap" => MergeStrategy.last
      case "META-INF/mimetypes.default" => MergeStrategy.last
      case "plugin.properties" => MergeStrategy.last
      case "git.properties" => MergeStrategy.last
      case "log4j.properties" => MergeStrategy.last
      case x =>
        val oldStrategy = (assemblyMergeStrategy in assembly).value
        oldStrategy(x)
    }
  )
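
The assembly keys used above (assembly, assemblyJarName, assemblyMergeStrategy) come from the sbt-assembly plugin, so it has to be enabled in project/plugins.sbt. A minimal sketch; the version shown is an assumption, match it to your sbt release:

project/plugins.sbt

// Provides the `assembly` task and the merge-strategy keys used in build.sbt
// (0.14.6 is an assumed version; pick one compatible with your sbt)
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")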

scala

package example // placeholder package name

import org.apache.spark.sql.SparkSession

object SparkPlayground {
  def main(args: Array[String]): Unit = {
    println("Start---")
    val spark = SparkSession.builder()
      .appName("tmp_work")
      .master("local") // run in a single local JVM; drop this when submitting to a cluster
      .getOrCreate()

    // Classic RDD word count: split each line into words, pair each word
    // with a count of 1, then sum the counts per word.
    val lines = spark.sparkContext.textFile("tmp.txt")
    lines.flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .reduceByKey { (x, y) =>
        // x and y are both values (partial counts for the same key)
        println("x, y:")
        println(x)
        println(y)
        x + y
      }
      .foreach(println)

    spark.stop() // release the local Spark context
  }
}
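
To package the fat jar and run it locally (a minimal sketch; the main class and jar name are the placeholders from build.sbt, and the output path is sbt's default for Scala 2.11):

sbt assembly
spark-submit --class your.main.ClassName --master local target/scala-2.11/your-app.jar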
