First Spark Job Submission (Scala and Python)

2021-12-16  抬头挺胸才算活着
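The same word count job is written twice below: first in Scala against a local master, then in Python with PySpark.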
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // setMaster sets the master URL; "local[2]" means run locally with 2 worker threads
    val sparkConf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("WordCount")
    val sc = new SparkContext(sparkConf)

    val lines: RDD[String] = sc.textFile("C:/java/spark_practise/src/main/resources/input/word.txt")
    wordCount1(lines)
    sc.stop()
  }

  def wordCount1(lines: RDD[String]): Unit = {
    // split lines into words, pair each word with 1, then sum the counts per word
    val words: RDD[String] = lines.flatMap(_.split(" "))
    val wordToOne: RDD[(String, Int)] = words.map((_, 1))
    val wordToCount: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _)
    wordToCount.foreach(println)
  }
}
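The object above runs directly from the IDE because the master is hard-coded to local[2]. To submit it as a real job, the project is typically packaged into a jar and handed to spark-submit; a minimal sketch, where the jar name is a placeholder for whatever the build actually produces:

spark-submit --class WordCount --master "local[2]" spark_practise.jar

The same word count in Python (PySpark):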
import sys

from pyspark import SparkContext, SparkConf

if __name__ == "__main__":

  # create Spark context with Spark configuration
  conf = SparkConf().setAppName("Word Count - Python").set("spark.hadoop.yarn.resourcemanager.address", "192.168.0.104:8032")
  sc = SparkContext(conf=conf)

  # read in text file and split each document into words
  words = sc.textFile("C:/java/spark_practise/src/main/resources/input/word.txt").flatMap(lambda line: line.split(" "))

  # count the occurrences of each word
  wordCounts = words.map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)

  # collect() pulls all results back to the driver, which is fine for a small test file
  print("spark python output................")
  print(wordCounts.collect())
  sc.stop()
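The configuration sets spark.hadoop.yarn.resourcemanager.address, which suggests the script is meant to run on a YARN cluster rather than purely locally. A minimal submission sketch, assuming the code above is saved as word_count.py (the file name is a placeholder) and the client machine has the cluster's Hadoop configuration available:

spark-submit --master yarn word_count.py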