spark加载hdfs数据中文乱码

2018-11-24  本文已影响0人  IT_小白

解决加载的数据乱码

    object test {
      /**
       * Word-count demo that reads a GBK-encoded text file from HDFS without
       * mojibake (garbled Chinese characters).
       *
       * Spark's default `sc.textFile` decodes input as UTF-8; for GBK data we
       * instead load raw `Text` records via `hadoopFile` and decode the bytes
       * explicitly with the "GBK" charset.
       *
       * @param args unused command-line arguments
       */
      def main(args: Array[String]): Unit = {
        // BUG FIX: the original declared `val inputpath` twice ("path" then
        // "/input/123"), a duplicate-definition compile error. Keep the real path.
        val inputPath = "/input/123"

        val sparkConf: SparkConf = new SparkConf()
          .setMaster("local[2]")
          .setAppName(test.getClass.getSimpleName)
        val sc: SparkContext = new SparkContext(sparkConf)

        try {
          // Load raw records; decode each Text's bytes as GBK to fix the
          // Chinese-character corruption. Note: `Text.getBytes` returns the
          // backing buffer, which may be longer than the record, so the valid
          // length must come from `getLength`.
          val data: RDD[String] = sc
            .hadoopFile(inputPath, classOf[TextInputFormat], classOf[LongWritable], classOf[Text])
            .map { case (_, text) => new String(text.getBytes, 0, text.getLength, "GBK") }

          // Tab-separated tokens -> (token, 1) -> summed counts, printed on executors.
          val words: RDD[String] = data.flatMap(_.split("\t"))
          val counts: RDD[(String, Int)] = words.map((_, 1)).reduceByKey(_ + _)
          counts.foreach(println)
        } finally {
          // BUG FIX: the original never stopped the SparkContext (resource leak).
          sc.stop()
        }
      }
    }
上一篇 下一篇

猜你喜欢

热点阅读