数据清洗
2018-08-04 本文已影响0人
扣篮的左手
import org.apache.spark.sql.SparkSession
/**
 * Small Spark job that reads a space-delimited access log, echoes every raw
 * line, and then prints the first field of each line (the IP address,
 * according to the original author's note).
 */
object DataCleaning {

  /** Input used when no path is supplied on the command line. */
  private val DefaultInputPath = "file:///f:/access.txt"

  /**
   * Returns the text that precedes the first space in `line`.
   *
   * `String.split` drops trailing empty strings, so a line made up only of
   * spaces yields an empty array; indexing element 0 directly would throw
   * `ArrayIndexOutOfBoundsException`. Such lines map to the empty string
   * instead, which is also what an empty line produces.
   *
   * @param line one raw line of the log
   * @return the first space-delimited field, or `""` when there is none
   */
  private def firstField(line: String): String =
    line.split(" ").headOption.getOrElse("")

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input path. When absent the
   *             job reads [[DefaultInputPath]].
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val spark = SparkSession.builder()
      .appName("DataCleaning")
      .master("local[2]")
      .getOrCreate()

    try {
      val access = spark.sparkContext.textFile(inputPath)

      // Dump the raw lines first.
      access.foreach(println)

      // Extract the IP address (first field) of every line and print it.
      access.map(firstField).foreach(println)
    } finally {
      // Always release the session, even if reading or an action fails.
      spark.stop()
    }
  }
}