SPARK 简单操作
2017-08-16 本文已影响23人
至极L
1、从本地上传文件到hdfs
# Upload the local ~/Downloads/tac to HDFS. No destination path is given;
# NOTE(review): presumably it lands in the HDFS working directory as ./tac — confirm.
hdfs dfs -put ~/Downloads/tac
删除文件夹
# Recursively (-r) delete the tac directory at the relative HDFS path ./tac.
hdfs dfs -rm -r ./tac
# Work in the dataset directory that holds user_log.csv.
cd /usr/local/dbtaobao/dataset
# Delete line 1 of user_log.csv in place (-i).
# '1d' deletes line 1; likewise '3d' deletes line 3 and 'Nd' deletes line N.
# The explanation is kept on its own comment lines: trailing "//..." text is not
# a shell comment and would be handed to sed as additional file operands.
sed -i '1d' user_log.csv
# Show the first 5 records; the field-name (header) row should no longer appear.
head -5 user_log.csv
2、创建trajectory表
在做
-- Create the external table trajectory in database dbtac, stored as
-- tab-delimited text.
-- NOTE(review): the files loaded below are named *.csv while the field
-- delimiter here is a tab; confirm the data really is tab-separated.
-- NOTE(review): "longtitude" looks like a misspelling of "longitude"; it is
-- kept unchanged so existing queries against this column keep working.
CREATE EXTERNAL TABLE dbtac.trajectory (
    vme_id STRING,
    gps_time STRING,
    work_state INT,
    ultrasonic_station INT,
    limit_switch_state INT,
    work_deep INT,
    longtitude DOUBLE,
    latitude DOUBLE,
    elevation INT,
    speed INT,
    course_direction INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

-- Every statement below names the table as dbtac.trajectory so it targets the
-- table created above regardless of which database is currently selected.

-- Load a single file from an HDFS path (no LOCAL keyword).
LOAD DATA INPATH '/dbtac/tac/S901100003_2015.csv' INTO TABLE dbtac.trajectory;

-- Load a single file from the local filesystem (LOCAL keyword).
LOAD DATA LOCAL INPATH '/win/test/test10.csv' INTO TABLE dbtac.trajectory;

-- Load every .csv file under /dbtac/tac via a wildcard path.
LOAD DATA INPATH '/dbtac/tac/*.csv' INTO TABLE dbtac.trajectory;

-- Drop the table; IF EXISTS makes this a no-op when the table is absent.
DROP TABLE IF EXISTS dbtac.trajectory;