hive基本操作笔记
一、几种操作方式:
1、文件内容:
[root@master tmp]# vi hive_citys.sql
select distinct city from talentdata.data_cleared_11;
2、hive -f 操作:
[root@master tmp]# hive -f /data/weixiujuan/tmp/hive_citys.sql >> cleared_distinct_citys.txt
3、 hive -e 操作:
hive -e "select user, login_timestamp from user_login" > /tmp/out.txt
[root@master data]# hive -e "select count(*) from talentdata.data_cleared_00" > /data/weixiujuan/data/out_0118.txt
4、hive窗口里操作:
hive> insert overwrite local directory "/tmp/out/" row format delimited fields
terminated by "\t" select user, login_time from user_login;
5、hive执行sql脚本:
hive> source /usr/apache-kylin-2.4.1-bin/create_sample_tables.sql;
6、hive执行linux命令:
hive> ! ls /data/juan/data;
二、基本命令:
1、hive导出到本地:
hive> INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/output' ROW FORMAT DELIMITED FIELDS TERMINATED by ',' select * from testA;
2、建表:
drop table tomcatLog_wxj;
create external table tomcatLog_wxj(
client_ip string, request_method string,
uri string,
server_protol string,
time string,
time_zone string,
flag string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
location 'hdfs://cdh1:8020/user/hive/warehouse/flume/tomcat_log';
3、索引:
-- 创建索引
create index index_data_cleared_00_exe_time on table data_cleared_00(exe_time)
as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
with deferred rebuild
IN TABLE index_table_data_cleared_00;
-- 查看索引表
hive> select * from index_table_data_cleared_00;
-- 加载索引数据(使用 with deferred rebuild 创建的索引, 需执行 rebuild 后索引表才有数据)
alter index index_data_cleared_00_exe_time on data_cleared_00 rebuild;
[root@master ~]# hadoop fs -text hdfs://master.hadoop:9000/user/hive/warehouse/talentdata.db/data_cleared_t1_bak/part-00000-782f3236-3950-4b12-ab5a-8a964d6e1ad9-c000
三、常用配置:
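1、hive.cli.print.header = true (查询结果中显示列名; 在hive窗口里可用 set hive.cli.print.header=true; 临时生效)
2、hive.cli.print.current.db = true (提示符中显示当前数据库名; 在hive窗口里可用 set hive.cli.print.current.db=true; 临时生效)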