马士兵hive的学习
2017-12-11 本文已影响0人
lehuai
软件包地址
https://pan.baidu.com/s/1b7sID4
一,上传并解压hive压缩包到/usr/local
#cd /usr/local
#tar -xvf apache-hive-2.1.1-bin.tar.gz
#ls
#mv apache-hive-2.1.1-bin hive
#cd hive
#ll
vi /etc/profile
image.png
配置文件。
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
export PATH=$PATH:$HIVE_HOME/bin
#source /etc/profile
#echo $HIVE_HOME
#hive
#rm -rf m*_db
修改hive-site.xml配置
#cp hive-default.xml.template hive-site.xml //conf目录里默认没有hive-site.xml,所以从模板复制一份
里面的内容太多,所以把它拿到Windows里修改:用xftp连接,找到里面的hive-site.xml,以Notepad++方式打开。
按ctrl+f搜索,把hive.metastore.schema.verification里面的true改为false.
image.png在虚拟机里新建目录。
#cd ..
#mkdir tmp
image.png
更改目录
image.png把配置文件改为root。
image.png对derby进行格式化
cd ../ //这时候是在hive目录
#schematool -initSchema -dbType derby
启动hive
#hive
exit;
#hive
show databases;
create table wordcount(line string);
show tables;
desc wordcount;
dfs -ls /;
dfs -ls /tmp;
启动hive之前,必须先启动Hadoop的HDFS集群和YARN。
exit;
#cd
#cat input.txt
#hadoop fs -mkdir /wcinput/
#hadoop fs -put input.txt /wcinput/
#cd /usr/local/hive
#hive
show tables;
load data inpath '/wcinput/' overwrite into table wordcount;
desc wordcount;
select * from wordcount;
select split(line, ' ') from wordcount;
OK
["hello","java"]
["hello","c"]
["hello","c++"]
["hello","python"]
["hello","wd"]
["hello","zz"]
Time taken: 0.75 seconds, Fetched: 6 row(s)
select explode(split(line, ' ')) from wordcount;
OK
hello
java
hello
c
hello
c++
hello
python
hello
wd
hello
zz
Time taken: 0.368 seconds, Fetched: 12 row(s)
select explode(split(line, ' ')) as word from wordcount;
OK
hello
java
hello
c
hello
c++
hello
python
hello
wd
hello
zz
Time taken: 0.359 seconds, Fetched: 12 row(s)
select word,count(*) as count from (select explode(split(line, ' ')) as word from wordcount) w group by word;
Total MapReduce CPU Time Spent: 3 seconds 150 msec
OK
c 1
c++ 1
hello 6
java 1
python 1
wd 1
zz 1
Time taken: 29.352 seconds, Fetched: 7 row(s)
create table sgr(qtime string,qid string,qword string,url string) row format delimited fields terminated by ',';
desc sgr;
load data inpath '/sougou.dic' into table sgr;
文档网址:
mashibing.com/hive.html
select count(*) from sgr;
create table sougou_results as select keyword, count(1) as count from (select qword as keyword from sgr) t group by keyword order by count desc;
select * from sougou_results limit 10;
OK
[哄抢救灾物资] 133812
[汶川地震原因] 117532
[封杀莎朗斯通] 25298
[一个暗娼的自述] 19516
[广州军区司令员] 17322
[暗娼李湘] 17168
[成都警方扫黄现场] 10742
[百度] 9916
[尼泊尔地图] 9772
[现役解放军中将名单] 9442
Time taken: 0.223 seconds, Fetched: 10 row(s)