马士兵hive的学习

2017-12-11  本文已影响0人  lehuai

软件包地址

https://pan.baidu.com/s/1b7sID4

一,上传并解压hive压缩包到/usr/local

#cd /usr/local
#tar -xvf apache-hive-2.1.1-bin.tar.gz 
#ls
#mv apache-hive-2.1.1-bin hive
#cd hive
#ll
vi /etc/profile
image.png
配置文件。
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
export PATH=$PATH:$HIVE_HOME/bin
#source /etc/profile
#echo $HIVE_HOME
#hive
#rm -rf m*_db

修改hive-site.xml配置

#cp hive-default.xml.template hive-site.xml  //因为里面没有,所以复制一份
里面的内容太多,所以把它拿到Windows里改:用Xftp连接,找到里面的hive-site.xml,以Notepad++方式打开。
按Ctrl+F搜索hive.metastore.schema.verification,把它的值由true改为false。
image.png

在虚拟机里新建目录。

#cd ..
#mkdir tmp
image.png

更改目录:把hive-site.xml里所有的${system:java.io.tmpdir}替换为刚才新建的tmp目录(/usr/local/hive/tmp)。

image.png

把配置文件(hive-site.xml)里所有的${system:user.name}改为root。

image.png

对derby进行格式化

cd ../  //这时候是在hive目录
#schematool -initSchema -dbType derby

启动hive

#hive
exit;
#hive
show databases;
create table wordcount(line string);
 show tables;
 desc wordcount;
dfs -ls /;
dfs -ls /tmp;
注意:启动hive之前,必须先启动hadoop的hdfs集群和yarn。
exit;
#cd
#cat input.txt
#hadoop fs -mkdir /wcinput/
#hadoop fs -put input.txt /wcinput/
#cd /usr/local/hive
#hive
show tables;
load data inpath '/wcinput/' overwrite into table wordcount;
desc wordcount;
select * from wordcount;
select split(line, ' ') from wordcount;
OK
["hello","java"]
["hello","c"]
["hello","c++"]
["hello","python"]
["hello","wd"]
["hello","zz"]
Time taken: 0.75 seconds, Fetched: 6 row(s)

select explode(split(line, ' ')) from wordcount;
OK
hello
java
hello
c
hello
c++
hello
python
hello
wd
hello
zz
Time taken: 0.368 seconds, Fetched: 12 row(s)

select explode(split(line, ' ')) as word from wordcount;
OK
hello
java
hello
c
hello
c++
hello
python
hello
wd
hello
zz
Time taken: 0.359 seconds, Fetched: 12 row(s)

select word,count(*) as count from (select explode(split(line, ' ')) as word from wordcount) w group by word;
Total MapReduce CPU Time Spent: 3 seconds 150 msec
OK
c   1
c++ 1
hello   6
java    1
python  1
wd  1
zz  1
Time taken: 29.352 seconds, Fetched: 7 row(s)
create table sgr(qtime string,qid string,qword string,url string) row format delimited fields terminated by ',';

 desc sgr;

load data inpath '/sougou.dic' into table sgr;
文档网址:
mashibing.com/hive.html

select count(*) from sgr;

create table sougou_results as select keyword, count(1) as count from (select qword as keyword from sgr) t group by keyword order by count desc;

select * from sougou_results limit 10;
OK
[哄抢救灾物资]    133812
[汶川地震原因]    117532
[封杀莎朗斯通]    25298
[一个暗娼的自述]   19516
[广州军区司令员]   17322
[暗娼李湘]  17168
[成都警方扫黄现场]  10742
[百度]    9916
[尼泊尔地图] 9772
[现役解放军中将名单] 9442
Time taken: 0.223 seconds, Fetched: 10 row(s)
上一篇下一篇

猜你喜欢

热点阅读