14. Hadoop: Importing MySQL Data into HBase and Hive with Sqoop


This section covers:

Using Sqoop to import data from MySQL into HBase and Hive

I. Preparing the Data

Data preparation (on Node3)

Create the sqoop_test database in MySQL:

       # mysql -uroot -p123456

mysql> create database sqoop_test;

Query OK, 1 row affected (0.00 sec)

Create a table in sqoop_test:

mysql> use sqoop_test;

Database changed

mysql> CREATE TABLE `student_sqoop` (   

    ->  `id` int(11) NOT NULL,   

    ->  `name` varchar(20) NOT NULL,   

    ->  PRIMARY KEY (`id`)   

    -> ) ENGINE=MyISAM  DEFAULT CHARSET=utf8; 

Query OK, 0 rows affected (0.04 sec)

Insert some test rows:

mysql> insert into student_sqoop (id,name) values (1,'michael'); 

mysql> insert into student_sqoop (id,name) values (2,'ted');

mysql> insert into student_sqoop (id,name) values (3,'jack');
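As a quick sanity check before moving on, given the three inserts above the table should now look like this:

mysql> select * from student_sqoop;

+----+---------+
| id | name    |
+----+---------+
|  1 | michael |
|  2 | ted     |
|  3 | jack    |
+----+---------+
3 rows in set (0.00 sec)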

II. Preparing the HBase Table (Node1)

       # hbase shell

hbase(main):001:0> list

TABLE                                                                 

student                                                               

1 row(s) in 0.5040 seconds

=> ["student"]

hbase(main):002:0> create 'student_sqoop','info'

0 row(s) in 2.5410 seconds

=> Hbase::Table - student_sqoop

hbase(main):003:0> list

TABLE

student

student_sqoop

2 row(s) in 0.0130 seconds

=> ["student", "student_sqoop"]

III. Importing from MySQL into HBase (Node1)

       # sudo -u hdfs sqoop import --connect jdbc:mysql://node3.hadoop.com/sqoop_test --username root --password 123456 --table student_sqoop --hbase-table student_sqoop --column-family info --hbase-row-key id -m 1

Check the result:

hbase(main):004:0> scan 'student_sqoop'

ROW                        COLUMN+CELL                                                               

1                        column=info:name, timestamp=1556038438897, value=michael                   

2                        column=info:name, timestamp=1556038438897, value=ted                       

3                        column=info:name, timestamp=1556038438897, value=jack                     

3 row(s) in 0.3280 seconds
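Sqoop can also create the HBase table itself: the --hbase-create-table flag makes it create the target table (with the given column family) when it does not exist yet, so step II becomes optional. A variant of the same command, assuming the same cluster layout:

       # sudo -u hdfs sqoop import --connect jdbc:mysql://node3.hadoop.com/sqoop_test --username root --password 123456 --table student_sqoop --hbase-table student_sqoop --column-family info --hbase-row-key id --hbase-create-table -m 1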

IV. Importing from MySQL into a Hive Internal Table (Node1)

Sqoop needs Hive's hive-common jar on its classpath. Copy it from hive/lib into Sqoop's lib directory (the version in the file name depends on your Hive build; on this CDH 5.16.2 cluster it is hive-common-1.1.0-cdh5.16.2.jar):

       # cp /usr/lib/hive/lib/hive-common-1.1.0-cdh5.16.2.jar /usr/lib/sqoop/lib/

Hadoop also needs Hive's shims jars:

       # cd /usr/lib/hive/lib

       # cp hive-shims*.jar /usr/lib/hadoop/lib/
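A simple way to confirm both copies landed where Sqoop and Hadoop will look for them:

       # ls /usr/lib/sqoop/lib/hive-common*.jar /usr/lib/hadoop/lib/hive-shims*.jar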

       # sqoop import --connect jdbc:mysql://node3.hadoop.com/sqoop_test --username root --password 123456 --table student_sqoop --hive-import --hive-table hive_student --create-hive-table

hive> select * from hive_student;

OK

1 michael

2 ted

3 jack

Time taken: 0.238 seconds, Fetched: 3 row(s)
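Under the hood, --hive-import first stages the rows in an HDFS directory and then issues a LOAD DATA statement into the Hive table, while --create-hive-table makes Sqoop create hive_student (and fail if a table of that name already exists). To inspect the table's storage location and properties afterwards:

hive> describe formatted hive_student;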

V. Importing from MySQL into a Hive Partitioned Table

1. Create the source table (Node3)

mysql> CREATE TABLE `people` (

    ->  `id` int(11) NOT NULL,

    ->  `name` varchar(20) NOT NULL,

    ->  `year` varchar(10),

    ->  `month` varchar(10),

    ->  `day` varchar(10),

    ->  PRIMARY KEY (`id`)   

    -> ) ENGINE=MyISAM DEFAULT CHARSET=utf8;

2. Insert data (Node3)

mysql> insert into people values (1,'jack','2015','01','02');

mysql> insert into people values (2,'ted','2015','01','02');

mysql> insert into people values (3,'billy','2015','01','02');

mysql> insert into people values (4,'sara','2015','01','03');

3. Import the data (Node1)

       # sqoop import --connect jdbc:mysql://node3.hadoop.com/sqoop_test --username root --password 123456 --query 'select id,name from people where year="2015"  AND $CONDITIONS'  --direct -m 2 --split-by id --hive-import --create-hive-table --hive-table hive_people --target-dir /user/hive_people --hive-partition-key year --hive-partition-value '2015'

A few notes on the options:

--query takes the SQL statement to run against MySQL; the AND $CONDITIONS clause must not be omitted, since Sqoop replaces it with the split predicate for each mapper.

--split-by names the column used to divide the work across mappers, typically the primary key.

--hive-partition-key names the partition column of the Hive table, and --hive-partition-value supplies the value for this import.
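Once the import finishes, the new partition can be verified directly from the Hive shell:

hive> show partitions hive_people;

OK

year=2015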

hive> select * from hive_people;

OK

1 jack 2015

2 ted 2015

3 billy 2015

4 sara 2015

Time taken: 0.439 seconds, Fetched: 4 row(s)

       # sudo -u hdfs hadoop fs -ls /user/hive/warehouse/hive_people

Found 1 items

drwxrwxrwt  - root supergroup          0 2020-07-05 18:05 /user/hive/warehouse/hive_people/year=2015
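The actual data files written by the import job sit inside that partition directory and can be inspected with a cat (the file names are generated per mapper):

       # sudo -u hdfs hadoop fs -cat /user/hive/warehouse/hive_people/year=2015/*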
