(十二)SparkSQL Catalog访问Hive元数据信息
2018-09-26 本文已影响0人
白面葫芦娃92
SparkSQL如何直接访问hive元数据信息
不再需要去mysql里的表里去查找
[hadoop@hadoop001 bin]$ ./spark-shell --master local[2] --jars ~/software/mysql-connector-java-5.1.27.jar
scala> val catalog = spark.catalog
scala> catalog.listDatabases.show(false)
scala> catalog.listDatabases.show(false)
+---------------+----------------------------------+------------------------------------------------------------------+
|name |description |locationUri |
+---------------+----------------------------------+------------------------------------------------------------------+
|default |Default Hive database |hdfs://192.168.137.141:9000/user/hive/warehouse |
|hive | |hdfs://192.168.137.141:9000/user/hive/warehouse/hive.db |
|hive2_ruozedata|this is ruozedata 03 test database|hdfs://192.168.137.141:9000/user/hive/warehouse/hive2_ruozedata.db|
|hive3 | |hdfs://192.168.137.141:9000/zh |
|ruozedata | |hdfs://192.168.137.141:9000/user/hive/warehouse/ruozedata.db |
+---------------+----------------------------------+------------------------------------------------------------------+
scala> catalog.listDatabases.select("name").show(false)
+---------------+
|name |
+---------------+
|default |
|hive |
|hive2_ruozedata|
|hive3 |
|ruozedata |
+---------------+
scala> catalog.listTables("default").show(false)
+---------------+--------+-----------+---------+-----------+
|name |database|description|tableType|isTemporary|
+---------------+--------+-----------+---------+-----------+
|hive_array |default |null |MANAGED |false |
|hive_map |default |null |MANAGED |false |
|hive_rownumber |default |null |MANAGED |false |
|hive_struct |default |null |MANAGED |false |
|hive_wc |default |null |MANAGED |false |
|rating_json |default |null |MANAGED |false |
|ruoze_test |default |null |MANAGED |false |
|user_click1 |default |null |MANAGED |false |
|user_click_tmp1|default |null |MANAGED |false |
|zh |default |null |MANAGED |false |
+---------------+--------+-----------+---------+-----------+
scala> catalog.listColumns("ruozedata","ruozedata_emp").show(false)
+--------+-----------+--------+--------+-----------+--------+
|name |description|dataType|nullable|isPartition|isBucket|
+--------+-----------+--------+--------+-----------+--------+
|empno |null |int |true |false |false |
|ename |null |string |true |false |false |
|job |null |string |true |false |false |
|mgr |null |int |true |false |false |
|hiredate|null |string |true |false |false |
|salary |null |double |true |false |false |
|comm |null |double |true |false |false |
|deptno |null |int |true |false |false |
+--------+-----------+--------+--------+-----------+--------+
查看MySQL验证一下:
mysql> show databases;
mysql> use ruozedata_basic03;
//查询所有数据库
mysql> select * from dbs;
+-------+------------------------------------+--------------------------------------------------------------------+-----------------+------------+------------+
| DB_ID | DESC | DB_LOCATION_URI | NAME | OWNER_NAME | OWNER_TYPE |
+-------+------------------------------------+--------------------------------------------------------------------+-----------------+------------+------------+
| 1 | Default Hive database | hdfs://192.168.137.141:9000/user/hive/warehouse | default | public | ROLE |
| 6 | NULL | hdfs://192.168.137.141:9000/user/hive/warehouse/hive.db | hive | hadoop | USER |
| 9 | this is ruozedata 03 test database | hdfs://192.168.137.141:9000/user/hive/warehouse/hive2_ruozedata.db | hive2_ruozedata | hadoop | USER |
| 10 | NULL | hdfs://192.168.137.141:9000/zh | hive3 | hadoop | USER |
| 11 | NULL | hdfs://192.168.137.141:9000/user/hive/warehouse/ruozedata.db | ruozedata | hadoop | USER |
+-------+------------------------------------+--------------------------------------------------------------------+-----------------+------------+------------+
5 rows in set (0.00 sec)
//查询数据库default的所有表格
mysql> select * from tbls where DB_ID=1;
+--------+-------------+-------+------------------+--------+-----------+-------+-----------------+---------------+--------------------+--------------------+
| TBL_ID | CREATE_TIME | DB_ID | LAST_ACCESS_TIME | OWNER | RETENTION | SD_ID | TBL_NAME | TBL_TYPE | VIEW_EXPANDED_TEXT | VIEW_ORIGINAL_TEXT |
+--------+-------------+-------+------------------+--------+-----------+-------+-----------------+---------------+--------------------+--------------------+
| 1 | 1529135635 | 1 | 0 | hadoop | 0 | 1 | zh | MANAGED_TABLE | NULL | NULL |
| 31 | 1529287737 | 1 | 0 | hadoop | 0 | 41 | hive_wc | MANAGED_TABLE | NULL | NULL |
| 32 | 1529288829 | 1 | 0 | hadoop | 0 | 42 | rating_json | MANAGED_TABLE | NULL | NULL |
| 33 | 1529289493 | 1 | 0 | hadoop | 0 | 43 | hive_rownumber | MANAGED_TABLE | NULL | NULL |
| 76 | 1531056372 | 1 | 0 | hadoop | 0 | 91 | user_click_tmp1 | MANAGED_TABLE | NULL | NULL |
| 78 | 1531056697 | 1 | 0 | hadoop | 0 | 95 | user_click1 | MANAGED_TABLE | NULL | NULL |
| 81 | 1532851994 | 1 | 0 | hadoop | 0 | 101 | hive_array | MANAGED_TABLE | NULL | NULL |
| 83 | 1532855555 | 1 | 0 | hadoop | 0 | 103 | hive_map | MANAGED_TABLE | NULL | NULL |
| 85 | 1532856628 | 1 | 0 | hadoop | 0 | 105 | hive_struct | MANAGED_TABLE | NULL | NULL |
| 91 | 1535883337 | 1 | 0 | hadoop | 0 | 111 | ruoze_test | MANAGED_TABLE | NULL | NULL |
+--------+-------------+-------+------------------+--------+-----------+-------+-----------------+---------------+--------------------+--------------------+
10 rows in set (0.00 sec)
//查询ruozedata数据库中ruoze_emp的所有列,需分几步才能完成
mysql> select * from tbls where DB_ID=11;
+--------+-------------+-------+------------------+--------+-----------+-------+-------------------------+---------------+--------------------+--------------------+
| TBL_ID | CREATE_TIME | DB_ID | LAST_ACCESS_TIME | OWNER | RETENTION | SD_ID | TBL_NAME | TBL_TYPE | VIEW_EXPANDED_TEXT | VIEW_ORIGINAL_TEXT |
+--------+-------------+-------+------------------+--------+-----------+-------+-------------------------+---------------+--------------------+--------------------+
| 11 | 1529212083 | 11 | 0 | hadoop | 0 | 11 | ruozedata_person | MANAGED_TABLE | NULL | NULL |
| 12 | 1529213071 | 11 | 0 | hadoop | 0 | 12 | ruozedata_emp | MANAGED_TABLE | NULL | NULL |
| 13 | 1529214712 | 11 | 0 | hadoop | 0 | 13 | ruozedata_emp2 | MANAGED_TABLE | NULL | NULL |
| 14 | 1529215446 | 11 | 0 | hadoop | 0 | 14 | ruozedata_emp3_new | MANAGED_TABLE | NULL | NULL |
| 17 | 1529220164 | 11 | 0 | hadoop | 0 | 17 | ruozedata_emp4 | MANAGED_TABLE | NULL | NULL |
| 22 | 1529234706 | 11 | 0 | hadoop | 0 | 22 | a | MANAGED_TABLE | NULL | NULL |
| 24 | 1529235629 | 11 | 0 | hadoop | 0 | 24 | b | MANAGED_TABLE | NULL | NULL |
| 25 | 1529237177 | 11 | 0 | hadoop | 0 | 25 | order_partition | MANAGED_TABLE | NULL | NULL |
| 26 | 1529241702 | 11 | 0 | hadoop | 0 | 29 | order_4_partition | MANAGED_TABLE | NULL | NULL |
| 27 | 1529242467 | 11 | 0 | hadoop | 0 | 31 | order_mulit_partition | MANAGED_TABLE | NULL | NULL |
| 28 | 1529244966 | 11 | 0 | hadoop | 0 | 33 | ruozedata_static_emp | MANAGED_TABLE | NULL | NULL |
| 29 | 1529245719 | 11 | 0 | hadoop | 0 | 35 | ruozedata_dynamic_emp | MANAGED_TABLE | NULL | NULL |
| 30 | 1529247858 | 11 | 0 | hadoop | 0 | 40 | dual | MANAGED_TABLE | NULL | NULL |
| 36 | 1530106021 | 11 | 0 | hadoop | 0 | 46 | emp_sqoop | MANAGED_TABLE | NULL | NULL |
| 41 | 1530107925 | 11 | 0 | hadoop | 0 | 51 | ruozedata_emp_partition | MANAGED_TABLE | NULL | NULL |
| 51 | 1530622246 | 11 | 0 | hadoop | 0 | 66 | city_info | MANAGED_TABLE | NULL | NULL |
| 52 | 1530622255 | 11 | 0 | hadoop | 0 | 67 | product_info | MANAGED_TABLE | NULL | NULL |
| 72 | 1531051049 | 11 | 0 | hadoop | 0 | 87 | user_click_tmp | MANAGED_TABLE | NULL | NULL |
| 79 | 1531060915 | 11 | 0 | hadoop | 0 | 96 | product_rank | MANAGED_TABLE | NULL | NULL |
| 80 | 1531061272 | 11 | 0 | hadoop | 0 | 97 | user_click | MANAGED_TABLE | NULL | NULL |
| 86 | 1532861861 | 11 | 0 | hadoop | 0 | 106 | ruoze_dept | MANAGED_TABLE | NULL | NULL |
+--------+-------------+-------+------------------+--------+-----------+-------+-------------------------+---------------+--------------------+--------------------+
21 rows in set (0.00 sec)
mysql> select SD_ID,CD_ID from sds where SD_ID=12;
+-------+-------+
| SD_ID | CD_ID |
+-------+-------+
| 12 | 12 |
+-------+-------+
1 row in set (0.01 sec)
mysql> select * from columns_v2 where CD_ID=12;
+-------+---------+-------------+-----------+-------------+
| CD_ID | COMMENT | COLUMN_NAME | TYPE_NAME | INTEGER_IDX |
+-------+---------+-------------+-----------+-------------+
| 12 | NULL | comm | double | 6 |
| 12 | NULL | deptno | int | 7 |
| 12 | NULL | empno | int | 0 |
| 12 | NULL | ename | string | 1 |
| 12 | NULL | hiredate | string | 4 |
| 12 | NULL | job | string | 2 |
| 12 | NULL | mgr | int | 3 |
| 12 | NULL | salary | double | 5 |
+-------+---------+-------------+-----------+-------------+
8 rows in set (0.00 sec)