hive 迁移数据

2023-03-11  本文已影响0人  后知不觉1

1、distcp 传输数据

在传输过程中，如果源端目录有写入或删除操作，会导致传输失败

#增量更新,更新时覆盖,带宽100MB/s,跳过检查版本,设置队列
hadoop distcp   -Dmapreduce.job.queuename=ana hdfs://xxx/user/hive/warehouse hdfs://xxx/apps/warehouse/

#可以设置队列
#设置带宽

详细设置

2、导出元数据表

mysqldump -uhive -hxxxx -phive hive > hive-cmal-metastore.sql
#替换hive-metastore.bak的存储位置,需要根据当时调整
 
 
替换defaultFS:ssed  's/hdfs:\/\/bigdatacluster\/user\/hive\/warehouse\/hdfs:\/\/testCluster\/apps\/hive_test\/warehouse/g' hive-cmal-metastore.sql

3、导入到新的meta 元数据库

mysql -h xxx -uhive -pxxx  hive_meta < hive-metastore.sql

4、修复分区

因为迁移后元数据库中缺少新集群的分区信息，分区只能通过 alter table ... add partition 逐个修复

附录: 导出原有hive 表分区

1、 导出库名
export HADOOP_USER_NAME=hive
mkdir ./sql
mkdir ./tmp

#查找库名
hive -e 'show databases;' > ./tmp/dbname.txt

#导出建库语句
while read dbname
do
    hive -e "show create database $dbname;" >> ./sql/create_dbs.sql
    echo ";" >> ./sql/create_dbs.sql
done < ./tmp/dbname.txt


#查找表
while read dbname
do
    hive -e "use $dbname;show tables;" > ./tmp/${dbname}_tables.txt
done < ./tmp/dbname.txt


#导出建表语句
while read dbname
do
    echo "use ${dbname};" > ./sql/create_${dbname}_tables.sql
    while read tbname
    do
        hive -e "use $dbname;show create table $tbname;" >> ./sql/create_${dbname}_tables.sql
        echo ";" >> ./sql/create_${dbname}_tables.sql
    done < ./tmp/${dbname}_tables.txt
done < ./tmp/dbname.txt
2、生成 alter 语句
#添加分区
while read dbname
do
    while read tbname
    do
       echo "dbname: ${dbname},tbname: ${tbname}" 
       partition_file=./partitions/${dbname}/${tbname}_partitions.txt
       if [[ -s ${partition_file} ]]; then
           while read partions
           do
            partion_key=`echo $partions |awk -F'=' '{print $1}'`
            partion_value=`echo $partions |awk -F'=' '{print $2}'`
            echo "alter table ${dbname}.${tbname} add partition(${partion_key}='"${partion_value}"');" >> ./alter_partitions.txt
           done < ${partition_file}
       else
           echo "empty"
       fi
    done < ./tmp/${dbname}_tables.txt
done < ./tmp/dbname.txt
上一篇下一篇

猜你喜欢

热点阅读