hive任务报错:Failed with exception java.util.ConcurrentModificationException
insert overwrite table tmp.table_1 partition(day='20190425')
select * from tmp.table_2 where day='20190428';
虽然执行成功,但是结尾报这个错误:
Failed with exception java.util.ConcurrentModificationException
19/04/29 11:30:00 ERROR exec.Task: Failed with exception java.util.ConcurrentModificationException
org.apache.hadoop.hive.ql.metadata.HiveException: java.util.ConcurrentModificationException
at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2942)
at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3198)
at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1532)
at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1461)
at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:497)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:742)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.util.ConcurrentModificationException
at java.util.ArrayList$SubList.checkForComodification(ArrayList.java:1231)
at java.util.ArrayList$SubList.size(ArrayList.java:1040)
at java.util.AbstractList.clear(AbstractList.java:234)
at com.google.common.collect.Iterables.removeIfFromRandomAccessList(Iterables.java:209)
at com.google.common.collect.Iterables.removeIf(Iterables.java:180)
at org.apache.hadoop.hive.io.HdfsUtils.removeBaseAclEntries(HdfsUtils.java:155)
at org.apache.hadoop.hive.io.HdfsUtils.setFullFileStatus(HdfsUtils.java:77)
at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2919)
at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:2911)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. java.util.ConcurrentModificationException
19/04/29 11:30:00 ERROR ql.Driver: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. java.util.ConcurrentModificationException
下面是源码中关于文件权限继承的开关代码:
HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
产生问题的原因:
hive的查询结果在进行move操作时,需要对文件进行权限的授权,多个文件的授权是并发进行的,hive源码中该操作是在一个线程池中
执行的,而该操作在多线程下存在线程同步问题,因此会抛出该异常。这是hive的一个bug,截至当前新版本Apache Hive 2.1.1仍未修复该问题;
可以通过关闭hive的文件权限继承 hive.warehouse.subdir.inherit.perms=false 来规避该问题。
解决方法:在hive-site.xml中将 hive.warehouse.subdir.inherit.perms 设置为 false:
<property>
<name>hive.warehouse.subdir.inherit.perms</name>
<value>false</value>
<description>
Set this to false if the table directories should be created
with the permissions derived from dfs umask instead of inheriting the permission of the warehouse or database directory.
</description>
</property>