大数据小问题
2018-04-25 本文已影响18人
kason_zhang
1 Hadoop Mapreduce 提交YARN jar包冲突问题
最近开发遇到一个jar包冲突的问题, 我们的应用程序里需要操作httpclient-4.5.2.jar和httpcore-4.5.2.jar 这两个jar包,而Hadoop 本身提供的jar包版本是httpclient-4.2.5.jar,httpcore-4.2.5.jar,
image.png
而当我们使用 hadoop jar <可执行Jar包> <Main Class> 提交作业时,Hadoop 会优先加载自己的jar包而忽略应用程序自带的版本,这时候就会出现 ClassNotFoundException 等问题。
解决方案:加入下面这个
-Dmapreduce.task.classpath.user.precedence=true -libjars httpclient-4.5.2.jar,httpcore-4.5.2.jar 强制优先使用用户的jar包(注意 -libjars 的多个jar包之间用逗号分隔,不能用空格)
2 HBase 按 Region 进行 Scan
/**
 * Scans an HBase table one region at a time and prints what it finds.
 *
 * <p>For every region of the table the region's start and end keys are printed,
 * then a scan bounded by those keys is run and the row key of each returned
 * {@code Result} is printed together with the 1-based index of the region.
 *
 * <p>If the table does not exist the method returns without output. If no region
 * list is available a message is printed and the method returns. Any failure while
 * connecting, listing regions, scanning or closing is reported on {@code System.err}
 * instead of being rethrown, so the call stays best-effort.
 *
 * @param tabName name of the table to scan
 * @throws Exception declared for backward compatibility with existing callers
 */
private static void checkTable(String tabName) throws Exception {
    TableName tn = TableName.valueOf(tabName);
    Configuration config = HBaseConfiguration.create();

    // try-with-resources closes table, admin and connection (in that order),
    // and still closes the remaining ones if an earlier close() throws.
    try (Connection connection = ConnectionFactory.createConnection(config);
         Admin admin = connection.getAdmin();
         Table table = connection.getTable(tn)) {

        if (!admin.tableExists(tn)) {
            return;
        }

        List<HRegionInfo> regions = admin.getTableRegions(tn);
        if (regions == null) {
            System.out.print("No region found for table " + tabName);
            // Nothing to iterate over; continuing would dereference null.
            return;
        }

        // Walk every region of the table; regionIndex is the 1-based region counter.
        int regionIndex = 1;
        for (HRegionInfo regionInfo : regions) {
            byte[] startRowkey = regionInfo.getStartKey();
            System.out.println("----start----" + Bytes.toString(startRowkey));
            byte[] endKey = regionInfo.getEndKey();
            System.out.println("----end----" + Bytes.toString(endKey));

            // Bound the scan to this region's key range.
            Scan sc = new Scan();
            // NOTE(review): a batch of 1 presumably caps each Result at one cell, so a
            // row with several columns may be printed more than once - confirm intended.
            sc.setBatch(1);
            sc.setStartRow(startRowkey);
            sc.setStopRow(endKey);

            try (ResultScanner scanner = table.getScanner(sc)) {
                for (Result next : scanner) {
                    byte[] row = next.getRow();
                    System.out.println("第" + regionIndex + " 批 " + Arrays.toString(row));
                }
            }
            regionIndex++;
        }
    } catch (Exception e) {
        // Keep the original non-throwing behaviour, but make the failure visible
        // instead of discarding it.
        System.err.println("Failed to scan table " + tabName + " by region: " + e);
    }
}