HBase 中存储的十六进制转义字符串(\xNN)与中文互转
2020-02-28 本文已影响0人
海德堡绝尘
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hbase.shaded.org.apache.commons.codec.DecoderException;
import org.apache.hadoop.hbase.shaded.org.apache.commons.codec.binary.Hex;
/**
 * Converts between {@code \xNN}-escaped hex strings (the form in which the
 * sample values in {@link #main} are written) and the UTF-8 text, such as
 * Chinese characters, that those bytes represent.
 *
 * <p>Created 2020/2/27 15:50.
 *
 * @author weijun.nie
 * @version 1.0
 */
public class HbaseBytesUtil {

    /** Upper-case hex digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Small demo: decodes two escaped samples and encodes one piece of text,
     * printing each result to standard output.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException never thrown; kept so the signature
     *         stays compatible with existing callers
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String hbase1 = "\\xE4\\xB8\\x81\\xE4\\xBD\\xB3\\xE5\\xA6\\xAE";
        String hbase2 = "\\xE7\\x89\\x9B\\xE5\\x8F\\x89";
        System.out.println(getHanzByHexString(hbase1));
        System.out.println(getHanzByHexString(hbase2));
        // Same two Chinese characters that hbase2 encodes, written with Unicode
        // escapes so this source file does not depend on the compiler's encoding.
        String s1 = "\u725B\u53C9";
        System.out.println(getHexStringByHaz(s1));
    }

    /**
     * Decodes an escaped hex string into text.
     *
     * <p>Every {@code \x} (or {@code \X}) prefix is removed, the remaining
     * characters are read as pairs of hex digits, and the resulting bytes are
     * decoded as UTF-8. For example {@code \xE7\x89\x9B\xE5\x8F\x89} becomes
     * the bytes {@code E7 89 9B E5 8F 89}, which are then decoded as UTF-8.
     *
     * @param hexString escaped hex string; must not be {@code null}
     * @return the decoded text, or {@code null} when, after removing the
     *         prefixes, the input has an odd number of characters or contains
     *         a character that is not a hex digit
     * @throws NullPointerException if {@code hexString} is {@code null}
     */
    public static String getHanzByHexString(String hexString) {
        if (hexString == null) {
            throw new NullPointerException("hexString");
        }
        // Accept both prefix cases: earlier versions of the encoder in this
        // class upper-cased the whole string and therefore emitted "\X".
        String digits = hexString.replaceAll("\\\\[xX]", "");
        if (digits.length() % 2 != 0) {
            // A dangling half byte cannot be decoded.
            return null;
        }
        byte[] decoded = new byte[digits.length() / 2];
        for (int i = 0; i < decoded.length; i++) {
            int high = Character.digit(digits.charAt(2 * i), 16);
            int low = Character.digit(digits.charAt(2 * i + 1), 16);
            if (high < 0 || low < 0) {
                // Character.digit returns -1 for anything that is not a hex digit.
                return null;
            }
            decoded[i] = (byte) ((high << 4) | low);
        }
        return new String(decoded, StandardCharsets.UTF_8);
    }

    /**
     * Encodes text as an escaped hex string.
     *
     * <p>The text is converted to UTF-8 and every byte is written as
     * {@code \x} followed by exactly two upper-case hex digits, so the result
     * can be passed straight back to {@link #getHanzByHexString(String)}.
     * The two characters U+725B U+53C9 become
     * {@code \xE7\x89\x9B\xE5\x8F\x89}; a line feed becomes {@code \x0A}.
     *
     * @param hanz text to encode; must not be {@code null}
     * @return the escaped hex string, empty when {@code hanz} is empty
     * @throws NullPointerException if {@code hanz} is {@code null}
     */
    public static String getHexStringByHaz(String hanz) {
        if (hanz == null) {
            throw new NullPointerException("hanz");
        }
        byte[] utf8Bytes = hanz.getBytes(StandardCharsets.UTF_8);
        // Each byte expands to four characters: '\', 'x' and two hex digits.
        StringBuilder escaped = new StringBuilder(utf8Bytes.length * 4);
        for (byte b : utf8Bytes) {
            int value = b & 0xFF; // treat the byte as unsigned
            escaped.append("\\x")
                    .append(HEX_DIGITS[value >>> 4])
                    .append(HEX_DIGITS[value & 0x0F]);
        }
        return escaped.toString();
    }
}