HBase 中存储的十六进制转义字符串(\xHH 形式)与中文的互相转换

2020-02-28  本文已影响0人  海德堡绝尘
import org.apache.hadoop.hbase.shaded.org.apache.commons.codec.DecoderException;
import org.apache.hadoop.hbase.shaded.org.apache.commons.codec.binary.Hex;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

/**
 * Converts between text and the {@code \xHH} escaped hexadecimal form in which
 * UTF-8 bytes (for example Chinese characters) are displayed by HBase.
 *
 * <p>Created 2020/2/27.
 *
 * @author weijun.nie
 * @version 1.0
 */
public class HbaseBytesUtil {

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Small demo: decodes two escaped strings and encodes one Chinese string.
     *
     * @param args unused
     * @throws UnsupportedEncodingException never thrown; kept so the signature
     *                                      stays source-compatible
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String hbase1 = "\\xE4\\xB8\\x81\\xE4\\xBD\\xB3\\xE5\\xA6\\xAE";
        String hbase2 = "\\xE7\\x89\\x9B\\xE5\\x8F\\x89";

        System.out.println(getHanzByHexString(hbase1));
        System.out.println(getHanzByHexString(hbase2));

        String s1 = "牛叉";
        System.out.println(getHexStringByHaz(s1));
    }


    /**
     * Decodes an escaped hex string into text.
     *
     * <p>Every {@code \x} marker is removed, the remaining characters are read
     * as pairs of hexadecimal digits (either case), and the resulting bytes are
     * decoded as UTF-8. Example:
     * {@code "\xE7\x89\x9B\xE5\x8F\x89"} becomes {@code "E7899BE58F89"}, which
     * decodes to the two Chinese characters used in {@link #main}.
     *
     * @param hexString escaped hex string such as
     *                  {@code \xE4\xB8\x81\xE4\xBD\xB3\xE5\xA6\xAE}; must not be
     *                  {@code null}
     * @return the decoded text, or {@code null} when the input (after removing
     *         the {@code \x} markers) has an odd length or contains a
     *         non-hexadecimal character
     */
    public static String getHanzByHexString(String hexString) {
        // Strip the literal two-character marker "\x", leaving bare hex digits.
        String bareHex = hexString.replace("\\x", "");

        byte[] decoded = decodeHex(bareHex);
        if (decoded == null) {
            // Malformed input: keep the historical "null on failure" contract.
            return null;
        }
        return new String(decoded, StandardCharsets.UTF_8);
    }

    /**
     * Encodes text as an escaped hex string: each UTF-8 byte becomes
     * {@code \x} followed by exactly two upper-case hexadecimal digits, e.g.
     * the two Chinese characters used in {@link #main} become
     * {@code \xE7\x89\x9B\xE5\x8F\x89}.
     *
     * <p>The {@code \x} marker stays lower-case and every byte is zero-padded,
     * so the result can be passed straight back to
     * {@link #getHanzByHexString(String)}.
     *
     * @param hanz text to encode; must not be {@code null}
     * @return the escaped hex representation of the UTF-8 bytes of {@code hanz}
     */
    public static String getHexStringByHaz(String hanz) {
        byte[] utf8Bytes = hanz.getBytes(StandardCharsets.UTF_8);

        // Each byte expands to four characters: '\', 'x' and two hex digits.
        StringBuilder escaped = new StringBuilder(utf8Bytes.length * 4);
        for (byte b : utf8Bytes) {
            escaped.append("\\x")
                    .append(HEX_DIGITS[(b >> 4) & 0x0F])
                    .append(HEX_DIGITS[b & 0x0F]);
        }
        return escaped.toString();
    }

    /**
     * Turns a string of hexadecimal digit pairs into bytes.
     *
     * @param hex hexadecimal digits only, two per byte
     * @return the decoded bytes, or {@code null} if {@code hex} has an odd
     *         length or contains a character that is not a hexadecimal digit
     */
    private static byte[] decodeHex(String hex) {
        int length = hex.length();
        if ((length & 1) != 0) {
            return null;
        }

        byte[] out = new byte[length / 2];
        for (int i = 0; i < out.length; i++) {
            int high = Character.digit(hex.charAt(2 * i), 16);
            int low = Character.digit(hex.charAt(2 * i + 1), 16);
            if (high < 0 || low < 0) {
                return null;
            }
            out[i] = (byte) ((high << 4) | low);
        }
        return out;
    }

}


上一篇下一篇

猜你喜欢

热点阅读