[ 编码 ]原字节不变 信息不变

2021-08-29  本文已影响0人  一个好汉

测试在编码转换过程中是否会丢失信息

事实证明是会的。
但是只要在转换过程中原字节没有发生变化,就仍然可以还原出原来的结果。

/**
     * Runs the charset round-trip experiment over every combination of source
     * charset, intermediate charset and sample string.
     *
     * <p>Each combination is passed to {@code translate(src, srcCharset, middleCharset)}.
     * The combinations are visited source charset first, then intermediate
     * charset, then sample string, with the charsets in the order UTF-8, GBK,
     * ISO-8859-1.
     *
     * @throws java.nio.charset.UnsupportedCharsetException if the running JVM
     *         does not provide a charset named {@code GBK}
     */
    public static void conversion() {
        List<String> srcStrings = Arrays.asList("少年", "少µ年", "少");
        // Resolve GBK through the public lookup API rather than instantiating a
        // concrete charset class (presumably a JDK-internal implementation
        // class, which is not part of the supported API).
        List<Charset> charsets = Arrays.asList(
                StandardCharsets.UTF_8, Charset.forName("GBK"), StandardCharsets.ISO_8859_1);
        // Every source charset is paired with the same set of intermediate
        // charsets, so a plain cross product replaces the former lookup map.
        for (Charset srcCharset : charsets) {
            for (Charset middleCharset : charsets) {
                for (String src : srcStrings) {
                    translate(src, srcCharset, middleCharset);
                }
            }
        }
    }

/**
 * Sends {@code src} on a round trip through an intermediate charset and logs
 * every step.
 *
 * <p>The string is encoded with {@code srcCharset}, the bytes are decoded and
 * re-encoded with {@code middleCharset}, and the resulting bytes are decoded
 * with {@code srcCharset} again. When the final string equals {@code src}, an
 * additional "[equals]" line is written at ERROR level.
 *
 * @param src           the string to convert
 * @param srcCharset    charset used for the first encode and the last decode
 * @param middleCharset charset used for the intermediate decode and encode
 */
private static void translate(String src, Charset srcCharset, Charset middleCharset) {
        final String srcCharsetName = srcCharset.name();
        final String middleCharsetName = middleCharset.name();
        log.info("src :{}, srcCharset: {}, middleCharset: {}",
                src, srcCharsetName, middleCharsetName);

        // Step 1: encode the original text with the source charset.
        final byte[] encoded = src.getBytes(srcCharset);
        log.info("src bytes:{}", HexUtil.encodeHexStr(encoded));

        // Step 2: interpret those bytes as text in the intermediate charset.
        final String viaMiddle = new String(encoded, middleCharset);
        log.info("after decode : {}", viaMiddle);

        // Step 3: turn the intermediate text back into bytes.
        final byte[] reEncoded = viaMiddle.getBytes(middleCharset);
        log.info("after middle conversion bytes:{}", HexUtil.encodeHexStr(reEncoded));

        // Step 4: decode the bytes with the source charset again.
        final String restored = new String(reEncoded, srcCharset);
        log.info("final String:{}", restored);

        if (!src.equals(restored)) {
            return;
        }
        log.error("[equals]--- src :{}, srcCharset: {}, middleCharset: {}",
                src, srcCharsetName, middleCharsetName);
    }

结果:

src :少年, srcCharset: UTF-8, middleCharset: UTF-8
src bytes:e5b091e5b9b4
after decode : 少年
after middle conversion bytes:e5b091e5b9b4
final String:少年
[equals]--- src :少年, srcCharset: UTF-8, middleCharset: UTF-8
src :少µ年, srcCharset: UTF-8, middleCharset: UTF-8
src bytes:e5b091c2b5e5b9b4
after decode : 少µ年
after middle conversion bytes:e5b091c2b5e5b9b4
final String:少µ年
[equals]--- src :少µ年, srcCharset: UTF-8, middleCharset: UTF-8
src :少, srcCharset: UTF-8, middleCharset: UTF-8
src bytes:e5b091
after decode : 少
after middle conversion bytes:e5b091
final String:少
[equals]--- src :少, srcCharset: UTF-8, middleCharset: UTF-8
src :少年, srcCharset: UTF-8, middleCharset: GBK
src bytes:e5b091e5b9b4
after decode : 灏戝勾
after middle conversion bytes:e5b091e5b9b4
final String:少年
[equals]--- src :少年, srcCharset: UTF-8, middleCharset: GBK
src :少µ年, srcCharset: UTF-8, middleCharset: GBK
src bytes:e5b091c2b5e5b9b4
after decode : 灏懧靛勾
after middle conversion bytes:e5b091c2b5e5b9b4
final String:少µ年
[equals]--- src :少µ年, srcCharset: UTF-8, middleCharset: GBK
src :少, srcCharset: UTF-8, middleCharset: GBK
src bytes:e5b091
after decode : 灏�
after middle conversion bytes:e5b03f
final String:�?
src :少年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src bytes:e5b091e5b9b4
after decode : å°�å¹´
after middle conversion bytes:e5b091e5b9b4
final String:少年
[equals]--- src :少年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src :少µ年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src bytes:e5b091c2b5e5b9b4
after decode : å°�Âµå¹´
after middle conversion bytes:e5b091c2b5e5b9b4
final String:少µ年
[equals]--- src :少µ年, srcCharset: UTF-8, middleCharset: ISO-8859-1
src :少, srcCharset: UTF-8, middleCharset: ISO-8859-1
src bytes:e5b091
after decode : å°�
after middle conversion bytes:e5b091
final String:少
[equals]--- src :少, srcCharset: UTF-8, middleCharset: ISO-8859-1
src :少年, srcCharset: GBK, middleCharset: UTF-8
src bytes:c9d9c4ea
after decode : ����
after middle conversion bytes:efbfbdefbfbdefbfbdefbfbd
final String:锟斤拷锟斤拷
src :少µ年, srcCharset: GBK, middleCharset: UTF-8
src bytes:c9d93fc4ea
after decode : ��?��
after middle conversion bytes:efbfbdefbfbd3fefbfbdefbfbd
final String:锟斤拷?锟斤拷
src :少, srcCharset: GBK, middleCharset: UTF-8
src bytes:c9d9
after decode : ��
after middle conversion bytes:efbfbdefbfbd
final String:锟斤拷
src :少年, srcCharset: GBK, middleCharset: GBK
src bytes:c9d9c4ea
after decode : 少年
after middle conversion bytes:c9d9c4ea
final String:少年
[equals]--- src :少年, srcCharset: GBK, middleCharset: GBK
src :少µ年, srcCharset: GBK, middleCharset: GBK
src bytes:c9d93fc4ea
after decode : 少?年
after middle conversion bytes:c9d93fc4ea
final String:少?年
src :少, srcCharset: GBK, middleCharset: GBK
src bytes:c9d9
after decode : 少
after middle conversion bytes:c9d9
final String:少
[equals]--- src :少, srcCharset: GBK, middleCharset: GBK
src :少年, srcCharset: GBK, middleCharset: ISO-8859-1
src bytes:c9d9c4ea
after decode : ÉÙÄê
after middle conversion bytes:c9d9c4ea
final String:少年
[equals]--- src :少年, srcCharset: GBK, middleCharset: ISO-8859-1
src :少µ年, srcCharset: GBK, middleCharset: ISO-8859-1
src bytes:c9d93fc4ea
after decode : ÉÙ?Äê
after middle conversion bytes:c9d93fc4ea
final String:少?年
src :少, srcCharset: GBK, middleCharset: ISO-8859-1
src bytes:c9d9
after decode : ÉÙ
after middle conversion bytes:c9d9
final String:少
[equals]--- src :少, srcCharset: GBK, middleCharset: ISO-8859-1
src :少年, srcCharset: ISO-8859-1, middleCharset: UTF-8
src bytes:3f3f
after decode : ??
after middle conversion bytes:3f3f
final String:??
src :少µ年, srcCharset: ISO-8859-1, middleCharset: UTF-8
src bytes:3fb53f
after decode : ?�?
after middle conversion bytes:3fefbfbd3f
final String:?�?
src :少, srcCharset: ISO-8859-1, middleCharset: UTF-8
src bytes:3f
after decode : ?
after middle conversion bytes:3f
final String:?
src :少年, srcCharset: ISO-8859-1, middleCharset: GBK
src bytes:3f3f
after decode : ??
after middle conversion bytes:3f3f
final String:??
src :少µ年, srcCharset: ISO-8859-1, middleCharset: GBK
src bytes:3fb53f
after decode : ?�?
after middle conversion bytes:3f3f3f
final String:???
src :少, srcCharset: ISO-8859-1, middleCharset: GBK
src bytes:3f
after decode : ?
after middle conversion bytes:3f
final String:?
src :少年, srcCharset: ISO-8859-1, middleCharset: ISO-8859-1
src bytes:3f3f
after decode : ??
after middle conversion bytes:3f3f
final String:??
src :少µ年, srcCharset: ISO-8859-1, middleCharset: ISO-8859-1
src bytes:3fb53f
after decode : ?µ?
after middle conversion bytes:3fb53f
final String:?µ?
src :少, srcCharset: ISO-8859-1, middleCharset: ISO-8859-1
src bytes:3f
after decode : ?
after middle conversion bytes:3f
final String:?

上一篇 下一篇

猜你喜欢

热点阅读