哈夫曼树和哈夫曼编码

2021-03-23  本文已影响0人  粑粑八成

构建哈夫曼树

  1. 只有叶子节点有值
  2. 带权路径长度最短的树,权值较大的结点离根较近。
  3. 叶子节点的权值乘叶子节点到根节点的路径长度之和最小
public class HuffmanTree {

  public static void main(String[] args) {
    int arr[] = {13, 7, 8,3,29,6,1};
    TreeNode huffmanTree = createHuffmanTree(arr);
    huffmanTree.preOrder(huffmanTree);
  }

  /**
   * 把每个节点当做一个独立的树(只有一个根节点的树)
   * 1. 从小到大排序,取出最小的两个组成新树
   * 2. 将新树加入,删除原先两个
   * 3. 重复上面步骤直到只剩一个节点,就是形成的哈夫曼树的根节点
   * @param arr
   * @return
   */
  public static TreeNode createHuffmanTree(int[] arr) {
    List<TreeNode> nodes = new ArrayList<>();
    for (int value:arr) {
      nodes.add(new TreeNode(value));
    }
    Collections.sort(nodes, Comparator.comparingInt(a -> a.id));

    while (nodes.size()>1) {
      TreeNode leftNode = nodes.get(0);
      TreeNode rightNode = nodes.get(1);

      TreeNode parentNode = new TreeNode(leftNode.id + rightNode.id);
      parentNode.left = leftNode;
      parentNode.right = rightNode;

      nodes.remove(leftNode);
      nodes.remove(rightNode);
      nodes.add(parentNode);
      Collections.sort(nodes, Comparator.comparingInt(a -> a.id));
    }
    return nodes.get(0);

  }
}

哈弗曼编码

  1. 统计字符出现的次数,作为叶子节点的权值
  2. 构建哈夫曼树
  3. 从根节点开始,根据左0右1的规则,获得所有字符(叶子节点)对应的01编码的映射
  4. 构建压缩后的编码,一个字节存储8位即8个01二进制
public class HuffmanCode {

  public static void main(String[] args) {

    String str = "i like like like java do you like a java";
    byte[] contentBytes = str.getBytes();
    byte[] zip = huffmanZip(contentBytes);
    System.out.println(byteTobitString((byte) -1));
  }

  static byte[] huffmanZip(byte[] bytes) {
    // 统计字符出现的次数作为权值,构建node数组
    List<Node> nodes = getNodes(bytes);
    // 构建哈夫曼树
    Node huffmanTree = createHuffmanTree(nodes);
    // 获得字符到二进制编码的映射关系
    getCodes(huffmanTree, "", stringBuilder);
    // 按照一个字节8位计算获得压缩后的编码
    byte[] zip = zip(bytes, huffmanCode);
    return zip;
  }

  static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
    StringBuilder stringBuilder = new StringBuilder();
    for (int i = 0; i < bytes.length; i++) {
      stringBuilder.append(huffmanCodes.get(bytes[i]));
    }

//    System.out.println(stringBuilder.toString());
    int len;
    if (stringBuilder.length() % 8 == 0) {
      len = stringBuilder.length() / 8;
    } else {
      len = stringBuilder.length() / 8 + 1;
    }
    int index = 0;
    byte[] by = new byte[len];
    for (int i = 0; i < stringBuilder.length(); i = i + 8) {
      if (i + 8 > stringBuilder.length()) {
        by[index] = (byte) Integer.parseInt(stringBuilder.substring(i), 2);
      } else {
        by[index] = (byte) Integer.parseInt(stringBuilder.substring(i, i + 8), 2);
      }
      index++;
    }
    return by;
  }

  static Map<Byte, String> huffmanCode = new HashMap<>();
  static StringBuilder stringBuilder = new StringBuilder();

  /**
   * @param node
   * @param code          左0右1
   * @param stringBuilder
   */
  static void getCodes(Node node, String code, StringBuilder stringBuilder) {
    StringBuilder stringBuilder1 = new StringBuilder(stringBuilder);
    stringBuilder1.append(code);
    if (node != null) {
      if (node.data == null) {
        getCodes(node.left, "0", stringBuilder1);
        getCodes(node.right, "1", stringBuilder1);
      } else {
        huffmanCode.put(node.data, stringBuilder1.toString());
      }
    }
  }

  public static List<Node> getNodes(byte[] bytes) {
    ArrayList<Node> nodes = new ArrayList<>();
    Map<Byte, Integer> counts = new HashMap<>();
    for (byte b : bytes) {
      Integer count = counts.get(b);
      if (count == null) {
        counts.put(b, 1);
      } else {
        counts.put(b, count + 1);
      }
    }

    for (Entry<Byte, Integer> entry : counts.entrySet()) {
      nodes.add(new Node(entry.getKey(), entry.getValue()));
    }

    return nodes;
  }

  public static Node createHuffmanTree(List<Node> nodes) {
    while (nodes.size() > 1) {
      Collections.sort(nodes, Comparator.comparingInt(Node::getWeight));

      Node leftNode = nodes.get(0);
      Node rightNode = nodes.get(1);
      Node parentNode = new Node(null, leftNode.weight + rightNode.weight);
      parentNode.left = leftNode;
      parentNode.right = rightNode;
      nodes.remove(leftNode);
      nodes.remove(rightNode);
      nodes.add(parentNode);

    }

    return nodes.get(0);
  }

  static String byteTobitString(byte b) {
    int temp = b;
    String s = Integer.toBinaryString(b);
    System.out.println(s);
    return s;
  }
}

class Node {

  Byte data;
  int weight;// 权值,字符串出现次数
  Node left;
  Node right;

  public Node(Byte data, int weight) {
    this.data = data;
    this.weight = weight;
  }

  public Byte getData() {
    return data;
  }

  public int getWeight() {
    return weight;
  }

  public Node getLeft() {
    return left;
  }

  public Node getRight() {
    return right;
  }

  @Override
  public String toString() {
    return "Node{" +
        "data=" + data +
        ", weight=" + weight +
        ", left=" + left +
        ", right=" + right +
        '}';
  }

  public void preOrder() {
    System.out.println(this);
    if (this.left != null) {
      this.left.preOrder();
    }
    if (this.right != null) {
      this.right.preOrder();
    }
  }
}

上一篇下一篇

猜你喜欢

热点阅读