Java 杂谈

Java 超大文件排序

2019-07-12  本文已影响13人  叫我宫城大人

思想

  1. 超大文件无法一次性全部加载到内存中;
  2. 可以将超大文件切分为多个能够装入内存的分片,逐片排序后写入临时文件,再对所有分片做多路归并,将排序后的内容输出至指定文件;

编码

创建超大文件

/**
 * Generates the unsorted input file at BIG_FILE_NAME.
 *
 * <p>Writes LINE_COUNT lines, each a random non-negative int drawn from
 * {@code [0, Integer.MAX_VALUE)} followed by LINE_SEPARATOR. An
 * {@link IOException} is not propagated; its stack trace is printed instead.
 */
private static void createBigFile() {
    try (FileWriter out = new FileWriter(BIG_FILE_NAME)) {
        Random generator = new Random();
        for (int written = 0; written < LINE_COUNT; written += 1) {
            // One random value per line.
            out.write(generator.nextInt(Integer.MAX_VALUE) + LINE_SEPARATOR);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}

超大文件分片排序

/**
 * Splits BIG_FILE_NAME into sorted chunk files.
 *
 * <p>Lines are read as integers and collected into batches of BATCH_SIZE.
 * Each full batch is sorted ascending and written to its own chunk file
 * named {@code BIG_FILE_NAME + ".tmp." + index}. Any lines left over after
 * the last full batch are written as one final, shorter chunk, so no input
 * line is lost when the line count is not a multiple of BATCH_SIZE.
 *
 * <p>An {@link IOException} is not propagated; its stack trace is printed and
 * the chunks written so far are returned.
 *
 * @return the names of the chunk files, in the order they were written
 */
private static List<String> separateFile() {
    List<String> fileNameList = new ArrayList<>();
    try (BufferedReader reader = new BufferedReader(new FileReader(BIG_FILE_NAME))) {
        int index = 0;
        List<Integer> batchLineList = new ArrayList<>(BATCH_SIZE);
        String line;
        while ((line = reader.readLine()) != null) {
            batchLineList.add(Integer.valueOf(line));
            if (batchLineList.size() == BATCH_SIZE) {
                fileNameList.add(writeSortedChunk(batchLineList, index++));
                batchLineList.clear();
            }
        }
        // Flush the trailing partial batch; without this the last
        // (lineCount % BATCH_SIZE) values would never reach the output.
        if (!batchLineList.isEmpty()) {
            fileNameList.add(writeSortedChunk(batchLineList, index));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return fileNameList;
}

/**
 * Sorts one batch ascending (in place) and writes it, one value per line, to
 * the chunk file for the given index.
 *
 * @param values the batch to sort and write; reordered by this call
 * @param index  the chunk number used as the file-name suffix
 * @return the name of the chunk file that was written
 * @throws IOException if the chunk file cannot be created or written
 */
private static String writeSortedChunk(List<Integer> values, int index) throws IOException {
    values.sort(Comparator.naturalOrder());
    String fileName = BIG_FILE_NAME + ".tmp." + index;
    try (FileWriter tmpWriter = new FileWriter(fileName)) {
        for (Integer val : values) {
            tmpWriter.write(val + LINE_SEPARATOR);
        }
    }
    return fileName;
}

分片合并输出

/**
 * Merges the sorted chunk files into SORT_FILE_NAME.
 *
 * <p>One reader is opened per chunk and the current (not yet written) line of
 * each is kept in memory. On every step the chunk whose current line has the
 * smallest integer value is selected, that line is written to the output, and
 * the next line of that chunk is read. The merge ends once every chunk is
 * exhausted.
 *
 * <p>Exceptions (I/O failures, a line that is not a valid integer) are not
 * propagated; the stack trace is printed. All chunk readers that were opened
 * are closed before returning.
 *
 * @param fileNameList names of the sorted chunk files to merge
 */
private static void mergeFile(List<String> fileNameList) {
    // Readers are registered here as soon as they are opened, before any read
    // is attempted, so the finally block can close every one of them even if a
    // later open or readLine fails.
    List<BufferedReader> readers = new ArrayList<>();
    try (FileWriter writer = new FileWriter(SORT_FILE_NAME)) {
        for (String fileName : fileNameList) {
            readers.add(new BufferedReader(new FileReader(fileName)));
        }

        int count = readers.size();
        // heads[i] is the pending line of chunk i (null once exhausted);
        // values[i] is its parsed value, computed once per line read.
        String[] heads = new String[count];
        int[] values = new int[count];
        for (int i = 0; i < count; i++) {
            heads[i] = readers.get(i).readLine();
            if (heads[i] != null) {
                values[i] = Integer.parseInt(heads[i]);
            }
        }

        while (true) {
            // Find the chunk whose pending line holds the smallest value.
            int min = -1;
            for (int i = 0; i < count; i++) {
                if (heads[i] != null && (min < 0 || values[i] < values[min])) {
                    min = i;
                }
            }
            // No chunk has a pending line left: the merge is complete.
            if (min < 0) {
                break;
            }
            writer.write(heads[min] + LINE_SEPARATOR);
            heads[min] = readers.get(min).readLine();
            if (heads[min] != null) {
                values[min] = Integer.parseInt(heads[min]);
            }
        }
    } catch (IOException | RuntimeException e) {
        e.printStackTrace();
    } finally {
        // Close the chunk readers; they are not managed by try-with-resources.
        for (BufferedReader reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
上一篇下一篇

猜你喜欢

热点阅读