杂-文件按行读取

2020-10-27  本文已影响0人  五洋捉鳖zz

背景

写文件并返回最后一行的行号

 /**
  * Writes {@code content} to {@code file}, then re-reads the file and returns how many
  * lines it contains (i.e. the line number of the last line).
  *
  * @param file    file to write to
  * @param content text to write
  * @param append  passed to {@link FileWriter}: {@code true} appends, {@code false} overwrites
  * @return the line count after the write, or {@code -1} if writing or counting failed
  */
 public static Long write(File file, String content, boolean append) {
    try (FileWriter fileWriter = new FileWriter(file.getPath(), append)) {
        fileWriter.write(content);
        // Flush buffered characters so the re-read below sees what was just written.
        fileWriter.flush();
        try (Stream<String> stream = Files.lines(file.toPath())) {
            return stream.count();
        }
    } catch (IOException | java.io.UncheckedIOException exception) {
        // Files.lines reports read/decoding failures lazily as UncheckedIOException from
        // the terminal operation, so it must be caught here as well to honour the -1 contract.
        // NOTE(review): assumes an SLF4J-style logger that treats a trailing Throwable
        // argument as the exception to log with its stack trace - confirm.
        log.error("Sink log error! msg: {}", exception.getMessage(), exception);
    }
    return -1L;
}

按行读取&处理(文件全部内容)

/**
 * Reads the whole file and returns its lines in order.
 *
 * <p>{@link RandomAccessFile#readLine()} widens every byte to one char, so multi-byte
 * text would come back garbled. Each line is therefore re-decoded from its raw bytes.
 * NOTE(review): assumes the file is UTF-8 encoded - confirm against the writer.
 *
 * @param file file to read
 * @return the lines read; if an I/O error occurs, the lines read before the failure
 */
public static List<String> lineScan(File file) {
    List<String> res = new ArrayList<>();
    try (RandomAccessFile fileR = new RandomAccessFile(file, "r")) {
        String raw;
        while ((raw = fileR.readLine()) != null) {
            // ISO-8859-1 maps chars 0-255 back to the original bytes one-to-one.
            byte[] bytes = raw.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1);
            res.add(new String(bytes, java.nio.charset.StandardCharsets.UTF_8));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return res;
}
    
// FileUtils.java
/**
 * Reads the file line by line and passes each line to {@code handler}, without
 * collecting the lines in memory.
 *
 * <p>{@link RandomAccessFile#readLine()} widens every byte to one char, so each line is
 * re-decoded from its raw bytes before it is handed over.
 * NOTE(review): assumes the file is UTF-8 encoded - confirm against the writer.
 *
 * @param file    file to read
 * @param handler callback invoked once per line, in file order
 */
public static void lineScan(File file, RowHandler handler) {
    try (RandomAccessFile fileR = new RandomAccessFile(file, "r")) {
        String raw;
        while ((raw = fileR.readLine()) != null) {
            // ISO-8859-1 maps chars 0-255 back to the original bytes one-to-one.
            byte[] bytes = raw.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1);
            handler.handle(new String(bytes, java.nio.charset.StandardCharsets.UTF_8));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    // No return value: the method is void (the original `return res;` did not compile).
}
// RowHandler.java
/**
 * Callback that receives one line of a file at a time.
 *
 * <p>Named {@code RowHandler} with a {@code handle} method so that it matches its file
 * name and the way {@code lineScan(File, RowHandler)} invokes it.
 */
@FunctionalInterface
public interface RowHandler {

    /**
     * Processes a single line.
     *
     * @param line content of the line
     */
    void handle(String line);
}
// eg.
FileUtils.lineScan(file, (lineContent) -> {
    // Put your per-line processing logic here.
});

// * Sets the file-pointer offset, measured from the beginning of this
// * file, at which the next read or write occurs.  The offset may be
// * set beyond the end of the file. Setting the offset beyond the end
// * of the file does not change the file length.  The file length will
// * change only by writing after the offset has been set beyond the end
// * of the file.
RandomAccessFile.seek(long pos);
//  日志中一条请求的详细信息存储如下
<SESSION_ID>|<TIMESTAMP>|<COST>|<STATUS>|<IP>

// SESSION_ID是定长的,不用考虑
// TIMESTAMP 预估这十几年都会是这个长度,不用考虑
// COST:  接口请求耗时(MS),这个长度不固定,需要做一下处理:假设API接口请求耗时最大为 8 位数字,少于 8 位的在左侧补 0 后转换为字符串。
/**
 * Formats a request cost (ms) as a zero-padded decimal string at least 8 characters wide,
 * so the COST field has a fixed width in the log record.
 *
 * <p>Values with more than 8 digits are not truncated and therefore produce a wider
 * string; callers that rely on fixed-width rows must keep the cost within 8 digits.
 *
 * @param cost request cost in milliseconds
 * @return {@code cost} left-padded with zeros to 8 characters
 */
public static String getFormatCostStr(Long cost) {
    // A static method has no `this`; format the parameter itself.
    return String.format("%08d", cost);
}

// STATUS: 接口请求状态,成功/不成功。这里我们用 0 1表示 不成功/成功。完美定长
// IP: 地址,这里还是采取补0的策略(一个简陋版的补全。。)
/**
 * Pads every dot-separated segment of an IPv4-style address to 3 characters with leading
 * zeros, so the result is always exactly 15 characters long
 * (e.g. {@code "172.16.2.15"} becomes {@code "172.016.002.015"}).
 *
 * <p>Anything that cannot be padded to that fixed width - {@code null}, a segment count
 * other than 4, or a segment that is empty or longer than 3 characters - yields the
 * placeholder {@code "000.000.000.000"}. Segments are not checked for being numeric or
 * within 0-255.
 *
 * @param ip address to normalise, may be {@code null}
 * @return the 15-character padded address, or {@code "000.000.000.000"}
 */
public static String ipCompletion(String ip) {
    final String fallback = "000.000.000.000";
    if (ip == null) {
        return fallback;
    }
    // Limit -1 keeps trailing empty segments so "1.2.3.4." is seen as 5 segments.
    String[] arr = ip.split("\\.", -1);
    if (arr.length != 4) {
        return fallback;
    }
    for (int i = 0; i < arr.length; i++) {
        int len = arr[i].length();
        if (len < 1 || len > 3) {
            // Would break the fixed record width that seek-based reading depends on.
            return fallback;
        }
        arr[i] = "000".substring(len) + arr[i];
    }
    return String.join(".", arr);
}
// 先来个读取文件首行获取一行长度的方法

// 这个长度未算上 换行符(\r\n)
/**
 * Returns the length in bytes of the file's first line, excluding the line terminator.
 *
 * @param file file whose first line is measured
 * @return the byte length of the first line, or {@code -1} if the file cannot be read
 *         or its first line is missing or empty
 */
public static long lineLength(File file) {
    long length = -1L;
    try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
        String str = randomAccessFile.readLine();
        if (str != null && !str.isEmpty()) {
            // readLine() turns each byte into exactly one char, so the char count IS the
            // byte count. Re-encoding with getBytes() would over-count non-ASCII bytes.
            length = str.length();
        }
    } catch (Exception e) {
        log.error("read file first line length failed ! msg: {}", e.getMessage());
        log.error("", e);
    }
    return length;
}

// 而后在来个按行读取的方法
/**
 * Reads rows {@code start} through {@code end} (1-based, both inclusive) of a file whose
 * rows all occupy the same number of bytes, seeking straight to the first requested row
 * instead of scanning from the top.
 *
 * <p>Reading stops early at end of file. I/O errors are logged and not rethrown.
 *
 * @param file      file to read
 * @param start     1-based number of the first row to read
 * @param end       1-based number of the last row to read
 * @param rowLength bytes per row, including the line terminator
 * @param handler   callback receiving each row's line number and content
 */
public static void lineScan(File file, int start, int end, Long rowLength, FileLineHandler handler) {
    try (RandomAccessFile fileR = new RandomAccessFile(file, "r")) {
        // Rows are fixed-width, so row `start` begins (start - 1) * rowLength bytes in.
        fileR.seek((start - 1) * rowLength);
        String str;
        // Hand over the row before advancing the counter, so that row `start` is reported
        // as `start` and row `end` is still delivered.
        for (long line = start; line <= end && (str = fileR.readLine()) != null; line++) {
            handler.handle(line, str);
        }
    } catch (IOException e) {
        log.error("read file line failed ! msg: {}", e.getMessage());
        log.error("", e);
    }
}

// FileLineHandler
/**
 * Callback that receives one row of a file together with its line number.
 *
 * <p>The method is {@code handle(line, content)} so that it matches how the ranged
 * {@code lineScan} invokes it and the two-argument lambda used by callers.
 */
@FunctionalInterface
public interface FileLineHandler {

    /**
     * Processes a single row.
     *
     * @param line    line number of the row
     * @param content content of the row
     */
    void handle(long line, String content);
}

// 实际的调用效果
// Bytes per row = first line's content length + 2 for the "\r\n" terminator.
// NOTE(review): assumes rows end with "\r\n" and that lineLength did not return -1 - confirm.
Long rowLength = FileUtils.lineLength(auditFile) + 2;
// rowIndex / endRowIndex: numbers of the first and last rows to read (e.g. taken from the index file).
FileUtils.lineScan(auditFile, rowIndex, endRowIndex, rowLength, (line, rowContext) -> {
    // Your logic here.
});

  1. <DATE>.log(按行存储每次的请求详情。)
asdfasdfasdfasdf|1603643538000|00000012|1|172.016.002.015
asdfasdfasdfasdf|1603643538000|00000012|1|172.016.002.015
  2. <DATE>.log.index(分为1440行,即每天共有1440分钟,每行存放当前分钟发生请求日志在log文件中的行号,方便检索)
// 在当天的第973分钟发生了两次请求,两次请求的日志分别在log文件中的第1,2行,index文件中第972行内容如下,
1|2

上一篇下一篇

猜你喜欢

热点阅读