MybatisPlus利用数据库LOAD DATA特性,上万数据

2025-01-16  本文已影响0人  唐岭云

MybatisPlus的saveBatch,在插入1W条数据的性能测试中,耗时大概在700毫秒~2000毫秒之间;假如数据达到5W~20W以上的量级,插入耗时会大幅增加,非常有可能超过1分钟.等待时间过长,就会引发其他问题,比如数据库连接池中的连接被长时间占用,导致性能下降,使系统出现非常规的错误.
所以不得不思考,如何快速入库,减少连接池的占用时长,避免引发不可预知的问题.打算就去MYSQL官网查看相关的资料,发现还真有一个官方思路
原文:
When loading a table from a text file, use LOAD DATA. This is usually 20 times faster than using INSERT statements.
翻译:从文本文件向表中加载数据时,请使用 LOAD DATA,这通常比使用 INSERT 语句快约20倍.
好了,假设: 1W条数据 700ms ~ 2s

用10W数据量在脑中做一个粗略的耗时估算对比:

方法 数据量 耗时
saveBatch 10W 7s ~ 20s
load data 10W 350ms ~ 1s

太逆天,有没有!!!



准备工作

MYSQL 配置开关

  1. MySQL 服务端开启 LOCAL INFILE,允许客户端把本地文件以二进制流方式传输到 MySQL 服务器

[mysqld]
local-infile=1
需要重新启动 MySql

  2. 项目配置文件.yml或者.properties中,项目连接数据库地址url后面增加参数
    &allowLoadLocalInfile=true

3.项目pom.xml引入依赖

   <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-csv</artifactId>
        <version>1.13.0</version>
   </dependency>


利用MybatisPlus特性优化代码实现LOAD DATA快速插入特性:

核心工具类 CsvUtil.java
package com.*.*.utils;

import com.baomidou.mybatisplus.core.metadata.TableFieldInfo;
import com.baomidou.mybatisplus.core.metadata.TableInfo;
import com.baomidou.mybatisplus.core.metadata.TableInfoHelper;
import com.baomidou.mybatisplus.core.toolkit.IdWorker;
import com.*.*.aspect.helper.DynamicTableHelper;
import lombok.SneakyThrows;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.springframework.util.CollectionUtils;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.*;

public class CsvUtil {


    /** Logical table names that are sharded at runtime; a hash suffix is appended in buildCsv(). */
    private static final List<String> DYNAMIC_TABLE = List.of("mysql_table_name_");
    /**
     * Holds {tableName -> insert columns} for the current thread between buildCsv()
     * and the subsequent LOAD DATA call. Must be released via clear() (CsvMapper does
     * this in a finally block) or the entry leaks on pooled threads.
     */
    private static final ThreadLocal<Map<String, Collection<String>>> INSERT_COLUMN_COLLECTION = new ThreadLocal<>();
    /**
     * Kept public for backward compatibility. SimpleDateFormat is NOT thread-safe:
     * never call format/parse on this shared instance directly — go through the
     * synchronized {@link #format(Date, SimpleDateFormat)} helper instead.
     */
    public static final SimpleDateFormat YYYY_MM_DD_24HH_MM_SS = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /** Static utility class — no instances. */
    private CsvUtil() {
    }

    /**
     * Writes the given entities to a temporary CSV file suitable for MySQL
     * {@code LOAD DATA LOCAL INFILE}, with a header row listing the insert columns.
     * Also records the resolved table name and column list in a ThreadLocal so the
     * mapper can build the LOAD DATA statement; the caller is responsible for
     * invoking {@link #clear()} afterwards.
     *
     * @param data entities of a single MybatisPlus-mapped type; may be empty
     * @return the CSV file (created in the working directory), or {@code null} when data is empty
     */
    @SneakyThrows
    public static <T> File buildCsv(Collection<T> data) {
        if (CollectionUtils.isEmpty(data)) return null;
        File file = new File(IdWorker.getIdStr() + ".csv");
        Class<?> clazz = data.iterator().next().getClass();
        TableInfo tableInfo = TableInfoHelper.getTableInfo(clazz);
        String tableName = tableInfo.getTableName();
        if (DYNAMIC_TABLE.contains(tableName)) {
            // Sharded table: append the hash-bucket suffix for the current request.
            tableName = tableName + PartitionByMurmurHashUtil.hash(DynamicTableHelper.getHashKey(), 20);
        }
        LinkedHashMap<String, String> columnMapping = getColumnPropertyMapping(tableInfo);
        Set<String> insertColumns = columnMapping.keySet();
        INSERT_COLUMN_COLLECTION.set(Map.of(tableName, insertColumns));
        CSVFormat.Builder builder = CSVFormat.MYSQL.builder()
                .setHeader(insertColumns.toArray(new String[0]))
                .setDelimiter(',')
                .setQuote('"');
        try (FileWriter fileWriter = new FileWriter(file);
             CSVPrinter csvPrinter = new CSVPrinter(fileWriter, builder.get())) {
            writeInCsvFile(data, tableInfo, columnMapping.values(), csvPrinter);
            csvPrinter.flush();
        } catch (Exception e) {
            // Don't leak the ThreadLocal entry or a half-written temp file on failure.
            clear();
            file.delete();
            throw e;
        }
        return file;
    }

    /**
     * Builds an ordered {column -> property} mapping for the table: primary key
     * first (when present), then every non-key field in declaration order.
     */
    private static LinkedHashMap<String, String> getColumnPropertyMapping(TableInfo tableInfo) {
        LinkedHashMap<String, String> columnMapping = new LinkedHashMap<>();
        if (tableInfo.havePK()) {
            columnMapping.put(tableInfo.getKeyColumn(), tableInfo.getKeyProperty());
        }
        for (TableFieldInfo tableFieldInfo : tableInfo.getFieldList()) {
            columnMapping.put(tableFieldInfo.getColumn(), tableFieldInfo.getProperty());
        }
        return columnMapping;
    }

    /**
     * Prints one CSV record per entity. Null primary keys get a snowflake id so
     * LOAD DATA never inserts an empty key; Date values are formatted to
     * "yyyy-MM-dd HH:mm:ss" (raw java.util.Date would load as 0000-00-00 00:00:00).
     */
    private static <T> void writeInCsvFile(Collection<T> data, TableInfo tableInfo, Collection<String> propertys, CSVPrinter csvPrinter) throws IOException {
        for (T item : data) {
            Object[] record = new Object[propertys.size()];
            int index = 0;
            for (String property : propertys) {
                Object value = tableInfo.getPropertyValue(item, property);
                if (Objects.equals(tableInfo.getKeyProperty(), property)) {
                    value = value == null ? IdWorker.getIdStr() : value;
                }
                if (value instanceof Date dateObject) {
                    value = format(dateObject, YYYY_MM_DD_24HH_MM_SS);
                }
                record[index++] = value;
            }
            csvPrinter.printRecord(record);
        }
    }

    /**
     * Formats the date (or "now" when null). Synchronized on the formatter because
     * SimpleDateFormat carries internal mutable state and the shared instance may
     * be used from multiple threads building CSVs concurrently.
     */
    private static String format(Date date, SimpleDateFormat dateFormat) {
        synchronized (dateFormat) {
            return dateFormat.format(Objects.requireNonNullElseGet(date, Date::new));
        }
    }

    /** Insert columns recorded by buildCsv() on this thread, or {@code null} if none. */
    public static Collection<String> insertColumns() {
        Map<String, Collection<String>> mapping = INSERT_COLUMN_COLLECTION.get();
        String table = tableName();
        return (mapping == null || table == null) ? null : mapping.get(table);
    }

    /** Resolved table name recorded by buildCsv() on this thread, or {@code null} if none. */
    public static String tableName() {
        Map<String, Collection<String>> mapping = INSERT_COLUMN_COLLECTION.get();
        if (mapping == null) return null;
        return mapping.keySet().stream().findFirst().orElse(null);
    }

    /** Releases the ThreadLocal state; must be called once the LOAD DATA call finishes. */
    public static void clear() {
        INSERT_COLUMN_COLLECTION.remove();
    }

}

利用MybatisPlus大佬的工具,封装LOAD DATA需要的csv文件工具
通用的LOAD DATA Mapper接口创建

CsvMapper.java
package com.*.*.repository.mapper;

import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.*.*.utils.CsvUtil;
import org.apache.ibatis.annotations.Param;

import java.io.File;
import java.util.Collection;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;

public interface CsvMapper extends BaseMapper<Object> {

    /**
     * Executes MySQL {@code LOAD DATA LOCAL INFILE} for the given table; the SQL
     * lives in CsvMapper.xml. Returns the number of rows loaded.
     */
    int csvInsert(@Param("tableName") String tableName, @Param("csvPath") String csvPath, @Param("insertColumns") Collection<String> insertColumns);

    // Per-table locks so concurrent LOAD DATA calls against the same table are
    // serialized. Interface fields are implicitly public static final.
    ConcurrentHashMap<String, ReentrantLock> lockMap = new ConcurrentHashMap<>();

    /**
     * Bulk-inserts the rows of a CSV file previously produced by CsvUtil.buildCsv
     * on the SAME thread. Always releases the CsvUtil ThreadLocal state and deletes
     * the temp file, whether the load succeeds or fails.
     *
     * @param csvFile the CSV to load; null or empty files are a no-op
     * @return number of rows inserted, 0 when there was nothing to load
     */
    default int batchInsert(File csvFile) {
        if (csvFile == null || csvFile.length() == 0) return 0;
        String table = CsvUtil.tableName();
        if (table == null) {
            // buildCsv was not called on this thread (or its state was already
            // cleared); a null key would make computeIfAbsent throw NPE.
            csvFile.delete();
            return 0;
        }
        ReentrantLock lock = lockMap.computeIfAbsent(table, k -> new ReentrantLock());
        lock.lock();
        try {
            return csvInsert(table, csvFile.getAbsolutePath(), CsvUtil.insertColumns());
        } finally {
            CsvUtil.clear();
            // Best effort: the temp CSV is disposable, a failed delete only leaves
            // a stray file on disk.
            csvFile.delete();
            lock.unlock();
        }
    }
}


CsvMapper.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.*.*.repository.mapper.CsvMapper">

    <!--
        Bulk-loads a client-side CSV file via MySQL's LOAD DATA LOCAL INFILE.
        Requires local_infile enabled on the server and allowLoadLocalInfile=true
        on the JDBC URL. Delimiter/quote/line terminator must match what
        CsvUtil.buildCsv writes (',' / '"' / '\n').
        NOTE: ${tableName} and ${column} are raw string substitution (no escaping);
        both values come from MybatisPlus TableInfo metadata, never from user input.
    -->
    <insert id="csvInsert">
        LOAD DATA LOCAL INFILE #{csvPath}
        INTO TABLE ${tableName}
        FIELDS TERMINATED BY ','
        ENCLOSED BY '"'
        LINES TERMINATED BY '\n'
        <!-- skip the header row that CsvUtil.buildCsv writes first -->
        IGNORE 1 ROWS
        <foreach collection="insertColumns" item="column" separator=","  open="(" close=");" >
            ${column}
        </foreach>
    </insert>
</mapper>

开始测试

数据库表 实体映射类 Apple.java
package com.*.*.entity;

import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import java.math.BigDecimal;
import java.util.Date;

@Data
@TableName(value = "apple", autoResultMap = true)
public class Apple {
    // Snowflake id assigned by MybatisPlus (ASSIGN_ID) when the key is null.
    @TableId(type = IdType.ASSIGN_ID)
    private String id;
    private String color;
    private BigDecimal price;
    private String productAddress;
    // java.util.Date is formatted to "yyyy-MM-dd HH:mm:ss" during CSV export;
    // otherwise LOAD DATA would store 0000-00-00 00:00:00.
    private Date productDate;
}
AppleMapper.java
package com.*.*.repository.mapper;

import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.*.*.entity.Apple;

/** Standard MybatisPlus CRUD mapper for the {@code apple} table. */
public interface AppleMapper extends BaseMapper<Apple> {
}

CsvMapperTest.java
package com.*.*.repository.mapper;

import org.springframework.boot.test.context.SpringBootTest;
import java.math.BigDecimal;
import com.*.*.entity.Apple;
import com.*.*.utils.CsvUtil;
import jakarta.annotation.Resource;
import org.junit.jupiter.api.Test;
import java.util.Date;
import java.io.File;
import java.util.ArrayList;
import java.util.List;


@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
class CsvMapperTest {

    @Resource
    private CsvMapper csvMapper;

    @Test
    void test() {
        List<Apple> apples = new ArrayList<>();
        for (int i = 0; i < 100000; i++) {
            Apple apple = new Apple();
            apple.setId(i+"");
            apple.setColor("red");
            apple.setPrice(new BigDecimal(5));
            apple.setProductAddress("Chongqing");
            apple.setProductDate(new Date());
            apples.add(apple);
        }
        File file = CsvUtil.buildCsv(apples);
        long startTime = System.currentTimeMillis();
        csvMapper.batchInsert(file);
        System.err.println("cost time:" + (System.currentTimeMillis() - startTime));
    }

}

注意事项

LOAD DATA JAVA对象的类型如果是Date,需要利用DateFormat格式化数据成字符串数据,否则插入的时间内容是
0000-00-00 00:00:00

上一篇 下一篇

猜你喜欢

热点阅读