dataX-windows系统下mysql与txt的转化(二)
2021-12-27 本文已影响0人
堂哥000
本篇的配置环境 https://www.jianshu.com/p/6c086c0ab2a9
mysql 8.0.27 ,python3.7 ,windows系统
一、文件准备
新建workJson 和 workfile 目录分别存放原始文件,和json
原始文件名file.txt
分割符为\t

二、txt文件到txt文件
- json模板获取
python D:\dataxTest\datax\bin\datax.py -r txtfilereader -w txtwriter > txtTotxtfile.json
- txtTotxt配置
{
"job": {
"content": [
{
"reader": {
"name": "txtfilereader",
"parameter": {
"column": [
{
"index": 0,
"type": "string",
"nullFormat":""
},
{
"index": 1,
"type": "string",
"nullFormat":""
},
{
"index": 2,
"type": "string",
"nullFormat":""
},
{
"index": 3,
"type": "string",
"nullFormat":""
}
],
"encoding": "utf-8",
"fieldDelimiter": "\t",
"path": ["D:\\dataxTest\\workfile\\file.txt"]
}
},
"writer": {
"name": "txtfilewriter",
"parameter": {
"fieldDelimiter": ",",
"fileName": "res.txt",
"path": "D:\\dataxTest\\workfile",
"writeMode": "truncate" # 每次会覆盖写入
}
}
}
],
"setting": {
"speed": {
"channel": "2"
}
}
}
}
- 命令行运行
CHCP 65001
D:\file_app\Anaconda\python.exe D:\dataxTest\datax\bin\datax.py D:\dataxTest\workJson\txtTotxtfile.json
- 执行结果
此时结果文件名为res.txt__85b669e0_536f_4786_9928_0bff4eeb4fab
比json 配置里多一个UUID
image.png
- 解决方案
修改txtfilereader部分的column 参数为"column" : ["*"]
不指定列数
三、TXT到Mysql
- json模板获取
python D:\dataxTest\datax\bin\datax.py -r txtfilereader -w mysqlwriter > txtTomysql.json
- mysql 建表
CREATE TABLE datax_fdl (
id bigint NOT NULL AUTO_INCREMENT
,lable varchar(200) DEFAULT NULL
,lable2 varchar(200) DEFAULT NULL
,lable3 varchar(200) DEFAULT NULL
,lable4 varchar(200) DEFAULT NULL
,create_time timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间'
,update_time timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'
,PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
- txtTomysql配置
{
"job": {
"content": [
{
"reader": {
"name": "txtfilereader",
"parameter": {
"column": [
{
"index": 0,
"type": "string",
"nullFormat":""
},
{
"index": 1,
"type": "string",
"nullFormat":""
},
{
"index": 2,
"type": "string",
"nullFormat":""
},
{
"index": 3,
"type": "string",
"nullFormat":""
}
],
"encoding": "utf-8",
"fieldDelimiter": "\t",
"path": ["D:\\dataxTest\\workfile\\file.txt"]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": ["lable","lable2","lable3","lable4"],
"connection": [
{
"jdbcUrl": "jdbc:mysql://本机ip:端口/datax?useSSL=false&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC",
"table": ["datax_fdl"]
}
],
"password": "root",
"preSql": [],
"session": [],
"username": "root",
"writeMode": "insert"
}
}
}
],
"setting": {
"speed": {
"channel": "2"
}
}
}
}
- 执行导入
D:\file_app\Anaconda\python.exe D:\dataxTest\datax\bin\datax.py D:\dataxTest\workJson\txtTomysql.json
- 执行结果(
)
image.png
- 处理方案
- 配置文件修改为一列,windows下文件用 \r 分割,
"column": [
{
"index": 0,
"type": "string"
}
],
"encoding": "utf-8",
"fieldDelimiter": "\r",
写入mysql后,使用切割函数再次处理
select
id
,lable
,REGEXP_SUBSTR(lable,'[^\t]+',1,1) as label_1
,REGEXP_SUBSTR(lable,'[^\t]+',1,2) as label_2
,REGEXP_SUBSTR(lable,'[^\t]+',1,3) as label_3
,REGEXP_SUBSTR(lable,'[^\t]+',1,4) as label_4
from datax_fdl_3
- 注意事项 write参数可不带
[]
但是read参数必须带[]