大数据

dataX-windows系统下mysql与txt的转化(二)

2021-12-27  本文已影响0人  堂哥000

本篇的配置环境 https://www.jianshu.com/p/6c086c0ab2a9
mysql 8.0.27 ,python3.7 ,windows系统

一、文件准备

新建workJson 和 workfile 目录分别存放原始文件,和json
原始文件名file.txt分割符为\t

image.png

二、txt文件到txt文件

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "txtfilereader",
                    "parameter": {
                        "column": [
                        {
                                "index": 0,
                                "type": "string",
                                "nullFormat":""
                            },
                            {
                                "index": 1,
                                "type": "string",
                                "nullFormat":""
                            },
                            {
                                "index": 2,
                                "type": "string",
                                "nullFormat":""
                            },
                            {
                                "index": 3,
                                "type": "string",
                                "nullFormat":""
                            }
                                  ],
                        "encoding": "utf-8",
                        "fieldDelimiter": "\t",
                        "path": ["D:\\dataxTest\\workfile\\file.txt"]
                    }
                },
                "writer": {
                    "name": "txtfilewriter",
                    "parameter": {
                        "fieldDelimiter": ",",
                        "fileName": "res.txt",
                        "path": "D:\\dataxTest\\workfile",
                        "writeMode": "truncate"  # 每次会覆盖写入
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "2"
            }
        }
    }
}
CHCP 65001
D:\file_app\Anaconda\python.exe  D:\dataxTest\datax\bin\datax.py  D:\dataxTest\workJson\txtTotxtfile.json

三、TXT到Mysql

CREATE TABLE datax_fdl (
  id bigint NOT NULL AUTO_INCREMENT
  ,lable varchar(200)   DEFAULT NULL
  ,lable2 varchar(200)   DEFAULT NULL
  ,lable3 varchar(200)   DEFAULT NULL
  ,lable4 varchar(200)   DEFAULT NULL
  ,create_time timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间'
  ,update_time timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间'
  ,PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "txtfilereader",
                    "parameter": {
                        "column": [
                        {
                                "index": 0,
                                "type": "string",
                                "nullFormat":""
                            },
                            {
                                "index": 1,
                                "type": "string",
                                "nullFormat":""
                            },
                            {
                                "index": 2,
                                "type": "string",
                                "nullFormat":""
                            },
                            {
                                "index": 3,
                                "type": "string",
                                "nullFormat":""
                            }
                                  ],
                        "encoding": "utf-8",
                        "fieldDelimiter": "\t",
                        "path": ["D:\\dataxTest\\workfile\\file.txt"]
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "column": ["lable","lable2","lable3","lable4"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://本机ip:端口/datax?useSSL=false&useUnicode=true&characterEncoding=utf8&serverTimezone=UTC",
                                "table": ["datax_fdl"]

                            }
                        ],
                        "password": "root",
                        "preSql": [],
                        "session": [],
                        "username": "root",
                        "writeMode": "insert"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "2"
            }
        }
    }
}
D:\file_app\Anaconda\python.exe  D:\dataxTest\datax\bin\datax.py  D:\dataxTest\workJson\txtTomysql.json
                        "column": [
                        {
                                "index": 0,
                                "type": "string"
                            }
                                  ],
                        "encoding": "utf-8",
                        "fieldDelimiter": "\r",

写入mysql后,使用切割函数再次处理

select 
id
,lable
,REGEXP_SUBSTR(lable,'[^\t]+',1,1) as label_1
,REGEXP_SUBSTR(lable,'[^\t]+',1,2) as label_2
,REGEXP_SUBSTR(lable,'[^\t]+',1,3) as label_3
,REGEXP_SUBSTR(lable,'[^\t]+',1,4) as label_4
from  datax_fdl_3
上一篇 下一篇

猜你喜欢

热点阅读