DataX 是一个离线数据同步工具/平台,支持多种异构数据库之间的迁移同步,可以通过文档查看各个数据源的配置方法。链接地址:https://github.com/alibaba/datax
开始迁移
这里主要是 mysql -> ob,其他数据源的配置可以参考 datax。
1、下载安装 datax
官方地址,有两种方式可以安装
https://github.com/alibaba/DataX/blob/master/userGuid.md
或者也可以直接通过下面的步骤下载安装
# 下载 datax
wget https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202309/datax.tar.gz
# 解压
tar -xf datax.tar.gz
# 调试,没有报错则正常
cd datax/bin
python datax.py ../job/job.json
2、在 OB 集群内创建表结构
3、编写配置文件
{
"job": {
"setting": {
"speed": {
"channel": 4
},
"errorLimit": {
"record": 0,
"percentage": 0.1
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "xxxxx",
"column": ["*"],
"connection": [
{
"table": ["t1"],
"jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8&useSSL=false"]
}
]
}
},
"writer": {
"name": "oceanbasev10writer",
"parameter": {
"obWriteMode": "insert",
"column": ["*"],
"preSql": ["truncate table t1"],
"connection": [
{
"jdbcUrl": "jdbc:oceanbase://127.0.0.1:2883/test2?",
"table": ["t1"]
}
],
"username": "用户名@租户名#集群名",
"password":"xxxxx",
"writerThreadCount":10,
"batchSize": 1000,
"memstoreThreshold": "0.9"
}
}
}
]
}
}
4、启动迁移
python bin/datax.py conf/mysql2ob.json
5、如果需要离线定时同步增量,可以指定 where 或者 querySql
指定 where
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "xxxx",
"column": ["*"],
"where": "id >2"
"connection": [
{
"table": ["t1"],
"jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8&useSSL=false"]
}
]
}
},
指定querySql
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "xxxx",
"column": ["*"],
"connection": [
{
"querySql": [
"select * from t1 where id > 4;"
],
"jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8&useSSL=false"]
}
]
}
},