add datax

This commit is contained in:
kev 2020-06-18 15:44:18 +08:00
parent 732169f508
commit f63d463781
5 changed files with 91 additions and 0 deletions

View File

@ -45,6 +45,7 @@ A collection of delicious docker recipes.
- [x] airflow
- [x] ambari
- [x] cmak (~kafka-manager~)
- [x] datax :cn:
- [x] kafka-arm
- [x] luigi
- [x] nifi

19
datax/Dockerfile Normal file
View File

@ -0,0 +1,19 @@
#
# Dockerfile for datax
#
# Builds an image that unpacks the DataX release tarball into /opt/datax
# and runs bin/datax.py as the entrypoint (default argument: --help).
#
FROM openjdk:8-alpine
# MAINTAINER is deprecated; a label carries the same information.
LABEL maintainer="EasyPi Software Foundation"
WORKDIR /opt/datax
# Enable pipefail so that a failed download aborts the build instead of
# being masked by the exit status of tar at the end of the pipe.
SHELL ["/bin/ash", "-o", "pipefail", "-c"]
# python2 stays in the image (datax.py is invoked at runtime); curl and tar
# are only needed to fetch/unpack the release, so they are grouped in a
# virtual package and removed in the same layer.
RUN set -xe \
    && apk add --no-cache python2 \
    && apk add --no-cache --virtual .fetch-deps curl tar \
    && curl -fsSL https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz | tar xz --strip 1 \
    && bin/datax.py --help \
    && apk del .fetch-deps
ENV PATH=/opt/datax/bin:$PATH
ENTRYPOINT ["datax.py"]
CMD ["--help"]

14
datax/README.md Normal file
View File

@ -0,0 +1,14 @@
datax
=====
[DataX][1] is a data migration tool written by Alibaba.
## up and running
```bash
$ alias datax='docker run --rm -v $PWD:/data vimagick/datax'
$ wget https://github.com/alibaba/DataX/raw/master/core/src/main/job/job.json
$ datax /data/job.json
```
[1]: https://github.com/alibaba/DataX

52
datax/data/job.json Normal file
View File

@ -0,0 +1,52 @@
{
"job": {
"setting": {
"speed": {
"byte":10485760
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"column" : [
{
"value": "DataX",
"type": "string"
},
{
"value": 19890604,
"type": "long"
},
{
"value": "1989-06-04 00:00:00",
"type": "date"
},
{
"value": true,
"type": "bool"
},
{
"value": "test",
"type": "bytes"
}
],
"sliceRecordCount": 100000
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"print": false,
"encoding": "UTF-8"
}
}
}
]
}
}

5
datax/docker-compose.yml Normal file
View File

@ -0,0 +1,5 @@
# Compose service that runs a DataX job from the mounted ./data directory.
datax:
  image: vimagick/datax
  # Passed as the argument to the image's entrypoint.
  command: /data/job.json
  volumes:
    # Host ./data is mounted at /data inside the container.
    - ./data:/data