diff --git a/README.md b/README.md
index a2b3749..be8ce6b 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,7 @@ A collection of delicious docker recipes.
 - [x] airflow
 - [x] ambari
 - [x] cmak (~kafka-manager~)
+- [x] datax :cn:
 - [x] kafka-arm
 - [x] luigi
 - [x] nifi
diff --git a/datax/Dockerfile b/datax/Dockerfile
new file mode 100644
index 0000000..b03df2d
--- /dev/null
+++ b/datax/Dockerfile
@@ -0,0 +1,21 @@
+#
+# Dockerfile for datax
+#
+
+FROM openjdk:8-alpine
+LABEL maintainer="EasyPi Software Foundation"
+
+WORKDIR /opt/datax
+
+# Fetch the release over HTTPS (-f aborts on HTTP errors), then run
+# `datax.py --help` so the build fails if the unpacked tool cannot start.
+RUN set -xe \
+    && apk add --no-cache curl python2 tar \
+    && curl -fsSL https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz | tar xz --strip 1 \
+    && bin/datax.py --help \
+    && apk del curl tar
+
+ENV PATH=/opt/datax/bin:$PATH
+
+ENTRYPOINT ["datax.py"]
+CMD ["--help"]
diff --git a/datax/README.md b/datax/README.md
new file mode 100644
index 0000000..e2f7d1d
--- /dev/null
+++ b/datax/README.md
@@ -0,0 +1,14 @@
+datax
+=====
+
+[DataX][1] is a data migration tool written by Alibaba.
+
+## up and running
+
+```bash
+$ alias datax='docker run --rm -v $PWD:/data vimagick/datax'
+$ wget https://github.com/alibaba/DataX/raw/master/core/src/main/job/job.json
+$ datax /data/job.json
+```
+
+[1]: https://github.com/alibaba/DataX
diff --git a/datax/data/job.json b/datax/data/job.json
new file mode 100644
index 0000000..5820659
--- /dev/null
+++ b/datax/data/job.json
@@ -0,0 +1,52 @@
+{
+    "job": {
+        "setting": {
+            "speed": {
+                "byte":10485760
+            },
+            "errorLimit": {
+                "record": 0,
+                "percentage": 0.02
+            }
+        },
+        "content": [
+            {
+                "reader": {
+                    "name": "streamreader",
+                    "parameter": {
+                        "column" : [
+                            {
+                                "value": "DataX",
+                                "type": "string"
+                            },
+                            {
+                                "value": 19890604,
+                                "type": "long"
+                            },
+                            {
+                                "value": "1989-06-04 00:00:00",
+                                "type": "date"
+                            },
+                            {
+                                "value": true,
+                                "type": "bool"
+                            },
+                            {
+                                "value": "test",
+                                "type": "bytes"
+                            }
+                        ],
+                        "sliceRecordCount": 100000
+                    }
+                },
+                "writer": {
+                    "name": "streamwriter",
+                    "parameter": {
+                        "print": false,
+                        "encoding": "UTF-8"
+                    }
+                }
+            }
+        ]
+    }
+}
diff --git a/datax/docker-compose.yml b/datax/docker-compose.yml
new file mode 100644
index 0000000..1c78c13
--- /dev/null
+++ b/datax/docker-compose.yml
@@ -0,0 +1,5 @@
+datax:
+  image: vimagick/datax
+  command: /data/job.json
+  volumes:
+    - ./data:/data