From 43dd686dd53c55a59274069dc7d293c35e48e78a Mon Sep 17 00:00:00 2001
From: kev
Date: Mon, 1 Dec 2014 21:19:37 +0800
Subject: [PATCH] add scrapyd

---
Notes (review):
    scrapyd/Dockerfile was revised after the original commit was exported:
    the APT list cleanup now runs in the same layer as the install, plain
    local files are brought in with COPY instead of ADD, and CMD uses the
    exec form. The hunk header and diffstat below reflect the revised file;
    the commit id above and the Dockerfile "index" blob id still refer to
    the original content.

 .dockerignore            |  2 ++
 .gitignore               |  2 ++
 scrapyd/001-scrapyd      |  2 ++
 scrapyd/Dockerfile       | 48 ++++++++++++++++++++++++++++++++++++++++
 scrapyd/README.md        | 17 ++++++++++++++
 scrapyd/requirements.txt | 21 ++++++++++++++++++
 scrapyd/supervisord.conf | 20 +++++++++++++++++
 7 files changed, 112 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .gitignore
 create mode 100644 scrapyd/001-scrapyd
 create mode 100644 scrapyd/Dockerfile
 create mode 100644 scrapyd/README.md
 create mode 100644 scrapyd/requirements.txt
 create mode 100644 scrapyd/supervisord.conf

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..3978a0f
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+.git
+.gitignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b72f9be
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*~
+*.swp
diff --git a/scrapyd/001-scrapyd b/scrapyd/001-scrapyd
new file mode 100644
index 0000000..14f164b
--- /dev/null
+++ b/scrapyd/001-scrapyd
@@ -0,0 +1,2 @@
+[scrapyd]
+items_dir =
diff --git a/scrapyd/Dockerfile b/scrapyd/Dockerfile
new file mode 100644
index 0000000..b2b96f5
--- /dev/null
+++ b/scrapyd/Dockerfile
@@ -0,0 +1,48 @@
+#
+# Dockerfile for scrapyd
+#
+# References:
+# - http://docs.docker.com/reference/builder/
+# - http://doc.scrapy.org/en/latest/topics/ubuntu.html#topics-ubuntu
+# - https://github.com/scrapy/scrapyd/blob/master/debian/scrapyd.upstart#L9-L11
+# - http://pip.readthedocs.org/en/latest/installing.html
+# - http://supervisord.org/index.html
+#
+
+FROM ubuntu:14.04
+MAINTAINER kev
+
+# Register the Scrapy APT repository and the key its packages are signed with.
+RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 627220E7
+RUN echo 'deb http://archive.scrapy.org/ubuntu scrapy main' >/etc/apt/sources.list.d/scrapy.list
+
+# Install everything in one layer and drop the APT lists in that same layer;
+# removing them in a later RUN would leave them in the image history.
+RUN apt-get update && \
+    apt-get install -y \
+        build-essential \
+        git \
+        libpq-dev \
+        python-dev \
+        python-numpy \
+        python-pip \
+        python-txzmq \
+        scrapy-0.24 \
+        scrapyd && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+RUN mkdir /var/log/supervisor/
+
+# Plain local files: COPY is sufficient, ADD's archive/URL handling is not needed.
+COPY ./001-scrapyd /etc/scrapyd/conf.d/
+COPY ./requirements.txt /tmp/
+COPY ./supervisord.conf /etc/supervisor/supervisord.conf
+
+WORKDIR /tmp
+RUN pip install -r requirements.txt
+
+EXPOSE 6800 9001
+
+# Exec form so supervisord is PID 1 and receives the stop signal directly.
+CMD ["supervisord", "-c", "/etc/supervisor/supervisord.conf"]
+
diff --git a/scrapyd/README.md b/scrapyd/README.md
new file mode 100644
index 0000000..039aab5
--- /dev/null
+++ b/scrapyd/README.md
@@ -0,0 +1,17 @@
+docker-scrapyd
+==============
+
+Dockerfile for building an image that runs [scrapyd][1].
+Then monitor it with [supervisor][2].
+
+## Building
+
+    $ docker build -t scrapyd .
+
+## Running
+
+    $ docker run -p 6800:6800 -p 9001:9001 scrapyd
+
+[1]: https://github.com/scrapy/scrapyd
+[2]: http://admin:admin@localhost:9001
+
diff --git a/scrapyd/requirements.txt b/scrapyd/requirements.txt
new file mode 100644
index 0000000..791206a
--- /dev/null
+++ b/scrapyd/requirements.txt
@@ -0,0 +1,21 @@
+--allow-all-external
+--allow-unverified jsonpath
+
+# parser
+jsonpath
+jsonschema
+pyquery
+pyparsing
+git+https://github.com/scrapy/scrapely
+
+# database
+redis
+pymongo
+psycopg2
+
+# others
+requests
+chardet
+toolz
+supervisor
+
diff --git a/scrapyd/supervisord.conf b/scrapyd/supervisord.conf
new file mode 100644
index 0000000..7f69856
--- /dev/null
+++ b/scrapyd/supervisord.conf
@@ -0,0 +1,20 @@
+[inet_http_server]
+port = :9001
+username = admin
+password = {SHA}d033e22ae348aeb5660fc2140aec35850c4da997
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+[supervisord]
+logfile=/var/log/supervisor/supervisord.log
+pidfile=/var/run/supervisord.pid
+childlogdir=/var/log/supervisor
+nodaemon=true
+
+[supervisorctl]
+serverurl=http://localhost:9001
+
+[program:scrapyd]
+command = /usr/bin/scrapyd -u scrapy -g nogroup --pidfile /var/run/scrapyd.pid -l /var/log/scrapyd/scrapyd.log
+