add scrapyd

This commit is contained in:
kev 2014-12-01 21:19:37 +08:00
parent 99a91fff79
commit 43dd686dd5
7 changed files with 106 additions and 0 deletions

2
.dockerignore Normal file
View File

@ -0,0 +1,2 @@
.git
.gitignore

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Backup files whose names end in "~" (left behind by some editors)
*~
# Swap files with a .swp extension (e.g. from Vim)
*.swp

2
scrapyd/001-scrapyd Normal file
View File

@ -0,0 +1,2 @@
# Override dropped into /etc/scrapyd/conf.d/ by the Dockerfile.
# It sets items_dir to an empty value.
# NOTE(review): presumably this disables scrapyd's on-disk item feed storage -- confirm against the scrapyd docs.
[scrapyd]
items_dir =

42
scrapyd/Dockerfile Normal file
View File

@ -0,0 +1,42 @@
#
# Dockerfile for scrapyd
#
# References:
# - http://docs.docker.com/reference/builder/
# - http://doc.scrapy.org/en/latest/topics/ubuntu.html#topics-ubuntu
# - https://github.com/scrapy/scrapyd/blob/master/debian/scrapyd.upstart#L9-L11
# - http://pip.readthedocs.org/en/latest/installing.html
# - http://supervisord.org/index.html
#
FROM ubuntu:14.04
LABEL maintainer="kev"

# Import an APT signing key and register the Scrapy package archive so that
# the scrapy-0.24 and scrapyd packages below can be resolved.
RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 627220E7
RUN echo 'deb http://archive.scrapy.org/ubuntu scrapy main' >/etc/apt/sources.list.d/scrapy.list

# System packages. The package index is refreshed, used and removed inside a
# single layer: deleting /var/lib/apt/lists in a later RUN would not shrink
# the image because the earlier layer would still contain the files.
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        git \
        libpq-dev \
        python-dev \
        python-numpy \
        python-pip \
        python-txzmq \
        scrapy-0.24 \
        scrapyd && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Python dependencies. Only requirements.txt is copied before this step so the
# (slow) pip layer stays cached when just the configuration files change.
WORKDIR /tmp
COPY ./requirements.txt /tmp/
RUN pip install -r requirements.txt

# Runtime configuration: supervisord's log directory, the scrapyd override
# and the supervisord configuration itself.
RUN mkdir -p /var/log/supervisor/
COPY ./001-scrapyd /etc/scrapyd/conf.d/
COPY ./supervisord.conf /etc/supervisor/supervisord.conf

# 6800: scrapyd, 9001: supervisord HTTP interface (see supervisord.conf).
EXPOSE 6800 9001

# Exec form: supervisord runs as PID 1 and receives the signals sent by
# `docker stop` directly instead of being hidden behind `/bin/sh -c`.
CMD ["supervisord", "-c", "/etc/supervisor/supervisord.conf"]

17
scrapyd/README.md Normal file
View File

@ -0,0 +1,17 @@
docker-scrapyd
==============
Dockerfile for building an image that runs [scrapyd][1] under supervisor.
Once the container is running, you can monitor it through the [supervisor web interface][2].
## Building
    $ docker build -t scrapyd .
## Running
    $ docker run -p 6800:6800 -p 9001:9001 scrapyd
[1]: https://github.com/scrapy/scrapyd
[2]: http://admin:admin@localhost:9001

21
scrapyd/requirements.txt Normal file
View File

@ -0,0 +1,21 @@
# Installer options: permit packages hosted outside the package index and
# allow the unverified download of jsonpath.
# NOTE(review): these are legacy pip flags; presumably the pip shipped with the
# base image still accepts them -- confirm before upgrading pip.
--allow-all-external
--allow-unverified jsonpath
# Parsing, extraction and validation libraries
jsonpath
jsonschema
pyquery
pyparsing
# Installed directly from the Git repository (no version or revision pinned)
git+https://github.com/scrapy/scrapely
# Database client libraries
redis
pymongo
psycopg2
# Miscellaneous utilities
requests
chardet
toolz
# Provides the supervisord command started by the image's CMD
supervisor

20
scrapyd/supervisord.conf Normal file
View File

@ -0,0 +1,20 @@
; supervisord configuration for the scrapyd image.

; Daemon settings: stay in the foreground (nodaemon) so the container keeps
; running, and write the main log and child logs under /var/log/supervisor.
[supervisord]
nodaemon = true
logfile = /var/log/supervisor/supervisord.log
childlogdir = /var/log/supervisor
pidfile = /var/run/supervisord.pid

; HTTP interface on port 9001, protected by a username and a hashed password.
[inet_http_server]
port = :9001
username = admin
password = {SHA}d033e22ae348aeb5660fc2140aec35850c4da997

; RPC interface factory for the "supervisor" namespace.
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

; supervisorctl reaches the daemon through the HTTP interface defined above.
[supervisorctl]
serverurl = http://localhost:9001

; The managed service: scrapyd, run with user "scrapy" and group "nogroup",
; writing its pidfile and log to the paths given on the command line.
[program:scrapyd]
command = /usr/bin/scrapyd -u scrapy -g nogroup --pidfile /var/run/scrapyd.pid -l /var/log/scrapyd/scrapyd.log