1
2
mirror of https://github.com/vimagick/dockerfiles synced 2024-06-28 09:41:20 +00:00

update scrapyd

This commit is contained in:
kev 2016-08-10 11:32:53 +08:00
parent 684740c84b
commit 34ee0334c0
3 changed files with 40 additions and 7 deletions

@ -26,14 +26,37 @@ This image is based on `debian:jessie`, 5 latest python packages are installed:
Please use this as base image for your own project.
## docker-compose.yml
```yaml
scrapyd:
image: vimagick/scrapyd
ports:
- "6800:6800"
restart: always
scrapy:
image: vimagick/scrapyd
command: bash
volumes:
- .:/code
working_dir: /code
restart: always
```
## Run it as background-daemon for scrapyd
``` ```bash
$ docker run -d --restart always --name scrapyd -p 6800:6800 vimagick/scrapyd $ docker-compose up -d scrapyd
$ firefox http://localhost:6800 $ docker-compose logs -f scrapyd
$ docker cp scrapyd_scrapyd_1:/var/lib/scrapyd/items .
$ tree items
└── myproject
└── myspider
└── ad6153ee5b0711e68bc70242ac110005.jl
```
``` ```bash
$ mkvirtualenv webbot
$ pip install scrapy scrapyd-client
@ -48,11 +71,12 @@ $ scrapy list
$ vi scrapy.cfg
$ scrapyd-client deploy
$ curl http://localhost:6800/schedule.json -d project=myproject -d spider=myspider
$ firefox http://localhost:6800
```
File: scrapy.cfg
``` ```ini
[settings]
default = myproject.settings
@ -63,7 +87,7 @@ project = myproject
## Run it as interactive-shell for scrapy
``` ```bash
$ cat > stackoverflow_spider.py << _EOF_
import scrapy
@ -86,7 +110,7 @@ class StackOverflowSpider(scrapy.Spider):
}
_EOF_
$ docker run -it --rm -v `pwd`:/code -w /code vimagick/scrapyd bash $ docker-compose run --rm scrapy
>>> scrapy runspider stackoverflow_spider.py -o top-stackoverflow-questions.json
>>> cat top-stackoverflow-questions.json
>>> exit

@ -3,3 +3,11 @@ scrapyd:
ports:
- "6800:6800"
restart: always
scrapy:
image: vimagick/scrapyd
command: bash
volumes:
- .:/code
working_dir: /code
restart: always

@ -24,3 +24,4 @@ listspiders.json = scrapyd.webservice.ListSpiders
delproject.json = scrapyd.webservice.DeleteProject
delversion.json = scrapyd.webservice.DeleteVersion
listjobs.json = scrapyd.webservice.ListJobs
daemonstatus.json = scrapyd.webservice.DaemonStatus