diff --git a/renderjs/Dockerfile b/renderjs/Dockerfile
new file mode 100644
index 0000000..14593e3
--- /dev/null
+++ b/renderjs/Dockerfile
@@ -0,0 +1,22 @@
+#
+# Dockerfile for renderjs
+#
+
+FROM ubuntu:14.04
+MAINTAINER kev
+
+RUN apt-get update && apt-get install -y supervisor haproxy libfontconfig1 libfreetype6
+
+ADD ./phantomjs /usr/local/bin/
+ADD ./render.js /usr/local/bin/
+ADD ./restart.sh /usr/local/bin/
+ADD ./haproxy.cfg /etc/haproxy/
+ADD ./supervisor/supervisord.conf /etc/supervisor/
+ADD ./supervisor/conf.d/haproxy.conf /etc/supervisor/conf.d/
+ADD ./supervisor/conf.d/renderjs.conf /etc/supervisor/conf.d/
+ADD ./supervisor/conf.d/restart.conf /etc/supervisor/conf.d/
+
+EXPOSE 1024 9001
+
+CMD /usr/bin/supervisord -n -c /etc/supervisor/supervisord.conf
+
diff --git a/renderjs/README.md b/renderjs/README.md
new file mode 100644
index 0000000..ae8c092
--- /dev/null
+++ b/renderjs/README.md
@@ -0,0 +1,17 @@
+docker-renderjs
+===============
+
+Dockerfile for building an image that runs webkit rendering service.
+
+## Building
+
+    $ docker build -t renderjs .
+
+## Running
+
+    $ docker run -p 1024:1024 -p 9001:9001 renderjs
+
+## Playing
+
+    $ http :1024 url=http://datageek.info/
+
diff --git a/renderjs/haproxy.cfg b/renderjs/haproxy.cfg
new file mode 100644
index 0000000..1a09fe6
--- /dev/null
+++ b/renderjs/haproxy.cfg
@@ -0,0 +1,40 @@
+global
+    log /dev/log local0
+    log /dev/log local1 notice
+    chroot /var/lib/haproxy
+    user haproxy
+    group haproxy
+    #daemon
+
+defaults
+    log global
+    mode http
+    option httplog
+    option dontlognull
+    contimeout 5000
+    clitimeout 50000
+    srvtimeout 50000
+    errorfile 400 /etc/haproxy/errors/400.http
+    errorfile 403 /etc/haproxy/errors/403.http
+    errorfile 408 /etc/haproxy/errors/408.http
+    errorfile 500 /etc/haproxy/errors/500.http
+    errorfile 502 /etc/haproxy/errors/502.http
+    errorfile 503 /etc/haproxy/errors/503.http
+    errorfile 504 /etc/haproxy/errors/504.http
+
+
+frontend front
+    bind 0.0.0.0:1024
+    default_backend back
+
+backend back
+    server s0 127.0.0.1:8080 maxconn 10
+    server s1 127.0.0.1:8081 maxconn 10
+    server s2 127.0.0.1:8082 maxconn 10
+    server s3 127.0.0.1:8083 maxconn 10
+    server s4 127.0.0.1:8084 maxconn 10
+    server s5 127.0.0.1:8085 maxconn 10
+    server s6 127.0.0.1:8086 maxconn 10
+    server s7 127.0.0.1:8087 maxconn 10
+    server s8 127.0.0.1:8088 maxconn 10
+    server s9 127.0.0.1:8089 maxconn 10
diff --git a/renderjs/phantomjs b/renderjs/phantomjs
new file mode 100755
index 0000000..af9e4ab
Binary files /dev/null and b/renderjs/phantomjs differ
diff --git a/renderjs/render.js b/renderjs/render.js
new file mode 100755
index 0000000..a158504
--- /dev/null
+++ b/renderjs/render.js
@@ -0,0 +1,199 @@
+#!/usr/bin/env phantomjs
+/*
+ * Webpage Rendering Service
+ * =========================
+ *
+ * SERVER
+ * ------
+ *
+ *   $ phantomjs render.js 8080
+ *
+ * The port argument is optional; 8080 is used when it is absent or unparsable.
+ * CLIENT
+ * ------
+ *
+ *   $ http :8080 url=http://www.python.org timeout=5000 wait=1000
+ * Requests must be POST with a JSON body; "url" must start with http:// or https://.
+ */
+
+sys = require('system'); // assigned without var, so sys is a global
+svr = require('webserver').create(); // also a global; the HTTP server the handler below is attached to
+
+if(sys.args.length > 2) { // at most one argument (the port) is accepted after the script name
+    console.log('Usage: phantomjs render.js [port]');
+    phantom.exit(1);
+}
+
+port = parseInt(sys.args[1]) || 8080; // NaN (missing/unparsable) and 0 both fall back to 8080
+
+service = svr.listen(port, function(request, response){ // handler invoked with each incoming request/response pair
+
+    if(request.method === 'POST'){
+        try{
+            var raw = request.postRaw || request.post || '{}', // request body text; '{}' when both are empty
+                qs = JSON.parse(raw), // throws on malformed JSON; handled by the catch below
+                style = qs.style || false, // falsy: CSS requests are aborted (see onResourceRequested below)
+                image = qs.image || false, // becomes page.settings.loadImages
+                shot = qs.shot || false,
+                content = qs.content || '', // the use of content/proxy/method/data/timeout is not visible in this copy
+                proxy = qs.proxy || '',
+                method = qs.method || 'get',
+                url = qs.url,
+                headers = qs.headers || {},
+                data = qs.data || '',
+                wait = qs.wait || 1000, // presumably milliseconds (send_later hands its wait to setTimeout) - call site not visible, confirm
+                timeout = qs.timeout || 30000;
+            if(!/^https?:\/\//.test(url)){ // rejects anything not starting with http:// or https://, including a missing url
+                throw 'bad request'; // caught just below and answered with 400
+            }
+        }catch(e){
+            log(e);
+            return send(page, request, response, 400); // NOTE(review): page is still undefined here (it is assigned further down)
+        }
+    }else{
+        return send(page, request, response, 405); // non-POST requests get 405; page is undefined here as well
+    }
+
+    log('recv:', url);
+
+    //create PAGE object
+    var page = require('webpage').create();
+
+    //set headers
+    headers['Accept-Encoding'] = 'identity'; // overrides any Accept-Encoding supplied in the request's headers object
+    page.customHeaders = headers;
+
+    //no error message
+    page.onError = null;
+
+    //load images (yes/no)
+    page.settings.loadImages = image;
+
+    //load style (yes/no)
+    if(!style){
+        page.onResourceRequested = function(requestData, request) { // this request parameter shadows the handler's request
+            var url = requestData['url'], // local url, shadows the handler's url inside this callback
+                type = requestData.headers['Content-Type'];
+            if((/http:\/\/.+?\.css/gi).test(url) || type == 'text/css') { // the pattern only matches http:// URLs; https stylesheets rely on the type check
+                request.abort();
+            }
+        };
+    }
+
+    //set cookies (yes/no)
+    phantom.clearCookies(); // runs on every request, before any cookies from the Cookie header are handled
+    var cs = headers['Cookie'];
+    if(cs){
+        delete headers['Cookie']; // the Cookie entry is removed from the headers object once it has been read
+        cs = parse_cookies(url, cs);
+        for(var i=0; i=0){ // NOTE(review): text is missing from this line in this copy - the rest of this loop, the remainder of the handler and the start of send() are not visible
+        var ctype = 'application/json';
+        var content = JSON.stringify({ // JSON reply bundles the page's headers, cookies and the html argument
+            'headers': page.customHeaders,
+            'cookies': page.cookies,
+            'content': html
+        });
+    }else{
+        var ctype = 'text/html';
+        var content = html || ''; // empty body when no html was passed
+    }
+
+    response.setHeader('Content-Type', ctype)
+    response.write(content);
+    response.close();
+}
+
+function send_later(page, request, response, wait, shot){ // after a delay of wait, sends page.content with status 200, optionally renders a PNG, then closes the page
+    log('wait:', wait);
+    var wid = setTimeout(function(){ // wid is not used anywhere in the visible code
+        log('send:', page.url);
+        log('output:', page.content.length, 'bytes')
+        send(page, request, response, 200, page.content);
+        if(shot){
+            var fn = '/tmp/'+(new Date()).getTime()+'.png'; // file name is the current time in epoch milliseconds
+            log('shot:', fn);
+            page.render(fn); // rendered after the response has already been sent
+        }
+        page.close();
+    }, wait);
+}
+
+function parse_cookies(url, cs){ // NOTE(review): this definition is cut off below in this copy; only its first lines are visible
+    var domain = url.match(/https?:\/\/([^\/]+)/)[1] // host part of url: everything between the scheme and the next '/', including any port
+    var cookies = cs.split('; '); // cookie pairs are split on "; "
+    for(var i=0; i