119 lines
6.1 KiB
Python
119 lines
6.1 KiB
Python
# -*- coding: utf-8 -*-
|
||
from irc3.plugins.command import command
|
||
import irc3
|
||
import re
|
||
import requests
|
||
from lxml.html import fromstring
|
||
# NOTE(review): not referenced anywhere in the visible part of this file;
# kept because other modules may import it.
TOO_LONG = 2000

# Patterns are raw strings so that regex escapes such as ``\(`` or ``\/`` are
# not first interpreted (and warned about) as string-literal escapes.

# An http:// or https:// prefix followed by one or more URL characters or
# %XX escapes. No capturing groups, so findall() yields whole URLs.
URL_REGEX = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', re.IGNORECASE)

# youtube.com/watch?v=<id> and youtu.be/<id> links. Three groups: the id,
# an optional trailing "&..." parameter, and the optional "amp;" inside it.
YOUTUBE_REGEX = re.compile(r'http(?:s?):\/\/(?:www\.)?youtu(?:be\.com\/watch\?v=|\.be\/)([\w\-\_]*)(&(amp;)?[\w\?=]*)?', re.IGNORECASE)

# twitter.com/<user>/status/<digits> (also "statuses" and the legacy "#!/"
# form), anchored at the end of the string. Three groups: user, "es", digits.
TWITTER_REGEX = re.compile(r'https?:\/\/twitter\.com\/(?:#!\/)?(\w+)\/status(es)?\/(\d+)$', re.IGNORECASE)
|
||
###########################################################################################
|
||
###########################################################################################
|
||
@irc3.plugin
class Plugin:
    """URL grabber plugin.

    Scans incoming PRIVMSG text for URLs, fetches the last URL found and
    posts the page's ``<title>`` back to the target. Also exposes
    ``prefetch_title`` on the bot (via ``irc3.extend``) so other code can
    resolve a title without anything being posted.
    """

    # Size in bytes of each chunk pulled from the streamed HTTP response.
    _CHUNK_SIZE = 2000
    # Give up looking for a title once this many bytes have been buffered.
    _MAX_READ_BYTES = _CHUNK_SIZE * 10
    # Responses declaring a larger Content-Length are not read at all.
    _MAX_CONTENT_LENGTH = 200000
    # Titles are cut to this many characters.
    _MAX_TITLE_CHARS = 100
    # Messages from these nicks are never scanned.
    _SKIPPED_NICKS = ('[0]', '[0]_')

    def __init__(self, bot):
        """Keep a reference to the bot this plugin is attached to."""
        self.bot = bot

    @irc3.event(irc3.rfc.PRIVMSG)
    def on_privmsg(self, mask=None, target=None, data=None, **kw):
        """Handle a PRIVMSG: filter it, then look for a URL to announce.

        Skipped messages: empty ones, those containing ``'reacted with :'``,
        those starting with ``?``, those from a mask the bot reports as
        ignored, and those from the nicks in ``_SKIPPED_NICKS``.
        """
        if not data:
            # Nothing to scan; also avoids calling str methods on None.
            return
        if 'reacted with :' in data:
            return
        if data.startswith("?"):
            return
        if self.bot.check_if_ignored(mask):
            return
        if mask.nick in self._SKIPPED_NICKS:
            return
        self.__check_for_url(data, target, mask)

    @staticmethod
    def _last_url(msg):
        """Return the last URL found in *msg*, or None when there is none."""
        found = URL_REGEX.findall(msg)
        return found[-1] if found else None

    @staticmethod
    def _is_blocked(url):
        """Return True for URLs that must never be fetched (wp-login pages)."""
        return 'wp-login' in url.lower()

    def _extract_title(self, markup):
        """Return the stripped, truncated <title> text of *markup*, or None.

        *markup* is the raw bytes buffered from the start of the response.
        """
        if not markup.strip():
            # lxml raises on empty documents; there is no title anyway.
            return None
        node = fromstring(markup).find(".//title")
        if node is None or node.text is None:
            return None
        return node.text.strip()[:self._MAX_TITLE_CHARS] or None

    def _fetch_title(self, url):
        """Fetch *url* and return its page title, or None if none is found.

        The response is streamed and buffered until the closing title tag
        shows up or ``_MAX_READ_BYTES`` have been read, then parsed once, so
        a title that straddles two chunks is still seen whole. Errors from
        requests, lxml or a malformed Content-Length propagate to the caller.
        """
        response = requests.get(url, timeout=3, stream=True)
        try:
            declared_length = int(response.headers.get('Content-Length') or 0)
            if declared_length > self._MAX_CONTENT_LENGTH:
                return None
            buffered = bytearray()
            for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                buffered.extend(chunk)
                if b'</title' in buffered.lower():
                    break
                if len(buffered) >= self._MAX_READ_BYTES:
                    break
        finally:
            # stream=True keeps the connection open until explicitly closed.
            response.close()
        return self._extract_title(bytes(buffered))

    def __check_for_url(self, msg, target, mask):
        """Announce the title of the last URL in *msg* to *target*.

        wp-login URLs get an abort notice, YouTube and Twitter status links
        are skipped silently, giphy.com and facebook.com links get a canned
        reply, and every other URL is fetched for its title. A found title
        is posted and also handed to ``self.bot.bbs.enter``.
        """
        url = self._last_url(msg)
        if url is None:
            return
        if self._is_blocked(url):
            self.bot.privmsg(target, self.bot.emo('pre-fetch aborted'))
            return
        # Both patterns have exactly three groups, so any match means "skip".
        if YOUTUBE_REGEX.search(url) or TWITTER_REGEX.search(url):
            return
        try:
            if "giphy.com" in url:
                msg = "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x02\x0303{url:}\x0F\x0303".format(nick=mask.nick, url="fuk giphy!!!")
                self.bot.privmsg(target, self.bot.emo(msg))
                return
            if "facebook.com" in url:
                msg = "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x02\x0312{url:}\x0F\x0303".format(nick=mask.nick, url="fuk facebook!!!")
                self.bot.privmsg(target, self.bot.emo(msg))
                return
            title = self._fetch_title(url)
            if title is None:
                return
            msg = "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x1D\x0314{title:}\x0F".format(nick=mask.nick, title=title)
            self.bot.privmsg(target, self.bot.emo(msg))
            # URL_GRABBER <-> BOOMBOX_PLUGIN hook.
            self.bot.bbs.enter(mask.nick, url, title)
        except Exception as e:
            # Best effort at the event boundary: a bad URL or a failing hook
            # must not take the message handler down.
            print("%s" % e)

    @irc3.extend
    def prefetch_title(self, msg, target, mask):
        """Return the title of the last URL in *msg*, or None.

        Nothing is posted to IRC; *target* and *mask* are accepted but not
        used. wp-login URLs are refused and fetch errors are printed, both
        resulting in None.
        """
        url = self._last_url(msg)
        if url is None:
            return None
        if self._is_blocked(url):
            print('error: url_grabber_plugin:prefetch_title - aborted')
            return None
        try:
            return self._fetch_title(url)
        except Exception as e:
            print("%s" % e)
            return None
|
||
#######################################################################################
|
||
#######################################################################################
|
||
###########################################################################################
|
||
###########################################################################################
|