m4pl1mp/plugins/url_grabber_plugin.py
2022-02-01 22:44:42 -06:00

84 lines
4.4 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import irc3
import re
import requests
from lxml.html import fromstring
TOO_LONG = 2000
URL_REGEX = re.compile('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', re.IGNORECASE)
YOUTUBE_REGEX = re.compile('http(?:s?):\/\/(?:www\.)?youtu(?:be\.com\/watch\?v=|\.be\/)([\w\-\_]*)(&(amp;)?[\w\?=]*)?', re.IGNORECASE)
TWITTER_REGEX = re.compile('https?:\/\/twitter\.com\/(?:#!\/)?(\w+)\/status(es)?\/(\d+)$', re.IGNORECASE)
###########################################################################################
###########################################################################################
@irc3.plugin
class Plugin:
#######################################################################################
#######################################################################################
def __init__(self, bot):
self.bot = bot
#######################################################################################
#######################################################################################
@irc3.event(irc3.rfc.PRIVMSG)
def on_privmsg(self, mask=None, target=None, data=None, **kw):
if not data.find('reacted with :') == -1: return
if data.startswith("?"): return
if self.bot.check_if_ignored(mask): return
if mask.nick == '[0]' or mask.nick == '[0]_': return
self.__check_for_url(data, target, mask)
#######################################################################################
#######################################################################################
def __check_for_url(self, msg, target, mask):
match_list = URL_REGEX.findall(msg)
read_size = 0
if match_list:
url = match_list.pop()
if not url.lower().find('wp-login') == -1:
msg = 'pre-fetch aborted -> hell nah nigga'
msg = self.bot.emo(msg)
self.bot.privmsg(target, msg)
return
y_match = YOUTUBE_REGEX.findall(url)
if y_match:
y_match = y_match.pop()
if len(y_match) == 3:
return
t_match = TWITTER_REGEX.findall(url)
if t_match:
t_match = t_match.pop()
if len(t_match) == 3:
return
try:
if not url.find("giphy.com") == -1:
msg = "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x02\x0303{url:}\x0F\x0303".format(nick=mask.nick,url="fuk giphy!!!")
msg = self.bot.emo(msg)
self.bot.privmsg(target, msg)
return
if not url.find("facebook.com") == -1:
msg = "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x02\x0312{url:}\x0F\x0303".format(nick=mask.nick,url="fuk facebook!!!")
msg = self.bot.emo(msg)
self.bot.privmsg(target, msg)
return
r = requests.get(url, timeout=3, stream=True)
content_type = r.headers.get("Content-Type")
content_length = r.headers.get('Content-Length')
if not content_length:
content_length = 0
if int(content_length) > 200000:
return
while read_size <= (2000 * 10):
for content in r.iter_content(chunk_size=2000):
tree = fromstring(content)
title = tree.find(".//title")
if title is not None:
title = title.text.strip()[:100]
msg = "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x1D\x0314{title:}\x0F".format(nick=mask.nick,title=title)
msg = self.bot.emo(msg)
self.bot.privmsg(target, msg)
return
read_size = read_size + 2000
except Exception as e:
print("%s" % e)
pass
#######################################################################################
#######################################################################################
###########################################################################################
###########################################################################################