m4pl1mp/plugins/url_grabber_plugin.py
2022-07-27 00:08:43 -05:00

119 lines
6.1 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
from irc3.plugins.command import command
import irc3
import re
import requests
from lxml.html import fromstring
# Size constant; not referenced by the code visible in this file.
TOO_LONG = 2000

# All patterns below are raw strings so that regex escapes such as `\(`, `\/`
# and `\w` reach the `re` module untouched instead of being treated as
# (invalid) Python string escapes.

# Matches an http/https URL embedded in free text.
# NOTE: inside the third character class, `$-_` is a *range* (0x24-0x5F), not
# three literal characters. That range is what lets the pattern match '/',
# ':', '?' and '=' in paths and query strings, so it must not be "fixed".
URL_REGEX = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
    re.IGNORECASE,
)

# Matches youtube.com/watch?v=<id> and youtu.be/<id> links.
# Groups: (video id, optional trailing "&..." parameters, optional "amp;").
YOUTUBE_REGEX = re.compile(
    r'http(?:s?):\/\/(?:www\.)?youtu(?:be\.com\/watch\?v=|\.be\/)([\w\-\_]*)(&(amp;)?[\w\?=]*)?',
    re.IGNORECASE,
)

# Matches a twitter.com status link that ends exactly at the numeric status id.
# Groups: (user name, optional "es" of "statuses", status id).
TWITTER_REGEX = re.compile(
    r'https?:\/\/twitter\.com\/(?:#!\/)?(\w+)\/status(es)?\/(\d+)$',
    re.IGNORECASE,
)
###########################################################################################
###########################################################################################
@irc3.plugin
class Plugin:
    """irc3 plugin that announces the HTML <title> of URLs posted in chat.

    Every PRIVMSG is scanned for URLs. For the last URL found, the plugin
    fetches the beginning of the page, extracts its <title>, posts it to the
    target the message was sent to, and hands (nick, url, title) to
    ``bot.bbs.enter``. ``prefetch_title`` exposes the title lookup alone as a
    bot extension.
    """
    #######################################################################################
    #######################################################################################
    _FETCH_TIMEOUT = 3              # seconds, passed to requests.get
    _MAX_CONTENT_LENGTH = 200000    # skip pages that declare a larger body
    _CHUNK_SIZE = 2000              # bytes requested per streamed chunk
    _MAX_READ = _CHUNK_SIZE * 10    # stop reading once this many bytes arrived
    _MAX_TITLE_LENGTH = 100         # characters of the title that are kept
    _IGNORED_NICKS = ('[0]', '[0]_')
    #######################################################################################
    #######################################################################################
    def __init__(self, bot):
        """Keep a reference to the irc3 bot this plugin is attached to."""
        self.bot = bot
    #######################################################################################
    #######################################################################################
    @irc3.event(irc3.rfc.PRIVMSG)
    def on_privmsg(self, mask=None, target=None, data=None, **kw):
        """Handle a PRIVMSG: filter out unwanted messages, then look for a URL.

        Messages are skipped when they contain 'reacted with :', start with
        '?', come from a mask the bot reports as ignored, or come from one of
        the nicks in ``_IGNORED_NICKS``.
        """
        if 'reacted with :' in data:
            return
        if data.startswith("?"):
            return
        if self.bot.check_if_ignored(mask):
            return
        if mask.nick in self._IGNORED_NICKS:
            return
        self.__check_for_url(data, target, mask)
    #######################################################################################
    #######################################################################################
    def _say(self, target, text):
        """Pass ``text`` through ``bot.emo`` and send the result to ``target``."""
        self.bot.privmsg(target, self.bot.emo(text))
    #######################################################################################
    #######################################################################################
    def _fetch_title(self, url):
        """Return the cleaned-up <title> of ``url``, or None if there is none.

        Only the start of the body is downloaded: reading stops as soon as a
        closing title tag has been seen or ``_MAX_READ`` bytes have arrived.
        Pages that declare a Content-Length above ``_MAX_CONTENT_LENGTH`` are
        not read at all.

        Network and parser errors are not handled here; they propagate to the
        caller.
        """
        # NOTE(review): `url` is taken verbatim from chat text, so this request
        # can be pointed at arbitrary hosts -- consider restricting targets.
        with requests.get(url, timeout=self._FETCH_TIMEOUT, stream=True) as response:
            declared = response.headers.get('Content-Length')
            try:
                declared_size = int(declared) if declared else 0
            except ValueError:
                # Malformed header: treat the size as unknown and rely on the
                # read cap below instead of giving up.
                declared_size = 0
            if declared_size > self._MAX_CONTENT_LENGTH:
                return None

            # Accumulate chunks instead of parsing each one separately, so a
            # <title> that straddles a chunk boundary is still seen whole.
            head = bytearray()
            for chunk in response.iter_content(chunk_size=self._CHUNK_SIZE):
                head.extend(chunk)
                if len(head) >= self._MAX_READ or b'</title' in head.lower():
                    break
        # Leaving the `with` block closes the response, releasing the
        # connection even though the body was only partially read.

        if not head.strip():
            return None     # the parser rejects empty documents
        node = fromstring(bytes(head)).find(".//title")
        if node is None or not node.text:
            return None
        # Collapse all whitespace runs (including line breaks) so the title
        # stays on a single line when it is sent to IRC.
        title = ' '.join(node.text.split())[:self._MAX_TITLE_LENGTH]
        return title or None
    #######################################################################################
    #######################################################################################
    def __check_for_url(self, msg, target, mask):
        """Announce the title of the last URL in ``msg`` to ``target``.

        wp-login URLs are refused with a message; YouTube and Twitter status
        links are skipped silently; giphy.com and facebook.com links get a
        canned reply instead of a fetch. Any error while fetching or
        announcing is printed and otherwise ignored (best effort).
        """
        urls = URL_REGEX.findall(msg)
        if not urls:
            return
        url = urls[-1]
        if 'wp-login' in url.lower():
            self._say(target, 'pre-fetch aborted -> wp-login urls are not fetched')
            return
        # Both patterns have three capture groups, so any match is a hit.
        if YOUTUBE_REGEX.search(url) or TWITTER_REGEX.search(url):
            return
        try:
            if "giphy.com" in url:
                self._say(target, "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x02\x0303{url:}\x0F\x0303".format(nick=mask.nick,url="fuk giphy!!!"))
                return
            if "facebook.com" in url:
                self._say(target, "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x02\x0312{url:}\x0F\x0303".format(nick=mask.nick,url="fuk facebook!!!"))
                return
            title = self._fetch_title(url)
            if title is None:
                return
            self._say(target, "\x02\x0302{nick:}\x0F\x02\x0304 ▶ \x0F\x1D\x0314{title:}\x0F".format(nick=mask.nick,title=title))
            ######## URL_GRABBER <-> BOOMBOX_PLUGIN HOOK ######## URL_GRABBER <-> BOOMBOX_PLUGIN HOOK ########
            self.bot.bbs.enter(mask.nick, url, title)
            ######## URL_GRABBER <-> BOOMBOX_PLUGIN HOOK ######## URL_GRABBER <-> BOOMBOX_PLUGIN HOOK ########
        except Exception as e:
            # Event-handler boundary: a failed lookup must not take the bot down.
            print("%s" % e)
    #######################################################################################
    #######################################################################################
    @irc3.extend
    def prefetch_title(self, msg, target, mask):
        """Return the title of the last URL in ``msg``, or None.

        None is returned when ``msg`` has no URL, when the URL contains
        'wp-login', when the page has no usable <title>, or when the lookup
        fails (the error is printed). ``target`` and ``mask`` are accepted for
        interface compatibility and are not used.
        """
        urls = URL_REGEX.findall(msg)
        if not urls:
            return None
        url = urls[-1]
        if 'wp-login' in url.lower():
            print('error: url_grabber_plugin:prefetch_title - aborted')
            return None
        try:
            return self._fetch_title(url)
        except Exception as e:
            # Best effort: report the failure and signal "no title".
            print("%s" % e)
            return None
    #######################################################################################
    #######################################################################################
###########################################################################################
###########################################################################################