# -*- coding: utf-8 -*-
"""irc3 plugin that announces tweets linked in IRC messages.

When a message ends with a twitter.com status URL, the tweet is fetched
through the Twitter API and summarised in the channel.  Links and media
attached to the tweet are then "unrolled": quoted tweets are fetched, YouTube
links are handed to the bot's ``yt`` command, images are reported as media
and HTML pages are reported with their ``<title>``.
"""
import os
import re
import time
from datetime import datetime, timezone
from difflib import SequenceMatcher

import irc3
import requests
import timeago
import twitter
from lxml.html import fromstring

###############################################################################
###############################################################################

CONSUMER_KEY = os.environ['CONSUMER_KEY']
CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
ACCESS_TOKEN_KEY = os.environ['ACCESS_TOKEN_KEY']
ACCESS_TOKEN_SECRET = os.environ['ACCESS_TOKEN_SECRET']

TOO_LONG = 2000

# Limits used while pre-fetching a linked page to extract its title.
_CHUNK_SIZE = 2000
_MAX_PREFETCH_BYTES = _CHUNK_SIZE * 10
_MAX_CONTENT_LENGTH = 200000

# Raw strings keep the backslashes literal without relying on Python passing
# unknown escape sequences through.  Group 1 of YOUTUBE_REGEX is the video id.
# The original pattern carried pasted zero-width characters in its optional
# trailing group, which prevented that group from ever matching.
YOUTUBE_REGEX = re.compile(
    r'http(?:s?):\/\/(?:www\.)?youtu(?:be\.com\/watch\?v=|\.be\/)'
    r'([\w\-\_]*)(&(amp;)?[\w\?=]*)?',
    re.IGNORECASE)
# Anchored with `$`: only a status URL at the very end of the text matches.
TWITTER_REGEX = re.compile(
    r'https?:\/\/twitter\.com\/(?:#!\/)?(\w+)\/status(es)?\/(\d+)$',
    re.IGNORECASE)
URL_REGEX = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]'
    r'|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
    re.IGNORECASE)

# NOTE: this rebinds the module name ``twitter`` to the API client instance;
# from here on ``twitter`` is the client, not the imported module.
twitter = twitter.Api(consumer_key=CONSUMER_KEY,
                      consumer_secret=CONSUMER_SECRET,
                      access_token_key=ACCESS_TOKEN_KEY,
                      access_token_secret=ACCESS_TOKEN_SECRET)


def _status_id_from_url(url):
    """Return the last path segment of *url* without query or fragment."""
    path = url.split('?', 1)[0].rstrip('/')
    return path.split('/')[-1].split('#', 1)[0]


def _twitter_error_details(error):
    """Return ``(code, message)`` for *error*.

    Handles errors whose ``message`` (or first argument) is a list of
    ``{'code': ..., 'message': ...}`` dicts as well as arbitrary exceptions,
    for which the code is ``None`` and the message is ``str(error)``.
    """
    payload = getattr(error, 'message', None)
    if payload is None and error.args:
        payload = error.args[0]
    if isinstance(payload, (list, tuple)) and payload:
        payload = payload[0]
    if isinstance(payload, dict):
        return payload.get('code'), payload.get('message', str(error))
    return None, str(error)


def _read_title(response):
    """Return the stripped ``<title>`` text (max 100 chars) of *response*.

    Reads at most ``_MAX_PREFETCH_BYTES`` from the streamed response and
    stops early once a closing title tag has been seen.  Returns ``None``
    when no usable title is found.
    """
    head = bytearray()
    for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
        head.extend(chunk)
        if b'</title' in head.lower() or len(head) >= _MAX_PREFETCH_BYTES:
            break
    if not head.strip():
        return None
    title = fromstring(bytes(head)).find(".//title")
    if title is None or not title.text:
        return None
    return title.text.strip()[:100] or None


###############################################################################
###############################################################################
@irc3.plugin
class Plugin:
    """Announce linked tweets and unroll the links they contain."""

    ###########################################################################
    def __init__(self, bot):
        self.bot = bot

    ###########################################################################
    @irc3.extend
    def _similar(self, a, b):
        """Return the ``SequenceMatcher`` similarity ratio of *a* and *b*."""
        return SequenceMatcher(None, a, b).ratio()

    ###########################################################################
    @irc3.extend
    def _check_for_url(self, og_tweet, d_nick, d_url, d_unrolled, d_text,
                       d_target):
        """Announce what the expanded link *d_unrolled* points to.

        :param og_tweet: tweet object the link was found in
        :param d_nick: screen name used as message prefix
        :param d_url: shortened URL as it appears in the tweet
        :param d_unrolled: expanded URL to inspect
        :param d_text: text of the original tweet, used to suppress
            announcements whose page title merely repeats the tweet
        :param d_target: IRC target the messages are sent to

        Errors are reported to *d_target* and printed; they never propagate.
        """
        if not URL_REGEX.search(d_unrolled):
            return
        try:
            if ('https://twitter.com/' in d_unrolled
                    and 'status' in d_unrolled):
                self._unroll_quoted_tweet(og_tweet, d_nick, d_unrolled,
                                          d_target)
                return
            self._announce_page(d_nick, d_url, d_unrolled, d_text, d_target)
        except Exception as e:
            # The original call omitted the target, so the handler itself
            # raised; report to the channel the link came from.
            self.bot.privmsg(
                d_target, "_debug_check_for_url_error: {}".format(e))
            print("original: {} nick: {} url: {} unrolled: {} text: {} error: {}".format(og_tweet,d_nick,d_url,d_unrolled,d_text,e))

    def _unroll_quoted_tweet(self, og_tweet, d_nick, d_unrolled, d_target):
        """Fetch the tweet behind *d_unrolled* and announce its content."""
        try:
            e_tweet = twitter.GetStatus(_status_id_from_url(d_unrolled))
            e_text = e_tweet.text
            # A near-identical text means the link points back at (a copy
            # of) the tweet that was already announced.
            if self._similar(og_tweet.text, e_text) > 0.7:
                return
            parts = ["\x02\x0302{nick1:}:{nick2:}\x0F\x02\x0303 ▶▶ {e_text:} \x0F".format(nick1=d_nick,nick2='UNROLLED',e_text=e_text)]
            for media in e_tweet.media or ():
                parts.append(" ▶▶ [media] \x0F\x02\x0312{media_url:}".format(media_url=media.media_url))
            for link in e_tweet.urls or ():
                parts.append(" ▶▶ [url] \x0F\x02\x0312{e_url:}".format(e_url=link.expanded_url))
            msg = self.bot.emo("".join(parts))
            self.bot.privmsg(d_target, msg)
        except Exception as e:
            msg = "wu/tang >>>>>>>>>>> sub-unrolling: {}".format(e)
            msg = self.bot.emo(msg)
            self.bot.privmsg(d_target, msg)

    def _announce_page(self, d_nick, d_url, d_unrolled, d_text, d_target):
        """Request *d_unrolled* and announce it as media or by page title."""
        r = requests.get(d_unrolled, timeout=3, stream=True)
        try:
            # A missing header is treated as "unknown type" rather than
            # crashing on None.
            content_type = r.headers.get("Content-Type") or ""
            if content_type.startswith('image'):
                msg = "\x02\x0302{nick1:}:{nick2:}\x0F\x02\x0303 ▶▶ [media] {media:} \x0F".format(nick1=d_nick,nick2='UNROLLED',media=d_unrolled)
                msg = self.bot.emo(msg)
                self.bot.privmsg(d_target, msg)
                return
            if not content_type.startswith("text/html"):
                return
            try:
                content_length = int(r.headers.get('Content-Length') or 0)
            except ValueError:
                content_length = 0
            if content_length > _MAX_CONTENT_LENGTH:
                self.bot.privmsg(d_target, "pre-fetch aborted -> fuck your large ass content -> {} -> {}".format(d_url,d_unrolled))
                # Actually abort, as the message says.
                return
            title = _read_title(r)
            if title is None:
                return
            print('title: {}'.format(title))
            # Skip titles that merely repeat the tweet text.
            if self._similar(title, d_text) > 0.4:
                print('wu/tang: similarity')
                return
            msg = "\x02\x0302{nick1:}:{nick2:}\x0F\x02\x0304 ▶▶ \x0F\x1D\x0314{url:}\x0F\x0304 ▶▶ \x0F\x0303{unrolled:} \x0F\x0304▶▶ \x0F\x1D\x0314{title:}\x0F".format(nick1=d_nick,nick2='UNROLLED',url=d_url,unrolled=d_unrolled,title=title)
            msg = self.bot.emo(msg)
            self.bot.privmsg(d_target, msg)
        finally:
            # stream=True keeps the connection open until it is released.
            r.close()

    ###########################################################################
    def _check_for_twitter(self, mask=None, data=None, target=None, **kw):
        """Announce the tweet linked at the end of *data*, then unroll it.

        :param mask: sender mask, forwarded to the bot's ``yt`` command
        :param data: message text to scan for a status URL
        :param target: IRC target the messages are sent to
        """
        if not data:
            return
        match_list = TWITTER_REGEX.findall(data)
        if not match_list:
            return
        status_id = match_list[0][2]
        try:
            tweet = twitter.GetStatus(status_id=status_id)
            tweet_text = tweet.text
            user = tweet.user.screen_name
            fav_count = tweet.favorite_count
            retweet_count = tweet.retweet_count
            location = tweet.coordinates or ""
            tweet_time = time.strptime(tweet.created_at, '%a %b %d %H:%M:%S +0000 %Y')
            # created_at is parsed as UTC (+0000), so compare against a
            # naive UTC "now" rather than local time.
            utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
            time_since = timeago.format(time.strftime('%Y-%m-%d %H:%M:%S', tweet_time), utc_now)
            msg = "\x02\x0302{} \x0F\x0303▶\x0F \x02\x0301{}\x0F\x0314 | Retweets:\x0F \x1D\x0306{}\x0F\x0314 Favorites:\x0F\x1D\x0306 {} \x0F⑆\x1D\x0314\x1D {} {}".format(user, tweet_text, retweet_count, fav_count, time_since, location)
            msg = self.bot.emo(msg)
            self.bot.privmsg(target, msg)
        except Exception as e:
            _code, _msg = _twitter_error_details(e)
            if _code == 179:
                _erid = "PROTECTED TW33T"
            else:
                _erid = "DON'T CARE EXCEPTION"
            msg = "\x02\x0302{} \x0F\x0304▶ \x0F\x02\x0312{} aka {}\x0F\x0303".format(_code,_msg,_erid)
            msg = self.bot.emo(msg)
            self.bot.privmsg(target, msg)
            return

        # Only unroll when the announced message contains at least one URL.
        found_urls = URL_REGEX.findall(msg)
        if not found_urls:
            return
        try:
            self._unroll_links(tweet, user, tweet_text, found_urls, mask,
                               target)
        except Exception as e:
            self.bot.privmsg(target,'twitter_plugin <> _debug_unrolling -> wu/tang: {}'.format(e))

    def _unroll_links(self, tweet, user, tweet_text, match_list, mask,
                      target):
        """Announce media or expanded links attached to *tweet*.

        *match_list* holds the URLs found in the announced message; entries
        matching a tweet link are removed and leftovers are printed.
        """
        if not tweet.urls:
            for media in tweet.media or ():
                msg = "\x02\x0302{nick1:}:{nick2:}\x0F\x02\x0304 ▶▶ [media] \x0F\x02\x0312{m_turl:}\x0F\x0304 ▶▶ \x0F\x0303{m_murl:} \x0F\x0304▶▶ \x0F\x1D\x0314{m_eurl:}\x0F".format(nick1=user,nick2='UNROLLED',m_turl=media.url,m_murl=media.media_url,m_eurl=media.expanded_url)
                msg = self.bot.emo(msg)
                self.bot.privmsg(target, msg)
            return

        for link in tweet.urls:
            t_turl = link.url
            try:
                match_list.remove(t_turl)
            except ValueError:
                print('cant remove from matchlist, does not exist')
            t_eurl = link.expanded_url
            yt_match = YOUTUBE_REGEX.search(t_eurl)
            if yt_match and yt_match.group(1):
                # Use the captured id: splitting on '=' broke youtu.be
                # links and kept trailing '&...' parameters.
                self.bot.madjust = "{}:UNROLLED".format(user)
                try:
                    self.bot.yt(mask,target,{'': [yt_match.group(1)],'yt': True})
                finally:
                    self.bot.madjust = ""
            elif str(tweet.id) != _status_id_from_url(t_eurl):
                self.bot._check_for_url(tweet,user,t_turl,t_eurl,tweet_text,target)
            else:
                print('bypassing original tweet')

        if match_list:
            print('items in matchlist remain')
            print(match_list)

    ###########################################################################
    @irc3.event(irc3.rfc.PRIVMSG)
    def on_privmsg_search_for_twitter(self, mask=None, target=None, data=None,
                                      **kw):
        """Scan each PRIVMSG for a tweet link.

        Messages starting with ``?`` and senders listed in the bot's
        ``ignore_list`` are skipped.
        """
        if data.startswith("?"):
            return
        if mask.nick.lower() not in self.bot.ignore_list:
            self._check_for_twitter(mask, data, target)

###############################################################################
###############################################################################