#!/usr/bin/env python3
# Reconstructed from a cgit patch view of commit
# f0e4aba9708f21d30151b2c4b49fb949d1c91df5 (Pawky Languish, Wed, 11 Dec 2024
# 16:40:28 +0000), subject: "Try to workaround youtube bot flagging in
# alternate youtube module (doesn't quite work? might reduce likelyhood ip
# gets flagged?)".  That commit deleted this file (youtube.py.old, mode
# 100755, blob b09b1d1, 179 lines).
"""Look up YouTube links and format a one-line summary for IRC and terminals."""
import re
from html.parser import HTMLParser
from urllib.error import HTTPError
from urllib.request import urlopen

# Substrings that mark a whitespace-separated token as a YouTube link.
_URL_MARKERS = (
    "https://youtu.be/",
    "https://www.youtube.com/watch?v=",
    "https://m.youtube.com/watch?v=",
    "https://youtube.com/watch?v=",
    "https://www.youtube.com/embed/",
    "https://www.youtube-nocookie.com/embed/",
    "https://music.youtube.com/watch?v=",
    "https://youtube.com/shorts/",
    "https://www.youtube.com/shorts/",
    "https://www.youtube.com/clip/",
    "https://youtube.com/clip/",
)

# Keys that must be present in the parsed metadata to build the summary line.
_REQUIRED_FIELDS = ("title", "duration", "channelName", "uploadDate", "interactionCount")

# ISO-8601 time-only duration as found in the page metadata, e.g. "PT12M34S".
_ISO_DURATION = re.compile(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?")


class YouTube:
    """Fetch a YouTube watch page and summarise its embedded metadata.

    The class works both when instantiated and when the class object itself
    is passed as ``self`` (``YouTube.yt(YouTube, url)``), which is how the
    original script entry point called it.
    """

    # Kind of the most recently processed URL:
    # "clip", "shorts", "music", "embed" or "video".
    video_type = ""
    # When true, yt() first reads only the first 24 lines of the page and
    # falls back to a full download if the metadata is not found there.
    premature_optimization = False

    def mesg(self, msg, t=None):
        """Forward *msg* (and optional *t*) to ``self.util.mesg``.

        ``util`` is not defined in this module; it has to be attached by the
        code that embeds this class.
        """
        self.util.mesg(msg, t)

    def match_urls(self, str):
        """Return the unique YouTube URLs found in the text *str*.

        Tokens are split on whitespace, must contain one of the known YouTube
        URL prefixes and must themselves start with ``http``.  Order of first
        appearance is preserved.
        """
        candidates = [
            word
            for word in str.split()
            if any(marker in word for marker in _URL_MARKERS)
        ]
        # dict.fromkeys() de-duplicates while keeping order.  Filtering with a
        # comprehension (rather than popping while iterating) makes sure every
        # token that does not start with "http" is dropped.
        return [word for word in dict.fromkeys(candidates) if word.startswith("http")]

    @staticmethod
    def is_embed(str):
        """Return True if *str* is an embed URL (youtube.com or youtube-nocookie.com)."""
        return str.startswith(
            (
                "https://www.youtube.com/embed/",
                "https://www.youtube-nocookie.com/embed/",
            )
        )

    @staticmethod
    def is_ytmusic(str):
        """Return True if *str* is a music.youtube.com watch URL."""
        return str.startswith("https://music.youtube.com/watch?v=")

    @staticmethod
    def is_ytshorts(str):
        """Return True if *str* is a shorts URL."""
        return str.startswith(
            ("https://youtube.com/shorts/", "https://www.youtube.com/shorts/")
        )

    @staticmethod
    def is_clip(str):
        """Return True if *str* is a clip URL."""
        return str.startswith(
            ("https://youtube.com/clip/", "https://www.youtube.com/clip/")
        )

    class parseprop(HTMLParser):
        """Collect selected ``itemprop`` / ``name="title"`` values from a page.

        After ``feed()``, ``self.h`` maps ``title``, ``channelName``,
        ``duration``, ``uploadDate`` and ``interactionCount`` (an ``int``) to
        the values found, plus ``description`` for clips.
        """

        def __init__(self, video_type=None):
            """Create the parser.

            *video_type* defaults to ``YouTube.video_type`` so that existing
            zero-argument construction keeps working.
            """
            HTMLParser.__init__(self)
            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
            self.h = {}
            if video_type is None:
                video_type = YouTube.video_type
            if video_type == "clip":
                self.itemprops_list += ["description"]
                print("it is a clip!")

        def handle_starttag(self, tag, attrs):
            """Record the content of a relevant ``<meta>`` / ``<link>`` tag.

            ``<meta itemprop="name">`` is skipped on purpose: the title is
            taken from ``<meta name="title">`` and the channel name from
            ``<link itemprop="name">``.  A tag carrying any attribute other
            than ``itemprop``, ``content`` or ``name="title"`` is ignored.
            """
            if tag not in ("meta", "link"):
                return
            has_itemprop = any("itemprop" in pair for pair in attrs)
            if not has_itemprop and ("name", "title") not in attrs:
                return
            if tag == "meta" and ("itemprop", "name") in attrs:
                return

            key = None
            content = None
            for attr_name, attr_value in attrs:
                if attr_name == "itemprop":
                    if attr_value not in self.itemprops_list:
                        return
                    if tag == "link" and attr_value == "name":
                        key = "channelName"
                    else:
                        key = attr_value
                elif attr_name == "content":
                    content = attr_value
                elif attr_name == "name" and attr_value == "title":
                    key = attr_value
                else:
                    return

            # Tolerate tags that lack a content value or list the attributes
            # in an unexpected order instead of raising.
            if key is None or content is None:
                return
            if key == "interactionCount":
                try:
                    content = int(content)
                except ValueError:
                    return
            self.h[key] = content

    @staticmethod
    def fmt_dur(dur):
        """Format an ISO-8601 duration such as ``"PT75M30S"`` for display.

        Returns ``"LIVE"`` for a zero duration, otherwise one of
        ``"{h}h {m}m {s}s"``, ``"{m}m {s}s"``, ``"{m}m"`` or ``"{s}s"``.
        Minutes of 60 or more are carried over into hours.

        Raises:
            ValueError: if *dur* is not of the form ``PT[nH][nM][nS]``.
        """
        parsed = _ISO_DURATION.fullmatch(dur)
        if parsed is None:
            raise ValueError(f"unsupported duration: {dur!r}")
        hours, minutes, seconds = (int(part or 0) for part in parsed.groups())
        carried_hours, minutes = divmod(minutes, 60)
        hours += carried_hours
        if hours:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes == 0 and seconds == 0:
            return "LIVE"
        if minutes == 0:
            return f"{seconds}s"
        if seconds == 0:
            return f"{minutes}m"
        return f"{minutes}m {seconds}s"

    @staticmethod
    def _classify(url):
        """Return the video type name for *url* (clip/shorts/music/embed/video)."""
        if YouTube.is_clip(url):
            return "clip"
        if YouTube.is_ytshorts(url):
            return "shorts"
        if YouTube.is_ytmusic(url):
            return "music"
        if YouTube.is_embed(url):
            return "embed"
        return "video"

    @staticmethod
    def _watch_url(video_type, url):
        """Rewrite embed/music/shorts URLs to the plain watch-page URL."""
        video_id = None
        if video_type == "embed":
            # Drop any "?si=..." style query that follows the id.
            video_id = url.split("/")[4].split("?")[0]
        elif video_type == "music":
            ids = [
                param[2:]
                for param in url.split("?")[1].split("&")
                if param[0:2] == "v="
            ]
            if ids:
                video_id = ids[-1]
        elif video_type == "shorts":
            video_id = url.split("?")[0].split("/")[-1]
        if video_id is None:
            return url
        return f"https://www.youtube.com/watch?v={video_id}"

    def yt(self, url):
        """Fetch *url* and build a summary of the video.

        Prints an ANSI-coloured line to stdout and returns
        ``(irc_string, failed)`` where *failed* is True when no usable
        metadata could be obtained (including HTTP errors).
        """
        irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
        ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
        # A trailing \x01 is stripped before the URL is classified.
        url = url.rstrip("\x01")
        video_type = self._classify(url)
        self.video_type = video_type
        url = self._watch_url(video_type, url)
        p = self.parseprop(video_type)

        data = ""
        try:
            if self.premature_optimization:
                # The metadata appears on approximately line 21 or 22, so 24
                # lines are read to be safe.  Whole lines are read so no
                # multi-byte character is cut in half.
                with urlopen(url) as response:
                    head = b"".join(response.readline() for _ in range(24))
                data = head.decode(errors="replace")
            if (
                'meta itemprop="duration"' not in data
                or 'meta itemprop="name"' not in data
            ):
                # Fallback for the shortcut, and the normal path when the
                # shortcut is turned off: read all of the HTML.
                with urlopen(url) as response:
                    data = response.read().decode(errors="replace")
        except HTTPError as e:
            irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
            ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"

        p.feed(data)
        y = p.h
        # Report an error instead of raising KeyError when the page gave us
        # nothing, or only part of the fields the summary needs.
        if not y or any(field not in y for field in _REQUIRED_FIELDS):
            print(ansi_string)
            return irc_string, True
        print(y)
        try:
            y.update(duration=self.fmt_dur(y["duration"]))
        except ValueError:
            # Unknown duration syntax: show the raw value rather than fail.
            pass
        irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
        print(ansi_string)
        return irc_string, False


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} URL")
    YouTube.premature_optimization = False
    YouTube.yt(YouTube, sys.argv[1])