From 763f96948f39a20d79f7ec68a924966c2b3db761 Mon Sep 17 00:00:00 2001 From: Pawky Languish Date: Thu, 28 Nov 2024 03:18:20 +0000 Subject: add tests and make youtube use oembed (no more bot block, at the cost of VERY minimal info) --- .gitignore | 1 + requirements.txt | 2 +- soundcloud.py | 6 +- test.sh | 21 +++++++ youtube.py | 132 +++++++++------------------------------- youtube.py.old | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 233 insertions(+), 108 deletions(-) create mode 100755 test.sh create mode 100755 youtube.py.old diff --git a/.gitignore b/.gitignore index f05327d..7d94e97 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ venv pass.txt local_config.py log.txt +yt_keys.json # ---> Python # Byte-compiled / optimized / DLL files diff --git a/requirements.txt b/requirements.txt index 8376b04..8a70ebc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -ircstates ~=0.11.9 +ircstates diff --git a/soundcloud.py b/soundcloud.py index ae17f9d..a1f5f91 100755 --- a/soundcloud.py +++ b/soundcloud.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from urllib.parse import urlencode, urlparse from urllib.request import urlopen +from urllib.parse import urlencode, urlparse from json import loads as json_loads @@ -29,7 +29,7 @@ class SoundCloud: url = f"https://soundcloud.com/oembed?{urlencode([('url',url),('format','json')])}" data = urlopen(url).read().decode() data = json_loads(data) - print(data) + #print(data) # print(data["title"].removesuffix(" by "+data["author_name"]),data["author_name"]) try: artist = data["author_name"] @@ -37,7 +37,7 @@ class SoundCloud: except KeyError: title = "" artist = "" - print(title.removesuffix(" by " + artist), "|", artist) + #print(title.removesuffix(" by " + artist), "|", artist) if title == "": irc_string = "[\x0304SoundCloud\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" ansi_string = "[\x1b[31mSoundCloud\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m" diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..fdcbba3 --- /dev/null +++ b/test.sh @@ -0,0 +1,21 @@ +#!/bin/sh +#just random urls to test the modules + +./bandcamp.py https://austinwintory.bandcamp.com/album/stray-gods +./bandcamp.py https://soundoftheaviators.bandcamp.com/track/writing-on-the-walls-2 + +#soundcloud and spotify add junk to links if you click "share" +./soundcloud.py https://soundcloud.com/lindseystomp/sets/artemis-3 +./soundcloud.py 'https://soundcloud.com/lindseystomp/sets/artemis-3?utm_source=clipboard&utm_medium=text&utm_campaign=social_sharing' +./soundcloud.py https://soundcloud.com/user-152508755/thefatrat-maisy-kay-the-storm-epic-orchestra-remix +./soundcloud.py 'https://soundcloud.com/user-152508755/thefatrat-maisy-kay-the-storm-epic-orchestra-remix?utm_source=clipboard&utm_medium=text&utm_campaign=social_sharing' + +./spotify.py https://open.spotify.com/track/4pY1okPrJvIPBQM0t4i28v +./spotify.py 'https://open.spotify.com/track/4sOX1nhpKwFWPvoMMExi3q?si=c880ccca72ee435d' +./spotify.py https://open.spotify.com/album/2hvCFY4DYaKzzkNYd60oS3 +./spotify.py 'https://open.spotify.com/album/1u2ACTYzVNK3vSLG0Ah4H3?si=c1ZT_3YeS8SXkrbErFl6bw' + +#youtube oembed does support playlists but ONLY when it's /playlist?list= *NOT* when it is /watch?v=bla&list= +./youtube.py https://www.youtube.com/watch?v=EUD9UTwXAZY +./youtube.py https://www.youtube.com/playlist?list=PL0bbUqXsNHE0ZELST3vW_11GDHKDAwLYh +./youtube.py 'https://www.youtube.com/watch?v=eneLP_P1_fg&list=PL0bbUqXsNHE0ZELST3vW_11GDHKDAwLYh&index=2' diff --git a/youtube.py b/youtube.py index 41dd18b..a75f52b 100755 --- a/youtube.py +++ b/youtube.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 -from html.parser import HTMLParser from urllib.request import urlopen - +from urllib.error import HTTPError +from urllib.parse import urlencode, urlparse, parse_qs +from json import loads as json_loads class YouTube: - video_type = "" - def mesg(self, msg, t=None): self.util.mesg(msg, t) @@ -52,119 +51,44 @@ class YouTube: "https://www.youtube.com/clip/" ) - class parseprop(HTMLParser): - def __init__(self): - print("yt parse init") - HTMLParser.__init__(self) - self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"] - self.h = {} - if YouTube.video_type == "clip": - self.itemprops_list += ["description"] - print("it is a clip!") - - def handle_starttag(self, tag, attrs): - if (tag != "meta" and tag != "link") or ( - ( - [i for i in attrs if "itemprop" in i] == [] - and ("name", "title") not in attrs - ) - or (tag == "meta" and ("itemprop", "name") in attrs) - ): - return - # print(self,tag,attrs) - for k, v in attrs: - if k == "itemprop": - if v not in self.itemprops_list: - return - x = [v] - if tag == "link" and v == "name": - x = ["channelName"] - elif k == "content": - if attrs[0][1] == "interactionCount": - v = int(v) - x += [v] - elif k == "name" and v == "title": - x = [v] - else: - return - self.h.update({x[0]: x[1]}) - # print(x[0],"=",x[1]) - - def fmt_dur(dur): - h, m, s = 0, 0, 0 - m = dur[2:].split("M") - s = int(m[1][:-1]) - m = int(m[0]) - if m >= 60: - h = m // 60 - m = round((m / 60 - h) * 60) - return f"{h}h {m}m {s}s" - elif h == 0 and m == 0 and s == 0: - return "LIVE" - elif m == 0 and s != 0: - return f"{s}s" - elif s == 0: - return f"{m}m" - else: - return f"{m}m {s}s" - def yt(self, url): + irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" + ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m" # self.util.mesg("dbg hello") url = url.rstrip("\x01") - self.video_type = ( - "clip" - if self.is_clip(url) - else ( - "shorts" - if self.is_ytshorts(url) - else ( - "music" - if self.is_ytmusic(url) - else "embed" if self.is_embed(url) else "video" - ) - ) - ) - video_type = self.video_type - if video_type == "embed": + if self.is_embed(url): videoId = url.split("/")[4] url = f"https://www.youtube.com/watch?v={videoId}" - elif video_type == "music": + elif self.is_ytmusic(url): for i in url.split("?")[1].split("&"): if i[0:2] == "v=": videoId = i[2:] url = f"https://www.youtube.com/watch?v={videoId}" - elif video_type == "shorts": + elif self.is_ytshorts(url): videoId = url.split("?")[0].split("/")[-1] url = f"https://www.youtube.com/watch?v={videoId}" - p = self.parseprop() - # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail - data = b"" - if self.premature_optimization: - url_h = urlopen(url) - # appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment) - # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads - for i in range(24): - data += url_h.readline() - url_h.close() - data = data.decode() # bytes to utf-8 - if ( - data.find('meta itemprop="duration"') == -1 - or data.find('meta itemprop="name"') == -1 - ): # acts as both fallback for optimization, and in case optimization's turned off - # just read all of the html + url = urlparse(url) + qs=parse_qs(url.query);video_id=qs['v'][0] + try: playlist_id=qs['list'][0] + except KeyError: playlist_id=None + if self.prefer_playlist and playlist_id: + url = url.scheme + "://" + url.netloc + "/playlist?list=" + playlist_id + else: + url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id + url = f"https://www.youtube.com/oembed?{urlencode([('url',url),('format','json')])}" + try: + print(url," and ",playlist_id) data = urlopen(url).read().decode() - # print(f"\x1b[31m my data is: {data}\x1b[0m") - p.feed(data) - if p.h == {}: - irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" - ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m" + data = json_loads(data) + title=data['title'] + channelName=data['author_name'] + except HTTPError as e: + irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03" + ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m" print(ansi_string) return irc_string, True - y = p.h - print(y) - y.update(duration=self.fmt_dur(y["duration"])) - irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views" - ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views" + irc_string = f"[\x0303Youtube\x03] \x02{title}\x02 uploaded by \x1d{channelName}\x1d" + ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{title}\x1b[0m uploaded by \x1b[03m{channelName}\x1b[0m" print(ansi_string) return irc_string, False @@ -172,5 +96,5 @@ class YouTube: if __name__ == "__main__": import sys - YouTube.premature_optimization = False + YouTube.prefer_playlist=False YouTube.yt(YouTube, sys.argv[1]) diff --git a/youtube.py.old b/youtube.py.old new file mode 100755 index 0000000..b09b1d1 --- /dev/null +++ b/youtube.py.old @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +from html.parser import HTMLParser +from urllib.request import urlopen +from urllib.error import HTTPError + +class YouTube: + video_type = "" + + def mesg(self, msg, t=None): + self.util.mesg(msg, t) + + def match_urls(self, str): + r = [ + i + for i in str.split() + if "https://youtu.be/" in i + or "https://www.youtube.com/watch?v=" in i + or "https://m.youtube.com/watch?v=" in i + or "https://youtube.com/watch?v=" in i + or "https://www.youtube.com/embed/" in i + or "https://www.youtube-nocookie.com/embed/" in i + or "https://music.youtube.com/watch?v=" in i + or "https://youtube.com/shorts/" in i + or "https://www.youtube.com/shorts/" in i + or "https://www.youtube.com/clip/" in i + or "https://youtube.com/clip/" in i + ] + r = list(dict.fromkeys(r)) + n = 0 + for i in r: + if not i.startswith("http"): + r.pop(n) + n += 1 + + return r + + def is_embed(str): + return str.startswith("https://www.youtube.com/embed/") or str.startswith( + "https://www.youtube-nocookie.com/embed/" + ) + + def is_ytmusic(str): + return str.startswith("https://music.youtube.com/watch?v=") + + def is_ytshorts(str): + return str.startswith("https://youtube.com/shorts/") or str.startswith( + "https://www.youtube.com/shorts/" + ) + + def is_clip(str): + return str.startswith("https://youtube.com/clip/") or str.startswith( + "https://www.youtube.com/clip/" + ) + + class parseprop(HTMLParser): + def __init__(self): + #print("yt parse init") + HTMLParser.__init__(self) + self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"] + self.h = {} + if YouTube.video_type == "clip": + self.itemprops_list += ["description"] + print("it is a clip!") + + def handle_starttag(self, tag, attrs): + if (tag != "meta" and tag != "link") or ( + ( + [i for i in attrs if "itemprop" in i] == [] + and ("name", "title") not in attrs + ) + or (tag == "meta" and ("itemprop", "name") in attrs) + ): + return + # print(self,tag,attrs) + for k, v in attrs: + if k == "itemprop": + if v not in self.itemprops_list: + return + x = [v] + if tag == "link" and v == "name": + x = ["channelName"] + elif k == "content": + if attrs[0][1] == "interactionCount": + v = int(v) + x += [v] + elif k == "name" and v == "title": + x = [v] + else: + return + self.h.update({x[0]: x[1]}) + # print(x[0],"=",x[1]) + + def fmt_dur(dur): + h, m, s = 0, 0, 0 + m = dur[2:].split("M") + s = int(m[1][:-1]) + m = int(m[0]) + if m >= 60: + h = m // 60 + m = round((m / 60 - h) * 60) + return f"{h}h {m}m {s}s" + elif h == 0 and m == 0 and s == 0: + return "LIVE" + elif m == 0 and s != 0: + return f"{s}s" + elif s == 0: + return f"{m}m" + else: + return f"{m}m {s}s" + + def yt(self, url): + irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" + ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m" + # self.util.mesg("dbg hello") + url = url.rstrip("\x01") + self.video_type = ( + "clip" + if self.is_clip(url) + else ( + "shorts" + if self.is_ytshorts(url) + else ( + "music" + if self.is_ytmusic(url) + else "embed" if self.is_embed(url) else "video" + ) + ) + ) + video_type = self.video_type + if video_type == "embed": + videoId = url.split("/")[4] + url = f"https://www.youtube.com/watch?v={videoId}" + elif video_type == "music": + for i in url.split("?")[1].split("&"): + if i[0:2] == "v=": + videoId = i[2:] + url = f"https://www.youtube.com/watch?v={videoId}" + elif video_type == "shorts": + videoId = url.split("?")[0].split("/")[-1] + url = f"https://www.youtube.com/watch?v={videoId}" + p = self.parseprop() + # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail + data = b"" + if self.premature_optimization: + url_h = urlopen(url) + # appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment) + # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads + for i in range(24): + data += url_h.readline() + url_h.close() + data = data.decode() # bytes to utf-8 + if ( + data.find('meta itemprop="duration"') == -1 + or data.find('meta itemprop="name"') == -1 + ): # acts as both fallback for optimization, and in case optimization's turned off + # just read all of the html + try: data = urlopen(url).read().decode() + except HTTPError as e: + irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03" + ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m" + # print(f"\x1b[31m my data is: {data}\x1b[0m") + p.feed(data) + if p.h == {}: + print(ansi_string) + return irc_string, True + y = p.h + print(y) + y.update(duration=self.fmt_dur(y["duration"])) + irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views" + ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views" + print(ansi_string) + return irc_string, False + + +if __name__ == "__main__": + import sys + + YouTube.premature_optimization = False + YouTube.yt(YouTube, sys.argv[1]) -- cgit 1.4.1-2-gfad0