diff options
author | Pawky Languish | 2025-04-19 19:07:38 +0000 |
---|---|---|
committer | Pawky Languish | 2025-04-19 19:07:38 +0000 |
commit | f4fb34dde6b03fdb49f71476d587c5c7f986b565 (patch) | |
tree | ef8cc174c413dcd766754bc51c52b9a2fc328e5c /youtube_abstract.py | |
parent | 12062621d6d67adb8abee962f4201e2d5196f55b (diff) |
Diffstat (limited to 'youtube_abstract.py')
-rwxr-xr-x | youtube_abstract.py | 241 |
1 files changed, 241 insertions, 0 deletions
#!/usr/bin/env python3
"""Describe YouTube links (title and uploader) for IRC and terminal output."""
import sys
from json import loads as json_loads
from urllib.parse import parse_qs, urlencode, urlparse


class YouTube:
    """Find YouTube URLs in text and describe them via YouTube's oEmbed endpoint.

    Every message is rendered twice, once with IRC colour codes and once with
    ANSI escape sequences, and handed back as ``{"irc": ..., "ansi": ...}``.
    """

    # For URLs that carry both a video id and a playlist id: title the
    # playlist (True) or the video (False)?  Set ``YouTube.prefer_playlist``
    # to change it for every instance.
    prefer_playlist = False

    # URL prefixes that match_urls() treats as YouTube links.
    _URL_PREFIXES = (
        "https://youtu.be/",
        "https://www.youtube.com/watch?",
        "https://music.youtube.com/watch?",
        "https://m.youtube.com/watch?",
        "https://www.youtube.com/playlist?",
        "https://music.youtube.com/playlist?",
        "https://m.youtube.com/playlist?",
        "https://www.youtube.com/shorts/",
        "https://youtube.com/shorts/",
        "https://m.youtube.com/shorts/",
        # the embed prefixes also cover ".../embed/videoseries?" playlists
        "https://www.youtube.com/embed/",
        "https://www.youtube-nocookie.com/embed/",
    )

    def __init__(self):
        """Create the IRC and ANSI colour palettes used by setstring()."""
        self.irc_pal = {
            "rst": "\x0f",  # reset
            "ylw": "\x0307",  # yellow
            "b_ylw": "\x0307\x02",  # bold yellow
            "wht": "\x0315",  # white
            "red": "\x0304",  # red
            "grn": "\x0303",  # green
            "itl": "\x1d",  # italic
            "bld": "\x02",  # bold
        }
        self.ansi_pal = {
            "rst": "\x1b[0m",  # reset
            "ylw": "\x1b[33;2m",  # yellow
            "b_ylw": "\x1b[33;1m",  # bold yellow
            "wht": "\x1b[37;2m",  # white
            "red": "\x1b[31m",  # red
            "grn": "\x1b[32m",  # green
            "itl": "\x1b[03m",  # italic
            "bld": "\x1b[1m",  # bold
        }

    def mesg(self, msg, t=None):
        """Forward *msg* and *t* to ``self.util.mesg`` (a shorter alias).

        NOTE(review): ``self.util`` is never assigned in this class; it is
        presumably attached by the hosting application -- confirm.
        """
        self.util.mesg(msg, t)

    def match_urls(self, str, r=None):
        """Return the YouTube URLs found in the whitespace-separated text *str*.

        ``http://`` links are upgraded to ``https://`` before matching.
        Matches are appended to *r* when a list is supplied (and that same
        list is returned); otherwise a fresh list is created on every call.
        Order is preserved and duplicates are kept.
        """
        found = [] if r is None else r
        for token in str.split():
            if token.startswith("http://"):
                token = "https://" + token[7:]
            if token.startswith(self._URL_PREFIXES):
                found.append(token)
        return found

    def matchstart(self, str, *arr):
        """Return True when *str* starts with any of the prefixes in *arr*."""
        # str.startswith accepts a tuple; an empty tuple yields False.
        return str.startswith(arr)

    def is_clip(self, str):
        """Return True when *str* is a YouTube clip URL."""
        return self.matchstart(
            str, "https://youtube.com/clip/", "https://www.youtube.com/clip/"
        )

    def normalize_url(self, url):
        """Boil *url* down to a canonical URL holding video id + playlist id.

        Returns ``https://www.youtube.com/watch?v=ID[&list=LIST]`` when a
        video id is found, ``https://www.youtube.com/playlist?list=LIST`` for
        a playlist-only embed, and *url* unchanged otherwise (for example
        plain playlist links, which normalize_playlist() handles).
        """
        parsed = urlparse(url)
        query = parse_qs(parsed.query)
        # "/embed/ID" -> ["", "embed", "ID"]
        segments = parsed.path.split("/")
        video_id = None
        list_id = None

        if self.matchstart(url, "https://youtu.be/"):
            video_id = segments[1]
        elif self.matchstart(
            url,
            "https://youtube.com/shorts/",
            "https://www.youtube.com/shorts/",
            "https://m.youtube.com/shorts/",
        ):
            video_id = segments[2]
        elif self.matchstart(
            url,
            "https://www.youtube.com/embed/",
            "https://www.youtube-nocookie.com/embed/",
        ):
            # the playlist parameter is optional on embed URLs
            list_id = query.get("list", [None])[0]
            if not segments[2].startswith("videoseries"):
                video_id = segments[2]
        elif "v" in query:  # handles yt music, normal url, etc
            video_id = query["v"][0]
            list_id = query.get("list", [None])[0]

        if video_id:
            clean = f"https://www.youtube.com/watch?v={video_id}"
            if list_id:
                clean += f"&list={list_id}"
        elif list_id:
            clean = f"https://www.youtube.com/playlist?list={list_id}"
        else:
            clean = url
        self._debug("clean url", clean)
        return clean

    def normalize_playlist(self, url):
        """Choose between the video and the playlist form of *url*.

        Returns the chosen URL as a string.  When neither a video id nor a
        usable playlist id is present, returns ``(messages, True)`` instead,
        where *messages* is the ``{"irc": ..., "ansi": ...}`` error text.
        """
        parts = urlparse(url)
        query = parse_qs(parts.query)
        video_id = query.get("v", [None])[0]
        playlist_id = query.get("list", [None])[0]
        # ignore the programmatic "mix" / "radio" lists (ids starting "RD")
        if playlist_id and playlist_id.startswith("RD"):
            playlist_id = None

        base = f"{parts.scheme}://{parts.netloc}"
        if playlist_id and (self.prefer_playlist or not video_id):
            return f"{base}/playlist?list={playlist_id}"
        if video_id:
            return f"{base}{parts.path}?v={video_id}"

        # The error template refers to the prefixes, so make sure they exist
        # even when this method is called without going through yt().
        self._set_prefixes()
        messages = self.setstring(
            "string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}"
        )
        return messages, True

    def setstring(self, name, val, mylocals=None):
        """Render template *val* for both palettes and publish the results.

        *val* is evaluated as an f-string body, once per flavour ("irc" and
        "ansi").  Inside it, ``{i}`` is first replaced textually by the
        flavour name, ``pal`` is that flavour's palette, ``self`` is this
        instance, and any extra names come from *mylocals* and then from the
        module globals.  Each result is stored in the module globals as
        ``irc_<name>`` / ``ansi_<name>`` so that later templates can refer to
        it (e.g. ``{{i}_prefix_err}``).

        Returns ``{"irc": text, "ansi": text}``.

        SECURITY: *val* is executed as code via eval().  Only pass trusted,
        hard-coded templates; untrusted data belongs in *mylocals* values.
        """
        rendered = {}
        for flavour in ("irc", "ansi"):
            template = val.replace("{i}", flavour)
            # work on a copy so the caller's mapping is never modified
            namespace = dict(mylocals) if mylocals else {}
            namespace.update(self=self, pal=getattr(self, f"{flavour}_pal"))
            text = eval(f"f{template!r}", globals(), namespace)
            globals()[f"{flavour}_{name}"] = text
            rendered[flavour] = text
        return rendered

    def yt(self, url):
        """Describe the video or playlist behind *url*.

        Returns ``(messages, failed)``: *messages* is a dict with the "irc"
        and "ansi" renderings of either the title line or an error message,
        and *failed* is True when an error message is being returned.
        """
        self._set_prefixes()
        # fallback used when the server answers with an unusable payload
        no_data = self.setstring(
            "string",
            "{{i}_prefix_err} got no data from server! {pal['wht']}(check your URL for typos!){pal['rst']}",
        )
        url = url.rstrip("\x01")  # I forget exactly why, might be due to /me ?
        target = self.normalize_playlist(self.normalize_url(url))
        if not isinstance(target, str):
            # normalize_playlist() already produced (messages, True)
            return target
        oembed = f"https://www.youtube.com/oembed?{urlencode([('url', target), ('format', 'json')])}"

        # URLget is a project-local helper needed only for the fetch itself;
        # importing it here keeps the URL helpers usable without it.
        from URLget import URLgetException, urlget

        try:
            status, data = urlget(oembed)
        except URLgetException as e:
            messages = self.setstring(
                "string", "{{i}_prefix_err} {e}{pal['rst']}", {"e": e}
            )
            self._debug(messages["ansi"])
            return messages, True
        if status != 200:
            messages = self.setstring(
                "string", "{{i}_prefix_err} {status}{pal['rst']}", {"status": status}
            )
            return messages, True
        try:
            data = json_loads(data)
            title, channel_name = data["title"], data["author_name"]
        except (ValueError, KeyError, TypeError):
            return no_data, True

        messages = self.setstring(
            "string",
            "{{i}_prefix} {pal['bld']}{title}{pal['rst']} uploaded by {pal['itl']}{channelName}{pal['rst']}",
            mylocals={"title": title, "channelName": channel_name},
        )
        self._debug("ansi", messages["ansi"])
        self._debug("irc", messages["irc"])
        return messages, False

    def _set_prefixes(self):
        """Publish the normal and error message prefixes for both palettes."""
        self.setstring(
            "prefix",
            "[{pal['grn']}YouTube{pal['rst']}]",
        )
        self.setstring(
            "prefix_err",
            "[{pal['red']}YouTube{pal['rst']}] {pal['ylw']}ERROR:{pal['b_ylw']}",
        )

    @staticmethod
    def _debug(*parts):
        """Print *parts* only when stdout is an interactive terminal."""
        if sys.stdout.isatty():
            print(*parts)


if __name__ == "__main__":
    # If the url is a video that's part of a playlist, title the playlist
    # (True) or the video (False, default)?
    # YouTube.prefer_playlist = False
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <text-or-url>")
    YT = YouTube()
    print(YT.match_urls(sys.argv[1]))
    YT.yt(sys.argv[1])