From 69a28e7a27d276ef945bd9e6ffd9e12849491cae Mon Sep 17 00:00:00 2001 From: Pawky Languish Date: Sat, 24 May 2025 18:50:43 +0000 Subject: wip --- youtube_abstract.py | 152 ++++++++++++++++++++++------------------------------ 1 file changed, 63 insertions(+), 89 deletions(-) diff --git a/youtube_abstract.py b/youtube_abstract.py index fb93ad7..f773aa0 100755 --- a/youtube_abstract.py +++ b/youtube_abstract.py @@ -3,6 +3,13 @@ from urllib.parse import urlencode, urlparse, parse_qs from json import loads as json_loads from URLget import urlget, URLgetException +import sys + + +def dbgprint(*args, **kwargs): + if sys.stdout.isatty(): + print(*args, **kwargs) + class YouTube: def __init__(self): @@ -11,25 +18,25 @@ class YouTube: YouTube.prefer_playlist = YouTube.prefer_playlist except AttributeError: # we probably want video title, default to that YouTube.prefer_playlist = False - self.irc_pal = { - "rst": "\x0f", ######### reset - "ylw": "\x0307", ####### yellow - "b_ylw": "\x0307\x02", # bold yellow - "wht": "\x0315", ####### white - "red": "\x0304", ####### red - "grn": "\x0303", ####### green - "itl": "\x1d", ######### italic - "bld": "\x02", ######### bold + self.irc_pal = { # ignore the fancy alignment BS lol + "rst": "" + "\x0f", ####### reset + "ylw": "" + "\x0307", ##### yellow + "b_ylw": "" "\x0307\x02", # bold yellow + "wht": "" + "\x0315", ##### white + "red": "" + "\x0304", ##### red + "grn": "" + "\x0303", ##### green + "itl": "" + "\x1d", ####### italic + "bld": "" + "\x02", ####### bold } - self.ansi_pal = { - "rst": "\x1b[0m", ###### reset - "ylw": "\x1b[33;2m", ### yellow - "b_ylw": "\x1b[33;1m", # bold yellow - "wht": "\x1b[37;2m", ### white - "red": "\x1b[31m", ##### red - "grn": "\x1b[32m", ##### green - "itl": "\x1b[03m", ##### italic - "bld": "\x1b[1m", ###### bold + self.ansi_pal = { # ignore the fancy alignment BS lol + "rst": "" + "\x1b[0m", #### reset + "ylw": "" + "\x1b[33;2m", # yellow + "b_ylw": "" "\x1b[33;1m", # bold yellow + "wht": "" + "\x1b[37;2m", # white + "red": "" + "\x1b[31m", ### red + "grn": "" + "\x1b[32m", ### green + "itl": "" + "\x1b[03m", ### italic + "bld": "" + "\x1b[1m", #### bold } def mesg(self, msg, t=None): # just an alias to shorten full name @@ -38,61 +45,32 @@ class YouTube: def match_urls(self, str, r=[]): if str.startswith("http://"): str = "https://" + str[7:] - if str.startswith( - "https://" - ): # first string has to be trimmed outside this func - if ( + if str.startswith("https://youtube."): + str = "https://www." + str[8:] + if str.startswith("https://"): # first string has to be trimmed before calling match_urls + if ( # I'm just doing fancy BS to align the urls nicely, lol, ignore this str.startswith("https://youtu.be/") - or str.startswith("https://www.youtube.com/watch?") - or str.startswith("https://music.youtube.com/watch?") - or str.startswith("https://m.youtube.com/watch?") - or str.startswith("https://www.youtube.com/playlist?") - or str.startswith("https://music.youtube.com/playlist?") - or str.startswith("https://m.youtube.com/playlist?") - or str.startswith("https://www.youtube.com/shorts/") - or str.startswith("https://youtube.com/shorts/") - or str.startswith("https://m.youtube.com/shorts/") - or str.startswith("https://www.youtube.com/embed/") - or str.startswith("https://www.youtube-nocookie.com/embed/") - or str.startswith("https://www.youtube.com/embed/videoseries?") + or str.startswith("" "" "" "" "https://www.youtube.com/playlist?") ####### playlist + or str.startswith("" "" "" "https://music.youtube.com/playlist?") + or str.startswith("" "" "" "" "https://m.youtube.com/playlist?") + or str.startswith("" "" "" "" "https://www.youtube.com/shorts/") ######### shorts + or str.startswith("" "" "" "" "" "https://youtube.com/shorts/") + or str.startswith("" "" "" "" "https://m.youtube.com/shorts/") + or str.startswith("" "" "" "" "https://www.youtube.com/watch?") ########## normal + or str.startswith("" "" "" "https://music.youtube.com/watch?") + or str.startswith("" "" "" "" "https://m.youtube.com/watch?") + or str.startswith("https://www.youtube-nocookie.com/embed/") ############# embed + or str.startswith("" "" "" "https://www.youtube.com/embed/") + or str.startswith("" "" "" "" "https://m.youtube.com/embed/") + or str.startswith("https://www.youtube-nocookie.com/embed/videoseries?") # embed playlist + or str.startswith("" "" "" "https://www.youtube.com/embed/videoseries?") + or str.startswith("" "" "" "" "https://m.youtube.com/embed/videoseries?") ): - r += [str[: str.find(" ")]] + r += [str[: str.find(" ")]] # make array of all matching "words" (urls) i = str.find(" ") + 1 - return match_urls(self, str[i:].strip(), r=r) if i != 0 else r - - """ - def match_urls(self, str): - str = str.replace("http://", "https://") - r = [ - i - for i in str.split() - # shorturl - if "https://youtu.be/" in i - # desktop - or "https://www.youtube.com/watch?" in i or "https://www.youtube.com/playlist?" in i - # mobile - or "https://m.youtube.com/watch?" in i or "https://m.youtube.com/playlist?" in i - # music - or "https://music.youtube.com/watch?" in i or "https://music.youtube.com/playlist?" in i - # shorts - or "https://www.youtube.com/shorts/" in i - or "https://m.youtube.com/shorts/" in i - or "https://youtube.com/shorts/" in i - # embed - or "https://www.youtube.com/embed/" in i or "https://www.youtube-nocookie.com/embed/" in i - # or "https://www.youtube.com/embed/videoseries?" in i # embed playlist, lol - # just in case (shouldn't happen) - or "https://youtube.com/watch?" in i or "https://youtube.com/playlist?" in i - ] - r = list(dict.fromkeys(r)) - n = 0 - for i in r: - if not i.startswith("http"): - r.pop(n) - n += 1 - - return r - """ + return ( + match_urls(self, str[i:].strip(), r=r) if i != 0 else r + ) # recurse down each word, see if anything matches # makes for a little better syntax than a bunch of str.startswith calls def matchstart(self, str, *arr): @@ -102,32 +80,32 @@ class YouTube: return False def is_clip(self, str): - return self.matchstart( - str, "https://youtube.com/clip/", "https://www.youtube.com/clip/" - ) + return self.matchstart(str, "https://youtube.com/clip/", "https://www.youtube.com/clip/") # boil down to video id + playlist id def normalize_url(self, url): - raw_url = url + dbgprint("normalize", url) + raw_url, videoId, listId = url, "", "" # youtu.be if self.matchstart(url, "https://youtu.be/"): videoId = url.split("/")[3].split("?")[0] - elif self.matchstart( - url, "https://youtube.com/shorts/", "https://www.youtube.com/shorts/" - ): + dbgprint("youtu.be") + elif self.matchstart(url, "https://www.youtube.com/shorts/", "https://m.youtube.com/shorts/"): videoId = url.split("?")[0].split("/")[-1] + dbgprint("/shorts", videoId) # embed elif self.matchstart( url, + "https://m.youtube.com/embed/", "https://www.youtube.com/embed/", "https://www.youtube-nocookie.com/embed/", ): - # try: - listId = parse_qs(urlparse(url).query)["list"][0] - # except - if not url.split("/")[4].startswith("videoseries"): - videoId = url.split("/")[4] - # print("embed", videoId, listId) + try: + listId = parse_qs(urlparse(url).query)["list"][0] + except KeyError: + if not url.split("/")[4].startswith("videoseries"): + videoId = url.split("/")[4] + dbgprint("embed", videoId, listId) elif "v=" in url: # handles yt music, normal url, etc for i in url.split("?")[1].split("&"): if i[0:2] == "v=": @@ -167,9 +145,7 @@ class YouTube: elif video_id: url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id else: - self.setstring( - "string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}" - ) + self.setstring("string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}") return {"irc": irc_string, "ansi": ansi_string}, True return url @@ -206,9 +182,7 @@ class YouTube: # print(url, " and ", playlist_id) status, data = urlget(url) if status != 200: - self.setstring( - "string", "{{i}_prefix_err} {status}{pal['rst']}", locals() - ) + self.setstring("string", "{{i}_prefix_err} {status}{pal['rst']}", locals()) return {"irc": irc_string, "ansi": ansi_string}, True data = json_loads(data) title, channelName = data["title"], data["author_name"] -- cgit 1.4.1-2-gfad0