diff options
Diffstat (limited to 'youtube_abstract.py')
-rwxr-xr-x | youtube_abstract.py | 215 |
1 files changed, 215 insertions, 0 deletions
#!/usr/bin/env python3
"""Turn YouTube URLs into one-line "title / uploader" summaries.

The summaries are rendered twice, once with IRC colour codes and once with
ANSI escape codes.  Metadata is fetched from YouTube's oEmbed endpoint.
"""
import sys
from json import loads as json_loads
from urllib.parse import urlencode, urlparse, parse_qs

try:
    from URLget import urlget, URLgetException
except ImportError:
    # URLget is a project-local helper.  Keep the URL matching/normalising
    # helpers importable without it; yt() then reports the problem through
    # its normal error path instead of the whole module failing to import.
    class URLgetException(Exception):
        """Stand-in used when the URLget helper module cannot be imported."""

    def urlget(url):
        """Stand-in fetcher: always fails with URLgetException."""
        raise URLgetException("URLget module is not available")


def dbgprint(*args, **kwargs):
    """Print like print(), but only when stdout is an interactive terminal."""
    if sys.stdout.isatty():
        print(*args, **kwargs)


class YouTube:
    """Match, normalise and describe YouTube URLs."""

    # When a URL carries both a video and a playlist, title the playlist
    # (True) or the video (False)?  Set on the class to change it globally.
    prefer_playlist = False

    # URL prefixes recognised by match_urls().  "https://youtube.com/..." is
    # rewritten to "https://www.youtube.com/..." before matching, and the
    # ".../embed/videoseries?" playlist embeds are covered by ".../embed/".
    _URL_PREFIXES = (
        "https://youtu.be/",
        "https://www.youtube.com/playlist?",
        "https://music.youtube.com/playlist?",
        "https://m.youtube.com/playlist?",
        "https://www.youtube.com/shorts/",
        "https://m.youtube.com/shorts/",
        "https://www.youtube.com/watch?",
        "https://music.youtube.com/watch?",
        "https://m.youtube.com/watch?",
        "https://www.youtube-nocookie.com/embed/",
        "https://www.youtube.com/embed/",
        "https://m.youtube.com/embed/",
    )

    def __init__(self):
        # Palettes share the same keys so templates can use pal['...'] for
        # either output flavour.
        self.irc_pal = {
            "rst": "\x0f",  # reset
            "ylw": "\x0307",  # yellow
            "b_ylw": "\x0307\x02",  # bold yellow
            "wht": "\x0315",  # white
            "red": "\x0304",  # red
            "grn": "\x0303",  # green
            "itl": "\x1d",  # italic
            "bld": "\x02",  # bold
        }
        self.ansi_pal = {
            "rst": "\x1b[0m",  # reset
            "ylw": "\x1b[33;2m",  # yellow
            "b_ylw": "\x1b[33;1m",  # bold yellow
            "wht": "\x1b[37;2m",  # white
            "red": "\x1b[31m",  # red
            "grn": "\x1b[32m",  # green
            "itl": "\x1b[03m",  # italic
            "bld": "\x1b[1m",  # bold
        }

    def mesg(self, msg, t=None):
        """Forward to ``self.util.mesg`` (short alias).

        NOTE(review): ``self.util`` is never assigned in this class;
        presumably the hosting application injects it -- confirm.
        """
        self.util.mesg(msg, t)

    def match_urls(self, str, r=None):
        """Return every whitespace-separated word of *str* that is a YouTube URL.

        ``http://`` is upgraded to ``https://`` and a bare ``youtube.`` host
        gains the ``www.`` prefix before matching, so the returned URLs are
        in that rewritten form.

        Args:
            str: message text to scan (name kept for keyword callers).
            r: optional list to append matches to; a new list is used when
                omitted, so results no longer leak between calls.

        Returns:
            The list of matched URLs (``r`` itself when it was supplied).
        """
        found = [] if r is None else r
        for word in str.split():
            if word.startswith("http://"):
                word = "https://" + word[7:]
            if word.startswith("https://youtube."):
                word = "https://www." + word[8:]
            if word.startswith(self._URL_PREFIXES):
                found.append(word)
        return found

    def matchstart(self, str, *arr):
        """Return True when *str* starts with any of the prefixes in *arr*."""
        # str.startswith accepts a tuple; an empty tuple yields False.
        return str.startswith(arr)

    def is_clip(self, str):
        """Return True when *str* is a YouTube clip URL."""
        return self.matchstart(str, "https://youtube.com/clip/", "https://www.youtube.com/clip/")

    def normalize_url(self, url):
        """Boil *url* down to ``https://www.youtube.com/watch?[v=ID][&list=ID]``.

        The video id comes from the path for youtu.be, shorts and embed
        URLs and from the ``v`` query parameter otherwise.  The playlist id
        always comes from the ``list`` query parameter.  When neither id is
        found the bare ``https://www.youtube.com/watch?`` is returned.
        """
        dbgprint("normalize", url)
        parts = urlparse(url)
        query = parse_qs(parts.query)
        segments = [seg for seg in parts.path.split("/") if seg]
        video_id = ""
        list_id = query.get("list", [""])[0]

        if self.matchstart(url, "https://youtu.be/"):
            video_id = segments[0] if segments else ""
            dbgprint("youtu.be")
        elif self.matchstart(url, "https://www.youtube.com/shorts/", "https://m.youtube.com/shorts/"):
            video_id = segments[-1] if segments else ""
            dbgprint("/shorts", video_id)
        elif self.matchstart(
            url,
            "https://m.youtube.com/embed/",
            "https://www.youtube.com/embed/",
            "https://www.youtube-nocookie.com/embed/",
        ):
            # /embed/videoseries is the playlist embed and carries no video id.
            if len(segments) > 1 and segments[1] != "videoseries":
                video_id = segments[1]
            dbgprint("embed", video_id, list_id)
        else:  # watch / playlist URLs, including music. and m. hosts
            video_id = query.get("v", [""])[0]

        params = []
        if video_id:
            params.append(("v", video_id))
        if list_id:
            params.append(("list", list_id))
        clean = "https://www.youtube.com/watch?" + urlencode(params)
        dbgprint("clean url", clean)
        return clean

    def normalize_playlist(self, url):
        """Pick the video or the playlist form of a normalised *url*.

        Playlist ids starting with ``RD`` (generated "mix"/"radio" lists)
        are ignored.  The playlist wins when ``prefer_playlist`` is set or
        when there is no video id.

        Returns:
            The chosen URL as a string, or on failure the tuple
            ``({"irc": ..., "ansi": ...}, True)`` carrying an error message.
        """
        parts = urlparse(url)
        query = parse_qs(parts.query)
        video_id = query.get("v", [None])[0]
        playlist_id = query.get("list", [None])[0]
        if playlist_id is not None and playlist_id.startswith("RD"):
            playlist_id = None

        if playlist_id and (self.prefer_playlist or not video_id):
            return parts.scheme + "://" + parts.netloc + "/playlist?list=" + playlist_id
        if video_id:
            return parts.scheme + "://" + parts.netloc + parts.path + "?v=" + video_id

        # The error template needs the rendered prefixes; make sure they
        # exist even when this method is called without going through yt().
        self._set_prefixes()
        strings = self.setstring("string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}")
        return strings, True

    def _set_prefixes(self):
        """Render the normal and error message prefixes for both flavours."""
        self.setstring(
            "prefix",
            "[{pal['grn']}YouTube{pal['rst']}]",
        )
        self.setstring(
            "prefix_err",
            "[{pal['red']}YouTube{pal['rst']}] {pal['ylw']}ERROR:{pal['b_ylw']}",
        )

    def setstring(self, name, val, mylocals=None):
        """Render template *val* for IRC and ANSI and publish both results.

        *val* uses f-string syntax.  ``{i}`` is first replaced textually by
        the flavour name (``irc`` / ``ansi``), ``pal`` is the matching
        palette, and further names are looked up in *mylocals* and then in
        the module globals.  The results are stored in the module globals
        ``irc_<name>`` and ``ansi_<name>``.

        Args:
            name: suffix of the global names to set.
            val: template text.
            mylocals: optional mapping of extra names for the template; it
                is copied, never modified.

        Returns:
            ``{"irc": rendered, "ansi": rendered}``.
        """
        namespace = {} if mylocals is None else dict(mylocals)
        namespace["self"] = self
        rendered = {}
        for flavour in ("irc", "ansi"):
            template = val.replace("{i}", flavour)
            namespace["i"] = flavour
            namespace["pal"] = getattr(self, f"{flavour}_pal")
            # SECURITY: the template is evaluated as an f-string, i.e. as
            # code.  Only pass trusted, hard-coded templates; untrusted data
            # (titles, error text, ...) must travel through *mylocals*,
            # where it is substituted as a value and never parsed.
            text = eval("f" + repr(template), globals(), namespace)
            globals()[f"{flavour}_{name}"] = text
            rendered[flavour] = text
        return rendered

    def yt(self, url):
        """Describe the video or playlist behind *url*.

        Returns:
            ``(strings, failed)`` where ``strings`` is
            ``{"irc": ..., "ansi": ...}`` and ``failed`` is True when
            ``strings`` holds an error message rather than a title line.
        """
        self._set_prefixes()
        no_data = self.setstring(
            "string",
            "{{i}_prefix_err} got no data from server! {pal['wht']}(check your URL for typos!){pal['rst']}",
        )
        url = url.rstrip("\x01")  # I forget exactly why, might be due to /me ?
        target = self.normalize_playlist(self.normalize_url(url))
        if not isinstance(target, str):
            # normalize_playlist() failed and already built the error reply.
            return target
        oembed = f"https://www.youtube.com/oembed?{urlencode([('url', target), ('format', 'json')])}"

        try:
            status, data = urlget(oembed)
        except URLgetException as e:
            strings = self.setstring("string", "{{i}_prefix_err} {e}{pal['rst']}", {"e": e})
            dbgprint(strings["ansi"])
            return strings, True
        if status != 200:
            strings = self.setstring("string", "{{i}_prefix_err} {status}{pal['rst']}", {"status": status})
            return strings, True

        try:
            data = json_loads(data)
            title, channelName = data["title"], data["author_name"]
        except (ValueError, KeyError, TypeError):
            # Body was not the expected JSON object: report "got no data".
            return no_data, True

        strings = self.setstring(
            "string",
            "{{i}_prefix} {pal['bld']}{title}{pal['rst']} uploaded by {pal['itl']}{channelName}{pal['rst']}",
            mylocals={"title": title, "channelName": channelName},
        )
        dbgprint("ansi", strings["ansi"])
        dbgprint("irc", strings["irc"])
        return strings, False


def _main(argv):
    """Command-line entry: print the matched URLs, then look up the argument."""
    if len(argv) < 2:
        print(f"usage: {argv[0]} URL", file=sys.stderr)
        return 2
    # if url is a video that's part of a playlist, return playlist (True) or video (False, default)?
    # YouTube.prefer_playlist = False
    YT = YouTube()
    print(YT.match_urls(argv[1]))
    YT.yt(argv[1])
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))