summary refs log tree commit diff
path: root/youtube_abstract.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_abstract.py')
-rwxr-xr-x youtube_abstract.py 215
1 files changed, 215 insertions, 0 deletions
diff --git a/youtube_abstract.py b/youtube_abstract.py
new file mode 100755
index 0000000..f773aa0
--- /dev/null
+++ b/youtube_abstract.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+from urllib.parse import urlencode, urlparse, parse_qs
+from json import loads as json_loads
+from URLget import urlget, URLgetException
+
+import sys
+
+
+def dbgprint(*args, **kwargs):
+    """Forward to the builtin print(), but only when stdout is a terminal.
+
+    All positional and keyword arguments are passed through unchanged; when
+    stdout is not a tty nothing is written.
+    """
+    if not sys.stdout.isatty():
+        return
+    print(*args, **kwargs)
+
+
+class YouTube:
+    """Turn YouTube links into short, coloured "title uploaded by channel" lines.
+
+    Every message is rendered twice: once with IRC control codes and once with
+    ANSI escape sequences. Rendered strings are kept on the instance.
+
+    Set ``YouTube.prefer_playlist = True`` to title the playlist instead of the
+    video when a URL carries both a video ID and a playlist ID.
+    """
+
+    # whether urls with a playlist included, should title the video, or the playlist?
+    # we probably want video title, default to that
+    prefer_playlist = False
+
+    # Prefixes accepted by match_urls(), compared AFTER http:// has been upgraded to
+    # https:// and a bare "youtube." host has been rewritten to "www.youtube.".
+    # "/embed/videoseries?" links are covered by the plain "/embed/" prefixes.
+    _URL_PREFIXES = (
+        "https://youtu.be/",
+        "https://www.youtube.com/playlist?",  # playlist
+        "https://music.youtube.com/playlist?",
+        "https://m.youtube.com/playlist?",
+        "https://www.youtube.com/shorts/",  # shorts
+        "https://m.youtube.com/shorts/",
+        "https://www.youtube.com/watch?",  # normal
+        "https://music.youtube.com/watch?",
+        "https://m.youtube.com/watch?",
+        "https://www.youtube-nocookie.com/embed/",  # embed (and embed playlist)
+        "https://www.youtube.com/embed/",
+        "https://m.youtube.com/embed/",
+    )
+
+    def __init__(self):
+        # rendered strings, keyed "<irc|ansi>_<name>" (see setstring)
+        self._strings = {}
+        self.irc_pal = {
+            "rst": "\x0f",  # reset
+            "ylw": "\x0307",  # yellow
+            "b_ylw": "\x0307\x02",  # bold yellow
+            "wht": "\x0315",  # white
+            "red": "\x0304",  # red
+            "grn": "\x0303",  # green
+            "itl": "\x1d",  # italic
+            "bld": "\x02",  # bold
+        }
+        self.ansi_pal = {
+            "rst": "\x1b[0m",  # reset
+            "ylw": "\x1b[33;2m",  # yellow
+            "b_ylw": "\x1b[33;1m",  # bold yellow
+            "wht": "\x1b[37;2m",  # white
+            "red": "\x1b[31m",  # red
+            "grn": "\x1b[32m",  # green
+            "itl": "\x1b[03m",  # italic
+            "bld": "\x1b[1m",  # bold
+        }
+
+    def mesg(self, msg, t=None):  # just an alias to shorten full name
+        """Forward *msg* and *t* to ``self.util.mesg``.
+
+        NOTE(review): ``self.util`` is not assigned anywhere in this class;
+        presumably the hosting application attaches it - confirm.
+        """
+        self.util.mesg(msg, t)
+
+    def match_urls(self, str, r=None):
+        """Collect every recognised YouTube URL found in the text *str*.
+
+        The text is split on whitespace. For each word, ``http://`` is upgraded
+        to ``https://`` and a bare ``youtube.`` host gets a ``www.`` prefix
+        before it is compared against the known URL prefixes.
+
+        Args:
+            str: text to scan (one URL or a whole chat line).
+            r: optional list to append matches to; a new list is used when omitted.
+
+        Returns:
+            The list of matching URLs in their rewritten (https/www) form.
+        """
+        if r is None:  # a shared default list would leak matches between calls
+            r = []
+        for word in str.split():
+            if word.startswith("http://"):
+                word = "https://" + word[7:]
+            if word.startswith("https://youtube."):
+                word = "https://www." + word[8:]
+            if word.startswith(self._URL_PREFIXES):
+                r.append(word)
+        return r
+
+    # makes for a little better syntax than a bunch of str.startswith calls
+    def matchstart(self, str, *arr):
+        """Return True when *str* starts with any of the prefixes in *arr*."""
+        return str.startswith(arr)
+
+    def is_clip(self, str):
+        """Return True when *str* is a youtube.com "/clip/" URL."""
+        return self.matchstart(str, "https://youtube.com/clip/", "https://www.youtube.com/clip/")
+
+    # boil down to video id + playlist id
+    def normalize_url(self, url):
+        """Rewrite *url* to ``https://www.youtube.com/watch?[v=ID][&list=ID]``.
+
+        youtu.be and /shorts/ links contribute only a video ID. /embed/ links
+        contribute the ``list`` query value when present, otherwise the video
+        ID from the path. Every other URL is read from its ``v`` and ``list``
+        query parameters. When nothing is found the result is the bare
+        ``https://www.youtube.com/watch?``.
+        """
+        dbgprint("normalize", url)
+        parts = urlparse(url)
+        query = parse_qs(parts.query)
+        segments = parts.path.split("/")  # "/embed/ID" -> ["", "embed", "ID"]
+        videoId, listId = "", ""
+        # youtu.be
+        if self.matchstart(url, "https://youtu.be/"):
+            videoId = segments[1]
+            dbgprint("youtu.be")
+        elif self.matchstart(url, "https://www.youtube.com/shorts/", "https://m.youtube.com/shorts/"):
+            videoId = segments[-1]
+            dbgprint("/shorts", videoId)
+        # embed
+        elif self.matchstart(
+            url,
+            "https://m.youtube.com/embed/",
+            "https://www.youtube.com/embed/",
+            "https://www.youtube-nocookie.com/embed/",
+        ):
+            if "list" in query:
+                listId = query["list"][0]
+            elif not segments[2].startswith("videoseries"):
+                # taken from the parsed path so a trailing "?query" is not part of the ID
+                videoId = segments[2]
+            dbgprint("embed", videoId, listId)
+        else:  # handles yt music, normal url, playlist-only url, etc
+            videoId = query.get("v", [""])[0]
+            listId = query.get("list", [""])[0]
+        params = []
+        if videoId:
+            params.append(f"v={videoId}")
+        if listId:
+            params.append(f"list={listId}")
+        url = "https://www.youtube.com/watch?" + "&".join(params)
+        dbgprint("clean url", url)
+        return url
+
+    # very close to normalize_url, maybe could reorganize better?
+    def normalize_playlist(self, url):
+        """Choose between the video and the playlist form of *url*.
+
+        Returns:
+            A playlist URL when a playlist ID is present and either
+            ``prefer_playlist`` is set or there is no video ID; otherwise a
+            video URL when a video ID is present. When neither ID is found the
+            return value is the error pair ``({"irc": ..., "ansi": ...}, True)``.
+        """
+        parts = urlparse(url)
+        qs = parse_qs(parts.query)
+        video_id = qs.get("v", [None])[0]
+        playlist_id = qs.get("list", [None])[0]
+        # ignore the programmatic "mix" / "radio" lists, actual playlists start with "PL"
+        if playlist_id and playlist_id.startswith("RD"):
+            playlist_id = None
+        base = parts.scheme + "://" + parts.netloc
+        if playlist_id and (self.prefer_playlist or not video_id):
+            return base + "/playlist?list=" + playlist_id
+        if video_id:
+            return base + parts.path + "?v=" + video_id
+        self._set_prefixes()  # the error template needs <irc|ansi>_prefix_err
+        self.setstring("string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}")
+        return self._abstract(), True
+
+    # set both irc_name and ansi_name, using the appropriate palette
+    def setstring(self, name, val, mylocals=None):
+        """Render the template *val* once per palette and store both results.
+
+        *val* is the body of an f-string. Before evaluation every ``{i}`` is
+        replaced by ``irc`` / ``ansi``. During evaluation ``pal`` is the
+        matching palette dict, strings rendered earlier are available under
+        their ``irc_<name>`` / ``ansi_<name>`` names, and the entries of
+        *mylocals* are available as plain variables.
+
+        Args:
+            name: suffix of the stored keys (``irc_<name>`` and ``ansi_<name>``).
+            val: f-string template text.
+            mylocals: optional mapping of extra names the template may use.
+        """
+        rendered = vars(self).setdefault("_strings", {})
+        extra = dict(mylocals) if mylocals else {}
+        for i in ("irc", "ansi"):
+            value = val.replace("{i}", i)
+            scope = {**extra, "self": self, "i": i, "pal": getattr(self, f"{i}_pal")}
+            # NOTE: the template is evaluated as code (f-string semantics), so *val*
+            # must only ever be a trusted literal. Untrusted data such as video
+            # titles goes in through *mylocals* and is never parsed as code.
+            rendered[f"{i}_{name}"] = eval("f" + repr(value), {**globals(), **rendered}, scope)
+
+    def _set_prefixes(self):
+        """Render the normal and the error "[YouTube]" prefixes."""
+        self.setstring(
+            "prefix",
+            "[{pal['grn']}YouTube{pal['rst']}]",
+        )
+        self.setstring(
+            "prefix_err",
+            "[{pal['red']}YouTube{pal['rst']}] {pal['ylw']}ERROR:{pal['b_ylw']}",
+        )
+
+    def _abstract(self):
+        """Return the most recently rendered "string" in both encodings."""
+        return {"irc": self._strings["irc_string"], "ansi": self._strings["ansi_string"]}
+
+    def yt(self, url):
+        """Look up *url* through YouTube's oEmbed endpoint and describe it.
+
+        Returns:
+            ``({"irc": text, "ansi": text}, is_error)``. ``is_error`` is True
+            when no video or playlist ID could be found, the request raised
+            ``URLgetException``, the status was not 200, or the response body
+            could not be used.
+        """
+        self._set_prefixes()
+        # fallback, returned as-is when the server answers 200 with an unusable body
+        self.setstring(
+            "string",
+            "{{i}_prefix_err} got no data from server! {pal['wht']}(check your URL for typos!){pal['rst']}",
+        )
+        # a trailing \x01 is dropped; the original author suspected /me (CTCP) framing
+        url = url.rstrip("\x01")
+        url = self.normalize_url(url)
+        url = self.normalize_playlist(url)
+        if isinstance(url, tuple):  # error pair from normalize_playlist, pass it on
+            return url
+        url = f"https://www.youtube.com/oembed?{urlencode([('url', url), ('format', 'json')])}"
+        try:
+            status, data = urlget(url)
+        except URLgetException as e:
+            self.setstring("string", "{{i}_prefix_err} {e}{pal['rst']}", {"e": e})
+            dbgprint(self._strings["ansi_string"])
+            return self._abstract(), True
+        if status != 200:
+            self.setstring("string", "{{i}_prefix_err} {status}{pal['rst']}", {"status": status})
+            return self._abstract(), True
+        try:
+            data = json_loads(data)
+            title, channelName = data["title"], data["author_name"]
+        except (ValueError, KeyError, TypeError):
+            # invalid JSON or missing fields: report the "got no data" fallback
+            return self._abstract(), True
+        self.setstring(
+            "string",
+            "{{i}_prefix} {pal['bld']}{title}{pal['rst']} uploaded by {pal['itl']}{channelName}{pal['rst']}",
+            mylocals={"title": title, "channelName": channelName},
+        )
+        dbgprint("ansi", self._strings["ansi_string"])
+        dbgprint("irc", self._strings["irc_string"])
+        return self._abstract(), False
+
+
+if __name__ == "__main__":
+    # Manual smoke test: ./youtube_abstract.py "<text containing a YouTube URL>"
+    import sys
+
+    if len(sys.argv) < 2:  # without this a missing argument ends in an IndexError
+        sys.exit(f"usage: {sys.argv[0]} URL")
+
+    # if url is a video that's part of a playlist, return playlist (True) or video (False, default)?
+    # YouTube.prefer_playlist=False
+
+    YT = YouTube()
+    print(YT.match_urls(sys.argv[1]))
+    YT.yt(sys.argv[1])