summary refs log tree commit diff
path: root/youtube_abstract.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_abstract.py')
-rwxr-xr-x youtube_abstract.py 241
1 files changed, 241 insertions, 0 deletions
diff --git a/youtube_abstract.py b/youtube_abstract.py
new file mode 100755
index 0000000..fb93ad7
--- /dev/null
+++ b/youtube_abstract.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+from urllib.parse import urlencode, urlparse, parse_qs
+from json import loads as json_loads
+from URLget import urlget, URLgetException
+
+
+class YouTube:
+    def __init__(self):
+        # whether urls with a playlist included, should title the video, or the playlist?
+        try:
+            YouTube.prefer_playlist = YouTube.prefer_playlist
+        except AttributeError:  # we probably want video title, default to that
+            YouTube.prefer_playlist = False
+        self.irc_pal = {
+            "rst": "\x0f",  ######### reset
+            "ylw": "\x0307",  ####### yellow
+            "b_ylw": "\x0307\x02",  # bold yellow
+            "wht": "\x0315",  ####### white
+            "red": "\x0304",  ####### red
+            "grn": "\x0303",  ####### green
+            "itl": "\x1d",  ######### italic
+            "bld": "\x02",  ######### bold
+        }
+        self.ansi_pal = {
+            "rst": "\x1b[0m",  ###### reset
+            "ylw": "\x1b[33;2m",  ### yellow
+            "b_ylw": "\x1b[33;1m",  # bold yellow
+            "wht": "\x1b[37;2m",  ### white
+            "red": "\x1b[31m",  ##### red
+            "grn": "\x1b[32m",  ##### green
+            "itl": "\x1b[03m",  ##### italic
+            "bld": "\x1b[1m",  ###### bold
+        }
+
+    def mesg(self, msg, t=None):  # just an alias to shorten full name
+        self.util.mesg(msg, t)
+
+    def match_urls(self, str, r=[]):
+        if str.startswith("http://"):
+            str = "https://" + str[7:]
+        if str.startswith(
+            "https://"
+        ):  # first string has to be trimmed outside this func
+            if (
+                str.startswith("https://youtu.be/")
+                or str.startswith("https://www.youtube.com/watch?")
+                or str.startswith("https://music.youtube.com/watch?")
+                or str.startswith("https://m.youtube.com/watch?")
+                or str.startswith("https://www.youtube.com/playlist?")
+                or str.startswith("https://music.youtube.com/playlist?")
+                or str.startswith("https://m.youtube.com/playlist?")
+                or str.startswith("https://www.youtube.com/shorts/")
+                or str.startswith("https://youtube.com/shorts/")
+                or str.startswith("https://m.youtube.com/shorts/")
+                or str.startswith("https://www.youtube.com/embed/")
+                or str.startswith("https://www.youtube-nocookie.com/embed/")
+                or str.startswith("https://www.youtube.com/embed/videoseries?")
+            ):
+                r += [str[: str.find(" ")]]
+        i = str.find(" ") + 1
+        return match_urls(self, str[i:].strip(), r=r) if i != 0 else r
+
+    """
+    def match_urls(self, str):
+        str = str.replace("http://", "https://")
+        r = [
+            i
+            for i in str.split()
+            # shorturl
+            if "https://youtu.be/" in i
+            # desktop
+            or "https://www.youtube.com/watch?" in i or "https://www.youtube.com/playlist?" in i
+            # mobile
+            or "https://m.youtube.com/watch?" in i or "https://m.youtube.com/playlist?" in i
+            # music
+            or "https://music.youtube.com/watch?" in i or "https://music.youtube.com/playlist?" in i
+            # shorts
+            or "https://www.youtube.com/shorts/" in i
+            or "https://m.youtube.com/shorts/" in i
+            or "https://youtube.com/shorts/" in i
+            # embed
+            or "https://www.youtube.com/embed/" in i or "https://www.youtube-nocookie.com/embed/" in i
+            # or "https://www.youtube.com/embed/videoseries?" in i # embed playlist, lol
+            # just in case (shouldn't happen)
+            or "https://youtube.com/watch?" in i or "https://youtube.com/playlist?" in i
+        ]
+        r = list(dict.fromkeys(r))
+        n = 0
+        for i in r:
+            if not i.startswith("http"):
+                r.pop(n)
+            n += 1
+
+        return r
+        """
+
+    # makes for a little better syntax than a bunch of str.startswith calls
+    def matchstart(self, str, *arr):
+        for i in arr:
+            if str.startswith(i):
+                return True
+        return False
+
+    def is_clip(self, str):
+        return self.matchstart(
+            str, "https://youtube.com/clip/", "https://www.youtube.com/clip/"
+        )
+
+    # boil down to video id + playlist id
+    def normalize_url(self, url):
+        raw_url = url
+        # youtu.be
+        if self.matchstart(url, "https://youtu.be/"):
+            videoId = url.split("/")[3].split("?")[0]
+        elif self.matchstart(
+            url, "https://youtube.com/shorts/", "https://www.youtube.com/shorts/"
+        ):
+            videoId = url.split("?")[0].split("/")[-1]
+        # embed
+        elif self.matchstart(
+            url,
+            "https://www.youtube.com/embed/",
+            "https://www.youtube-nocookie.com/embed/",
+        ):
+            # try:
+            listId = parse_qs(urlparse(url).query)["list"][0]
+            # except
+            if not url.split("/")[4].startswith("videoseries"):
+                videoId = url.split("/")[4]
+            # print("embed", videoId, listId)
+        elif "v=" in url:  # handles yt music, normal url, etc
+            for i in url.split("?")[1].split("&"):
+                if i[0:2] == "v=":
+                    videoId = i[2:]
+                elif i[0:5] == "list=":
+                    listId = i[5:]
+        if "videoId" in locals():
+            url = "https://www.youtube.com/watch?"
+            if "videoId" in locals():
+                if videoId != "":
+                    url += f"v={videoId}"
+            if "listId" in locals():
+                if listId != "":
+                    if not url.endswith("?"):
+                        url += "&"
+                    url += f"list={listId}"
+        print("clean url", url)
+        return url
+
+    # very close to normalize_url, maybe could reorganize better?
+    def normalize_playlist(self, url):
+        url = urlparse(url)
+        qs = parse_qs(url.query)
+        try:
+            video_id = qs["v"][0]
+        except KeyError:
+            video_id = None
+        try:
+            playlist_id = qs["list"][0]
+            # ignore the programmatic "mix" / "radio" lists, actual playlists start with "PL"
+            if playlist_id.startswith("RD"):
+                playlist_id = None
+        except KeyError:
+            playlist_id = None
+        if (self.prefer_playlist and playlist_id) or (playlist_id and not video_id):
+            url = url.scheme + "://" + url.netloc + "/playlist?list=" + playlist_id
+        elif video_id:
+            url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id
+        else:
+            self.setstring(
+                "string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}"
+            )
+            return {"irc": irc_string, "ansi": ansi_string}, True
+        return url
+
+    # set both irc_name and ansi_name, using the appropriate palette
+    def setstring(self, name, val, mylocals=locals()):
+        prefixes = ["irc", "ansi"]
+        for i in prefixes:
+            value = val.replace("{i}", i)
+            mylocals.update(locals())  # merge the local variables
+            exec(
+                f"global {i}_{name}; pal=self.{i}_pal; {i}_{name}=f{repr(value)}",
+                globals(),
+                mylocals,
+            )
+
+    def yt(self, url):
+        self.setstring(
+            "prefix",
+            "[{pal['grn']}YouTube{pal['rst']}]",
+        )
+        self.setstring(
+            "prefix_err",
+            "[{pal['red']}YouTube{pal['rst']}] {pal['ylw']}ERROR:{pal['b_ylw']}",
+        )
+        self.setstring(
+            "string",
+            "{{i}_prefix_err} got no data from server! {pal['wht']}(check your URL for typos!){pal['rst']}",
+        )
+        url = url.rstrip("\x01")  # I forget exactly why, might be due to /me ?
+        url = self.normalize_url(url)
+        url = self.normalize_playlist(url)
+        url = f"https://www.youtube.com/oembed?{urlencode([('url',url),('format','json')])}"
+        try:
+            # print(url, " and ", playlist_id)
+            status, data = urlget(url)
+            if status != 200:
+                self.setstring(
+                    "string", "{{i}_prefix_err} {status}{pal['rst']}", locals()
+                )
+                return {"irc": irc_string, "ansi": ansi_string}, True
+            data = json_loads(data)
+            title, channelName = data["title"], data["author_name"]
+        except URLgetException as e:
+            self.setstring("string", "{{i}_prefix_err} {e}{pal['rst']}", locals())
+            if __import__("sys").stdout.isatty():
+                print(ansi_string)
+            return {"irc": irc_string, "ansi": ansi_string}, True
+        self.setstring(
+            "string",
+            "{{i}_prefix} {pal['bld']}{title}{pal['rst']} uploaded by {pal['itl']}{channelName}{pal['rst']}",
+            mylocals=locals(),
+        )
+        if __import__("sys").stdout.isatty():
+            print("ansi", ansi_string)
+            print("irc", irc_string)
+        return {"irc": irc_string, "ansi": ansi_string}, False
+
+
+if __name__ == "__main__":
+    import sys
+
+    # if url is a video that's part of a playlist, return playlist (True) or video (False, default)?
+    # YouTube.prefer_playlist=False
+
+    # YouTube.yt(YouTube, sys.argv[1])
+    # YouTube().yt(sys.argv[1])
+    YT = YouTube()
+    print(YT.match_urls(sys.argv[1]))
+    YT.yt(sys.argv[1])