From 69a28e7a27d276ef945bd9e6ffd9e12849491cae Mon Sep 17 00:00:00 2001
From: Pawky Languish
Date: Sat, 24 May 2025 18:50:43 +0000
Subject: youtube_abstract: add dbgprint, widen URL matching, tolerate embeds without list=

---
 youtube_abstract.py | 152 ++++++++++++++++++++++------------------------------
 1 file changed, 63 insertions(+), 89 deletions(-)

diff --git a/youtube_abstract.py b/youtube_abstract.py
index fb93ad7..f773aa0 100755
--- a/youtube_abstract.py
+++ b/youtube_abstract.py
@@ -3,6 +3,13 @@ from urllib.parse import urlencode, urlparse, parse_qs
 from json import loads as json_loads
 from URLget import urlget, URLgetException
 
+import sys
+
+
+def dbgprint(*args, **kwargs):
+    if sys.stdout.isatty():
+        print(*args, **kwargs)
+
 
 class YouTube:
     def __init__(self):
@@ -11,25 +18,25 @@ class YouTube:
             YouTube.prefer_playlist = YouTube.prefer_playlist
         except AttributeError:  # we probably want video title, default to that
             YouTube.prefer_playlist = False
-        self.irc_pal = {
-            "rst": "\x0f",  ######### reset
-            "ylw": "\x0307",  ####### yellow
-            "b_ylw": "\x0307\x02",  # bold yellow
-            "wht": "\x0315",  ####### white
-            "red": "\x0304",  ####### red
-            "grn": "\x0303",  ####### green
-            "itl": "\x1d",  ######### italic
-            "bld": "\x02",  ######### bold
+        self.irc_pal = {  # the leading "" literals below are no-op padding that keeps the escape codes column-aligned
+            "rst": "" + "\x0f",  ####### reset
+            "ylw": "" + "\x0307",  ##### yellow
+            "b_ylw": "" "\x0307\x02",  # bold yellow
+            "wht": "" + "\x0315",  ##### white
+            "red": "" + "\x0304",  ##### red
+            "grn": "" + "\x0303",  ##### green
+            "itl": "" + "\x1d",  ####### italic
+            "bld": "" + "\x02",  ####### bold
         }
-        self.ansi_pal = {
-            "rst": "\x1b[0m",  ###### reset
-            "ylw": "\x1b[33;2m",  ### yellow
-            "b_ylw": "\x1b[33;1m",  # bold yellow
-            "wht": "\x1b[37;2m",  ### white
-            "red": "\x1b[31m",  ##### red
-            "grn": "\x1b[32m",  ##### green
-            "itl": "\x1b[03m",  ##### italic
-            "bld": "\x1b[1m",  ###### bold
+        self.ansi_pal = {  # the leading "" literals below are no-op padding that keeps the escape codes column-aligned
+            "rst": "" + "\x1b[0m",  #### reset
+            "ylw": "" + "\x1b[33;2m",  # yellow
+            "b_ylw": "" "\x1b[33;1m",  # bold yellow
+            "wht": "" + "\x1b[37;2m",  # white
+            "red": "" + "\x1b[31m",  ### red
+            "grn": "" + "\x1b[32m",  ### green
+            "itl": "" + "\x1b[03m",  ### italic
+            "bld": "" + "\x1b[1m",  #### bold
         }
 
     def mesg(self, msg, t=None):  # just an alias to shorten full name
@@ -38,61 +45,32 @@ class YouTube:
     def match_urls(self, str, r=[]):
         if str.startswith("http://"):
             str = "https://" + str[7:]
-        if str.startswith(
-            "https://"
-        ):  # first string has to be trimmed outside this func
-            if (
+        if str.startswith("https://youtube."):
+            str = "https://www." + str[8:]
+        if str.startswith("https://"):  # first string has to be trimmed before calling match_urls
+            if (  # the repeated "" literals are no-op padding (implicit concatenation) that right-aligns the URLs
                 str.startswith("https://youtu.be/")
-                or str.startswith("https://www.youtube.com/watch?")
-                or str.startswith("https://music.youtube.com/watch?")
-                or str.startswith("https://m.youtube.com/watch?")
-                or str.startswith("https://www.youtube.com/playlist?")
-                or str.startswith("https://music.youtube.com/playlist?")
-                or str.startswith("https://m.youtube.com/playlist?")
-                or str.startswith("https://www.youtube.com/shorts/")
-                or str.startswith("https://youtube.com/shorts/")
-                or str.startswith("https://m.youtube.com/shorts/")
-                or str.startswith("https://www.youtube.com/embed/")
-                or str.startswith("https://www.youtube-nocookie.com/embed/")
-                or str.startswith("https://www.youtube.com/embed/videoseries?")
+                or str.startswith("" "" "" "" "https://www.youtube.com/playlist?")  ####### playlist
+                or str.startswith("" "" "" "https://music.youtube.com/playlist?")
+                or str.startswith("" "" "" "" "https://m.youtube.com/playlist?")
+                or str.startswith("" "" "" "" "https://www.youtube.com/shorts/")  ######### shorts
+                or str.startswith("" "" "" "" "" "https://youtube.com/shorts/")
+                or str.startswith("" "" "" "" "https://m.youtube.com/shorts/")
+                or str.startswith("" "" "" "" "https://www.youtube.com/watch?")  ########## normal
+                or str.startswith("" "" "" "https://music.youtube.com/watch?")
+                or str.startswith("" "" "" "" "https://m.youtube.com/watch?")
+                or str.startswith("https://www.youtube-nocookie.com/embed/")  ############# embed
+                or str.startswith("" "" "" "https://www.youtube.com/embed/")
+                or str.startswith("" "" "" "" "https://m.youtube.com/embed/")
+                or str.startswith("https://www.youtube-nocookie.com/embed/videoseries?")  # embed playlist
+                or str.startswith("" "" "" "https://www.youtube.com/embed/videoseries?")
+                or str.startswith("" "" "" "" "https://m.youtube.com/embed/videoseries?")
             ):
-                r += [str[: str.find(" ")]]
+                r += [str[: str.find(" ")]]  # collect each matching "word" (url); NOTE(review): find() is -1 when no space follows, so this slice drops the last char - confirm
         i = str.find(" ") + 1
-        return match_urls(self, str[i:].strip(), r=r) if i != 0 else r
-
-    """
-    def match_urls(self, str):
-        str = str.replace("http://", "https://")
-        r = [
-            i
-            for i in str.split()
-            # shorturl
-            if "https://youtu.be/" in i
-            # desktop
-            or "https://www.youtube.com/watch?" in i or "https://www.youtube.com/playlist?" in i
-            # mobile
-            or "https://m.youtube.com/watch?" in i or "https://m.youtube.com/playlist?" in i
-            # music
-            or "https://music.youtube.com/watch?" in i or "https://music.youtube.com/playlist?" in i
-            # shorts
-            or "https://www.youtube.com/shorts/" in i
-            or "https://m.youtube.com/shorts/" in i
-            or "https://youtube.com/shorts/" in i
-            # embed
-            or "https://www.youtube.com/embed/" in i or "https://www.youtube-nocookie.com/embed/" in i
-            # or "https://www.youtube.com/embed/videoseries?" in i # embed playlist, lol
-            # just in case (shouldn't happen)
-            or "https://youtube.com/watch?" in i or "https://youtube.com/playlist?" in i
-        ]
-        r = list(dict.fromkeys(r))
-        n = 0
-        for i in r:
-            if not i.startswith("http"):
-                r.pop(n)
-            n += 1
-
-        return r
-        """
+        return (
+            match_urls(self, str[i:].strip(), r=r) if i != 0 else r
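+        )  # recurse word by word, collecting matches; NOTE(review): bare match_urls is looked up as a global, not the method - confirm one exists or use self.match_urls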
 
     # makes for a little better syntax than a bunch of str.startswith calls
     def matchstart(self, str, *arr):
@@ -102,32 +80,32 @@ class YouTube:
         return False
 
     def is_clip(self, str):
-        return self.matchstart(
-            str, "https://youtube.com/clip/", "https://www.youtube.com/clip/"
-        )
+        return self.matchstart(str, "https://youtube.com/clip/", "https://www.youtube.com/clip/")
 
     # boil down to video id + playlist id
     def normalize_url(self, url):
-        raw_url = url
+        dbgprint("normalize", url)
+        raw_url, videoId, listId = url, "", ""
         # youtu.be
         if self.matchstart(url, "https://youtu.be/"):
             videoId = url.split("/")[3].split("?")[0]
-        elif self.matchstart(
-            url, "https://youtube.com/shorts/", "https://www.youtube.com/shorts/"
-        ):
+            dbgprint("youtu.be")
+        elif self.matchstart(url, "https://www.youtube.com/shorts/", "https://m.youtube.com/shorts/"):
             videoId = url.split("?")[0].split("/")[-1]
+            dbgprint("/shorts", videoId)
         # embed
         elif self.matchstart(
             url,
+            "https://m.youtube.com/embed/",
             "https://www.youtube.com/embed/",
             "https://www.youtube-nocookie.com/embed/",
         ):
-            # try:
-            listId = parse_qs(urlparse(url).query)["list"][0]
-            # except
-            if not url.split("/")[4].startswith("videoseries"):
-                videoId = url.split("/")[4]
-            # print("embed", videoId, listId)
+            try:
+                listId = parse_qs(urlparse(url).query)["list"][0]
+            except KeyError:
+                if not url.split("/")[4].startswith("videoseries"):
+                    videoId = url.split("/")[4]
+            dbgprint("embed", videoId, listId)
         elif "v=" in url:  # handles yt music, normal url, etc
             for i in url.split("?")[1].split("&"):
                 if i[0:2] == "v=":
@@ -167,9 +145,7 @@ class YouTube:
         elif video_id:
             url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id
         else:
-            self.setstring(
-                "string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}"
-            )
+            self.setstring("string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}")
             return {"irc": irc_string, "ansi": ansi_string}, True
         return url
 
@@ -206,9 +182,7 @@ class YouTube:
             # print(url, " and ", playlist_id)
             status, data = urlget(url)
             if status != 200:
-                self.setstring(
-                    "string", "{{i}_prefix_err} {status}{pal['rst']}", locals()
-                )
+                self.setstring("string", "{{i}_prefix_err} {status}{pal['rst']}", locals())
                 return {"irc": irc_string, "ansi": ansi_string}, True
             data = json_loads(data)
             title, channelName = data["title"], data["author_name"]
-- 
cgit v1.2.3