Add tests and make YouTube use oEmbed (no more bot block, at the cost of very minimal info: only the title and channel name)

author: Pawky Languish 2024-11-28 03:18:20 +0000
committer: Pawky Languish 2024-11-28 03:18:20 +0000
commit: 763f96948f39a20d79f7ec68a924966c2b3db761 (patch)
tree: 4b522062a472d0cbd87210f7e57a779651ae0a1a
parent: 1c083501b82eba4e927988bc84547ba76bb6f513 (diff)
6 files changed, 233 insertions, 108 deletions
diff --git a/.gitignore b/.gitignore
index f05327d..7d94e97 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ venv
 pass.txt
 local_config.py
 log.txt
+yt_keys.json
 
 # ---> Python
 # Byte-compiled / optimized / DLL files
diff --git a/requirements.txt b/requirements.txt
index 8376b04..8a70ebc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-ircstates ~=0.11.9 
+ircstates 
diff --git a/soundcloud.py b/soundcloud.py
index ae17f9d..a1f5f91 100755
--- a/soundcloud.py
+++ b/soundcloud.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
-from urllib.parse import urlencode, urlparse
 from urllib.request import urlopen
+from urllib.parse import urlencode, urlparse
 from json import loads as json_loads
 
 
@@ -29,7 +29,7 @@ class SoundCloud:
         url = f"https://soundcloud.com/oembed?{urlencode([('url',url),('format','json')])}"
         data = urlopen(url).read().decode()
         data = json_loads(data)
-        print(data)
+        #print(data)
         # print(data["title"].removesuffix(" by "+data["author_name"]),data["author_name"])
         try:
             artist = data["author_name"]
@@ -37,7 +37,7 @@ class SoundCloud:
         except KeyError:
             title = ""
             artist = ""
-        print(title.removesuffix(" by " + artist), "|", artist)
+        #print(title.removesuffix(" by " + artist), "|", artist)
         if title == "":
             irc_string = "[\x0304SoundCloud\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
             ansi_string = "[\x1b[31mSoundCloud\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..fdcbba3
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+#just random urls to test the modules
+
+./bandcamp.py https://austinwintory.bandcamp.com/album/stray-gods
+./bandcamp.py https://soundoftheaviators.bandcamp.com/track/writing-on-the-walls-2
+
+#soundcloud and spotify add junk to links if you click "share"
+./soundcloud.py https://soundcloud.com/lindseystomp/sets/artemis-3
+./soundcloud.py 'https://soundcloud.com/lindseystomp/sets/artemis-3?utm_source=clipboard&utm_medium=text&utm_campaign=social_sharing'
+./soundcloud.py https://soundcloud.com/user-152508755/thefatrat-maisy-kay-the-storm-epic-orchestra-remix
+./soundcloud.py 'https://soundcloud.com/user-152508755/thefatrat-maisy-kay-the-storm-epic-orchestra-remix?utm_source=clipboard&utm_medium=text&utm_campaign=social_sharing'
+
+./spotify.py https://open.spotify.com/track/4pY1okPrJvIPBQM0t4i28v
+./spotify.py 'https://open.spotify.com/track/4sOX1nhpKwFWPvoMMExi3q?si=c880ccca72ee435d'
+./spotify.py https://open.spotify.com/album/2hvCFY4DYaKzzkNYd60oS3
+./spotify.py 'https://open.spotify.com/album/1u2ACTYzVNK3vSLG0Ah4H3?si=c1ZT_3YeS8SXkrbErFl6bw'
+
+#youtube oembed does support playlists but ONLY when it's /playlist?list= *NOT* when it is /watch?v=bla&list=
+./youtube.py https://www.youtube.com/watch?v=EUD9UTwXAZY
+./youtube.py https://www.youtube.com/playlist?list=PL0bbUqXsNHE0ZELST3vW_11GDHKDAwLYh
+./youtube.py 'https://www.youtube.com/watch?v=eneLP_P1_fg&list=PL0bbUqXsNHE0ZELST3vW_11GDHKDAwLYh&index=2'
diff --git a/youtube.py b/youtube.py
index 41dd18b..a75f52b 100755
--- a/youtube.py
+++ b/youtube.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python3
-from html.parser import HTMLParser
 from urllib.request import urlopen
-
+from urllib.error import HTTPError
+from urllib.parse import urlencode, urlparse, parse_qs
+from json import loads as json_loads
 
 class YouTube:
-    video_type = ""
-
     def mesg(self, msg, t=None):
         self.util.mesg(msg, t)
 
@@ -52,119 +51,44 @@ class YouTube:
             "https://www.youtube.com/clip/"
         )
 
-    class parseprop(HTMLParser):
-        def __init__(self):
-            print("yt parse init")
-            HTMLParser.__init__(self)
-            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
-            self.h = {}
-            if YouTube.video_type == "clip":
-                self.itemprops_list += ["description"]
-                print("it is a clip!")
-
-        def handle_starttag(self, tag, attrs):
-            if (tag != "meta" and tag != "link") or (
-                (
-                    [i for i in attrs if "itemprop" in i] == []
-                    and ("name", "title") not in attrs
-                )
-                or (tag == "meta" and ("itemprop", "name") in attrs)
-            ):
-                return
-            # print(self,tag,attrs)
-            for k, v in attrs:
-                if k == "itemprop":
-                    if v not in self.itemprops_list:
-                        return
-                    x = [v]
-                    if tag == "link" and v == "name":
-                        x = ["channelName"]
-                elif k == "content":
-                    if attrs[0][1] == "interactionCount":
-                        v = int(v)
-                    x += [v]
-                elif k == "name" and v == "title":
-                    x = [v]
-                else:
-                    return
-            self.h.update({x[0]: x[1]})
-            # print(x[0],"=",x[1])
-
-    def fmt_dur(dur):
-        h, m, s = 0, 0, 0
-        m = dur[2:].split("M")
-        s = int(m[1][:-1])
-        m = int(m[0])
-        if m >= 60:
-            h = m // 60
-            m = round((m / 60 - h) * 60)
-            return f"{h}h {m}m {s}s"
-        elif h == 0 and m == 0 and s == 0:
-            return "LIVE"
-        elif m == 0 and s != 0:
-            return f"{s}s"
-        elif s == 0:
-            return f"{m}m"
-        else:
-            return f"{m}m {s}s"
-
     def yt(self, url):
+        irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
+        ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
         # self.util.mesg("dbg hello")
         url = url.rstrip("\x01")
-        self.video_type = (
-            "clip"
-            if self.is_clip(url)
-            else (
-                "shorts"
-                if self.is_ytshorts(url)
-                else (
-                    "music"
-                    if self.is_ytmusic(url)
-                    else "embed" if self.is_embed(url) else "video"
-                )
-            )
-        )
-        video_type = self.video_type
-        if video_type == "embed":
+        if self.is_embed(url):
             videoId = url.split("/")[4]
             url = f"https://www.youtube.com/watch?v={videoId}"
-        elif video_type == "music":
+        elif self.is_ytmusic(url):
             for i in url.split("?")[1].split("&"):
                 if i[0:2] == "v=":
                     videoId = i[2:]
             url = f"https://www.youtube.com/watch?v={videoId}"
-        elif video_type == "shorts":
+        elif self.is_ytshorts(url):
             videoId = url.split("?")[0].split("/")[-1]
             url = f"https://www.youtube.com/watch?v={videoId}"
-        p = self.parseprop()
-        # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
-        data = b""
-        if self.premature_optimization:
-            url_h = urlopen(url)
-            # <body> appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment)
-            # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads
-            for i in range(24):
-                data += url_h.readline()
-            url_h.close()
-        data = data.decode()  # bytes to utf-8
-        if (
-            data.find('meta itemprop="duration"') == -1
-            or data.find('meta itemprop="name"') == -1
-        ):  # acts as both fallback for optimization, and in case optimization's turned off
-            # just read all of the html
+        url = urlparse(url)
+        qs=parse_qs(url.query);video_id=qs['v'][0]
+        try: playlist_id=qs['list'][0]
+        except KeyError: playlist_id=None
+        if self.prefer_playlist and playlist_id:
+          url = url.scheme + "://" + url.netloc + "/playlist?list=" + playlist_id
+        else:
+          url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id
+        url = f"https://www.youtube.com/oembed?{urlencode([('url',url),('format','json')])}"
+        try:
+            print(url," and ",playlist_id)
             data = urlopen(url).read().decode()
-        # print(f"\x1b[31m my data is: {data}\x1b[0m")
-        p.feed(data)
-        if p.h == {}:
-            irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
-            ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
+            data = json_loads(data)
+            title=data['title']
+            channelName=data['author_name']
+        except HTTPError as e:
+            irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
+            ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
             print(ansi_string)
             return irc_string, True
-        y = p.h
-        print(y)
-        y.update(duration=self.fmt_dur(y["duration"]))
-        irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
-        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
+        irc_string = f"[\x0303Youtube\x03] \x02{title}\x02 uploaded by \x1d{channelName}\x1d"
+        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{title}\x1b[0m uploaded by \x1b[03m{channelName}\x1b[0m"
         print(ansi_string)
         return irc_string, False
 
@@ -172,5 +96,5 @@ class YouTube:
 if __name__ == "__main__":
     import sys
 
-    YouTube.premature_optimization = False
+    YouTube.prefer_playlist=False
     YouTube.yt(YouTube, sys.argv[1])
diff --git a/youtube.py.old b/youtube.py.old
new file mode 100755
index 0000000..b09b1d1
--- /dev/null
+++ b/youtube.py.old
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+from html.parser import HTMLParser
+from urllib.request import urlopen
+from urllib.error import HTTPError
+
+class YouTube:
+    video_type = ""
+
+    def mesg(self, msg, t=None):
+        self.util.mesg(msg, t)
+
+    def match_urls(self, str):
+        r = [
+            i
+            for i in str.split()
+            if "https://youtu.be/" in i
+            or "https://www.youtube.com/watch?v=" in i
+            or "https://m.youtube.com/watch?v=" in i
+            or "https://youtube.com/watch?v=" in i
+            or "https://www.youtube.com/embed/" in i
+            or "https://www.youtube-nocookie.com/embed/" in i
+            or "https://music.youtube.com/watch?v=" in i
+            or "https://youtube.com/shorts/" in i
+            or "https://www.youtube.com/shorts/" in i
+            or "https://www.youtube.com/clip/" in i
+            or "https://youtube.com/clip/" in i
+        ]
+        r = list(dict.fromkeys(r))
+        n = 0
+        for i in r:
+            if not i.startswith("http"):
+                r.pop(n)
+            n += 1
+
+        return r
+
+    def is_embed(str):
+        return str.startswith("https://www.youtube.com/embed/") or str.startswith(
+            "https://www.youtube-nocookie.com/embed/"
+        )
+
+    def is_ytmusic(str):
+        return str.startswith("https://music.youtube.com/watch?v=")
+
+    def is_ytshorts(str):
+        return str.startswith("https://youtube.com/shorts/") or str.startswith(
+            "https://www.youtube.com/shorts/"
+        )
+
+    def is_clip(str):
+        return str.startswith("https://youtube.com/clip/") or str.startswith(
+            "https://www.youtube.com/clip/"
+        )
+
+    class parseprop(HTMLParser):
+        def __init__(self):
+            #print("yt parse init")
+            HTMLParser.__init__(self)
+            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
+            self.h = {}
+            if YouTube.video_type == "clip":
+                self.itemprops_list += ["description"]
+                print("it is a clip!")
+
+        def handle_starttag(self, tag, attrs):
+            if (tag != "meta" and tag != "link") or (
+                (
+                    [i for i in attrs if "itemprop" in i] == []
+                    and ("name", "title") not in attrs
+                )
+                or (tag == "meta" and ("itemprop", "name") in attrs)
+            ):
+                return
+            # print(self,tag,attrs)
+            for k, v in attrs:
+                if k == "itemprop":
+                    if v not in self.itemprops_list:
+                        return
+                    x = [v]
+                    if tag == "link" and v == "name":
+                        x = ["channelName"]
+                elif k == "content":
+                    if attrs[0][1] == "interactionCount":
+                        v = int(v)
+                    x += [v]
+                elif k == "name" and v == "title":
+                    x = [v]
+                else:
+                    return
+            self.h.update({x[0]: x[1]})
+            # print(x[0],"=",x[1])
+
+    def fmt_dur(dur):
+        h, m, s = 0, 0, 0
+        m = dur[2:].split("M")
+        s = int(m[1][:-1])
+        m = int(m[0])
+        if m >= 60:
+            h = m // 60
+            m = round((m / 60 - h) * 60)
+            return f"{h}h {m}m {s}s"
+        elif h == 0 and m == 0 and s == 0:
+            return "LIVE"
+        elif m == 0 and s != 0:
+            return f"{s}s"
+        elif s == 0:
+            return f"{m}m"
+        else:
+            return f"{m}m {s}s"
+
+    def yt(self, url):
+        irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
+        ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
+        # self.util.mesg("dbg hello")
+        url = url.rstrip("\x01")
+        self.video_type = (
+            "clip"
+            if self.is_clip(url)
+            else (
+                "shorts"
+                if self.is_ytshorts(url)
+                else (
+                    "music"
+                    if self.is_ytmusic(url)
+                    else "embed" if self.is_embed(url) else "video"
+                )
+            )
+        )
+        video_type = self.video_type
+        if video_type == "embed":
+            videoId = url.split("/")[4]
+            url = f"https://www.youtube.com/watch?v={videoId}"
+        elif video_type == "music":
+            for i in url.split("?")[1].split("&"):
+                if i[0:2] == "v=":
+                    videoId = i[2:]
+            url = f"https://www.youtube.com/watch?v={videoId}"
+        elif video_type == "shorts":
+            videoId = url.split("?")[0].split("/")[-1]
+            url = f"https://www.youtube.com/watch?v={videoId}"
+        p = self.parseprop()
+        # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
+        data = b""
+        if self.premature_optimization:
+            url_h = urlopen(url)
+            # <body> appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment)
+            # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads
+            for i in range(24):
+                data += url_h.readline()
+            url_h.close()
+        data = data.decode()  # bytes to utf-8
+        if (
+            data.find('meta itemprop="duration"') == -1
+            or data.find('meta itemprop="name"') == -1
+        ):  # acts as both fallback for optimization, and in case optimization's turned off
+            # just read all of the html
+            try: data = urlopen(url).read().decode()
+            except HTTPError as e:
+              irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
+              ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
+        # print(f"\x1b[31m my data is: {data}\x1b[0m")
+        p.feed(data)
+        if p.h == {}:
+            print(ansi_string)
+            return irc_string, True
+        y = p.h
+        print(y)
+        y.update(duration=self.fmt_dur(y["duration"]))
+        irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
+        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
+        print(ansi_string)
+        return irc_string, False
+
+
+if __name__ == "__main__":
+    import sys
+
+    YouTube.premature_optimization = False
+    YouTube.yt(YouTube, sys.argv[1])
author	Pawky Languish	2024-11-28 03:18:20 +0000
committer	Pawky Languish	2024-11-28 03:18:20 +0000
commit	763f96948f39a20d79f7ec68a924966c2b3db761 (patch)
tree	4b522062a472d0cbd87210f7e57a779651ae0a1a
parent	1c083501b82eba4e927988bc84547ba76bb6f513 (diff)