summary refs log tree commit diff
path: root/youtube.py.old
diff options
context:
space:
mode:
authorPawky Languish2024-12-11 16:40:28 +0000
committerPawky Languish2024-12-11 17:01:54 +0000
commitf0e4aba9708f21d30151b2c4b49fb949d1c91df5 (patch)
treec9cf5ea17bcef9fe42df010c3dbfdfcbd9c35bcd /youtube.py.old
parent225f83f5a5f077d7b7f7c476f825a0ded8008b74 (diff)
Try to work around youtube bot flagging in alternate youtube module HEAD master
(doesn't quite work? might reduce likelihood IP gets flagged?)
Diffstat (limited to 'youtube.py.old')
-rwxr-xr-xyoutube.py.old179
1 files changed, 0 insertions, 179 deletions
diff --git a/youtube.py.old b/youtube.py.old
deleted file mode 100755
index b09b1d1..0000000
--- a/youtube.py.old
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/usr/bin/env python3
-from html.parser import HTMLParser
-from urllib.request import urlopen
-from urllib.error import HTTPError
-
-class YouTube:
-    """Look up a YouTube URL and build an IRC/ANSI one-line summary of it.
-
-    The host program is expected to provide ``util`` (used by ``mesg``) and
-    ``premature_optimization`` (read by ``yt``); neither is defined here.
-    """
-
-    # Kind of the URL most recently handled by yt(): "clip", "shorts",
-    # "music", "embed" or "video". parseprop falls back to it by default.
-    video_type = ""
-    # Substrings that mark a whitespace-separated token as a YouTube link.
-    _url_markers = (
-        "https://youtu.be/",
-        "https://www.youtube.com/watch?v=",
-        "https://m.youtube.com/watch?v=",
-        "https://youtube.com/watch?v=",
-        "https://www.youtube.com/embed/",
-        "https://www.youtube-nocookie.com/embed/",
-        "https://music.youtube.com/watch?v=",
-        "https://youtube.com/shorts/",
-        "https://www.youtube.com/shorts/",
-        "https://www.youtube.com/clip/",
-        "https://youtube.com/clip/",
-    )
-
-    def mesg(self, msg, t=None):
-        """Forward ``msg`` and ``t`` to ``self.util.mesg``."""
-        self.util.mesg(msg, t)
-
-    def match_urls(self, str):
-        """Return the unique YouTube URLs found in ``str``, in first-seen order.
-
-        A whitespace-separated token counts when it contains one of the
-        known URL markers and starts with "http", so wrapped links such as
-        "<https://youtu.be/x>" are dropped. The parameter name shadows the
-        builtin; it is kept so keyword callers continue to work.
-        """
-        # Filtering here (rather than popping from the list while looping
-        # over it) guarantees every token is actually checked.
-        hits = [
-            tok
-            for tok in str.split()
-            if tok.startswith("http")
-            and any(marker in tok for marker in YouTube._url_markers)
-        ]
-        return list(dict.fromkeys(hits))
-
-    @staticmethod
-    def is_embed(str):
-        """Return True if ``str`` starts with a YouTube embed URL prefix."""
-        return str.startswith(
-            (
-                "https://www.youtube.com/embed/",
-                "https://www.youtube-nocookie.com/embed/",
-            )
-        )
-
-    @staticmethod
-    def is_ytmusic(str):
-        """Return True if ``str`` starts with the YouTube Music watch prefix."""
-        return str.startswith("https://music.youtube.com/watch?v=")
-
-    @staticmethod
-    def is_ytshorts(str):
-        """Return True if ``str`` starts with a YouTube Shorts URL prefix."""
-        return str.startswith(
-            ("https://youtube.com/shorts/", "https://www.youtube.com/shorts/")
-        )
-
-    @staticmethod
-    def is_clip(str):
-        """Return True if ``str`` starts with a YouTube clip URL prefix."""
-        return str.startswith(
-            ("https://youtube.com/clip/", "https://www.youtube.com/clip/")
-        )
-
-    class parseprop(HTMLParser):
-        """HTML parser that collects selected page metadata into ``self.h``."""
-
-        def __init__(self, video_type=None):
-            """Set up the parser.
-
-            ``video_type`` defaults to ``YouTube.video_type``; for "clip" the
-            "description" itemprop is collected as well.
-            """
-            HTMLParser.__init__(self)
-            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
-            self.h = {}
-            if video_type is None:
-                video_type = YouTube.video_type
-            if video_type == "clip":
-                self.itemprops_list += ["description"]
-                print("it is a clip!")
-
-        def handle_starttag(self, tag, attrs):
-            """Store the ``content`` of a relevant <meta>/<link> tag in ``self.h``.
-
-            Attributes are looked up by name, so their order does not matter,
-            and a tag without a usable ``content`` is skipped rather than
-            raising. <link itemprop="name"> is stored as "channelName" and
-            <meta name="title"> as "title"; <meta itemprop="name"> is ignored.
-            """
-            if tag not in ("meta", "link"):
-                return
-            found = dict(attrs)
-            value = found.get("content")
-            # Any attribute besides these three disqualifies the tag.
-            if value is None or set(found) - {"itemprop", "content", "name"}:
-                return
-            if found.get("name", "title") != "title":
-                return
-            key = found.get("itemprop")
-            if key is None:
-                if "name" not in found:
-                    return
-                key = "title"
-            elif key not in self.itemprops_list or (tag == "meta" and key == "name"):
-                return
-            elif tag == "link" and key == "name":
-                key = "channelName"
-            if key == "interactionCount":
-                try:
-                    value = int(value)
-                except ValueError:
-                    return  # non-numeric view count: skip the tag
-            self.h[key] = value
-
-    @staticmethod
-    def fmt_dur(dur):
-        """Format a duration string such as "PT3M5S" for display.
-
-        The first two characters are skipped and the rest is read as
-        optional ``<n>H``, ``<n>M`` and ``<n>S`` fields. Minutes of 60 or more
-        roll over into hours; seconds are shown as given. An all-zero
-        duration is reported as "LIVE".
-
-        Raises ValueError when the text after the prefix is malformed.
-        """
-        fields = {"H": 0, "M": 0, "S": 0}
-        digits = ""
-        for ch in dur[2:]:
-            if ch.isdigit():
-                digits += ch
-            elif ch in fields and digits:
-                fields[ch], digits = int(digits), ""
-            else:
-                raise ValueError(f"unsupported duration: {dur!r}")
-        if digits:
-            # Trailing number without a unit letter.
-            raise ValueError(f"unsupported duration: {dur!r}")
-        h, m = divmod(fields["M"], 60)
-        h += fields["H"]
-        s = fields["S"]
-        if h:
-            return f"{h}h {m}m {s}s"
-        if m == 0 and s == 0:
-            return "LIVE"
-        if m == 0:
-            return f"{s}s"
-        if s == 0:
-            return f"{m}m"
-        return f"{m}m {s}s"
-
-    def yt(self, url):
-        """Fetch ``url`` and return ``(irc_string, is_error)`` for the video.
-
-        The ANSI-coloured equivalent of the message is printed to stdout.
-        Embed, Music and Shorts URLs are rewritten to the plain watch URL
-        before fetching. ``is_error`` is True when the page could not be
-        fetched or lacks any field the summary needs.
-        """
-        irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
-        ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
-        url = url.rstrip("\x01")
-        checks = (
-            ("clip", self.is_clip),
-            ("shorts", self.is_ytshorts),
-            ("music", self.is_ytmusic),
-            ("embed", self.is_embed),
-        )
-        video_type = next((kind for kind, test in checks if test(url)), "video")
-        self.video_type = video_type
-        videoId = None
-        if video_type == "embed":
-            # Cut any "?start=..." query off the path segment holding the ID.
-            videoId = url.split("/")[4].split("?")[0]
-        elif video_type == "music":
-            # If several "v=" parameters are present, the last one wins.
-            videoId = [q[2:] for q in url.split("?")[1].split("&") if q[:2] == "v="][-1]
-        elif video_type == "shorts":
-            videoId = url.split("?")[0].split("/")[-1]
-        if videoId is not None:
-            url = f"https://www.youtube.com/watch?v={videoId}"
-        p = self.parseprop(video_type)
-        data = ""
-        needed = ('meta itemprop="duration"', 'meta itemprop="name"')
-        try:
-            # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
-            if self.premature_optimization:
-                # <body> appears on approximately line 21 or 22, so we read
-                # 24 lines to be safe. Whole lines are read so that no
-                # multi-byte character is cut in half.
-                with urlopen(url) as url_h:
-                    head = b"".join(url_h.readline() for _ in range(24))
-                data = head.decode(errors="replace")
-            if any(marker not in data for marker in needed):
-                # Fallback for the optimization, and the only path when it
-                # is turned off: just read all of the html.
-                with urlopen(url) as url_h:
-                    data = url_h.read().decode(errors="replace")
-        except OSError as e:
-            # HTTPError and URLError are both OSError subclasses, so this
-            # also covers unreachable hosts and socket-level failures.
-            irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
-            ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
-        p.feed(data)
-        y = p.h
-        required = ("title", "duration", "channelName", "uploadDate", "interactionCount")
-        if any(key not in y for key in required):
-            # Nothing (or not enough) was parsed: report instead of raising KeyError.
-            print(ansi_string)
-            return irc_string, True
-        print(y)
-        try:
-            y.update(duration=self.fmt_dur(y["duration"]))
-        except ValueError:
-            pass  # unrecognised format: show the raw duration text instead
-        irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
-        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
-        print(ansi_string)
-        return irc_string, False
-
-
-if __name__ == "__main__":
-    import sys
-
-    # Exit with a usage line instead of an IndexError when no URL is given.
-    if len(sys.argv) < 2:
-        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")
-    YouTube.premature_optimization = False
-    # The class object itself is passed in place of an instance.
-    YouTube.yt(YouTube, sys.argv[1])