summaryrefslogtreecommitdiff
path: root/youtube.alt.py
diff options
context:
space:
mode:
authorPawky Languish2024-12-11 16:40:28 +0000
committerPawky Languish2024-12-11 17:01:54 +0000
commitf0e4aba9708f21d30151b2c4b49fb949d1c91df5 (patch)
treec9cf5ea17bcef9fe42df010c3dbfdfcbd9c35bcd /youtube.alt.py
parent225f83f5a5f077d7b7f7c476f825a0ded8008b74 (diff)
Try to work around YouTube bot flagging in alternate YouTube module
(doesn't quite work? might reduce the likelihood that the IP gets flagged?)
Diffstat (limited to 'youtube.alt.py')
-rwxr-xr-xyoutube.alt.py205
1 files changed, 205 insertions, 0 deletions
diff --git a/youtube.alt.py b/youtube.alt.py
new file mode 100755
index 0000000..ca478d8
--- /dev/null
+++ b/youtube.alt.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+from html.parser import HTMLParser
+from URLget import urlget, URLgetException
+
+# from URLget import URLgetException
+# urlget=URLget().urlget
+# print(urlget("http://ip.envs.net"))
+# print(dir(URLget))
+
+
+class YouTube:
+    """Find YouTube links in text and summarise the linked video.
+
+    ``yt()`` downloads the watch page with ``urlget``, extracts the
+    ``itemprop``/``meta`` properties embedded in the HTML with ``parseprop``
+    and returns a one-line summary formatted with IRC control codes.
+    """
+
+    # Kind of link most recently handled by yt(): "video", "clip", "shorts",
+    # "music" or "embed". Empty until yt() has been called.
+    video_type = ""
+
+    # Substrings that mark a whitespace-separated word as a YouTube link.
+    URL_PREFIXES = (
+        "https://youtu.be/",
+        "https://www.youtube.com/watch?v=",
+        "https://m.youtube.com/watch?v=",
+        "https://youtube.com/watch?v=",
+        "https://www.youtube.com/embed/",
+        "https://www.youtube-nocookie.com/embed/",
+        "https://music.youtube.com/watch?v=",
+        "https://youtube.com/shorts/",
+        "https://www.youtube.com/shorts/",
+        "https://www.youtube.com/clip/",
+        "https://youtube.com/clip/",
+    )
+
+    def mesg(self, msg, t=None):
+        """Forward *msg* and *t* to ``self.util.mesg``.
+
+        NOTE(review): ``util`` is not assigned anywhere in this class; it is
+        presumably attached by whatever loads this module - confirm.
+        """
+        self.util.mesg(msg, t)
+
+    def match_urls(self, str):
+        """Return the YouTube URLs found in *str*, de-duplicated, in order.
+
+        A word counts as a candidate when it contains one of ``URL_PREFIXES``;
+        candidates that do not start with ``http`` (for example ``<https://...``)
+        are dropped. The parameter name shadows the builtin ``str`` but is kept
+        so existing keyword callers continue to work.
+        """
+        candidates = [
+            word
+            for word in str.split()
+            if any(prefix in word for prefix in self.URL_PREFIXES)
+        ]
+        # dict.fromkeys() removes duplicates while keeping first-seen order.
+        # Filtering into a new list avoids popping from a list that is being
+        # iterated, which used to skip the element after every removal.
+        return [url for url in dict.fromkeys(candidates) if url.startswith("http")]
+
+    def is_embed(self, str):
+        """Return True if *str* is an embed URL (youtube.com or youtube-nocookie.com)."""
+        return str.startswith(
+            (
+                "https://www.youtube.com/embed/",
+                "https://www.youtube-nocookie.com/embed/",
+            )
+        )
+
+    def is_ytmusic(self, str):
+        """Return True if *str* is a music.youtube.com watch URL."""
+        return str.startswith("https://music.youtube.com/watch?v=")
+
+    def is_ytshorts(self, str):
+        """Return True if *str* is a shorts URL."""
+        return str.startswith(
+            ("https://youtube.com/shorts/", "https://www.youtube.com/shorts/")
+        )
+
+    def is_clip(self, str):
+        """Return True if *str* is a clip URL."""
+        return str.startswith(
+            ("https://youtube.com/clip/", "https://www.youtube.com/clip/")
+        )
+
+    class parseprop(HTMLParser):
+        """Collect video properties from a watch page into the dict ``self.h``.
+
+        Keys that can be produced: ``html_title`` (first text chunk inside
+        ``<title>``), ``title`` (``<meta name="title">``), ``channelName``
+        (``<link itemprop="name">``) and every ``itemprop`` listed in
+        ``self.itemprops_list`` other than ``name``.
+        """
+
+        def __init__(self, video_type=None):
+            """Create a parser.
+
+            *video_type* selects clip handling (``"clip"`` additionally
+            collects ``description``). When omitted, the class attribute
+            ``YouTube.video_type`` is consulted, as the argument-less
+            constructor always did.
+            """
+            HTMLParser.__init__(self)
+            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
+            self.h = {}
+            if video_type is None:
+                video_type = YouTube.video_type
+            if video_type == "clip":
+                self.itemprops_list += ["description"]
+                print("it is a clip!")
+            # True between a <title> start tag and its first text chunk.
+            self.title = False
+
+        def handle_data(self, data):
+            """Store the first text chunk that follows ``<title>`` as ``html_title``."""
+            if self.title:
+                self.h["html_title"] = data
+                self.title = False
+
+        def handle_endtag(self, tag):
+            """Stop waiting for title text once ``</title>`` is seen.
+
+            Without this an empty ``<title></title>`` would cause the next,
+            unrelated text chunk to be recorded as the page title.
+            """
+            if tag == "title":
+                self.title = False
+
+        def handle_starttag(self, tag, attrs):
+            """Record the property carried by a ``<meta>``/``<link>`` tag, if any.
+
+            Attributes are looked up by name, so their order inside the tag
+            does not matter and unrelated extra attributes are ignored. Tags
+            without a ``content`` value are skipped.
+            """
+            if tag == "title":
+                self.title = True
+                return
+            if tag not in ("meta", "link"):
+                return
+            attributes = dict(attrs)
+            content = attributes.get("content")
+            if content is None:
+                return
+            prop = attributes.get("itemprop")
+            if prop is not None:
+                if prop not in self.itemprops_list:
+                    return
+                if prop == "name":
+                    # <meta itemprop="name"> is ignored: the video title is
+                    # taken from <meta name="title"> instead. On a <link>
+                    # the same itemprop carries the channel name.
+                    if tag == "meta":
+                        return
+                    key = "channelName"
+                else:
+                    key = prop
+                if prop == "interactionCount":
+                    try:
+                        content = int(content)
+                    except ValueError:
+                        # Not a number: leave the key unset rather than crash.
+                        return
+            elif attributes.get("name") == "title":
+                key = "title"
+            else:
+                return
+            self.h[key] = content
+
+    @staticmethod
+    def fmt_dur(dur):
+        """Format a duration such as ``PT3M20S`` as ``3m 20s``.
+
+        Accepts ``PT`` followed by any of ``<n>H``, ``<n>M``, ``<n>S`` with
+        integer values. Returns ``"LIVE"`` for a zero duration and
+        ``"<h>h <m>m <s>s"`` for an hour or more.
+
+        Declared as a staticmethod so that both ``self.fmt_dur(x)`` and
+        ``YouTube.fmt_dur(x)`` work; as a plain function taking only *dur*,
+        the ``self.fmt_dur(...)`` call in ``yt()`` raised TypeError.
+
+        Raises:
+            ValueError: if *dur* does not have the shape described above.
+        """
+        if not dur.startswith("PT"):
+            raise ValueError(f"unsupported duration: {dur!r}")
+        seconds_per_unit = {"H": 3600, "M": 60, "S": 1}
+        total = 0
+        digits = ""
+        for ch in dur[2:]:
+            if ch.isdigit():
+                digits += ch
+            elif ch in seconds_per_unit and digits:
+                total += int(digits) * seconds_per_unit[ch]
+                digits = ""
+            else:
+                raise ValueError(f"unsupported duration: {dur!r}")
+        if digits:
+            # Trailing number without a unit letter.
+            raise ValueError(f"unsupported duration: {dur!r}")
+        m, s = divmod(total, 60)
+        if m >= 60:
+            h, m = divmod(m, 60)
+            return f"{h}h {m}m {s}s"
+        if m == 0 and s == 0:
+            return "LIVE"
+        if m == 0:
+            return f"{s}s"
+        if s == 0:
+            return f"{m}m"
+        return f"{m}m {s}s"
+
+    def yt(self, url):
+        """Fetch *url* and return ``(irc_string, is_error)``.
+
+        *irc_string* is the summary (or an error message) formatted with IRC
+        control codes; an ANSI-coloured equivalent is printed to stdout.
+        *is_error* is True when no usable metadata was obtained: the request
+        failed, the page looked like the bot-check page, or required
+        properties were missing or malformed.
+        """
+        irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
+        ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
+        url = url.rstrip("\x01")
+        if self.is_clip(url):
+            video_type = "clip"
+        elif self.is_ytshorts(url):
+            video_type = "shorts"
+        elif self.is_ytmusic(url):
+            video_type = "music"
+        elif self.is_embed(url):
+            video_type = "embed"
+        else:
+            video_type = "video"
+        self.video_type = video_type
+
+        # Rewrite embed/music/shorts links to the plain watch URL.
+        if video_type == "embed":
+            # Drop any "?query" so it does not end up inside the video id.
+            video_id = url.split("/")[4].split("?")[0]
+            url = f"https://www.youtube.com/watch?v={video_id}"
+        elif video_type == "music":
+            for param in url.split("?")[1].split("&"):
+                if param.startswith("v="):
+                    video_id = param[2:]
+            url = f"https://www.youtube.com/watch?v={video_id}"
+        elif video_type == "shorts":
+            video_id = url.split("?")[0].split("/")[-1]
+            url = f"https://www.youtube.com/watch?v={video_id}"
+
+        # Pass the type explicitly: the parser's fallback reads the class
+        # attribute, which is not updated when yt() runs on an instance.
+        p = self.parseprop(video_type)
+        data = ""
+        try:
+            status, data = urlget(url)
+        except URLgetException as e:
+            irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
+            ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
+        else:
+            if status != 200:
+                irc_string = (
+                    f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {status} \x0315\x03"
+                )
+                ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {status} \x1b[37;2m\x1b[0m"
+        p.feed(data)
+        y = p.h
+
+        if y == {"html_title": "YouTube"}:
+            # A page whose only recognisable content is the bare "YouTube"
+            # title is treated as the bot-check page.
+            irc_string = (
+                "[\x0304Youtube\x03] \x0307ERROR:\x0308 flagged as bot \x0315\x03"
+            )
+            ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m flagged as bot \x1b[37;2m\x1b[0m"
+            print(ansi_string)
+            return irc_string, True
+
+        required = ("title", "duration", "channelName", "uploadDate", "interactionCount")
+        if any(key not in y for key in required):
+            # Covers both an empty result and a partially parsed page, which
+            # previously raised KeyError while building the summary.
+            print(ansi_string)
+            return irc_string, True
+
+        print(y)  # raw parsed properties, kept as diagnostic output
+        try:
+            duration = self.fmt_dur(y["duration"])
+        except ValueError:
+            print(ansi_string)
+            return irc_string, True
+        y.update(duration=duration)
+        irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
+        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
+        print(ansi_string)
+        return irc_string, False
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Script entry point: look up the URL given as the first argument and
+    # print its summary (yt() does the printing).
+    if len(sys.argv) < 2:
+        # Exit with a usage message instead of an IndexError traceback.
+        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")
+
+    # NOTE(review): this flag is not read by the YouTube class as shown in
+    # this diff; it is kept in case other code consults it - confirm.
+    YouTube.premature_optimization = False
+    YouTube().yt(sys.argv[1])