diff --git a/README.txt b/README
index ac4778c..ac4778c 100644
--- a/README.txt
+++ b/README
diff --git a/URLget.py b/URLget.py
new file mode 100644
index 0000000..abe27a1
--- /dev/null
+++ b/URLget.py
@@ -0,0 +1,50 @@
+class URLgetException(Exception):
+    """Raised by urlget() when a page cannot be fetched or decoded."""
+
+
+try:
+    from curl_cffi import requests
+
+    print("using curl_cffi")
+
+    def urlget(url):
+        """Fetch *url* with curl_cffi, impersonating a browser's TLS fingerprint.
+
+        Returns a ``(status_code, text)`` tuple.  Any exception raised while
+        requesting or reading the page is re-raised as URLgetException.
+        """
+        # "chrome", "safari" and "safari_ios" are the likely impersonation
+        # targets; more specific browser versions could be used as well.
+        try:
+            r = requests.get(url, impersonate="safari_ios")
+            # Read r.text inside the try so a failure there is wrapped too.
+            return r.status_code, r.text
+        except Exception as e:
+            raise URLgetException(e) from e
+
+except ImportError:
+    # Fallback when curl_cffi is missing or fails to import: plain urllib
+    # with a spoofed User-Agent.  It does not hide the TLS fingerprint.
+    from urllib.request import Request, urlopen
+
+    print("using urllib.request")
+
+    def urlget(url):
+        """Fetch *url* with urllib, sending a browser-like User-Agent header.
+
+        Returns a ``(status, text)`` tuple.  urlopen() raises for HTTP error
+        statuses, so those surface as URLgetException, as does any other
+        failure while connecting, reading or decoding.
+        """
+        # Update this string as browser versions move on.
+        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
+        req = Request(url)
+        req.add_header("User-Agent", ua)
+        try:
+            # The context manager closes the connection even if read() fails.
+            with urlopen(req) as r:
+                # Honour the charset declared by the server; default to UTF-8.
+                charset = r.headers.get_content_charset() or "utf-8"
+                return r.status, r.read().decode(charset, errors="replace")
+        except Exception as e:
+            raise URLgetException(e) from e
diff --git a/requirements.txt b/requirements.txt
index 8a70ebc..cb7e46d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
ircstates
+# OPTIONAL: curl_cffi is only needed for TLS fingerprint spoofing in URLget.py (used by youtube.alt.py); without it URLget.py falls back to urllib.
+curl_cffi
diff --git a/youtube.py.old b/youtube.alt.py
index b09b1d1..ca478d8 100755
--- a/youtube.py.old
+++ b/youtube.alt.py
@@ -1,9 +1,20 @@
#!/usr/bin/env python3
from html.parser import HTMLParser
-from urllib.request import urlopen
-from urllib.error import HTTPError
+from URLget import urlget, URLgetException
+
+# from URLget import URLgetException
+# urlget=URLget().urlget
+# print(urlget("http://ip.envs.net"))
+# print(dir(URLget))
+
class YouTube:
+ # crude import, lol
+ # URLget = URLget().URLget
+ # def __init__(self):
+ # self.URLget = URLget().URLget
+ # print(URLget,URLget.URLget)
+
video_type = ""
def mesg(self, msg, t=None):
@@ -34,35 +45,44 @@ class YouTube:
return r
- def is_embed(str):
+ def is_embed(self, str):
return str.startswith("https://www.youtube.com/embed/") or str.startswith(
"https://www.youtube-nocookie.com/embed/"
)
- def is_ytmusic(str):
+ def is_ytmusic(self, str):
return str.startswith("https://music.youtube.com/watch?v=")
- def is_ytshorts(str):
+ def is_ytshorts(self, str):
return str.startswith("https://youtube.com/shorts/") or str.startswith(
"https://www.youtube.com/shorts/"
)
- def is_clip(str):
+ def is_clip(self, str):
return str.startswith("https://youtube.com/clip/") or str.startswith(
"https://www.youtube.com/clip/"
)
class parseprop(HTMLParser):
def __init__(self):
- #print("yt parse init")
+ # print("yt parse init")
HTMLParser.__init__(self)
self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
self.h = {}
if YouTube.video_type == "clip":
self.itemprops_list += ["description"]
print("it is a clip!")
+ self.title = False
+
+ def handle_data(self, data):
+ if self.title != False:
+ # print("title",data)
+ self.h.update({"html_title": data})
+ self.title = False
def handle_starttag(self, tag, attrs):
+ if tag == "title":
+ self.title = True
if (tag != "meta" and tag != "link") or (
(
[i for i in attrs if "itemprop" in i] == []
@@ -141,28 +161,33 @@ class YouTube:
p = self.parseprop()
# use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
data = b""
- if self.premature_optimization:
- url_h = urlopen(url)
- # <body> appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment)
- # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads
- for i in range(24):
- data += url_h.readline()
- url_h.close()
data = data.decode() # bytes to utf-8
if (
data.find('meta itemprop="duration"') == -1
or data.find('meta itemprop="name"') == -1
- ): # acts as both fallback for optimization, and in case optimization's turned off
- # just read all of the html
- try: data = urlopen(url).read().decode()
- except HTTPError as e:
- irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
- ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
+ ):
+ try:
+ status, data = urlget(url)
+ if status != 200:
+ irc_string = (
+ f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {status} \x0315\x03"
+ )
+ ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {status} \x1b[37;2m\x1b[0m"
+ except URLgetException as e:
+ irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
+ ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
# print(f"\x1b[31m my data is: {data}\x1b[0m")
p.feed(data)
if p.h == {}:
print(ansi_string)
return irc_string, True
+ elif p.h == {"html_title": "YouTube"}:
+ irc_string = (
+ "[\x0304Youtube\x03] \x0307ERROR:\x0308 flagged as bot \x0315\x03"
+ )
+ ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m flagged as bot \x1b[37;2m\x1b[0m"
+ print(ansi_string)
+ return irc_string, True
y = p.h
print(y)
y.update(duration=self.fmt_dur(y["duration"]))
@@ -176,4 +201,5 @@ if __name__ == "__main__":
import sys
YouTube.premature_optimization = False
- YouTube.yt(YouTube, sys.argv[1])
+ # YouTube.yt(YouTube, sys.argv[1])
+ YouTube().yt(sys.argv[1])
diff --git a/youtube.py b/youtube.py
index 1e60546..0d51e16 100755
--- a/youtube.py
+++ b/youtube.py
@@ -96,7 +96,7 @@ class YouTube:
video_id = None
try:
playlist_id = qs["list"][0]
- #ignore the "random mix" and "radio" lists
+ # ignore the "random mix" and "radio" lists
if playlist_id.startswith("RD"):
playlist_id = None
except KeyError:
|