summaryrefslogtreecommitdiff
path: root/URLget.py
diff options
context:
space:
mode:
authorPawky Languish2024-12-11 16:40:28 +0000
committerPawky Languish2024-12-11 17:01:54 +0000
commitf0e4aba9708f21d30151b2c4b49fb949d1c91df5 (patch)
treec9cf5ea17bcef9fe42df010c3dbfdfcbd9c35bcd /URLget.py
parent225f83f5a5f077d7b7f7c476f825a0ded8008b74 (diff)
Try to work around YouTube bot flagging in alternate YouTube module
(doesn't quite work? might reduce the likelihood that the IP gets flagged?)
Diffstat (limited to 'URLget.py')
-rw-r--r--URLget.py40
1 files changed, 40 insertions, 0 deletions
diff --git a/URLget.py b/URLget.py
new file mode 100644
index 0000000..abe27a1
--- /dev/null
+++ b/URLget.py
@@ -0,0 +1,40 @@
+class URLgetException(Exception):
+    """Raised by urlget() to wrap whatever exception the HTTP request raised."""
+
+
+try:
+ from curl_cffi import requests
+
+ # from curl_cffi.requests.exceptions import HTTPError
+ print("using curl_cffi")
+
+ def urlget(url):
+     """Fetch ``url`` through curl_cffi and return ``(status_code, text)``.
+
+     The request is sent with ``impersonate="safari_ios"``. The author's
+     note lists "chrome" and "safari" as other plausible targets, and
+     suggests more specific versions could be impersonated as well.
+
+     Any exception raised by ``requests.get`` is re-raised wrapped in
+     ``URLgetException``.
+     """
+     try:
+         response = requests.get(url, impersonate="safari_ios")
+     except Exception as err:
+         raise URLgetException(err)
+     else:
+         return response.status_code, response.text
+
+except ModuleNotFoundError:
+ # fallback to just dumb user-agent spoofing, it will not help, but at least it won't hurt?
+ from urllib.request import Request, urlopen
+
+ # from urllib.error import HTTPError
+ print("using urllib.request")
+
+ def urlget(url):
+     """Fetch ``url`` with urllib and return ``(status, text)``.
+
+     urlopen() raises HTTPError for 4xx/5xx responses; that error object
+     is itself a readable response, so it is unwrapped here and returned
+     as ``(code, body)``. This matches the curl_cffi variant of urlget(),
+     which returns the status code instead of raising on HTTP errors.
+
+     The body is decoded with the charset declared in the response
+     headers (UTF-8 when none is declared); undecodable bytes are
+     replaced rather than raising.
+
+     Raises:
+         URLgetException: if the request, or reading the body, fails for
+             any reason other than an HTTP error status.
+     """
+     # local import: the module-level HTTPError import is commented out
+     from urllib.error import HTTPError
+
+     # update as needed I guess
+     ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
+     req = Request(url)
+     req.add_header("User-Agent", ua)
+     try:
+         try:
+             r = urlopen(req)
+             status = r.status
+         except HTTPError as err:
+             # an HTTP error status is still a response: keep code and body
+             r, status = err, err.code
+         # close the response once the body has been read
+         with r:
+             raw = r.read()
+             charset = r.headers.get_content_charset() or "utf-8"
+     except Exception as e:
+         raise URLgetException(e) from e
+     try:
+         text = raw.decode(charset, errors="replace")
+     except LookupError:
+         # the declared charset is not one Python knows; fall back to UTF-8
+         text = raw.decode("utf-8", errors="replace")
+     return status, text