summary refs log tree commit diff
path: root/URLget.py
diff options
context:
space:
mode:
authorPawky Languish2024-12-11 16:40:28 +0000
committerPawky Languish2024-12-11 17:01:54 +0000
commitf0e4aba9708f21d30151b2c4b49fb949d1c91df5 (patch)
treec9cf5ea17bcef9fe42df010c3dbfdfcbd9c35bcd /URLget.py
parent225f83f5a5f077d7b7f7c476f825a0ded8008b74 (diff)
Try to work around youtube bot flagging in alternate youtube module HEAD master
(doesn't quite work? might reduce likelihood ip gets flagged?)
Diffstat (limited to 'URLget.py')
-rw-r--r--URLget.py40
1 files changed, 40 insertions, 0 deletions
diff --git a/URLget.py b/URLget.py
new file mode 100644
index 0000000..abe27a1
--- /dev/null
+++ b/URLget.py
@@ -0,0 +1,40 @@
+class URLgetException(Exception):
+    """Error raised by urlget() in place of the backend's own exception."""
+
+
+try:
+    from curl_cffi import requests
+
+    # from curl_cffi.requests.exceptions import HTTPError
+    print("using curl_cffi")
+
+    def urlget(url):
+        """GET url with curl_cffi; return (status_code, text) or raise URLgetException."""
+        # probably want to impersonate "chrome", "safari" or "safari_ios";
+        # more specific browser versions could be impersonated as well.
+        try:
+            r = requests.get(url, impersonate="safari_ios")
+        except Exception as e:
+            # Chain explicitly so the original backend error is kept as __cause__.
+            raise URLgetException(e) from e
+        return r.status_code, r.text
+
+except ModuleNotFoundError:
+    # fallback to just dumb user-agent spoofing, it will not help, but at least it won't hurt?
+    from urllib.request import Request, urlopen
+
+    # from urllib.error import HTTPError
+    print("using urllib.request")
+
+    def urlget(url):
+        """GET url with urllib; return (status, text) or raise URLgetException."""
+        # Spoofed desktop Firefox user-agent; update the version as needed.
+        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
+        req = Request(url, headers={"User-Agent": ua})
+        try:
+            # "with" closes the response; reading and decoding stay inside the
+            # try so those failures are wrapped the same way as connection errors.
+            with urlopen(req) as r:
+                return r.status, r.read().decode()
+        except Exception as e:
+            raise URLgetException(e) from e