summary refs log tree commit diff
path: root/URLget.py
diff options
context:
space:
mode:
Diffstat (limited to 'URLget.py')
-rw-r--r-- URLget.py 40
1 files changed, 40 insertions, 0 deletions
diff --git a/URLget.py b/URLget.py
new file mode 100644
index 0000000..abe27a1
--- /dev/null
+++ b/URLget.py
@@ -0,0 +1,40 @@
+class URLgetException(Exception):
+    """Raised by urlget() when the underlying request fails."""
+
+
+try:
+    from curl_cffi import requests
+
+    # from curl_cffi.requests.exceptions import HTTPError
+    print("using curl_cffi")
+
+    def urlget(url):
+        """Fetch url via curl_cffi; return (status_code, text) or raise URLgetException."""
+        # Impersonation target: "chrome", "safari" or "safari_ios" are the likely
+        # candidates; more specific browser versions could be used as well.
+        try:
+            r = requests.get(url, impersonate="safari_ios")
+        except Exception as e:
+            # One exception type for callers; "from e" keeps the original as __cause__.
+            raise URLgetException(e) from e
+        return r.status_code, r.text
+
+except ModuleNotFoundError:
+    # Fallback: plain User-Agent spoofing. It probably won't help, but at least it shouldn't hurt.
+    from urllib.request import Request, urlopen
+
+    # from urllib.error import HTTPError
+    print("using urllib.request")
+
+    def urlget(url):
+        """Fetch url via urllib; return (status, UTF-8-decoded body) or raise URLgetException."""
+        # Spoofed desktop-Firefox User-Agent; update as needed.
+        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
+        req = Request(url, headers={"User-Agent": ua})
+        try:
+            r = urlopen(req)
+        except Exception as e:
+            # NOTE: urlopen raises HTTPError for HTTP error statuses, so those land here too.
+            raise URLgetException(e) from e
+        with r:  # close the response once the body has been read
+            return r.status, r.read().decode()