diff options
author | Pawky Languish | 2024-12-11 16:40:28 +0000 |
---|---|---|
committer | Pawky Languish | 2024-12-11 17:01:54 +0000 |
commit | f0e4aba9708f21d30151b2c4b49fb949d1c91df5 (patch) | |
tree | c9cf5ea17bcef9fe42df010c3dbfdfcbd9c35bcd /URLget.py | |
parent | 225f83f5a5f077d7b7f7c476f825a0ded8008b74 (diff) |
(doesn't quite work? might reduce likelihood IP gets flagged?)
Diffstat (limited to 'URLget.py')
-rw-r--r-- | URLget.py | 40 |
1 files changed, 40 insertions, 0 deletions
"""Fetch a URL and return ``(status, text)``, using curl_cffi when available.

If ``curl_cffi`` can be imported, requests are sent with browser
impersonation.  Otherwise the module falls back to ``urllib.request`` with a
spoofed ``User-Agent`` header.  Both variants expose the same
``urlget(url)`` function and raise ``URLgetException`` on failure.
"""


class URLgetException(Exception):
    """Raised by ``urlget`` when the request could not be completed."""


try:
    from curl_cffi import requests
except ImportError:
    # Fallback to plain user-agent spoofing; it will not help much against
    # fingerprinting, but at least it won't hurt.  ImportError (rather than
    # only ModuleNotFoundError) also covers an installed-but-broken curl_cffi.
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen

    print("using urllib.request")

    def _decode_body(resp):
        """Read *resp* fully and decode it using the charset it declares.

        Falls back to UTF-8 when no (or an unknown) charset is declared.
        Undecodable bytes are replaced instead of raising, so a mislabeled
        page still yields text.
        """
        raw = resp.read()
        charset = resp.headers.get_content_charset() or "utf-8"
        try:
            return raw.decode(charset, errors="replace")
        except LookupError:
            # The server declared a codec Python does not know.
            return raw.decode("utf-8", errors="replace")

    def urlget(url):
        """Fetch *url* with ``urllib.request`` and a spoofed User-Agent.

        Args:
            url: The URL to request.

        Returns:
            A ``(status, text)`` tuple: the response status and the decoded
            body.  Non-2xx HTTP replies are returned the same way (not
            raised), matching the curl_cffi-based variant.

        Raises:
            URLgetException: If the URL is malformed, or the request or the
                read of the body fails.  The original error is chained as
                ``__cause__``.
        """
        # update as needed
        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
        try:
            req = Request(url)
            req.add_header("User-Agent", ua)
            try:
                resp = urlopen(req)
                status = resp.status
            except HTTPError as http_err:
                # urllib raises for 4xx/5xx, but the exception object is
                # itself the response: keep its code and body.
                resp, status = http_err, http_err.code
            with resp:  # make sure the connection is released
                return status, _decode_body(resp)
        except Exception as e:
            raise URLgetException(e) from e

else:
    print("using curl_cffi")

    def urlget(url):
        """Fetch *url* with curl_cffi while impersonating a browser.

        Args:
            url: The URL to request.

        Returns:
            A ``(status_code, text)`` tuple taken from the response.

        Raises:
            URLgetException: If the request fails.  The original error is
                chained as ``__cause__``.
        """
        # probably want to impersonate "chrome", "safari" or "safari_ios";
        # more specific versions could be impersonated too
        try:
            r = requests.get(url, impersonate="safari_ios")
        except Exception as e:
            raise URLgetException(e) from e
        return r.status_code, r.text