diff options
| author | Pawky Languish | 2024-12-11 16:40:28 +0000 | 
|---|---|---|
| committer | Pawky Languish | 2024-12-11 17:01:54 +0000 | 
| commit | f0e4aba9708f21d30151b2c4b49fb949d1c91df5 (patch) | |
| tree | c9cf5ea17bcef9fe42df010c3dbfdfcbd9c35bcd /URLget.py | |
| parent | 225f83f5a5f077d7b7f7c476f825a0ded8008b74 (diff) | |
Try to work around YouTube bot flagging in the alternate YouTube module
(doesn't quite work? might reduce the likelihood that the IP gets flagged?)
Diffstat (limited to 'URLget.py')
| -rw-r--r-- | URLget.py | 40 | 
1 files changed, 40 insertions, 0 deletions
"""URLget.py: fetch a URL and return its status code and body text.

Prefers ``curl_cffi`` (browser impersonation) when it can be imported and
falls back to ``urllib.request`` with a spoofed User-Agent otherwise.  Both
variants expose the same ``urlget(url, timeout=30)`` function and report
every failure as ``URLgetException``.
"""


class URLgetException(Exception):
    """Raised by ``urlget`` when a request cannot be completed.

    The original error is passed as the exception argument and is also
    available as ``__cause__``.
    """


# Only the import itself is guarded, so an unrelated error raised while
# defining ``urlget`` is never mistaken for "curl_cffi is not installed".
# ``ImportError`` (the parent of ``ModuleNotFoundError``) also covers a
# curl_cffi install that is present but fails to load.
try:
    from curl_cffi import requests
except ImportError:
    # Fallback to just dumb user-agent spoofing; it will not help against
    # bot detection, but at least it won't hurt.
    from urllib.request import Request, urlopen

    print("using urllib.request")

    def urlget(url, timeout=30):
        """Fetch *url* with ``urllib.request`` and return ``(status, text)``.

        Args:
            url: The URL to request.
            timeout: Seconds to wait on blocking network operations before
                giving up.

        Returns:
            A ``(status, text)`` tuple: the response status and the body
            decoded with the charset declared in the response headers
            (UTF-8 when none is declared).

        Raises:
            URLgetException: On any failure, including a malformed URL, a
                network error, a body that cannot be read or decoded, and
                HTTP error statuses, which ``urlopen`` raises as
                ``HTTPError``.
        """
        # update as needed I guess
        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
        try:
            # Request() itself raises ValueError for a malformed URL, so it
            # belongs inside the try as well.
            req = Request(url)
            req.add_header("User-Agent", ua)
            # The context manager closes the response (and its socket) even
            # when reading or decoding fails.
            with urlopen(req, timeout=timeout) as r:
                charset = r.headers.get_content_charset() or "utf-8"
                return r.status, r.read().decode(charset)
        except Exception as e:
            raise URLgetException(e) from e

else:
    print("using curl_cffi")

    def urlget(url, timeout=30):
        """Fetch *url* with ``curl_cffi`` and return ``(status_code, text)``.

        Args:
            url: The URL to request.
            timeout: Seconds to wait for the request before giving up.

        Returns:
            A ``(status_code, text)`` tuple taken from the response.

        Raises:
            URLgetException: If the request or reading the body fails.
        """
        # probably want to impersonate "chrome", "safari" or "safari_ios";
        # more specific browser versions could be impersonated too.
        # NOTE(review): presumably HTTP error statuses are returned here rather
        # than raised, unlike the urllib fallback - confirm against callers.
        try:
            r = requests.get(url, impersonate="safari_ios", timeout=timeout)
            return r.status_code, r.text
        except Exception as e:
            raise URLgetException(e) from e
