summary refs log tree commit diff
path: root/URLget.py
blob: abe27a179c1ba9a42d57326c0ea3c7a7ff99820f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
class URLgetException(Exception):
    """Error raised by ``urlget`` when fetching a URL fails.

    The wrapped low-level exception is passed as the sole constructor
    argument, so it is available through ``args[0]``.
    """


try:
    from curl_cffi import requests

    print("using curl_cffi")

    def urlget(url):
        """Fetch *url* through curl_cffi using ``impersonate="safari_ios"``.

        Args:
            url: Address to request.

        Returns:
            A ``(status_code, text)`` tuple read from the response object.

        Raises:
            URLgetException: If ``requests.get`` raises; the original
                exception is attached as ``__cause__``.
        """
        # "chrome", "safari" or "safari_ios" are the likely impersonation
        # targets; more specific browser versions could be used as well.
        try:
            r = requests.get(url, impersonate="safari_ios")
        except Exception as e:
            raise URLgetException(e) from e
        return r.status_code, r.text

except ModuleNotFoundError:
    # Fallback to plain User-Agent spoofing: it will not help much, but at
    # least it won't hurt.
    from urllib.request import Request, urlopen

    print("using urllib.request")

    def urlget(url):
        """Fetch *url* with ``urllib.request`` and a spoofed User-Agent.

        Args:
            url: Address to request.

        Returns:
            A ``(status, text)`` tuple. The body is decoded with the charset
            declared in the response's Content-Type header, falling back to
            UTF-8 when none is declared.

        Raises:
            URLgetException: If building the request, opening it, reading
                the body or decoding it fails; the original exception is
                attached as ``__cause__``.
        """
        # Update as needed.
        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
        try:
            req = Request(url)
            req.add_header("User-Agent", ua)
            # NOTE: urlopen raises HTTPError for HTTP error statuses, so
            # those surface as URLgetException rather than a returned status.
            # The context manager closes the response even when reading or
            # decoding fails.
            with urlopen(req) as r:
                status = r.status
                charset = r.headers.get_content_charset() or "utf-8"
                text = r.read().decode(charset)
        except Exception as e:
            raise URLgetException(e) from e
        return status, text