blob: abe27a179c1ba9a42d57326c0ea3c7a7ff99820f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
class URLgetException(Exception):
    """Raised by ``urlget`` when the underlying HTTP request fails.

    The exception that caused the failure is passed as the sole constructor
    argument, so it is available as ``exc.args[0]``.
    """
try:
    from curl_cffi import requests
    # from curl_cffi.requests.exceptions import HTTPError
    print("using curl_cffi")

    def urlget(url):
        """Fetch *url* with curl_cffi and return ``(status_code, text)``.

        The request impersonates the "safari_ios" browser profile.  The
        status code is returned as-is; no raise-on-status call is made here.

        Raises:
            URLgetException: if ``requests.get`` raises anything; the
                original exception is chained as the cause.
        """
        # probably want to impersonate "chrome", "safari" or "safari_ios"
        # could impersonate some more specific versions too I guess
        try:
            r = requests.get(url, impersonate="safari_ios")
        except Exception as e:
            raise URLgetException(e) from e
        return r.status_code, r.text
except ModuleNotFoundError:
    # fallback to just dumb user-agent spoofing, it will not help, but at least it won't hurt?
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen
    print("using urllib.request")

    def _read_text(resp):
        """Read the whole body of *resp* and decode it to ``str``.

        Uses the charset declared in the Content-Type header when present,
        falling back to UTF-8.  Undecodable bytes are replaced rather than
        raising, so a mis-declared charset does not abort the fetch.
        """
        raw = resp.read()
        charset = resp.headers.get_content_charset() or "utf-8"
        try:
            return raw.decode(charset, errors="replace")
        except LookupError:
            # Server declared a charset name Python does not know.
            return raw.decode("utf-8", errors="replace")

    def urlget(url):
        """Fetch *url* with urllib and return ``(status, text)``.

        Only the User-Agent header is spoofed.  HTTP error statuses
        (4xx/5xx) are returned as ``(code, body)`` instead of raising, so
        both backends hand status codes back to the caller.

        Raises:
            URLgetException: on any other failure (bad URL, connection
                error, read error); the original exception is chained as
                the cause.
        """
        # update as needed I guess
        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
        req = Request(url)
        req.add_header("User-Agent", ua)
        try:
            try:
                resp = urlopen(req)
                status = resp.status
            except HTTPError as e:
                # urlopen raises on 4xx/5xx, but HTTPError is itself a
                # readable response object carrying the status and body.
                resp, status = e, e.code
            # Close the response (and its socket) once the body is read.
            with resp:
                return status, _read_text(resp)
        except Exception as e:
            raise URLgetException(e) from e
|