"""Fetch a URL while trying to look like a regular browser.

Added as an attempt to work around YouTube's bot flagging in the alternate
youtube module (it may only reduce the likelihood that an IP gets flagged).

Two backends provide the same ``urlget(url)`` entry point:

* ``curl_cffi`` when it can be imported, using its browser impersonation;
* ``urllib.request`` otherwise, which only spoofs the ``User-Agent`` header.

Whichever backend is selected is announced on stdout at import time.
"""


class URLgetException(Exception):
    """Raised by ``urlget`` when a URL cannot be fetched or decoded."""


# Seconds to wait on the network before giving up; without a timeout the
# urllib backend can block forever on a stalled connection.
_DEFAULT_TIMEOUT = 30.0


try:
    from curl_cffi import requests

    print("using curl_cffi")

    def urlget(url, timeout=_DEFAULT_TIMEOUT):
        """Fetch *url* with curl_cffi, impersonating a browser.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait before the request is abandoned.

        Returns:
            A ``(status_code, text)`` tuple. HTTP error statuses are returned,
            not raised.

        Raises:
            URLgetException: If the request or the decoding of its body fails.
        """
        # Probably want to impersonate "chrome", "safari" or "safari_ios";
        # more specific browser versions are possible too.
        try:
            r = requests.get(url, impersonate="safari_ios", timeout=timeout)
            return r.status_code, r.text
        except Exception as e:
            raise URLgetException(e) from e

except ImportError:
    # Fall back to plain user-agent spoofing: it will not help much, but at
    # least it won't hurt. ImportError (not just ModuleNotFoundError) so that
    # an installed-but-broken curl_cffi also ends up here.
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen

    print("using urllib.request")

    def urlget(url, timeout=_DEFAULT_TIMEOUT):
        """Fetch *url* with urllib, sending a spoofed ``User-Agent``.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait before the request is abandoned.

        Returns:
            A ``(status, text)`` tuple. HTTP error statuses (4xx/5xx) are
            returned together with their body, matching the curl_cffi backend.

        Raises:
            URLgetException: If the URL is malformed, the request fails, or
                the body cannot be read or decoded.
        """
        # Update as needed.
        ua = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0"
        try:
            # Request() itself rejects malformed URLs, so it must be inside
            # the try for the failure to surface as URLgetException.
            req = Request(url)
            req.add_header("User-Agent", ua)
            try:
                resp = urlopen(req, timeout=timeout)
                status = resp.status
            except HTTPError as err:
                # An HTTPError doubles as the response object for 4xx/5xx.
                resp, status = err, err.code
            # Close the connection even if reading or decoding fails.
            with resp:
                charset = resp.headers.get_content_charset() or "utf-8"
                # Undecodable bytes are replaced rather than aborting the fetch.
                return status, resp.read().decode(charset, errors="replace")
        except Exception as e:
            raise URLgetException(e) from e