diff --git a/README b/README
index ac4778c..fe0c472 100644
--- a/README
+++ b/README
@@ -1,3 +1,8 @@
code is formatted with "black"
#remember to install the libraries, and you probably should use venvs anyway
pip --require-venv install -r requirements.txt
+
+for youtube, there are several options, but if you just want a video's title, use youtube_oembed.py
+scraping youtube may or may not work depending on your IP and other factors (it should mostly work from home IPs, but tends to get blocked on servers); using curl_cffi to spoof the TLS fingerprint may help slightly. I don't handle the POT token stuff yet
+
+URLget.py is an abstraction that uses curl_cffi if you have it available (it can spoof TLS fingerprints, though that likely has minimal effect) and otherwise falls back to python's builtin urllib.request
diff --git a/URLget.py b/URLget.py
index abe27a1..701b10d 100644
--- a/URLget.py
+++ b/URLget.py
@@ -1,3 +1,4 @@
+import sys
class URLgetException(Exception):
pass
@@ -6,7 +7,7 @@ try:
from curl_cffi import requests
# from curl_cffi.requests.exceptions import HTTPError
- print("using curl_cffi")
+ if sys.stderr.isatty(): print("using curl_cffi",file=sys.stderr)
def urlget(url):
# probably want to impersonate "chrome", "safari" or "safari_ios"
@@ -24,7 +25,7 @@ except ModuleNotFoundError:
from urllib.request import Request, urlopen
# from urllib.error import HTTPError
- print("using urllib.request")
+ if sys.stderr.isatty(): print("using urllib.request",file=sys.stderr)
def urlget(url):
# update as needed I guess
diff --git a/youtube_oembed.py b/youtube_oembed.py
new file mode 100755
index 0000000..4a0ca20
--- /dev/null
+++ b/youtube_oembed.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+from urllib.parse import urlencode, urlparse, parse_qs
+from json import loads as json_loads
+from URLget import urlget, URLgetException
+
+class YouTube:
+    """Resolve YouTube links to "title uploaded by channel" via the oEmbed API.
+
+    The ``is_*`` predicates accept the URL either as the sole argument
+    (``YouTube.is_embed(url)``) or after an instance/class, and ``yt`` may be
+    called as ``YouTube.yt(YouTube, url)``; helpers are therefore static.
+    """
+
+    # When a watch URL also carries a "list" id: report the playlist (True)
+    # or the video (False).  Callers may override this on the class.
+    prefer_playlist = False
+
+    # Substrings that mark a whitespace-separated word as a YouTube link.
+    _URL_MARKERS = (
+        "https://youtu.be/",
+        "https://www.youtube.com/watch?v=",
+        "https://m.youtube.com/watch?v=",
+        "https://youtube.com/watch?v=",
+        "https://www.youtube.com/embed/",
+        "https://www.youtube-nocookie.com/embed/",
+        "https://music.youtube.com/watch?v=",
+        "https://youtube.com/shorts/",
+        "https://www.youtube.com/shorts/",
+    )
+
+    @staticmethod
+    def _url_of(first, rest):
+        """Return the URL whether a predicate was called as f(url) or obj.f(url)."""
+        return first if isinstance(first, str) else rest[0]
+
+    @staticmethod
+    def _error(detail, hint=""):
+        """Print the ANSI error line and return the ``(irc_string, True)`` pair."""
+        print(
+            f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {detail} \x1b[37;2m{hint}\x1b[0m"
+        )
+        return f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {detail} \x0315{hint}\x03", True
+
+    def mesg(self, msg, t=None):
+        """Forward *msg* to ``self.util.mesg``; ``util`` is not set in this class."""
+        self.util.mesg(msg, t)
+
+    def match_urls(self, str):
+        """Return the unique YouTube URLs in *str*, in order of first appearance.
+
+        A word counts only if it contains a known YouTube prefix and itself
+        starts with "http" (so e.g. "<https://youtu.be/x>" is ignored).
+        """
+        hits = [
+            word
+            for word in str.split()
+            if word.startswith("http")
+            and any(marker in word for marker in YouTube._URL_MARKERS)
+        ]
+        return list(dict.fromkeys(hits))
+
+    def is_embed(self, *args):
+        """Return True for youtube.com / youtube-nocookie.com embed URLs."""
+        return YouTube._url_of(self, args).startswith(
+            ("https://www.youtube.com/embed/", "https://www.youtube-nocookie.com/embed/")
+        )
+
+    def is_ytmusic(self, *args):
+        """Return True for music.youtube.com watch URLs."""
+        return YouTube._url_of(self, args).startswith("https://music.youtube.com/watch?v=")
+
+    def is_ytshorts(self, *args):
+        """Return True for Shorts URLs."""
+        return YouTube._url_of(self, args).startswith(
+            ("https://youtube.com/shorts/", "https://www.youtube.com/shorts/")
+        )
+
+    def is_clip(self, *args):
+        """Return True for clip URLs."""
+        return YouTube._url_of(self, args).startswith(
+            ("https://youtube.com/clip/", "https://www.youtube.com/clip/")
+        )
+
+    def is_shorturl(self, *args):
+        """Return True for youtu.be short links."""
+        return YouTube._url_of(self, args).startswith("https://youtu.be/")
+
+    def yt(self, url):
+        """Look up *url* through YouTube's oEmbed endpoint.
+
+        Returns ``(irc_string, is_error)``.  The ANSI-coloured equivalent is
+        printed as well: always for errors, only on a tty for successes.
+        """
+        import sys  # local import; the module itself imports sys only under __main__
+
+        url = url.rstrip("\x01")  # presumably trailing IRC CTCP framing
+        parsed = urlparse(url)
+        params = parse_qs(parsed.query)
+        segments = parsed.path.split("/")
+        playlist_id = None
+        # Embed, Shorts, youtu.be and Music links are looked up as plain
+        # www.youtube.com watch URLs; any playlist id they carry is ignored.
+        watch_base = "https://www.youtube.com/watch"
+        if self.is_embed(url) or self.is_ytshorts(url):
+            video_id = segments[2]  # /embed/<id> or /shorts/<id>
+        elif self.is_shorturl(url):
+            video_id = segments[1]  # /<id>
+        else:
+            video_id = params.get("v", [None])[0]
+            if not self.is_ytmusic(url):
+                watch_base = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
+                playlist_id = params.get("list", [None])[0]
+        # ignore the "random mix" and "radio" lists
+        if playlist_id and playlist_id.startswith("RD"):
+            playlist_id = None
+        if playlist_id and (self.prefer_playlist or not video_id):
+            target = f"{parsed.scheme}://{parsed.netloc}/playlist?list={playlist_id}"
+        elif video_id:
+            target = f"{watch_base}?v={video_id}"
+        else:
+            # Nothing to look up, e.g. a clip URL or a watch URL without "v".
+            return YouTube._error(
+                "no video or playlist id in URL!", "(check your URL for typos!)"
+            )
+        query = urlencode([("url", target), ("format", "json")])
+        try:
+            status, data = urlget(f"https://www.youtube.com/oembed?{query}")
+        except URLgetException as e:
+            return YouTube._error(e)
+        if status != 200:
+            # A non-200 body is not guaranteed to be JSON; report the status.
+            return YouTube._error(status)
+        try:
+            info = json_loads(data)
+            title, channel_name = info["title"], info["author_name"]
+        except (ValueError, KeyError, TypeError):
+            return YouTube._error(
+                "got no data from server!", "(check your URL for typos!)"
+            )
+        if sys.stdout.isatty():
+            print(
+                f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{title}\x1b[0m uploaded by \x1b[03m{channel_name}\x1b[0m"
+            )
+        return (
+            f"[\x0303Youtube\x03] \x02{title}\x02 uploaded by \x1d{channel_name}\x1d",
+            False,
+        )
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Set YouTube.prefer_playlist = True beforehand to report the playlist
+    # (rather than the video) when a watch URL also carries a "list" id.
+    if len(sys.argv) < 2:
+        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")
+    YouTube().yt(sys.argv[1])
diff --git a/youtube.py b/youtube_oembed_old.py
index 0d51e16..0d51e16 100755
--- a/youtube.py
+++ b/youtube_oembed_old.py
diff --git a/youtube.alt.py b/youtube_scrape.py
index ca478d8..82671a2 100755
--- a/youtube.alt.py
+++ b/youtube_scrape.py
@@ -159,7 +159,6 @@ class YouTube:
videoId = url.split("?")[0].split("/")[-1]
url = f"https://www.youtube.com/watch?v={videoId}"
p = self.parseprop()
- # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
data = b""
data = data.decode() # bytes to utf-8
if (
@@ -177,6 +176,7 @@ class YouTube:
irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
# print(f"\x1b[31m my data is: {data}\x1b[0m")
+ print(data)
p.feed(data)
if p.h == {}:
print(ansi_string)
|