summaryrefslogtreecommitdiff
path: root/youtube_abstract.py
diff options
context:
space:
mode:
authorPawky Languish2025-04-19 19:07:38 +0000
committerPawky Languish2025-04-19 19:07:38 +0000
commitf4fb34dde6b03fdb49f71476d587c5c7f986b565 (patch)
treeef8cc174c413dcd766754bc51c52b9a2fc328e5c /youtube_abstract.py
parent12062621d6d67adb8abee962f4201e2d5196f55b (diff)
idk some changes lol, formatting and stuff
Diffstat (limited to 'youtube_abstract.py')
-rwxr-xr-xyoutube_abstract.py241
1 files changed, 241 insertions, 0 deletions
diff --git a/youtube_abstract.py b/youtube_abstract.py
new file mode 100755
index 0000000..fb93ad7
--- /dev/null
+++ b/youtube_abstract.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+from urllib.parse import urlencode, urlparse, parse_qs
+from json import loads as json_loads
+from URLget import urlget, URLgetException
+
+
class YouTube:
    """Build one-line titles for YouTube links, rendered for IRC and ANSI.

    The public entry points are :meth:`match_urls`, which picks YouTube
    links out of free text, and :meth:`yt`, which resolves one link through
    YouTube's oEmbed endpoint and returns the formatted title.
    """

    # Whether a URL that carries both a video and a playlist ID should be
    # titled as the playlist (True) or as the video (False, the default).
    # Kept as a class attribute so it can be overridden on the class itself.
    prefer_playlist = False

    # Every URL form match_urls() recognises, compared after http:// has been
    # upgraded to https://. The "embed/videoseries?" playlist form is already
    # covered by the plain "embed/" prefixes.
    _URL_PREFIXES = (
        "https://youtu.be/",
        "https://www.youtube.com/watch?",
        "https://music.youtube.com/watch?",
        "https://m.youtube.com/watch?",
        "https://www.youtube.com/playlist?",
        "https://music.youtube.com/playlist?",
        "https://m.youtube.com/playlist?",
        "https://www.youtube.com/shorts/",
        "https://youtube.com/shorts/",
        "https://m.youtube.com/shorts/",
        "https://www.youtube.com/embed/",
        "https://www.youtube-nocookie.com/embed/",
    )

    def __init__(self):
        """Create the colour palettes used when rendering messages."""
        self.irc_pal = {
            "rst": "\x0f",  # reset
            "ylw": "\x0307",  # yellow
            "b_ylw": "\x0307\x02",  # bold yellow
            "wht": "\x0315",  # white
            "red": "\x0304",  # red
            "grn": "\x0303",  # green
            "itl": "\x1d",  # italic
            "bld": "\x02",  # bold
        }
        self.ansi_pal = {
            "rst": "\x1b[0m",  # reset
            "ylw": "\x1b[33;2m",  # yellow
            "b_ylw": "\x1b[33;1m",  # bold yellow
            "wht": "\x1b[37;2m",  # white
            "red": "\x1b[31m",  # red
            "grn": "\x1b[32m",  # green
            "itl": "\x1b[03m",  # italic
            "bld": "\x1b[1m",  # bold
        }

    def mesg(self, msg, t=None):
        """Forward *msg* and *t* to ``self.util.mesg`` (a short alias).

        NOTE(review): ``self.util`` is not assigned anywhere in this class;
        presumably the host application attaches it before calling this.
        """
        self.util.mesg(msg, t)

    def match_urls(self, str, r=None):
        """Return every YouTube URL found in the whitespace-separated text.

        Args:
            str: Text to scan. The name shadows the builtin but is kept so
                existing keyword callers keep working.
            r: Optional list to append matches to. A fresh list is created
                when omitted, so results never leak between calls.

        Returns:
            The list of matched URLs in order of appearance, with a leading
            ``http://`` upgraded to ``https://``. Duplicates are kept.
        """
        found = [] if r is None else r
        for token in str.split():
            if token.startswith("http://"):
                token = "https://" + token[7:]
            if token.startswith(self._URL_PREFIXES):
                found.append(token)
        return found

    def matchstart(self, str, *arr):
        """Return True if *str* starts with any of the prefixes in *arr*."""
        # str.startswith accepts a tuple of prefixes; an empty tuple is False.
        return str.startswith(arr)

    def is_clip(self, str):
        """Return True if *str* is a YouTube clip URL."""
        return self.matchstart(
            str, "https://youtube.com/clip/", "https://www.youtube.com/clip/"
        )

    @staticmethod
    def _debug(*args):
        """Print *args*, but only when stdout is an interactive terminal."""
        import sys

        if sys.stdout.isatty():
            print(*args)

    def normalize_url(self, url):
        """Boil *url* down to a canonical watch/playlist URL.

        Handles short links (youtu.be), shorts, embeds and regular
        ``?v=``-style URLs (desktop, mobile, music).

        Returns:
            ``https://www.youtube.com/watch?v=<id>[&list=<id>]`` when a video
            ID is found, ``https://www.youtube.com/playlist?list=<id>`` for a
            playlist-only embed, and *url* unchanged otherwise.
        """
        parts = urlparse(url)
        query = parse_qs(parts.query)
        # Work on the path only, so query strings and fragments can never end
        # up inside an extracted ID.
        segments = [seg for seg in parts.path.split("/") if seg]
        video_id = None
        list_id = query.get("list", [None])[0]
        is_embed = False

        if self.matchstart(url, "https://youtu.be/"):
            video_id = segments[0] if segments else None
        elif self.matchstart(
            url,
            "https://youtube.com/shorts/",
            "https://www.youtube.com/shorts/",
            "https://m.youtube.com/shorts/",
        ):
            video_id = segments[1] if len(segments) > 1 else None
        elif self.matchstart(
            url,
            "https://www.youtube.com/embed/",
            "https://www.youtube-nocookie.com/embed/",
        ):
            is_embed = True
            # ".../embed/videoseries?list=..." embeds a playlist, not a video.
            if len(segments) > 1 and segments[1] != "videoseries":
                video_id = segments[1]
        else:  # regular watch URLs: desktop, mobile, music, ...
            video_id = query.get("v", [None])[0]

        if video_id:
            params = [("v", video_id)]
            if list_id:
                params.append(("list", list_id))
            clean = "https://www.youtube.com/watch?" + urlencode(params)
        elif is_embed and list_id:
            clean = "https://www.youtube.com/playlist?" + urlencode(
                [("list", list_id)]
            )
        else:
            clean = url
        self._debug("clean url", clean)
        return clean

    def normalize_playlist(self, url):
        """Reduce *url* to either its video or its playlist, never both.

        The playlist wins when ``prefer_playlist`` is set or when there is no
        video ID. Playlist IDs starting with ``RD`` (the programmatic
        "mix"/"radio" lists) are ignored.

        Returns:
            The reduced URL string, or the tuple ``(messages, True)`` with an
            ``{"irc": ..., "ansi": ...}`` error message when neither a video
            nor a usable playlist ID is present.
        """
        parts = urlparse(url)
        query = parse_qs(parts.query)
        video_id = query.get("v", [None])[0]
        playlist_id = query.get("list", [None])[0]
        if playlist_id and playlist_id.startswith("RD"):
            playlist_id = None

        base = parts.scheme + "://" + parts.netloc
        if playlist_id and (self.prefer_playlist or not video_id):
            return base + "/playlist?list=" + playlist_id
        if video_id:
            return base + parts.path + "?v=" + video_id

        # The error template references the prefix strings, so make sure they
        # exist even when this method is called without going through yt().
        self._set_prefixes()
        return (
            self.setstring(
                "string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}"
            ),
            True,
        )

    def setstring(self, name, val, mylocals=None):
        """Render the template *val* once per palette and publish the results.

        *val* is evaluated as an f-string with ``pal`` bound to the matching
        palette; the literal ``{i}`` is first replaced by ``irc`` / ``ansi``,
        so ``{{i}_prefix}`` refers to the previously rendered prefix. The
        results are stored in the module globals ``irc_<name>`` and
        ``ansi_<name>`` (as before) and are now also returned.

        Args:
            name: Suffix of the global names to set.
            val: f-string style template. It is evaluated with ``eval``, so
                it must be a trusted constant and never external input.
                Untrusted data belongs in *mylocals* and is referenced from
                the template by variable name only.
            mylocals: Optional mapping of extra names visible to the
                template. It is copied, never mutated.

        Returns:
            ``{"irc": <rendered>, "ansi": <rendered>}``.
        """
        scope = dict(mylocals) if mylocals else {}
        scope["self"] = self
        rendered = {}
        for kind in ("irc", "ansi"):
            template = val.replace("{i}", kind)
            scope["pal"] = getattr(self, f"{kind}_pal")
            text = eval(f"f{template!r}", globals(), scope)
            globals()[f"{kind}_{name}"] = text
            rendered[kind] = text
        return rendered

    def _set_prefixes(self):
        """Render the normal and error message prefixes for both palettes."""
        self.setstring(
            "prefix",
            "[{pal['grn']}YouTube{pal['rst']}]",
        )
        self.setstring(
            "prefix_err",
            "[{pal['red']}YouTube{pal['rst']}] {pal['ylw']}ERROR:{pal['b_ylw']}",
        )

    def yt(self, url):
        """Look up *url* via YouTube's oEmbed endpoint and format its title.

        Returns:
            ``(messages, failed)`` where *messages* is
            ``{"irc": ..., "ansi": ...}`` and *failed* is True when an error
            message is returned instead of a title.
        """
        self._set_prefixes()
        # Fallback used when the server answers with something that is not
        # the expected oEmbed JSON document.
        no_data = self.setstring(
            "string",
            "{{i}_prefix_err} got no data from server! {pal['wht']}(check your URL for typos!){pal['rst']}",
        )
        # Presumably strips the trailing CTCP delimiter left on URLs that
        # arrive inside a /me action.
        url = url.rstrip("\x01")
        url = self.normalize_url(url)
        url = self.normalize_playlist(url)
        if isinstance(url, tuple):
            # normalize_playlist could not find an ID; pass its error through
            # instead of sending a junk request to the server.
            return url
        url = f"https://www.youtube.com/oembed?{urlencode([('url', url), ('format', 'json')])}"
        try:
            status, data = urlget(url)
        except URLgetException as e:
            result = self.setstring(
                "string", "{{i}_prefix_err} {e}{pal['rst']}", {"e": e}
            )
            self._debug(result["ansi"])
            return result, True
        if status != 200:
            result = self.setstring(
                "string", "{{i}_prefix_err} {status}{pal['rst']}", {"status": status}
            )
            return result, True
        try:
            data = json_loads(data)
            title, channelName = data["title"], data["author_name"]
        except (ValueError, KeyError, TypeError):
            # Not JSON, or JSON without the expected fields. Re-render so the
            # published globals match the message being returned.
            no_data = self.setstring(
                "string",
                "{{i}_prefix_err} got no data from server! {pal['wht']}(check your URL for typos!){pal['rst']}",
            )
            return no_data, True
        result = self.setstring(
            "string",
            "{{i}_prefix} {pal['bld']}{title}{pal['rst']} uploaded by {pal['itl']}{channelName}{pal['rst']}",
            mylocals={"title": title, "channelName": channelName},
        )
        self._debug("ansi", result["ansi"])
        self._debug("irc", result["irc"])
        return result, False
+
+
if __name__ == "__main__":
    import sys

    # Command-line smoke test: list the YouTube URLs found in the first
    # argument, then look the argument up and print its title.
    #
    # Set YouTube.prefer_playlist = True here to title the playlist rather
    # than the video when a URL carries both IDs (default: the video).
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")

    YT = YouTube()
    print(YT.match_urls(sys.argv[1]))
    YT.yt(sys.argv[1])