summaryrefslogtreecommitdiff
path: root/youtube_abstract.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_abstract.py')
-rwxr-xr-x youtube_abstract.py 215
1 files changed, 215 insertions, 0 deletions
diff --git a/youtube_abstract.py b/youtube_abstract.py
new file mode 100755
index 0000000..f773aa0
--- /dev/null
+++ b/youtube_abstract.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+from urllib.parse import urlencode, urlparse, parse_qs
+from json import loads as json_loads
+from URLget import urlget, URLgetException
+
+import sys
+
+
def dbgprint(*args, **kwargs):
    """Behave like ``print`` but stay silent unless stdout is a terminal.

    All positional and keyword arguments are handed to ``print`` unchanged.
    """
    if not sys.stdout.isatty():
        return
    print(*args, **kwargs)
+
+
class YouTube:
    """Turn YouTube links into short "title uploaded by channel" abstracts.

    Titles are fetched from ``https://www.youtube.com/oembed`` and rendered
    twice: once with IRC formatting codes (``irc_pal``) and once with ANSI
    escape sequences (``ansi_pal``).

    Set the class attribute ``YouTube.prefer_playlist`` to ``True`` to title
    the playlist instead of the video when a URL carries both IDs.
    """

    # URL prefixes reported by match_urls().  The ".../embed/videoseries?"
    # playlist forms are covered by the plain ".../embed/" prefixes.
    _URL_PREFIXES = (
        "https://youtu.be/",
        "https://www.youtube.com/playlist?",  # playlist
        "https://music.youtube.com/playlist?",
        "https://m.youtube.com/playlist?",
        "https://www.youtube.com/shorts/",  # shorts
        "https://m.youtube.com/shorts/",
        "https://www.youtube.com/watch?",  # normal
        "https://music.youtube.com/watch?",
        "https://m.youtube.com/watch?",
        "https://www.youtube-nocookie.com/embed/",  # embed
        "https://www.youtube.com/embed/",
        "https://m.youtube.com/embed/",
    )

    def __init__(self):
        # Should a URL that names both a video and a playlist be titled after
        # the playlist?  Keep a value the user already set on the class,
        # otherwise default to the video title.
        if not hasattr(YouTube, "prefer_playlist"):
            YouTube.prefer_playlist = False
        self.irc_pal = {
            "rst": "\x0f",  # reset
            "ylw": "\x0307",  # yellow
            "b_ylw": "\x0307\x02",  # bold yellow
            "wht": "\x0315",  # white
            "red": "\x0304",  # red
            "grn": "\x0303",  # green
            "itl": "\x1d",  # italic
            "bld": "\x02",  # bold
        }
        self.ansi_pal = {
            "rst": "\x1b[0m",  # reset
            "ylw": "\x1b[33;2m",  # yellow
            "b_ylw": "\x1b[33;1m",  # bold yellow
            "wht": "\x1b[37;2m",  # white
            "red": "\x1b[31m",  # red
            "grn": "\x1b[32m",  # green
            "itl": "\x1b[03m",  # italic
            "bld": "\x1b[1m",  # bold
        }

    def mesg(self, msg, t=None):
        """Forward *msg* and *t* to ``self.util.mesg`` (a shorthand alias).

        NOTE(review): ``self.util`` is never assigned in this class;
        presumably the host application attaches it -- confirm with callers.
        """
        self.util.mesg(msg, t)

    def match_urls(self, str, r=None):
        """Collect every whitespace-separated word of *str* that is a YouTube URL.

        ``http://`` is upgraded to ``https://`` and a bare ``youtube.`` host
        gets a ``www.`` prefix before matching, and the upgraded form is what
        ends up in the result.

        Args:
            str: Text to scan (the name shadows the builtin but is kept so
                existing keyword callers keep working).
            r: Optional list to append matches to; a fresh list is created
                when omitted, so results never leak between calls.

        Returns:
            The list of matching URLs (``r`` itself when it was supplied).
        """
        found = [] if r is None else r
        for word in str.split():
            if word.startswith("http://"):
                word = "https://" + word[7:]
            if word.startswith("https://youtube."):
                word = "https://www." + word[8:]
            if word.startswith(self._URL_PREFIXES):
                found.append(word)
        return found

    def matchstart(self, str, *arr):
        """Return True when *str* starts with any of the prefixes in *arr*."""
        return any(str.startswith(prefix) for prefix in arr)

    def is_clip(self, str):
        """Return True when *str* is a YouTube clip URL."""
        return self.matchstart(
            str, "https://youtube.com/clip/", "https://www.youtube.com/clip/"
        )

    def normalize_url(self, url):
        """Boil *url* down to ``https://www.youtube.com/watch?`` + video/list IDs.

        Handles ``youtu.be`` short links, ``/shorts/``, ``/embed/`` (including
        ``/embed/videoseries?list=...``) and any other URL whose query string
        carries ``v=`` and/or ``list=`` (watch, music, playlist pages).

        Returns:
            The normalized URL.  When no ID is found, the bare
            ``https://www.youtube.com/watch?`` is returned.
        """
        dbgprint("normalize", url)
        parsed = urlparse(url)
        query = parse_qs(parsed.query)
        segments = parsed.path.split("/")  # "/embed/ID" -> ["", "embed", "ID"]
        video_id, list_id = "", ""
        if self.matchstart(url, "https://youtu.be/"):
            video_id = segments[1]
            dbgprint("youtu.be")
        elif self.matchstart(
            url, "https://www.youtube.com/shorts/", "https://m.youtube.com/shorts/"
        ):
            video_id = segments[-1]
            dbgprint("/shorts", video_id)
        elif self.matchstart(
            url,
            "https://m.youtube.com/embed/",
            "https://www.youtube.com/embed/",
            "https://www.youtube-nocookie.com/embed/",
        ):
            list_id = query.get("list", [""])[0]
            # "/embed/videoseries" is the playlist player, not a video ID.
            if not segments[2].startswith("videoseries"):
                video_id = segments[2]
            dbgprint("embed", video_id, list_id)
        else:  # watch pages, YouTube Music, plain playlist pages, ...
            video_id = query.get("v", [""])[-1]
            list_id = query.get("list", [""])[-1]
        params = []
        if video_id:
            params.append(("v", video_id))
        if list_id:
            params.append(("list", list_id))
        url = "https://www.youtube.com/watch?" + urlencode(params)
        dbgprint("clean url", url)
        return url

    def normalize_playlist(self, url):
        """Reduce *url* to either its playlist URL or its single-video URL.

        The playlist wins when ``prefer_playlist`` is set or when there is no
        video ID.  Lists whose ID starts with ``RD`` (the programmatic
        "mix" / "radio" lists) are ignored.

        Returns:
            The reduced URL string, or -- when neither ID can be found -- the
            error pair ``({"irc": ..., "ansi": ...}, True)``.
        """
        parts = urlparse(url)
        query = parse_qs(parts.query)
        video_id = query.get("v", [None])[0]
        playlist_id = query.get("list", [None])[0]
        if playlist_id is not None and playlist_id.startswith("RD"):
            playlist_id = None
        base = parts.scheme + "://" + parts.netloc
        if playlist_id and (self.prefer_playlist or not video_id):
            return base + "/playlist?list=" + playlist_id
        if video_id:
            return base + parts.path + "?v=" + video_id
        # The error template needs the prefixes; make sure they exist even
        # when this method is called without going through yt().
        self._set_prefixes()
        failure = self.setstring(
            "string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}"
        )
        return failure, True

    def setstring(self, name, val, mylocals=None):
        """Render template *val* once per palette and publish the results.

        *val* is evaluated as an f-string body.  Inside it ``{i}`` is first
        replaced by ``irc`` / ``ansi``, ``pal`` is the matching palette, and
        names are looked up in *mylocals* and then in the module globals.
        The results are stored as module globals ``irc_<name>`` and
        ``ansi_<name>`` so that later templates can refer to them.

        WARNING: the template is executed via ``eval``; only pass templates
        written by the developer, never text from IRC or from the network.
        Untrusted *values* (titles etc.) are safe when passed via *mylocals*.

        Args:
            name: Suffix of the globals to set.
            val: f-string style template.
            mylocals: Optional mapping of extra names for the template; it is
                copied, never modified.

        Returns:
            ``{"irc": rendered, "ansi": rendered}``.
        """
        namespace = dict(mylocals) if mylocals is not None else {}
        namespace.update(self=self, name=name, val=val)
        rendered = {}
        for flavour in ("irc", "ansi"):
            template = val.replace("{i}", flavour)
            namespace["pal"] = getattr(self, f"{flavour}_pal")
            text = eval("f" + repr(template), globals(), namespace)
            globals()[f"{flavour}_{name}"] = text
            rendered[flavour] = text
        return rendered

    def _set_prefixes(self):
        """Render the normal and the error "[YouTube]" prefixes."""
        self.setstring("prefix", "[{pal['grn']}YouTube{pal['rst']}]")
        self.setstring(
            "prefix_err",
            "[{pal['red']}YouTube{pal['rst']}] {pal['ylw']}ERROR:{pal['b_ylw']}",
        )

    def yt(self, url):
        """Look up *url* via oEmbed and build its abstract.

        Returns:
            ``(strings, failed)`` where ``strings`` is
            ``{"irc": ..., "ansi": ...}`` and ``failed`` is True when the
            strings hold an error message instead of a title.
        """
        self._set_prefixes()
        no_data = self.setstring(
            "string",
            "{{i}_prefix_err} got no data from server! "
            "{pal['wht']}(check your URL for typos!){pal['rst']}",
        )
        url = url.rstrip("\x01")  # I forget exactly why, might be due to /me ?
        url = self.normalize_url(url)
        url = self.normalize_playlist(url)
        if isinstance(url, tuple):  # normalize_playlist reported an error
            return url
        query = urlencode([("url", url), ("format", "json")])
        url = f"https://www.youtube.com/oembed?{query}"
        try:
            status, data = urlget(url)
        except URLgetException as e:
            failure = self.setstring(
                "string", "{{i}_prefix_err} {e}{pal['rst']}", {"e": e}
            )
            dbgprint(failure["ansi"])
            return failure, True
        if status != 200:
            failure = self.setstring(
                "string", "{{i}_prefix_err} {status}{pal['rst']}", {"status": status}
            )
            return failure, True
        try:
            data = json_loads(data)
            title, channelName = data["title"], data["author_name"]
        except (ValueError, KeyError, TypeError):
            # Body was not the expected oEmbed JSON: report "got no data".
            return no_data, True
        summary = self.setstring(
            "string",
            "{{i}_prefix} {pal['bld']}{title}{pal['rst']} "
            "uploaded by {pal['itl']}{channelName}{pal['rst']}",
            mylocals={"title": title, "channelName": channelName},
        )
        dbgprint("ansi", summary["ansi"])
        dbgprint("irc", summary["irc"])
        return summary, False
+
+
if __name__ == "__main__":
    # Manual smoke test: pass a URL (or a line of text containing URLs) as
    # the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <youtube-url>")

    # If the URL is a video that is part of a playlist, title the playlist
    # (True) or the video (False, the default)?  Set
    # YouTube.prefer_playlist = True before instantiating to get the former.
    YT = YouTube()
    print(YT.match_urls(sys.argv[1]))
    YT.yt(sys.argv[1])