#!/usr/bin/env python3
"""Look up YouTube video metadata by scraping the watch page's meta tags."""

import re
from html.parser import HTMLParser
from urllib.request import urlopen

# URL prefixes grouped by the kind of link they identify.
_WATCH_PREFIXES = (
    "https://youtu.be/",
    "https://www.youtube.com/watch?v=",
    "https://m.youtube.com/watch?v=",
    "https://youtube.com/watch?v=",
)
_EMBED_PREFIXES = (
    "https://www.youtube.com/embed/",
    "https://www.youtube-nocookie.com/embed/",
)
_MUSIC_PREFIXES = ("https://music.youtube.com/watch?v=",)
_SHORTS_PREFIXES = (
    "https://youtube.com/shorts/",
    "https://www.youtube.com/shorts/",
)
_CLIP_PREFIXES = (
    "https://youtube.com/clip/",
    "https://www.youtube.com/clip/",
)
_URL_PREFIXES = (
    _WATCH_PREFIXES
    + _EMBED_PREFIXES
    + _MUSIC_PREFIXES
    + _SHORTS_PREFIXES
    + _CLIP_PREFIXES
)

# ISO-8601 style duration as found in the page's itemprop="duration" tag,
# e.g. "PT3M33S"; every component is optional.
_DURATION_RE = re.compile(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?")


class YouTube:
    """Helpers for recognising YouTube links and describing the linked video.

    ``yt`` is written so that it can be invoked with the class itself as
    ``self`` (``YouTube.yt(YouTube, url)``) as well as on an instance.
    """

    # Kind of the most recently processed URL ("clip", "shorts", "music",
    # "embed" or "video"); also the fallback read by ``parseprop``.
    video_type = ""
    # When true, ``yt`` first tries to parse only the head of the page.
    premature_optimization = False

    def mesg(self, msg, t=None):
        """Forward a message to ``self.util.mesg`` (``util`` is set externally)."""
        self.util.mesg(msg, t)

    def match_urls(self, str):
        """Return the distinct YouTube URLs found in a whitespace-separated text.

        A token is accepted only when it *starts with* one of the known
        YouTube URL prefixes. First-seen order is preserved and duplicates
        are dropped.
        """
        matches = [token for token in str.split() if token.startswith(_URL_PREFIXES)]
        return list(dict.fromkeys(matches))

    @staticmethod
    def is_embed(str):
        """Return True if the URL is an embed link."""
        return str.startswith(_EMBED_PREFIXES)

    @staticmethod
    def is_ytmusic(str):
        """Return True if the URL is a YouTube Music watch link."""
        return str.startswith(_MUSIC_PREFIXES)

    @staticmethod
    def is_ytshorts(str):
        """Return True if the URL is a Shorts link."""
        return str.startswith(_SHORTS_PREFIXES)

    @staticmethod
    def is_clip(str):
        """Return True if the URL is a clip link."""
        return str.startswith(_CLIP_PREFIXES)

    class parseprop(HTMLParser):
        """Collect selected ``<meta>``/``<link>`` properties into ``self.h``."""

        def __init__(self, video_type=None):
            """Prepare the parser.

            ``video_type`` selects the set of collected properties; when
            omitted, the class-level ``YouTube.video_type`` is used.
            """
            print("yt parse init")
            HTMLParser.__init__(self)
            if video_type is None:
                video_type = YouTube.video_type
            self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
            self.h = {}
            if video_type == "clip":
                self.itemprops_list += ["description"]
                print("it is a clip!")

        def handle_starttag(self, tag, attrs):
            """Record one property from a ``meta``/``link`` tag, if it is wanted.

            Stored keys: ``title`` (from ``<meta name="title">``),
            ``channelName`` (from ``<link itemprop="name">``) and every
            itemprop listed in ``itemprops_list``. ``interactionCount`` is
            stored as an int.
            """
            if tag not in ("meta", "link"):
                return
            has_itemprop = any("itemprop" in pair for pair in attrs)
            is_title = ("name", "title") in attrs
            if not has_itemprop and not is_title:
                return
            # <meta itemprop="name"> is skipped; the title is taken from
            # <meta name="title"> and the channel from <link itemprop="name">.
            if tag == "meta" and ("itemprop", "name") in attrs:
                return

            entry = []  # becomes [key, value] once both attributes were seen
            for key, value in attrs:
                if key == "itemprop":
                    if value not in self.itemprops_list:
                        return
                    if tag == "link" and value == "name":
                        entry = ["channelName"]
                    else:
                        entry = [value]
                elif key == "content":
                    if entry and entry[0] == "interactionCount":
                        value = int(value)
                    entry.append(value)
                elif key == "name" and value == "title":
                    entry = [value]
                else:
                    # Any other attribute means this is not a tag we understand.
                    return
            # Only store complete pairs; a tag lacking "content" (or listing it
            # before the property name) is ignored instead of raising.
            if len(entry) >= 2:
                self.h[entry[0]] = entry[1]

    @staticmethod
    def fmt_dur(dur):
        """Format a ``PT#H#M#S`` duration string for display.

        Returns ``"LIVE"`` for a zero duration, otherwise one of
        ``"{h}h {m}m {s}s"``, ``"{m}m {s}s"``, ``"{m}m"`` or ``"{s}s"``.
        Minute counts of 60 or more are folded into hours.

        Raises:
            ValueError: if ``dur`` is not a recognisable duration.
        """
        match = _DURATION_RE.fullmatch(dur)
        if match is None or not any(match.groups()):
            raise ValueError(f"unrecognized duration: {dur!r}")
        hours, minutes, seconds = (int(part or 0) for part in match.groups())
        hours, minutes = divmod(hours * 60 + minutes, 60)
        if hours:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes == 0 and seconds == 0:
            return "LIVE"
        if minutes == 0:
            return f"{seconds}s"
        if seconds == 0:
            return f"{minutes}m"
        return f"{minutes}m {seconds}s"

    def yt(self, url):
        """Fetch a video page and build a one-line description of it.

        Embed, Music and Shorts links are first rewritten to the plain
        ``https://www.youtube.com/watch?v=`` form.

        Returns:
            ``(irc_string, is_error)``: the IRC-formatted message and a flag
            that is True when no metadata could be extracted. An
            ANSI-coloured copy of the message is also printed.
        """
        url = url.rstrip("\x01")
        if self.is_clip(url):
            video_type = "clip"
        elif self.is_ytshorts(url):
            video_type = "shorts"
        elif self.is_ytmusic(url):
            video_type = "music"
        elif self.is_embed(url):
            video_type = "embed"
        else:
            video_type = "video"
        self.video_type = video_type

        if video_type == "embed":
            # Drop any query string (e.g. "?start=5") before taking the id.
            video_id = url.split("?")[0].split("/")[4]
            url = f"https://www.youtube.com/watch?v={video_id}"
        elif video_type == "music":
            for param in url.partition("?")[2].split("&"):
                if param.startswith("v="):
                    url = f"https://www.youtube.com/watch?v={param[2:]}"
        elif video_type == "shorts":
            video_id = url.split("?")[0].split("/")[-1]
            url = f"https://www.youtube.com/watch?v={video_id}"

        p = self.parseprop(video_type)

        # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
        data = ""
        if self.premature_optimization:
            # The opening body tag appears on approximately line 21 or 22, so
            # read 24 lines to be safe. Whole lines are read (rather than a byte
            # count) to avoid cutting a multi-byte UTF-8 sequence in half.
            with urlopen(url) as response:
                head = b"".join(response.readline() for _ in range(24))
            data = head.decode(errors="replace")
        if (
            'meta itemprop="duration"' not in data
            or 'meta itemprop="name"' not in data
        ):
            # Fallback for the optimization, and the normal path when it is
            # turned off: just read all of the html.
            with urlopen(url) as response:
                data = response.read().decode(errors="replace")

        p.feed(data)
        if not p.h:
            irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
            ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
            print(ansi_string)
            return irc_string, True

        y = p.h
        print(y)
        y.update(duration=self.fmt_dur(y["duration"]))
        irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
        ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
        print(ansi_string)
        return irc_string, False


if __name__ == "__main__":
    import sys

    YouTube.premature_optimization = False
    # The class itself is passed as ``self``; ``yt`` only needs class-level state.
    YouTube.yt(YouTube, sys.argv[1])