From 12062621d6d67adb8abee962f4201e2d5196f55b Mon Sep 17 00:00:00 2001 From: Pawky Languish Date: Tue, 28 Jan 2025 06:01:21 +0000 Subject: slight changes --- README | 5 ++ URLget.py | 5 +- youtube.alt.py | 205 -------------------------------------------------- youtube.py | 135 --------------------------------- youtube_oembed.py | 146 +++++++++++++++++++++++++++++++++++ youtube_oembed_old.py | 135 +++++++++++++++++++++++++++++++++ youtube_scrape.py | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 494 insertions(+), 342 deletions(-) delete mode 100755 youtube.alt.py delete mode 100755 youtube.py create mode 100755 youtube_oembed.py create mode 100755 youtube_oembed_old.py create mode 100755 youtube_scrape.py diff --git a/README b/README index ac4778c..fe0c472 100644 --- a/README +++ b/README @@ -1,3 +1,8 @@ code is formatted with "black" #remember to install the libraries, and you probably should use venvs anyway pip --require-venv install -r requirements.txt + +for youtube, there are several options, but for just getting a title, you'll want to use youtube_oembed.py +scraping youtube may or may not work based on your IP and stuff (it should mostly work on home IPs, but tends to get blocked from servers), using curl_cffi to spoof TLS fingerprint may help slightly, I don't handle the POT token stuff yet + +URLget.py is an abstraction that either uses python's builtin urllib.request or, if you have it available, then curl_cffi (which can spoof TLS fingerprinting, though that likely has minimal effect) diff --git a/URLget.py b/URLget.py index abe27a1..701b10d 100644 --- a/URLget.py +++ b/URLget.py @@ -1,3 +1,4 @@ +import sys class URLgetException(Exception): pass @@ -6,7 +7,7 @@ try: from curl_cffi import requests # from curl_cffi.requests.exceptions import HTTPError - print("using curl_cffi") + if sys.stderr.isatty(): print("using curl_cffi",file=sys.stderr) def urlget(url): # probably want to impersonate "chrome", "safari" or "safari_ios" @@ 
-24,7 +25,7 @@ except ModuleNotFoundError: from urllib.request import Request, urlopen # from urllib.error import HTTPError - print("using urllib.request") + if sys.stderr.isatty(): print("using urllib.request",file=sys.stderr) def urlget(url): # update as needed I guess diff --git a/youtube.alt.py b/youtube.alt.py deleted file mode 100755 index ca478d8..0000000 --- a/youtube.alt.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env python3 -from html.parser import HTMLParser -from URLget import urlget, URLgetException - -# from URLget import URLgetException -# urlget=URLget().urlget -# print(urlget("http://ip.envs.net")) -# print(dir(URLget)) - - -class YouTube: - # crude import, lol - # URLget = URLget().URLget - # def __init__(self): - # self.URLget = URLget().URLget - # print(URLget,URLget.URLget) - - video_type = "" - - def mesg(self, msg, t=None): - self.util.mesg(msg, t) - - def match_urls(self, str): - r = [ - i - for i in str.split() - if "https://youtu.be/" in i - or "https://www.youtube.com/watch?v=" in i - or "https://m.youtube.com/watch?v=" in i - or "https://youtube.com/watch?v=" in i - or "https://www.youtube.com/embed/" in i - or "https://www.youtube-nocookie.com/embed/" in i - or "https://music.youtube.com/watch?v=" in i - or "https://youtube.com/shorts/" in i - or "https://www.youtube.com/shorts/" in i - or "https://www.youtube.com/clip/" in i - or "https://youtube.com/clip/" in i - ] - r = list(dict.fromkeys(r)) - n = 0 - for i in r: - if not i.startswith("http"): - r.pop(n) - n += 1 - - return r - - def is_embed(self, str): - return str.startswith("https://www.youtube.com/embed/") or str.startswith( - "https://www.youtube-nocookie.com/embed/" - ) - - def is_ytmusic(self, str): - return str.startswith("https://music.youtube.com/watch?v=") - - def is_ytshorts(self, str): - return str.startswith("https://youtube.com/shorts/") or str.startswith( - "https://www.youtube.com/shorts/" - ) - - def is_clip(self, str): - return 
str.startswith("https://youtube.com/clip/") or str.startswith( - "https://www.youtube.com/clip/" - ) - - class parseprop(HTMLParser): - def __init__(self): - # print("yt parse init") - HTMLParser.__init__(self) - self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"] - self.h = {} - if YouTube.video_type == "clip": - self.itemprops_list += ["description"] - print("it is a clip!") - self.title = False - - def handle_data(self, data): - if self.title != False: - # print("title",data) - self.h.update({"html_title": data}) - self.title = False - - def handle_starttag(self, tag, attrs): - if tag == "title": - self.title = True - if (tag != "meta" and tag != "link") or ( - ( - [i for i in attrs if "itemprop" in i] == [] - and ("name", "title") not in attrs - ) - or (tag == "meta" and ("itemprop", "name") in attrs) - ): - return - # print(self,tag,attrs) - for k, v in attrs: - if k == "itemprop": - if v not in self.itemprops_list: - return - x = [v] - if tag == "link" and v == "name": - x = ["channelName"] - elif k == "content": - if attrs[0][1] == "interactionCount": - v = int(v) - x += [v] - elif k == "name" and v == "title": - x = [v] - else: - return - self.h.update({x[0]: x[1]}) - # print(x[0],"=",x[1]) - - def fmt_dur(dur): - h, m, s = 0, 0, 0 - m = dur[2:].split("M") - s = int(m[1][:-1]) - m = int(m[0]) - if m >= 60: - h = m // 60 - m = round((m / 60 - h) * 60) - return f"{h}h {m}m {s}s" - elif h == 0 and m == 0 and s == 0: - return "LIVE" - elif m == 0 and s != 0: - return f"{s}s" - elif s == 0: - return f"{m}m" - else: - return f"{m}m {s}s" - - def yt(self, url): - irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" - ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! 
\x1b[37;2m(check your URL for typos!)\x1b[0m" - # self.util.mesg("dbg hello") - url = url.rstrip("\x01") - self.video_type = ( - "clip" - if self.is_clip(url) - else ( - "shorts" - if self.is_ytshorts(url) - else ( - "music" - if self.is_ytmusic(url) - else "embed" if self.is_embed(url) else "video" - ) - ) - ) - video_type = self.video_type - if video_type == "embed": - videoId = url.split("/")[4] - url = f"https://www.youtube.com/watch?v={videoId}" - elif video_type == "music": - for i in url.split("?")[1].split("&"): - if i[0:2] == "v=": - videoId = i[2:] - url = f"https://www.youtube.com/watch?v={videoId}" - elif video_type == "shorts": - videoId = url.split("?")[0].split("/")[-1] - url = f"https://www.youtube.com/watch?v={videoId}" - p = self.parseprop() - # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail - data = b"" - data = data.decode() # bytes to utf-8 - if ( - data.find('meta itemprop="duration"') == -1 - or data.find('meta itemprop="name"') == -1 - ): - try: - status, data = urlget(url) - if status != 200: - irc_string = ( - f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {status} \x0315\x03" - ) - ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {status} \x1b[37;2m\x1b[0m" - except URLgetException as e: - irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03" - ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m" - # print(f"\x1b[31m my data is: {data}\x1b[0m") - p.feed(data) - if p.h == {}: - print(ansi_string) - return irc_string, True - elif p.h == {"html_title": "YouTube"}: - irc_string = ( - "[\x0304Youtube\x03] \x0307ERROR:\x0308 flagged as bot \x0315\x03" - ) - ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m flagged as bot \x1b[37;2m\x1b[0m" - print(ansi_string) - return irc_string, True - y = p.h - print(y) - y.update(duration=self.fmt_dur(y["duration"])) - irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) 
uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views" - ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views" - print(ansi_string) - return irc_string, False - - -if __name__ == "__main__": - import sys - - YouTube.premature_optimization = False - # YouTube.yt(YouTube, sys.argv[1]) - YouTube().yt(sys.argv[1]) diff --git a/youtube.py b/youtube.py deleted file mode 100755 index 0d51e16..0000000 --- a/youtube.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python3 -from urllib.request import urlopen -from urllib.error import HTTPError -from urllib.parse import urlencode, urlparse, parse_qs -from json import loads as json_loads - - -class YouTube: - def __init__(self): - try: - YouTube.prefer_playlist = YouTube.prefer_playlist - except AttributeError: - YouTube.prefer_playlist = False - - def mesg(self, msg, t=None): - self.util.mesg(msg, t) - - def match_urls(self, str): - r = [ - i - for i in str.split() - if "https://youtu.be/" in i - or "https://www.youtube.com/watch?v=" in i - or "https://m.youtube.com/watch?v=" in i - or "https://youtube.com/watch?v=" in i - or "https://www.youtube.com/embed/" in i - or "https://www.youtube-nocookie.com/embed/" in i - or "https://music.youtube.com/watch?v=" in i - or "https://youtube.com/shorts/" in i - or "https://www.youtube.com/shorts/" in i - ] - r = list(dict.fromkeys(r)) - n = 0 - for i in r: - if not i.startswith("http"): - r.pop(n) - n += 1 - - return r - - def is_embed(self, *str): - if type(self) == type("a"): - str = self - else: - str = str[0] - return str.startswith("https://www.youtube.com/embed/") or str.startswith( - "https://www.youtube-nocookie.com/embed/" - ) - - def is_ytmusic(self, *str): - if type(self) == type("a"): - str = self - else: - str = str[0] - return str.startswith("https://music.youtube.com/watch?v=") - - def 
is_ytshorts(self, *str): - if type(self) == type("a"): - str = self - else: - str = str[0] - return str.startswith("https://youtube.com/shorts/") or str.startswith( - "https://www.youtube.com/shorts/" - ) - - def is_clip(self, *str): - if type(self) == type("a"): - str = self - else: - str = str[0] - return str.startswith("https://youtube.com/clip/") or str.startswith( - "https://www.youtube.com/clip/" - ) - - def yt(self, url): - irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" - ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m" - # self.util.mesg("dbg hello") - url = url.rstrip("\x01") - if self.is_embed(url): - videoId = url.split("/")[4] - url = f"https://www.youtube.com/watch?v={videoId}" - elif self.is_ytmusic(url): - for i in url.split("?")[1].split("&"): - if i[0:2] == "v=": - videoId = i[2:] - url = f"https://www.youtube.com/watch?v={videoId}" - elif self.is_ytshorts(url): - videoId = url.split("?")[0].split("/")[-1] - url = f"https://www.youtube.com/watch?v={videoId}" - url = urlparse(url) - qs = parse_qs(url.query) - try: - video_id = qs["v"][0] - except KeyError: - video_id = None - try: - playlist_id = qs["list"][0] - # ignore the "random mix" and "radio" lists - if playlist_id.startswith("RD"): - playlist_id = None - except KeyError: - playlist_id = None - if (self.prefer_playlist and playlist_id) or (playlist_id and not video_id): - url = url.scheme + "://" + url.netloc + "/playlist?list=" + playlist_id - else: - url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id - url = f"https://www.youtube.com/oembed?{urlencode([('url',url),('format','json')])}" - try: - # print(url, " and ", playlist_id) - data = urlopen(url).read().decode() - data = json_loads(data) - title = data["title"] - channelName = data["author_name"] - except HTTPError as e: - irc_string = f"[\x0304Youtube\x03] 
\x0307ERROR:\x0308 {e} \x0315\x03" - ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m" - print(ansi_string) - return irc_string, True - irc_string = ( - f"[\x0303Youtube\x03] \x02{title}\x02 uploaded by \x1d{channelName}\x1d" - ) - ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{title}\x1b[0m uploaded by \x1b[03m{channelName}\x1b[0m" - print(ansi_string) - return irc_string, False - - -if __name__ == "__main__": - import sys - - # if url is a video that's part of a playlist, - # return playlist (True) or video (False, default)? - # YouTube.prefer_playlist=False - YouTube().yt(sys.argv[1]) - # YouTube.yt(YouTube, sys.argv[1]) diff --git a/youtube_oembed.py b/youtube_oembed.py new file mode 100755 index 0000000..4a0ca20 --- /dev/null +++ b/youtube_oembed.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +from urllib.parse import urlencode, urlparse, parse_qs +from json import loads as json_loads +from URLget import urlget, URLgetException + +class YouTube: + def __init__(self): + try: + YouTube.prefer_playlist = YouTube.prefer_playlist + except AttributeError: + YouTube.prefer_playlist = False + + def mesg(self, msg, t=None): + self.util.mesg(msg, t) + + def match_urls(self, str): + r = [ + i + for i in str.split() + if "https://youtu.be/" in i + or "https://www.youtube.com/watch?v=" in i + or "https://m.youtube.com/watch?v=" in i + or "https://youtube.com/watch?v=" in i + or "https://www.youtube.com/embed/" in i + or "https://www.youtube-nocookie.com/embed/" in i + or "https://music.youtube.com/watch?v=" in i + or "https://youtube.com/shorts/" in i + or "https://www.youtube.com/shorts/" in i + ] + r = list(dict.fromkeys(r)) + n = 0 + for i in r: + if not i.startswith("http"): + r.pop(n) + n += 1 + + return r + + def is_embed(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://www.youtube.com/embed/") or str.startswith( + "https://www.youtube-nocookie.com/embed/" + ) + + 
def is_ytmusic(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://music.youtube.com/watch?v=") + + def is_ytshorts(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://youtube.com/shorts/") or str.startswith( + "https://www.youtube.com/shorts/" + ) + + def is_clip(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://youtube.com/clip/") or str.startswith( + "https://www.youtube.com/clip/" + ) + + def is_shorturl(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://youtu.be/") + + def yt(self, url): + irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" + ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m" + # self.util.mesg("dbg hello") + url = url.rstrip("\x01") + if self.is_embed(url): + videoId = url.split("/")[4] + url = f"https://www.youtube.com/watch?v={videoId}" + elif self.is_ytmusic(url): + for i in url.split("?")[1].split("&"): + if i[0:2] == "v=": + videoId = i[2:] + url = f"https://www.youtube.com/watch?v={videoId}" + elif self.is_ytshorts(url): + videoId = url.split("?")[0].split("/")[-1] + url = f"https://www.youtube.com/watch?v={videoId}" + elif self.is_shorturl(url): + videoId = url.split("/")[3].split("?")[0] + url = f"https://www.youtube.com/watch?v={videoId}" + url = urlparse(url) + qs = parse_qs(url.query) + try: + video_id = qs["v"][0] + except KeyError: + video_id = None + try: + playlist_id = qs["list"][0] + # ignore the "random mix" and "radio" lists + if playlist_id.startswith("RD"): + playlist_id = None + except KeyError: + playlist_id = None + if (self.prefer_playlist and playlist_id) or (playlist_id and not video_id): + url = url.scheme + "://" + url.netloc 
+ "/playlist?list=" + playlist_id + else: + url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id + url = f"https://www.youtube.com/oembed?{urlencode([('url',url),('format','json')])}" + try: + # print(url, " and ", playlist_id) + status,data = urlget(url) + if status != 200: + irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {status} \x0315\x03" + ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {status} \x1b[37;2m\x1b[0m" + data = json_loads(data) + title = data["title"] + channelName = data["author_name"] + except URLgetException as e: + irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03" + ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m" + print(ansi_string) + return irc_string, True + irc_string = ( + f"[\x0303Youtube\x03] \x02{title}\x02 uploaded by \x1d{channelName}\x1d" + ) + ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{title}\x1b[0m uploaded by \x1b[03m{channelName}\x1b[0m" + if __import__("sys").stdout.isatty(): print(ansi_string) + return irc_string, False + + +if __name__ == "__main__": + import sys + + # if url is a video that's part of a playlist, + # return playlist (True) or video (False, default)? 
+ # YouTube.prefer_playlist=False + YouTube().yt(sys.argv[1]) + # YouTube.yt(YouTube, sys.argv[1]) diff --git a/youtube_oembed_old.py b/youtube_oembed_old.py new file mode 100755 index 0000000..0d51e16 --- /dev/null +++ b/youtube_oembed_old.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +from urllib.request import urlopen +from urllib.error import HTTPError +from urllib.parse import urlencode, urlparse, parse_qs +from json import loads as json_loads + + +class YouTube: + def __init__(self): + try: + YouTube.prefer_playlist = YouTube.prefer_playlist + except AttributeError: + YouTube.prefer_playlist = False + + def mesg(self, msg, t=None): + self.util.mesg(msg, t) + + def match_urls(self, str): + r = [ + i + for i in str.split() + if "https://youtu.be/" in i + or "https://www.youtube.com/watch?v=" in i + or "https://m.youtube.com/watch?v=" in i + or "https://youtube.com/watch?v=" in i + or "https://www.youtube.com/embed/" in i + or "https://www.youtube-nocookie.com/embed/" in i + or "https://music.youtube.com/watch?v=" in i + or "https://youtube.com/shorts/" in i + or "https://www.youtube.com/shorts/" in i + ] + r = list(dict.fromkeys(r)) + n = 0 + for i in r: + if not i.startswith("http"): + r.pop(n) + n += 1 + + return r + + def is_embed(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://www.youtube.com/embed/") or str.startswith( + "https://www.youtube-nocookie.com/embed/" + ) + + def is_ytmusic(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://music.youtube.com/watch?v=") + + def is_ytshorts(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://youtube.com/shorts/") or str.startswith( + "https://www.youtube.com/shorts/" + ) + + def is_clip(self, *str): + if type(self) == type("a"): + str = self + else: + str = str[0] + return str.startswith("https://youtube.com/clip/") or 
str.startswith( + "https://www.youtube.com/clip/" + ) + + def yt(self, url): + irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" + ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m" + # self.util.mesg("dbg hello") + url = url.rstrip("\x01") + if self.is_embed(url): + videoId = url.split("/")[4] + url = f"https://www.youtube.com/watch?v={videoId}" + elif self.is_ytmusic(url): + for i in url.split("?")[1].split("&"): + if i[0:2] == "v=": + videoId = i[2:] + url = f"https://www.youtube.com/watch?v={videoId}" + elif self.is_ytshorts(url): + videoId = url.split("?")[0].split("/")[-1] + url = f"https://www.youtube.com/watch?v={videoId}" + url = urlparse(url) + qs = parse_qs(url.query) + try: + video_id = qs["v"][0] + except KeyError: + video_id = None + try: + playlist_id = qs["list"][0] + # ignore the "random mix" and "radio" lists + if playlist_id.startswith("RD"): + playlist_id = None + except KeyError: + playlist_id = None + if (self.prefer_playlist and playlist_id) or (playlist_id and not video_id): + url = url.scheme + "://" + url.netloc + "/playlist?list=" + playlist_id + else: + url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id + url = f"https://www.youtube.com/oembed?{urlencode([('url',url),('format','json')])}" + try: + # print(url, " and ", playlist_id) + data = urlopen(url).read().decode() + data = json_loads(data) + title = data["title"] + channelName = data["author_name"] + except HTTPError as e: + irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03" + ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m" + print(ansi_string) + return irc_string, True + irc_string = ( + f"[\x0303Youtube\x03] \x02{title}\x02 uploaded by \x1d{channelName}\x1d" + ) + ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{title}\x1b[0m uploaded by 
\x1b[03m{channelName}\x1b[0m" + print(ansi_string) + return irc_string, False + + +if __name__ == "__main__": + import sys + + # if url is a video that's part of a playlist, + # return playlist (True) or video (False, default)? + # YouTube.prefer_playlist=False + YouTube().yt(sys.argv[1]) + # YouTube.yt(YouTube, sys.argv[1]) diff --git a/youtube_scrape.py b/youtube_scrape.py new file mode 100755 index 0000000..82671a2 --- /dev/null +++ b/youtube_scrape.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +from html.parser import HTMLParser +from URLget import urlget, URLgetException + +# from URLget import URLgetException +# urlget=URLget().urlget +# print(urlget("http://ip.envs.net")) +# print(dir(URLget)) + + +class YouTube: + # crude import, lol + # URLget = URLget().URLget + # def __init__(self): + # self.URLget = URLget().URLget + # print(URLget,URLget.URLget) + + video_type = "" + + def mesg(self, msg, t=None): + self.util.mesg(msg, t) + + def match_urls(self, str): + r = [ + i + for i in str.split() + if "https://youtu.be/" in i + or "https://www.youtube.com/watch?v=" in i + or "https://m.youtube.com/watch?v=" in i + or "https://youtube.com/watch?v=" in i + or "https://www.youtube.com/embed/" in i + or "https://www.youtube-nocookie.com/embed/" in i + or "https://music.youtube.com/watch?v=" in i + or "https://youtube.com/shorts/" in i + or "https://www.youtube.com/shorts/" in i + or "https://www.youtube.com/clip/" in i + or "https://youtube.com/clip/" in i + ] + r = list(dict.fromkeys(r)) + n = 0 + for i in r: + if not i.startswith("http"): + r.pop(n) + n += 1 + + return r + + def is_embed(self, str): + return str.startswith("https://www.youtube.com/embed/") or str.startswith( + "https://www.youtube-nocookie.com/embed/" + ) + + def is_ytmusic(self, str): + return str.startswith("https://music.youtube.com/watch?v=") + + def is_ytshorts(self, str): + return str.startswith("https://youtube.com/shorts/") or str.startswith( + "https://www.youtube.com/shorts/" + ) + + def 
is_clip(self, str): + return str.startswith("https://youtube.com/clip/") or str.startswith( + "https://www.youtube.com/clip/" + ) + + class parseprop(HTMLParser): + def __init__(self): + # print("yt parse init") + HTMLParser.__init__(self) + self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"] + self.h = {} + if YouTube.video_type == "clip": + self.itemprops_list += ["description"] + print("it is a clip!") + self.title = False + + def handle_data(self, data): + if self.title != False: + # print("title",data) + self.h.update({"html_title": data}) + self.title = False + + def handle_starttag(self, tag, attrs): + if tag == "title": + self.title = True + if (tag != "meta" and tag != "link") or ( + ( + [i for i in attrs if "itemprop" in i] == [] + and ("name", "title") not in attrs + ) + or (tag == "meta" and ("itemprop", "name") in attrs) + ): + return + # print(self,tag,attrs) + for k, v in attrs: + if k == "itemprop": + if v not in self.itemprops_list: + return + x = [v] + if tag == "link" and v == "name": + x = ["channelName"] + elif k == "content": + if attrs[0][1] == "interactionCount": + v = int(v) + x += [v] + elif k == "name" and v == "title": + x = [v] + else: + return + self.h.update({x[0]: x[1]}) + # print(x[0],"=",x[1]) + + def fmt_dur(dur): + h, m, s = 0, 0, 0 + m = dur[2:].split("M") + s = int(m[1][:-1]) + m = int(m[0]) + if m >= 60: + h = m // 60 + m = round((m / 60 - h) * 60) + return f"{h}h {m}m {s}s" + elif h == 0 and m == 0 and s == 0: + return "LIVE" + elif m == 0 and s != 0: + return f"{s}s" + elif s == 0: + return f"{m}m" + else: + return f"{m}m {s}s" + + def yt(self, url): + irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03" + ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! 
\x1b[37;2m(check your URL for typos!)\x1b[0m" + # self.util.mesg("dbg hello") + url = url.rstrip("\x01") + self.video_type = ( + "clip" + if self.is_clip(url) + else ( + "shorts" + if self.is_ytshorts(url) + else ( + "music" + if self.is_ytmusic(url) + else "embed" if self.is_embed(url) else "video" + ) + ) + ) + video_type = self.video_type + if video_type == "embed": + videoId = url.split("/")[4] + url = f"https://www.youtube.com/watch?v={videoId}" + elif video_type == "music": + for i in url.split("?")[1].split("&"): + if i[0:2] == "v=": + videoId = i[2:] + url = f"https://www.youtube.com/watch?v={videoId}" + elif video_type == "shorts": + videoId = url.split("?")[0].split("/")[-1] + url = f"https://www.youtube.com/watch?v={videoId}" + p = self.parseprop() + data = b"" + data = data.decode() # bytes to utf-8 + if ( + data.find('meta itemprop="duration"') == -1 + or data.find('meta itemprop="name"') == -1 + ): + try: + status, data = urlget(url) + if status != 200: + irc_string = ( + f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {status} \x0315\x03" + ) + ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {status} \x1b[37;2m\x1b[0m" + except URLgetException as e: + irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03" + ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m" + # print(f"\x1b[31m my data is: {data}\x1b[0m") + print(data) + p.feed(data) + if p.h == {}: + print(ansi_string) + return irc_string, True + elif p.h == {"html_title": "YouTube"}: + irc_string = ( + "[\x0304Youtube\x03] \x0307ERROR:\x0308 flagged as bot \x0315\x03" + ) + ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m flagged as bot \x1b[37;2m\x1b[0m" + print(ansi_string) + return irc_string, True + y = p.h + print(y) + y.update(duration=self.fmt_dur(y["duration"])) + irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, 
{y['interactionCount']:,} views" + ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views" + print(ansi_string) + return irc_string, False + + +if __name__ == "__main__": + import sys + + YouTube.premature_optimization = False + # YouTube.yt(YouTube, sys.argv[1]) + YouTube().yt(sys.argv[1]) -- cgit 1.4.1-2-gfad0