diff options
| -rwxr-xr-x | youtube_abstract.py | 152 | 
1 files changed, 63 insertions, 89 deletions
| diff --git a/youtube_abstract.py b/youtube_abstract.py index fb93ad7..f773aa0 100755 --- a/youtube_abstract.py +++ b/youtube_abstract.py @@ -3,6 +3,13 @@ from urllib.parse import urlencode, urlparse, parse_qs  from json import loads as json_loads  from URLget import urlget, URLgetException +import sys + + +def dbgprint(*args, **kwargs): +    if sys.stdout.isatty(): +        print(*args, **kwargs) +  class YouTube:      def __init__(self): @@ -11,25 +18,25 @@ class YouTube:              YouTube.prefer_playlist = YouTube.prefer_playlist          except AttributeError:  # we probably want video title, default to that              YouTube.prefer_playlist = False -        self.irc_pal = { -            "rst": "\x0f",  ######### reset -            "ylw": "\x0307",  ####### yellow -            "b_ylw": "\x0307\x02",  # bold yellow -            "wht": "\x0315",  ####### white -            "red": "\x0304",  ####### red -            "grn": "\x0303",  ####### green -            "itl": "\x1d",  ######### italic -            "bld": "\x02",  ######### bold +        self.irc_pal = {  # ignore the fancy alignment BS lol +            "rst": "" + "\x0f",  ####### reset +            "ylw": "" + "\x0307",  ##### yellow +            "b_ylw": "" "\x0307\x02",  # bold yellow +            "wht": "" + "\x0315",  ##### white +            "red": "" + "\x0304",  ##### red +            "grn": "" + "\x0303",  ##### green +            "itl": "" + "\x1d",  ####### italic +            "bld": "" + "\x02",  ####### bold          } -        self.ansi_pal = { -            "rst": "\x1b[0m",  ###### reset -            "ylw": "\x1b[33;2m",  ### yellow -            "b_ylw": "\x1b[33;1m",  # bold yellow -            "wht": "\x1b[37;2m",  ### white -            "red": "\x1b[31m",  ##### red -            "grn": "\x1b[32m",  ##### green -            "itl": "\x1b[03m",  ##### italic -            "bld": "\x1b[1m",  ###### bold +        self.ansi_pal = {  # ignore the fancy alignment BS lol +            "rst": "" + "\x1b[0m",  #### reset +            "ylw": "" + "\x1b[33;2m",  # yellow +            "b_ylw": "" "\x1b[33;1m",  # bold yellow +            "wht": "" + "\x1b[37;2m",  # white +            "red": "" + "\x1b[31m",  ### red +            "grn": "" + "\x1b[32m",  ### green +            "itl": "" + "\x1b[03m",  ### italic +            "bld": "" + "\x1b[1m",  #### bold          }      def mesg(self, msg, t=None):  # just an alias to shorten full name @@ -38,61 +45,32 @@ class YouTube:      def match_urls(self, str, r=[]):          if str.startswith("http://"):              str = "https://" + str[7:] -        if str.startswith( -            "https://" -        ):  # first string has to be trimmed outside this func -            if ( +        if str.startswith("https://youtube."): +            str = "https://www." + str[8:] +        if str.startswith("https://"):  # first string has to be trimmed before calling match_urls +            if (  # I'm just doing fancy BS to align the urls nicely, lol, ignore this                  str.startswith("https://youtu.be/") -                or str.startswith("https://www.youtube.com/watch?") -                or str.startswith("https://music.youtube.com/watch?") -                or str.startswith("https://m.youtube.com/watch?") -                or str.startswith("https://www.youtube.com/playlist?") -                or str.startswith("https://music.youtube.com/playlist?") -                or str.startswith("https://m.youtube.com/playlist?") -                or str.startswith("https://www.youtube.com/shorts/") -                or str.startswith("https://youtube.com/shorts/") -                or str.startswith("https://m.youtube.com/shorts/") -                or str.startswith("https://www.youtube.com/embed/") -                or str.startswith("https://www.youtube-nocookie.com/embed/") -                or str.startswith("https://www.youtube.com/embed/videoseries?") +                or str.startswith("" "" "" "" "https://www.youtube.com/playlist?")  ####### playlist +                or str.startswith("" "" "" "https://music.youtube.com/playlist?") +                or str.startswith("" "" "" "" "https://m.youtube.com/playlist?") +                or str.startswith("" "" "" "" "https://www.youtube.com/shorts/")  ######### shorts +                or str.startswith("" "" "" "" "" "https://youtube.com/shorts/") +                or str.startswith("" "" "" "" "https://m.youtube.com/shorts/") +                or str.startswith("" "" "" "" "https://www.youtube.com/watch?")  ########## normal +                or str.startswith("" "" "" "https://music.youtube.com/watch?") +                or str.startswith("" "" "" "" "https://m.youtube.com/watch?") +                or str.startswith("https://www.youtube-nocookie.com/embed/")  ############# embed +                or str.startswith("" "" "" "https://www.youtube.com/embed/") +                or str.startswith("" "" "" "" "https://m.youtube.com/embed/") +                or str.startswith("https://www.youtube-nocookie.com/embed/videoseries?")  # embed playlist +                or str.startswith("" "" "" "https://www.youtube.com/embed/videoseries?") +                or str.startswith("" "" "" "" "https://m.youtube.com/embed/videoseries?")              ): -                r += [str[: str.find(" ")]] +                r += [str[: str.find(" ")]]  # make array of all matching "words" (urls)          i = str.find(" ") + 1 -        return match_urls(self, str[i:].strip(), r=r) if i != 0 else r - -    """ -    def match_urls(self, str): -        str = str.replace("http://", "https://") -        r = [ -            i -            for i in str.split() -            # shorturl -            if "https://youtu.be/" in i -            # desktop -            or "https://www.youtube.com/watch?" in i or "https://www.youtube.com/playlist?" in i -            # mobile -            or "https://m.youtube.com/watch?" in i or "https://m.youtube.com/playlist?" in i -            # music -            or "https://music.youtube.com/watch?" in i or "https://music.youtube.com/playlist?" in i -            # shorts -            or "https://www.youtube.com/shorts/" in i -            or "https://m.youtube.com/shorts/" in i -            or "https://youtube.com/shorts/" in i -            # embed -            or "https://www.youtube.com/embed/" in i or "https://www.youtube-nocookie.com/embed/" in i -            # or "https://www.youtube.com/embed/videoseries?" in i # embed playlist, lol -            # just in case (shouldn't happen) -            or "https://youtube.com/watch?" in i or "https://youtube.com/playlist?" in i -        ] -        r = list(dict.fromkeys(r)) -        n = 0 -        for i in r: -            if not i.startswith("http"): -                r.pop(n) -            n += 1 - -        return r -        """ +        return ( +            match_urls(self, str[i:].strip(), r=r) if i != 0 else r +        )  # recurse down each word, see if anything matches      # makes for a little better syntax than a bunch of str.startswith calls      def matchstart(self, str, *arr): @@ -102,32 +80,32 @@ class YouTube:          return False      def is_clip(self, str): -        return self.matchstart( -            str, "https://youtube.com/clip/", "https://www.youtube.com/clip/" -        ) +        return self.matchstart(str, "https://youtube.com/clip/", "https://www.youtube.com/clip/")      # boil down to video id + playlist id      def normalize_url(self, url): -        raw_url = url +        dbgprint("normalize", url) +        raw_url, videoId, listId = url, "", ""          # youtu.be          if self.matchstart(url, "https://youtu.be/"):              videoId = url.split("/")[3].split("?")[0] -        elif self.matchstart( -            url, "https://youtube.com/shorts/", "https://www.youtube.com/shorts/" -        ): +            dbgprint("youtu.be") +        elif self.matchstart(url, "https://www.youtube.com/shorts/", "https://m.youtube.com/shorts/"):              videoId = url.split("?")[0].split("/")[-1] +            dbgprint("/shorts", videoId)          # embed          elif self.matchstart(              url, +            "https://m.youtube.com/embed/",              "https://www.youtube.com/embed/",              "https://www.youtube-nocookie.com/embed/",          ): -            # try: -            listId = parse_qs(urlparse(url).query)["list"][0] -            # except -            if not url.split("/")[4].startswith("videoseries"): -                videoId = url.split("/")[4] -            # print("embed", videoId, listId) +            try: +                listId = parse_qs(urlparse(url).query)["list"][0] +            except KeyError: +                if not url.split("/")[4].startswith("videoseries"): +                    videoId = url.split("/")[4] +            dbgprint("embed", videoId, listId)          elif "v=" in url:  # handles yt music, normal url, etc              for i in url.split("?")[1].split("&"):                  if i[0:2] == "v=": @@ -167,9 +145,7 @@ class YouTube:          elif video_id:              url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id          else: -            self.setstring( -                "string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}" -            ) +            self.setstring("string", "{{i}_prefix_err} unable to detect video ID!{pal['rst']}")              return {"irc": irc_string, "ansi": ansi_string}, True          return url @@ -206,9 +182,7 @@ class YouTube:              # print(url, " and ", playlist_id)              status, data = urlget(url)              if status != 200: -                self.setstring( -                    "string", "{{i}_prefix_err} {status}{pal['rst']}", locals() -                ) +                self.setstring("string", "{{i}_prefix_err} {status}{pal['rst']}", locals())                  return {"irc": irc_string, "ansi": ansi_string}, True              data = json_loads(data)              title, channelName = data["title"], data["author_name"] | 
