summary refs log tree commit diff
path: root/youtube_scrape.py
diff options
context:
space:
mode:
author: Pawky Languish, 2025-04-19 19:07:38 +0000
committer: Pawky Languish, 2025-04-19 19:07:38 +0000
commit: f4fb34dde6b03fdb49f71476d587c5c7f986b565 (patch)
tree: ef8cc174c413dcd766754bc51c52b9a2fc328e5c /youtube_scrape.py
parent: 12062621d6d67adb8abee962f4201e2d5196f55b (diff)
idk some changes lol, formatting and stuff HEAD master
Diffstat (limited to 'youtube_scrape.py')
-rwxr-xr-x youtube_scrape.py 37
1 files changed, 11 insertions, 26 deletions
diff --git a/youtube_scrape.py b/youtube_scrape.py
index 82671a2..951af3b 100755
--- a/youtube_scrape.py
+++ b/youtube_scrape.py
@@ -21,6 +21,7 @@ class YouTube:
         self.util.mesg(msg, t)
 
     def match_urls(self, str):
+        str = str.replace("http://", "https://")
         r = [
             i
             for i in str.split()
@@ -54,14 +55,10 @@ class YouTube:
         return str.startswith("https://music.youtube.com/watch?v=")
 
     def is_ytshorts(self, str):
-        return str.startswith("https://youtube.com/shorts/") or str.startswith(
-            "https://www.youtube.com/shorts/"
-        )
+        return str.startswith("https://youtube.com/shorts/") or str.startswith("https://www.youtube.com/shorts/")
 
     def is_clip(self, str):
-        return str.startswith("https://youtube.com/clip/") or str.startswith(
-            "https://www.youtube.com/clip/"
-        )
+        return str.startswith("https://youtube.com/clip/") or str.startswith("https://www.youtube.com/clip/")
 
     class parseprop(HTMLParser):
         def __init__(self):
@@ -84,10 +81,7 @@ class YouTube:
             if tag == "title":
                 self.title = True
             if (tag != "meta" and tag != "link") or (
-                (
-                    [i for i in attrs if "itemprop" in i] == []
-                    and ("name", "title") not in attrs
-                )
+                ([i for i in attrs if "itemprop" in i] == [] and ("name", "title") not in attrs)
                 or (tag == "meta" and ("itemprop", "name") in attrs)
             ):
                 return
@@ -129,7 +123,9 @@ class YouTube:
             return f"{m}m {s}s"
 
     def yt(self, url):
-        irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
+        irc_string = (
+            "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
+        )
         ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
         # self.util.mesg("dbg hello")
         url = url.rstrip("\x01")
@@ -139,11 +135,7 @@ class YouTube:
             else (
                 "shorts"
                 if self.is_ytshorts(url)
-                else (
-                    "music"
-                    if self.is_ytmusic(url)
-                    else "embed" if self.is_embed(url) else "video"
-                )
+                else ("music" if self.is_ytmusic(url) else "embed" if self.is_embed(url) else "video")
             )
         )
         video_type = self.video_type
@@ -161,16 +153,11 @@ class YouTube:
         p = self.parseprop()
         data = b""
         data = data.decode()  # bytes to utf-8
-        if (
-            data.find('meta itemprop="duration"') == -1
-            or data.find('meta itemprop="name"') == -1
-        ):
+        if data.find('meta itemprop="duration"') == -1 or data.find('meta itemprop="name"') == -1:
             try:
                 status, data = urlget(url)
                 if status != 200:
-                    irc_string = (
-                        f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {status} \x0315\x03"
-                    )
+                    irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {status} \x0315\x03"
                     ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {status} \x1b[37;2m\x1b[0m"
             except URLgetException as e:
                 irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
@@ -182,9 +169,7 @@ class YouTube:
             print(ansi_string)
             return irc_string, True
         elif p.h == {"html_title": "YouTube"}:
-            irc_string = (
-                "[\x0304Youtube\x03] \x0307ERROR:\x0308 flagged as bot \x0315\x03"
-            )
+            irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 flagged as bot \x0315\x03"
             ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m flagged as bot \x1b[37;2m\x1b[0m"
             print(ansi_string)
             return irc_string, True