summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 1
-rw-r--r-- requirements.txt 2
-rwxr-xr-x soundcloud.py 6
-rwxr-xr-x test.sh 21
-rwxr-xr-x youtube.py 132
-rwxr-xr-x youtube.py.old 179
6 files changed, 233 insertions, 108 deletions
diff --git a/.gitignore b/.gitignore
index f05327d..7d94e97 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ venv
pass.txt
local_config.py
log.txt
+yt_keys.json
# ---> Python
# Byte-compiled / optimized / DLL files
diff --git a/requirements.txt b/requirements.txt
index 8376b04..8a70ebc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-ircstates ~=0.11.9
+ircstates
diff --git a/soundcloud.py b/soundcloud.py
index ae17f9d..a1f5f91 100755
--- a/soundcloud.py
+++ b/soundcloud.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
-from urllib.parse import urlencode, urlparse
from urllib.request import urlopen
+from urllib.parse import urlencode, urlparse
from json import loads as json_loads
@@ -29,7 +29,7 @@ class SoundCloud:
url = f"https://soundcloud.com/oembed?{urlencode([('url',url),('format','json')])}"
data = urlopen(url).read().decode()
data = json_loads(data)
- print(data)
+ #print(data)
# print(data["title"].removesuffix(" by "+data["author_name"]),data["author_name"])
try:
artist = data["author_name"]
@@ -37,7 +37,7 @@ class SoundCloud:
except KeyError:
title = ""
artist = ""
- print(title.removesuffix(" by " + artist), "|", artist)
+ #print(title.removesuffix(" by " + artist), "|", artist)
if title == "":
irc_string = "[\x0304SoundCloud\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
ansi_string = "[\x1b[31mSoundCloud\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..fdcbba3
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+#just random urls to test the modules
+
+./bandcamp.py https://austinwintory.bandcamp.com/album/stray-gods
+./bandcamp.py https://soundoftheaviators.bandcamp.com/track/writing-on-the-walls-2
+
+# SoundCloud and Spotify append extra query parameters (e.g. ?utm_source=..., ?si=...) to links copied via "share"
+./soundcloud.py https://soundcloud.com/lindseystomp/sets/artemis-3
+./soundcloud.py 'https://soundcloud.com/lindseystomp/sets/artemis-3?utm_source=clipboard&utm_medium=text&utm_campaign=social_sharing'
+./soundcloud.py https://soundcloud.com/user-152508755/thefatrat-maisy-kay-the-storm-epic-orchestra-remix
+./soundcloud.py 'https://soundcloud.com/user-152508755/thefatrat-maisy-kay-the-storm-epic-orchestra-remix?utm_source=clipboard&utm_medium=text&utm_campaign=social_sharing'
+
+./spotify.py https://open.spotify.com/track/4pY1okPrJvIPBQM0t4i28v
+./spotify.py 'https://open.spotify.com/track/4sOX1nhpKwFWPvoMMExi3q?si=c880ccca72ee435d'
+./spotify.py https://open.spotify.com/album/2hvCFY4DYaKzzkNYd60oS3
+./spotify.py 'https://open.spotify.com/album/1u2ACTYzVNK3vSLG0Ah4H3?si=c1ZT_3YeS8SXkrbErFl6bw'
+
+# YouTube oEmbed supports playlists, but ONLY for /playlist?list=... URLs, *NOT* for /watch?v=...&list=... URLs
+./youtube.py https://www.youtube.com/watch?v=EUD9UTwXAZY
+./youtube.py https://www.youtube.com/playlist?list=PL0bbUqXsNHE0ZELST3vW_11GDHKDAwLYh
+./youtube.py 'https://www.youtube.com/watch?v=eneLP_P1_fg&list=PL0bbUqXsNHE0ZELST3vW_11GDHKDAwLYh&index=2'
diff --git a/youtube.py b/youtube.py
index 41dd18b..a75f52b 100755
--- a/youtube.py
+++ b/youtube.py
@@ -1,11 +1,10 @@
#!/usr/bin/env python3
-from html.parser import HTMLParser
from urllib.request import urlopen
-
+from urllib.error import HTTPError
+from urllib.parse import urlencode, urlparse, parse_qs
+from json import loads as json_loads
class YouTube:
- video_type = ""
-
def mesg(self, msg, t=None):
self.util.mesg(msg, t)
@@ -52,119 +51,44 @@ class YouTube:
"https://www.youtube.com/clip/"
)
- class parseprop(HTMLParser):
- def __init__(self):
- print("yt parse init")
- HTMLParser.__init__(self)
- self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
- self.h = {}
- if YouTube.video_type == "clip":
- self.itemprops_list += ["description"]
- print("it is a clip!")
-
- def handle_starttag(self, tag, attrs):
- if (tag != "meta" and tag != "link") or (
- (
- [i for i in attrs if "itemprop" in i] == []
- and ("name", "title") not in attrs
- )
- or (tag == "meta" and ("itemprop", "name") in attrs)
- ):
- return
- # print(self,tag,attrs)
- for k, v in attrs:
- if k == "itemprop":
- if v not in self.itemprops_list:
- return
- x = [v]
- if tag == "link" and v == "name":
- x = ["channelName"]
- elif k == "content":
- if attrs[0][1] == "interactionCount":
- v = int(v)
- x += [v]
- elif k == "name" and v == "title":
- x = [v]
- else:
- return
- self.h.update({x[0]: x[1]})
- # print(x[0],"=",x[1])
-
- def fmt_dur(dur):
- h, m, s = 0, 0, 0
- m = dur[2:].split("M")
- s = int(m[1][:-1])
- m = int(m[0])
- if m >= 60:
- h = m // 60
- m = round((m / 60 - h) * 60)
- return f"{h}h {m}m {s}s"
- elif h == 0 and m == 0 and s == 0:
- return "LIVE"
- elif m == 0 and s != 0:
- return f"{s}s"
- elif s == 0:
- return f"{m}m"
- else:
- return f"{m}m {s}s"
-
def yt(self, url):
+ irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
+ ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
# self.util.mesg("dbg hello")
url = url.rstrip("\x01")
- self.video_type = (
- "clip"
- if self.is_clip(url)
- else (
- "shorts"
- if self.is_ytshorts(url)
- else (
- "music"
- if self.is_ytmusic(url)
- else "embed" if self.is_embed(url) else "video"
- )
- )
- )
- video_type = self.video_type
- if video_type == "embed":
+ if self.is_embed(url):
videoId = url.split("/")[4]
url = f"https://www.youtube.com/watch?v={videoId}"
- elif video_type == "music":
+ elif self.is_ytmusic(url):
for i in url.split("?")[1].split("&"):
if i[0:2] == "v=":
videoId = i[2:]
url = f"https://www.youtube.com/watch?v={videoId}"
- elif video_type == "shorts":
+ elif self.is_ytshorts(url):
videoId = url.split("?")[0].split("/")[-1]
url = f"https://www.youtube.com/watch?v={videoId}"
- p = self.parseprop()
- # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
- data = b""
- if self.premature_optimization:
- url_h = urlopen(url)
- # <body> appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment)
- # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads
- for i in range(24):
- data += url_h.readline()
- url_h.close()
- data = data.decode() # bytes to utf-8
- if (
- data.find('meta itemprop="duration"') == -1
- or data.find('meta itemprop="name"') == -1
- ): # acts as both fallback for optimization, and in case optimization's turned off
- # just read all of the html
+ url = urlparse(url)
+ qs=parse_qs(url.query);video_id=qs['v'][0]
+ try: playlist_id=qs['list'][0]
+ except KeyError: playlist_id=None
+ if self.prefer_playlist and playlist_id:
+ url = url.scheme + "://" + url.netloc + "/playlist?list=" + playlist_id
+ else:
+ url = url.scheme + "://" + url.netloc + url.path + "?v=" + video_id
+ url = f"https://www.youtube.com/oembed?{urlencode([('url',url),('format','json')])}"
+ try:
+ print(url," and ",playlist_id)
data = urlopen(url).read().decode()
- # print(f"\x1b[31m my data is: {data}\x1b[0m")
- p.feed(data)
- if p.h == {}:
- irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
- ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
+ data = json_loads(data)
+ title=data['title']
+ channelName=data['author_name']
+ except HTTPError as e:
+ irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
+ ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
print(ansi_string)
return irc_string, True
- y = p.h
- print(y)
- y.update(duration=self.fmt_dur(y["duration"]))
- irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
- ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
+ irc_string = f"[\x0303Youtube\x03] \x02{title}\x02 uploaded by \x1d{channelName}\x1d"
+ ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{title}\x1b[0m uploaded by \x1b[03m{channelName}\x1b[0m"
print(ansi_string)
return irc_string, False
@@ -172,5 +96,5 @@ class YouTube:
if __name__ == "__main__":
import sys
- YouTube.premature_optimization = False
+ YouTube.prefer_playlist=False
YouTube.yt(YouTube, sys.argv[1])
diff --git a/youtube.py.old b/youtube.py.old
new file mode 100755
index 0000000..b09b1d1
--- /dev/null
+++ b/youtube.py.old
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+from html.parser import HTMLParser
+from urllib.request import urlopen
+from urllib.error import HTTPError
+
+class YouTube:
+ video_type = ""
+
+ def mesg(self, msg, t=None):
+ self.util.mesg(msg, t)
+
+ def match_urls(self, str):
+ r = [
+ i
+ for i in str.split()
+ if "https://youtu.be/" in i
+ or "https://www.youtube.com/watch?v=" in i
+ or "https://m.youtube.com/watch?v=" in i
+ or "https://youtube.com/watch?v=" in i
+ or "https://www.youtube.com/embed/" in i
+ or "https://www.youtube-nocookie.com/embed/" in i
+ or "https://music.youtube.com/watch?v=" in i
+ or "https://youtube.com/shorts/" in i
+ or "https://www.youtube.com/shorts/" in i
+ or "https://www.youtube.com/clip/" in i
+ or "https://youtube.com/clip/" in i
+ ]
+ r = list(dict.fromkeys(r))
+ n = 0
+ for i in r:
+ if not i.startswith("http"):
+ r.pop(n)
+ n += 1
+
+ return r
+
+ def is_embed(str):
+ return str.startswith("https://www.youtube.com/embed/") or str.startswith(
+ "https://www.youtube-nocookie.com/embed/"
+ )
+
+ def is_ytmusic(str):
+ return str.startswith("https://music.youtube.com/watch?v=")
+
+ def is_ytshorts(str):
+ return str.startswith("https://youtube.com/shorts/") or str.startswith(
+ "https://www.youtube.com/shorts/"
+ )
+
+ def is_clip(str):
+ return str.startswith("https://youtube.com/clip/") or str.startswith(
+ "https://www.youtube.com/clip/"
+ )
+
+ class parseprop(HTMLParser):
+ def __init__(self):
+ #print("yt parse init")
+ HTMLParser.__init__(self)
+ self.itemprops_list = ["name", "duration", "uploadDate", "interactionCount"]
+ self.h = {}
+ if YouTube.video_type == "clip":
+ self.itemprops_list += ["description"]
+ print("it is a clip!")
+
+ def handle_starttag(self, tag, attrs):
+ if (tag != "meta" and tag != "link") or (
+ (
+ [i for i in attrs if "itemprop" in i] == []
+ and ("name", "title") not in attrs
+ )
+ or (tag == "meta" and ("itemprop", "name") in attrs)
+ ):
+ return
+ # print(self,tag,attrs)
+ for k, v in attrs:
+ if k == "itemprop":
+ if v not in self.itemprops_list:
+ return
+ x = [v]
+ if tag == "link" and v == "name":
+ x = ["channelName"]
+ elif k == "content":
+ if attrs[0][1] == "interactionCount":
+ v = int(v)
+ x += [v]
+ elif k == "name" and v == "title":
+ x = [v]
+ else:
+ return
+ self.h.update({x[0]: x[1]})
+ # print(x[0],"=",x[1])
+
+ def fmt_dur(dur):
+ h, m, s = 0, 0, 0
+ m = dur[2:].split("M")
+ s = int(m[1][:-1])
+ m = int(m[0])
+ if m >= 60:
+ h = m // 60
+ m = round((m / 60 - h) * 60)
+ return f"{h}h {m}m {s}s"
+ elif h == 0 and m == 0 and s == 0:
+ return "LIVE"
+ elif m == 0 and s != 0:
+ return f"{s}s"
+ elif s == 0:
+ return f"{m}m"
+ else:
+ return f"{m}m {s}s"
+
+ def yt(self, url):
+ irc_string = "[\x0304Youtube\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
+ ansi_string = "[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
+ # self.util.mesg("dbg hello")
+ url = url.rstrip("\x01")
+ self.video_type = (
+ "clip"
+ if self.is_clip(url)
+ else (
+ "shorts"
+ if self.is_ytshorts(url)
+ else (
+ "music"
+ if self.is_ytmusic(url)
+ else "embed" if self.is_embed(url) else "video"
+ )
+ )
+ )
+ video_type = self.video_type
+ if video_type == "embed":
+ videoId = url.split("/")[4]
+ url = f"https://www.youtube.com/watch?v={videoId}"
+ elif video_type == "music":
+ for i in url.split("?")[1].split("&"):
+ if i[0:2] == "v=":
+ videoId = i[2:]
+ url = f"https://www.youtube.com/watch?v={videoId}"
+ elif video_type == "shorts":
+ videoId = url.split("?")[0].split("/")[-1]
+ url = f"https://www.youtube.com/watch?v={videoId}"
+ p = self.parseprop()
+ # use premature optimization? it should be SLIGHTLY faster, but can sometimes fail
+ data = b""
+ if self.premature_optimization:
+ url_h = urlopen(url)
+ # <body> appears on approximately line 21 or 22, so we read 24 lines to be safe (23-25 should be license comment)
+ # I tried to read byte amounts but it's hard to make sure no invalid utf8 bytes happen due to partial reads
+ for i in range(24):
+ data += url_h.readline()
+ url_h.close()
+ data = data.decode() # bytes to utf-8
+ if (
+ data.find('meta itemprop="duration"') == -1
+ or data.find('meta itemprop="name"') == -1
+ ): # acts as both fallback for optimization, and in case optimization's turned off
+ # just read all of the html
+ try: data = urlopen(url).read().decode()
+ except HTTPError as e:
+ irc_string = f"[\x0304Youtube\x03] \x0307ERROR:\x0308 {e} \x0315\x03"
+ ansi_string = f"[\x1b[31mYoutube\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m {e} \x1b[37;2m\x1b[0m"
+ # print(f"\x1b[31m my data is: {data}\x1b[0m")
+ p.feed(data)
+ if p.h == {}:
+ print(ansi_string)
+ return irc_string, True
+ y = p.h
+ print(y)
+ y.update(duration=self.fmt_dur(y["duration"]))
+ irc_string = f"[\x0303Youtube\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
+ ansi_string = f"[\x1b[32mYoutube\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
+ print(ansi_string)
+ return irc_string, False
+
+
+if __name__ == "__main__":
+ import sys
+
+ YouTube.premature_optimization = False
+ YouTube.yt(YouTube, sys.argv[1])