1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#!/usr/bin/env python3
from html.parser import HTMLParser
from urllib.request import urlopen
class SoundCloud:
    """Fetch and summarise SoundCloud track metadata for links found in text.

    Every method works both on an instance and when the class itself is
    passed as ``self`` (the ``__main__`` entry point of this file calls
    ``SoundCloud.yt(SoundCloud, url)``).

    ``mesg`` relies on a ``util`` attribute that is not defined in this class
    and must be supplied from outside.
    """

    video_type = ""

    def mesg(self, msg, t=None):
        """Forward *msg* and the optional *t* to ``self.util.mesg``."""
        self.util.mesg(msg, t)

    def match_urls(self, str):
        """Return the unique SoundCloud links contained in *str*.

        A whitespace-separated word qualifies when it contains
        ``https://soundcloud.com`` and starts with ``http``. Duplicates are
        dropped and first-seen order is preserved.
        """
        # Filter while building the list: removing items from a list that is
        # being iterated over skips elements and lets bad entries through.
        links = (
            word
            for word in str.split()
            if "https://soundcloud.com" in word and word.startswith("http")
        )
        return list(dict.fromkeys(links))

    class parseprop(HTMLParser):
        """HTML parser that collects selected ``itemprop`` values into ``self.h``."""

        def __init__(self):
            super().__init__()
            # Only these itemprop names are stored.
            self.itemprops_list = ["name", "duration", "byArtist"]
            # Collected properties: itemprop name -> value of "content".
            self.h = {}
            # NOTE(review): nothing in this class ever sets this flag to True,
            # so <meta itemprop="name"> tags are currently always skipped.
            # Presumably it was meant to be raised once the byArtist element
            # is seen -- confirm the intent before relying on "name".
            self.readartist = False

        def handle_starttag(self, tag, attrs):
            """Store the ``content`` of a recognised ``itemprop`` tag in ``self.h``.

            Only ``meta``, ``link`` and ``div`` tags are considered, and only
            when they carry an ``itemprop`` attribute or ``name="title"``.
            A tag with any attribute other than ``itemprop``, ``content`` or
            ``name="title"`` is ignored entirely. Tags that lack either a
            key or a ``content`` value are ignored instead of raising.
            """
            if tag not in ("meta", "link", "div"):
                return
            has_itemprop = any("itemprop" in pair for pair in attrs)
            if not has_itemprop and ("name", "title") not in attrs:
                return
            if (
                tag == "meta"
                and ("itemprop", "name") in attrs
                and not self.readartist
            ):
                return

            # Track key and value separately so attribute order is irrelevant.
            key = None
            value = None
            for attr_name, attr_value in attrs:
                if attr_name == "itemprop":
                    if attr_value not in self.itemprops_list:
                        return
                    if tag == "link" and attr_value == "name":
                        key = "channelName"
                    else:
                        key = attr_value
                elif attr_name == "content":
                    value = attr_value
                elif attr_name == "name" and attr_value == "title":
                    key = attr_value
                else:
                    return

            if key is None or value is None:
                return
            self.h[key] = value

    @staticmethod
    def fmt_dur(dur):
        """Convert a duration such as ``PT00H03M25S`` into a short label.

        The ``H`` and ``M`` components are optional and whatever remains is
        read as seconds. Overflowing fields are normalised (75 minutes becomes
        1 hour 15 minutes).

        Returns ``"LIVE"`` for a zero duration, ``"<h>h <m>m <s>s"`` when the
        duration reaches an hour, and otherwise ``"<s>s"``, ``"<m>m"`` or
        ``"<m>m <s>s"``.

        Raises:
            ValueError: if a component is not an integer.
        """
        rest = dur.removeprefix("PT").removesuffix("S")
        fields = []
        for unit in ("H", "M"):
            number, found, tail = rest.partition(unit)
            if found:
                fields.append(int(number))
                rest = tail
            else:
                fields.append(0)
        fields.append(int(rest) if rest else 0)

        hours, minutes, seconds = fields
        total = hours * 3600 + minutes * 60 + seconds
        hours, remainder = divmod(total, 3600)
        minutes, seconds = divmod(remainder, 60)

        if total == 0:
            return "LIVE"
        if hours:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes == 0:
            return f"{seconds}s"
        if seconds == 0:
            return f"{minutes}m"
        return f"{minutes}m {seconds}s"

    @staticmethod
    def _fetch(url):
        """Return the decoded body of *url*, or ``None`` if it cannot be read."""
        # urlopen also understands file:// and similar schemes; the URL comes
        # from outside, so only plain web URLs are allowed through.
        if not url.startswith(("http://", "https://")):
            return None
        try:
            with urlopen(url) as response:
                return response.read().decode("utf-8", errors="replace")
        except (OSError, ValueError):
            # URLError and HTTPError derive from OSError; urlopen raises
            # ValueError for malformed URLs.
            return None

    def yt(self, url):
        """Download *url*, extract its metadata and build the reply strings.

        Returns:
            A ``(irc_string, is_error)`` tuple. ``is_error`` is ``True`` when
            the page could not be fetched or contained none of the expected
            properties. The ANSI-coloured variant of the message is printed
            to stdout.
        """
        url = url.rstrip("\x01")
        parser = self.parseprop()
        page = self._fetch(url)
        if page is not None:
            parser.feed(page)

        if not parser.h:
            irc_string = "[\x0304SoundCloud\x03] \x0307ERROR:\x0308 got no data from server! \x0315(check your URL for typos!)\x03"
            ansi_string = "[\x1b[31mSoundCloud\x1b[0m] \x1b[33;2mERROR:\x1b[33;1m got no data from server! \x1b[37;2m(check your URL for typos!)\x1b[0m"
            print(ansi_string)
            return irc_string, True

        info = parser.h
        if "duration" in info:
            try:
                info["duration"] = self.fmt_dur(info["duration"])
            except ValueError:
                # Unrecognised duration text: keep the raw value rather than
                # failing the whole lookup.
                pass

        # TODO: build the real reply from the parsed fields. The templates
        # below are kept for reference; they use keys (title, uploadDate,
        # interactionCount) that the parser does not collect.
        #irc_string = f"[\x0303SoundCloud\x03] \x02{y['title']}\x02 ({y['duration']}) uploaded by \x1d{y['channelName']}\x1d on {y['uploadDate']}, {y['interactionCount']:,} views"
        #ansi_string = f"[\x1b[32mSoundCloud\x1b[0m] \x1b[1m{y['title']}\x1b[0m ({y['duration']}) uploaded by \x1b[03m{y['channelName']}\x1b[0m on {y['uploadDate']}, {y['interactionCount']:,} views"
        irc_string = "dummy"
        ansi_string = "dummy"
        # Shows the parsed fields while the reply is still a placeholder.
        print(info)
        print(ansi_string)
        return irc_string, False
if __name__ == "__main__":
    import sys

    # Command-line use: look up the single URL given as the first argument.
    # Exit with a usage message instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} URL")
    SoundCloud.yt(SoundCloud, sys.argv[1])
|