1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
# assumes utf8
# began writing this on 2024-02-20
# found mozz-archiver on 2024-02-21
import datetime
import socket
import ssl
import uuid
import hashlib
import urllib.parse
from sys import argv, stdout
# based on:
# https://tildegit.org/solderpunk/gemini-demo-1/src/branch/master/gemini-demo.py
# https://tildegit.org/solderpunk/AV-98/src/branch/master/src/av98/client.py
# TODO ciphers etc
# Every record is written to the binary layer of stdout: the writers below
# encode their own text and pass bytes to outf.write().
outf = stdout.buffer
def header(k, v):
    """Write one ``name: value`` header line, CRLF-terminated, to ``outf``.

    k -- field name (a str).
    v -- field value; converted with ``str()`` before writing.

    Raises ValueError if the name or the value contains CR or LF.  Some
    values written through here come from the network (for example redirect
    targets), so this must be a real check rather than an ``assert``, which
    is stripped when Python runs with ``-O``.
    """
    v = str(v)
    for part in (k, v):
        if '\r' in part or '\n' in part:
            raise ValueError(f"header field must not contain CR or LF: {part!r}")
    outf.write(f'{k}: {v}\r\n'.encode('utf-8'))
def warcinfo():
    """Write a ``warcinfo`` record naming this software and host to ``outf``.

    The record body is an ``application/warc-fields`` block with a
    ``software`` and a ``hostname`` line.
    """
    payload = (
        "software: garcon (a very early version thereof)\r\n"
        f"hostname: {socket.gethostname()}\r\n"
    ).encode('utf-8')
    # The record declares WARC/1.0, whose WARC-Date is the second-precision
    # "YYYY-MM-DDThh:mm:ssZ" form; isoformat() would add microseconds and a
    # "+00:00" offset instead of "Z".
    now = datetime.datetime.now(tz=datetime.timezone.utc)

    outf.write(b'WARC/1.0\r\n')
    header("WARC-Type", "warcinfo")
    header("WARC-Date", now.strftime('%Y-%m-%dT%H:%M:%SZ'))
    header("WARC-Record-ID", f"<urn:uuid:{uuid.uuid4()}>")
    header("Content-Length", len(payload))
    header("Content-Type", "application/warc-fields")
    outf.write(b'\r\n')
    outf.write(payload)
    # Two CRLFs terminate the record.
    outf.write(b'\r\n\r\n')
def request_raw(host, port, url):
    """Send one Gemini request and archive the raw reply as a WARC record.

    host, port -- where to connect.
    url        -- request line to send (CRLF is appended here); also recorded
                  as WARC-Target-URI.

    Returns the complete response (status line and body) as bytes.  The whole
    response is read into memory before anything is written to ``outf``.

    Raises ValueError if ``url`` contains CR or LF.  Socket and TLS errors
    propagate to the caller.

    The server certificate is not verified; the SHA-256 of its DER encoding
    is recorded in the X-Server-Fingerprint header instead.
    """
    # A real check, not an assert: redirect targets reach this function and
    # asserts vanish under ``python -O``.
    if '\r' in url or '\n' in url:
        raise ValueError(f"URL must not contain CR or LF: {url!r}")

    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    # WARC-Date is the instant the capture began, so sample it before
    # connecting rather than after the transfer has finished.
    started = datetime.datetime.now(tz=datetime.timezone.utc)

    # Context managers close the socket, the TLS layer and the file object
    # even when the handshake, the send or the read raises.
    with socket.create_connection((host, port)) as raw_sock:
        with context.wrap_socket(raw_sock, server_hostname=host) as s:
            s.sendall((url + '\r\n').encode("UTF-8"))
            peername = s.getpeername()
            cert = s.getpeercert(True)  # True -> DER-encoded bytes
            with s.makefile("rb") as fp:
                payload = fp.read()

    # warctools doesn't like WARC/1.1
    outf.write(b'WARC/1.0\r\n')
    # mandatory
    header("WARC-Type", "response")
    # WARC/1.0 timestamp form: second precision with a literal "Z".
    header("WARC-Date", started.strftime('%Y-%m-%dT%H:%M:%SZ'))
    header("WARC-Record-ID", f"<urn:uuid:{uuid.uuid4()}>")
    header("Content-Length", len(payload))
    # optional
    header("WARC-Payload-Digest", 'sha256:' + hashlib.sha256(payload).hexdigest())
    header("WARC-IP-Address", peername[0])
    header("WARC-Target-URI", url)
    header("Content-Type", "application/gemini; msgtype=response")  # as in mozz-archiver
    # my extensions
    header("X-Server-Fingerprint", 'sha256:' + hashlib.sha256(cert).hexdigest())
    outf.write(b'\r\n')
    outf.write(payload)
    outf.write(b'\r\n\r\n')
    # TODO check for close_notify
    return payload
def request_url(url):
    """Fetch and archive a ``gemini://`` URL; return the raw response bytes.

    The host and port are taken from the URL; the port defaults to 1965 when
    the URL gives none.

    Raises ValueError if the URL is not a gemini URL, names no host, or has
    an invalid port.
    """
    p = urllib.parse.urlparse(url)
    # Explicit checks instead of assert, which is removed under ``python -O``.
    if p.scheme != 'gemini':
        raise ValueError(f"not a gemini URL: {url!r}")
    # With no host, create_connection((None, port)) would silently connect
    # to the local machine.
    if not p.hostname:
        raise ValueError(f"gemini URL has no host: {url!r}")
    port = p.port or 1965
    return request_raw(p.hostname, port, url)
def _resolve_redirect(base_url, target):
    """Return ``target`` resolved against ``base_url`` if it is relative."""
    if urllib.parse.urlparse(target).scheme:
        return target  # already absolute; request_url() will vet the scheme
    # urljoin() only resolves relative references for schemes it knows, and
    # gemini is not one of them, so borrow "http" for the join.
    base = urllib.parse.urlparse(base_url)._replace(scheme='http').geturl()
    joined = urllib.parse.urljoin(base, target)
    return urllib.parse.urlparse(joined)._replace(scheme='gemini').geturl()


def request_url_loop(url, max_redirects=5):
    """Fetch ``url`` and follow redirects (status 3x), archiving every hop.

    url           -- the gemini URL to start from.
    max_redirects -- how many redirects to follow before giving up; this
                     stops a redirect loop from running forever.

    Stops quietly on a non-redirect status, or on a response whose header is
    empty, over-long, or carries no usable redirect target.  Returns None.
    """
    max_header_len = 2 + 1 + 1024  # status digits + space + meta
    followed = 0
    while True:
        res = request_url(url)
        status_line = res.split(b'\r\n')[0]
        if len(status_line) > max_header_len:
            break
        # startswith() is also safe on an empty response; indexing [0] is not.
        if not status_line.startswith(b'3'):
            break
        fields = status_line.split(b' ', 2)
        if len(fields) < 2 or not fields[1]:
            break  # redirect without a target
        if followed >= max_redirects:
            break
        try:
            target = fields[1].decode('utf-8')
        except UnicodeDecodeError:
            break  # malformed header; treat like any other bad response
        url = _resolve_redirect(url, target)
        followed += 1
# Script entry: archive the URL given as the first command-line argument.
# Check the argument before writing anything, so a missing URL does not leave
# a lone warcinfo record in the output followed by an IndexError traceback.
if len(argv) < 2:
    raise SystemExit(f"usage: {argv[0]} <gemini-url>")
warcinfo()
request_url_loop(argv[1])
|