summary refs log tree commit diff
path: root/garcon.py
diff options
context:
space:
mode:
author dzwdz 2024-02-22 19:34:32 +0100
committer dzwdz 2024-02-22 19:34:32 +0100
commit c88059af91bb201b5d27e3cbf3166d8840c3f376 (patch)
tree 3665c7e284e64eddbba7d53433f64e0afc0e125f /garcon.py
git init
Diffstat (limited to 'garcon.py')
-rw-r--r-- garcon.py 100
1 files changed, 100 insertions, 0 deletions
diff --git a/garcon.py b/garcon.py
new file mode 100644
index 0000000..9eee43d
--- /dev/null
+++ b/garcon.py
@@ -0,0 +1,100 @@
+# assumes utf8
+# began writing this on 2024-02-20
+# found mozz-archiver on 2024-02-21
+
+import datetime
+import socket
+import ssl
+import uuid
+import hashlib
+import urllib.parse
+from sys import argv, stdout
+
+# based on:
+# https://tildegit.org/solderpunk/gemini-demo-1/src/branch/master/gemini-demo.py
+# https://tildegit.org/solderpunk/AV-98/src/branch/master/src/av98/client.py
+# TODO ciphers etc
+
+# binary stdout; header() and the record writers below emit their bytes here
+outf = stdout.buffer
+
+def header(k, v):
+	v = str(v)
+	assert '\n' not in v
+	outf.write((k + ': ' + str(v) + '\r\n').encode('utf-8'))
+
+def warcinfo():
+	payload  = "software: garcon (a very early version thereof)\r\n"
+	payload += f"hostname: {socket.gethostname()}\r\n"
+	payload  = payload.encode('utf-8')
+
+	outf.write(b'WARC/1.0\r\n')
+	header("WARC-Type", "warcinfo")
+	header("WARC-Date", datetime.datetime.now(tz=datetime.timezone.utc).isoformat())
+	header("WARC-Record-ID", f"<urn:uuid:{uuid.uuid4()}>")
+	header("Content-Length", len(payload))
+	header("Content-Type", "application/warc-fields")
+
+	outf.write(b'\r\n')
+	outf.write(payload)
+	outf.write(b'\r\n\r\n')
+
+def request_raw(host, port, url):
+	assert '\n' not in url
+
+	s = socket.create_connection((host, port))
+	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+	context.check_hostname = False
+	context.verify_mode = ssl.CERT_NONE
+	s = context.wrap_socket(s, server_hostname=host)
+
+	s.sendall((url + '\r\n').encode("UTF-8"))
+	peername = s.getpeername()
+	cert = s.getpeercert(True)
+
+	fp = s.makefile("rb")
+	payload = fp.read()
+	fp.close()
+	s.close()
+
+	# warctools doesn't like WARC/1.1
+	outf.write(b'WARC/1.0\r\n')
+
+	# mandatory
+	header("WARC-Type", "response")
+	header("WARC-Date", datetime.datetime.now(tz=datetime.timezone.utc).isoformat())
+	header("WARC-Record-ID", f"<urn:uuid:{uuid.uuid4()}>")
+	header("Content-Length", len(payload))
+
+	# optional
+	header("WARC-Payload-Digest", 'sha256:' + hashlib.sha256(payload).hexdigest())
+	header("WARC-IP-Address", peername[0])
+	header("WARC-Target-URI", url)
+	header("Content-Type", "application/gemini; msgtype=response") # as in mozz-archiver
+
+	# my extensions
+	header("X-Server-Fingerprint", 'sha256:' + hashlib.sha256(cert).hexdigest())
+
+	outf.write(b'\r\n')
+	outf.write(payload)
+	outf.write(b'\r\n\r\n')
+
+	# TODO check for close_notify
+	return payload
+
+def request_url(url):
+	p = urllib.parse.urlparse(url)
+	assert p.scheme == 'gemini'
+	return request_raw(p.hostname, p.port or 1965, url)
+
+def request_url_loop(url):
+	while True:
+		res = request_url(url)
+		header = res.split(b'\r\n')[0]
+		if 2 + 1 + 1024 < len(header): break
+		if header[0] == ord('3'):
+			url = header.split(b' ', 2)[1].decode('utf-8')
+		else:
+			break
+
+warcinfo()
+request_url_loop(argv[1])