diff options
-rw-r--r-- | garcon.py | 100 |
1 files changed, 100 insertions, 0 deletions
# Reconstructed from the diff of a/garcon.py -> b/garcon.py
# (new file mode 100644, index 0000000..9eee43d).
# assumes utf8
# began writing this on 2024-02-20
# found mozz-archiver on 2024-02-21
"""garcon: fetch a Gemini URL and archive the raw exchange as WARC records.

Usage: ``python garcon.py gemini://host/path > capture.warc``

A ``warcinfo`` record is written first, followed by one ``response`` record
per request made (the initial URL plus every redirect that is followed).
All records go to standard output as bytes.
"""

import datetime
import hashlib
import socket
import ssl
import urllib.parse
import uuid
from sys import argv, stdout

# based on:
# https://tildegit.org/solderpunk/gemini-demo-1/src/branch/master/gemini-demo.py
# https://tildegit.org/solderpunk/AV-98/src/branch/master/src/av98/client.py
# TODO ciphers etc

DEFAULT_PORT = 1965
# Cap on redirects followed per starting URL, so a redirect cycle cannot
# loop forever. NOTE(review): 5 is the limit suggested for Gemini clients;
# confirm against the protocol specification.
MAX_REDIRECTS = 5
# Longest header line accepted: 2 status digits + 1 space + 1024 bytes of meta.
MAX_HEADER_LEN = 2 + 1 + 1024

outf = stdout.buffer


def warc_date(when=None):
    """Return *when* formatted as a WARC/1.0 timestamp (``YYYY-MM-DDThh:mm:ssZ``).

    *when* defaults to the current time.  Aware datetimes are converted to
    UTC; naive datetimes are interpreted as local time by ``astimezone``.
    Sub-second precision is dropped: WARC/1.0 timestamps have whole-second
    resolution and use the literal ``Z`` suffix, unlike ``isoformat()``.
    """
    if when is None:
        when = datetime.datetime.now(tz=datetime.timezone.utc)
    return when.astimezone(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')


def header(k, v):
    """Write one ``k: v`` WARC header line to the output stream.

    Raises ValueError if the value contains CR or LF, since either would let
    the value break out of its line and inject further headers.
    """
    v = str(v)
    if '\r' in v or '\n' in v:
        raise ValueError(f'line break in value of header {k!r}')
    outf.write(f'{k}: {v}\r\n'.encode('utf-8'))


def write_record(warc_type, date, payload, extra=()):
    """Write a complete WARC/1.0 record.

    The mandatory headers are emitted first, then each ``(name, value)`` pair
    from *extra* in order, then the blank line, the *payload* bytes and the
    two CRLFs that terminate a record.
    """
    # warctools doesn't like WARC/1.1
    outf.write(b'WARC/1.0\r\n')

    # mandatory
    header("WARC-Type", warc_type)
    header("WARC-Date", date)
    header("WARC-Record-ID", f"<urn:uuid:{uuid.uuid4()}>")
    header("Content-Length", len(payload))

    for name, value in extra:
        header(name, value)

    outf.write(b'\r\n')
    outf.write(payload)
    outf.write(b'\r\n\r\n')


def warcinfo():
    """Write the ``warcinfo`` record describing this tool and the local host."""
    payload = (
        "software: garcon (a very early version thereof)\r\n"
        f"hostname: {socket.gethostname()}\r\n"
    ).encode('utf-8')
    write_record(
        "warcinfo",
        warc_date(),
        payload,
        [("Content-Type", "application/warc-fields")],
    )


def request_raw(host, port, url):
    """Send *url* to ``host:port`` over TLS and archive the full response.

    The server certificate is deliberately not verified; its SHA-256
    fingerprint is recorded in the WARC record instead.

    Returns the raw response bytes (header line and body).
    Raises ValueError if *url* contains CR or LF; socket and TLS errors
    propagate unchanged.
    """
    # Checked before any I/O so that a bad URL can neither smuggle extra
    # request lines to the server nor leave a half-written WARC record.
    if '\r' in url or '\n' in url:
        raise ValueError('line break in request URL')

    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    # WARC-Date is the moment capture began, so take it before connecting.
    started = warc_date()

    # Context managers guarantee the socket is released even when the
    # handshake, the send or the read fails.
    with socket.create_connection((host, port)) as plain:
        with context.wrap_socket(plain, server_hostname=host) as s:
            s.sendall((url + '\r\n').encode("UTF-8"))
            peername = s.getpeername()
            cert = s.getpeercert(True)
            with s.makefile("rb") as fp:
                payload = fp.read()
            # TODO check for close_notify

    # optional
    extra = [
        ("WARC-Payload-Digest", 'sha256:' + hashlib.sha256(payload).hexdigest()),
        ("WARC-IP-Address", peername[0]),
        ("WARC-Target-URI", url),
        ("Content-Type", "application/gemini; msgtype=response"),  # as in mozz-archiver
    ]
    # my extensions
    if cert is not None:  # getpeercert() returns None when no certificate was presented
        extra.append(("X-Server-Fingerprint", 'sha256:' + hashlib.sha256(cert).hexdigest()))

    write_record("response", started, payload, extra)
    return payload


def request_url(url):
    """Fetch and archive a ``gemini://`` URL; return the raw response bytes.

    Raises ValueError if *url* is not a gemini URL with a host.
    """
    p = urllib.parse.urlparse(url)
    if p.scheme != 'gemini' or not p.hostname:
        raise ValueError(f'not a gemini URL with a host: {url!r}')
    return request_raw(p.hostname, p.port or DEFAULT_PORT, url)


def redirect_target(response, base_url):
    """Return the URL a raw Gemini *response* redirects to, or None.

    None is returned when the response is not a redirect (status not 3x) or
    when its header line is missing its CRLF, is longer than the protocol
    allows, or carries an unusable target.  A relative target is resolved
    against *base_url*; a target with its own scheme is returned as-is.
    """
    line, crlf, _ = response.partition(b'\r\n')
    if not crlf or len(line) > MAX_HEADER_LEN:
        return None
    if not line.startswith(b'3'):
        return None

    fields = line.split(b' ', 2)
    if len(fields) < 2 or not fields[1]:
        return None

    try:
        # UnicodeDecodeError is a ValueError, as are urlsplit's complaints
        # about malformed authorities.
        target = fields[1].decode('utf-8')
        if '\r' in target or '\n' in target:
            return None
        if urllib.parse.urlsplit(target).scheme:
            return target
        # urljoin() only resolves relative references for schemes it knows,
        # and "gemini" is not one of them: resolve as http, then switch the
        # scheme back.
        base = urllib.parse.urlsplit(base_url)
        joined = urllib.parse.urljoin(base._replace(scheme='http').geturl(), target)
        return urllib.parse.urlsplit(joined)._replace(scheme=base.scheme).geturl()
    except ValueError:
        return None


def request_url_loop(url, max_redirects=MAX_REDIRECTS):
    """Fetch *url*, following at most *max_redirects* Gemini redirects.

    Every response along the way is archived.  The loop stops at the first
    non-redirect response, at a malformed redirect, or at a redirect that
    leaves the gemini scheme.
    """
    for _ in range(max_redirects + 1):
        response = request_url(url)
        target = redirect_target(response, url)
        if target is None:
            break
        try:
            parsed = urllib.parse.urlsplit(target)
        except ValueError:
            break
        if parsed.scheme != 'gemini' or not parsed.hostname:
            break
        url = target


def main():
    """Command-line entry point: archive the URL given as the only argument."""
    if len(argv) != 2:
        raise SystemExit('usage: garcon.py gemini://host/path')
    warcinfo()
    request_url_loop(argv[1])


if __name__ == "__main__":
    main()