# assumes utf8
# began writing this on 2024-02-20
# found mozz-archiver on 2024-02-21

import datetime
import socket
import ssl
import uuid
import hashlib
import urllib.parse
from sys import argv, stdout

# based on:
# https://tildegit.org/solderpunk/gemini-demo-1/src/branch/master/gemini-demo.py
# https://tildegit.org/solderpunk/AV-98/src/branch/master/src/av98/client.py

# TODO ciphers etc

# urllib.parse.urljoin only resolves relative references for schemes listed in
# these registries; gemini is not there by default, so register it once.
for _scheme_registry in (urllib.parse.uses_relative, urllib.parse.uses_netloc):
    if 'gemini' not in _scheme_registry:
        _scheme_registry.append('gemini')

# Default cap on how many redirects request_url_loop() follows.
MAX_REDIRECTS = 5

# Longest response header line we accept (CRLF excluded):
# 2 status digits + 1 space + 1024 bytes of meta.
MAX_HEADER_LEN = 2 + 1 + 1024

outf = stdout.buffer


def _warc_date():
    """Return the current UTC time as ``YYYY-MM-DDThh:mm:ssZ``."""
    now = datetime.datetime.now(tz=datetime.timezone.utc)
    return now.strftime('%Y-%m-%dT%H:%M:%SZ')


def _new_record_id():
    """Return a fresh ``<urn:uuid:...>`` identifier for a WARC record."""
    return f"<urn:uuid:{uuid.uuid4()}>"


def header(k, v):
    """Write one ``k: v`` header line (CRLF-terminated, UTF-8) to ``outf``.

    ``v`` is converted with ``str()``.

    Raises:
        ValueError: if the converted value contains CR or LF, which would
            let it spill into additional header lines.
    """
    v = str(v)
    if '\r' in v or '\n' in v:
        raise ValueError(f"header value for {k} contains a line break: {v!r}")
    outf.write(f"{k}: {v}\r\n".encode('utf-8'))


def warcinfo():
    """Write a ``warcinfo`` record naming the software and local hostname."""
    payload = (
        "software: garcon (a very early version thereof)\r\n"
        f"hostname: {socket.gethostname()}\r\n"
    ).encode('utf-8')

    outf.write(b'WARC/1.0\r\n')
    header("WARC-Type", "warcinfo")
    header("WARC-Date", _warc_date())
    header("WARC-Record-ID", _new_record_id())
    header("Content-Length", len(payload))
    header("Content-Type", "application/warc-fields")
    outf.write(b'\r\n')
    outf.write(payload)
    outf.write(b'\r\n\r\n')


def request_raw(host, port, url):
    """Send ``url`` to ``host:port`` over TLS and archive the full response.

    The request is ``url`` followed by CRLF; everything the server sends
    until it closes the connection is read and written to ``outf`` as one
    WARC ``response`` record.

    Returns:
        The raw response bytes (header line and body).

    Raises:
        ValueError: if ``url`` contains CR or LF.
        OSError: on connection or TLS failures (``ssl.SSLError`` included).
    """
    if '\r' in url or '\n' in url:
        raise ValueError(f"URL contains a line break: {url!r}")

    # Certificate verification is switched off; the SHA-256 of the DER
    # certificate is recorded in the WARC record instead.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    # Timestamp taken before connecting, i.e. when the capture begins.
    capture_date = _warc_date()

    # Context managers close the socket even if the handshake or read fails.
    with socket.create_connection((host, port)) as sock:
        with context.wrap_socket(sock, server_hostname=host) as tls:
            tls.sendall((url + '\r\n').encode("UTF-8"))
            peername = tls.getpeername()
            cert = tls.getpeercert(True)
            with tls.makefile("rb") as fp:
                payload = fp.read()

    # warctools doesn't like WARC/1.1
    outf.write(b'WARC/1.0\r\n')
    # mandatory
    header("WARC-Type", "response")
    header("WARC-Date", capture_date)
    header("WARC-Record-ID", _new_record_id())
    header("Content-Length", len(payload))
    # optional
    header("WARC-Payload-Digest", 'sha256:' + hashlib.sha256(payload).hexdigest())
    header("WARC-IP-Address", peername[0])
    header("WARC-Target-URI", url)
    header("Content-Type", "application/gemini; msgtype=response")  # as in mozz-archiver
    # my extensions
    if cert is not None:  # getpeercert() returns None when no certificate was sent
        header("X-Server-Fingerprint", 'sha256:' + hashlib.sha256(cert).hexdigest())
    outf.write(b'\r\n')
    outf.write(payload)
    outf.write(b'\r\n\r\n')
    # TODO check for close_notify
    return payload


def request_url(url):
    """Fetch and archive a ``gemini://`` URL; return the raw response bytes.

    The port defaults to 1965 when the URL does not name one.

    Raises:
        ValueError: if the scheme is not ``gemini`` or the URL has no host.
    """
    p = urllib.parse.urlparse(url)
    if p.scheme != 'gemini':
        raise ValueError(f"not a gemini URL: {url!r}")
    if not p.hostname:
        raise ValueError(f"URL has no host: {url!r}")
    return request_raw(p.hostname, p.port or 1965, url)


def redirect_target(response, base_url):
    """Return the URL a raw response redirects to, or ``None``.

    ``None`` is returned when the header line is longer than
    ``MAX_HEADER_LEN``, when the status does not start with ``3``, or when
    no target follows the status. An absolute target is returned exactly as
    the server sent it; a relative one is resolved against ``base_url``.

    Raises:
        UnicodeDecodeError: if the target is not valid UTF-8.
    """
    status_line = response.split(b'\r\n', 1)[0]
    if len(status_line) > MAX_HEADER_LEN:
        return None
    if not status_line.startswith(b'3'):
        return None
    fields = status_line.split(maxsplit=2)
    if len(fields) < 2:
        return None
    target = fields[1].decode('utf-8')
    if urllib.parse.urlsplit(target).scheme:
        return target
    return urllib.parse.urljoin(base_url, target)


def request_url_loop(url, max_redirects=MAX_REDIRECTS):
    """Fetch ``url`` and follow redirects, archiving every response.

    At most ``max_redirects`` redirects are followed; after that the loop
    stops without fetching the next target.
    """
    for _ in range(max_redirects + 1):
        response = request_url(url)
        url = redirect_target(response, url)
        if url is None:
            return


def main():
    """Write the warcinfo record, then archive the URL given as ``argv[1]``."""
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} gemini://host/path")
    warcinfo()
    request_url_loop(argv[1])


if __name__ == "__main__":
    main()