summary refs log tree commit diff
path: root/garcon.py
blob: 9eee43d207b88bd9f9ec60d5550a249be6ffc4a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# assumes utf8
# began writing this on 2024-02-20
# found mozz-archiver on 2024-02-21

import datetime
import socket
import ssl
import uuid
import hashlib
import urllib.parse
from sys import argv, stdout

# based on:
# https://tildegit.org/solderpunk/gemini-demo-1/src/branch/master/gemini-demo.py
# https://tildegit.org/solderpunk/AV-98/src/branch/master/src/av98/client.py
# TODO ciphers etc

# WARC records are written as bytes, so use stdout's underlying binary stream
outf = stdout.buffer

def header(k, v):
	"""Write one `k: v` header line of a WARC record to `outf`.

	`v` is converted with str(); the line is emitted UTF-8 encoded and
	terminated with CRLF.

	Raises ValueError if the value contains a CR or LF, since either one
	would let the value break out of its header line and corrupt the record.
	"""
	v = str(v)
	# explicit raise instead of assert: asserts are stripped under `python -O`
	if '\r' in v or '\n' in v:
		raise ValueError(f"line break in value of header {k!r}: {v!r}")
	outf.write(f"{k}: {v}\r\n".encode('utf-8'))

def warcinfo():
	"""Write a `warcinfo` record describing this tool to `outf`.

	The record body is an application/warc-fields block naming the software
	and the local hostname.
	"""
	payload = (
		"software: garcon (a very early version thereof)\r\n"
		f"hostname: {socket.gethostname()}\r\n"
	).encode('utf-8')

	# WARC/1.0 wants a second-precision UTC timestamp of the form
	# YYYY-MM-DDThh:mm:ssZ; isoformat() would add microseconds and "+00:00"
	now = datetime.datetime.now(tz=datetime.timezone.utc)
	date = now.strftime('%Y-%m-%dT%H:%M:%SZ')

	outf.write(b'WARC/1.0\r\n')
	header("WARC-Type", "warcinfo")
	header("WARC-Date", date)
	header("WARC-Record-ID", f"<urn:uuid:{uuid.uuid4()}>")
	header("Content-Length", len(payload))
	header("Content-Type", "application/warc-fields")

	# blank line ends the headers; two CRLFs end the record
	outf.write(b'\r\n')
	outf.write(payload)
	outf.write(b'\r\n\r\n')

def request_raw(host, port, url):
	"""Send `url` as a Gemini request to host:port and archive the reply.

	Connects over TLS, sends `url` followed by CRLF, reads until the server
	closes the connection and writes a WARC `response` record containing
	everything received to `outf`.

	The server certificate is deliberately not verified; the SHA-256 of the
	certificate is recorded in the X-Server-Fingerprint header instead.

	Returns the raw bytes received from the server.
	Raises ValueError if `url` contains a CR or LF.
	"""
	# explicit raise instead of assert: asserts are stripped under `python -O`,
	# and a line break here would end the request line early
	if '\r' in url or '\n' in url:
		raise ValueError(f"line break in URL: {url!r}")

	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
	context.check_hostname = False
	context.verify_mode = ssl.CERT_NONE

	# context managers so the connection is closed even if a step below raises
	with socket.create_connection((host, port)) as plain:
		with context.wrap_socket(plain, server_hostname=host) as s:
			s.sendall((url + '\r\n').encode("UTF-8"))
			peername = s.getpeername()
			cert = s.getpeercert(True)

			with s.makefile("rb") as fp:
				payload = fp.read()

	# WARC/1.0 wants a second-precision UTC timestamp of the form
	# YYYY-MM-DDThh:mm:ssZ; isoformat() would add microseconds and "+00:00"
	now = datetime.datetime.now(tz=datetime.timezone.utc)
	date = now.strftime('%Y-%m-%dT%H:%M:%SZ')

	# warctools doesn't like WARC/1.1
	outf.write(b'WARC/1.0\r\n')

	# mandatory
	header("WARC-Type", "response")
	header("WARC-Date", date)
	header("WARC-Record-ID", f"<urn:uuid:{uuid.uuid4()}>")
	header("Content-Length", len(payload))

	# optional
	header("WARC-Payload-Digest", 'sha256:' + hashlib.sha256(payload).hexdigest())
	header("WARC-IP-Address", peername[0])
	header("WARC-Target-URI", url)
	header("Content-Type", "application/gemini; msgtype=response") # as in mozz-archiver

	# my extensions
	header("X-Server-Fingerprint", 'sha256:' + hashlib.sha256(cert).hexdigest())

	# blank line ends the headers; two CRLFs end the record
	outf.write(b'\r\n')
	outf.write(payload)
	outf.write(b'\r\n\r\n')

	# TODO check for close_notify
	return payload

def request_url(url):
	"""Fetch and archive a gemini:// URL, returning the raw response bytes.

	Host and port are taken from `url`; the port defaults to 1965.

	Raises ValueError if `url` is not a gemini URL or names no host.
	"""
	p = urllib.parse.urlparse(url)
	# explicit raises instead of assert: asserts are stripped under `python -O`
	if p.scheme != 'gemini':
		raise ValueError(f"not a gemini URL: {url!r}")
	if not p.hostname:
		# a host of None would make the connection go to the local machine
		raise ValueError(f"no host in URL: {url!r}")
	return request_raw(p.hostname, p.port or 1965, url)

def request_url_loop(url, max_redirects=5):
	"""Fetch and archive `url`, following Gemini redirects (status 3x).

	Every response along the chain is archived by request_url(). Following
	stops quietly at the first response that is not a usable redirect: a
	non-3x status, an empty or over-long header line, a missing or
	undecodable target, or a target that is not a gemini URL. Relative
	targets are resolved against the URL that produced them.

	At most `max_redirects` redirects are followed, so a redirect cycle
	cannot keep the loop running forever.
	"""
	# one initial request plus at most max_redirects follow-ups
	for _ in range(max(max_redirects, 0) + 1):
		res = request_url(url)
		status_line = res.split(b'\r\n', 1)[0]

		# <2-digit status> <space> <meta of at most 1024 bytes>
		if 2 + 1 + 1024 < len(status_line):
			break
		# startswith() is also safe on an empty response
		if not status_line.startswith(b'3'):
			break

		fields = status_line.split(b' ', 2)
		if len(fields) < 2 or not fields[1]:
			break

		try:
			target = fields[1].decode('utf-8')
			if not urllib.parse.urlsplit(target).scheme:
				# urljoin() leaves references untouched for schemes it does
				# not know to be hierarchical, so resolve as http and then
				# put the original scheme back
				base = urllib.parse.urlsplit(url)
				joined = urllib.parse.urljoin(base._replace(scheme='http').geturl(), target)
				target = urllib.parse.urlsplit(joined)._replace(scheme=base.scheme).geturl()
			# never follow a redirect that leaves gemini
			if urllib.parse.urlsplit(target).scheme != 'gemini':
				break
		except ValueError:
			# undecodable or unparsable target (UnicodeDecodeError included)
			break

		url = target

# script entry: emit the warcinfo record, then archive the URL given as the
# first command-line argument (following any redirects)
warcinfo()
request_url_loop(argv[1])