summary refs log tree commit diff
path: root/garcon.py
diff options
context:
space:
mode:
Diffstat (limited to 'garcon.py')
-rw-r--r--garcon.py50
1 files changed, 36 insertions, 14 deletions
diff --git a/garcon.py b/garcon.py
index 9eee43d..040b045 100644
--- a/garcon.py
+++ b/garcon.py
@@ -1,22 +1,31 @@
-# assumes utf8
-# began writing this on 2024-02-20
-# found mozz-archiver on 2024-02-21
-
+from sys import stdin, stdout, stderr
 import datetime
+import hashlib
 import socket
 import ssl
-import uuid
-import hashlib
 import urllib.parse
-from sys import argv, stdout
+import uuid
+import traceback
 
 # based on:
 # https://tildegit.org/solderpunk/gemini-demo-1/src/branch/master/gemini-demo.py
 # https://tildegit.org/solderpunk/AV-98/src/branch/master/src/av98/client.py
 # TODO ciphers etc
 
+SIZE_LIMIT = 4 * 1024 * 1024 # 4MB seems reasonable
+
 outf = stdout.buffer
 
+# directly stolen from gemini-demo
+def absolutise_url(base, relative):
+    """Resolve `relative` against `base`; anything containing "://" is returned unchanged."""
+    if "://" not in relative:
+        # urljoin only resolves against schemes it knows, so borrow http:// for
+        # the join; count=1 keeps a "://" inside a path or query from being rewritten.
+        joined = urllib.parse.urljoin(base.replace("gemini://", "http://", 1), relative)
+        relative = joined.replace("http://", "gemini://", 1)
+    return relative
+
 def header(k, v):
 	v = str(v)
 	assert '\n' not in v
@@ -52,7 +61,9 @@ def request_raw(host, port, url):
 	cert = s.getpeercert(True)
 
 	fp = s.makefile("rb")
-	payload = fp.read()
+	payload = fp.read(SIZE_LIMIT)
+	truncated = fp.read() != b''
+	print(truncated)
 	fp.close()
 	s.close()
 
@@ -70,12 +81,14 @@ def request_raw(host, port, url):
 	header("WARC-IP-Address", peername[0])
 	header("WARC-Target-URI", url)
 	header("Content-Type", "application/gemini; msgtype=response") # as in mozz-archiver
+	if truncated:
+		header("WARC-Truncated", "length")
 
 	# my extensions
 	header("X-Server-Fingerprint", 'sha256:' + hashlib.sha256(cert).hexdigest())
 
 	outf.write(b'\r\n')
-	outf.write(payload)
+#outf.write(payload)
 	outf.write(b'\r\n\r\n')
 
 	# TODO check for close_notify
@@ -87,14 +100,23 @@ def request_url(url):
 	return request_raw(p.hostname, p.port or 1965, url)
 
 def request_url_loop(url):
-	while True:
+	# at most 3 requests are made per URL, so a redirect loop cannot run away and needs no detection
+	for _ in range(3):
 		res = request_url(url)
 		header = res.split(b'\r\n')[0]
 		if 2 + 1 + 1024 < len(header): break
-		if header[0] == ord('3'):
-			url = header.split(b' ', 2)[1].decode('utf-8')
+		if len(header) > 0 and header[0] == ord('3'):  # first byte '3': follow the target on the next pass
+			newurl = header.split(b' ', 2)[1].decode('utf-8')
+			url = absolutise_url(url, newurl)  # the target may be relative to the current URL
 		else:
 			break
 
-warcinfo()
-request_url_loop(argv[1])
+if __name__ == '__main__':  # reads one URL per line from stdin
+	warcinfo()
+	outf.flush()
+	for line in stdin:
+		try:
+			request_url_loop(line.rstrip('\r\n'))  # strips any trailing CR/LF run
+			outf.flush()
+		except Exception:  # not bare: Ctrl-C / SystemExit must still end the run
+			print(traceback.format_exc(), file=stderr)  # log and move on to the next URL