From 7a767e3c540b2fdaf227002b5010c3106b5b97f5 Mon Sep 17 00:00:00 2001
From: dzwdz
Date: Thu, 22 Feb 2024 22:22:54 +0100
Subject: limit redirects, filesize

---
Reviewer notes (free text here is ignored by git am / git apply):
* Line structure was rebuilt from a whitespace-collapsed copy of this patch.
  Indentation (4 spaces assumed), blank context lines and the two-line form
  of the `if ...: break` / `else: break` context are reconstructed; verify
  them against the real garcon.py before applying.
* Fixed in added lines, all line-count neutral: `trunacted` -> `truncated`
  (NameError on every request); the debug print now goes to stderr because
  stdout carries the WARC stream; the truncation probe reads one byte
  instead of buffering the whole remainder; the scheme swaps only touch the
  first match; bare `except:` became `except Exception:` so Ctrl-C works.
* Still open: the payload write is commented out, so records carry no body.

 garcon.py | 50 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 14 deletions(-)

(limited to 'garcon.py')

diff --git a/garcon.py b/garcon.py
index 9eee43d..040b045 100644
--- a/garcon.py
+++ b/garcon.py
@@ -1,22 +1,31 @@
-# assumes utf8
-# began writing this on 2024-02-20
-# found mozz-archiver on 2024-02-21
-
+from sys import stdin, stdout, stderr
 import datetime
+import hashlib
 import socket
 import ssl
-import uuid
-import hashlib
 import urllib.parse
-from sys import argv, stdout
+import uuid
+import traceback
 
 # based on:
 # https://tildegit.org/solderpunk/gemini-demo-1/src/branch/master/gemini-demo.py
 # https://tildegit.org/solderpunk/AV-98/src/branch/master/src/av98/client.py
 # TODO ciphers etc
 
+SIZE_LIMIT = 4 * 1024 * 1024 # 4MB seems reasonable
+
 outf = stdout.buffer
 
+# directly stolen from gemini-demo
+def absolutise_url(base, relative):
+    # Absolutise relative links
+    if "://" not in relative:
+        # Python's URL tools somehow only work with known schemes?
+        base = base.replace("gemini://","http://", 1)
+        relative = urllib.parse.urljoin(base, relative)
+        relative = relative.replace("http://", "gemini://", 1)
+    return relative
+
 def header(k, v):
     v = str(v)
     assert '\n' not in v
@@ -52,7 +61,9 @@ def request_raw(host, port, url):
     cert = s.getpeercert(True)
 
     fp = s.makefile("rb")
-    payload = fp.read()
+    payload = fp.read(SIZE_LIMIT)
+    truncated = fp.read(1) != b''
+    print(truncated, file=stderr)
     fp.close()
     s.close()
 
@@ -70,12 +81,14 @@ def request_raw(host, port, url):
     header("WARC-IP-Address", peername[0])
     header("WARC-Target-URI", url)
     header("Content-Type", "application/gemini; msgtype=response") # as in mozz-archiver
+    if truncated:
+        header("WARC-Truncated", "length")
 
     # my extensions
     header("X-Server-Fingerprint", 'sha256:' + hashlib.sha256(cert).hexdigest())
 
     outf.write(b'\r\n')
-    outf.write(payload)
+#outf.write(payload)
     outf.write(b'\r\n\r\n')
 
     # TODO check for close_notify
@@ -87,14 +100,23 @@ def request_url(url):
     return request_raw(p.hostname, p.port or 1965, url)
 
 def request_url_loop(url):
-    while True:
+    # i only allow 3 redirects, so detecting loops isn't really necessary
+    for _ in range(3):
         res = request_url(url)
         header = res.split(b'\r\n')[0]
         if 2 + 1 + 1024 < len(header):
             break
-        if header[0] == ord('3'):
-            url = header.split(b' ', 2)[1].decode('utf-8')
+        if len(header) > 0 and header[0] == ord('3'):
+            newurl = header.split(b' ', 2)[1].decode('utf-8')
+            url = absolutise_url(url, newurl)
         else:
             break
-warcinfo()
-request_url_loop(argv[1])
+if __name__ == '__main__':
+    warcinfo()
+    outf.flush()
+    for line in stdin:
+        try:
+            request_url_loop(line.rstrip('\r\n').rstrip('\n'))
+            outf.flush()
+        except Exception:
+            print(traceback.format_exc(), file=stderr)
-- 
cgit 1.4.1-2-gfad0