diff --git a/findlinks.py b/findlinks.py
new file mode 100755
index 0000000..de592c9
--- /dev/null
+++ b/findlinks.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+# Parses a WARC file, finds gemtext, resolves links.
+# ./findlinks.py file.warc
+
+import os
+import sys
+import urllib.parse
+
+
+def bail(msg):
+    """Print msg to stderr and exit with a failure status."""
+    print(msg, file=sys.stderr)
+    # A bare exit() would report success (status 0) for what is an error,
+    # and the exit() builtin is only installed by the site module.
+    sys.exit(1)
+
+
+# directly stolen from gemini-demo
+def absolutise_url(base, relative):
+    """Resolve the link target relative against the URL base.
+
+    A target that already carries a scheme is returned unchanged.
+    May raise ValueError when urllib cannot parse either argument.
+    """
+    # Look for a real scheme rather than the substring "://", which also
+    # appears inside queries such as "search?u=gemini://example.org/".
+    if urllib.parse.urlsplit(relative).scheme:
+        return relative
+
+    # Python's URL tools only join schemes they know about, so a gemini base
+    # borrows http:// for the join. Only the leading scheme is swapped
+    # (count=1) so any later "http://" in the path or query is left intact.
+    is_gemini = base.startswith("gemini://")
+    if is_gemini:
+        base = base.replace("gemini://", "http://", 1)
+    joined = urllib.parse.urljoin(base, relative)
+    if is_gemini and joined.startswith("http://"):
+        joined = joined.replace("http://", "gemini://", 1)
+    return joined
+
+
+def checkout(headers, fp):
+    """Print the resolved link targets of one WARC record if it is gemtext.
+
+    headers is the dict of WARC header fields for the record; fp is the
+    binary file positioned at the start of the record block. Records that
+    are not 2x Gemini responses with a text/gemini type print nothing. The
+    caller seeks past the block afterwards, so returning early is safe.
+    """
+    if headers.get("Content-Type") != "application/gemini; msgtype=response":
+        return
+    uri = headers.get("WARC-Target-URI")
+    if uri is None:
+        return
+
+    # Bound the header read by the block length so a block without a newline
+    # cannot pull in the next record or make the read size below negative
+    # (a negative size would read to the end of the file).
+    length = int(headers["Content-Length"])
+    header = fp.readline(length)
+    if not header.startswith(b'2'):
+        return
+    # Skip the two status digits and the separator to reach the meta field.
+    mime = header[3:]
+    if not mime.startswith(b'text/gemini'):
+        return
+
+    body = fp.read(length - len(header))
+    for line in body.split(b'\n'):
+        if not line.startswith(b'=>'):
+            continue
+        # technically speaking this is invalid as it splits on more than spaces and tabs
+        fields = line[2:].split()
+        if not fields:
+            # "=>" with no target: indexing [0] would raise IndexError.
+            continue
+        try:
+            resolved = absolutise_url(uri, fields[0].decode('utf-8'))
+        except ValueError:
+            # Non-UTF-8 bytes (UnicodeDecodeError is a ValueError) or a URL
+            # urllib refuses to parse: skip the link, keep scanning.
+            continue
+        print(resolved)
+
+
+def main():
+    """Scan the WARC file named in sys.argv[1] and print its gemtext links."""
+    if len(sys.argv) < 2:
+        bail("usage: findlinks.py file.warc")
+
+    # The with block closes the file on every exit path, including bail().
+    with open(sys.argv[1], "rb") as fp:
+        while line := fp.readline():
+            if not line.startswith(b'WARC/'):
+                bail("no WARC/")
+
+            headers = dict()
+            while (line := fp.readline()) != b'\r\n':
+                if not line:
+                    bail("truncated headers")
+                # Split on the first colon only: a field value may itself
+                # contain colons, which must stay part of the value.
+                k, sep, v = line.decode('utf-8').rstrip('\r\n').partition(':')
+                if not sep:
+                    bail("malformed header")
+                headers[k] = v.strip()
+
+            if "Content-Length" not in headers:
+                bail("no Content-Length")
+            try:
+                length = int(headers["Content-Length"])
+            except ValueError:
+                length = -1
+            if length < 0:
+                bail("bad Content-Length")
+            pos = fp.tell()
+
+            checkout(headers, fp)
+
+            fp.seek(pos + length)
+            if fp.read(4) != b'\r\n\r\n':
+                bail("misaligned")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/garcon.py b/garcon.py
index e10f85e..6ada364 100644..100755
--- a/garcon.py
+++ b/garcon.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
from sys import stdin, stdout, stderr
import asyncio
import datetime
|