From 3e22951e03a20bf7836b6c0e2c25e2cb7059a2f1 Mon Sep 17 00:00:00 2001
From: dzwdz
Date: Mon, 4 Mar 2024 23:30:01 +0100
Subject: python article, slight style changes
---
src/feed.ass | 1 +
src/our.md | 7 --
src/python_recursion.md | 196 ++++++++++++++++++++++++++++++++++++++++++++++++
src/tmpl.html | 12 ++-
4 files changed, 208 insertions(+), 8 deletions(-)
create mode 100644 src/python_recursion.md
(limited to 'src')
diff --git a/src/feed.ass b/src/feed.ass
index c058348..93dcc97 100644
--- a/src/feed.ass
+++ b/src/feed.ass
@@ -1,5 +1,6 @@
# actually simple syndication
# https://tilde.town/~dzwdz/ass/
+2024-03-04 https://tilde.town/~dzwdz/blog/python_recursion.html CPython hates deeply recursive code
2023-08-19 https://tilde.town/~dzwdz/blog/tui.html On TUIs
2023-07-23 https://tilde.town/~dzwdz/blog/our.html /town/our, a tildebrained irc bot
2023-05-25 https://tilde.town/~dzwdz/blog/feeds.html Linear feeds are a dark pattern
diff --git a/src/our.md b/src/our.md
index de507ac..415202a 100644
--- a/src/our.md
+++ b/src/our.md
@@ -3,13 +3,6 @@ title: /town/our, a tildebrained irc bot
date: 2023-07-23
---
-
-
tl;dr: `` is an IRC bot on tilde.town. Commands are just executables in a
world-writeable directory. If you're on town, you can create new commands just
by putting a script in `/town/our`.
diff --git a/src/python_recursion.md b/src/python_recursion.md
new file mode 100644
index 0000000..277e515
--- /dev/null
+++ b/src/python_recursion.md
@@ -0,0 +1,196 @@
+---
+title: CPython hates deeply recursive code
+date: 2024-03-04
+---
+
+Consider the following piece of code:
+
+```py
+import sys; sys.setrecursionlimit(1000000000)
+
+def rec(i): # pretend we're searching a very deep tree with few branches
+ if i == 1000000: return
+ rec(i+1)
+ if i%100000 == 0: rec(i+1)
+rec(0)
+```
+
+Let's run it for a while:
+
+```
+$ time python3 a.py
+
+real 0m52.090s
+user 0m29.546s
+sys 0m22.020s
+```
+
+See anything odd?
+Despite doing pretty much pure compute (if you can call it that) -- without any
+IO or anything that would require calling out to the kernel -- this script
+spends almost half its time in the kernel.
+
+What if we strace it?
+
+```
+$ strace python3 a.py | head
+execve("/usr/bin/python3", ["python3", "a.py"], 0x7ffe0abbf298 /* 41 vars */) = 0
+[...]
+munmap(0x7f31b044a000, 16384) = 0
+munmap(0x7f31b044e000, 16384) = 0
+munmap(0x7f31b0452000, 16384) = 0
+munmap(0x7f31b0456000, 16384) = 0
+munmap(0x7f31b045a000, 16384) = 0
+munmap(0x7f31b045e000, 16384) = 0
+munmap(0x7f31b0462000, 16384) = 0
+munmap(0x7f31b0466000, 16384) = 0
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0466000
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0462000
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b045e000
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b045a000
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0456000
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0452000
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b044e000
+mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b044a000
+[...]
+```
+
+It's hard to portray via text, but the terminal is spammed with pages upon pages
+of `mmap`s and `munmap`s, constantly allocating and deallocating the same pages.
+Hopefully it's obvious why this is wrong.
+
+This behaviour can affect real code too -- I first encountered it when solving
+some Project Euler problem.
+I asked about it in `#python`, and the consenus was that CPython just sucks for
+any deeply recursive code -- and that's it.
+I didn't end up filing a bug, and I've since lost[^1] the chatlog too.
+
+Discovering this has actually changed my coding style, too -- every time I'm
+writing a function which I think might run into this behaviour (so, as a CS
+student, pretty often), I end up sacrificing the readability of a straightforward
+recursive implementation, to instead try to minimize the recursion depth as much
+as possible to work around this bug.
+
+[^1]: I got some person to send me a copy... but I lost that copy too. If you've been idling in #python, just look for messages from dzwdz.
+
+## bonus: a backtrace
+
+```
+$ gdb --args python3 a.py
+GNU gdb (Debian 13.1-3) 13.1
+Copyright (C) 2023 Free Software Foundation, Inc.
+License GPLv3+: GNU GPL version 3 or later
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
+Type "show copying" and "show warranty" for details.
+This GDB was configured as "x86_64-linux-gnu".
+Type "show configuration" for configuration details.
+For bug reporting instructions, please see:
+.
+Find the GDB manual and other documentation resources online at:
+ .
+
+For help, type "help".
+Type "apropos word" to search for commands related to "word"...
+Reading symbols from python3...
+Reading symbols from /usr/lib/debug/.build-id/ac/175ec7666754cf818b271b4fdc2761ac6865f2.debug...
+(gdb) run
+Starting program: /usr/bin/python3 a.py
+[Thread debugging using libthread_db enabled]
+Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
+^C
+Program received signal SIGINT, Interrupt.
+0x00007ffff7da08f7 in __GI_munmap () at ../sysdeps/unix/syscall-template.S:117
+117 ../sysdeps/unix/syscall-template.S: No such file or directory.
+(gdb) break mmap
+Breakpoint 1 at 0x7ffff7da0890: mmap. (2 locations)
+(gdb) c
+Continuing.
+
+Breakpoint 1.1, __GI___mmap64 (addr=addr@entry=0x0, len=16384, prot=prot@entry=3, flags=flags@entry=34, fd=fd@entry=-1, offset=offset@entry=0) at ../sysdeps/unix/sysv/linux/mmap64.c:50
+50 ../sysdeps/unix/sysv/linux/mmap64.c: No such file or directory.
+(gdb) bt
+#0 __GI___mmap64 (addr=addr@entry=0x0, len=16384, prot=prot@entry=3, flags=flags@entry=34, fd=fd@entry=-1, offset=offset@entry=0) at ../sysdeps/unix/sysv/linux/mmap64.c:50
+#1 0x000000000062c849 in _PyObject_ArenaMmap (ctx=, size=) at ../Objects/obmalloc.c:152
+#2 0x000000000063394d in _PyObject_VirtualAlloc (size=16384) at ../Objects/obmalloc.c:560
+#3 allocate_chunk (previous=0x7ffff171f000, size_in_bytes=) at ../Python/pystate.c:731
+#4 push_chunk (size=14, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/pystate.c:2184
+#5 _PyThreadState_BumpFramePointerSlow (tstate=0xa840d8 <_PyRuntime+166328>, size=14) at ../Python/pystate.c:2214
+#6 0x000000000059f0a3 in _PyThreadState_BumpFramePointer (size=, tstate=) at ../Include/internal/pycore_frame.h:218
+#7 _PyFrame_Push (tstate=, func=0x7ffff76f4720) at ../Python/frame.c:156
+#8 0x000000000052bc02 in _PyEval_EvalFrameDefault (tstate=, frame=, throwflag=) at ../Python/ceval.c:4852
+#9 0x000000000052360b in _PyEval_EvalFrame (throwflag=0, frame=0x7ffff7fb8020, tstate=0xa840d8 <_PyRuntime+166328>) at ../Include/internal/pycore_ceval.h:73
+#10 _PyEval_Vector (args=0x0, argcount=0, kwnames=0x0, locals=, func=0x7ffff7c19f80, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/ceval.c:6435
+#11 PyEval_EvalCode (co=, globals=, locals=) at ../Python/ceval.c:1154
+#12 0x0000000000647497 in run_eval_code_obj (tstate=0xa840d8 <_PyRuntime+166328>, co=0x7ffff7bbb220,
+ globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }) at ../Python/pythonrun.c:1714
+#13 0x0000000000644d4f in run_mod (mod=, filename=,
+ globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, flags=,
+ arena=) at ../Python/pythonrun.c:1735
+#14 0x0000000000651010 in pyrun_file (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', start=start@entry=257,
+ globals=globals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ locals=locals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ closeit=closeit@entry=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:1630
+#15 0x0000000000650d5b in _PyRun_SimpleFileObject (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', closeit=closeit@entry=1, flags=flags@entry=0x7fffffffdef8) at ../Python/pythonrun.c:440
+#16 0x0000000000650b84 in _PyRun_AnyFileObject (fp=0xaca370, filename='/tmp/a.py', closeit=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:79
+#17 0x000000000064f90f in pymain_run_file_obj (skip_source_first_line=0, filename='/tmp/a.py', program_name='/usr/bin/python3') at ../Modules/main.c:360
+#18 pymain_run_file (config=0xa6a120 <_PyRuntime+59904>) at ../Modules/main.c:379
+#19 pymain_run_python (exitcode=0x7fffffffdef4) at ../Modules/main.c:601
+#20 Py_RunMain () at ../Modules/main.c:680
+#21 0x00000000006275c7 in Py_BytesMain (argc=, argv=) at ../Modules/main.c:734
+#22 0x00007ffff7cc624a in __libc_start_call_main (main=main@entry=0x627530 , argc=argc@entry=2, argv=argv@entry=0x7fffffffe128) at ../sysdeps/nptl/libc_start_call_main.h:58
+#23 0x00007ffff7cc6305 in __libc_start_main_impl (main=0x627530 , argc=2, argv=0x7fffffffe128, init=, fini=, rtld_fini=, stack_end=0x7fffffffe118)
+ at ../csu/libc-start.c:360
+#24 0x0000000000627461 in _start ()
+(gdb) del
+Delete all breakpoints? (y or n) y
+(gdb) break munmap
+Breakpoint 2 at 0x7ffff7da08f0: munmap. (2 locations)
+(gdb) c
+Continuing.
+
+Breakpoint 2.1, __GI_munmap () at ../sysdeps/unix/syscall-template.S:117
+117 ../sysdeps/unix/syscall-template.S: No such file or directory.
+(gdb) bt
+#0 __GI_munmap () at ../sysdeps/unix/syscall-template.S:117
+#1 0x0000000000536457 in _PyThreadState_PopFrame (frame=, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/pystate.c:2229
+#2 _PyEvalFrameClearAndPop (frame=, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/ceval.c:6409
+#3 _PyEval_EvalFrameDefault (tstate=, frame=0x7fffeed0af88, throwflag=) at ../Python/ceval.c:2439
+#4 0x000000000052360b in _PyEval_EvalFrame (throwflag=0, frame=0x7ffff7fb8020, tstate=0xa840d8 <_PyRuntime+166328>) at ../Include/internal/pycore_ceval.h:73
+#5 _PyEval_Vector (args=0x0, argcount=0, kwnames=0x0, locals=, func=0x7ffff7c19f80, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/ceval.c:6435
+#6 PyEval_EvalCode (co=, globals=, locals=) at ../Python/ceval.c:1154
+#7 0x0000000000647497 in run_eval_code_obj (tstate=0xa840d8 <_PyRuntime+166328>, co=0x7ffff7bbb220,
+ globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }) at ../Python/pythonrun.c:1714
+#8 0x0000000000644d4f in run_mod (mod=, filename=,
+ globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, flags=,
+ arena=) at ../Python/pythonrun.c:1735
+#9 0x0000000000651010 in pyrun_file (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', start=start@entry=257,
+ globals=globals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ locals=locals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': },
+ closeit=closeit@entry=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:1630
+#10 0x0000000000650d5b in _PyRun_SimpleFileObject (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', closeit=closeit@entry=1, flags=flags@entry=0x7fffffffdef8) at ../Python/pythonrun.c:440
+#11 0x0000000000650b84 in _PyRun_AnyFileObject (fp=0xaca370, filename='/tmp/a.py', closeit=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:79
+#12 0x000000000064f90f in pymain_run_file_obj (skip_source_first_line=0, filename='/tmp/a.py', program_name='/usr/bin/python3') at ../Modules/main.c:360
+#13 pymain_run_file (config=0xa6a120 <_PyRuntime+59904>) at ../Modules/main.c:379
+#14 pymain_run_python (exitcode=0x7fffffffdef4) at ../Modules/main.c:601
+#15 Py_RunMain () at ../Modules/main.c:680
+#16 0x00000000006275c7 in Py_BytesMain (argc=, argv=) at ../Modules/main.c:734
+#17 0x00007ffff7cc624a in __libc_start_call_main (main=main@entry=0x627530 , argc=argc@entry=2, argv=argv@entry=0x7fffffffe128) at ../sysdeps/nptl/libc_start_call_main.h:58
+#18 0x00007ffff7cc6305 in __libc_start_main_impl (main=0x627530 , argc=2, argv=0x7fffffffe128, init=, fini=, rtld_fini=, stack_end=0x7fffffffe118)
+ at ../csu/libc-start.c:360
+#19 0x0000000000627461 in _start ()
+(gdb)
+```
+
+## bonus: PyPy
+```
+$ pypy3 a.py
+Segmentation fault
+```
+
+I have no idea what this one is even about.
+I'm pretty sure this code snippet worked just fine in earlier versions of PyPy.
diff --git a/src/tmpl.html b/src/tmpl.html
index a96320a..70dba34 100644
--- a/src/tmpl.html
+++ b/src/tmpl.html
@@ -10,9 +10,10 @@
body {
max-width: 70ch;
font-family: sans-serif;
- line-height: 1.3;
+ line-height: 1.4;
margin: 2em auto;
padding: 0 1ch;
+ text-align: justify;
}
.sidenote {
@@ -36,6 +37,15 @@ header p {
margin: 0;
margin-bottom: .5em;
}
+
+.sourceCode .co { font-style: italic; }
+pre {
+ white-space: pre-wrap;
+ border-left: .5ch lightgray solid;
+ padding-left: .5ch;
+}
+sup {vertical-align: top; font-size: .6em;}
+p code {font-size: 1rem;}
$for(header-includes)$
$header-includes$
--
cgit 1.4.1-2-gfad0