From 3e22951e03a20bf7836b6c0e2c25e2cb7059a2f1 Mon Sep 17 00:00:00 2001 From: dzwdz Date: Mon, 4 Mar 2024 23:30:01 +0100 Subject: python article, slight style changes --- src/python_recursion.md | 196 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 src/python_recursion.md (limited to 'src/python_recursion.md') diff --git a/src/python_recursion.md b/src/python_recursion.md new file mode 100644 index 0000000..277e515 --- /dev/null +++ b/src/python_recursion.md @@ -0,0 +1,196 @@ +--- +title: CPython hates deeply recursive code +date: 2024-03-04 +--- + +Consider the following piece of code: + +```py +import sys; sys.setrecursionlimit(1000000000) + +def rec(i): # pretend we're searching a very deep tree with few branches + if i == 1000000: return + rec(i+1) + if i%100000 == 0: rec(i+1) +rec(0) +``` + +Let's run it for a while: + +``` +$ time python3 a.py + +real 0m52.090s +user 0m29.546s +sys 0m22.020s +``` + +See anything odd? +Despite doing pretty much pure compute (if you can call it that) -- without any +IO or anything that would require calling out to the kernel -- this script +spends almost half its time in the kernel. + +What if we strace it? + +``` +$ strace python3 a.py | head +execve("/usr/bin/python3", ["python3", "a.py"], 0x7ffe0abbf298 /* 41 vars */) = 0 +[...] +munmap(0x7f31b044a000, 16384) = 0 +munmap(0x7f31b044e000, 16384) = 0 +munmap(0x7f31b0452000, 16384) = 0 +munmap(0x7f31b0456000, 16384) = 0 +munmap(0x7f31b045a000, 16384) = 0 +munmap(0x7f31b045e000, 16384) = 0 +munmap(0x7f31b0462000, 16384) = 0 +munmap(0x7f31b0466000, 16384) = 0 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0466000 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0462000 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b045e000 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b045a000 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0456000 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b0452000 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b044e000 +mmap(NULL, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b044a000 +[...] +``` + +It's hard to portray via text, but the terminal is spammed with pages upon pages +of `mmap`s and `munmap`s, constantly allocating and deallocating the same pages. +Hopefully it's obvious why this is wrong. + +This behaviour can affect real code too -- I first encountered it when solving +some Project Euler problem. +I asked about it in `#python`, and the consenus was that CPython just sucks for +any deeply recursive code -- and that's it. +I didn't end up filing a bug, and I've since lost[^1] the chatlog too. + +Discovering this has actually changed my coding style, too -- every time I'm +writing a function which I think might run into this behaviour (so, as a CS +student, pretty often), I end up sacrificing the readability of a straightforward +recursive implementation, to instead try to minimize the recursion depth as much +as possible to work around this bug. + +[^1]: I got some person to send me a copy... but I lost that copy too. If you've been idling in #python, just look for messages from dzwdz. + +## bonus: a backtrace + +``` +$ gdb --args python3 a.py +GNU gdb (Debian 13.1-3) 13.1 +Copyright (C) 2023 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +Type "show copying" and "show warranty" for details. +This GDB was configured as "x86_64-linux-gnu". +Type "show configuration" for configuration details. +For bug reporting instructions, please see: +. +Find the GDB manual and other documentation resources online at: + . + +For help, type "help". +Type "apropos word" to search for commands related to "word"... +Reading symbols from python3... +Reading symbols from /usr/lib/debug/.build-id/ac/175ec7666754cf818b271b4fdc2761ac6865f2.debug... +(gdb) run +Starting program: /usr/bin/python3 a.py +[Thread debugging using libthread_db enabled] +Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". +^C +Program received signal SIGINT, Interrupt. +0x00007ffff7da08f7 in __GI_munmap () at ../sysdeps/unix/syscall-template.S:117 +117 ../sysdeps/unix/syscall-template.S: No such file or directory. +(gdb) break mmap +Breakpoint 1 at 0x7ffff7da0890: mmap. (2 locations) +(gdb) c +Continuing. + +Breakpoint 1.1, __GI___mmap64 (addr=addr@entry=0x0, len=16384, prot=prot@entry=3, flags=flags@entry=34, fd=fd@entry=-1, offset=offset@entry=0) at ../sysdeps/unix/sysv/linux/mmap64.c:50 +50 ../sysdeps/unix/sysv/linux/mmap64.c: No such file or directory. +(gdb) bt +#0 __GI___mmap64 (addr=addr@entry=0x0, len=16384, prot=prot@entry=3, flags=flags@entry=34, fd=fd@entry=-1, offset=offset@entry=0) at ../sysdeps/unix/sysv/linux/mmap64.c:50 +#1 0x000000000062c849 in _PyObject_ArenaMmap (ctx=, size=) at ../Objects/obmalloc.c:152 +#2 0x000000000063394d in _PyObject_VirtualAlloc (size=16384) at ../Objects/obmalloc.c:560 +#3 allocate_chunk (previous=0x7ffff171f000, size_in_bytes=) at ../Python/pystate.c:731 +#4 push_chunk (size=14, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/pystate.c:2184 +#5 _PyThreadState_BumpFramePointerSlow (tstate=0xa840d8 <_PyRuntime+166328>, size=14) at ../Python/pystate.c:2214 +#6 0x000000000059f0a3 in _PyThreadState_BumpFramePointer (size=, tstate=) at ../Include/internal/pycore_frame.h:218 +#7 _PyFrame_Push (tstate=, func=0x7ffff76f4720) at ../Python/frame.c:156 +#8 0x000000000052bc02 in _PyEval_EvalFrameDefault (tstate=, frame=, throwflag=) at ../Python/ceval.c:4852 +#9 0x000000000052360b in _PyEval_EvalFrame (throwflag=0, frame=0x7ffff7fb8020, tstate=0xa840d8 <_PyRuntime+166328>) at ../Include/internal/pycore_ceval.h:73 +#10 _PyEval_Vector (args=0x0, argcount=0, kwnames=0x0, locals=, func=0x7ffff7c19f80, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/ceval.c:6435 +#11 PyEval_EvalCode (co=, globals=, locals=) at ../Python/ceval.c:1154 +#12 0x0000000000647497 in run_eval_code_obj (tstate=0xa840d8 <_PyRuntime+166328>, co=0x7ffff7bbb220, + globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }) at ../Python/pythonrun.c:1714 +#13 0x0000000000644d4f in run_mod (mod=, filename=, + globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, flags=, + arena=) at ../Python/pythonrun.c:1735 +#14 0x0000000000651010 in pyrun_file (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', start=start@entry=257, + globals=globals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + locals=locals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + closeit=closeit@entry=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:1630 +#15 0x0000000000650d5b in _PyRun_SimpleFileObject (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', closeit=closeit@entry=1, flags=flags@entry=0x7fffffffdef8) at ../Python/pythonrun.c:440 +#16 0x0000000000650b84 in _PyRun_AnyFileObject (fp=0xaca370, filename='/tmp/a.py', closeit=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:79 +#17 0x000000000064f90f in pymain_run_file_obj (skip_source_first_line=0, filename='/tmp/a.py', program_name='/usr/bin/python3') at ../Modules/main.c:360 +#18 pymain_run_file (config=0xa6a120 <_PyRuntime+59904>) at ../Modules/main.c:379 +#19 pymain_run_python (exitcode=0x7fffffffdef4) at ../Modules/main.c:601 +#20 Py_RunMain () at ../Modules/main.c:680 +#21 0x00000000006275c7 in Py_BytesMain (argc=, argv=) at ../Modules/main.c:734 +#22 0x00007ffff7cc624a in __libc_start_call_main (main=main@entry=0x627530
, argc=argc@entry=2, argv=argv@entry=0x7fffffffe128) at ../sysdeps/nptl/libc_start_call_main.h:58 +#23 0x00007ffff7cc6305 in __libc_start_main_impl (main=0x627530
, argc=2, argv=0x7fffffffe128, init=, fini=, rtld_fini=, stack_end=0x7fffffffe118) + at ../csu/libc-start.c:360 +#24 0x0000000000627461 in _start () +(gdb) del +Delete all breakpoints? (y or n) y +(gdb) break munmap +Breakpoint 2 at 0x7ffff7da08f0: munmap. (2 locations) +(gdb) c +Continuing. + +Breakpoint 2.1, __GI_munmap () at ../sysdeps/unix/syscall-template.S:117 +117 ../sysdeps/unix/syscall-template.S: No such file or directory. +(gdb) bt +#0 __GI_munmap () at ../sysdeps/unix/syscall-template.S:117 +#1 0x0000000000536457 in _PyThreadState_PopFrame (frame=, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/pystate.c:2229 +#2 _PyEvalFrameClearAndPop (frame=, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/ceval.c:6409 +#3 _PyEval_EvalFrameDefault (tstate=, frame=0x7fffeed0af88, throwflag=) at ../Python/ceval.c:2439 +#4 0x000000000052360b in _PyEval_EvalFrame (throwflag=0, frame=0x7ffff7fb8020, tstate=0xa840d8 <_PyRuntime+166328>) at ../Include/internal/pycore_ceval.h:73 +#5 _PyEval_Vector (args=0x0, argcount=0, kwnames=0x0, locals=, func=0x7ffff7c19f80, tstate=0xa840d8 <_PyRuntime+166328>) at ../Python/ceval.c:6435 +#6 PyEval_EvalCode (co=, globals=, locals=) at ../Python/ceval.c:1154 +#7 0x0000000000647497 in run_eval_code_obj (tstate=0xa840d8 <_PyRuntime+166328>, co=0x7ffff7bbb220, + globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }) at ../Python/pythonrun.c:1714 +#8 0x0000000000644d4f in run_mod (mod=, filename=, + globals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + locals={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, flags=, + arena=) at ../Python/pythonrun.c:1735 +#9 0x0000000000651010 in pyrun_file (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', start=start@entry=257, + globals=globals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + locals=locals@entry={'__name__': '__main__', '__doc__': None, '__package__': None, '__loader__': , '__spec__': None, '__annotations__': {}, '__builtins__': , '__file__': '/tmp/a.py', '__cached__': None, 'sys': , 'rec': }, + closeit=closeit@entry=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:1630 +#10 0x0000000000650d5b in _PyRun_SimpleFileObject (fp=fp@entry=0xaca370, filename=filename@entry='/tmp/a.py', closeit=closeit@entry=1, flags=flags@entry=0x7fffffffdef8) at ../Python/pythonrun.c:440 +#11 0x0000000000650b84 in _PyRun_AnyFileObject (fp=0xaca370, filename='/tmp/a.py', closeit=1, flags=0x7fffffffdef8) at ../Python/pythonrun.c:79 +#12 0x000000000064f90f in pymain_run_file_obj (skip_source_first_line=0, filename='/tmp/a.py', program_name='/usr/bin/python3') at ../Modules/main.c:360 +#13 pymain_run_file (config=0xa6a120 <_PyRuntime+59904>) at ../Modules/main.c:379 +#14 pymain_run_python (exitcode=0x7fffffffdef4) at ../Modules/main.c:601 +#15 Py_RunMain () at ../Modules/main.c:680 +#16 0x00000000006275c7 in Py_BytesMain (argc=, argv=) at ../Modules/main.c:734 +#17 0x00007ffff7cc624a in __libc_start_call_main (main=main@entry=0x627530
, argc=argc@entry=2, argv=argv@entry=0x7fffffffe128) at ../sysdeps/nptl/libc_start_call_main.h:58 +#18 0x00007ffff7cc6305 in __libc_start_main_impl (main=0x627530
, argc=2, argv=0x7fffffffe128, init=, fini=, rtld_fini=, stack_end=0x7fffffffe118) + at ../csu/libc-start.c:360 +#19 0x0000000000627461 in _start () +(gdb) +``` + +## bonus: PyPy +``` +$ pypy3 a.py +Segmentation fault +``` + +I have no idea what this one is even about. +I'm pretty sure this code snippet worked just fine in earlier versions of PyPy. -- cgit 1.4.1-2-gfad0