Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Doc/c-api/perfmaps.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Note that holding an :term:`attached thread state` is not required for these API
or ``-2`` on failure to create a lock. Check ``errno`` for more information
about the cause of a failure.

.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name)
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, size_t code_size, const char *entry_name)

Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is
thread safe. Here is what an example entry looks like::
Expand Down
2 changes: 1 addition & 1 deletion Include/cpython/ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ typedef struct {
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
const void *code_addr,
unsigned int code_size,
size_t code_size,
const char *entry_name);
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
Expand Down
6 changes: 5 additions & 1 deletion Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ typedef struct {
void* (*init_state)(void);
// Callback to register every trampoline being created
void (*write_state)(void* state, const void *code_addr,
unsigned int code_size, PyCodeObject* code);
size_t code_size, PyCodeObject* code);
// Callback to free the trampoline state
int (*free_state)(void* state);
} _PyPerf_Callbacks;
Expand All @@ -108,6 +108,10 @@ extern PyStatus _PyPerfTrampoline_AfterFork_Child(void);
#ifdef PY_HAVE_PERF_TRAMPOLINE
extern _PyPerf_Callbacks _Py_perfmap_callbacks;
extern _PyPerf_Callbacks _Py_perfmap_jit_callbacks;
extern void _PyPerfJit_WriteNamedCode(const void *code_addr,
size_t code_size,
const char *entry,
const char *filename);
#endif

static inline PyObject*
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_interp_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct code_arena_st;
struct trampoline_api_st {
void* (*init_state)(void);
void (*write_state)(void* state, const void *code_addr,
unsigned int code_size, PyCodeObject* code);
size_t code_size, PyCodeObject* code);
int (*free_state)(void* state);
void *state;
Py_ssize_t code_padding;
Expand Down
59 changes: 59 additions & 0 deletions Include/internal/pycore_jit_unwind.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef Py_INTERNAL_JIT_UNWIND_H
#define Py_INTERNAL_JIT_UNWIND_H

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is missing Py_BUILD_CORE guard no?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I've seen now the other headers files.

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#if defined(PY_HAVE_PERF_TRAMPOLINE) || (defined(__linux__) && defined(__ELF__))

#include <stddef.h>     // size_t
#include <stdint.h>     // uint8_t, used by _PyJitUnwind_BuildEhFrame()

/* DWARF exception-handling pointer encodings shared by JIT unwind users.
 *
 * A pointer-encoding byte combines one "data type" value (low nibble) with
 * one "reference type" value (high nibble), optionally OR-ed with
 * DWRF_EH_PE_indirect. */
enum {
    DWRF_EH_PE_absptr = 0x00,
    DWRF_EH_PE_omit = 0xff,

    /* Data type encodings */
    DWRF_EH_PE_uleb128 = 0x01,
    DWRF_EH_PE_udata2 = 0x02,
    DWRF_EH_PE_udata4 = 0x03,
    DWRF_EH_PE_udata8 = 0x04,
    DWRF_EH_PE_sleb128 = 0x09,
    DWRF_EH_PE_sdata2 = 0x0a,
    DWRF_EH_PE_sdata4 = 0x0b,
    DWRF_EH_PE_sdata8 = 0x0c,
    DWRF_EH_PE_signed = 0x08,

    /* Reference type encodings */
    DWRF_EH_PE_pcrel = 0x10,
    DWRF_EH_PE_textrel = 0x20,
    DWRF_EH_PE_datarel = 0x30,
    DWRF_EH_PE_funcrel = 0x40,
    DWRF_EH_PE_aligned = 0x50,
    DWRF_EH_PE_indirect = 0x80
};

/*
 * Return the size of the generated .eh_frame data for the given encoding.
 * NOTE(review): absolute_addr presumably has the same meaning as for
 * _PyJitUnwind_BuildEhFrame() below -- confirm in Python/jit_unwind.c.
 */
size_t _PyJitUnwind_EhFrameSize(int absolute_addr);

/*
 * Build DWARF .eh_frame data for JIT code; returns size written or 0 on error.
 *
 * buffer/buffer_size describe the caller-provided output buffer and
 * code_addr/code_size the JIT code region being described.
 *
 * absolute_addr selects the FDE address encoding:
 * - 0: PC-relative offsets (perf jitdump synthesized DSO).
 * - nonzero: absolute addresses (GDB JIT in-memory ELF).
 */
size_t _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size,
                                 const void *code_addr, size_t code_size,
                                 int absolute_addr);

/*
 * Register the code region [code_addr, code_addr + code_size) with GDB under
 * the given entry and filename strings.
 * NOTE(review): the return value is presumably an opaque handle to pass to
 * _PyJitUnwind_GdbUnregisterCode(); its failure value is not visible in this
 * header -- confirm in Python/jit_unwind.c.
 */
void *_PyJitUnwind_GdbRegisterCode(const void *code_addr,
                                   size_t code_size,
                                   const char *entry,
                                   const char *filename);

/*
 * Counterpart of _PyJitUnwind_GdbRegisterCode().
 * NOTE(review): whether a NULL handle is accepted is not visible here.
 */
void _PyJitUnwind_GdbUnregisterCode(void *handle);

#endif // defined(PY_HAVE_PERF_TRAMPOLINE) || (defined(__linux__) && defined(__ELF__))

#endif // Py_INTERNAL_JIT_UNWIND_H
1 change: 1 addition & 0 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ typedef struct _PyExecutorObject {
uint32_t code_size;
size_t jit_size;
void *jit_code;
void *jit_gdb_handle;
_PyExitData exits[1];
} _PyExecutorObject;

Expand Down
27 changes: 27 additions & 0 deletions Lib/test/test_gdb/gdb_jit_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Sample script for use by test_gdb.test_jit

import _testinternalcapi
import operator


# Number of calls made by each loop in jit_bt_hot(): ten more than the
# tier 2 threshold exported by _testinternalcapi.
# NOTE(review): presumably enough iterations for the callee to be running
# under the JIT before the final call -- confirm against TIER2_THRESHOLD.
WARMUP_ITERATIONS = _testinternalcapi.TIER2_THRESHOLD + 10


def jit_bt_hot(depth, warming_up_caller=False):
    # Recursive driver used by test_gdb.test_jit.
    #
    # depth: remaining recursion levels; at 0 the function is a leaf.
    # warming_up_caller: when true, the leaf skips the id(42) call, so a
    #     breakpoint on id()'s C implementation is not hit.
    if depth == 0:
        if not warming_up_caller:
            # The only place id() is called: this is where GDB stops.
            id(42)
        return

    for iteration in range(WARMUP_ITERATIONS):
        # NOTE(review): the call goes through operator.call rather than a
        # direct call; presumably deliberate for the stack/trace shape the
        # test expects -- confirm before simplifying.
        operator.call(
            jit_bt_hot,
            depth - 1,
            # True for every iteration except the last one, so (unless the
            # caller is itself warming up) only the final call reaches id(42).
            warming_up_caller or iteration + 1 != WARMUP_ITERATIONS,
        )


# Warm the shared shim once without hitting builtin_id so the real run uses
# the steady-state shim path when GDB breaks inside id(42).
jit_bt_hot(1, warming_up_caller=True)
jit_bt_hot(1)
183 changes: 183 additions & 0 deletions Lib/test/test_gdb/test_jit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import os
import platform
import re
import sys
import unittest

from .util import setup_module, DebuggerTests


# Script executed under GDB by every test in this module.
JIT_SAMPLE_SCRIPT = os.path.join(os.path.dirname(__file__), "gdb_jit_sample.py")
# In batch GDB, break in builtin_id() while it is running under JIT,
# then repeatedly "finish" until the selected frame is the JIT entry.
# That gives a deterministic backtrace starting with py::jit_entry:<jit>.
#
# builtin_id() sits only a few helper frames above the JIT entry on this path.
# This bound is just a generous upper limit so the test fails clearly if the
# expected stack shape changes.
MAX_FINISH_STEPS = 20
# Break directly on the lazy shim entry in the binary, then single-step just
# enough to let it install the compiled JIT entry and set a temporary
# breakpoint on the resulting address.
MAX_ENTRY_SETUP_STEPS = 20
# After landing on the JIT entry frame, single-step a bounded number of
# instructions further into the blob so the backtrace is taken from JIT code
# itself rather than the immediate helper-return site. The exact number of
# steps is not significant: each step is cross-checked against the selected
# frame's symbol so the test fails loudly if stepping escapes the registered
# JIT region, instead of asserting against a misleading backtrace.
MAX_JIT_ENTRY_STEPS = 4
# Symbols of the eval loop that must appear below the JIT frame.
EVAL_FRAME_RE = r"(_PyEval_EvalFrameDefault|_PyEval_Vector)"
# One line of "bt" output, i.e. a line starting with "#<number>".
BACKTRACE_FRAME_RE = re.compile(r"^#\d+\s+.*$", re.MULTILINE)

# Each command below is a single GDB "python exec("...")" line whose payload
# is a small multi-line Python program. The payload is whitespace-sensitive:
# the spaces after each opening quote are the Python indentation seen by
# exec(), so nested statements must be indented deeper than their parent.

# Run "finish" up to MAX_FINISH_STEPS times until the selected frame is the
# JIT entry; raise inside GDB if it is never reached.
FINISH_TO_JIT_ENTRY = (
    "python exec(\"import gdb\\n"
    "target = 'py::jit_entry:<jit>'\\n"
    f"for _ in range({MAX_FINISH_STEPS}):\\n"
    "    frame = gdb.selected_frame()\\n"
    "    if frame is not None and frame.name() == target:\\n"
    "        break\\n"
    "    gdb.execute('finish')\\n"
    "else:\\n"
    "    raise RuntimeError('did not reach %s' % target)\\n\")"
)
# Run "next" up to MAX_ENTRY_SETUP_STEPS times until _Py_jit_entry no longer
# equals _Py_LazyJitShim, then set a temporary breakpoint on the new entry.
BREAK_IN_COMPILED_JIT_ENTRY = (
    "python exec(\"import gdb\\n"
    "lazy = int(gdb.parse_and_eval('(void*)_Py_LazyJitShim'))\\n"
    f"for _ in range({MAX_ENTRY_SETUP_STEPS}):\\n"
    "    entry = int(gdb.parse_and_eval('(void*)_Py_jit_entry'))\\n"
    "    if entry != lazy:\\n"
    "        gdb.execute('tbreak *0x%x' % entry)\\n"
    "        break\\n"
    "    gdb.execute('next')\\n"
    "else:\\n"
    "    raise RuntimeError('compiled JIT entry was not installed')\\n\")"
)
# Execute MAX_JIT_ENTRY_STEPS "si" steps, checking before every step and once
# more afterwards that the selected frame is still the JIT entry.
STEP_INSIDE_JIT_ENTRY = (
    "python exec(\"import gdb\\n"
    "target = 'py::jit_entry:<jit>'\\n"
    f"for _ in range({MAX_JIT_ENTRY_STEPS}):\\n"
    "    frame = gdb.selected_frame()\\n"
    "    if frame is None or frame.name() != target:\\n"
    "        raise RuntimeError('left JIT region during stepping: '\\n"
    "                           + repr(frame and frame.name()))\\n"
    "    gdb.execute('si')\\n"
    "frame = gdb.selected_frame()\\n"
    "if frame is None or frame.name() != target:\\n"
    "    raise RuntimeError('stepped out of JIT region after si')\\n\")"
)


def setUpModule():
    # unittest module-level fixture. All work is delegated to setup_module()
    # imported from .util; what it checks or skips is not visible here.
    setup_module()


# Registration with the GDB JIT interface is compiled only under
# __linux__ && __ELF__ in Python/jit_unwind.c, and the synthetic EH-frame is
# implemented for x86_64 and AArch64 only (anything else hits a #error).
# Skip cleanly elsewhere rather than producing timeouts / empty backtraces.
# is_enabled() implies is_available() and also implies that the runtime has
# JIT execution active; interpreter-only tier 2 builds don't hit this path.
@unittest.skipUnless(sys.platform == "linux",
                     "GDB JIT interface is only implemented for Linux + ELF")
@unittest.skipUnless(platform.machine() in ("x86_64", "aarch64"),
                     "GDB JIT CFI emitter only supports x86_64 and AArch64")
@unittest.skipUnless(hasattr(sys, "_jit") and sys._jit.is_enabled(),
                     "requires a JIT-enabled build with JIT execution active")
class JitBacktraceTests(DebuggerTests):
    def _extract_backtrace_frames(self, gdb_output):
        # Collect the "#N ..." lines of the backtrace; there must be some.
        matched = BACKTRACE_FRAME_RE.findall(gdb_output)
        failure = (
            f"expected at least one GDB backtrace frame in output:\n{gdb_output}"
        )
        self.assertGreater(len(matched), 0, failure)
        return matched

    def _assert_jit_backtrace_shape(self, gdb_output, *, anchor_at_top):
        # Shape assertions applied to every JIT backtrace we produce:
        #   1. The synthetic JIT symbol appears exactly once. A second
        #      py::jit_entry:<jit> frame would mean the unwinder is
        #      materializing two native frames for a single logical JIT
        #      region, or failing to unwind out of the region entirely.
        #   2. At least one _PyEval_EvalFrameDefault / _PyEval_Vector
        #      frame appears after the JIT frame, proving the unwinder
        #      climbs back out of the JIT region into the eval loop.
        #   3. Helper frames from inside the JITted region may still
        #      appear above the synthetic JIT frame in the backtrace.
        #   4. For tests that assert a specific entry PC, the JIT frame
        #      is also at #0.
        jit_symbol = "py::jit_entry:<jit>"
        frames = self._extract_backtrace_frames(gdb_output)
        backtrace = "\n".join(frames)

        jit_positions = [
            index for index, frame in enumerate(frames) if jit_symbol in frame
        ]
        self.assertEqual(
            len(jit_positions), 1,
            f"expected exactly 1 py::jit_entry:<jit> frame, "
            f"got {len(jit_positions)}\n"
            f"backtrace:\n{backtrace}",
        )

        eval_positions = [
            index for index, frame in enumerate(frames)
            if re.search(EVAL_FRAME_RE, frame)
        ]
        self.assertGreaterEqual(
            len(eval_positions), 1,
            f"expected at least one _PyEval_* frame, got {len(eval_positions)}\n"
            f"backtrace:\n{backtrace}",
        )

        # Exactly one JIT frame exists at this point, so [0] is safe.
        jit_position = jit_positions[0]
        self.assertTrue(
            any(index > jit_position for index in eval_positions),
            f"expected an eval frame after the JIT frame\n"
            f"backtrace:\n{backtrace}",
        )

        if anchor_at_top:
            self.assertRegex(frames[0], r"^#0\s+py::jit_entry:<jit>")

    def test_bt_shows_compiled_jit_entry(self):
        commands = [BREAK_IN_COMPILED_JIT_ENTRY, "continue", "bt"]
        gdb_output = self.get_stack_trace(
            script=JIT_SAMPLE_SCRIPT,
            breakpoint="_Py_LazyJitShim",
            cmds_after_breakpoint=commands,
            PYTHON_JIT="1",
        )
        # The compiled JIT entry and the per-trace JIT regions are registered
        # with GDB under one synthetic symbol name; stopping at the entry PC
        # therefore pins the JIT frame at #0.
        self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=True)

    def test_bt_unwinds_through_jit_frames(self):
        commands = ["bt"]
        gdb_output = self.get_stack_trace(
            script=JIT_SAMPLE_SCRIPT,
            cmds_after_breakpoint=commands,
            PYTHON_JIT="1",
        )
        # The executor must show up as a named JIT frame and unwind back into
        # the eval loop. Whether GDB also materializes a separate shim frame
        # is an implementation detail of the synthetic executor CFI.
        self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=False)

    def test_bt_unwinds_from_inside_jit_entry(self):
        commands = [FINISH_TO_JIT_ENTRY, STEP_INSIDE_JIT_ENTRY, "bt"]
        gdb_output = self.get_stack_trace(
            script=JIT_SAMPLE_SCRIPT,
            cmds_after_breakpoint=commands,
            PYTHON_JIT="1",
        )
        # With the selected PC inside the JIT entry, GDB has to identify the
        # JIT frame at #0 and keep unwinding into _PyEval_*.
        self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=True)
5 changes: 3 additions & 2 deletions Lib/test/test_gdb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ def get_stack_trace(self, source=None, script=None,
breakpoint=BREAKPOINT_FN,
cmds_after_breakpoint=None,
import_site=False,
ignore_stderr=False):
ignore_stderr=False,
**env_vars):
'''
Run 'python -c SOURCE' under gdb with a breakpoint.

Expand Down Expand Up @@ -239,7 +240,7 @@ def get_stack_trace(self, source=None, script=None,
args += [script]

# Use "args" to invoke gdb, capturing stdout, stderr:
out, err = run_gdb(*args, PYTHONHASHSEED=PYTHONHASHSEED)
out, err = run_gdb(*args, PYTHONHASHSEED=PYTHONHASHSEED, **env_vars)

if not ignore_stderr:
for line in err.splitlines():
Expand Down
17 changes: 16 additions & 1 deletion Lib/test/test_perfmaps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import sys
import sysconfig
import unittest

Expand All @@ -17,11 +18,25 @@ def supports_trampoline_profiling():
raise unittest.SkipTest("perf trampoline profiling not supported")

class TestPerfMapWriting(unittest.TestCase):
    # Tests for writing entries to the per-process /tmp/perf-<pid>.map file.

    def tearDown(self):
        # Runs after every test, including failing ones, so individual tests
        # do not need to (and should not) tear the perf map state down
        # themselves.
        perf_map_state_teardown()

    def test_write_perf_map_entry(self):
        self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0)
        self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0)
        with open(f"/tmp/perf-{os.getpid()}.map") as f:
            perf_file_contents = f.read()
            # Address and size are written in hexadecimal without a prefix:
            # 5678 == 0x162e and 6789 == 0x1a85.
            self.assertIn("1234 162e entry1", perf_file_contents)
            self.assertIn("2345 1a85 entry2", perf_file_contents)

    @unittest.skipIf(sys.maxsize <= 2**32, "requires size_t wider than unsigned int")
    def test_write_perf_map_entry_large_size(self):
        # A size that does not fit in 32 bits must be written unchanged.
        code_addr = 0x3456
        code_size = 1 << 33
        entry_name = "entry_big"

        self.assertEqual(write_perf_map_entry(code_addr, code_size, entry_name), 0)
        with open(f"/tmp/perf-{os.getpid()}.map") as f:
            perf_file_contents = f.read()
            self.assertIn(f"{code_addr:x} {code_size:x} {entry_name}",
                          perf_file_contents)
1 change: 1 addition & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,7 @@ PYTHON_OBJS= \
Python/suggestions.o \
Python/perf_trampoline.o \
Python/perf_jit_trampoline.o \
Python/jit_unwind.o \
Python/remote_debugging.o \
Python/$(DYNLOADFILE) \
$(LIBOBJS) \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for unwinding JIT frames using GDB. Patch by Diego Russo.
Loading
Loading