Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Doc/c-api/perfmaps.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Note that holding an :term:`attached thread state` is not required for these API
or ``-2`` on failure to create a lock. Check ``errno`` for more information
about the cause of a failure.

.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name)
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, size_t code_size, const char *entry_name)

Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is
thread safe. Here is what an example entry looks like::
Expand Down
2 changes: 1 addition & 1 deletion Include/cpython/ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ typedef struct {
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
const void *code_addr,
unsigned int code_size,
size_t code_size,
const char *entry_name);
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
Expand Down
6 changes: 5 additions & 1 deletion Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ typedef struct {
void* (*init_state)(void);
// Callback to register every trampoline being created
void (*write_state)(void* state, const void *code_addr,
unsigned int code_size, PyCodeObject* code);
size_t code_size, PyCodeObject* code);
// Callback to free the trampoline state
int (*free_state)(void* state);
} _PyPerf_Callbacks;
Expand All @@ -108,6 +108,10 @@ extern PyStatus _PyPerfTrampoline_AfterFork_Child(void);
#ifdef PY_HAVE_PERF_TRAMPOLINE
extern _PyPerf_Callbacks _Py_perfmap_callbacks;
extern _PyPerf_Callbacks _Py_perfmap_jit_callbacks;
extern void _PyPerfJit_WriteNamedCode(const void *code_addr,
size_t code_size,
const char *entry,
const char *filename);
#endif

static inline PyObject*
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_interp_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct code_arena_st;
struct trampoline_api_st {
void* (*init_state)(void);
void (*write_state)(void* state, const void *code_addr,
unsigned int code_size, PyCodeObject* code);
size_t code_size, PyCodeObject* code);
int (*free_state)(void* state);
void *state;
Py_ssize_t code_padding;
Expand Down
59 changes: 59 additions & 0 deletions Include/internal/pycore_jit_unwind.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef Py_INTERNAL_JIT_UNWIND_H
#define Py_INTERNAL_JIT_UNWIND_H

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is missing Py_BUILD_CORE guard no?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I've seen now the other headers files.

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#if defined(PY_HAVE_PERF_TRAMPOLINE) || (defined(__linux__) && defined(__ELF__))

#include <stddef.h>

/* DWARF exception-handling pointer encodings shared by JIT unwind users. */
enum {
DWRF_EH_PE_absptr = 0x00,
DWRF_EH_PE_omit = 0xff,

/* Data type encodings */
DWRF_EH_PE_uleb128 = 0x01,
DWRF_EH_PE_udata2 = 0x02,
DWRF_EH_PE_udata4 = 0x03,
DWRF_EH_PE_udata8 = 0x04,
DWRF_EH_PE_sleb128 = 0x09,
DWRF_EH_PE_sdata2 = 0x0a,
DWRF_EH_PE_sdata4 = 0x0b,
DWRF_EH_PE_sdata8 = 0x0c,
DWRF_EH_PE_signed = 0x08,

/* Reference type encodings */
DWRF_EH_PE_pcrel = 0x10,
DWRF_EH_PE_textrel = 0x20,
DWRF_EH_PE_datarel = 0x30,
DWRF_EH_PE_funcrel = 0x40,
DWRF_EH_PE_aligned = 0x50,
DWRF_EH_PE_indirect = 0x80
};

/* Return the size of the generated .eh_frame data for the given encoding. */
size_t _PyJitUnwind_EhFrameSize(int absolute_addr);

/*
* Build DWARF .eh_frame data for JIT code; returns size written or 0 on error.
* absolute_addr selects the FDE address encoding:
* - 0: PC-relative offsets (perf jitdump synthesized DSO).
* - nonzero: absolute addresses (GDB JIT in-memory ELF).
*/
size_t _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size,
const void *code_addr, size_t code_size,
int absolute_addr);

void *_PyJitUnwind_GdbRegisterCode(const void *code_addr,
size_t code_size,
const char *entry,
const char *filename);

void _PyJitUnwind_GdbUnregisterCode(void *handle);

#endif // defined(PY_HAVE_PERF_TRAMPOLINE) || (defined(__linux__) && defined(__ELF__))

#endif // Py_INTERNAL_JIT_UNWIND_H
1 change: 1 addition & 0 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ typedef struct _PyExecutorObject {
uint32_t code_size;
size_t jit_size;
void *jit_code;
void *jit_gdb_handle;
_PyExitData exits[1];
} _PyExecutorObject;

Expand Down
27 changes: 27 additions & 0 deletions Lib/test/test_gdb/gdb_jit_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Sample script for use by test_gdb.test_jit

import _testinternalcapi
import operator


WARMUP_ITERATIONS = _testinternalcapi.TIER2_THRESHOLD + 10


def jit_bt_hot(depth, warming_up_caller=False):
if depth == 0:
if not warming_up_caller:
id(42)
return

for iteration in range(WARMUP_ITERATIONS):
operator.call(
jit_bt_hot,
depth - 1,
warming_up_caller or iteration + 1 != WARMUP_ITERATIONS,
)


# Warm the shared shim once without hitting builtin_id so the real run uses
# the steady-state shim path when GDB breaks inside id(42).
jit_bt_hot(1, warming_up_caller=True)
jit_bt_hot(1)
118 changes: 118 additions & 0 deletions Lib/test/test_gdb/test_jit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import os
import re
import sys
import unittest

from .util import setup_module, DebuggerTests


JIT_SAMPLE_SCRIPT = os.path.join(os.path.dirname(__file__), "gdb_jit_sample.py")
# In batch GDB, break in builtin_id() while it is running under JIT,
# then repeatedly "finish" until the selected frame is the JIT entry.
# That gives a deterministic backtrace starting with py::jit_entry:<jit>.
#
# builtin_id() sits only a few helper frames above the JIT entry on this path.
# This bound is just a generous upper limit so the test fails clearly if the
# expected stack shape changes.
MAX_FINISH_STEPS = 20
# Break directly on the lazy shim entry in the binary, then single-step just
# enough to let it install the compiled JIT entry and set a temporary
# breakpoint on the resulting address.
MAX_ENTRY_SETUP_STEPS = 20
# After landing on the JIT entry frame, single-step a little further into the
# blob so the backtrace is taken from JIT code itself rather than the
# immediate helper-return site.
JIT_ENTRY_SINGLE_STEPS = 2
EVAL_FRAME_RE = r"(_PyEval_EvalFrameDefault|_PyEval_Vector)"

FINISH_TO_JIT_ENTRY = (
"python exec(\"import gdb\\n"
"target = 'py::jit_entry:<jit>'\\n"
f"for _ in range({MAX_FINISH_STEPS}):\\n"
" frame = gdb.selected_frame()\\n"
" if frame is not None and frame.name() == target:\\n"
" break\\n"
" gdb.execute('finish')\\n"
"else:\\n"
" raise RuntimeError('did not reach %s' % target)\\n\")"
)
BREAK_IN_COMPILED_JIT_ENTRY = (
"python exec(\"import gdb\\n"
"lazy = int(gdb.parse_and_eval('(void*)_Py_LazyJitShim'))\\n"
f"for _ in range({MAX_ENTRY_SETUP_STEPS}):\\n"
" entry = int(gdb.parse_and_eval('(void*)_Py_jit_entry'))\\n"
" if entry != lazy:\\n"
" gdb.execute('tbreak *0x%x' % entry)\\n"
" break\\n"
" gdb.execute('next')\\n"
"else:\\n"
" raise RuntimeError('compiled JIT entry was not installed')\\n\")"
)


def setUpModule():
setup_module()


@unittest.skipUnless(
hasattr(sys, "_jit") and sys._jit.is_available(),
"requires a JIT-enabled build",
)
class JitBacktraceTests(DebuggerTests):
def test_bt_shows_compiled_jit_entry(self):
gdb_output = self.get_stack_trace(
script=JIT_SAMPLE_SCRIPT,
breakpoint="_Py_LazyJitShim",
cmds_after_breakpoint=[
BREAK_IN_COMPILED_JIT_ENTRY,
"continue",
"bt",
],
PYTHON_JIT="1",
)
# GDB registers the compiled JIT entry and per-trace JIT regions under
# the same synthetic symbol name.
self.assertRegex(
gdb_output,
re.compile(
rf"#0\s+py::jit_entry:<jit>.*{EVAL_FRAME_RE}",
re.DOTALL,
),
)

def test_bt_unwinds_through_jit_frames(self):
gdb_output = self.get_stack_trace(
script=JIT_SAMPLE_SCRIPT,
cmds_after_breakpoint=["bt"],
PYTHON_JIT="1",
)
# The executor should appear as a named JIT frame and unwind back into
# the eval loop. Whether GDB also materializes a separate shim frame is
# an implementation detail of the synthetic executor CFI.
self.assertRegex(
gdb_output,
re.compile(
rf"py::jit_entry:<jit>.*{EVAL_FRAME_RE}",
re.DOTALL,
),
)

def test_bt_unwinds_from_inside_jit_entry(self):
gdb_output = self.get_stack_trace(
script=JIT_SAMPLE_SCRIPT,
cmds_after_breakpoint=[
FINISH_TO_JIT_ENTRY,
*(["si"] * JIT_ENTRY_SINGLE_STEPS),
"bt",
],
PYTHON_JIT="1",
)
# Once the selected PC is inside the JIT entry, we only require that
# GDB can identify the JIT frame and keep unwinding into _PyEval_*.
self.assertRegex(
gdb_output,
re.compile(
rf"#0\s+py::jit_entry:<jit>.*{EVAL_FRAME_RE}",
re.DOTALL,
),
)
5 changes: 3 additions & 2 deletions Lib/test/test_gdb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ def get_stack_trace(self, source=None, script=None,
breakpoint=BREAKPOINT_FN,
cmds_after_breakpoint=None,
import_site=False,
ignore_stderr=False):
ignore_stderr=False,
**env_vars):
'''
Run 'python -c SOURCE' under gdb with a breakpoint.

Expand Down Expand Up @@ -239,7 +240,7 @@ def get_stack_trace(self, source=None, script=None,
args += [script]

# Use "args" to invoke gdb, capturing stdout, stderr:
out, err = run_gdb(*args, PYTHONHASHSEED=PYTHONHASHSEED)
out, err = run_gdb(*args, PYTHONHASHSEED=PYTHONHASHSEED, **env_vars)

if not ignore_stderr:
for line in err.splitlines():
Expand Down
17 changes: 16 additions & 1 deletion Lib/test/test_perfmaps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import sys
import sysconfig
import unittest

Expand All @@ -17,11 +18,25 @@ def supports_trampoline_profiling():
raise unittest.SkipTest("perf trampoline profiling not supported")

class TestPerfMapWriting(unittest.TestCase):
def tearDown(self):
perf_map_state_teardown()

def test_write_perf_map_entry(self):
self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0)
self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0)
with open(f"/tmp/perf-{os.getpid()}.map") as f:
perf_file_contents = f.read()
self.assertIn("1234 162e entry1", perf_file_contents)
self.assertIn("2345 1a85 entry2", perf_file_contents)
perf_map_state_teardown()

@unittest.skipIf(sys.maxsize <= 2**32, "requires size_t wider than unsigned int")
def test_write_perf_map_entry_large_size(self):
code_addr = 0x3456
code_size = 1 << 33
entry_name = "entry_big"

self.assertEqual(write_perf_map_entry(code_addr, code_size, entry_name), 0)
with open(f"/tmp/perf-{os.getpid()}.map") as f:
perf_file_contents = f.read()
self.assertIn(f"{code_addr:x} {code_size:x} {entry_name}",
perf_file_contents)
1 change: 1 addition & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,7 @@ PYTHON_OBJS= \
Python/suggestions.o \
Python/perf_trampoline.o \
Python/perf_jit_trampoline.o \
Python/jit_unwind.o \
Python/remote_debugging.o \
Python/$(DYNLOADFILE) \
$(LIBOBJS) \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for unwinding JIT frames using GDB. Patch by Diego Russo
10 changes: 8 additions & 2 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1210,16 +1210,22 @@ write_perf_map_entry(PyObject *self, PyObject *args)
{
PyObject *code_addr_v;
const void *code_addr;
unsigned int code_size;
PyObject *code_size_s;
size_t code_size;
const char *entry_name;

if (!PyArg_ParseTuple(args, "OIs", &code_addr_v, &code_size, &entry_name))
if (!PyArg_ParseTuple(args, "OOs", &code_addr_v, &code_size_s, &entry_name))
return NULL;
code_addr = PyLong_AsVoidPtr(code_addr_v);
if (code_addr == NULL) {
return NULL;
}

code_size = PyLong_AsSize_t(code_size_s);
if (code_size == (size_t)-1 && PyErr_Occurred()) {
return NULL;
}

int ret = PyUnstable_WritePerfMapEntry(code_addr, code_size, entry_name);
if (ret < 0) {
PyErr_SetFromErrno(PyExc_OSError);
Expand Down
Loading
Loading