From cc2ddcaabf031097b733badb356110d7e5fd9831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Mon, 1 Jun 2026 18:24:45 +0200 Subject: [PATCH 1/3] Fix malformed jitdump thread ids on macOS The perf jitdump format defines the thread id field of the JR_CODE_LOAD record as a 32-bit value, but on macOS it was declared as a uint64_t (since pthread_threadid_np() returns a uint64_t). Those extra 8 bytes plus alignment padding shifted every following field, so parsers reading the file by the spec misread code_size as the code address and failed to resolve any Python frames. Declare thread_id as uint32_t on all platforms and truncate the macOS thread id when writing the record. The value is only informational: symbols are resolved by address, not by thread id, so truncation is safe here. --- .../2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst | 4 ++++ Python/perf_jit_trampoline.c | 11 ++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst new file mode 100644 index 00000000000000..1920c8cdfce4f4 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst @@ -0,0 +1,4 @@ +Fix malformed perf jitdump thread ids on macOS. The ``thread_id`` field of the +``JR_CODE_LOAD`` record was written as a 64-bit value instead of the 32-bit +value required by the jitdump format, which shifted every following field and +prevented profilers from resolving Python frames. 
diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 0c460282feceef..f86d86b9c8cddb 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -217,11 +217,7 @@ struct BaseEvent { typedef struct { struct BaseEvent base; // Common event header uint32_t process_id; // Process ID where code was generated -#if defined(__APPLE__) - uint64_t thread_id; // Thread ID where code was generated -#else uint32_t thread_id; // Thread ID where code was generated -#endif uint64_t vma; // Virtual memory address where code is loaded uint64_t code_address; // Address of the actual machine code uint64_t code_size; // Size of the machine code in bytes @@ -652,7 +648,12 @@ static void perf_map_jit_write_entry_with_name( ev.base.time_stamp = get_current_monotonic_ticks(); ev.process_id = getpid(); #if defined(__APPLE__) - pthread_threadid_np(NULL, &ev.thread_id); + // The jitdump format defines the thread id field as a 32-bit value, but + // pthread_threadid_np() returns a 64-bit id. Truncate it to 32 bits to + // keep the record layout identical to other platforms. + uint64_t thread_id = 0; + pthread_threadid_np(NULL, &thread_id); + ev.thread_id = (uint32_t)thread_id; #else ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call #endif From 16eb0e6fbb77e28a194a7e8fbd2529b25477ac3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Mon, 1 Jun 2026 19:11:22 +0200 Subject: [PATCH 2/3] Use mach_absolute_time for macOS jitdump timestamps On macOS the jitdump file is consumed by profilers such as samply, which timestamp their samples using mach_absolute_time(). The jitdump events were stamped with clock_gettime(CLOCK_MONOTONIC), a different clock domain that keeps advancing while the system is asleep, so the JIT code mappings could be off by days relative to the samples and no Python frame would resolve. Stamp jitdump events with mach_absolute_time() on macOS so they share the sampler's clock domain. 
Linux continues to use CLOCK_MONOTONIC to stay aligned with perf. --- ...6-06-01-19-24-12.gh-issue-150723.WlcL_-.rst | 4 ++++ Python/perf_jit_trampoline.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-24-12.gh-issue-150723.WlcL_-.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-24-12.gh-issue-150723.WlcL_-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-24-12.gh-issue-150723.WlcL_-.rst new file mode 100644 index 00000000000000..78c896b669c239 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-24-12.gh-issue-150723.WlcL_-.rst @@ -0,0 +1,4 @@ +Fix perf jitdump timestamps on macOS. Events were stamped using +``CLOCK_MONOTONIC``, but macOS profilers timestamp their samples with +``mach_absolute_time()``. The mismatch prevented the JIT code mappings from +lining up with the samples, so no Python frame could be resolved. diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index f86d86b9c8cddb..32b147199544cf 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -82,6 +82,9 @@ #if defined(__linux__) # include <sys/syscall.h> // System call interface #endif +#if defined(__APPLE__) +# include <mach/mach_time.h> // mach_absolute_time, mach_timebase_info +#endif // ============================================================================= // CONSTANTS AND CONFIGURATION @@ -291,7 +294,9 @@ static PerfMapJitState perf_jit_map_state; // ============================================================================= /* Time conversion constant */ +#if !defined(__APPLE__) static const intptr_t nanoseconds_per_second = 1000000000; +#endif /* * Get current monotonic time in nanoseconds @@ -303,6 +308,18 @@ static const intptr_t nanoseconds_per_second = 1000000000; * Returns: Current monotonic time in nanoseconds since an arbitrary epoch */ static int64_t get_current_monotonic_ticks(void) { +#if defined(__APPLE__) + // On macOS the jitdump file is 
consumed by profilers (such as samply) that + // timestamp their samples using mach_absolute_time(). The jitdump event + // timestamps must use the same clock domain, otherwise the JIT code + // mappings cannot be lined up with the samples. + static mach_timebase_info_data_t timebase = {0, 0}; + if (timebase.denom == 0) { + (void)mach_timebase_info(&timebase); + } + uint64_t ticks = mach_absolute_time(); + return (int64_t)(ticks * timebase.numer / timebase.denom); +#else struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { Py_UNREACHABLE(); // Should never fail on supported systems @@ -314,6 +331,7 @@ static int64_t get_current_monotonic_ticks(void) { result *= nanoseconds_per_second; result += ts.tv_nsec; return result; +#endif } /* From d059afe79fb85541c9a0c11cefe99e169ab0c2dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Naz=C4=B1m=20Can=20Alt=C4=B1nova?= Date: Mon, 1 Jun 2026 19:11:25 +0200 Subject: [PATCH 3/3] Add a samply jitdump end-to-end test on macOS Run the -Xperf_jit (jitdump) backend through samply and assert that Python frames resolve, exercising the binary jitdump path end to end. The test is skipped when samply is not installed. --- Lib/test/test_samply_profiler.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Lib/test/test_samply_profiler.py b/Lib/test/test_samply_profiler.py index ec0ed37ffd047b..f9ab9207c3c23d 100644 --- a/Lib/test/test_samply_profiler.py +++ b/Lib/test/test_samply_profiler.py @@ -240,5 +240,29 @@ def compile_trampolines_for_all_functions(): self.assertIn(line, child_perf_file_contents) +@unittest.skipUnless(samply_command_works(), "samply command doesn't work") +class TestSamplyProfilerWithJitDump(unittest.TestCase, TestSamplyProfilerMixin): + # Regression test for gh-150723: exercises the binary jitdump backend + # (-Xperf_jit) end to end through samply, unlike TestSamplyProfiler which + # uses the textual perf-map backend (-Xperf). 
+ def run_samply(self, script_dir, script, activate_trampoline=True): + if activate_trampoline: + return run_samply(script_dir, sys.executable, "-Xperf_jit", script) + return run_samply(script_dir, sys.executable, script) + + def setUp(self): + super().setUp() + self.jit_files = set(pathlib.Path("/tmp/").glob("jit-*.dump")) + self.jit_files |= set(pathlib.Path("/tmp/").glob("jitted-*.so")) + + def tearDown(self) -> None: + super().tearDown() + files_to_delete = set(pathlib.Path("/tmp/").glob("jit-*.dump")) + files_to_delete |= set(pathlib.Path("/tmp/").glob("jitted-*.so")) + files_to_delete -= self.jit_files + for file in files_to_delete: + file.unlink() + + if __name__ == "__main__": unittest.main()