diff --git a/Lib/test/test_samply_profiler.py b/Lib/test/test_samply_profiler.py index ec0ed37ffd047b..f9ab9207c3c23d 100644 --- a/Lib/test/test_samply_profiler.py +++ b/Lib/test/test_samply_profiler.py @@ -240,5 +240,29 @@ def compile_trampolines_for_all_functions(): self.assertIn(line, child_perf_file_contents) +@unittest.skipUnless(samply_command_works(), "samply command doesn't work") +class TestSamplyProfilerWithJitDump(unittest.TestCase, TestSamplyProfilerMixin): + # Regression test for gh-150723: exercises the binary jitdump backend + # (-Xperf_jit) end to end through samply, unlike TestSamplyProfiler which + # uses the textual perf-map backend (-Xperf). + def run_samply(self, script_dir, script, activate_trampoline=True): + if activate_trampoline: + return run_samply(script_dir, sys.executable, "-Xperf_jit", script) + return run_samply(script_dir, sys.executable, script) + + def setUp(self): + super().setUp() + self.jit_files = set(pathlib.Path("/tmp/").glob("jit-*.dump")) + self.jit_files |= set(pathlib.Path("/tmp/").glob("jitted-*.so")) + + def tearDown(self) -> None: + super().tearDown() + files_to_delete = set(pathlib.Path("/tmp/").glob("jit-*.dump")) + files_to_delete |= set(pathlib.Path("/tmp/").glob("jitted-*.so")) + files_to_delete -= self.jit_files + for file in files_to_delete: + file.unlink() + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst new file mode 100644 index 00000000000000..1920c8cdfce4f4 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-21-01.gh-issue-150723.Hb3JDG.rst @@ -0,0 +1,4 @@ +Fix malformed perf jitdump thread ids on macOS. The ``thread_id`` field of the +``JR_CODE_LOAD`` record was written as a 64-bit value instead of the 32-bit +value required by the jitdump format, which shifted every following field and +prevented profilers from resolving Python frames. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-24-12.gh-issue-150723.WlcL_-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-24-12.gh-issue-150723.WlcL_-.rst new file mode 100644 index 00000000000000..78c896b669c239 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-01-19-24-12.gh-issue-150723.WlcL_-.rst @@ -0,0 +1,4 @@ +Fix perf jitdump timestamps on macOS. Events were stamped using +``CLOCK_MONOTONIC``, but macOS profilers timestamp their samples with +``mach_absolute_time()``. The mismatch prevented the JIT code mappings from +lining up with the samples, so no Python frame could be resolved. diff --git a/Python/perf_jit_trampoline.c b/Python/perf_jit_trampoline.c index 0c460282feceef..32b147199544cf 100644 --- a/Python/perf_jit_trampoline.c +++ b/Python/perf_jit_trampoline.c @@ -82,6 +82,9 @@ #if defined(__linux__) # include // System call interface #endif +#if defined(__APPLE__) +# include // mach_absolute_time, mach_timebase_info +#endif // ============================================================================= // CONSTANTS AND CONFIGURATION @@ -217,11 +220,7 @@ struct BaseEvent { typedef struct { struct BaseEvent base; // Common event header uint32_t process_id; // Process ID where code was generated -#if defined(__APPLE__) - uint64_t thread_id; // Thread ID where code was generated -#else uint32_t thread_id; // Thread ID where code was generated -#endif uint64_t vma; // Virtual memory address where code is loaded uint64_t code_address; // Address of the actual machine code uint64_t code_size; // Size of the machine code in bytes @@ -295,7 +294,9 @@ static PerfMapJitState perf_jit_map_state; // ============================================================================= /* Time conversion constant */ +#if !defined(__APPLE__) static const intptr_t nanoseconds_per_second = 1000000000; +#endif /* * Get current monotonic time in nanoseconds @@ -307,6 +308,18 @@ static const intptr_t nanoseconds_per_second = 1000000000; * Returns: Current monotonic time in nanoseconds since an arbitrary epoch */ static int64_t get_current_monotonic_ticks(void) { +#if defined(__APPLE__) + // On macOS the jitdump file is consumed by profilers (such as samply) that + // timestamp their samples using mach_absolute_time(). The jitdump event + // timestamps must use the same clock domain, otherwise the JIT code + // mappings cannot be lined up with the samples. + static mach_timebase_info_data_t timebase = {0, 0}; + if (timebase.denom == 0) { + (void)mach_timebase_info(&timebase); + } + uint64_t ticks = mach_absolute_time(); + return (int64_t)(ticks * timebase.numer / timebase.denom); +#else struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { Py_UNREACHABLE(); // Should never fail on supported systems @@ -318,6 +331,7 @@ static int64_t get_current_monotonic_ticks(void) { result *= nanoseconds_per_second; result += ts.tv_nsec; return result; +#endif } /* @@ -652,7 +666,12 @@ static void perf_map_jit_write_entry_with_name( ev.base.time_stamp = get_current_monotonic_ticks(); ev.process_id = getpid(); #if defined(__APPLE__) - pthread_threadid_np(NULL, &ev.thread_id); + // The jitdump format defines the thread id field as a 32-bit value, but + // pthread_threadid_np() returns a 64-bit id. Truncate it to 32 bits to + // keep the record layout identical to other platforms. + uint64_t thread_id = 0; + pthread_threadid_np(NULL, &thread_id); + ev.thread_id = (uint32_t)thread_id; #else ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call #endif