Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions Lib/test/test_samply_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,29 @@ def compile_trampolines_for_all_functions():
self.assertIn(line, child_perf_file_contents)


@unittest.skipUnless(samply_command_works(), "samply command doesn't work")
class TestSamplyProfilerWithJitDump(unittest.TestCase, TestSamplyProfilerMixin):
# Regression test for gh-150723: exercises the binary jitdump backend
# (-Xperf_jit) end to end through samply, unlike TestSamplyProfiler which
# uses the textual perf-map backend (-Xperf).
def run_samply(self, script_dir, script, activate_trampoline=True):
if activate_trampoline:
return run_samply(script_dir, sys.executable, "-Xperf_jit", script)
return run_samply(script_dir, sys.executable, script)

def setUp(self):
super().setUp()
self.jit_files = set(pathlib.Path("/tmp/").glob("jit-*.dump"))
self.jit_files |= set(pathlib.Path("/tmp/").glob("jitted-*.so"))

def tearDown(self) -> None:
super().tearDown()
files_to_delete = set(pathlib.Path("/tmp/").glob("jit-*.dump"))
files_to_delete |= set(pathlib.Path("/tmp/").glob("jitted-*.so"))
files_to_delete -= self.jit_files
for file in files_to_delete:
file.unlink()


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix malformed perf jitdump thread ids on macOS. The ``thread_id`` field of the
``JR_CODE_LOAD`` record was written as a 64-bit value instead of the 32-bit
value required by the jitdump format, which shifted every following field and
prevented profilers from resolving Python frames.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix perf jitdump timestamps on macOS. Events were stamped using
``CLOCK_MONOTONIC``, but macOS profilers timestamp their samples with
``mach_absolute_time()``. The mismatch prevented the JIT code mappings from
lining up with the samples, so no Python frame could be resolved.
29 changes: 24 additions & 5 deletions Python/perf_jit_trampoline.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@
#if defined(__linux__)
# include <sys/syscall.h> // System call interface
#endif
#if defined(__APPLE__)
# include <mach/mach_time.h> // mach_absolute_time, mach_timebase_info
#endif

// =============================================================================
// CONSTANTS AND CONFIGURATION
Expand Down Expand Up @@ -217,11 +220,7 @@ struct BaseEvent {
typedef struct {
struct BaseEvent base; // Common event header
uint32_t process_id; // Process ID where code was generated
#if defined(__APPLE__)
uint64_t thread_id; // Thread ID where code was generated
#else
uint32_t thread_id; // Thread ID where code was generated
#endif
uint64_t vma; // Virtual memory address where code is loaded
uint64_t code_address; // Address of the actual machine code
uint64_t code_size; // Size of the machine code in bytes
Expand Down Expand Up @@ -295,7 +294,9 @@ static PerfMapJitState perf_jit_map_state;
// =============================================================================

/* Time conversion constant */
#if !defined(__APPLE__)
static const intptr_t nanoseconds_per_second = 1000000000;
#endif

/*
* Get current monotonic time in nanoseconds
Expand All @@ -307,6 +308,18 @@ static const intptr_t nanoseconds_per_second = 1000000000;
* Returns: Current monotonic time in nanoseconds since an arbitrary epoch
*/
static int64_t get_current_monotonic_ticks(void) {
#if defined(__APPLE__)
// On macOS the jitdump file is consumed by profilers (such as samply) that
// timestamp their samples using mach_absolute_time(). The jitdump event
// timestamps must use the same clock domain, otherwise the JIT code
// mappings cannot be lined up with the samples.
static mach_timebase_info_data_t timebase = {0, 0};
if (timebase.denom == 0) {
(void)mach_timebase_info(&timebase);
}
uint64_t ticks = mach_absolute_time();
return (int64_t)(ticks * timebase.numer / timebase.denom);
#else
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
Py_UNREACHABLE(); // Should never fail on supported systems
Expand All @@ -318,6 +331,7 @@ static int64_t get_current_monotonic_ticks(void) {
result *= nanoseconds_per_second;
result += ts.tv_nsec;
return result;
#endif
}

/*
Expand Down Expand Up @@ -652,7 +666,12 @@ static void perf_map_jit_write_entry_with_name(
ev.base.time_stamp = get_current_monotonic_ticks();
ev.process_id = getpid();
#if defined(__APPLE__)
pthread_threadid_np(NULL, &ev.thread_id);
// The jitdump format defines the thread id field as a 32-bit value, but
// pthread_threadid_np() returns a 64-bit id. Truncate it to 32 bits to
// keep the record layout identical to other platforms.
uint64_t thread_id = 0;
pthread_threadid_np(NULL, &thread_id);
ev.thread_id = (uint32_t)thread_id;
#else
ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call
#endif
Expand Down
Loading