/*
 * X-macro table of OMPT inquiry-function names.
 *
 * Expands to one invocation `macro(name)` per entry, in the order listed
 * below, with no separator between invocations.  The caller supplies a
 * one-argument function-like macro and decides what each entry becomes
 * (a declaration, a string, a table row, ...).
 */
#define FOREACH_OMPT_INQUIRY_FN(macro)          \
    /* enumeration */                           \
    macro(ompt_enumerate_states)                \
    macro(ompt_enumerate_mutex_impls)           \
    /* callbacks */                             \
    macro(ompt_set_callback)                    \
    macro(ompt_get_callback)                    \
    /* state */                                 \
    macro(ompt_get_state)                       \
    /* parallel regions, tasks and threads */   \
    macro(ompt_get_parallel_info)               \
    macro(ompt_get_task_info)                   \
    macro(ompt_get_task_memory)                 \
    macro(ompt_get_thread_data)                 \
    macro(ompt_get_unique_id)                   \
    macro(ompt_finalize_tool)                   \
    /* processors and places */                 \
    macro(ompt_get_num_procs)                   \
    macro(ompt_get_num_places)                  \
    macro(ompt_get_place_proc_ids)              \
    macro(ompt_get_place_num)                   \
    macro(ompt_get_partition_place_nums)        \
    macro(ompt_get_proc_id)                     \
    /* target and devices */                    \
    macro(ompt_get_target_info)                 \
    macro(ompt_get_num_devices)
/*
 * X-macro table of OMPT thread states.
 *
 * Expands to one invocation `macro(state, code)` per entry, in the order
 * listed below.  `state` is the enumerator name and `code` its numeric
 * value; related states share the upper hex digits of their code.
 *
 * ompt_state_undefined is listed first even though its code (0x102) is
 * numerically the largest in the table.
 *
 * The definition ends on the last entry.  That line must NOT carry a line
 * continuation: a trailing backslash there would splice whatever directive
 * follows into this macro's replacement list.
 */
#define FOREACH_OMPT_STATE(macro)                               \
    /* undefined state */                                       \
    macro(ompt_state_undefined, 0x102)                          \
    /* work states (0x00x) */                                   \
    macro(ompt_state_work_serial, 0x000)                        \
    macro(ompt_state_work_parallel, 0x001)                      \
    macro(ompt_state_work_reduction, 0x002)                     \
    /* barrier wait states (0x01x) */                           \
    macro(ompt_state_wait_barrier, 0x010)                       \
    macro(ompt_state_wait_barrier_implicit_parallel, 0x011)     \
    macro(ompt_state_wait_barrier_implicit_workshare, 0x012)    \
    macro(ompt_state_wait_barrier_implicit, 0x013)              \
    macro(ompt_state_wait_barrier_explicit, 0x014)              \
    /* task wait states (0x02x) */                              \
    macro(ompt_state_wait_taskwait, 0x020)                      \
    macro(ompt_state_wait_taskgroup, 0x021)                     \
    /* mutex wait states (0x04x) */                             \
    macro(ompt_state_wait_mutex, 0x040)                         \
    macro(ompt_state_wait_lock, 0x041)                          \
    macro(ompt_state_wait_critical, 0x042)                      \
    macro(ompt_state_wait_atomic, 0x043)                        \
    macro(ompt_state_wait_ordered, 0x044)                       \
    /* target wait states (0x08x) */                            \
    macro(ompt_state_wait_target, 0x080)                        \
    macro(ompt_state_wait_target_map, 0x081)                    \
    macro(ompt_state_wait_target_update, 0x082)                 \
    /* miscellaneous states (0x10x) */                          \
    macro(ompt_state_idle, 0x100)                               \
    macro(ompt_state_overhead, 0x101)
/*
 * X-macro table of mutex implementation kinds.
 *
 * Expands to one invocation `macro(impl, code)` per entry, in ascending
 * order of `code` (0 through 3).
 */
#define FOREACH_KMP_MUTEX_IMPL(macro)       \
    macro(kmp_mutex_impl_none, 0)           \
    macro(kmp_mutex_impl_spin, 1)           \
    macro(kmp_mutex_impl_queuing, 2)        \
    macro(kmp_mutex_impl_speculative, 3)
/*
 * X-macro table of OMPT callback events.
 *
 * Expands to one invocation `macro(event, callback_type, id)` per entry:
 *   event          - name of the callback event
 *   callback_type  - function-pointer type used for that event's callback
 *   id             - numeric event id; the ids below run 1..32 without gaps
 *
 * Several events deliberately share one callback type:
 *   ompt_callback_sync_region_t    - sync_region_wait, sync_region, reduction
 *   ompt_callback_mutex_t          - mutex_released, lock_destroy, mutex_acquired
 *   ompt_callback_mutex_acquire_t  - lock_init, mutex_acquire
 */
#define FOREACH_OMPT_EVENT(macro)                                                       \
    /* threads */                                                                       \
    macro(ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1)                  \
    macro(ompt_callback_thread_end, ompt_callback_thread_end_t, 2)                      \
    /* parallel regions */                                                              \
    macro(ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3)              \
    macro(ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4)                  \
    /* tasks */                                                                         \
    macro(ompt_callback_task_create, ompt_callback_task_create_t, 5)                    \
    macro(ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6)                \
    macro(ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7)                \
    /* target regions */                                                                \
    macro(ompt_callback_target, ompt_callback_target_t, 8)                              \
    macro(ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9)              \
    macro(ompt_callback_target_submit, ompt_callback_target_submit_t, 10)               \
    /* tool control */                                                                  \
    macro(ompt_callback_control_tool, ompt_callback_control_tool_t, 11)                 \
    /* devices */                                                                       \
    macro(ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12)       \
    macro(ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13)           \
    macro(ompt_callback_device_load, ompt_callback_device_load_t, 14)                   \
    macro(ompt_callback_device_unload, ompt_callback_device_unload_t, 15)               \
    /* synchronization, dependences and work */                                         \
    macro(ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16)              \
    macro(ompt_callback_mutex_released, ompt_callback_mutex_t, 17)                      \
    macro(ompt_callback_dependences, ompt_callback_dependences_t, 18)                   \
    macro(ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19)           \
    macro(ompt_callback_work, ompt_callback_work_t, 20)                                 \
    macro(ompt_callback_master, ompt_callback_master_t, 21)                             \
    macro(ompt_callback_target_map, ompt_callback_target_map_t, 22)                     \
    macro(ompt_callback_sync_region, ompt_callback_sync_region_t, 23)                   \
    /* locks and mutexes */                                                             \
    macro(ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24)                   \
    macro(ompt_callback_lock_destroy, ompt_callback_mutex_t, 25)                        \
    macro(ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26)               \
    macro(ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27)                      \
    macro(ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28)                       \
    /* flush, cancel, reduction, dispatch */                                            \
    macro(ompt_callback_flush, ompt_callback_flush_t, 29)                               \
    macro(ompt_callback_cancel, ompt_callback_cancel_t, 30)                             \
    macro(ompt_callback_reduction, ompt_callback_sync_region_t, 31)                     \
    macro(ompt_callback_dispatch, ompt_callback_dispatch_t, 32)
163 #define kmp_mutex_impl_macro(impl, code) impl = code,
165 #undef kmp_mutex_impl_macro
447 const char *interface_function_name
488 int initial_device_num,
514 const char **next_state_name
520 const char **next_impl_name
646 typedef void *(*ompt_get_record_native_t) (
653 (*ompt_get_record_abstract_t) (
674 unsigned int requested_parallelism,
676 const void *codeptr_ra
691 const void *codeptr_ra
707 const void *codeptr_ra
739 const void *codeptr_ra
788 unsigned int actual_parallelism,
806 const void *codeptr_ra
821 const void *codeptr_ra
837 const void *codeptr_ra
851 const void *codeptr_ra
863 const void *codeptr_ra
874 const void *codeptr_ra
884 const void *codeptr_ra
898 const char *documentation
907 const char *filename,
908 int64_t offset_in_file,
930 const void *codeptr_ra
951 const void *codeptr_ra
969 unsigned int *mapping_flags,
970 const void *codeptr_ra
986 unsigned int requested_num_teams
1000 const void *codeptr_ra
/*
 * "None" sentinel values.
 *
 * All expand to the integer constant 0 except:
 *   ompt_data_none - expands to the brace initializer {0}, so it is only
 *                    usable where an initializer is allowed, not as an
 *                    expression operand;
 *   ompt_addr_none - expands to ~0, i.e. an int with every bit set.
 */
#define ompt_id_none            0
#define ompt_data_none          {0}
#define ompt_time_none          0
#define ompt_hwid_none          0
#define ompt_addr_none          ~0
#define ompt_mutex_impl_none    0
#define ompt_wait_id_none       0

#define ompd_segment_none       0
static __inline__ uint64_t
static __inline__ uint8_t
int(* ompt_enumerate_states_t)(int current_state, int *next_state, const char **next_state_name)
ompt_set_result_t(* ompt_set_callback_t)(ompt_callbacks_t event, ompt_callback_t callback)
void(* ompt_callback_implicit_task_t)(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int actual_parallelism, unsigned int index, int flags)
void(* ompt_callback_target_map_t)(ompt_id_t target_id, unsigned int nitems, void **host_addr, void **device_addr, size_t *bytes, unsigned int *mapping_flags, const void *codeptr_ra)
@ ompt_task_early_fulfill
void(* ompt_interface_fn_t)(void)
struct ompt_record_parallel_begin_t ompt_record_parallel_begin_t
struct ompt_record_target_data_op_t ompt_record_target_data_op_t
int(* ompt_get_task_memory_t)(void **addr, size_t *size, int block)
void(* ompt_callback_device_load_t)(int device_num, const char *filename, int64_t offset_in_file, void *vma_in_file, size_t bytes, void *host_addr, void *device_addr, uint64_t module_id)
struct ompt_record_master_t ompt_record_master_t
struct ompt_record_cancel_t ompt_record_cancel_t
void(* ompt_callback_task_schedule_t)(ompt_data_t *prior_task_data, ompt_task_status_t prior_task_status, ompt_data_t *next_task_data)
void(* ompt_callback_task_create_t)(ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *new_task_data, int flags, int has_dependences, const void *codeptr_ra)
int(* ompt_get_place_proc_ids_t)(int place_num, int ids_size, int *ids)
union ompt_data_t ompt_data_t
#define FOREACH_KMP_MUTEX_IMPL(macro)
void(* ompt_callback_thread_end_t)(ompt_data_t *thread_data)
void(* ompt_callback_device_unload_t)(int device_num, uint64_t module_id)
void(* ompt_callback_t)(void)
int(* ompt_get_partition_place_nums_t)(int place_nums_size, int *place_nums)
#define kmp_mutex_impl_macro(impl, code)
double(* ompt_translate_time_t)(ompt_device_t *device, ompt_device_time_t time)
ompt_interface_fn_t(* ompt_function_lookup_t)(const char *interface_function_name)
int(* ompt_get_num_devices_t)(void)
uint64_t ompt_device_time_t
@ ompt_cancel_discarded_task
struct ompt_record_target_kernel_t ompt_record_target_kernel_t
@ ompt_mutex_test_nest_lock
@ ompt_dependence_type_in
@ ompt_dependence_type_out
@ ompt_dependence_type_sink
@ ompt_dependence_type_source
@ ompt_dependence_type_mutexinoutset
@ ompt_dependence_type_inout
void(* ompt_callback_flush_t)(ompt_data_t *thread_data, const void *codeptr_ra)
struct ompt_record_abstract_t ompt_record_abstract_t
void(* ompt_callback_target_submit_t)(ompt_id_t target_id, ompt_id_t host_op_id, unsigned int requested_num_teams)
@ ompt_parallel_invoker_runtime
@ ompt_parallel_invoker_program
@ ompt_target_map_flag_implicit
@ ompt_target_map_flag_to
@ ompt_target_map_flag_alloc
@ ompt_target_map_flag_delete
@ ompt_target_map_flag_from
@ ompt_target_map_flag_release
struct ompt_record_control_tool_t ompt_record_control_tool_t
void(* ompt_finalize_tool_t)(void)
struct ompd_frame_info_t ompd_frame_info_t
ompt_set_result_t(* ompt_set_trace_ompt_t)(ompt_device_t *device, unsigned int enable, unsigned int etype)
int(* ompt_get_place_num_t)(void)
struct ompt_record_parallel_end_t ompt_record_parallel_end_t
int(* ompt_pause_trace_t)(ompt_device_t *device, int begin_pause)
struct ompt_record_dependences_t ompt_record_dependences_t
struct ompt_record_task_dependence_t ompt_record_task_dependence_t
void(* ompt_callback_mutex_acquire_t)(ompt_mutex_t kind, unsigned int hint, unsigned int impl, ompt_wait_id_t wait_id, const void *codeptr_ra)
int(* ompt_get_num_procs_t)(void)
int(* ompt_get_device_num_procs_t)(ompt_device_t *device)
int(* ompt_enumerate_mutex_impls_t)(int current_impl, int *next_impl, const char **next_impl_name)
void(* ompt_finalize_t)(ompt_data_t *tool_data)
void(* ompt_callback_device_finalize_t)(int device_num)
void(* ompt_callback_master_t)(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
struct ompt_record_sync_region_t ompt_record_sync_region_t
struct ompt_record_task_create_t ompt_record_task_create_t
void(* ompt_callback_work_t)(ompt_work_t wstype, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, uint64_t count, const void *codeptr_ra)
struct ompt_record_thread_begin_t ompt_record_thread_begin_t
void(* ompt_callback_thread_begin_t)(ompt_thread_t thread_type, ompt_data_t *thread_data)
void(* ompt_callback_device_initialize_t)(int device_num, const char *type, ompt_device_t *device, ompt_function_lookup_t lookup, const char *documentation)
void(* ompt_callback_parallel_begin_t)(ompt_data_t *encountering_task_data, const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, unsigned int requested_parallelism, int flags, const void *codeptr_ra)
@ ompt_target_data_disassociate
@ ompt_target_data_transfer_to_device
@ ompt_target_data_associate
@ ompt_target_data_delete
@ ompt_target_data_transfer_from_device
@ ompt_sync_region_reduction
@ ompt_sync_region_barrier
@ ompt_sync_region_barrier_implementation
@ ompt_sync_region_taskgroup
@ ompt_sync_region_taskwait
@ ompt_sync_region_barrier_explicit
@ ompt_sync_region_barrier_implicit
uint64_t(* ompt_get_unique_id_t)(void)
uint64_t ompt_buffer_cursor_t
@ ompt_work_single_executor
int(* ompt_get_state_t)(ompt_wait_id_t *wait_id)
struct ompt_record_dispatch_t ompt_record_dispatch_t
struct ompt_start_tool_result_t ompt_start_tool_result_t
struct ompd_address_t ompd_address_t
int(* ompt_get_parallel_info_t)(int ancestor_level, ompt_data_t **parallel_data, int *team_size)
struct ompt_record_implicit_task_t ompt_record_implicit_task_t
void(* ompt_callback_dispatch_t)(ompt_data_t *parallel_data, ompt_data_t *task_data, ompt_dispatch_t kind, ompt_data_t instance)
@ ompt_dispatch_iteration
struct ompt_record_mutex_acquire_t ompt_record_mutex_acquire_t
uint64_t ompd_thread_id_t
@ ompt_callback_lock_init
@ ompt_callback_device_initialize
@ ompt_callback_sync_region
@ ompt_callback_nest_lock
@ ompt_callback_thread_begin
@ ompt_callback_thread_end
@ ompt_callback_task_schedule
@ ompt_callback_mutex_released
@ ompt_callback_device_unload
@ ompt_callback_device_load
@ ompt_callback_mutex_acquired
@ ompt_callback_target_map
@ ompt_callback_dependences
@ ompt_callback_parallel_end
@ ompt_callback_task_create
@ ompt_callback_device_finalize
@ ompt_callback_sync_region_wait
@ ompt_callback_target_data_op
@ ompt_callback_target_submit
@ ompt_callback_control_tool
@ ompt_callback_reduction
@ ompt_callback_mutex_acquire
@ ompt_callback_parallel_begin
@ ompt_callback_lock_destroy
@ ompt_callback_implicit_task
@ ompt_callback_task_dependence
@ ompt_native_kernel_invocation
@ ompt_native_data_motion_explicit
@ ompt_native_data_motion_implicit
@ ompt_native_kernel_execution
ompt_record_t(* ompt_get_record_type_t)(ompt_buffer_t *buffer, ompt_buffer_cursor_t current)
int(* ompt_start_trace_t)(ompt_device_t *device, ompt_callback_buffer_request_t request, ompt_callback_buffer_complete_t complete)
@ ompt_state_wait_barrier_explicit
@ ompt_state_wait_ordered
@ ompt_state_wait_critical
@ ompt_state_wait_target_map
@ ompt_state_work_parallel
@ ompt_state_wait_barrier_implicit
@ ompt_state_work_reduction
@ ompt_state_wait_target_update
@ ompt_state_wait_barrier_implicit_parallel
@ ompt_state_wait_taskwait
@ ompt_state_wait_barrier_implicit_workshare
@ ompt_state_wait_taskgroup
@ ompt_state_wait_barrier
struct ompt_record_work_t ompt_record_work_t
int(* ompt_get_num_places_t)(void)
struct ompt_dependence_t ompt_dependence_t
int(* ompt_stop_trace_t)(ompt_device_t *device)
@ ompt_record_native_event
@ ompt_record_native_info
ompt_device_time_t(* ompt_get_device_time_t)(ompt_device_t *device)
void(* ompt_callback_parallel_end_t)(ompt_data_t *parallel_data, ompt_data_t *encountering_task_data, int flags, const void *codeptr_ra)
struct ompd_device_type_sizes_t ompd_device_type_sizes_t
void(* ompt_callback_mutex_t)(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr_ra)
@ ompt_set_sometimes_paired
@ ompd_scope_address_space
@ ompd_scope_implicit_task
void(* ompt_callback_dependences_t)(ompt_data_t *task_data, const ompt_dependence_t *deps, int ndeps)
int(* ompt_initialize_t)(ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t *tool_data)
int(* ompt_get_task_info_t)(int ancestor_level, int *flags, ompt_data_t **task_data, ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num)
struct ompt_record_target_t ompt_record_target_t
void(* ompt_callback_target_data_op_t)(ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, size_t bytes, const void *codeptr_ra)
struct ompt_frame_t ompt_frame_t
void(* ompt_callback_target_t)(ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num, ompt_data_t *task_data, ompt_id_t target_id, const void *codeptr_ra)
struct ompt_record_nest_lock_t ompt_record_nest_lock_t
struct ompt_record_task_schedule_t ompt_record_task_schedule_t
int(* ompt_flush_trace_t)(ompt_device_t *device)
struct ompt_record_mutex_t ompt_record_mutex_t
void(* ompt_callback_buffer_request_t)(int device_num, ompt_buffer_t **buffer, size_t *bytes)
int(* ompt_callback_control_tool_t)(uint64_t command, uint64_t modifier, void *arg, const void *codeptr_ra)
ompt_set_result_t(* ompt_set_trace_native_t)(ompt_device_t *device, int enable, int flags)
int(* ompt_get_callback_t)(ompt_callbacks_t event, ompt_callback_t *callback)
@ ompd_rc_needs_state_tracking
@ ompd_rc_device_read_error
@ ompd_rc_device_write_error
struct ompt_record_target_map_t ompt_record_target_map_t
@ ompt_frame_framepointer
@ ompt_frame_stackaddress
void(* ompt_callback_task_dependence_t)(ompt_data_t *src_task_data, ompt_data_t *sink_task_data)
void(* ompt_callback_nest_lock_t)(ompt_scope_endpoint_t endpoint, ompt_wait_id_t wait_id, const void *codeptr_ra)
struct ompt_record_flush_t ompt_record_flush_t
int(* ompt_get_target_info_t)(uint64_t *device_num, ompt_id_t *target_id, ompt_id_t *host_op_id)
void(* ompt_callback_cancel_t)(ompt_data_t *task_data, int flags, const void *codeptr_ra)
int(* ompt_get_proc_id_t)(void)
int(* ompt_advance_buffer_cursor_t)(ompt_device_t *device, ompt_buffer_t *buffer, size_t size, ompt_buffer_cursor_t current, ompt_buffer_cursor_t *next)
void(* ompt_callback_sync_region_t)(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr_ra)
struct ompt_record_ompt_t ompt_record_ompt_t
void(* ompt_callback_buffer_complete_t)(int device_num, ompt_buffer_t *buffer, size_t bytes, ompt_buffer_cursor_t begin, int buffer_owned)
ompd_address_t frame_address
ompt_dependence_type_t dependence_type
ompt_device_time_t start_time
ompt_record_native_t rclass
ompt_device_time_t end_time
unsigned int actual_parallelism
ompt_scope_endpoint_t endpoint
ompt_scope_endpoint_t endpoint
ompt_scope_endpoint_t endpoint
ompt_record_task_create_t task_create
ompt_record_sync_region_t sync_region
ompt_record_master_t master
ompt_record_cancel_t cancel
ompt_record_control_tool_t control_tool
ompt_record_dependences_t dependences
ompt_record_nest_lock_t nest_lock
ompt_record_target_t target
ompt_record_task_dependence_t task_dependence
ompt_record_flush_t flush
ompt_record_target_kernel_t target_kernel
ompt_record_parallel_begin_t parallel_begin
ompt_record_task_schedule_t task_schedule
ompt_record_mutex_acquire_t mutex_acquire
ompt_record_target_data_op_t target_data_op
ompt_record_target_map_t target_map
ompt_record_thread_begin_t thread_begin
union ompt_record_ompt_t::@0 record
ompt_record_implicit_task_t implicit_task
ompt_record_mutex_t mutex
ompt_record_parallel_end_t parallel_end
ompt_record_dispatch_t dispatch
unsigned int requested_parallelism
ompt_id_t encountering_task_id
ompt_id_t encountering_task_id
ompt_scope_endpoint_t endpoint
ompt_device_time_t end_time
ompt_target_data_op_t optype
unsigned int granted_num_teams
ompt_device_time_t end_time
unsigned int requested_num_teams
unsigned int * mapping_flags
ompt_scope_endpoint_t endpoint
ompt_id_t encountering_task_id
ompt_task_status_t prior_task_status
ompt_thread_t thread_type
ompt_scope_endpoint_t endpoint