1#if USE_ITT_BUILD
2/*
3 * kmp_itt.h -- ITT Notify interface.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef KMP_ITT_H
15#define KMP_ITT_H
16
17#include "kmp_lock.h"
18
19#define INTEL_ITTNOTIFY_API_PRIVATE
20#include "ittnotify.h"
21#include "legacy/ittnotify.h"
22
// __kmp_inline qualifies the ITT wrapper declarations in this header. In debug
// builds (KMP_DEBUG non-zero) it expands to nothing, which turns inlining off;
// in all other builds it expands to `static inline`.
#if !KMP_DEBUG
#define __kmp_inline static inline
#else
#define __kmp_inline
#endif
28
#if USE_ITT_NOTIFY
// Threshold compared against the spin-iteration counter by
// KMP_FSYNC_SPIN_PREPARE() and KMP_FSYNC_SPIN_ACQUIRED() later in this header.
extern kmp_int32 __kmp_itt_prepare_delay;

// Declared with C linkage when this header is compiled as C++.
// NOTE(review): presumably shuts the ITT library down; the definition is not
// visible from this header.
#ifdef __cplusplus
extern "C"
#else
extern
#endif
    void __kmp_itt_fini_ittlib(void);
#endif
37
// Helper for arguments that exist only when USE_ITT_BUILD is enabled: here it
// expands to a comma followed by the argument, so the argument is appended to
// the surrounding argument or parameter list.
#define USE_ITT_BUILD_ARG(arg) , arg
41
// Entry points of the ITT layer that are neither inline nor tied to a single
// construct. They are defined outside this header.
// NOTE(review): going by the names these set up, tear down and reset the ITT
// state; confirm against the definitions before relying on call ordering.
void __kmp_itt_initialize();
void __kmp_itt_destroy();
void __kmp_itt_reset();
45
46// -----------------------------------------------------------------------------
47// New stuff for reporting high-level constructs.
48
49// Note the naming convention:
50//     __kmp_itt_xxxing() function should be called before action, while
51//     __kmp_itt_xxxed()  function should be called after action.
52
53// --- Parallel region reporting ---
54__kmp_inline void
55__kmp_itt_region_forking(int gtid, int team_size,
56                         int barriers); // Master only, before forking threads.
57__kmp_inline void
58__kmp_itt_region_joined(int gtid); // Master only, after joining threads.
59// (*) Note: A thread may execute tasks after this point, though.
60
61// --- Frame reporting ---
62// region=0: no regions, region=1: parallel, region=2: serialized parallel
63__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
64                                         __itt_timestamp end, int imbalance,
65                                         ident_t *loc, int team_size,
66                                         int region = 0);
67
68// --- Metadata reporting ---
69// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
70// wait time value, reduction -if this is a reduction barrier
71__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
72                                               kmp_uint64 end,
73                                               kmp_uint64 imbalance,
74                                               kmp_uint64 reduction);
75// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
76// iterations - loop trip count, chunk - chunk size
77__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
78                                          kmp_uint64 iterations,
79                                          kmp_uint64 chunk);
80__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);
81
82// --- Barrier reporting ---
83__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
84                                            int delta = 0);
85__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
86__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
87__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);
88
89// --- Taskwait reporting ---
90__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
91__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
92__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);
93
94// --- Task reporting ---
95__kmp_inline void __kmp_itt_task_starting(void *object);
96__kmp_inline void __kmp_itt_task_finished(void *object);
97
98// --- Lock reporting ---
99#if KMP_USE_DYNAMIC_LOCK
100__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
101                                          const ident_t *);
102#else
103__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
104#endif
105__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
106__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
107__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
108__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
109__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);
110
111// --- Critical reporting ---
112#if KMP_USE_DYNAMIC_LOCK
113__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
114                                              const ident_t *);
115#else
116__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
117#endif
118__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
119__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
120__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
121__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);
122
123// --- Single reporting ---
124__kmp_inline void __kmp_itt_single_start(int gtid);
125__kmp_inline void __kmp_itt_single_end(int gtid);
126
127// --- Ordered reporting ---
128__kmp_inline void __kmp_itt_ordered_init(int gtid);
129__kmp_inline void __kmp_itt_ordered_prep(int gtid);
130__kmp_inline void __kmp_itt_ordered_start(int gtid);
131__kmp_inline void __kmp_itt_ordered_end(int gtid);
132
133// --- Threads reporting ---
134__kmp_inline void __kmp_itt_thread_ignore();
135__kmp_inline void __kmp_itt_thread_name(int gtid);
136
137// --- System objects ---
138__kmp_inline void __kmp_itt_system_object_created(void *object,
139                                                  char const *name);
140
141// --- Stack stitching ---
142__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
143__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
144__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
145__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
146
147// -----------------------------------------------------------------------------
148// Old stuff for reporting low-level internal synchronization.
149
150#if USE_ITT_NOTIFY
151
/* SSC marks are consumed by SDE
   (http://software.intel.com/en-us/articles/intel-software-development-emulator).
   They mark points in instruction traces that represent spin-loops and are
   therefore uninteresting when collecting traces for architecture simulation.
   INCLUDE_SSC_MARKS may be predefined by the build; by default marks are
   enabled on Linux x86_64 only. */
#ifndef INCLUDE_SSC_MARKS
#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* INSERT_SSC_MARK(tag) emits a single mark carrying the constant `tag`.
   Marks are generated on Linux 64 only for now; in every other configuration
   the macro is a no-op expression. */
#if !(INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
#define INSERT_SSC_MARK(tag) ((void)0)
#elif defined(__INTEL_COMPILER)
#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
// Portable (at least for gcc and icc) code that inserts the necessary
// instructions: set %ebx to the tag and execute the unlikely no-op.
#define INSERT_SSC_MARK(tag)                                                   \
  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)    \
                       : "%ebx")
#endif
175
/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags; there is
   no central issuing authority, so randomness is expected to keep them from
   colliding. */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
182
// Markers for architecture simulation, one tag per event:
//   FORKING       - before the master thread forks;
//   JOINING       - at the start of the join;
//   INVOKING      - before the threads invoke microtasks;
//   DISPATCH_INIT - at the start of a dynamically scheduled loop;
//   DISPATCH_NEXT - after claiming the next iteration of a dynamically
//                   scheduled loop.
#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
194
// Low-level synchronization events. The object argument is an address that
// ties together one specific set of prepare, acquire, release, and cancel
// operations. Each macro converts its argument to `void *` and forwards it to
// the matching __itt_fsync_* call.
//
//   PREPARE   - the thread is going to start waiting for another thread to
//               send a release event. Report it just before the thread begins
//               checking for the existence of the release event.
//   CANCEL    - the thread is cancelling a wait on another thread and
//               continues execution without waiting to be released.
//   ACQUIRED  - the thread has received a release event from another thread
//               and has stopped waiting. Must occur only after the release
//               event is received.
//   RELEASING - the thread is going to send a release event to another thread
//               so that it stops waiting and continues execution. Must happen
//               just before the release event.
#define KMP_FSYNC_PREPARE(sync_obj) __itt_fsync_prepare((void *)(sync_obj))
#define KMP_FSYNC_CANCEL(sync_obj) __itt_fsync_cancel((void *)(sync_obj))
#define KMP_FSYNC_ACQUIRED(sync_obj) __itt_fsync_acquired((void *)(sync_obj))
#define KMP_FSYNC_RELEASING(sync_obj) __itt_fsync_releasing((void *)(sync_obj))
217
/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
   with a delay (and not called at all if waiting time is small). So, in spin
   loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
   the spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for example.

   KMP_FSYNC_SPIN_INIT(obj, spin):
     - declares the iteration counter `sync_iters` (initially 0) in the
       enclosing scope; the PREPARE/ACQUIRED spin macros read and update it;
     - if ITT sync reporting is available (__itt_fsync_prepare_ptr is non-null)
       and `obj` is NULL, makes `obj` fall back to `spin`;
     - emits the spin-start SSC mark.
   `obj` must be a modifiable lvalue and is evaluated more than once. Because
   the expansion is a declaration followed by statements, the macro can be used
   only once per scope and never as the body of an unbraced if/else/loop.
   Both arguments are parenthesized so that expression arguments keep their
   meaning inside the expansion. */

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
  int sync_iters = 0;                                                          \
  if (__itt_fsync_prepare_ptr && (obj) == NULL) {                              \
    (obj) = (spin);                                                            \
  } /* if */                                                                   \
  SSC_MARK_SPIN_START()
233
// KMP_FSYNC_SPIN_PREPARE(obj): call on every iteration of a spin loop.
// Requires `sync_iters` (declared by KMP_FSYNC_SPIN_INIT) to be in scope.
// While ITT sync reporting is available (__itt_fsync_prepare_ptr non-null) and
// the counter is still below __kmp_itt_prepare_delay, the counter is bumped;
// the prepare event for `obj` is reported exactly once, on the iteration where
// the counter reaches the delay. Short waits therefore report nothing.
// The argument is parenthesized before the cast so that an expression argument
// (e.g. `base + 1`) is converted as a whole rather than only its first operand.
#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
  do {                                                                         \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
      ++sync_iters;                                                            \
      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
        KMP_FSYNC_PREPARE((void *)(obj));                                      \
      } /* if */                                                               \
    } /* if */                                                                 \
  } while (0)
// KMP_FSYNC_SPIN_ACQUIRED(obj): call once the spin loop has finished waiting.
// Requires `sync_iters` (declared by KMP_FSYNC_SPIN_INIT) to be in scope.
// Always emits the spin-end SSC mark; the acquired event for `obj` is reported
// only when the iteration counter has reached __kmp_itt_prepare_delay, which is
// the same threshold KMP_FSYNC_SPIN_PREPARE uses before reporting the prepare.
// The argument is parenthesized before the cast so that an expression argument
// (e.g. `base + 1`) is converted as a whole rather than only its first operand.
#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
  do {                                                                         \
    SSC_MARK_SPIN_END();                                                       \
    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
      KMP_FSYNC_ACQUIRED((void *)(obj));                                       \
    } /* if */                                                                 \
  } while (0)
252
/* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
       KMP_ITT_IGNORE(
           ptr = malloc( size );
       );

   When the ITT state API is available (__itt_state_get_ptr is non-null) the
   current state is saved and the object mode is switched to "ignore" before
   `statement` runs; the saved state is put back afterwards.

   The state is restored only if it was actually saved: if the state API only
   becomes available while `statement` is running, nothing was captured, and
   restoring would hand an uninitialized value to __itt_state_set(). The
   restore also stays conditional on the API still being available.
   `statement` must not declare names that collide with the two
   `__itt_state*_` locals used here. */
#define KMP_ITT_IGNORE(statement)                                              \
  do {                                                                         \
    __itt_state_t __itt_state_;                                                \
    int __itt_state_saved_ = 0;                                                \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_ = __itt_state_get();                                        \
      __itt_state_saved_ = 1;                                                  \
      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);          \
    } /* if */                                                                 \
    { statement }                                                              \
    if (__itt_state_saved_ && __itt_state_get_ptr) {                           \
      __itt_state_set(__itt_state_);                                           \
    } /* if */                                                                 \
  } while (0)
270
271const int KMP_MAX_FRAME_DOMAINS =
272    512; // Maximum number of frame domains to use (maps to
273// different OpenMP regions in the user source code).
274extern kmp_int32 __kmp_barrier_domain_count;
275extern kmp_int32 __kmp_region_domain_count;
276extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
277extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
278extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
279extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
280extern __itt_domain *metadata_domain;
281extern __itt_string_handle *string_handle_imbl;
282extern __itt_string_handle *string_handle_loop;
283extern __itt_string_handle *string_handle_sngl;
284
285#else
286
// USE_ITT_NOTIFY is off: the synchronization tracing macros become no-op
// expressions, so call sites need no conditional compilation.
#define KMP_FSYNC_PREPARE(sync_obj) ((void)0)
#define KMP_FSYNC_CANCEL(sync_obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(sync_obj) ((void)0)
#define KMP_FSYNC_RELEASING(sync_obj) ((void)0)

// The spin-loop variants are no-ops as well; in particular no `sync_iters`
// counter is declared and `obj` is never modified.
#define KMP_FSYNC_SPIN_INIT(sync_obj, spin_addr) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(sync_obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(sync_obj) ((void)0)

// Without ITT there is nothing to hide: just execute the statement.
#define KMP_ITT_IGNORE(body)                                                   \
  do {                                                                         \
    body                                                                       \
  } while (0)
301
302#endif // USE_ITT_NOTIFY
303
304#if !KMP_DEBUG
305// In release mode include definitions of inline functions.
306#include "kmp_itt.inl"
307#endif
308
309#endif // KMP_ITT_H
310
311#else /* USE_ITT_BUILD */
312
// USE_ITT_BUILD is off, and USE_ITT_NOTIFY cannot be enabled without it, so
// the synchronization tracing macros become no-op expressions. Defining them
// here avoids unpleasant ifdef tests in many places.
#define KMP_FSYNC_PREPARE(sync_obj) ((void)0)
#define KMP_FSYNC_CANCEL(sync_obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(sync_obj) ((void)0)
#define KMP_FSYNC_RELEASING(sync_obj) ((void)0)

// The spin-loop variants are no-ops as well; no `sync_iters` counter is
// declared and `obj` is never modified.
#define KMP_FSYNC_SPIN_INIT(sync_obj, spin_addr) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(sync_obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(sync_obj) ((void)0)

// Without ITT there is nothing to hide: just execute the statement.
#define KMP_ITT_IGNORE(body)                                                   \
  do {                                                                         \
    body                                                                       \
  } while (0)

// An ITT-only argument disappears entirely (no comma, no argument).
#define USE_ITT_BUILD_ARG(arg)
331
332#endif /* USE_ITT_BUILD */
333