-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenMP] Add explicit attributes to every function declaration #122399
Open
jhuber6
wants to merge
1
commit into
llvm:main
Choose a base branch
from
jhuber6:Assume2
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+849
−736
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
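Summary: Instead of relying on the file-scoped `#pragma omp assumes ext_no_call_asm`, attach the assumption explicitly to every function declaration through a new `OMP_ATTRS` macro (defined as `[[omp::assume("ext_no_call_asm")]]` for GPU device variants).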
jhuber6
requested review from
carlobertolli,
jdoerfert,
jplehr,
ronlieb,
saiislam and
shiltian
January 10, 2025 01:40
@llvm/pr-subscribers-offload
Author: Joseph Huber (jhuber6)
Changes
Summary: Instead of having the scoped attributes, add this to every function.
Patch is 149.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122399.diff
28 Files Affected:
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 099634e211e7a7..e6859ab3d9e9e3 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -98,7 +98,7 @@ list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
${clang_opt_flags} --offload-device-only
-nocudalib -nogpulib -nogpuinc -nostdlibinc
- -fopenmp -fopenmp-cuda-mode
+ -fopenmp -fopenmp-cuda-mode -Wno-unknown-assumption
-Wno-unknown-cuda-version -Wno-openmp-target
-DOMPTARGET_DEVICE_RUNTIME
-I${include_directory}
diff --git a/offload/DeviceRTL/include/Allocator.h b/offload/DeviceRTL/include/Allocator.h
index 475f6a21bb47eb..d3ff7185bb29bb 100644
--- a/offload/DeviceRTL/include/Allocator.h
+++ b/offload/DeviceRTL/include/Allocator.h
@@ -26,22 +26,23 @@ namespace allocator {
static uint64_t constexpr ALIGNMENT = 16;
/// Initialize the allocator according to \p KernelEnvironment
-void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
+OMP_ATTRS void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
/// Allocate \p Size bytes.
-[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
+[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT),
+ gnu::malloc]] OMP_ATTRS void *
alloc(uint64_t Size);
/// Free the allocation pointed to by \p Ptr.
-void free(void *Ptr);
+OMP_ATTRS void free(void *Ptr);
} // namespace allocator
} // namespace ompx
extern "C" {
-[[gnu::weak]] void *malloc(size_t Size);
-[[gnu::weak]] void free(void *Ptr);
+[[gnu::weak]] OMP_ATTRS void *malloc(size_t Size);
+[[gnu::weak]] OMP_ATTRS void free(void *Ptr);
}
#pragma omp end declare target
diff --git a/offload/DeviceRTL/include/Configuration.h b/offload/DeviceRTL/include/Configuration.h
index f8b7a6c3c6c9da..cf638838a7d382 100644
--- a/offload/DeviceRTL/include/Configuration.h
+++ b/offload/DeviceRTL/include/Configuration.h
@@ -22,45 +22,45 @@ namespace config {
/// Return the number of devices in the system, same number as returned on the
/// host by omp_get_num_devices.
-uint32_t getNumDevices();
+OMP_ATTRS uint32_t getNumDevices();
/// Return the device number in the system for omp_get_device_num.
-uint32_t getDeviceNum();
+OMP_ATTRS uint32_t getDeviceNum();
/// Return the user choosen debug level.
-uint32_t getDebugKind();
+OMP_ATTRS uint32_t getDebugKind();
/// Return if teams oversubscription is assumed
-uint32_t getAssumeTeamsOversubscription();
+OMP_ATTRS uint32_t getAssumeTeamsOversubscription();
/// Return if threads oversubscription is assumed
-uint32_t getAssumeThreadsOversubscription();
+OMP_ATTRS uint32_t getAssumeThreadsOversubscription();
/// Return the amount of dynamic shared memory that was allocated at launch.
-uint64_t getDynamicMemorySize();
+OMP_ATTRS uint64_t getDynamicMemorySize();
/// Returns the cycles per second of the device's fixed frequency clock.
-uint64_t getClockFrequency();
+OMP_ATTRS uint64_t getClockFrequency();
/// Returns the pointer to the beginning of the indirect call table.
-void *getIndirectCallTablePtr();
+OMP_ATTRS void *getIndirectCallTablePtr();
/// Returns the size of the indirect call table.
-uint64_t getIndirectCallTableSize();
+OMP_ATTRS uint64_t getIndirectCallTableSize();
/// Returns the size of the indirect call table.
-uint64_t getHardwareParallelism();
+OMP_ATTRS uint64_t getHardwareParallelism();
/// Return if debugging is enabled for the given debug kind.
-bool isDebugMode(DeviceDebugKind Level);
+OMP_ATTRS bool isDebugMode(DeviceDebugKind Level);
/// Indicates if this kernel may require thread-specific states, or if it was
/// explicitly disabled by the user.
-bool mayUseThreadStates();
+OMP_ATTRS bool mayUseThreadStates();
/// Indicates if this kernel may require data environments for nested
/// parallelism, or if it was explicitly disabled by the user.
-bool mayUseNestedParallelism();
+OMP_ATTRS bool mayUseNestedParallelism();
} // namespace config
} // namespace ompx
diff --git a/offload/DeviceRTL/include/Debug.h b/offload/DeviceRTL/include/Debug.h
index 22998f44a5bea5..31b465fe425b39 100644
--- a/offload/DeviceRTL/include/Debug.h
+++ b/offload/DeviceRTL/include/Debug.h
@@ -19,11 +19,12 @@
///
/// {
extern "C" {
-void __assert_assume(bool condition);
-void __assert_fail(const char *expr, const char *file, unsigned line,
- const char *function);
-void __assert_fail_internal(const char *expr, const char *msg, const char *file,
- unsigned line, const char *function);
+OMP_ATTRS void __assert_assume(bool condition);
+OMP_ATTRS void __assert_fail(const char *expr, const char *file, unsigned line,
+ const char *function);
+OMP_ATTRS void __assert_fail_internal(const char *expr, const char *msg,
+ const char *file, unsigned line,
+ const char *function);
}
#define ASSERT(expr, msg) \
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index 259bc008f91d13..404c2d7ca8d5ef 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -20,9 +20,9 @@
// another function but only inline assembly that performs some operation or
// side-effect and then continues execution with something on the existing call
// stack.
-//
-// TODO: Find a good place for this
-#pragma omp assumes ext_no_call_asm
+#pragma omp begin declare variant match(device = {kind(gpu)})
+#define OMP_ATTRS [[omp::assume("ext_no_call_asm")]]
+#pragma omp end declare variant
enum omp_proc_bind_t {
omp_proc_bind_false = 0,
diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h
index fa66b973a4f5e7..fddd0c8722f3f2 100644
--- a/offload/DeviceRTL/include/DeviceUtils.h
+++ b/offload/DeviceRTL/include/DeviceUtils.h
@@ -60,32 +60,35 @@ struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {};
template <class T>
using remove_addrspace_t = typename remove_addrspace<T>::type;
-template <typename To, typename From> inline To bitCast(From V) {
+template <typename To, typename From> OMP_ATTRS inline To bitCast(From V) {
static_assert(sizeof(To) == sizeof(From), "Bad conversion");
return __builtin_bit_cast(To, V);
}
/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
/// is identified by \p Mask.
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
+OMP_ATTRS int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
+ int32_t Width);
-int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
+OMP_ATTRS int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
+ int32_t Width);
-int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
+OMP_ATTRS int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
+ int32_t Width);
-uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+OMP_ATTRS uint64_t ballotSync(uint64_t Mask, int32_t Pred);
/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
-uint64_t pack(uint32_t LowBits, uint32_t HighBits);
+OMP_ATTRS uint64_t pack(uint32_t LowBits, uint32_t HighBits);
/// Unpack \p Val into \p LowBits and \p HighBits.
-void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
+OMP_ATTRS void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
-bool isSharedMemPtr(void *Ptr);
+OMP_ATTRS bool isSharedMemPtr(void *Ptr);
/// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)).
-bool isThreadLocalMemPtr(void *Ptr);
+OMP_ATTRS bool isThreadLocalMemPtr(void *Ptr);
/// A pointer variable that has by design an `undef` value. Use with care.
[[clang::loader_uninitialized]] static void *const UndefPtr;
diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4f..cb0bfed8edc9df 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -28,8 +28,8 @@ extern "C" {
/// getter: returns 0.
///
///{
-void omp_set_dynamic(int);
-int omp_get_dynamic(void);
+OMP_ATTRS void omp_set_dynamic(int);
+OMP_ATTRS int omp_get_dynamic(void);
///}
/// ICV: nthreads-var, integer
@@ -43,8 +43,8 @@ int omp_get_dynamic(void);
///
///
///{
-void omp_set_num_threads(int);
-int omp_get_max_threads(void);
+OMP_ATTRS void omp_set_num_threads(int);
+OMP_ATTRS int omp_get_max_threads(void);
///}
/// ICV: thread-limit-var, computed
@@ -52,7 +52,7 @@ int omp_get_max_threads(void);
/// getter: returns thread limited defined during launch.
///
///{
-int omp_get_thread_limit(void);
+OMP_ATTRS int omp_get_thread_limit(void);
///}
/// ICV: max-active-level-var, constant 1
@@ -61,8 +61,8 @@ int omp_get_thread_limit(void);
/// getter: returns 1.
///
///{
-void omp_set_max_active_levels(int);
-int omp_get_max_active_levels(void);
+OMP_ATTRS void omp_set_max_active_levels(int);
+OMP_ATTRS int omp_get_max_active_levels(void);
///}
/// ICV: places-partition-var
@@ -76,7 +76,7 @@ int omp_get_max_active_levels(void);
/// getter: returns 0 or 1.
///
///{
-int omp_get_active_level(void);
+OMP_ATTRS int omp_get_active_level(void);
///}
/// ICV: level-var
@@ -84,88 +84,88 @@ int omp_get_active_level(void);
/// getter: returns parallel region nesting
///
///{
-int omp_get_level(void);
+OMP_ATTRS int omp_get_level(void);
///}
/// ICV: run-sched-var
///
///
///{
-void omp_set_schedule(omp_sched_t, int);
-void omp_get_schedule(omp_sched_t *, int *);
+OMP_ATTRS void omp_set_schedule(omp_sched_t, int);
+OMP_ATTRS void omp_get_schedule(omp_sched_t *, int *);
///}
/// TODO this is incomplete.
-int omp_get_num_threads(void);
-int omp_get_thread_num(void);
-void omp_set_nested(int);
+OMP_ATTRS int omp_get_num_threads(void);
+OMP_ATTRS int omp_get_thread_num(void);
+OMP_ATTRS void omp_set_nested(int);
-int omp_get_nested(void);
+OMP_ATTRS int omp_get_nested(void);
-void omp_set_max_active_levels(int Level);
+OMP_ATTRS void omp_set_max_active_levels(int Level);
-int omp_get_max_active_levels(void);
+OMP_ATTRS int omp_get_max_active_levels(void);
-omp_proc_bind_t omp_get_proc_bind(void);
+OMP_ATTRS omp_proc_bind_t omp_get_proc_bind(void);
-int omp_get_num_places(void);
+OMP_ATTRS int omp_get_num_places(void);
-int omp_get_place_num_procs(int place_num);
+OMP_ATTRS int omp_get_place_num_procs(int place_num);
-void omp_get_place_proc_ids(int place_num, int *ids);
+OMP_ATTRS void omp_get_place_proc_ids(int place_num, int *ids);
-int omp_get_place_num(void);
+OMP_ATTRS int omp_get_place_num(void);
-int omp_get_partition_num_places(void);
+OMP_ATTRS int omp_get_partition_num_places(void);
-void omp_get_partition_place_nums(int *place_nums);
+OMP_ATTRS void omp_get_partition_place_nums(int *place_nums);
-int omp_get_cancellation(void);
+OMP_ATTRS int omp_get_cancellation(void);
-void omp_set_default_device(int deviceId);
+OMP_ATTRS void omp_set_default_device(int deviceId);
-int omp_get_default_device(void);
+OMP_ATTRS int omp_get_default_device(void);
-int omp_get_num_devices(void);
+OMP_ATTRS int omp_get_num_devices(void);
-int omp_get_device_num(void);
+OMP_ATTRS int omp_get_device_num(void);
-int omp_get_num_teams(void);
+OMP_ATTRS int omp_get_num_teams(void);
-int omp_get_team_num();
+OMP_ATTRS int omp_get_team_num();
-int omp_get_initial_device(void);
+OMP_ATTRS int omp_get_initial_device(void);
-void *llvm_omp_target_dynamic_shared_alloc();
+OMP_ATTRS void *llvm_omp_target_dynamic_shared_alloc();
/// Synchronization
///
///{
-void omp_init_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_init_lock(omp_lock_t *Lock);
-void omp_destroy_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_destroy_lock(omp_lock_t *Lock);
-void omp_set_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_set_lock(omp_lock_t *Lock);
-void omp_unset_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_unset_lock(omp_lock_t *Lock);
-int omp_test_lock(omp_lock_t *Lock);
+OMP_ATTRS int omp_test_lock(omp_lock_t *Lock);
///}
/// Tasking
///
///{
-int omp_in_final(void);
+OMP_ATTRS int omp_in_final(void);
-int omp_get_max_task_priority(void);
+OMP_ATTRS int omp_get_max_task_priority(void);
///}
/// Misc
///
///{
-double omp_get_wtick(void);
+OMP_ATTRS double omp_get_wtick(void);
-double omp_get_wtime(void);
+OMP_ATTRS double omp_get_wtime(void);
///}
}
@@ -173,16 +173,16 @@ extern "C" {
/// Allocate \p Bytes in "shareable" memory and return the address. Needs to be
/// called balanced with __kmpc_free_shared like a stack (push/pop). Can be
/// called by any thread, allocation happens *per thread*.
-void *__kmpc_alloc_shared(uint64_t Bytes);
+OMP_ATTRS void *__kmpc_alloc_shared(uint64_t Bytes);
/// Deallocate \p Ptr. Needs to be called balanced with __kmpc_alloc_shared like
/// a stack (push/pop). Can be called by any thread. \p Ptr has to be the
/// allocated by __kmpc_alloc_shared by the same thread.
-void __kmpc_free_shared(void *Ptr, uint64_t Bytes);
+OMP_ATTRS void __kmpc_free_shared(void *Ptr, uint64_t Bytes);
/// Get a pointer to the memory buffer containing dynamically allocated shared
/// memory configured at launch.
-void *__kmpc_get_dynamic_shared();
+OMP_ATTRS void *__kmpc_get_dynamic_shared();
/// Allocate sufficient space for \p NumArgs sequential `void*` and store the
/// allocation address in \p GlobalArgs.
@@ -191,27 +191,28 @@ void *__kmpc_get_dynamic_shared();
///
/// We also remember it in GlobalArgsPtr to ensure the worker threads and
/// deallocation function know the allocation address too.
-void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs);
+OMP_ATTRS void __kmpc_begin_sharing_variables(void ***GlobalArgs,
+ uint64_t NumArgs);
/// Deallocate the memory allocated by __kmpc_begin_sharing_variables.
///
/// Called by the main thread after a parallel region.
-void __kmpc_end_sharing_variables();
+OMP_ATTRS void __kmpc_end_sharing_variables();
/// Store the allocation address obtained via __kmpc_begin_sharing_variables in
/// \p GlobalArgs.
///
/// Called by the worker threads in the parallel region (function).
-void __kmpc_get_shared_variables(void ***GlobalArgs);
+OMP_ATTRS void __kmpc_get_shared_variables(void ***GlobalArgs);
/// External interface to get the thread ID.
-uint32_t __kmpc_get_hardware_thread_id_in_block();
+OMP_ATTRS uint32_t __kmpc_get_hardware_thread_id_in_block();
/// External interface to get the number of threads.
-uint32_t __kmpc_get_hardware_num_threads_in_block();
+OMP_ATTRS uint32_t __kmpc_get_hardware_num_threads_in_block();
/// External interface to get the warp size.
-uint32_t __kmpc_get_warp_size();
+OMP_ATTRS uint32_t __kmpc_get_warp_size();
/// Kernel
///
@@ -219,27 +220,26 @@ uint32_t __kmpc_get_warp_size();
// Forward declaration
struct KernelEnvironmentTy;
-int8_t __kmpc_is_spmd_exec_mode();
+OMP_ATTRS int8_t __kmpc_is_spmd_exec_mode();
-int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+OMP_ATTRS int32_t
+__kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
-void __kmpc_target_deinit();
+OMP_ATTRS void __kmpc_target_deinit();
///}
/// Reduction
///
///{
-void *__kmpc_reduction_get_fixed_buffer();
+OMP_ATTRS void *__kmpc_reduction_get_fixed_buffer();
-int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(IdentTy *Loc,
- uint64_t reduce_data_size,
- void *reduce_data,
- ShuffleReductFnTy shflFct,
- InterWarpCopyFnTy cpyFct);
+OMP_ATTRS int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
+ IdentTy *Loc, uint64_t reduce_data_size, void *reduce_data,
+ ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct);
-int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
+OMP_ATTRS int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
IdentTy *Loc, void *GlobalBuffer, uint32_t num_of_records,
uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct,
InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct,
@@ -249,116 +249,120 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
/// Synchronization
///
///{
-void __kmpc_ordered(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_ordered(IdentTy *Loc, int32_t TId);
-void __kmpc_end_ordered(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_ordered(IdentTy *Loc, int32_t TId);
-int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);
-void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);
-void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
-void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
-int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
+OMP_ATTRS int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
-void __kmpc_end_master(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_master(IdentTy *Loc, int32_t TId);
-int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
+OMP_ATTRS int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
-void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
-int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
+OMP_ATTRS int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
-void __kmpc_end_single(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_single(IdentTy *Loc, int32_t TId);
-void __kmpc_flush(IdentTy *Loc);
+OMP_ATTRS void __kmpc_flush(IdentTy *Loc);
-uint64_t __kmpc_warp_active_thread_mask(void);
+OMP_ATTRS uint64_t __kmpc_warp_active_thread_mask(void);
-void __kmpc_syncwarp(uint64_t Mask);
+OMP_ATTRS void __kmpc_syncwarp(uint64_t Mask);
-void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
+OMP_ATTRS void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
-void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
+OMP_ATTRS void __kmpc_end_critical(IdentTy *Loc, int32_t TId,
+ CriticalNameTy *Name);
///}
/// Parallelism
///
///{
/// TODO
-void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn);
+OMP_ATTRS void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn);
/// TODO
-bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn);
+OMP_ATTRS bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn);
/// TODO
-void __kmpc_kernel_end_parallel();
+OMP_ATTRS void __kmpc_kernel_end_parallel();
/// TODO
-void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind);
+OMP_ATTRS void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind);
/// TODO
-void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId, int32_t NumTeams,
- int32_t ThreadLimit);
+OMP_ATTRS void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId,
+ int32_t NumTeams, int32_t ThreadLimit);
/// TODO
-uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t);
+OMP_ATTRS uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t);
///}
/// Tasking
///
///{
-TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
- size_t TaskSizeInclPrivateValues,
- size_t SharedValuesSize,
- TaskFnTy TaskFn);
+OMP_ATTRS TaskDescriptorTy *
+__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
+ size_t...
[truncated]
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Summary:
Instead of having the scoped attributes, add this to every function.