[OpenMP] Add explicit attributes to every function declaration #122399

jhuber6 · 2025-01-10T01:40:24Z

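Summary:
Instead of relying on the scoped `#pragma omp assumes ext_no_call_asm` directive, attach the assumption explicitly to every function declaration through a new `OMP_ATTRS` macro, defined as `[[omp::assume("ext_no_call_asm")]]`.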

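Summary: Instead of relying on the scoped `#pragma omp assumes ext_no_call_asm` directive, attach the assumption explicitly to every function declaration through a new `OMP_ATTRS` macro, defined as `[[omp::assume("ext_no_call_asm")]]`.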

llvmbot · 2025-01-10T01:41:01Z

@llvm/pr-subscribers-offload

Author: Joseph Huber (jhuber6)

Changes

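Summary:
Instead of relying on the scoped `#pragma omp assumes ext_no_call_asm` directive, attach the assumption explicitly to every function declaration through a new `OMP_ATTRS` macro, defined as `[[omp::assume("ext_no_call_asm")]]`.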

Patch is 149.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122399.diff

28 Files Affected:

(modified) offload/DeviceRTL/CMakeLists.txt (+1-1)
(modified) offload/DeviceRTL/include/Allocator.h (+6-5)
(modified) offload/DeviceRTL/include/Configuration.h (+13-13)
(modified) offload/DeviceRTL/include/Debug.h (+6-5)
(modified) offload/DeviceRTL/include/DeviceTypes.h (+3-3)
(modified) offload/DeviceRTL/include/DeviceUtils.h (+12-9)
(modified) offload/DeviceRTL/include/Interface.h (+117-113)
(modified) offload/DeviceRTL/include/LibC.h (+3-3)
(modified) offload/DeviceRTL/include/Mapping.h (+22-22)
(modified) offload/DeviceRTL/include/Profiling.h (+5-3)
(modified) offload/DeviceRTL/include/State.h (+50-45)
(modified) offload/DeviceRTL/include/Synchronization.h (+27-26)
(modified) offload/DeviceRTL/include/Workshare.h (+3-1)
(modified) offload/DeviceRTL/src/Allocator.cpp (+8-5)
(modified) offload/DeviceRTL/src/Configuration.cpp (+15-13)
(modified) offload/DeviceRTL/src/Debug.cpp (+7-5)
(modified) offload/DeviceRTL/src/DeviceUtils.cpp (+39-28)
(modified) offload/DeviceRTL/src/Kernel.cpp (+7-6)
(modified) offload/DeviceRTL/src/LibC.cpp (+9-8)
(modified) offload/DeviceRTL/src/Mapping.cpp (+72-69)
(modified) offload/DeviceRTL/src/Misc.cpp (+20-16)
(modified) offload/DeviceRTL/src/Parallelism.cpp (+22-16)
(modified) offload/DeviceRTL/src/Profiling.cpp (+3-3)
(modified) offload/DeviceRTL/src/Reduction.cpp (+27-27)
(modified) offload/DeviceRTL/src/State.cpp (+94-77)
(modified) offload/DeviceRTL/src/Synchronization.cpp (+114-90)
(modified) offload/DeviceRTL/src/Tasking.cpp (+27-25)
(modified) offload/DeviceRTL/src/Workshare.cpp (+117-99)

diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 099634e211e7a7..e6859ab3d9e9e3 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -98,7 +98,7 @@ list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
 set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
               ${clang_opt_flags} --offload-device-only
              -nocudalib -nogpulib -nogpuinc -nostdlibinc
-             -fopenmp -fopenmp-cuda-mode
+             -fopenmp -fopenmp-cuda-mode -Wno-unknown-assumption
              -Wno-unknown-cuda-version -Wno-openmp-target
              -DOMPTARGET_DEVICE_RUNTIME
              -I${include_directory}
diff --git a/offload/DeviceRTL/include/Allocator.h b/offload/DeviceRTL/include/Allocator.h
index 475f6a21bb47eb..d3ff7185bb29bb 100644
--- a/offload/DeviceRTL/include/Allocator.h
+++ b/offload/DeviceRTL/include/Allocator.h
@@ -26,22 +26,23 @@ namespace allocator {
 static uint64_t constexpr ALIGNMENT = 16;
 
 /// Initialize the allocator according to \p KernelEnvironment
-void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
+OMP_ATTRS void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
 
 /// Allocate \p Size bytes.
-[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
+[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT),
+  gnu::malloc]] OMP_ATTRS void *
 alloc(uint64_t Size);
 
 /// Free the allocation pointed to by \p Ptr.
-void free(void *Ptr);
+OMP_ATTRS void free(void *Ptr);
 
 } // namespace allocator
 
 } // namespace ompx
 
 extern "C" {
-[[gnu::weak]] void *malloc(size_t Size);
-[[gnu::weak]] void free(void *Ptr);
+[[gnu::weak]] OMP_ATTRS void *malloc(size_t Size);
+[[gnu::weak]] OMP_ATTRS void free(void *Ptr);
 }
 
 #pragma omp end declare target
diff --git a/offload/DeviceRTL/include/Configuration.h b/offload/DeviceRTL/include/Configuration.h
index f8b7a6c3c6c9da..cf638838a7d382 100644
--- a/offload/DeviceRTL/include/Configuration.h
+++ b/offload/DeviceRTL/include/Configuration.h
@@ -22,45 +22,45 @@ namespace config {
 
 /// Return the number of devices in the system, same number as returned on the
 /// host by omp_get_num_devices.
-uint32_t getNumDevices();
+OMP_ATTRS uint32_t getNumDevices();
 
 /// Return the device number in the system for omp_get_device_num.
-uint32_t getDeviceNum();
+OMP_ATTRS uint32_t getDeviceNum();
 
 /// Return the user choosen debug level.
-uint32_t getDebugKind();
+OMP_ATTRS uint32_t getDebugKind();
 
 /// Return if teams oversubscription is assumed
-uint32_t getAssumeTeamsOversubscription();
+OMP_ATTRS uint32_t getAssumeTeamsOversubscription();
 
 /// Return if threads oversubscription is assumed
-uint32_t getAssumeThreadsOversubscription();
+OMP_ATTRS uint32_t getAssumeThreadsOversubscription();
 
 /// Return the amount of dynamic shared memory that was allocated at launch.
-uint64_t getDynamicMemorySize();
+OMP_ATTRS uint64_t getDynamicMemorySize();
 
 /// Returns the cycles per second of the device's fixed frequency clock.
-uint64_t getClockFrequency();
+OMP_ATTRS uint64_t getClockFrequency();
 
 /// Returns the pointer to the beginning of the indirect call table.
-void *getIndirectCallTablePtr();
+OMP_ATTRS void *getIndirectCallTablePtr();
 
 /// Returns the size of the indirect call table.
-uint64_t getIndirectCallTableSize();
+OMP_ATTRS uint64_t getIndirectCallTableSize();
 
 /// Returns the size of the indirect call table.
-uint64_t getHardwareParallelism();
+OMP_ATTRS uint64_t getHardwareParallelism();
 
 /// Return if debugging is enabled for the given debug kind.
-bool isDebugMode(DeviceDebugKind Level);
+OMP_ATTRS bool isDebugMode(DeviceDebugKind Level);
 
 /// Indicates if this kernel may require thread-specific states, or if it was
 /// explicitly disabled by the user.
-bool mayUseThreadStates();
+OMP_ATTRS bool mayUseThreadStates();
 
 /// Indicates if this kernel may require data environments for nested
 /// parallelism, or if it was explicitly disabled by the user.
-bool mayUseNestedParallelism();
+OMP_ATTRS bool mayUseNestedParallelism();
 
 } // namespace config
 } // namespace ompx
diff --git a/offload/DeviceRTL/include/Debug.h b/offload/DeviceRTL/include/Debug.h
index 22998f44a5bea5..31b465fe425b39 100644
--- a/offload/DeviceRTL/include/Debug.h
+++ b/offload/DeviceRTL/include/Debug.h
@@ -19,11 +19,12 @@
 ///
 /// {
 extern "C" {
-void __assert_assume(bool condition);
-void __assert_fail(const char *expr, const char *file, unsigned line,
-                   const char *function);
-void __assert_fail_internal(const char *expr, const char *msg, const char *file,
-                            unsigned line, const char *function);
+OMP_ATTRS void __assert_assume(bool condition);
+OMP_ATTRS void __assert_fail(const char *expr, const char *file, unsigned line,
+                             const char *function);
+OMP_ATTRS void __assert_fail_internal(const char *expr, const char *msg,
+                                      const char *file, unsigned line,
+                                      const char *function);
 }
 
 #define ASSERT(expr, msg)                                                      \
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index 259bc008f91d13..404c2d7ca8d5ef 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -20,9 +20,9 @@
 // another function but only inline assembly that performs some operation or
 // side-effect and then continues execution with something on the existing call
 // stack.
-//
-// TODO: Find a good place for this
-#pragma omp assumes ext_no_call_asm
+#pragma omp begin declare variant match(device = {kind(gpu)})
+#define OMP_ATTRS [[omp::assume("ext_no_call_asm")]]
+#pragma omp end declare variant
 
 enum omp_proc_bind_t {
   omp_proc_bind_false = 0,
diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h
index fa66b973a4f5e7..fddd0c8722f3f2 100644
--- a/offload/DeviceRTL/include/DeviceUtils.h
+++ b/offload/DeviceRTL/include/DeviceUtils.h
@@ -60,32 +60,35 @@ struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {};
 template <class T>
 using remove_addrspace_t = typename remove_addrspace<T>::type;
 
-template <typename To, typename From> inline To bitCast(From V) {
+template <typename To, typename From> OMP_ATTRS inline To bitCast(From V) {
   static_assert(sizeof(To) == sizeof(From), "Bad conversion");
   return __builtin_bit_cast(To, V);
 }
 
 /// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
 /// is identified by \p Mask.
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
+OMP_ATTRS int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
+                          int32_t Width);
 
-int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
+OMP_ATTRS int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
+                              int32_t Width);
 
-int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
+OMP_ATTRS int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
+                              int32_t Width);
 
-uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+OMP_ATTRS uint64_t ballotSync(uint64_t Mask, int32_t Pred);
 
 /// Return \p LowBits and \p HighBits packed into a single 64 bit value.
-uint64_t pack(uint32_t LowBits, uint32_t HighBits);
+OMP_ATTRS uint64_t pack(uint32_t LowBits, uint32_t HighBits);
 
 /// Unpack \p Val into \p LowBits and \p HighBits.
-void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
+OMP_ATTRS void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
 
 /// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
-bool isSharedMemPtr(void *Ptr);
+OMP_ATTRS bool isSharedMemPtr(void *Ptr);
 
 /// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)).
-bool isThreadLocalMemPtr(void *Ptr);
+OMP_ATTRS bool isThreadLocalMemPtr(void *Ptr);
 
 /// A  pointer variable that has by design an `undef` value. Use with care.
 [[clang::loader_uninitialized]] static void *const UndefPtr;
diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4f..cb0bfed8edc9df 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -28,8 +28,8 @@ extern "C" {
 /// getter: returns 0.
 ///
 ///{
-void omp_set_dynamic(int);
-int omp_get_dynamic(void);
+OMP_ATTRS void omp_set_dynamic(int);
+OMP_ATTRS int omp_get_dynamic(void);
 ///}
 
 /// ICV: nthreads-var, integer
@@ -43,8 +43,8 @@ int omp_get_dynamic(void);
 ///
 ///
 ///{
-void omp_set_num_threads(int);
-int omp_get_max_threads(void);
+OMP_ATTRS void omp_set_num_threads(int);
+OMP_ATTRS int omp_get_max_threads(void);
 ///}
 
 /// ICV: thread-limit-var, computed
@@ -52,7 +52,7 @@ int omp_get_max_threads(void);
 /// getter: returns thread limited defined during launch.
 ///
 ///{
-int omp_get_thread_limit(void);
+OMP_ATTRS int omp_get_thread_limit(void);
 ///}
 
 /// ICV: max-active-level-var, constant 1
@@ -61,8 +61,8 @@ int omp_get_thread_limit(void);
 /// getter: returns 1.
 ///
 ///{
-void omp_set_max_active_levels(int);
-int omp_get_max_active_levels(void);
+OMP_ATTRS void omp_set_max_active_levels(int);
+OMP_ATTRS int omp_get_max_active_levels(void);
 ///}
 
 /// ICV: places-partition-var
@@ -76,7 +76,7 @@ int omp_get_max_active_levels(void);
 /// getter: returns 0 or 1.
 ///
 ///{
-int omp_get_active_level(void);
+OMP_ATTRS int omp_get_active_level(void);
 ///}
 
 /// ICV: level-var
@@ -84,88 +84,88 @@ int omp_get_active_level(void);
 /// getter: returns parallel region nesting
 ///
 ///{
-int omp_get_level(void);
+OMP_ATTRS int omp_get_level(void);
 ///}
 
 /// ICV: run-sched-var
 ///
 ///
 ///{
-void omp_set_schedule(omp_sched_t, int);
-void omp_get_schedule(omp_sched_t *, int *);
+OMP_ATTRS void omp_set_schedule(omp_sched_t, int);
+OMP_ATTRS void omp_get_schedule(omp_sched_t *, int *);
 ///}
 
 /// TODO this is incomplete.
-int omp_get_num_threads(void);
-int omp_get_thread_num(void);
-void omp_set_nested(int);
+OMP_ATTRS int omp_get_num_threads(void);
+OMP_ATTRS int omp_get_thread_num(void);
+OMP_ATTRS void omp_set_nested(int);
 
-int omp_get_nested(void);
+OMP_ATTRS int omp_get_nested(void);
 
-void omp_set_max_active_levels(int Level);
+OMP_ATTRS void omp_set_max_active_levels(int Level);
 
-int omp_get_max_active_levels(void);
+OMP_ATTRS int omp_get_max_active_levels(void);
 
-omp_proc_bind_t omp_get_proc_bind(void);
+OMP_ATTRS omp_proc_bind_t omp_get_proc_bind(void);
 
-int omp_get_num_places(void);
+OMP_ATTRS int omp_get_num_places(void);
 
-int omp_get_place_num_procs(int place_num);
+OMP_ATTRS int omp_get_place_num_procs(int place_num);
 
-void omp_get_place_proc_ids(int place_num, int *ids);
+OMP_ATTRS void omp_get_place_proc_ids(int place_num, int *ids);
 
-int omp_get_place_num(void);
+OMP_ATTRS int omp_get_place_num(void);
 
-int omp_get_partition_num_places(void);
+OMP_ATTRS int omp_get_partition_num_places(void);
 
-void omp_get_partition_place_nums(int *place_nums);
+OMP_ATTRS void omp_get_partition_place_nums(int *place_nums);
 
-int omp_get_cancellation(void);
+OMP_ATTRS int omp_get_cancellation(void);
 
-void omp_set_default_device(int deviceId);
+OMP_ATTRS void omp_set_default_device(int deviceId);
 
-int omp_get_default_device(void);
+OMP_ATTRS int omp_get_default_device(void);
 
-int omp_get_num_devices(void);
+OMP_ATTRS int omp_get_num_devices(void);
 
-int omp_get_device_num(void);
+OMP_ATTRS int omp_get_device_num(void);
 
-int omp_get_num_teams(void);
+OMP_ATTRS int omp_get_num_teams(void);
 
-int omp_get_team_num();
+OMP_ATTRS int omp_get_team_num();
 
-int omp_get_initial_device(void);
+OMP_ATTRS int omp_get_initial_device(void);
 
-void *llvm_omp_target_dynamic_shared_alloc();
+OMP_ATTRS void *llvm_omp_target_dynamic_shared_alloc();
 
 /// Synchronization
 ///
 ///{
-void omp_init_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_init_lock(omp_lock_t *Lock);
 
-void omp_destroy_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_destroy_lock(omp_lock_t *Lock);
 
-void omp_set_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_set_lock(omp_lock_t *Lock);
 
-void omp_unset_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_unset_lock(omp_lock_t *Lock);
 
-int omp_test_lock(omp_lock_t *Lock);
+OMP_ATTRS int omp_test_lock(omp_lock_t *Lock);
 ///}
 
 /// Tasking
 ///
 ///{
-int omp_in_final(void);
+OMP_ATTRS int omp_in_final(void);
 
-int omp_get_max_task_priority(void);
+OMP_ATTRS int omp_get_max_task_priority(void);
 ///}
 
 /// Misc
 ///
 ///{
-double omp_get_wtick(void);
+OMP_ATTRS double omp_get_wtick(void);
 
-double omp_get_wtime(void);
+OMP_ATTRS double omp_get_wtime(void);
 ///}
 }
 
@@ -173,16 +173,16 @@ extern "C" {
 /// Allocate \p Bytes in "shareable" memory and return the address. Needs to be
 /// called balanced with __kmpc_free_shared like a stack (push/pop). Can be
 /// called by any thread, allocation happens *per thread*.
-void *__kmpc_alloc_shared(uint64_t Bytes);
+OMP_ATTRS void *__kmpc_alloc_shared(uint64_t Bytes);
 
 /// Deallocate \p Ptr. Needs to be called balanced with __kmpc_alloc_shared like
 /// a stack (push/pop). Can be called by any thread. \p Ptr has to be the
 /// allocated by __kmpc_alloc_shared by the same thread.
-void __kmpc_free_shared(void *Ptr, uint64_t Bytes);
+OMP_ATTRS void __kmpc_free_shared(void *Ptr, uint64_t Bytes);
 
 /// Get a pointer to the memory buffer containing dynamically allocated shared
 /// memory configured at launch.
-void *__kmpc_get_dynamic_shared();
+OMP_ATTRS void *__kmpc_get_dynamic_shared();
 
 /// Allocate sufficient space for \p NumArgs sequential `void*` and store the
 /// allocation address in \p GlobalArgs.
@@ -191,27 +191,28 @@ void *__kmpc_get_dynamic_shared();
 ///
 /// We also remember it in GlobalArgsPtr to ensure the worker threads and
 /// deallocation function know the allocation address too.
-void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs);
+OMP_ATTRS void __kmpc_begin_sharing_variables(void ***GlobalArgs,
+                                              uint64_t NumArgs);
 
 /// Deallocate the memory allocated by __kmpc_begin_sharing_variables.
 ///
 /// Called by the main thread after a parallel region.
-void __kmpc_end_sharing_variables();
+OMP_ATTRS void __kmpc_end_sharing_variables();
 
 /// Store the allocation address obtained via __kmpc_begin_sharing_variables in
 /// \p GlobalArgs.
 ///
 /// Called by the worker threads in the parallel region (function).
-void __kmpc_get_shared_variables(void ***GlobalArgs);
+OMP_ATTRS void __kmpc_get_shared_variables(void ***GlobalArgs);
 
 /// External interface to get the thread ID.
-uint32_t __kmpc_get_hardware_thread_id_in_block();
+OMP_ATTRS uint32_t __kmpc_get_hardware_thread_id_in_block();
 
 /// External interface to get the number of threads.
-uint32_t __kmpc_get_hardware_num_threads_in_block();
+OMP_ATTRS uint32_t __kmpc_get_hardware_num_threads_in_block();
 
 /// External interface to get the warp size.
-uint32_t __kmpc_get_warp_size();
+OMP_ATTRS uint32_t __kmpc_get_warp_size();
 
 /// Kernel
 ///
@@ -219,27 +220,26 @@ uint32_t __kmpc_get_warp_size();
 // Forward declaration
 struct KernelEnvironmentTy;
 
-int8_t __kmpc_is_spmd_exec_mode();
+OMP_ATTRS int8_t __kmpc_is_spmd_exec_mode();
 
-int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
-                           KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+OMP_ATTRS int32_t
+__kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
+                   KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
 
-void __kmpc_target_deinit();
+OMP_ATTRS void __kmpc_target_deinit();
 
 ///}
 
 /// Reduction
 ///
 ///{
-void *__kmpc_reduction_get_fixed_buffer();
+OMP_ATTRS void *__kmpc_reduction_get_fixed_buffer();
 
-int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(IdentTy *Loc,
-                                               uint64_t reduce_data_size,
-                                               void *reduce_data,
-                                               ShuffleReductFnTy shflFct,
-                                               InterWarpCopyFnTy cpyFct);
+OMP_ATTRS int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
+    IdentTy *Loc, uint64_t reduce_data_size, void *reduce_data,
+    ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct);
 
-int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
+OMP_ATTRS int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     IdentTy *Loc, void *GlobalBuffer, uint32_t num_of_records,
     uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct,
     InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct,
@@ -249,116 +249,120 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
 /// Synchronization
 ///
 ///{
-void __kmpc_ordered(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_ordered(IdentTy *Loc, int32_t TId);
 
-void __kmpc_end_ordered(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_ordered(IdentTy *Loc, int32_t TId);
 
-int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);
 
-void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);
 
-void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
 
-void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
 
-int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
+OMP_ATTRS int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
 
-void __kmpc_end_master(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_master(IdentTy *Loc, int32_t TId);
 
-int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
+OMP_ATTRS int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
 
-void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
 
-int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
+OMP_ATTRS int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
 
-void __kmpc_end_single(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_single(IdentTy *Loc, int32_t TId);
 
-void __kmpc_flush(IdentTy *Loc);
+OMP_ATTRS void __kmpc_flush(IdentTy *Loc);
 
-uint64_t __kmpc_warp_active_thread_mask(void);
+OMP_ATTRS uint64_t __kmpc_warp_active_thread_mask(void);
 
-void __kmpc_syncwarp(uint64_t Mask);
+OMP_ATTRS void __kmpc_syncwarp(uint64_t Mask);
 
-void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
+OMP_ATTRS void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
 
-void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
+OMP_ATTRS void __kmpc_end_critical(IdentTy *Loc, int32_t TId,
+                                   CriticalNameTy *Name);
 ///}
 
 /// Parallelism
 ///
 ///{
 /// TODO
-void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn);
+OMP_ATTRS void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn);
 
 /// TODO
-bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn);
+OMP_ATTRS bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn);
 
 /// TODO
-void __kmpc_kernel_end_parallel();
+OMP_ATTRS void __kmpc_kernel_end_parallel();
 
 /// TODO
-void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind);
+OMP_ATTRS void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind);
 
 /// TODO
-void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId, int32_t NumTeams,
-                           int32_t ThreadLimit);
+OMP_ATTRS void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId,
+                                     int32_t NumTeams, int32_t ThreadLimit);
 
 /// TODO
-uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t);
+OMP_ATTRS uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t);
 
 ///}
 
 /// Tasking
 ///
 ///{
-TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
-                                        size_t TaskSizeInclPrivateValues,
-                                        size_t SharedValuesSize,
-                                        TaskFnTy TaskFn);
+OMP_ATTRS TaskDescriptorTy *
+__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
+                      size_t...
[truncated]

[OpenMP] Add explicit attributes to every function declaration

19b46c8

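Summary: Instead of relying on the scoped `#pragma omp assumes ext_no_call_asm` directive, attach the assumption explicitly to every function declaration through a new `OMP_ATTRS` macro, defined as `[[omp::assume("ext_no_call_asm")]]`.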

jhuber6 requested review from carlobertolli, jdoerfert, jplehr, ronlieb, saiislam and shiltian January 10, 2025 01:40

llvmbot added the offload label Jan 10, 2025

ronlieb requested review from dpalermo and macurtis-amd January 10, 2025 01:44

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[OpenMP] Add explicit attributes to every function declaration #122399

[OpenMP] Add explicit attributes to every function declaration #122399

jhuber6 commented Jan 10, 2025

llvmbot commented Jan 10, 2025

[OpenMP] Add explicit attributes to every function declaration #122399

Are you sure you want to change the base?

[OpenMP] Add explicit attributes to every function declaration #122399

Conversation

jhuber6 commented Jan 10, 2025

llvmbot commented Jan 10, 2025