Skip to content

Commit

Permalink
[AMDGPU] Remove s_wakeup_barrier instruction (#122277)
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrkusanin authored Jan 10, 2025
1 parent 1ef2580 commit 3def49c
Show file tree
Hide file tree
Showing 11 changed files with 5 additions and 91 deletions.
1 change: 0 additions & 1 deletion clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,6 @@ TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts")
Expand Down
15 changes: 0 additions & 15 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
Original file line number Diff line number Diff line change
Expand Up @@ -173,21 +173,6 @@ void test_s_barrier_join(void *bar)
__builtin_amdgcn_s_barrier_join(bar);
}

// CHECK-LABEL: @test_s_wakeup_barrier(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
// CHECK-NEXT: ret void
//
void test_s_wakeup_barrier(void *bar)
{
__builtin_amdgcn_s_wakeup_barrier(bar);
}

// CHECK-LABEL: @test_s_barrier_leave(
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1)
Expand Down
6 changes: 0 additions & 6 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -284,12 +284,6 @@ def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;

// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
// The %barrier argument must be uniform, otherwise behavior is undefined.
def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;

// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2239,7 +2239,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_s_barrier_signal_var:
return selectNamedBarrierInit(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
case Intrinsic::amdgcn_s_get_named_barrier_state:
return selectNamedBarrierInst(I, IntrinsicID);
case Intrinsic::amdgcn_s_get_barrier_state:
Expand Down Expand Up @@ -5838,8 +5837,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
llvm_unreachable("not a named barrier op");
case Intrinsic::amdgcn_s_barrier_join:
return AMDGPU::S_BARRIER_JOIN_IMM;
case Intrinsic::amdgcn_s_wakeup_barrier:
return AMDGPU::S_WAKEUP_BARRIER_IMM;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_IMM;
};
Expand All @@ -5849,8 +5846,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
llvm_unreachable("not a named barrier op");
case Intrinsic::amdgcn_s_barrier_join:
return AMDGPU::S_BARRIER_JOIN_M0;
case Intrinsic::amdgcn_s_wakeup_barrier:
return AMDGPU::S_WAKEUP_BARRIER_M0;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_M0;
};
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,6 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
case Intrinsic::amdgcn_s_barrier_wait:
case Intrinsic::amdgcn_s_barrier_leave:
case Intrinsic::amdgcn_s_get_barrier_state:
case Intrinsic::amdgcn_s_wakeup_barrier:
case Intrinsic::amdgcn_wave_barrier:
case Intrinsic::amdgcn_sched_barrier:
case Intrinsic::amdgcn_sched_group_barrier:
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3304,7 +3304,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(B, MI, 1);
return;
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
constrainOpWithReadfirstlane(B, MI, 1);
return;
case Intrinsic::amdgcn_s_barrier_init:
Expand Down Expand Up @@ -5272,7 +5271,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
break;
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
break;
case Intrinsic::amdgcn_s_barrier_init:
Expand Down
27 changes: 5 additions & 22 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10107,8 +10107,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
return SDValue(NewMI, 0);
}
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier: {
case Intrinsic::amdgcn_s_barrier_join: {
// this intrinsic has one operand: the barrier pointer
SDValue Chain = Op->getOperand(0);
SmallVector<SDValue, 2> Ops;
Expand All @@ -10117,32 +10116,16 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,

if (isa<ConstantSDNode>(BarOp)) {
uint64_t BarVal = cast<ConstantSDNode>(BarOp)->getZExtValue();
switch (IntrinsicID) {
default:
return SDValue();
case Intrinsic::amdgcn_s_barrier_join:
Opc = AMDGPU::S_BARRIER_JOIN_IMM;
break;
case Intrinsic::amdgcn_s_wakeup_barrier:
Opc = AMDGPU::S_WAKEUP_BARRIER_IMM;
break;
}
Opc = AMDGPU::S_BARRIER_JOIN_IMM;

// extract the BarrierID from bits 4-9 of the immediate
unsigned BarID = (BarVal >> 4) & 0x3F;
SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
Ops.push_back(K);
Ops.push_back(Chain);
} else {
switch (IntrinsicID) {
default:
return SDValue();
case Intrinsic::amdgcn_s_barrier_join:
Opc = AMDGPU::S_BARRIER_JOIN_M0;
break;
case Intrinsic::amdgcn_s_wakeup_barrier:
Opc = AMDGPU::S_WAKEUP_BARRIER_M0;
break;
}
Opc = AMDGPU::S_BARRIER_JOIN_M0;

// extract the BarrierID from bits 4-9 of BarOp, copy to M0[5:0]
SDValue M0Val;
M0Val = DAG.getNode(ISD::SRL, DL, MVT::i32, BarOp,
Expand Down
12 changes: 0 additions & 12 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,6 @@ def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins),
let isConvergent = 1;
}

def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
"", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
}
} // End Uses = [M0]

def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
Expand All @@ -514,11 +509,6 @@ def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs),
let isConvergent = 1;
}

def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
(ins SplitBarrier:$src0), "$src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
}
} // End has_sdst = 0

def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst),
Expand Down Expand Up @@ -2092,13 +2082,11 @@ defm S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_M0_Real_gfx12<0x04f>;
defm S_GET_BARRIER_STATE_M0 : SOP1_M0_Real_gfx12<0x050>;
defm S_BARRIER_INIT_M0 : SOP1_M0_Real_gfx12<0x051>;
defm S_BARRIER_JOIN_M0 : SOP1_M0_Real_gfx12<0x052>;
defm S_WAKEUP_BARRIER_M0 : SOP1_M0_Real_gfx12<0x057>;
defm S_BARRIER_SIGNAL_IMM : SOP1_IMM_Real_gfx12<0x04e>;
defm S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_IMM_Real_gfx12<0x04f>;
defm S_GET_BARRIER_STATE_IMM : SOP1_IMM_Real_gfx12<0x050>;
defm S_BARRIER_INIT_IMM : SOP1_IMM_Real_gfx12<0x051>;
defm S_BARRIER_JOIN_IMM : SOP1_IMM_Real_gfx12<0x052>;
defm S_WAKEUP_BARRIER_IMM : SOP1_IMM_Real_gfx12<0x057>;
defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>;

//===----------------------------------------------------------------------===//
Expand Down
9 changes: 0 additions & 9 deletions llvm/test/CodeGen/AMDGPU/s-barrier.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
; GFX12-SDAG-NEXT: s_barrier_wait 1
; GFX12-SDAG-NEXT: s_barrier_leave
; GFX12-SDAG-NEXT: s_wakeup_barrier m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
; GFX12-SDAG-NEXT: s_wakeup_barrier m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
; GFX12-SDAG-NEXT: s_get_barrier_state s2, m0
Expand Down Expand Up @@ -176,8 +172,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
; GFX12-GISEL-NEXT: s_barrier_join m0
; GFX12-GISEL-NEXT: s_barrier_wait 1
; GFX12-GISEL-NEXT: s_barrier_leave
; GFX12-GISEL-NEXT: s_wakeup_barrier 2
; GFX12-GISEL-NEXT: s_wakeup_barrier m0
; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0
Expand Down Expand Up @@ -218,8 +212,6 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
call void @llvm.amdgcn.s.barrier.leave(i16 1)
call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) @bar)
call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %in)
%state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar)
%state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in)
call void @llvm.amdgcn.s.barrier()
Expand Down Expand Up @@ -295,7 +287,6 @@ declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
declare void @llvm.amdgcn.s.barrier.leave(i16) #1
declare void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3)) #1
declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1

Expand Down
9 changes: 0 additions & 9 deletions llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
Original file line number Diff line number Diff line change
Expand Up @@ -726,15 +726,6 @@ s_barrier_join -2
s_barrier_join m0
// GFX12: encoding: [0x7d,0x52,0x80,0xbe]

s_wakeup_barrier 1
// GFX12: encoding: [0x81,0x57,0x80,0xbe]

s_wakeup_barrier -1
// GFX12: encoding: [0xc1,0x57,0x80,0xbe]

s_wakeup_barrier m0
// GFX12: encoding: [0x7d,0x57,0x80,0xbe]

s_get_barrier_state s3, -1
// GFX12: encoding: [0xc1,0x50,0x83,0xbe]

Expand Down
9 changes: 0 additions & 9 deletions llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -726,15 +726,6 @@
# GFX12: s_barrier_join m0 ; encoding: [0x7d,0x52,0x80,0xbe]
0x7d,0x52,0x80,0xbe

# GFX12: s_wakeup_barrier 1 ; encoding: [0x81,0x57,0x80,0xbe]
0x81,0x57,0x80,0xbe

# GFX12: s_wakeup_barrier -1 ; encoding: [0xc1,0x57,0x80,0xbe]
0xc1,0x57,0x80,0xbe

# GFX12: s_wakeup_barrier m0 ; encoding: [0x7d,0x57,0x80,0xbe]
0x7d,0x57,0x80,0xbe

# GFX12: s_get_barrier_state s3, -1 ; encoding: [0xc1,0x50,0x83,0xbe]
0xc1,0x50,0x83,0xbe

Expand Down

0 comments on commit 3def49c

Please sign in to comment.