Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RTG] Add simple linear scan register allocation pass #8058

Open
wants to merge 1 commit into
base: maerhart-rtg-emit-assembly
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions include/circt/Dialect/RTG/Transforms/RTGPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,21 @@ def EmitRTGISAAssemblyPass : Pass<"rtg-emit-isa-assembly", "mlir::ModuleOp"> {
];
}

// Declares the pass registered under the command-line name
// 'rtg-linear-scan-register-allocation'. It is anchored on 'rtg::TestOp', so
// it runs once per test operation.
def LinearScanRegisterAllocationPass : Pass<
"rtg-linear-scan-register-allocation", "rtg::TestOp"> {

let summary = "simple linear scan register allocation for RTG";
let description = [{
Performs a simple version of the linear scan register allocation algorithm
based on the 'rtg.virtual_reg' operations.

This pass is expected to be run after elaboration.
}];

// Counter exposed through the pass statistics infrastructure as
// 'num-registers-spilled'; the pass implementation accesses it through the
// 'numRegistersSpilled' member.
let statistics = [
Statistic<"numRegistersSpilled", "num-registers-spilled",
"Number of registers spilled to the stack.">,
];
}

#endif // CIRCT_DIALECT_RTG_TRANSFORMS_RTGPASSES_TD
1 change: 1 addition & 0 deletions lib/Dialect/RTG/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_circt_dialect_library(CIRCTRTGTransforms
ElaborationPass.cpp
EmitRTGISAAssemblyPass.cpp
LinearScanRegisterAllocationPass.cpp

DEPENDS
CIRCTRTGTransformsIncGen
Expand Down
169 changes: 169 additions & 0 deletions lib/Dialect/RTG/Transforms/LinearScanRegisterAllocationPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//===- LinearScanRegisterAllocationPass.cpp - Register Allocation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass allocates registers using a simple linear scan algorithm.
//
//===----------------------------------------------------------------------===//

#include "circt/Dialect/RTG/IR/RTGISAAssemblyOpInterfaces.h"
#include "circt/Dialect/RTG/IR/RTGOps.h"
#include "circt/Dialect/RTG/Transforms/RTGPasses.h"
#include "mlir/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"

namespace circt {
namespace rtg {
#define GEN_PASS_DEF_LINEARSCANREGISTERALLOCATIONPASS
#include "circt/Dialect/RTG/Transforms/RTGPasses.h.inc"
} // namespace rtg
} // namespace circt

using namespace mlir;
using namespace circt;

#define DEBUG_TYPE "rtg-linear-scan-register-allocation"

namespace {

/// Represents a register and its live range.
///
/// The range is expressed in operation indices within the test body and spans
/// from the first to the last user of the register, both ends inclusive.
struct RegisterLiveRange {
  /// The concrete register occupying this range. Known up front for fixed
  /// registers; filled in by the allocator for virtual registers.
  rtg::RegisterAttrInterface fixedReg;
  /// The virtual register operation this range belongs to. Null when the
  /// range describes a register that was already fixed in the input.
  rtg::VirtualRegisterOp regOp;
  /// Index of the first operation using the register.
  unsigned start = 0;
  /// Index of the last operation using the register.
  unsigned end = 0;
};

class LinearScanRegisterAllocationPass
: public circt::rtg::impl::LinearScanRegisterAllocationPassBase<
LinearScanRegisterAllocationPass> {
public:
void runOnOperation() override;
};

} // end namespace

/// Removes from 'active' every live range that ends before 'reg' starts.
///
/// Interval ends are inclusive: a range whose end equals 'reg->start' still
/// overlaps with 'reg' and is therefore kept. As a side effect, 'active' is
/// left sorted by increasing interval end.
static void expireOldInterval(SmallVector<RegisterLiveRange *> &active,
                              RegisterLiveRange *reg) {
  // TODO: use a better datastructure for 'active'
  llvm::sort(active, [](auto *a, auto *b) { return a->end < b->end; });

  // After sorting, the expired ranges form a prefix of 'active'. Locate the
  // end of that prefix first and erase it in a single step. Erasing inside
  // the scan would move the iterator in front of 'begin()' and shift the
  // remaining elements once per removed entry.
  auto *firstLive = active.begin();
  while (firstLive != active.end() && (*firstLive)->end < reg->start)
    ++firstLive;

  active.erase(active.begin(), firstLive);
}

void LinearScanRegisterAllocationPass::runOnOperation() {
auto testOp = getOperation();

LLVM_DEBUG(llvm::dbgs() << "=== Processing test @" << testOp.getSymName()
<< "\n\n");

DenseMap<Operation *, unsigned> opIndices;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mlir has an index value built into operations. I think it is only relative to a single block. It's used/updated by dominance analysis.

unsigned maxIdx;
for (auto [i, op] : llvm::enumerate(*testOp.getBody())) {
// TODO: ideally check that the IR is already fully elaborated
opIndices[&op] = i;
maxIdx = i;
}

// Collect all the register intervals we have to consider.
SmallVector<std::unique_ptr<RegisterLiveRange>> regRanges;
SmallVector<RegisterLiveRange *> active;
for (auto &op : *testOp.getBody()) {
if (!isa<rtg::FixedRegisterOp, rtg::VirtualRegisterOp>(&op))
continue;

RegisterLiveRange lr;
lr.start = maxIdx;
lr.end = 0;

if (auto regOp = dyn_cast<rtg::VirtualRegisterOp>(&op))
lr.regOp = regOp;

if (auto regOp = dyn_cast<rtg::FixedRegisterOp>(&op))
lr.fixedReg = regOp.getReg();

for (auto *user : op.getUsers()) {
if (!isa<rtg::InstructionOpInterface>(user)) {
user->emitError("only operations implementing 'InstructionOpInterface "
"are allowed to use registers");
return signalPassFailure();
}

// TODO: support labels and control-flow loops (jumps in general)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea, this is interesting. We need to be able to deal with fixed-value jumps, jumptables, label relative jumps, etc. We will probably need an op interface for this. Something to resolve computed live ranges.

unsigned idx = opIndices.at(user);
lr.start = std::min(lr.start, idx);
lr.end = std::max(lr.end, idx);
}

regRanges.emplace_back(std::make_unique<RegisterLiveRange>(lr));

// Reserve fixed registers from the start. It will be made available again
// past the interval end. Not reserving it from the start can lead to the
// same register being chosen for a virtual register that overlaps with the
// fixed register interval.
// TODO: don't overapproximate that much
if (!lr.regOp)
active.push_back(regRanges.back().get());
}

// Sort such that we can process registers by increasing interval start.
llvm::sort(regRanges, [](const auto &a, const auto &b) {
return a->start < b->start || (a->start == b->start && !a->regOp);
});

for (auto &lr : regRanges) {
// Make registers out of live range available again.
expireOldInterval(active, lr.get());

// Handle already fixed registers.
if (!lr->regOp)
continue;

// Handle virtual registers.
rtg::RegisterAttrInterface availableReg;
for (auto reg : lr->regOp.getAllowedRegs()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we prioritizing/ordering registers just by their declaration order or by their index? Presumably things like stack pointers should be picked last and temporary (ABI-wise) registers picked first? Obviously this shouldn't be required, but if we want to simplify entry/exit (and temporary entry/exit for calls) into tests, this might be good.

Also might be good to know what registers are touched in a region, but maybe that is an analysis later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, registers earlier in the allowedRegisters attribute are picked first, so helper functions can be created by the payload dialect writers that add all legal registers to this list in the priority order they prefer. But we can always change that to be specified by a Dialect interface.

Yes, an analysis to check which fixed registers are already in use in the region would allow better allocation and I'm planning to add that some time later. It's not critical right now and keeps this PR simpler.

if (llvm::none_of(active, [&](auto *r) { return r->fixedReg == reg; })) {
availableReg = cast<rtg::RegisterAttrInterface>(reg);
break;
}
}

if (!availableReg) {
++numRegistersSpilled;
lr->regOp->emitError(
"need to spill this register, but not supported yet");
return signalPassFailure();
}

lr->fixedReg = availableReg;
active.push_back(lr.get());
}

LLVM_DEBUG({
for (auto &regRange : regRanges) {
llvm::dbgs() << "Start: " << regRange->start << ", End: " << regRange->end
<< ", Selected: " << regRange->fixedReg << "\n";
}
llvm::dbgs() << "\n";
});

for (auto &reg : regRanges) {
// No need to fix already fixed registers.
if (!reg->regOp)
continue;

IRRewriter rewriter(reg->regOp);
rewriter.replaceOpWithNewOp<rtg::FixedRegisterOp>(reg->regOp,
reg->fixedReg);
}
}
61 changes: 61 additions & 0 deletions test/Dialect/RTG/Transform/linear-scan-register-allocation.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// RUN: circt-opt --rtg-linear-scan-register-allocation --split-input-file --verify-diagnostics %s | FileCheck %s

// Four virtual registers that may each use ra, s0 or s1. %0 and %2 are both
// first used by the first instruction and receive ra and s0. %1 starts while
// %0 is still live (interval ends are inclusive) and therefore gets s1. %3
// starts after the last use of %0 and can reuse ra.
// CHECK-LABEL: @test0
rtg.test @test0 : !rtg.dict<> {
// CHECK: [[V0:%.+]] = rtg.fixed_reg #rtgtest.ra
// CHECK: [[V1:%.+]] = rtg.fixed_reg #rtgtest.s1
// CHECK: [[V2:%.+]] = rtg.fixed_reg #rtgtest.s0
// CHECK: [[V3:%.+]] = rtg.fixed_reg #rtgtest.ra
// CHECK: rtgtest.rv32i.jalr [[V0]], [[V2]]
// CHECK: rtgtest.rv32i.jalr [[V1]], [[V0]]
// CHECK: rtgtest.rv32i.jalr [[V3]], [[V1]]
// CHECK: rtgtest.rv32i.jalr [[V2]], [[V3]]
%0 = rtg.virtual_reg [#rtgtest.ra, #rtgtest.s0, #rtgtest.s1]
%1 = rtg.virtual_reg [#rtgtest.ra, #rtgtest.s0, #rtgtest.s1]
%2 = rtg.virtual_reg [#rtgtest.ra, #rtgtest.s0, #rtgtest.s1]
%3 = rtg.virtual_reg [#rtgtest.ra, #rtgtest.s0, #rtgtest.s1]
%imm = rtgtest.immediate #rtgtest.imm12<0>
rtgtest.rv32i.jalr %0, %2, %imm
rtgtest.rv32i.jalr %1, %0, %imm
rtgtest.rv32i.jalr %3, %1, %imm
rtgtest.rv32i.jalr %2, %3, %imm
}

// Same instruction sequence as above, but %0 and %2 are already fixed to ra
// and s0. The allocator has to respect these reservations: %1 overlaps with
// both and gets s1, while %3 starts after the last use of %0 and can take ra.
// CHECK-LABEL: @withFixedRegs
rtg.test @withFixedRegs : !rtg.dict<> {
// CHECK: [[V0:%.+]] = rtg.fixed_reg #rtgtest.ra
// CHECK: [[V1:%.+]] = rtg.fixed_reg #rtgtest.s1
// CHECK: [[V2:%.+]] = rtg.fixed_reg #rtgtest.s0
// CHECK: [[V3:%.+]] = rtg.fixed_reg #rtgtest.ra
// CHECK: rtgtest.rv32i.jalr [[V0]], [[V2]]
// CHECK: rtgtest.rv32i.jalr [[V1]], [[V0]]
// CHECK: rtgtest.rv32i.jalr [[V3]], [[V1]]
// CHECK: rtgtest.rv32i.jalr [[V2]], [[V3]]
%0 = rtg.fixed_reg #rtgtest.ra
%1 = rtg.virtual_reg [#rtgtest.ra, #rtgtest.s0, #rtgtest.s1]
%2 = rtg.fixed_reg #rtgtest.s0
%3 = rtg.virtual_reg [#rtgtest.ra, #rtgtest.s0, #rtgtest.s1]
%imm = rtgtest.immediate #rtgtest.imm12<0>
rtgtest.rv32i.jalr %0, %2, %imm
rtgtest.rv32i.jalr %1, %0, %imm
rtgtest.rv32i.jalr %3, %1, %imm
rtgtest.rv32i.jalr %2, %3, %imm
}

// -----

// Two virtual registers that are live at the same instruction but may only
// use ra. The second one cannot be allocated, and since spilling is not
// implemented the pass reports a diagnostic on it.
rtg.test @spilling : !rtg.dict<> {
%0 = rtg.virtual_reg [#rtgtest.ra]
// expected-error @below {{need to spill this register, but not supported yet}}
%1 = rtg.virtual_reg [#rtgtest.ra]
%imm = rtgtest.immediate #rtgtest.imm12<0>
rtgtest.rv32i.jalr %0, %1, %imm
}

// -----

// A register used by an operation that is not an instruction must be
// rejected with a diagnostic on the offending user.
rtg.test @unsupportedUser : !rtg.dict<> {
%0 = rtg.virtual_reg [#rtgtest.ra]
// expected-error @below {{only operations implementing 'InstructionOpInterface are allowed to use registers}}
rtg.set_create %0 : !rtgtest.ireg
}
Loading