Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TailDuplicator] Add maximum predecessors and successors to consider tail duplicating blocks #78582

Merged
merged 4 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions llvm/lib/CodeGen/TailDuplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);

// Predecessor-count threshold used by shouldTailDuplicate(). A block is
// rejected for tail duplication only when its predecessor count exceeds this
// value AND its successor count exceeds TailDupSuccSize; exceeding just one of
// the two thresholds does not block duplication. Initialized to 16 and
// declared cl::Hidden.
static cl::opt<unsigned>
TailDupPredSize("tail-dup-pred-size",
cl::desc("Maximum predecessors (maximum successors at the "
"same time) to consider tail duplicating blocks."),
cl::init(16), cl::Hidden);

// Successor-count threshold used by shouldTailDuplicate(). A block is
// rejected for tail duplication only when its successor count exceeds this
// value AND its predecessor count exceeds TailDupPredSize; exceeding just one
// of the two thresholds does not block duplication. Initialized to 16 and
// declared cl::Hidden.
static cl::opt<unsigned>
TailDupSuccSize("tail-dup-succ-size",
cl::desc("Maximum successors (maximum predecessors at the "
"same time) to consider tail duplicating blocks."),
cl::init(16), cl::Hidden);

static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
Expand Down Expand Up @@ -565,6 +577,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;

// Duplicating a BB which has both many predecessors and many successors will
// result in a complex CFG and may also cause a huge number of PHI nodes. If
// we want to remove this limitation, we have to address
// https://github.com/llvm/llvm-project/issues/78578.
if (TailBB.pred_size() > TailDupPredSize &&
TailBB.succ_size() > TailDupSuccSize)
return false;

// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
Expand Down
260 changes: 260 additions & 0 deletions llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT

# Checks the -tail-dup-pred-size / -tail-dup-succ-size thresholds of early tail
# duplication. The block that joins bb.2-bb.5 and dispatches through
# %jump-table.1 has four predecessors and four successors.
#   LIMIT   (thresholds of 3): both counts exceed the thresholds, so the block
#           is kept (bb.7 in the expected output) and bb.2-bb.5 still jump to it.
#   NOLIMIT (thresholds of 4): the counts do not exceed the thresholds, so the
#           block's instructions are duplicated into bb.2-bb.5.

---
name: foo
tracksRegLiveness: true
jumpTable:
kind: block-address
entries:
- id: 0
blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5' ]
- id: 1
blocks: [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ]
body: |
; LIMIT-LABEL: name: foo
; LIMIT: bb.0:
; LIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
; LIMIT-NEXT: liveins: $rdi, $esi
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi
; LIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; LIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.2:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.3:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.4:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.5:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.6:
; LIMIT-NEXT: successors:
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.7:
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.9:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.10:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.11:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.12:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.13:
; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9
; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags
; LIMIT-NEXT: $eax = COPY [[OR32rr]]
; LIMIT-NEXT: RET 0, $eax
;
; NOLIMIT-LABEL: name: foo
; NOLIMIT: bb.0:
; NOLIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
; NOLIMIT-NEXT: liveins: $rdi, $esi
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi
; NOLIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; NOLIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.2:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.3:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.4:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.5:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.6:
; NOLIMIT-NEXT: successors:
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.9:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: JMP_1 %bb.13
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.10:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; NOLIMIT-NEXT: JMP_1 %bb.13
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.11:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: JMP_1 %bb.13
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.12:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.13:
; NOLIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
; NOLIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
; NOLIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
; NOLIMIT-NEXT: $eax = COPY [[OR32rr]]
; NOLIMIT-NEXT: RET 0, $eax
bb.0:
liveins: $rdi, $esi

%11:gr32 = COPY $esi
%10:gr64 = COPY $rdi
%13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags
%14:gr32 = AND32ri %13, 7, implicit-def dead $eflags
%12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit

bb.1:
successors: %bb.2, %bb.3, %bb.4, %bb.5

JMP64m $noreg, 8, %12, %jump-table.0, $noreg

bb.2:
%0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
JMP_1 %bb.7

bb.3:
%17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags
JMP_1 %bb.7

bb.4:
%16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags
JMP_1 %bb.7

bb.5:
%15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags
JMP_1 %bb.7

bb.6:
successors:

bb.7:
%4:gr32 = PHI %3, %bb.5, %2, %bb.4, %1, %bb.3, %0, %bb.2
%19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags
%20:gr32 = AND32ri %19, 7, implicit-def dead $eflags
%18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit

bb.8:
successors: %bb.9, %bb.10, %bb.11, %bb.12

JMP64m $noreg, 8, %18, %jump-table.1, $noreg

bb.9:
%5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
JMP_1 %bb.13

bb.10:
%23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags
JMP_1 %bb.13

bb.11:
%22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags
JMP_1 %bb.13

bb.12:
%21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags

bb.13:
%9:gr32 = PHI %8, %bb.12, %7, %bb.11, %6, %bb.10, %5, %bb.9
%24:gr32 = OR32rr %9, %4, implicit-def dead $eflags
$eax = COPY %24
RET 0, $eax

...
Loading