Skip to content

Commit

Permalink
[TailDuplicator] Add a limit on the number of indirect branch successors
Browse files Browse the repository at this point in the history
  • Loading branch information
DianQK committed Feb 5, 2024
1 parent aa8b1cf commit b0e06a2
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 36 deletions.
9 changes: 8 additions & 1 deletion llvm/lib/CodeGen/TailDuplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);

static cl::opt<unsigned> TailDupIndirectBranchSuccSize(
"tail-dup-indirect-succ-size",
cl::desc("Maximum successors to consider tail duplicating blocks that "
"end with indirect branches."),
cl::init(8), cl::Hidden);

static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
Expand Down Expand Up @@ -598,7 +604,8 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (!TailBB.empty())
HasIndirectbr = TailBB.back().isIndirectBranch();

if (HasIndirectbr && PreRegAlloc)
if (HasIndirectbr && PreRegAlloc &&
TailBB.succ_size() <= TailDupIndirectBranchSuccSize)
MaxDuplicateCount = TailDupIndirectBranchSize;

// Check the instructions in the block to determine whether tail-duplication
Expand Down
62 changes: 27 additions & 35 deletions llvm/test/CodeGen/X86/tail-dup-indirect-succ-size.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=LIMIT
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=NOLIMIT
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-indirect-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-indirect-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT

--- |
source_filename = "tail-dup-pred-size-limit.ll"
Expand Down Expand Up @@ -172,81 +172,73 @@ body: |
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.2 (%ir-block.5):
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.3 (%ir-block.7):
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table)
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.4 (%ir-block.10):
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table)
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.5 (%ir-block.13):
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table)
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.6.default.unreachable2:
; LIMIT-NEXT: successors:
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.7 (%ir-block.16):
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.9 (%ir-block.20):
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.10 (%ir-block.22):
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.11 (%ir-block.25):
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.12 (%ir-block.28):
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
; LIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.13 (%ir-block.31):
; LIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
; LIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9
; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags
; LIMIT-NEXT: $eax = COPY [[OR32rr]]
; LIMIT-NEXT: RET 0, $eax
;
Expand Down

0 comments on commit b0e06a2

Please sign in to comment.