From 948dfb97685109c346aedbdae5122cdd9db499d9 Mon Sep 17 00:00:00 2001 From: DianQK Date: Mon, 5 Feb 2024 20:58:47 +0800 Subject: [PATCH 1/4] Pre-commit test cases --- .../CodeGen/X86/tail-dup-pred-succ-size.mir | 434 ++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir new file mode 100644 index 00000000000000..fe5bf1831d09cc --- /dev/null +++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir @@ -0,0 +1,434 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=LIMIT +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=NOLIMIT + +--- | + source_filename = "tail-dup-pred-succ-size.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + + define i32 @foo(ptr %0, i32 %1) { + %3 = lshr i32 %1, 1 + %4 = and i32 %3, 7 + switch i32 %4, label %default.unreachable2 [ + i32 0, label %5 + i32 1, label %7 + i32 2, label %10 + i32 3, label %13 + ] + + 5: ; preds = %2 + %6 = load i32, ptr %0, align 4 + br label %16 + + 7: ; preds = %2 + %8 = load i32, ptr %0, align 4 + %9 = lshr i32 %8, 1 + br label %16 + + 10: ; preds = %2 + %11 = load i32, ptr %0, align 4 + %12 = lshr i32 %11, 2 + br label %16 + + 13: ; preds = %2 + %14 = load i32, ptr %0, align 4 + %15 = lshr i32 %14, 3 + br label %16 + + default.unreachable2: ; preds = %16, %2 + unreachable + + 16: ; preds = %13, %10, %7, %5 + %17 = phi i32 [ %15, %13 ], [ %12, %10 ], [ %9, %7 ], [ %6, %5 ] + %18 = lshr i32 %1, 2 + %19 = and i32 %18, 7 + switch i32 %19, label %default.unreachable2 [ + i32 0, label %20 + i32 1, label %22 + i32 2, label %25 + i32 3, label %28 + ] + + 
20: ; preds = %16 + %21 = load i32, ptr %0, align 4 + br label %31 + + 22: ; preds = %16 + %23 = load i32, ptr %0, align 4 + %24 = lshr i32 %23, 1 + br label %31 + + 25: ; preds = %16 + %26 = load i32, ptr %0, align 4 + %27 = lshr i32 %26, 2 + br label %31 + + 28: ; preds = %16 + %29 = load i32, ptr %0, align 4 + %30 = lshr i32 %29, 6 + br label %31 + + 31: ; preds = %28, %25, %22, %20 + %32 = phi i32 [ %30, %28 ], [ %27, %25 ], [ %24, %22 ], [ %21, %20 ] + %33 = or i32 %32, %17 + ret i32 %33 + } + +... +--- +name: foo +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gr32, preferred-register: '' } + - { id: 1, class: gr32, preferred-register: '' } + - { id: 2, class: gr32, preferred-register: '' } + - { id: 3, class: gr32, preferred-register: '' } + - { id: 4, class: gr32, preferred-register: '' } + - { id: 5, class: gr32, preferred-register: '' } + - { id: 6, class: gr32, preferred-register: '' } + - { id: 7, class: gr32, preferred-register: '' } + - { id: 8, class: gr32, preferred-register: '' } + - { id: 9, class: gr32, preferred-register: '' } + - { id: 10, class: gr64, preferred-register: '' } + - { id: 11, class: gr32, preferred-register: '' } + - { id: 12, class: gr64_nosp, preferred-register: '' } + - { id: 13, class: gr32, preferred-register: '' } + - { id: 14, class: gr32, preferred-register: '' } + - { id: 15, class: gr32, preferred-register: '' } + - { id: 16, class: gr32, preferred-register: '' } + - { id: 17, class: gr32, preferred-register: '' } + - { id: 18, class: gr64_nosp, preferred-register: '' } + - { id: 19, class: gr32, preferred-register: '' } + - { id: 20, class: gr32, preferred-register: '' } 
+ - { id: 21, class: gr32, preferred-register: '' } + - { id: 22, class: gr32, preferred-register: '' } + - { id: 23, class: gr32, preferred-register: '' } + - { id: 24, class: gr32, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%10' } + - { reg: '$esi', virtual-reg: '%11' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.1', '%bb.2', '%bb.3', '%bb.4' ] + - id: 1 + blocks: [ '%bb.7', '%bb.8', '%bb.9', '%bb.10' ] +body: | + ; LIMIT-LABEL: name: foo + ; LIMIT: bb.0 (%ir-block.2): + ; LIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; LIMIT-NEXT: liveins: $rdi, $esi + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; LIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; LIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.2 (%ir-block.5): + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; 
LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.3 (%ir-block.7): + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.4 (%ir-block.10): + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit + ; LIMIT-NEXT: 
JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.5 (%ir-block.13): + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.6.default.unreachable2: + ; LIMIT-NEXT: successors: + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.9 (%ir-block.20): + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.10 (%ir-block.22): + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.11 (%ir-block.25): + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + 
; LIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.12 (%ir-block.28): + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.13 (%ir-block.31): + ; LIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12 + ; LIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9 + ; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags + ; LIMIT-NEXT: $eax = COPY [[OR32rr]] + ; LIMIT-NEXT: RET 0, $eax + ; + ; NOLIMIT-LABEL: name: foo + ; NOLIMIT: bb.0 (%ir-block.2): + ; NOLIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; NOLIMIT-NEXT: liveins: $rdi, $esi + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; NOLIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; NOLIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, 
$noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.2 (%ir-block.5): + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.3 (%ir-block.7): + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.4 (%ir-block.10): + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: 
[[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.5 (%ir-block.13): + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.6.default.unreachable2: + ; NOLIMIT-NEXT: successors: + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.9 (%ir-block.20): + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.10 (%ir-block.22): + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: 
[[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.11 (%ir-block.25): + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.12 (%ir-block.28): + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.13 (%ir-block.31): + ; NOLIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12 + ; NOLIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9 + ; NOLIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags + ; NOLIMIT-NEXT: $eax = COPY [[OR32rr]] + ; NOLIMIT-NEXT: RET 0, $eax + bb.0 (%ir-block.2): + successors: %bb.12(0x80000000) + liveins: $rdi, $esi + + %11:gr32 = COPY $esi + %10:gr64 = COPY $rdi + %13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags + %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags + %12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit + + bb.12 
(%ir-block.2): + successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) + + JMP64m $noreg, 8, %12, %jump-table.0, $noreg :: (load (s64) from jump-table) + + bb.1 (%ir-block.5): + successors: %bb.6(0x80000000) + + %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + JMP_1 %bb.6 + + bb.2 (%ir-block.7): + successors: %bb.6(0x80000000) + + %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags + JMP_1 %bb.6 + + bb.3 (%ir-block.10): + successors: %bb.6(0x80000000) + + %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags + JMP_1 %bb.6 + + bb.4 (%ir-block.13): + successors: %bb.6(0x80000000) + + %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags + JMP_1 %bb.6 + + bb.5.default.unreachable2: + successors: + + + bb.6 (%ir-block.16): + successors: %bb.13(0x80000000) + + %4:gr32 = PHI %3, %bb.4, %2, %bb.3, %1, %bb.2, %0, %bb.1 + %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags + %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags + %18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit + + bb.13 (%ir-block.16): + successors: %bb.7(0x20000000), %bb.8(0x20000000), %bb.9(0x20000000), %bb.10(0x20000000) + + JMP64m $noreg, 8, %18, %jump-table.1, $noreg :: (load (s64) from jump-table) + + bb.7 (%ir-block.20): + successors: %bb.11(0x80000000) + + %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + JMP_1 %bb.11 + + bb.8 (%ir-block.22): + successors: %bb.11(0x80000000) + + %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags + JMP_1 %bb.11 + + bb.9 (%ir-block.25): + successors: %bb.11(0x80000000) + + %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + %7:gr32 = SHR32ri %22, 2, implicit-def 
dead $eflags + JMP_1 %bb.11 + + bb.10 (%ir-block.28): + successors: %bb.11(0x80000000) + + %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags + + bb.11 (%ir-block.31): + %9:gr32 = PHI %8, %bb.10, %7, %bb.9, %6, %bb.8, %5, %bb.7 + %24:gr32 = OR32rr %9, %4, implicit-def dead $eflags + $eax = COPY %24 + RET 0, $eax + +... From d155f418bc760dfbd71c65fb2981afc9884b2ba0 Mon Sep 17 00:00:00 2001 From: DianQK Date: Mon, 5 Feb 2024 21:16:58 +0800 Subject: [PATCH 2/4] [TailDuplicator] Add maximum predecessors and successors to consider tail duplicating blocks --- llvm/lib/CodeGen/TailDuplicator.cpp | 16 +++++ .../CodeGen/X86/tail-dup-pred-succ-size.mir | 62 ++++++++----------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 5ed67bd0a121ed..c1e32fa7d63692 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); +static cl::opt<unsigned> + TailDupPredSize("tail-dup-pred-size", + cl::desc("Maximum predecessors (maximum successors at the " + "same time) to consider tail duplicating blocks."), + cl::init(16), cl::Hidden); + +static cl::opt<unsigned> + TailDupSuccSize("tail-dup-succ-size", + cl::desc("Maximum successors (maximum predecessors at the " + "same time) to consider tail duplicating blocks."), + cl::init(16), cl::Hidden); + static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -565,6 +577,10 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; + if (TailBB.pred_size() > TailDupPredSize && + TailBB.succ_size() > TailDupSuccSize) + return false; + // Set the limit on the cost to duplicate.
When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir index fe5bf1831d09cc..10448e33bd8ba2 100644 --- a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir +++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=LIMIT -# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=NOLIMIT +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT --- | source_filename = "tail-dup-pred-succ-size.ll" @@ -172,81 +172,73 @@ body: | ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.2 (%ir-block.5): - ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags - ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit - ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load 
(s64) from jump-table) + ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.3 (%ir-block.7): - ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags - ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags - ; LIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit - ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.4 (%ir-block.10): - ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags - ; LIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit - ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.5 
(%ir-block.13): - ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags - ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags - ; LIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit - ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.6.default.unreachable2: ; LIMIT-NEXT: successors: ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.7 (%ir-block.16): + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2 + ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.9 (%ir-block.20): ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 ; LIMIT-NEXT: 
[[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) ; LIMIT-NEXT: JMP_1 %bb.13 ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.10 (%ir-block.22): ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - ; LIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags ; LIMIT-NEXT: JMP_1 %bb.13 ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.11 (%ir-block.25): ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - ; LIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags ; LIMIT-NEXT: JMP_1 %bb.13 ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.12 (%ir-block.28): ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - ; LIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.13 (%ir-block.31): - ; LIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12 - ; 
LIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9 - ; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags + ; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9 + ; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags ; LIMIT-NEXT: $eax = COPY [[OR32rr]] ; LIMIT-NEXT: RET 0, $eax ; From fea16d0e108a865f3cbf429a5841012fdff4a1e0 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sat, 24 Feb 2024 20:39:42 +0800 Subject: [PATCH 3/4] Simplify MIR --- .../CodeGen/X86/tail-dup-pred-succ-size.mir | 332 +++++------------- 1 file changed, 83 insertions(+), 249 deletions(-) diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir index 10448e33bd8ba2..67f8cc72e0d726 100644 --- a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir +++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir @@ -2,165 +2,19 @@ # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT ---- | - source_filename = "tail-dup-pred-succ-size.ll" - target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" - - define i32 @foo(ptr %0, i32 %1) { - %3 = lshr i32 %1, 1 - %4 = and i32 %3, 7 - switch i32 %4, label %default.unreachable2 [ - i32 0, label %5 - i32 1, label %7 - i32 2, label %10 - i32 3, label %13 - ] - - 5: ; preds = %2 - %6 = load i32, ptr %0, align 4 - br label %16 - - 7: ; preds = %2 - %8 = load i32, ptr %0, align 4 - %9 = lshr i32 %8, 1 - br label %16 - - 10: ; preds = %2 - %11 = load i32, ptr %0, align 4 - %12 = 
lshr i32 %11, 2 - br label %16 - - 13: ; preds = %2 - %14 = load i32, ptr %0, align 4 - %15 = lshr i32 %14, 3 - br label %16 - - default.unreachable2: ; preds = %16, %2 - unreachable - - 16: ; preds = %13, %10, %7, %5 - %17 = phi i32 [ %15, %13 ], [ %12, %10 ], [ %9, %7 ], [ %6, %5 ] - %18 = lshr i32 %1, 2 - %19 = and i32 %18, 7 - switch i32 %19, label %default.unreachable2 [ - i32 0, label %20 - i32 1, label %22 - i32 2, label %25 - i32 3, label %28 - ] - - 20: ; preds = %16 - %21 = load i32, ptr %0, align 4 - br label %31 - - 22: ; preds = %16 - %23 = load i32, ptr %0, align 4 - %24 = lshr i32 %23, 1 - br label %31 - - 25: ; preds = %16 - %26 = load i32, ptr %0, align 4 - %27 = lshr i32 %26, 2 - br label %31 - - 28: ; preds = %16 - %29 = load i32, ptr %0, align 4 - %30 = lshr i32 %29, 6 - br label %31 - - 31: ; preds = %28, %25, %22, %20 - %32 = phi i32 [ %30, %28 ], [ %27, %25 ], [ %24, %22 ], [ %21, %20 ] - %33 = or i32 %32, %17 - ret i32 %33 - } - -... --- name: foo -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: true -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: gr32, preferred-register: '' } - - { id: 1, class: gr32, preferred-register: '' } - - { id: 2, class: gr32, preferred-register: '' } - - { id: 3, class: gr32, preferred-register: '' } - - { id: 4, class: gr32, preferred-register: '' } - - { id: 5, class: gr32, preferred-register: '' } - - { id: 6, class: gr32, preferred-register: '' } - - { id: 7, class: gr32, preferred-register: '' } - - { id: 8, class: gr32, preferred-register: '' } - - { id: 9, class: gr32, preferred-register: '' } - - { id: 10, class: gr64, preferred-register: '' } - - { id: 11, class: gr32, preferred-register: '' } - - { id: 12, 
class: gr64_nosp, preferred-register: '' } - - { id: 13, class: gr32, preferred-register: '' } - - { id: 14, class: gr32, preferred-register: '' } - - { id: 15, class: gr32, preferred-register: '' } - - { id: 16, class: gr32, preferred-register: '' } - - { id: 17, class: gr32, preferred-register: '' } - - { id: 18, class: gr64_nosp, preferred-register: '' } - - { id: 19, class: gr32, preferred-register: '' } - - { id: 20, class: gr32, preferred-register: '' } - - { id: 21, class: gr32, preferred-register: '' } - - { id: 22, class: gr32, preferred-register: '' } - - { id: 23, class: gr32, preferred-register: '' } - - { id: 24, class: gr32, preferred-register: '' } -liveins: - - { reg: '$rdi', virtual-reg: '%10' } - - { reg: '$esi', virtual-reg: '%11' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} jumpTable: kind: block-address entries: - id: 0 - blocks: [ '%bb.1', '%bb.2', '%bb.3', '%bb.4' ] + blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5' ] - id: 1 - blocks: [ '%bb.7', '%bb.8', '%bb.9', '%bb.10' ] + blocks: [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ] body: | ; LIMIT-LABEL: name: foo - ; LIMIT: bb.0 (%ir-block.2): + ; LIMIT: bb.0: ; LIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) ; LIMIT-NEXT: liveins: $rdi, $esi ; LIMIT-NEXT: {{ $}} @@ -169,81 +23,81 @@ body: | ; LIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags ; LIMIT-NEXT: 
[[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags ; LIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit - ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.2 (%ir-block.5): + ; LIMIT-NEXT: bb.2: ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.3 (%ir-block.7): + ; LIMIT-NEXT: bb.3: ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.4 (%ir-block.10): + ; LIMIT-NEXT: bb.4: ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.5 (%ir-block.13): + ; LIMIT-NEXT: bb.5: ; LIMIT-NEXT: successors: %bb.7(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: 
[[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags ; LIMIT-NEXT: JMP_1 %bb.7 ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.6.default.unreachable2: + ; LIMIT-NEXT: bb.6: ; LIMIT-NEXT: successors: ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.7 (%ir-block.16): + ; LIMIT-NEXT: bb.7: ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2 ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit - ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.9 (%ir-block.20): + ; LIMIT-NEXT: bb.9: ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: JMP_1 %bb.13 ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.10 (%ir-block.22): + ; LIMIT-NEXT: bb.10: ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags ; LIMIT-NEXT: JMP_1 %bb.13 ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.11 (%ir-block.25): + ; LIMIT-NEXT: bb.11: ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} 
- ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags ; LIMIT-NEXT: JMP_1 %bb.13 ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.12 (%ir-block.28): + ; LIMIT-NEXT: bb.12: ; LIMIT-NEXT: successors: %bb.13(0x80000000) ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.13 (%ir-block.31): + ; LIMIT-NEXT: bb.13: ; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9 ; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags ; LIMIT-NEXT: $eax = COPY [[OR32rr]] ; LIMIT-NEXT: RET 0, $eax ; ; NOLIMIT-LABEL: name: foo - ; NOLIMIT: bb.0 (%ir-block.2): + ; NOLIMIT: bb.0: ; NOLIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) ; NOLIMIT-NEXT: liveins: $rdi, $esi ; NOLIMIT-NEXT: {{ $}} @@ -252,88 +106,87 @@ body: | ; NOLIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags ; NOLIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit - ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.2 (%ir-block.5): + ; NOLIMIT-NEXT: bb.2: ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), 
%bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags ; NOLIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit - ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.3 (%ir-block.7): + ; NOLIMIT-NEXT: bb.3: ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags ; NOLIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit - ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.4 (%ir-block.10): + ; NOLIMIT-NEXT: bb.4: ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: 
[[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags ; NOLIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit - ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.5 (%ir-block.13): + ; NOLIMIT-NEXT: bb.5: ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags ; NOLIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags ; NOLIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit - ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table) + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.6.default.unreachable2: + ; NOLIMIT-NEXT: bb.6: ; NOLIMIT-NEXT: successors: ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.9 (%ir-block.20): + ; NOLIMIT-NEXT: bb.9: ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) ; 
NOLIMIT-NEXT: {{ $}} ; NOLIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 - ; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: JMP_1 %bb.13 ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.10 (%ir-block.22): + ; NOLIMIT-NEXT: bb.10: ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) ; NOLIMIT-NEXT: {{ $}} ; NOLIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 - ; NOLIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags ; NOLIMIT-NEXT: JMP_1 %bb.13 ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.11 (%ir-block.25): + ; NOLIMIT-NEXT: bb.11: ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) ; NOLIMIT-NEXT: {{ $}} ; NOLIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 - ; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags ; NOLIMIT-NEXT: JMP_1 %bb.13 ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.12 (%ir-block.28): + ; NOLIMIT-NEXT: bb.12: ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) ; NOLIMIT-NEXT: {{ $}} ; NOLIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 - ; NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; 
NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; NOLIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags ; NOLIMIT-NEXT: {{ $}} - ; NOLIMIT-NEXT: bb.13 (%ir-block.31): + ; NOLIMIT-NEXT: bb.13: ; NOLIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12 ; NOLIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9 ; NOLIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags ; NOLIMIT-NEXT: $eax = COPY [[OR32rr]] ; NOLIMIT-NEXT: RET 0, $eax - bb.0 (%ir-block.2): - successors: %bb.12(0x80000000) + bb.0: liveins: $rdi, $esi %11:gr32 = COPY $esi @@ -342,83 +195,64 @@ body: | %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags %12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit - bb.12 (%ir-block.2): - successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000) - - JMP64m $noreg, 8, %12, %jump-table.0, $noreg :: (load (s64) from jump-table) + bb.1: + successors: %bb.2, %bb.3, %bb.4, %bb.5 - bb.1 (%ir-block.5): - successors: %bb.6(0x80000000) + JMP64m $noreg, 8, %12, %jump-table.0, $noreg - %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - JMP_1 %bb.6 + bb.2: + %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + JMP_1 %bb.7 - bb.2 (%ir-block.7): - successors: %bb.6(0x80000000) - - %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + bb.3: + %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags - JMP_1 %bb.6 - - bb.3 (%ir-block.10): - successors: %bb.6(0x80000000) + JMP_1 %bb.7 - %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + bb.4: + %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags - JMP_1 %bb.6 - - bb.4 (%ir-block.13): - successors: %bb.6(0x80000000) + 
JMP_1 %bb.7 - %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + bb.5: + %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags - JMP_1 %bb.6 + JMP_1 %bb.7 - bb.5.default.unreachable2: + bb.6: successors: - - bb.6 (%ir-block.16): - successors: %bb.13(0x80000000) - - %4:gr32 = PHI %3, %bb.4, %2, %bb.3, %1, %bb.2, %0, %bb.1 + bb.7: + %4:gr32 = PHI %3, %bb.5, %2, %bb.4, %1, %bb.3, %0, %bb.2 %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags %18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit - bb.13 (%ir-block.16): - successors: %bb.7(0x20000000), %bb.8(0x20000000), %bb.9(0x20000000), %bb.10(0x20000000) - - JMP64m $noreg, 8, %18, %jump-table.1, $noreg :: (load (s64) from jump-table) + bb.8: + successors: %bb.9, %bb.10, %bb.11, %bb.12 - bb.7 (%ir-block.20): - successors: %bb.11(0x80000000) + JMP64m $noreg, 8, %18, %jump-table.1, $noreg - %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) - JMP_1 %bb.11 + bb.9: + %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + JMP_1 %bb.13 - bb.8 (%ir-block.22): - successors: %bb.11(0x80000000) - - %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + bb.10: + %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags - JMP_1 %bb.11 - - bb.9 (%ir-block.25): - successors: %bb.11(0x80000000) + JMP_1 %bb.13 - %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + bb.11: + %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg %7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags - JMP_1 %bb.11 - - bb.10 (%ir-block.28): - successors: %bb.11(0x80000000) + JMP_1 %bb.13 - %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + bb.12: + %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags - bb.11 (%ir-block.31): - %9:gr32 = PHI %8, %bb.10, %7, %bb.9, %6, %bb.8, %5, %bb.7 + 
bb.13: + %9:gr32 = PHI %8, %bb.12, %7, %bb.11, %6, %bb.10, %5, %bb.9 %24:gr32 = OR32rr %9, %4, implicit-def dead $eflags $eax = COPY %24 RET 0, $eax From dfc2e9e61225d16365979a516baa5fff68ea739f Mon Sep 17 00:00:00 2001 From: DianQK Date: Sat, 24 Feb 2024 20:46:29 +0800 Subject: [PATCH 4/4] Add comments --- llvm/lib/CodeGen/TailDuplicator.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index c1e32fa7d63692..f5dd21cb927012 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -577,6 +577,10 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; + // Duplicating a BB which has both multiple predecessors and successors will + // result in a complex CFG and may also cause a huge number of PHI nodes. If + // we want to remove this limitation, we have to address + // https://github.com/llvm/llvm-project/issues/78578. if (TailBB.pred_size() > TailDupPredSize && TailBB.succ_size() > TailDupSuccSize) return false;