Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JIT] X64 - Three instruction replacement sequence for multiply in certain cases #76981

Merged
merged 25 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ class Lowering final : public Phase
void LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode);
#ifdef TARGET_XARCH
void LowerPutArgStk(GenTreePutArgStk* putArgStk);
GenTree* TryLowerMulToLshSubOrLshAdd(GenTreeOp* node);
#endif // TARGET_XARCH

bool TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* parent);
Expand Down
101 changes: 101 additions & 0 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,98 @@ void Lowering::LowerStoreIndir(GenTreeStoreInd* node)
ContainCheckStoreIndir(node);
}

//----------------------------------------------------------------------------------------------
// Lowering::TryLowerMulToLshSubOrLshAdd:
//    Lowers a tree MUL(X, CNS) to SUB(LSH(X, CNS_SHIFT), X) when CNS + 1 is a power of two
//    or
//    Lowers a tree MUL(X, CNS) to ADD(LSH(X, CNS_SHIFT), X) when CNS - 1 is a power of two
//
// Arguments:
//    node - GT_MUL node of integral type
//
// Return Value:
//    Returns the replacement node if one is created else nullptr indicating no replacement.
//    The replacement is the incoming node itself, re-opered in place to GT_SUB or GT_ADD.
//
// Notes:
//    Performs containment checks on the replacement node if one is created.
//    Nothing is changed when nullptr is returned: every bail-out happens before the
//    first mutation of the tree.
GenTree* Lowering::TryLowerMulToLshSubOrLshAdd(GenTreeOp* node)
{
    assert(node->OperIs(GT_MUL));

// We do not do this optimization in X86 as it is not recommended.
#if TARGET_X86
    return nullptr;
#else // !TARGET_X86
    if (!varTypeIsIntegral(node))
    {
        return nullptr;
    }

    // An overflow-checked multiply cannot be expressed as a plain shift plus add/sub.
    if (node->gtOverflow())
    {
        return nullptr;
    }

    GenTree* op1 = node->gtGetOp1();
    GenTree* op2 = node->gtGetOp2();

    if (op1->isContained() || op2->isContained())
    {
        return nullptr;
    }

    // X is read twice in the replacement tree (once by the shift, once by the add/sub),
    // so it is restricted to a local variable, which is cloned further below.
    if (!op1->OperIs(GT_LCL_VAR))
    {
        return nullptr;
    }

    if (!op2->IsCnsIntOrI())
    {
        return nullptr;
    }

    GenTreeIntConCommon* cns    = op2->AsIntConCommon();
    ssize_t              cnsVal = cns->IconValue();

    // Use GT_LSH if cnsVal is a power of two.
    // This is handled in codegen.
    if (isPow2(cnsVal))
    {
        return nullptr;
    }

    // Use GT_LEA if cnsVal is 3, 5, or 9.
    // This is handled in codegen.
    if (cnsVal == 3 || cnsVal == 5 || cnsVal == 9)
    {
        return nullptr;
    }

    ssize_t cnsValPlusOne  = cnsVal + 1;
    ssize_t cnsValMinusOne = cnsVal - 1;

    // X * (2^n - 1) == (X << n) - X, X * (2^n + 1) == (X << n) + X.
    bool useSub = isPow2(cnsValPlusOne);

    if (!useSub && !isPow2(cnsValMinusOne))
    {
        return nullptr;
    }

    // From here on the tree is mutated; cnsVal becomes the neighbouring power of two.
    if (useSub)
    {
        cnsVal = cnsValPlusOne;
        node->ChangeOper(GT_SUB);
    }
    else
    {
        cnsVal = cnsValMinusOne;
        node->ChangeOper(GT_ADD);
    }

    // The existing constant node is reused to carry the shift amount.
    unsigned int shiftAmount = genLog2(static_cast<uint64_t>(static_cast<size_t>(cnsVal)));
    cns->SetIconValue(shiftAmount);

    node->gtOp1 = comp->gtNewOperNode(GT_LSH, node->gtType, op1, cns);
    node->gtOp2 = comp->gtClone(op1);

    // Re-thread the linear order. The original operands are unlinked and everything is
    // re-inserted ahead of `node` in this sequence: clone of X, shift constant, X, LSH.
    BlockRange().Remove(op1);
    BlockRange().Remove(cns);
    BlockRange().InsertBefore(node, node->gtGetOp2());
    BlockRange().InsertBefore(node, cns);
    BlockRange().InsertBefore(node, op1);
    BlockRange().InsertBefore(node, node->gtGetOp1());

    ContainCheckBinary(node);
    ContainCheckShiftRotate(node->gtGetOp1()->AsOp());

    return node;
#endif // !TARGET_X86
}

//------------------------------------------------------------------------
// LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node.
//
Expand All @@ -120,6 +212,15 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul)
{
assert(mul->OperIsMul());

if (mul->OperIs(GT_MUL))
{
GenTree* replacementNode = TryLowerMulToLshSubOrLshAdd(mul);
if (replacementNode != nullptr)
{
return replacementNode->gtNext;
}
}

ContainCheckMul(mul);

return mul->gtNext;
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/tools/SuperFileCheck/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ static string GetFullyQualifiedEnclosingTypeName(MethodDeclarationSyntax methodD
if (namespaceDecl != null)
{
var identifiers =
namespaceDecl.DescendantTokens().Where(x => x.IsKind(SyntaxKind.IdentifierToken)).Select(x => x.ValueText);
namespaceDecl.Name.DescendantTokens().Where(x => x.IsKind(SyntaxKind.IdentifierToken)).Select(x => x.ValueText);
return $"{String.Join(".", identifiers)}.{qualifiedTypeName}";
}

Expand Down Expand Up @@ -373,8 +373,8 @@ static string PreProcessMethod(MethodDeclarationInfo methodDeclInfo, string[] ch
var methodName = methodDeclInfo.FullyQualifiedName.Replace("*", "{{.*}}"); // Change wild-card to FileCheck wild-card syntax.

// Create anchors from the first prefix.
var startAnchorText = $"// {checkPrefixes[0]}-LABEL: {methodName}";
var endAnchorText = $"// {checkPrefixes[0]}: {methodName}";
var startAnchorText = $"// {checkPrefixes[0]}-LABEL: for method {methodName}";
var endAnchorText = $"// {checkPrefixes[0]}: for method {methodName}";

// Create temp source file based on the source text of the method.
// Newlines are added to pad the text so FileCheck's error messages will correspond
Expand Down
181 changes: 181 additions & 0 deletions src/tests/JIT/opt/Multiply/IntMultiply.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;

namespace CodeGenTests
{
    /// <summary>
    /// Multiply-by-constant code generation tests. Every helper below multiplies its argument
    /// by one fixed constant and is kept out of line so that it is compiled on its own.
    /// The check-directive comments inside each helper describe the expected instructions and
    /// are consumed by the test tooling, so they must not be reworded.
    /// </summary>
    static class IntMultiply
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static uint UInt32_MultiplyWithUInt32MaxValue(uint value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: neg [[REG0]]
            return value * UInt32.MaxValue;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWithUInt32MaxValue(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 32
            // X64-FULL-LINE-NEXT: sub [[REG0]], [[REG1]]
            return value * (ulong)UInt32.MaxValue;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWithUInt32MaxValuePlusOne(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 32
            return value * ((ulong)UInt32.MaxValue + 1);
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWithUInt32MaxValuePlusTwo(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 32
            // X64-FULL-LINE-NEXT: add [[REG0]], [[REG1]]
            return value * ((ulong)UInt32.MaxValue + 2);
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith2(ulong value)
        {
            // X64-FULL-LINE: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+[[REG1]]{{\]}}
            return value * 2;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith3(ulong value)
        {
            // X64-FULL-LINE: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+2*[[REG1]]{{\]}}
            return value * 3;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith4(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 2
            return value * 4;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith5(ulong value)
        {
            // X64-FULL-LINE: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+4*[[REG1]]{{\]}}
            return value * 5;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith6(ulong value)
        {
            // X64-FULL-LINE: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+2*[[REG1]]{{\]}}
            return value * 6;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith7(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 3
            // X64-FULL-LINE-NEXT: sub [[REG0]], [[REG1]]
            return value * 7;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith8(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 3
            return value * 8;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith9(ulong value)
        {
            // X64-FULL-LINE: lea [[REG0:[a-z]+]], {{\[}}[[REG1:[a-z]+]]+8*[[REG1]]{{\]}}
            return value * 9;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith15(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 4
            // X64-FULL-LINE-NEXT: sub [[REG0]], [[REG1]]
            return value * 15;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith16(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 4
            return value * 16;
        }

        [MethodImpl(MethodImplOptions.NoInlining)]
        private static ulong UInt64_MultiplyWith17(ulong value)
        {
            // X64-FULL-LINE: mov [[REG0:[a-z]+]], [[REG1:[a-z]+]]
            // X64-FULL-LINE-NEXT: shl [[REG0]], 4
            // X64-FULL-LINE-NEXT: add [[REG0]], [[REG1]]
            return value * 17;
        }

        /// <summary>
        /// Runs every helper with the input 1 and compares the result with the helper's constant.
        /// </summary>
        /// <returns>100 when every product matches; 0 as soon as one does not.</returns>
        private static int Main()
        {
            const int Pass = 100;
            const int Fail = 0;

            // The && chain short-circuits, so helpers after the first mismatch are not invoked,
            // and the helpers run in the order they are listed.
            bool everyProductMatches =
                UInt32_MultiplyWithUInt32MaxValue(1) == UInt32.MaxValue
                && UInt64_MultiplyWithUInt32MaxValue(1) == (ulong)UInt32.MaxValue
                && UInt64_MultiplyWithUInt32MaxValuePlusOne(1) == ((ulong)UInt32.MaxValue + 1)
                && UInt64_MultiplyWithUInt32MaxValuePlusTwo(1) == ((ulong)UInt32.MaxValue + 2)
                && UInt64_MultiplyWith2(1) == 2
                && UInt64_MultiplyWith3(1) == 3
                && UInt64_MultiplyWith4(1) == 4
                && UInt64_MultiplyWith5(1) == 5
                && UInt64_MultiplyWith6(1) == 6
                && UInt64_MultiplyWith7(1) == 7
                && UInt64_MultiplyWith8(1) == 8
                && UInt64_MultiplyWith9(1) == 9
                && UInt64_MultiplyWith15(1) == 15
                && UInt64_MultiplyWith16(1) == 16
                && UInt64_MultiplyWith17(1) == 17;

            return everyProductMatches ? Pass : Fail;
        }
    }
}
26 changes: 26 additions & 0 deletions src/tests/JIT/opt/Multiply/IntMultiply.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
  </PropertyGroup>
  <!-- Build with optimizations enabled and without debug symbols. -->
  <PropertyGroup>
    <DebugType>None</DebugType>
    <Optimize>True</Optimize>
  </PropertyGroup>
  <ItemGroup>
    <!-- Compiles the source file named after this project. -->
    <!-- NOTE(review): HasDisasmCheck presumably opts the file into disassembly checking; confirm against the test infrastructure. -->
    <Compile Include="$(MSBuildProjectName).cs">
      <HasDisasmCheck>true</HasDisasmCheck>
    </Compile>
  </ItemGroup>
  <!-- Appends to the existing batch and bash pre-command properties: both set COMPlus_TieredCompilation=0 and COMPlus_JITMinOpts=0. -->
  <PropertyGroup>
    <CLRTestBatchPreCommands><![CDATA[
$(CLRTestBatchPreCommands)
set COMPlus_TieredCompilation=0
set COMPlus_JITMinOpts=0
]]></CLRTestBatchPreCommands>
    <BashCLRTestPreCommands><![CDATA[
$(BashCLRTestPreCommands)
export COMPlus_TieredCompilation=0
export COMPlus_JITMinOpts=0
]]></BashCLRTestPreCommands>
  </PropertyGroup>
</Project>