diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge index c14ec464a43a66..4ebf304e23d587 100755 --- a/.ci/generate-buildkite-pipeline-premerge +++ b/.ci/generate-buildkite-pipeline-premerge @@ -233,10 +233,7 @@ linux_projects=$(add-dependencies ${linux_projects_to_test} | sort | uniq) windows_projects_to_test=$(exclude-windows $(compute-projects-to-test ${modified_projects})) windows_check_targets=$(check-targets ${windows_projects_to_test} | sort | uniq) -# Temporary disable the windows job. -# See https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840 -#windows_projects=$(add-dependencies ${windows_projects_to_test} | sort | uniq) -windows_projects="" +windows_projects=$(add-dependencies ${windows_projects_to_test} | sort | uniq) # Generate the appropriate pipeline if [[ "${linux_projects}" != "" ]]; then diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh index a704e855f011cb..9561bf668a90cb 100755 --- a/.ci/monolithic-windows.sh +++ b/.ci/monolithic-windows.sh @@ -38,6 +38,12 @@ targets="${2}" echo "--- cmake" pip install -q -r ${MONOREPO_ROOT}/mlir/python/requirements.txt + +# The CMAKE_*_LINKER_FLAGS to disable the manifest come from research +# on fixing a build reliability issue on the build server, please +# see https://github.com/llvm/llvm-project/pull/82393 and +# https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840/40 +# for further information. cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \ -D LLVM_ENABLE_PROJECTS="${projects}" \ -G Ninja \ @@ -49,7 +55,10 @@ cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \ -D COMPILER_RT_BUILD_ORC=OFF \ -D CMAKE_C_COMPILER_LAUNCHER=sccache \ -D CMAKE_CXX_COMPILER_LAUNCHER=sccache \ - -D MLIR_ENABLE_BINDINGS_PYTHON=ON + -D MLIR_ENABLE_BINDINGS_PYTHON=ON \ + -D CMAKE_EXE_LINKER_FLAGS="/MANIFEST:NO" \ + -D CMAKE_MODULE_LINKER_FLAGS="/MANIFEST:NO" \ + -D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" echo "--- ninja" # Targets are not escaped as they are passed as separate arguments. diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml index f2a831ad3577ad..53da8662b0203a 100644 --- a/.github/workflows/release-tasks.yml +++ b/.github/workflows/release-tasks.yml @@ -28,6 +28,7 @@ jobs: name: Create a New Release runs-on: ubuntu-latest needs: validate-tag + steps: - name: Install Dependencies run: | @@ -40,8 +41,9 @@ jobs: - name: Create Release env: GITHUB_TOKEN: ${{ github.token }} + USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }} run: | - ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} create + ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --release ${{ needs.validate-tag.outputs.release-version }} --user ${{ github.actor }} --user-token "$USER_TOKEN" create release-documentation: name: Build and Upload Release Documentation needs: diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index fb4d7a02dd086f..711baf45f449a0 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -833,6 +833,7 @@ to ``float``; see below for more information on this emulation. * 32-bit ARM (natively on some architecture versions) * 64-bit ARM (AArch64) (natively on ARMv8.2a and above) * AMDGPU (natively) + * NVPTX (natively) * SPIR (natively) * X86 (if SSE2 is available; natively if AVX512-FP16 is also available) * RISC-V (natively if Zfh or Zhinx is available) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 3d456610e7a7f1..60a499533510d9 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -255,6 +255,11 @@ class Driver { /// from non-system headers are emitted. HeaderIncludeFilteringKind CCPrintHeadersFiltering = HIFIL_None; + /// Name of the library that provides implementations of + /// IEEE-754 128-bit float math functions used by Fortran F128 + /// runtime library. It should be linked as needed by the linker job. + std::string FlangF128MathLibrary; + /// Set CC_LOG_DIAGNOSTICS mode, which causes the frontend to log diagnostics /// to CCLogDiagnosticsFilename or to stderr, in a stable machine readable /// format. @@ -446,6 +451,11 @@ class Driver { bool offloadHostOnly() const { return Offload == OffloadHost; } bool offloadDeviceOnly() const { return Offload == OffloadDevice; } + void setFlangF128MathLibrary(std::string name) { + FlangF128MathLibrary = std::move(name); + } + StringRef getFlangF128MathLibrary() const { return FlangF128MathLibrary; } + /// Compute the desired OpenMP runtime from the flags provided. OpenMPRuntimeKind getOpenMPRuntime(const llvm::opt::ArgList &Args) const; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index a8efae3a1ce388..b47c399fef6042 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -61,6 +61,10 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, NoAsmVariants = true; GPU = CudaArch::UNUSED; + // PTX supports f16 as a fundamental type. + HasLegalHalfType = true; + HasFloat16 = true; + if (TargetPointerWidth == 32) resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); else if (Opts.NVPTXUseShortPointers) diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index cd955b6c849456..d5fc1d5dd25a8b 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -368,11 +368,7 @@ void BareMetal::AddLinkRuntimeLib(const ArgList &Args, ToolChain::RuntimeLibType RLT = GetRuntimeLibType(Args); switch (RLT) { case ToolChain::RLT_CompilerRT: { - const std::string FileName = getCompilerRT(Args, "builtins"); - llvm::StringRef BaseName = llvm::sys::path::filename(FileName); - BaseName.consume_front("lib"); - BaseName.consume_back(".a"); - CmdArgs.push_back(Args.MakeArgString("-l" + BaseName)); + CmdArgs.push_back(getCompilerRTArgString(Args, "builtins")); return; } case ToolChain::RLT_Libgcc: @@ -462,11 +458,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, for (const auto &LibPath : TC.getLibraryPaths()) CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-L", LibPath))); - const std::string FileName = TC.getCompilerRT(Args, "builtins"); - llvm::SmallString<128> PathBuf{FileName}; - llvm::sys::path::remove_filename(PathBuf); - CmdArgs.push_back(Args.MakeArgString("-L" + PathBuf)); - if (TC.ShouldLinkCXXStdlib(Args)) TC.AddCXXStdlibLibArgs(Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index e031f60731834d..9c7a0e9699c59b 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1396,6 +1396,14 @@ void tools::addFortranRuntimeLibs(const ToolChain &TC, const ArgList &Args, // add the correct libraries to link against as dependents in the object // file. if (!TC.getTriple().isKnownWindowsMSVCEnvironment()) { + StringRef f128LibName = TC.getDriver().getFlangF128MathLibrary(); + f128LibName.consume_front_insensitive("lib"); + if (!f128LibName.empty()) { + CmdArgs.push_back("-lFortranFloat128Math"); + addAsNeededOption(TC, Args, CmdArgs, /*as_needed=*/true); + CmdArgs.push_back(Args.MakeArgString("-l" + f128LibName)); + addAsNeededOption(TC, Args, CmdArgs, /*as_needed=*/false); + } CmdArgs.push_back("-lFortranRuntime"); CmdArgs.push_back("-lFortranDecimal"); } diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index 8d344f9b63961a..8b41a949fd6734 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -170,6 +170,9 @@ class UncountedCallArgsChecker if (!Callee) return false; + if (isMethodOnWTFContainerType(Callee)) + return true; + auto overloadedOperatorType = Callee->getOverloadedOperator(); if (overloadedOperatorType == OO_EqualEqual || overloadedOperatorType == OO_ExclaimEqual || @@ -198,6 +201,31 @@ class UncountedCallArgsChecker return false; } + bool isMethodOnWTFContainerType(const FunctionDecl *Decl) const { + if (!isa(Decl)) + return false; + auto *ClassDecl = Decl->getParent(); + if (!ClassDecl || !isa(ClassDecl)) + return false; + + auto *NsDecl = ClassDecl->getParent(); + if (!NsDecl || !isa(NsDecl)) + return false; + + auto MethodName = safeGetName(Decl); + auto ClsNameStr = safeGetName(ClassDecl); + StringRef ClsName = ClsNameStr; // FIXME: Make safeGetName return StringRef. + auto NamespaceName = safeGetName(NsDecl); + // FIXME: These should be implemented via attributes. + return NamespaceName == "WTF" && + (MethodName == "find" || MethodName == "findIf" || + MethodName == "reverseFind" || MethodName == "reverseFindIf" || + MethodName == "get" || MethodName == "inlineGet" || + MethodName == "contains" || MethodName == "containsIf") && + (ClsName.ends_with("Vector") || ClsName.ends_with("Set") || + ClsName.ends_with("Map")); + } + void reportBug(const Expr *CallArg, const ParmVarDecl *Param) const { assert(CallArg); diff --git a/clang/test/Analysis/Checkers/WebKit/call-args-wtf-containers.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-wtf-containers.cpp new file mode 100644 index 00000000000000..0a63a789856127 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/call-args-wtf-containers.cpp @@ -0,0 +1,146 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s + +#include "mock-types.h" + +namespace WTF { + + template + class HashSet { + public: + template T* find(U&) const; + template bool contains(U&) const; + unsigned size() { return m_size; } + template void add(U&) const; + template void remove(U&) const; + + private: + T* m_table { nullptr }; + unsigned m_size { 0 }; + }; + + template + class HashMap { + public: + struct Item { + T key; + S value; + }; + + template Item* find(U&) const; + template bool contains(U&) const; + template S* get(U&) const; + template S* inlineGet(U&) const; + template void add(U&) const; + template void remove(U&) const; + + private: + Item* m_table { nullptr }; + }; + + template + class WeakHashSet { + public: + template T* find(U&) const; + template bool contains(U&) const; + template void add(U&) const; + template void remove(U&) const; + }; + + template + class Vector { + public: + unsigned size() { return m_size; } + T& at(unsigned i) { return m_buffer[i]; } + T& operator[](unsigned i) { return m_buffer[i]; } + template unsigned find(U&); + template unsigned reverseFind(U&); + template bool contains(U&); + template unsigned findIf(const MatchFunction& match) + { + for (unsigned i = 0; i < m_size; ++i) { + if (match(at(i))) + return i; + } + return static_cast(-1); + } + template unsigned reverseFindIf(const MatchFunction& match) + { + for (unsigned i = 0; i < m_size; ++i) { + if (match(at(m_size - i))) + return i; + } + return static_cast(-1); + } + template bool containsIf(const MatchFunction& match) + { + for (unsigned i = 0; i < m_size; ++i) { + if (match(at(m_size - i))) + return true; + } + return false; + } + template void append(U&) const; + template void remove(U&) const; + + private: + T* m_buffer { nullptr }; + unsigned m_size { 0 }; + }; + +} + +using WTF::HashSet; +using WTF::HashMap; +using WTF::WeakHashSet; +using WTF::Vector; + +class RefCounted { +public: + void ref() const; + void deref() const; +}; + +RefCounted* object(); + +void test() { + HashSet> set; + set.find(*object()); + set.contains(*object()); + set.add(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + set.remove(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + + HashMap, unsigned> map; + map.find(*object()); + map.contains(*object()); + map.inlineGet(*object()); + map.add(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + map.remove(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + + WeakHashSet> weakSet; + weakSet.find(*object()); + weakSet.contains(*object()); + weakSet.add(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + weakSet.remove(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + + Vector> vector; + vector.at(0); + vector[0]; + vector.find(*object()); + vector.reverseFind(*object()); + vector.contains(*object()); + vector.append(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + vector.remove(*object()); + // expected-warning@-1{{Call argument is uncounted and unsafe}} + + auto* obj = object(); + vector.findIf([&](Ref key) { return key.ptr() == obj; }); + vector.reverseFindIf([&](Ref key) { return key.ptr() == obj; }); + vector.containsIf([&](Ref key) { return key.ptr() == obj; }); +} \ No newline at end of file diff --git a/clang/test/Driver/arm-compiler-rt.c b/clang/test/Driver/arm-compiler-rt.c index 954947bb890f87..adecacbcaabf9c 100644 --- a/clang/test/Driver/arm-compiler-rt.c +++ b/clang/test/Driver/arm-compiler-rt.c @@ -3,7 +3,7 @@ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-EABI -// ARM-EABI: "-lclang_rt.builtins-arm" +// ARM-EABI: "{{[^"]*}}libclang_rt.builtins-arm.a" // RUN: %clang -target arm-linux-gnueabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ diff --git a/clang/test/Driver/baremetal-multilib.yaml b/clang/test/Driver/baremetal-multilib.yaml index af26e82621c91e..3f026cbeb437b2 100644 --- a/clang/test/Driver/baremetal-multilib.yaml +++ b/clang/test/Driver/baremetal-multilib.yaml @@ -17,7 +17,7 @@ # CHECK-SAME: "-x" "c++" "{{.*}}baremetal-multilib.yaml" # CHECK-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" # CHECK-SAME: "-L[[SYSROOT]]/bin/../lib/clang-runtimes/arm-none-eabi/thumb/v8-m.main/fp/lib" -# CHECK-SAME: "-lc" "-lm" "-lclang_rt.builtins" +# CHECK-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins.a" # CHECK-SAME: "-o" "{{.*}}.tmp.out" # RUN: %T/baremetal_multilib/bin/clang -no-canonical-prefixes -x c++ %s -### -o %t.out 2>&1 \ diff --git a/clang/test/Driver/baremetal-sysroot.cpp b/clang/test/Driver/baremetal-sysroot.cpp index fc66020772a771..46338185ffd9d5 100644 --- a/clang/test/Driver/baremetal-sysroot.cpp +++ b/clang/test/Driver/baremetal-sysroot.cpp @@ -18,5 +18,5 @@ // CHECK-V6M-C-SAME: "-x" "c++" "{{.*}}baremetal-sysroot.cpp" // CHECK-V6M-C-NEXT: "{{[^"]*}}ld{{(\.(lld|bfd|gold))?}}{{(\.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-V6M-C-SAME: "-L{{.*}}/baremetal_default_sysroot{{[/\\]+}}bin{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+}}armv6m-none-eabi{{[/\\]+}}lib" -// CHECK-V6M-C-SAME: "-lc" "-lm" "-lclang_rt.builtins-armv6m" +// CHECK-V6M-C-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-armv6m.a" // CHECK-V6M-C-SAME: "-o" "{{.*}}.o" diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp index 7511d7d1adb4dd..8baf388894eb27 100644 --- a/clang/test/Driver/baremetal.cpp +++ b/clang/test/Driver/baremetal.cpp @@ -18,8 +18,7 @@ // CHECK-V6M-C-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" "-EL" // CHECK-V6M-C-SAME: "-T" "semihosted.lds" "-Lsome{{[/\\]+}}directory{{[/\\]+}}user{{[/\\]+}}asked{{[/\\]+}}for" // CHECK-V6M-C-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}lib" -// CHECK-V6M-C-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal" -// CHECK-V6M-C-SAME: "-lc" "-lm" "-lclang_rt.builtins-armv6m" "--target2=rel" "-o" "{{.*}}.tmp.out" +// CHECK-V6M-C-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-armv6m.a" "--target2=rel" "-o" "{{.*}}.tmp.out" // RUN: %clang %s -### --target=armv6m-none-eabi -nostdlibinc -nobuiltininc 2>&1 \ // RUN: --sysroot=%S/Inputs/baremetal_arm | FileCheck --check-prefix=CHECK-V6M-LIBINC %s @@ -37,16 +36,15 @@ // CHECK-ARMV7M-PER-TARGET: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" "-EL" // CHECK-ARMV7M-PER-TARGET: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}lib" // CHECK-ARMV7M-PER-TARGET: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}armv7m-vendor-none-eabi -// CHECK-ARMV7M-PER-TARGET: "-lc" "-lm" "-lclang_rt.builtins" +// CHECK-ARMV7M-PER-TARGET: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins.a" // RUN: %clangxx %s -### --target=armv6m-none-eabi 2>&1 \ // RUN: --sysroot=%S/Inputs/baremetal_arm | FileCheck --check-prefix=CHECK-V6M-DEFAULTCXX %s // CHECK-V6M-DEFAULTCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-V6M-DEFAULTCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" "-EL" // CHECK-V6M-DEFAULTCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib" -// CHECK-V6M-DEFAULTCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-V6M-DEFAULTCXX-SAME: "-lc++" "-lc++abi" "-lunwind" -// CHECK-V6M-DEFAULTCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-armv6m" "--target2=rel" "-o" "a.out" +// CHECK-V6M-DEFAULTCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-armv6m.a" "--target2=rel" "-o" "a.out" // RUN: %clangxx %s -### --target=armv6m-none-eabi -stdlib=libc++ 2>&1 \ // RUN: --sysroot=%S/Inputs/baremetal_arm | FileCheck --check-prefix=CHECK-V6M-LIBCXX %s @@ -55,9 +53,8 @@ // CHECK-V6M-LIBCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}v1" // CHECK-V6M-LIBCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" "-EL" // CHECK-V6M-LIBCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib" -// CHECK-V6M-LIBCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-V6M-LIBCXX-SAME: "-lc++" "-lc++abi" "-lunwind" -// CHECK-V6M-LIBCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-armv6m" "--target2=rel" "-o" "a.out" +// CHECK-V6M-LIBCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-armv6m.a" "--target2=rel" "-o" "a.out" // RUN: %clangxx %s -### --target=armv6m-none-eabi 2>&1 \ // RUN: --sysroot=%S/Inputs/baremetal_arm \ @@ -68,9 +65,8 @@ // CHECK-V6M-LIBSTDCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}6.0.0" // CHECK-V6M-LIBSTDCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" "-EL" // CHECK-V6M-LIBSTDCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib" -// CHECK-V6M-LIBSTDCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-V6M-LIBSTDCXX-SAME: "-lstdc++" "-lsupc++" "-lunwind" -// CHECK-V6M-LIBSTDCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-armv6m" "--target2=rel" "-o" "a.out" +// CHECK-V6M-LIBSTDCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-armv6m.a" "--target2=rel" "-o" "a.out" // RUN: %clangxx %s -### --target=armv6m-none-eabi 2>&1 \ // RUN: --sysroot=%S/Inputs/baremetal_arm \ @@ -79,7 +75,6 @@ // CHECK-V6M-NDL: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-V6M-NDL: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" "-EL" // CHECK-V6M-NDL-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib" -// CHECK-V6M-NDL-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // RUN: rm -rf %T/baremetal_cxx_sysroot // RUN: mkdir -p %T/baremetal_cxx_sysroot/usr/include/c++/v1 @@ -93,9 +88,8 @@ // CHECK-V6M-LIBCXX-USR-SAME: "-internal-isystem" "{{[^"]+}}baremetal_cxx_sysroot{{[/\\]+}}usr{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}v1" // CHECK-V6M-LIBCXX-USR: "{{[^"]*}}-Bstatic" // CHECK-V6M-LIBCXX-USR-SAME: "-L{{[^"]*}}{{[/\\]+}}baremetal_cxx_sysroot{{[/\\]+}}lib" -// CHECK-V6M-LIBCXX-USR-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-V6M-LIBCXX-USR-SAME: "-lc++" "-lc++abi" "-lunwind" -// CHECK-V6M-LIBCXX-USR-SAME: "-lc" "-lm" "-lclang_rt.builtins-armv6m" +// CHECK-V6M-LIBCXX-USR-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-armv6m.a" // RUN: %clangxx --target=arm-none-eabi -v 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-THREAD-MODEL @@ -178,8 +172,7 @@ // CHECK-RV64-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV64-SAME: "-Lsome{{[/\\]+}}directory{{[/\\]+}}user{{[/\\]+}}asked{{[/\\]+}}for" // CHECK-RV64-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}lib" -// CHECK-RV64-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal" -// CHECK-RV64-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv64" "-X" "-o" "{{.*}}.tmp.out" +// CHECK-RV64-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv64.a" "-X" "-o" "{{.*}}.tmp.out" // RUN: %clangxx %s -### --target=riscv64-unknown-elf 2>&1 \ // RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf \ @@ -187,9 +180,8 @@ // CHECK-RV64-DEFAULTCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-RV64-DEFAULTCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV64-DEFAULTCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}basic_riscv64_tree{{[/\\]+}}riscv64-unknown-elf{{[/\\]+}}lib" -// CHECK-RV64-DEFAULTCXX-SAME: "-L[[RESOURCE_DIR]]{{.*}}{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-RV64-DEFAULTCXX-SAME: "-lc++" "-lc++abi" "-lunwind" -// CHECK-RV64-DEFAULTCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv64" "-X" "-o" "a.out" +// CHECK-RV64-DEFAULTCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv64.a" "-X" "-o" "a.out" // RUN: %clangxx %s -### --target=riscv64-unknown-elf 2>&1 \ // RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf \ @@ -200,9 +192,8 @@ // CHECK-RV64-LIBCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}v1" // CHECK-RV64-LIBCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV64-LIBCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}basic_riscv64_tree{{[/\\]+}}riscv64-unknown-elf{{[/\\]+}}lib" -// CHECK-RV64-LIBCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-RV64-LIBCXX-SAME: "-lc++" "-lc++abi" "-lunwind" -// CHECK-RV64-LIBCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv64" "-X" "-o" "a.out" +// CHECK-RV64-LIBCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv64.a" "-X" "-o" "a.out" // RUN: %clangxx %s -### 2>&1 --target=riscv64-unknown-elf \ // RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf \ @@ -213,9 +204,8 @@ // CHECK-RV64-LIBSTDCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}8.0.1" // CHECK-RV64-LIBSTDCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV64-LIBSTDCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}basic_riscv64_tree{{[/\\]+}}riscv64-unknown-elf{{[/\\]+}}lib" -// CHECK-RV64-LIBSTDCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-RV64-LIBSTDCXX-SAME: "-lstdc++" "-lsupc++" "-lunwind" -// CHECK-RV64-LIBSTDCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv64" "-X" "-o" "a.out" +// CHECK-RV64-LIBSTDCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv64.a" "-X" "-o" "a.out" // RUN: %clang %s -### 2>&1 --target=riscv32-unknown-elf \ // RUN: -L some/directory/user/asked/for \ @@ -230,8 +220,7 @@ // CHECK-RV32-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32-SAME: "-Lsome{{[/\\]+}}directory{{[/\\]+}}user{{[/\\]+}}asked{{[/\\]+}}for" // CHECK-RV32-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}lib" -// CHECK-RV32-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal" -// CHECK-RV32-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv32" "-X" "-o" "a.out" +// CHECK-RV32-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv32.a" "-X" "-o" "a.out" // RUN: %clangxx %s -### 2>&1 --target=riscv32-unknown-elf \ // RUN: --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf \ @@ -239,9 +228,8 @@ // CHECK-RV32-DEFAULTCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-RV32-DEFAULTCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32-DEFAULTCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}basic_riscv32_tree{{[/\\]+}}riscv32-unknown-elf{{[/\\]+}}lib" -// CHECK-RV32-DEFAULTCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-RV32-DEFAULTCXX-SAME: "-lc++" "-lc++abi" "-lunwind" -// CHECK-RV32-DEFAULTCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv32" "-X" "-o" "a.out" +// CHECK-RV32-DEFAULTCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv32.a" "-X" "-o" "a.out" // RUN: %clangxx %s -### 2>&1 --target=riscv32-unknown-elf \ // RUN: --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf \ @@ -252,9 +240,8 @@ // CHECK-RV32-LIBCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}v1" // CHECK-RV32-LIBCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32-LIBCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}basic_riscv32_tree{{[/\\]+}}riscv32-unknown-elf{{[/\\]+}}lib" -// CHECK-RV32-LIBCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-RV32-LIBCXX-SAME: "-lc++" "-lc++abi" "-lunwind" -// CHECK-RV32-LIBCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv32" "-X" "-o" "a.out" +// CHECK-RV32-LIBCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv32.a" "-X" "-o" "a.out" // RUN: %clangxx %s -### 2>&1 --target=riscv32-unknown-elf \ // RUN: --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf \ @@ -265,9 +252,8 @@ // CHECK-RV32-LIBSTDCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}8.0.1" // CHECK-RV32-LIBSTDCXX: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32-LIBSTDCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}basic_riscv32_tree{{[/\\]+}}riscv32-unknown-elf{{[/\\]+}}lib" -// CHECK-RV32-LIBSTDCXX-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // CHECK-RV32-LIBSTDCXX-SAME: "-lstdc++" "-lsupc++" "-lunwind" -// CHECK-RV32-LIBSTDCXX-SAME: "-lc" "-lm" "-lclang_rt.builtins-riscv32" "-X" "-o" "a.out" +// CHECK-RV32-LIBSTDCXX-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-riscv32.a" "-X" "-o" "a.out" // RUN: %clang %s -### 2>&1 --target=riscv64-unknown-elf \ // RUN: -nostdlibinc -nobuiltininc \ @@ -286,7 +272,6 @@ // CHECK-RV64-NDL: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-RV64-NDL: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV64-NDL-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}basic_riscv64_tree{{[/\\]+}}riscv64-unknown-elf{{[/\\]+}}lib" -// CHECK-RV64-NDL-SAME: "-L[[RESOURCE_DIR]]{{[/\\]+}}lib{{[/\\]+}}baremetal" // RUN: %clang %s -### 2>&1 --target=riscv64-unknown-elf \ // RUN: -march=rv64imafdc -mabi=lp64d \ @@ -306,7 +291,6 @@ // CHECK-RV64FD-SAME: "-x" "c++" "{{.*}}baremetal.cpp" // CHECK-RV64FD-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV64FD-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}rv64imafdc{{[/\\]+}}lp64d{{[/\\]+}}lib" -// CHECK-RV64FD-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal{{[/\\]+}}rv64imafdc{{[/\\]+}}lp64d" // RUN: %clang %s -### 2>&1 --target=riscv32-unknown-elf \ // RUN: -march=rv32i -mabi=ilp32 \ @@ -326,7 +310,6 @@ // CHECK-RV32I-SAME: "-x" "c++" "{{.*}}baremetal.cpp" // CHECK-RV32I-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32I-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}rv32i{{[/\\]+}}ilp32{{[/\\]+}}lib" -// CHECK-RV32I-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal{{[/\\]+}}rv32i{{[/\\]+}}ilp32" // RUN: %clang %s -### 2>&1 --target=riscv32-unknown-elf \ // RUN: -march=rv32im -mabi=ilp32 \ @@ -346,7 +329,6 @@ // CHECK-RV32IM-SAME: "-x" "c++" "{{.*}}baremetal.cpp" // CHECK-RV32IM-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32IM-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}rv32im{{[/\\]+}}ilp32{{[/\\]+}}lib" -// CHECK-RV32IM-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal{{[/\\]+}}rv32im{{[/\\]+}}ilp32" // RUN: %clang %s -### 2>&1 --target=riscv32-unknown-elf \ // RUN: -march=rv32iac -mabi=ilp32 \ @@ -361,7 +343,6 @@ // CHECK-RV32IAC-SAME: "-x" "c++" "{{.*}}baremetal.cpp" // CHECK-RV32IAC-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32IAC-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}rv32iac{{[/\\]+}}ilp32{{[/\\]+}}lib" -// CHECK-RV32IAC-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal{{[/\\]+}}rv32iac{{[/\\]+}}ilp32" // RUN: %clang %s -### 2>&1 --target=riscv32-unknown-elf -march=rv32imafc -mabi=ilp32f \ // RUN: --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf \ @@ -383,7 +364,6 @@ // CHECK-RV32IMAFC-SAME: "-x" "c++" "{{.*}}baremetal.cpp" // CHECK-RV32IMAFC-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-RV32IMAFC-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}rv32imafc{{[/\\]+}}ilp32f{{[/\\]+}}lib" -// CHECK-RV32IMAFC-SAME: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}baremetal{{[/\\]+}}rv32imafc{{[/\\]+}}ilp32f" // RUN: %clang -no-canonical-prefixes %s -### --target=powerpc-unknown-eabi 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PPCEABI %s @@ -395,8 +375,7 @@ // CHECK-PPCEABI-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" // CHECK-PPCEABI-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-PPCEABI-SAME: "-L[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}lib" -// CHECK-PPCEABI-SAME: "-L[[RESOURCE]]{{[/\\]+}}lib{{[/\\]+}}baremetal" -// CHECK-PPCEABI-SAME: "-lc" "-lm" "-lclang_rt.builtins-powerpc" "-o" "a.out" +// CHECK-PPCEABI-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-powerpc.a" "-o" "a.out" // RUN: %clang -no-canonical-prefixes %s -### --target=powerpc64-unknown-eabi 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PPC64EABI %s @@ -408,8 +387,7 @@ // CHECK-PPC64EABI-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" // CHECK-PPC64EABI-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-PPC64EABI-SAME: "-L[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}lib" -// CHECK-PPC64EABI-SAME: "-L[[RESOURCE]]{{[/\\]+}}lib{{[/\\]+}}baremetal" -// CHECK-PPC64EABI-SAME: "-lc" "-lm" "-lclang_rt.builtins-powerpc64" "-o" "a.out" +// CHECK-PPC64EABI-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-powerpc64.a" "-o" "a.out" // RUN: %clang -no-canonical-prefixes %s -### --target=powerpcle-unknown-eabi 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PPCLEEABI %s @@ -421,8 +399,7 @@ // CHECK-PPCLEEABI-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" // CHECK-PPCLEEABI-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-PPCLEEABI-SAME: "-L[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}lib" -// CHECK-PPCLEEABI-SAME: "-L[[RESOURCE]]{{[/\\]+}}lib{{[/\\]+}}baremetal" -// CHECK-PPCLEEABI-SAME: "-lc" "-lm" "-lclang_rt.builtins-powerpcle" "-o" "a.out" +// CHECK-PPCLEEABI-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-powerpcle.a" "-o" "a.out" // RUN: %clang -no-canonical-prefixes %s -### --target=powerpc64le-unknown-eabi 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PPC64LEEABI %s @@ -434,8 +411,7 @@ // CHECK-PPC64LEEABI-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" // CHECK-PPC64LEEABI-NEXT: ld{{(.exe)?}}" "{{.*}}.o" "-Bstatic" // CHECK-PPC64LEEABI-SAME: "-L[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}lib" -// CHECK-PPC64LEEABI-SAME: "-L[[RESOURCE]]{{[/\\]+}}lib{{[/\\]+}}baremetal" -// CHECK-PPC64LEEABI-SAME: "-lc" "-lm" "-lclang_rt.builtins-powerpc64le" "-o" "a.out" +// CHECK-PPC64LEEABI-SAME: "-lc" "-lm" "{{[^"]*}}libclang_rt.builtins-powerpc64le.a" "-o" "a.out" // Check that compiler-rt library without the arch filename suffix will // be used if present. @@ -446,8 +422,8 @@ // RUN: --target=armv6m-none-eabi \ // RUN: --sysroot=%T/baremetal_clang_rt_noarch \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-NOARCH %s -// CHECK-CLANGRT-NOARCH: "-lclang_rt.builtins" -// CHECK-CLANGRT-NOARCH-NOT: "-lclang_rt.builtins-armv6m" +// CHECK-CLANGRT-NOARCH: "{{[^"]*}}libclang_rt.builtins.a" +// CHECK-CLANGRT-NOARCH-NOT: "{{[^"]*}}libclang_rt.builtins-armv6m.a" // Check that compiler-rt library with the arch filename suffix will be // used if present. @@ -458,8 +434,8 @@ // RUN: --target=armv6m-none-eabi \ // RUN: --sysroot=%T/baremetal_clang_rt_arch \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARCH %s -// CHECK-CLANGRT-ARCH: "-lclang_rt.builtins-armv6m" -// CHECK-CLANGRT-ARCH-NOT: "-lclang_rt.builtins" +// CHECK-CLANGRT-ARCH: "{{[^"]*}}libclang_rt.builtins-armv6m.a" +// CHECK-CLANGRT-ARCH-NOT: "{{[^"]*}}libclang_rt.builtins.a" // Check that "--no-relax" is forwarded to the linker for RISC-V. // RUN: %clang %s -### 2>&1 --target=riscv64-unknown-elf -nostdinc -mno-relax \ @@ -471,4 +447,4 @@ // RUN: %clang %s -### 2>&1 --target=riscv64-unknown-elf -nostdinc \ // RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf \ // RUN: | FileCheck --check-prefix=CHECK-RV64-RELAX %s -// CHECK-RV64-RELAX-NOT: "--no-relax" \ No newline at end of file +// CHECK-RV64-RELAX-NOT: "--no-relax" diff --git a/clang/test/SemaCUDA/float16.cu b/clang/test/SemaCUDA/float16.cu index a9cbe87f32c100..bb5ed606438491 100644 --- a/clang/test/SemaCUDA/float16.cu +++ b/clang/test/SemaCUDA/float16.cu @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s +// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple nvptx64 -verify %s // expected-no-diagnostics #include "Inputs/cuda.h" diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index f8ad39ba712f8c..21617aeea0215e 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -33,6 +33,17 @@ endif() option(FLANG_ENABLE_WERROR "Fail and stop building flang if a warning is triggered." OFF) +# The out of tree builds of the compiler and the Fortran runtime +# must use the same setting of FLANG_RUNTIME_F128_MATH_LIB +# to be composable. Failure to synchronize this setting may result +# in linking errors or fatal failures in F128 runtime functions. +set(FLANG_RUNTIME_F128_MATH_LIB "" CACHE STRING + "Specifies the target library used for implementing IEEE-754 128-bit float \ + math in F18 runtime, e.g. it might be libquadmath for targets where \ + REAL(16) is mapped to __float128, or libm for targets where REAL(16) \ + is mapped to long double, etc." + ) + # Check for a standalone build and configure as appropriate from # there. if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) @@ -321,6 +332,12 @@ if (FLANG_REPOSITORY_STRING) add_definitions(-DFLANG_REPOSITORY_STRING="${FLANG_REPOSITORY_STRING}") endif() +if (FLANG_RUNTIME_F128_MATH_LIB) + add_compile_definitions( + -DFLANG_RUNTIME_F128_MATH_LIB="${FLANG_RUNTIME_F128_MATH_LIB}" + ) +endif() + include(TestBigEndian) test_big_endian(IS_BIGENDIAN) if (IS_BIGENDIAN) diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 3f1e22ecca4ccc..7cb99d61a686ed 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -494,12 +494,13 @@ struct RuntimeFunction { fir::runtime::FuncTypeBuilderFunc typeGenerator; }; -/// Callback type for generating lowering for a math operation. -using MathGeneratorTy = mlir::Value (*)(fir::FirOpBuilder &, mlir::Location, - llvm::StringRef, mlir::FunctionType, - llvm::ArrayRef); - struct MathOperation { + // Callback type for generating lowering for a math operation. + using MathGeneratorTy = mlir::Value (*)(fir::FirOpBuilder &, mlir::Location, + const MathOperation &, + mlir::FunctionType, + llvm::ArrayRef); + // Overrides fir::runtime::FuncTypeBuilderFunc to add FirOpBuilder argument. using FuncTypeBuilderFunc = mlir::FunctionType (*)(mlir::MLIRContext *, fir::FirOpBuilder &); @@ -681,25 +682,25 @@ getTypesForArgs(llvm::ArrayRef args) { } mlir::Value genLibCall(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef libFuncName, + const MathOperation &mathOp, mlir::FunctionType libFuncType, llvm::ArrayRef args); template mlir::Value genMathOp(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef mathLibFuncName, + const MathOperation &mathOp, mlir::FunctionType mathLibFuncType, llvm::ArrayRef args); template mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef mathLibFuncName, + const MathOperation &mathOp, mlir::FunctionType mathLibFuncType, llvm::ArrayRef args); mlir::Value genLibSplitComplexArgsCall(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef libFuncName, + const MathOperation &mathOp, mlir::FunctionType libFuncType, llvm::ArrayRef args); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 446b1529ca0088..151077d81ba14a 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -3247,7 +3247,7 @@ static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder, mlir::Location loc, fir::GlobalOp globalOp, mlir::acc::DataClause clause, - const std::string declareGlobalName, + const std::string &declareGlobalName, bool implicit, std::stringstream &asFortran) { GlobalOp declareGlobalOp = modBuilder.create(loc, declareGlobalName); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index a3536895ca3b7c..3a82be895d37c4 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -657,10 +657,61 @@ static llvm::cl::opt "instead of libm complex operations"), llvm::cl::init(false)); +/// Return a string containing the given Fortran intrinsic name +/// with the type of its arguments specified in funcType +/// surrounded by the given prefix/suffix. +static std::string +prettyPrintIntrinsicName(fir::FirOpBuilder &builder, mlir::Location loc, + llvm::StringRef prefix, llvm::StringRef name, + llvm::StringRef suffix, mlir::FunctionType funcType) { + std::string output = prefix.str(); + llvm::raw_string_ostream sstream(output); + if (name == "pow") { + assert(funcType.getNumInputs() == 2 && "power operator has two arguments"); + std::string displayName{" ** "}; + sstream << numericMlirTypeToFortran(builder, funcType.getInput(0), loc, + displayName) + << displayName + << numericMlirTypeToFortran(builder, funcType.getInput(1), loc, + displayName); + } else { + sstream << name.upper() << "("; + if (funcType.getNumInputs() > 0) + sstream << numericMlirTypeToFortran(builder, funcType.getInput(0), loc, + name); + for (mlir::Type argType : funcType.getInputs().drop_front()) { + sstream << ", " << numericMlirTypeToFortran(builder, argType, loc, name); + } + sstream << ")"; + } + sstream << suffix; + return output; +} + +// Generate a call to the Fortran runtime library providing +// support for 128-bit float math via a third-party library. +// If the compiler is built without FLANG_RUNTIME_F128_MATH_LIB, +// this function will report an error. +static mlir::Value genLibF128Call(fir::FirOpBuilder &builder, + mlir::Location loc, + const MathOperation &mathOp, + mlir::FunctionType libFuncType, + llvm::ArrayRef args) { +#ifndef FLANG_RUNTIME_F128_MATH_LIB + std::string message = prettyPrintIntrinsicName( + builder, loc, "compiler is built without support for '", mathOp.key, "'", + libFuncType); + fir::emitFatalError(loc, message, /*genCrashDiag=*/false); +#else // FLANG_RUNTIME_F128_MATH_LIB + return genLibCall(builder, loc, mathOp, libFuncType, args); +#endif // FLANG_RUNTIME_F128_MATH_LIB +} + mlir::Value genLibCall(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef libFuncName, + const MathOperation &mathOp, mlir::FunctionType libFuncType, llvm::ArrayRef args) { + llvm::StringRef libFuncName = mathOp.runtimeFunc; LLVM_DEBUG(llvm::dbgs() << "Generating '" << libFuncName << "' call with type "; libFuncType.dump(); llvm::dbgs() << "\n"); @@ -718,7 +769,7 @@ mlir::Value genLibCall(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value genLibSplitComplexArgsCall(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef libFuncName, + const MathOperation &mathOp, mlir::FunctionType libFuncType, llvm::ArrayRef args) { assert(args.size() == 2 && "Incorrect #args to genLibSplitComplexArgsCall"); @@ -762,13 +813,12 @@ mlir::Value genLibSplitComplexArgsCall(fir::FirOpBuilder &builder, cplx2, /*isImagPart=*/true); splitArgs.push_back(imag2); - return genLibCall(builder, loc, libFuncName, getSplitComplexArgsType(), - splitArgs); + return genLibCall(builder, loc, mathOp, getSplitComplexArgsType(), splitArgs); } template mlir::Value genMathOp(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef mathLibFuncName, + const MathOperation &mathOp, mlir::FunctionType mathLibFuncType, llvm::ArrayRef args) { // TODO: we have to annotate the math operations with flags @@ -791,13 +841,14 @@ mlir::Value genMathOp(fir::FirOpBuilder &builder, mlir::Location loc, // can be also lowered to libm calls for "fast" and "relaxed" // modes. mlir::Value result; + llvm::StringRef mathLibFuncName = mathOp.runtimeFunc; if (mathRuntimeVersion == preciseVersion && // Some operations do not have to be lowered as conservative // calls, since they do not affect strict FP behavior. // For example, purely integer operations like exponentiation // with integer operands fall into this class. !mathLibFuncName.empty()) { - result = genLibCall(builder, loc, mathLibFuncName, mathLibFuncType, args); + result = genLibCall(builder, loc, mathOp, mathLibFuncType, args); } else { LLVM_DEBUG(llvm::dbgs() << "Generating '" << mathLibFuncName << "' operation with type "; @@ -810,7 +861,7 @@ mlir::Value genMathOp(fir::FirOpBuilder &builder, mlir::Location loc, template mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc, - llvm::StringRef mathLibFuncName, + const MathOperation &mathOp, mlir::FunctionType mathLibFuncType, llvm::ArrayRef args) { mlir::Value result; @@ -819,11 +870,12 @@ mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc, // If we have libm functions, we can attempt to generate the more precise // version of the complex math operation. + llvm::StringRef mathLibFuncName = mathOp.runtimeFunc; if (!mathLibFuncName.empty()) { // If we enabled MLIR complex or can use approximate operations, we should // NOT use libm. if (!forceMlirComplex && !canUseApprox) { - result = genLibCall(builder, loc, mathLibFuncName, mathLibFuncType, args); + result = genLibCall(builder, loc, mathOp, mathLibFuncType, args); LLVM_DEBUG(result.dump(); llvm::dbgs() << "\n"); return result; } @@ -863,6 +915,10 @@ mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc, /// TODO: support remaining Fortran math intrinsics. /// See https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gfortran/\ /// Intrinsic-Procedures.html for a reference. +constexpr auto FuncTypeReal16Real16 = genFuncType, Ty::Real<16>>; +constexpr auto FuncTypeReal16Complex16 = + genFuncType, Ty::Complex<16>>; + static constexpr MathOperation mathOperations[] = { {"abs", "fabsf", genFuncType, Ty::Real<4>>, genMathOp}, @@ -874,6 +930,7 @@ static constexpr MathOperation mathOperations[] = { genComplexMathOp}, {"abs", "cabs", genFuncType, Ty::Complex<8>>, genComplexMathOp}, + {"abs", RTNAME_STRING(CAbsF128), FuncTypeReal16Complex16, genLibF128Call}, {"acos", "acosf", genFuncType, Ty::Real<4>>, genLibCall}, {"acos", "acos", genFuncType, Ty::Real<8>>, genLibCall}, {"acos", "cacosf", genFuncType, Ty::Complex<4>>, genLibCall}, @@ -1110,6 +1167,7 @@ static constexpr MathOperation mathOperations[] = { genMathOp}, {"sin", "sin", genFuncType, Ty::Real<8>>, genMathOp}, + {"sin", RTNAME_STRING(SinF128), FuncTypeReal16Real16, genLibF128Call}, {"sin", "csinf", genFuncType, Ty::Complex<4>>, genComplexMathOp}, {"sin", "csin", genFuncType, Ty::Complex<8>>, @@ -1122,6 +1180,7 @@ static constexpr MathOperation mathOperations[] = { genMathOp}, {"sqrt", "sqrt", genFuncType, Ty::Real<8>>, genMathOp}, + {"sqrt", RTNAME_STRING(SqrtF128), FuncTypeReal16Real16, genLibF128Call}, {"sqrt", "csqrtf", genFuncType, Ty::Complex<4>>, genComplexMathOp}, {"sqrt", "csqrt", genFuncType, Ty::Complex<8>>, @@ -1345,27 +1404,9 @@ static void checkPrecisionLoss(llvm::StringRef name, // lowering and could be used here. Emit an error and continue // generating the code with the narrowing cast so that the user // can get a complete list of the problematic intrinsic calls. - std::string message("not yet implemented: no math runtime available for '"); - llvm::raw_string_ostream sstream(message); - if (name == "pow") { - assert(funcType.getNumInputs() == 2 && "power operator has two arguments"); - std::string displayName{" ** "}; - sstream << numericMlirTypeToFortran(builder, funcType.getInput(0), loc, - displayName) - << displayName - << numericMlirTypeToFortran(builder, funcType.getInput(1), loc, - displayName); - } else { - sstream << name.upper() << "("; - if (funcType.getNumInputs() > 0) - sstream << numericMlirTypeToFortran(builder, funcType.getInput(0), loc, - name); - for (mlir::Type argType : funcType.getInputs().drop_front()) { - sstream << ", " << numericMlirTypeToFortran(builder, argType, loc, name); - } - sstream << ")"; - } - sstream << "'"; + std::string message = prettyPrintIntrinsicName( + builder, loc, "not yet implemented: no math runtime available for '", + name, "'", funcType); mlir::emitError(loc, message); } @@ -1887,7 +1928,7 @@ IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name, for (auto [fst, snd] : llvm::zip(actualFuncType.getInputs(), args)) convertedArguments.push_back(builder.createConvert(loc, fst, snd)); mlir::Value result = mathOp->funcGenerator( - builder, loc, mathOp->runtimeFunc, actualFuncType, convertedArguments); + builder, loc, *mathOp, actualFuncType, convertedArguments); mlir::Type soughtType = soughtFuncType.getResult(0); return builder.createConvert(loc, soughtType, result); }; diff --git a/flang/lib/Optimizer/CodeGen/Target.cpp b/flang/lib/Optimizer/CodeGen/Target.cpp index 19730f7a64337c..7c77bdd79008f1 100644 --- a/flang/lib/Optimizer/CodeGen/Target.cpp +++ b/flang/lib/Optimizer/CodeGen/Target.cpp @@ -47,7 +47,7 @@ static const llvm::fltSemantics &floatToSemantics(const KindMapping &kindMap, } static void typeTodo(const llvm::fltSemantics *sem, mlir::Location loc, - std::string context) { + const std::string &context) { if (sem == &llvm::APFloat::IEEEhalf()) { TODO(loc, "COMPLEX(KIND=2): for " + context + " type"); } else if (sem == &llvm::APFloat::BFloat()) { diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 4c2bd31a2ae841..515b8f62daf9ad 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -252,7 +252,7 @@ void Preprocessor::DefineStandardMacros() { Define("__LINE__"s, "__LINE__"s); } -void Preprocessor::Define(std::string macro, std::string value) { +void Preprocessor::Define(const std::string ¯o, const std::string &value) { definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_}); } diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h index 3b456364944c3d..b61f1577727beb 100644 --- a/flang/lib/Parser/preprocessor.h +++ b/flang/lib/Parser/preprocessor.h @@ -70,7 +70,7 @@ class Preprocessor { AllSources &allSources() { return allSources_; } void DefineStandardMacros(); - void Define(std::string macro, std::string value); + void Define(const std::string ¯o, const std::string &value); void Undefine(std::string macro); bool IsNameDefined(const CharBlock &); bool IsFunctionLikeDefinition(const CharBlock &); diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index f7f22177a7d0bf..e9b23172ed2e28 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -630,9 +630,11 @@ bool Prescanner::NextToken(TokenSequence &tokens) { preventHollerith_ = false; } else if (IsLegalInIdentifier(*at_)) { int parts{1}; + const char *afterLast{nullptr}; do { EmitChar(tokens, *at_); ++at_, ++column_; + afterLast = at_; if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) { tokens.CloseToken(); ++parts; @@ -640,12 +642,20 @@ bool Prescanner::NextToken(TokenSequence &tokens) { } while (IsLegalInIdentifier(*at_)); if (parts >= 3) { // Subtlety: When an identifier is split across three or more continuation - // lines, its parts are kept as distinct pp-tokens so that macro - // operates on them independently. This trick accommodates the historic - // practice of using line continuation for token pasting after - // replacement. + // lines (or two continuation lines, immediately preceded or followed + // by '&' free form continuation line markers, its parts are kept as + // distinct pp-tokens so that macro operates on them independently. + // This trick accommodates the historic practice of using line + // continuation for token pasting after replacement. } else if (parts == 2) { - tokens.ReopenLastToken(); + if ((start > start_ && start[-1] == '&') || + (afterLast < limit_ && (*afterLast == '&' || *afterLast == '\n'))) { + // call & call foo& call foo& + // &MACRO& OR &MACRO& OR &MACRO + // &foo(...) &(...) + } else { + tokens.ReopenLastToken(); + } } if (InFixedFormSource()) { SkipSpaces(); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 816227fb3354ff..2db3f9a27d8f4a 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -192,7 +192,7 @@ class DistinguishabilityHelper { private: void SayNotDistinguishable(const Scope &, const SourceName &, GenericKind, - const Symbol &, const Symbol &, bool isError); + const Symbol &, const Symbol &, bool isHardConflict); void AttachDeclaration(parser::Message &, const Scope &, const Symbol &); SemanticsContext &context_; @@ -3513,6 +3513,11 @@ void DistinguishabilityHelper::Add(const Symbol &generic, GenericKind kind, } void DistinguishabilityHelper::Check(const Scope &scope) { + if (FindModuleFileContaining(scope)) { + // Distinguishability was checked when the module was created; + // don't let optional warnings then become errors now. + return; + } for (const auto &[name, info] : nameToSpecifics_) { for (auto iter1{info.begin()}; iter1 != info.end(); ++iter1) { const auto &[ultimate, procInfo]{*iter1}; @@ -3534,15 +3539,21 @@ void DistinguishabilityHelper::Check(const Scope &scope) { void DistinguishabilityHelper::SayNotDistinguishable(const Scope &scope, const SourceName &name, GenericKind kind, const Symbol &proc1, - const Symbol &proc2, bool isError) { - if (!isError && - !context_.ShouldWarn( - common::LanguageFeature::IndistinguishableSpecifics)) { - // The rules for distinguishing specific procedures (F'2023 15.4.3.4.5) - // are inadequate for some real-world cases like pFUnit. - // When there are optional dummy arguments or unlimited polymorphic - // dummy data object arguments, the best that we can do is emit an optional - // portability warning. + const Symbol &proc2, bool isHardConflict) { + bool isUseAssociated{!scope.sourceRange().Contains(name)}; + // The rules for distinguishing specific procedures (F'2023 15.4.3.4.5) + // are inadequate for some real-world cases like pFUnit. + // When there are optional dummy arguments or unlimited polymorphic + // dummy data object arguments, the best that we can do is emit an optional + // portability warning. Also, named generics created by USE association + // merging shouldn't receive hard errors for ambiguity. + // (Non-named generics might be defined I/O procedures or defined + // assignments that need to be used by the runtime.) + bool isWarning{!isHardConflict || (isUseAssociated && kind.IsName())}; + if (isWarning && + (!context_.ShouldWarn( + common::LanguageFeature::IndistinguishableSpecifics) || + FindModuleFileContaining(scope))) { return; } std::string name1{proc1.name().ToString()}; @@ -3557,17 +3568,20 @@ void DistinguishabilityHelper::SayNotDistinguishable(const Scope &scope, } } parser::Message *msg; - if (scope.sourceRange().Contains(name)) { + if (!isUseAssociated) { + CHECK(isWarning == !isHardConflict); msg = &context_.Say(name, - isError + isHardConflict ? "Generic '%s' may not have specific procedures '%s' and '%s' as their interfaces are not distinguishable"_err_en_US : "Generic '%s' should not have specific procedures '%s' and '%s' as their interfaces are not distinguishable by the rules in the standard"_port_en_US, MakeOpName(name), name1, name2); } else { msg = &context_.Say(*GetTopLevelUnitContaining(proc1).GetName(), - isError - ? "USE-associated generic '%s' may not have specific procedures '%s' and '%s' as their interfaces are not distinguishable"_err_en_US - : "USE-associated generic '%s' should not have specific procedures '%s' and '%s' as their interfaces are not distinguishable by the incomplete rules in the standard"_port_en_US, + isHardConflict + ? (isWarning + ? "USE-associated generic '%s' should not have specific procedures '%s' and '%s' as their interfaces are not distinguishable"_warn_en_US + : "USE-associated generic '%s' may not have specific procedures '%s' and '%s' as their interfaces are not distinguishable"_err_en_US) + : "USE-associated generic '%s' should not have specific procedures '%s' and '%s' as their interfaces are not distinguishable by the rules in the standard"_port_en_US, MakeOpName(name), name1, name2); } AttachDeclaration(*msg, scope, proc1); diff --git a/flang/lib/Semantics/check-directive-structure.h b/flang/lib/Semantics/check-directive-structure.h index 829405f99d64c0..97e13c59ac4167 100644 --- a/flang/lib/Semantics/check-directive-structure.h +++ b/flang/lib/Semantics/check-directive-structure.h @@ -176,8 +176,8 @@ template class DirectiveStructureChecker : public virtual BaseChecker { protected: DirectiveStructureChecker(SemanticsContext &context, - std::unordered_map> - directiveClausesMap) + const std::unordered_map> + &directiveClausesMap) : context_{context}, directiveClausesMap_(directiveClausesMap) {} virtual ~DirectiveStructureChecker() {} diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt index dfa9da502db0a8..ac89184a7cbffc 100644 --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -46,6 +46,23 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) endif () include_directories(BEFORE ${FLANG_SOURCE_DIR}/include) + + # The out of tree builds of the compiler and the Fortran runtime + # must use the same setting of FLANG_RUNTIME_F128_MATH_LIB + # to be composable. Failure to synchronize this setting may result + # in linking errors or fatal failures in F128 runtime functions. + set(FLANG_RUNTIME_F128_MATH_LIB "" CACHE STRING + "Specifies the target library used for implementing IEEE-754 128-bit float \ + math in F18 runtime, e.g. it might be libquadmath for targets where \ + REAL(16) is mapped to __float128, or libm for targets where REAL(16) \ + is mapped to long double, etc." + ) + + if (NOT FLANG_RUNTIME_F128_MATH_LIB STREQUAL "") + add_compile_definitions( + -DFLANG_RUNTIME_F128_MATH_LIB="${FLANG_RUNTIME_F128_MATH_LIB}" + ) + endif() endif() include(CheckCXXSymbolExists) @@ -83,6 +100,9 @@ add_definitions(-U_GLIBCXX_ASSERTIONS) add_definitions(-U_LIBCPP_ENABLE_ASSERTIONS) add_subdirectory(FortranMain) +if (NOT ${FLANG_RUNTIME_F128_MATH_LIB} STREQUAL "") + add_subdirectory(Float128Math) +endif() set(sources ISO_Fortran_binding.cpp diff --git a/flang/runtime/Float128Math/CMakeLists.txt b/flang/runtime/Float128Math/CMakeLists.txt new file mode 100644 index 00000000000000..f8da4d7ca1a9fe --- /dev/null +++ b/flang/runtime/Float128Math/CMakeLists.txt @@ -0,0 +1,56 @@ +#===-- runtime/Float128Math/CMakeLists.txt ---------------------------------===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===------------------------------------------------------------------------===# + +# FortranFloat128 implements IEEE-754 128-bit float math functions. +# It is a thin wapper and it currently relies on third-party +# libraries available for the target. +# It is distributed as a static library only. +# Fortran programs/libraries that end up linking any of the provided +# will have a dependency on the third-party library that is being +# used for building this FortranFloat128Math library. + +if (${FLANG_RUNTIME_F128_MATH_LIB} STREQUAL "libquadmath" OR + ${FLANG_RUNTIME_F128_MATH_LIB} STREQUAL "quadmath") + check_include_file(quadmath.h FOUND_QUADMATH_HEADER) + if(FOUND_QUADMATH_HEADER) + add_compile_definitions(HAS_QUADMATHLIB) + else() + message(FATAL_ERROR + "FLANG_RUNTIME_F128_MATH_LIB setting requires quadmath.h " + "to be available: ${FLANG_RUNTIME_F128_MATH_LIB}" + ) + endif() +else() + message(FATAL_ERROR + "Unsupported third-party library for Fortran F128 math runtime: " + "${FLANG_RUNTIME_F128_MATH_LIB}" + ) +endif() + +set(sources + cabs.cpp + sin.cpp + sqrt.cpp + ) + +include_directories(AFTER "${CMAKE_CURRENT_SOURCE_DIR}/..") +add_flang_library(FortranFloat128Math STATIC INSTALL_WITH_TOOLCHAIN ${sources}) + +if (DEFINED MSVC) + set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded) + add_flang_library(FortranFloat128Math.static STATIC INSTALL_WITH_TOOLCHAIN + ${sources} + ) + set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreadedDebug) + add_flang_library(FortranFloat128Math.static_dbg STATIC INSTALL_WITH_TOOLCHAIN + ${sources} + ) + add_dependencies(FortranFloat128Math FortranFloat128Math.static + FortranFloat128Math.static_dbg + ) +endif() diff --git a/flang/runtime/Float128Math/cabs.cpp b/flang/runtime/Float128Math/cabs.cpp new file mode 100644 index 00000000000000..63f2bdf8e177ae --- /dev/null +++ b/flang/runtime/Float128Math/cabs.cpp @@ -0,0 +1,24 @@ +//===-- runtime/Float128Math/cabs.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// FIXME: the argument should be CppTypeFor, +// and it should be translated into the underlying library's +// corresponding complex128 type. +CppTypeFor RTDEF(CAbsF128)(ComplexF128 x) { + return CAbs::invoke(x); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/math-entries.h b/flang/runtime/Float128Math/math-entries.h new file mode 100644 index 00000000000000..91c14b008b5768 --- /dev/null +++ b/flang/runtime/Float128Math/math-entries.h @@ -0,0 +1,77 @@ +//===-- runtime/Float128Math/math-entries.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_FLOAT128MATH_MATH_ENTRIES_H_ +#define FORTRAN_RUNTIME_FLOAT128MATH_MATH_ENTRIES_H_ +#include "terminator.h" +#include "tools.h" +#include "flang/Common/float128.h" +#include "flang/Runtime/entry-names.h" +#include + +namespace Fortran::runtime { + +// Define a class template to gracefully fail, when +// there is no specialized template that implements +// the required function via using the third-party +// implementation. +#define DEFINE_FALLBACK(caller) \ + template struct caller { \ + template \ + [[noreturn]] static std::invoke_result_t invoke( \ + ATs... args) { \ + Terminator terminator{__FILE__, __LINE__}; \ + terminator.Crash("Float128 variant of '%s' is unsupported", #caller); \ + } \ + }; + +// Define template specialization that is calling the third-party +// implementation. The template is specialized by a function pointer +// that is the FortranFloat128Math entry point. The signatures +// of the caller and the callee must match. +// +// Defining the specialization for any target library requires +// adding the generic template via DEFINE_FALLBACK, so that +// a build with another target library that does not define +// the same alias can gracefully fail in runtime. +#define DEFINE_SIMPLE_ALIAS(caller, callee) \ + template struct caller

{ \ + static RT invoke(ATs... args) { \ + static_assert(std::is_invocable_r_v); \ + if constexpr (std::is_same_v) { \ + callee(args...); \ + } else { \ + return callee(args...); \ + } \ + } \ + }; + +// Define fallback callers. +DEFINE_FALLBACK(CAbs) +DEFINE_FALLBACK(Sin) +DEFINE_FALLBACK(Sqrt) + +// Define ComplexF128 type that is compatible with +// the type of results/arguments of libquadmath. +// TODO: this may need more work for other libraries/compilers. +#if !defined(_ARCH_PPC) || defined(__LONG_DOUBLE_IEEE128__) +typedef _Complex float __attribute__((mode(TC))) ComplexF128; +#else +typedef _Complex float __attribute__((mode(KC))) ComplexF128; +#endif + +#if HAS_QUADMATHLIB +// Define wrapper callers for libquadmath. +#include "quadmath.h" +DEFINE_SIMPLE_ALIAS(CAbs, cabsq) +DEFINE_SIMPLE_ALIAS(Sin, sinq) +DEFINE_SIMPLE_ALIAS(Sqrt, sqrtq) +#endif +} // namespace Fortran::runtime + +#endif // FORTRAN_RUNTIME_FLOAT128MATH_MATH_ENTRIES_H_ diff --git a/flang/runtime/Float128Math/sin.cpp b/flang/runtime/Float128Math/sin.cpp new file mode 100644 index 00000000000000..013eb9d119a6a3 --- /dev/null +++ b/flang/runtime/Float128Math/sin.cpp @@ -0,0 +1,22 @@ +//===-- runtime/Float128Math/sin.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +CppTypeFor RTDEF(SinF128)( + CppTypeFor x) { + return Sin::invoke(x); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/sqrt.cpp b/flang/runtime/Float128Math/sqrt.cpp new file mode 100644 index 00000000000000..aafbd850ca973a --- /dev/null +++ b/flang/runtime/Float128Math/sqrt.cpp @@ -0,0 +1,22 @@ +//===-- runtime/Float128Math/sqrt.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +CppTypeFor RTDEF(SqrtF128)( + CppTypeFor x) { + return Sqrt::invoke(x); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 18590567c65eb0..58ca313d9e4454 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -679,6 +679,7 @@ void ExternalFileUnit::Rewind(IoErrorHandler &handler) { handler.SignalError(IostatRewindNonSequential, "REWIND(UNIT=%d) on non-sequential file", unitNumber()); } else { + DoImpliedEndfile(handler); SetPosition(0, handler); currentRecordNumber = 1; leftTabLimit.reset(); @@ -687,7 +688,6 @@ void ExternalFileUnit::Rewind(IoErrorHandler &handler) { } void ExternalFileUnit::SetPosition(std::int64_t pos, IoErrorHandler &handler) { - DoImpliedEndfile(handler); frameOffsetInFile_ = pos; recordOffsetInFrame_ = 0; if (access == Access::Direct) { @@ -707,6 +707,12 @@ bool ExternalFileUnit::SetStreamPos( "POS=%zd is invalid", static_cast(oneBasedPos)); return false; } + // A backwards POS= implies truncation after writing, at least in + // Intel and NAG. + if (static_cast(oneBasedPos - 1) < + frameOffsetInFile_ + recordOffsetInFrame_) { + DoImpliedEndfile(handler); + } SetPosition(oneBasedPos - 1, handler); // We no longer know which record we're in. Set currentRecordNumber to // a large value from whence we can both advance and backspace. diff --git a/flang/test/Lower/Intrinsics/missing-math-runtime.f90 b/flang/test/Lower/Intrinsics/missing-math-runtime.f90 index 98d3abb17f3a8f..ff767ba18faaec 100644 --- a/flang/test/Lower/Intrinsics/missing-math-runtime.f90 +++ b/flang/test/Lower/Intrinsics/missing-math-runtime.f90 @@ -1,10 +1,14 @@ ! There is no quad math runtime available in lowering ! for now. Test that the TODO are emitted correctly. +! FIXME: the lit config has to flip a feature flag so that +! the tests can use different checks depending on whether +! REAL(16) math support is enabled or not. +! XFAIL: * ! RUN: bbc -emit-fir %s -o /dev/null 2>&1 | FileCheck %s complex(16) :: a real(16) :: b -! CHECK: not yet implemented: no math runtime available for 'ABS(COMPLEX(KIND=16))' +! CHECK: compiler is built without support for 'ABS(COMPLEX(KIND=16))' b = abs(a) end diff --git a/flang/test/Preprocessing/pp005.F b/flang/test/Preprocessing/pp005.F index e4483b404c3673..a8d7394cb12d38 100644 --- a/flang/test/Preprocessing/pp005.F +++ b/flang/test/Preprocessing/pp005.F @@ -1,11 +1,11 @@ ! RUN: %flang -E %s 2>&1 | FileCheck %s -! CHECK: res = 777 +! CHECK: res = (777) * KWM split across continuation, implicit padding integer, parameter :: KWM = 666 #define KWM 777 integer :: res - res = KW - +M + res = (KW + +M) if (res .eq. 777) then print *, 'pp005.F yes' else diff --git a/flang/test/Preprocessing/pp006.F b/flang/test/Preprocessing/pp006.F index f526ad31733efa..e45dcf9c18e196 100644 --- a/flang/test/Preprocessing/pp006.F +++ b/flang/test/Preprocessing/pp006.F @@ -1,12 +1,12 @@ ! RUN: %flang -E %s 2>&1 | FileCheck %s -! CHECK: res = 777 +! CHECK: res = (777) * ditto, but with intervening *comment line integer, parameter :: KWM = 666 #define KWM 777 integer :: res - res = KW + res = (KW *comment - +M + +M) if (res .eq. 777) then print *, 'pp006.F yes' else diff --git a/flang/test/Preprocessing/pp105.F90 b/flang/test/Preprocessing/pp105.F90 index b4f73da6fa24c0..e861e9688d2c54 100644 --- a/flang/test/Preprocessing/pp105.F90 +++ b/flang/test/Preprocessing/pp105.F90 @@ -1,11 +1,11 @@ ! RUN: %flang -E %s 2>&1 | FileCheck %s -! CHECK: res = 777 +! CHECK: res = (777) ! KWM call name split across continuation, with leading & integer, parameter :: KWM = 666 #define KWM 777 integer :: res - res = KW& -&M + res = (KW& +&M) if (res .eq. 777) then print *, 'pp105.F90 yes' else diff --git a/flang/test/Preprocessing/pp106.F90 b/flang/test/Preprocessing/pp106.F90 index 556d779048f6c9..a450807f0bd214 100644 --- a/flang/test/Preprocessing/pp106.F90 +++ b/flang/test/Preprocessing/pp106.F90 @@ -1,11 +1,11 @@ ! RUN: %flang -E %s 2>&1 | FileCheck %s -! CHECK: res = 777 +! CHECK: res = (777) ! ditto, with & ! comment integer, parameter :: KWM = 666 #define KWM 777 integer :: res - res = KW& ! comment -&M + res = (KW& ! comment +&M) if (res .eq. 777) then print *, 'pp106.F90 yes' else diff --git a/flang/test/Preprocessing/pp134.F90 b/flang/test/Preprocessing/pp134.F90 index 01e7b010d426ec..bc34767224fa03 100644 --- a/flang/test/Preprocessing/pp134.F90 +++ b/flang/test/Preprocessing/pp134.F90 @@ -1,9 +1,23 @@ ! RUN: %flang -E %s 2>&1 | FileCheck %s -! CHECK: print *, ADC +! CHECK: print *, ADC, 1 +! CHECK: print *, AD, 1 +! CHECK: print *, DC, 1 +! CHECK: print *, AD +! CHECK: print *, AB #define B D implicit none real ADC print *, A& &B& - &C + &C, 1 +print *, A& + &B& + &, 1 +print *, & + &B& + &C, 1 +print *, A& + &B +print *, A& + &B ! but not this end diff --git a/flang/test/Semantics/resolve17.f90 b/flang/test/Semantics/resolve17.f90 index 513676fe670a14..770af756d03bc3 100644 --- a/flang/test/Semantics/resolve17.f90 +++ b/flang/test/Semantics/resolve17.f90 @@ -180,7 +180,7 @@ subroutine g() end end module subroutine s9 - !ERROR: USE-associated generic 'g' may not have specific procedures 'g' and 'g' as their interfaces are not distinguishable + !PORTABILITY: USE-associated generic 'g' should not have specific procedures 'g' and 'g' as their interfaces are not distinguishable use m9a use m9b end diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp index c4e56a862c8613..52136df10c0b02 100644 --- a/flang/tools/flang-driver/driver.cpp +++ b/flang/tools/flang-driver/driver.cpp @@ -130,6 +130,9 @@ int main(int argc, const char **argv) { llvm::sys::getDefaultTargetTriple(), diags, "flang LLVM compiler"); theDriver.setTargetAndMode(targetandMode); +#ifdef FLANG_RUNTIME_F128_MATH_LIB + theDriver.setFlangF128MathLibrary(FLANG_RUNTIME_F128_MATH_LIB); +#endif std::unique_ptr c( theDriver.BuildCompilation(args)); llvm::SmallVector, 4> diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index f725b1c2394c6a..608ac46034306b 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -73,6 +73,49 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.vsprintf libc.src.stdio.vsnprintf + + # stdbit.h entrypoints + libc.src.stdbit.stdc_leading_zeros_uc + libc.src.stdbit.stdc_leading_zeros_us + libc.src.stdbit.stdc_leading_zeros_ui + libc.src.stdbit.stdc_leading_zeros_ul + libc.src.stdbit.stdc_leading_zeros_ull + libc.src.stdbit.stdc_leading_ones_uc + libc.src.stdbit.stdc_leading_ones_us + libc.src.stdbit.stdc_leading_ones_ui + libc.src.stdbit.stdc_leading_ones_ul + libc.src.stdbit.stdc_leading_ones_ull + libc.src.stdbit.stdc_trailing_zeros_uc + libc.src.stdbit.stdc_trailing_zeros_us + libc.src.stdbit.stdc_trailing_zeros_ui + libc.src.stdbit.stdc_trailing_zeros_ul + libc.src.stdbit.stdc_trailing_zeros_ull + libc.src.stdbit.stdc_trailing_ones_uc + libc.src.stdbit.stdc_trailing_ones_us + libc.src.stdbit.stdc_trailing_ones_ui + libc.src.stdbit.stdc_trailing_ones_ul + libc.src.stdbit.stdc_trailing_ones_ull + libc.src.stdbit.stdc_first_leading_zero_uc + libc.src.stdbit.stdc_first_leading_zero_us + libc.src.stdbit.stdc_first_leading_zero_ui + libc.src.stdbit.stdc_first_leading_zero_ul + libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull + # stdlib.h entrypoints libc.src.stdlib.abort libc.src.stdlib.abs diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index f725b1c2394c6a..2f299e992be09a 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -73,6 +73,48 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.vsprintf libc.src.stdio.vsnprintf + # stdbit.h entrypoints + libc.src.stdbit.stdc_leading_zeros_uc + libc.src.stdbit.stdc_leading_zeros_us + libc.src.stdbit.stdc_leading_zeros_ui + libc.src.stdbit.stdc_leading_zeros_ul + libc.src.stdbit.stdc_leading_zeros_ull + libc.src.stdbit.stdc_leading_ones_uc + libc.src.stdbit.stdc_leading_ones_us + libc.src.stdbit.stdc_leading_ones_ui + libc.src.stdbit.stdc_leading_ones_ul + libc.src.stdbit.stdc_leading_ones_ull + libc.src.stdbit.stdc_trailing_zeros_uc + libc.src.stdbit.stdc_trailing_zeros_us + libc.src.stdbit.stdc_trailing_zeros_ui + libc.src.stdbit.stdc_trailing_zeros_ul + libc.src.stdbit.stdc_trailing_zeros_ull + libc.src.stdbit.stdc_trailing_ones_uc + libc.src.stdbit.stdc_trailing_ones_us + libc.src.stdbit.stdc_trailing_ones_ui + libc.src.stdbit.stdc_trailing_ones_ul + libc.src.stdbit.stdc_trailing_ones_ull + libc.src.stdbit.stdc_first_leading_zero_uc + libc.src.stdbit.stdc_first_leading_zero_us + libc.src.stdbit.stdc_first_leading_zero_ui + libc.src.stdbit.stdc_first_leading_zero_ul + libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull + # stdlib.h entrypoints libc.src.stdlib.abort libc.src.stdlib.abs diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index b333c6be144627..5224e92bbcc589 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -65,6 +65,48 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strtok_r libc.src.string.strxfrm + # stdbit.h entrypoints + libc.src.stdbit.stdc_leading_zeros_uc + libc.src.stdbit.stdc_leading_zeros_us + libc.src.stdbit.stdc_leading_zeros_ui + libc.src.stdbit.stdc_leading_zeros_ul + libc.src.stdbit.stdc_leading_zeros_ull + libc.src.stdbit.stdc_leading_ones_uc + libc.src.stdbit.stdc_leading_ones_us + libc.src.stdbit.stdc_leading_ones_ui + libc.src.stdbit.stdc_leading_ones_ul + libc.src.stdbit.stdc_leading_ones_ull + libc.src.stdbit.stdc_trailing_zeros_uc + libc.src.stdbit.stdc_trailing_zeros_us + libc.src.stdbit.stdc_trailing_zeros_ui + libc.src.stdbit.stdc_trailing_zeros_ul + libc.src.stdbit.stdc_trailing_zeros_ull + libc.src.stdbit.stdc_trailing_ones_uc + libc.src.stdbit.stdc_trailing_ones_us + libc.src.stdbit.stdc_trailing_ones_ui + libc.src.stdbit.stdc_trailing_ones_ul + libc.src.stdbit.stdc_trailing_ones_ull + libc.src.stdbit.stdc_first_leading_zero_uc + libc.src.stdbit.stdc_first_leading_zero_us + libc.src.stdbit.stdc_first_leading_zero_ui + libc.src.stdbit.stdc_first_leading_zero_ul + libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull + # stdlib.h entrypoints libc.src.stdlib.abs libc.src.stdlib.atoi diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 6e194682df4bfc..8a6c160c099322 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -95,6 +95,41 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_leading_zeros_ui libc.src.stdbit.stdc_leading_zeros_ul libc.src.stdbit.stdc_leading_zeros_ull + libc.src.stdbit.stdc_leading_ones_uc + libc.src.stdbit.stdc_leading_ones_us + libc.src.stdbit.stdc_leading_ones_ui + libc.src.stdbit.stdc_leading_ones_ul + libc.src.stdbit.stdc_leading_ones_ull + libc.src.stdbit.stdc_trailing_zeros_uc + libc.src.stdbit.stdc_trailing_zeros_us + libc.src.stdbit.stdc_trailing_zeros_ui + libc.src.stdbit.stdc_trailing_zeros_ul + libc.src.stdbit.stdc_trailing_zeros_ull + libc.src.stdbit.stdc_trailing_ones_uc + libc.src.stdbit.stdc_trailing_ones_us + libc.src.stdbit.stdc_trailing_ones_ui + libc.src.stdbit.stdc_trailing_ones_ul + libc.src.stdbit.stdc_trailing_ones_ull + libc.src.stdbit.stdc_first_leading_zero_uc + libc.src.stdbit.stdc_first_leading_zero_us + libc.src.stdbit.stdc_first_leading_zero_ui + libc.src.stdbit.stdc_first_leading_zero_ul + libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull # stdlib.h entrypoints libc.src.stdlib.abs diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index 9bacfab7b0e5ae..7df19049088867 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -73,6 +73,41 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_leading_zeros_ui libc.src.stdbit.stdc_leading_zeros_ul libc.src.stdbit.stdc_leading_zeros_ull + libc.src.stdbit.stdc_leading_ones_uc + libc.src.stdbit.stdc_leading_ones_us + libc.src.stdbit.stdc_leading_ones_ui + libc.src.stdbit.stdc_leading_ones_ul + libc.src.stdbit.stdc_leading_ones_ull + libc.src.stdbit.stdc_trailing_zeros_uc + libc.src.stdbit.stdc_trailing_zeros_us + libc.src.stdbit.stdc_trailing_zeros_ui + libc.src.stdbit.stdc_trailing_zeros_ul + libc.src.stdbit.stdc_trailing_zeros_ull + libc.src.stdbit.stdc_trailing_ones_uc + libc.src.stdbit.stdc_trailing_ones_us + libc.src.stdbit.stdc_trailing_ones_ui + libc.src.stdbit.stdc_trailing_ones_ul + libc.src.stdbit.stdc_trailing_ones_ull + libc.src.stdbit.stdc_first_leading_zero_uc + libc.src.stdbit.stdc_first_leading_zero_us + libc.src.stdbit.stdc_first_leading_zero_ui + libc.src.stdbit.stdc_first_leading_zero_ul + libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull # stdlib.h entrypoints libc.src.stdlib.abs diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 71ff4bcfc35195..5c8cc7618a9e8c 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -97,6 +97,41 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_leading_zeros_ui libc.src.stdbit.stdc_leading_zeros_ul libc.src.stdbit.stdc_leading_zeros_ull + libc.src.stdbit.stdc_leading_ones_uc + libc.src.stdbit.stdc_leading_ones_us + libc.src.stdbit.stdc_leading_ones_ui + libc.src.stdbit.stdc_leading_ones_ul + libc.src.stdbit.stdc_leading_ones_ull + libc.src.stdbit.stdc_trailing_zeros_uc + libc.src.stdbit.stdc_trailing_zeros_us + libc.src.stdbit.stdc_trailing_zeros_ui + libc.src.stdbit.stdc_trailing_zeros_ul + libc.src.stdbit.stdc_trailing_zeros_ull + libc.src.stdbit.stdc_trailing_ones_uc + libc.src.stdbit.stdc_trailing_ones_us + libc.src.stdbit.stdc_trailing_ones_ui + libc.src.stdbit.stdc_trailing_ones_ul + libc.src.stdbit.stdc_trailing_ones_ull + libc.src.stdbit.stdc_first_leading_zero_uc + libc.src.stdbit.stdc_first_leading_zero_us + libc.src.stdbit.stdc_first_leading_zero_ui + libc.src.stdbit.stdc_first_leading_zero_ul + libc.src.stdbit.stdc_first_leading_zero_ull + libc.src.stdbit.stdc_first_leading_one_uc + libc.src.stdbit.stdc_first_leading_one_us + libc.src.stdbit.stdc_first_leading_one_ui + libc.src.stdbit.stdc_first_leading_one_ul + libc.src.stdbit.stdc_first_leading_one_ull + libc.src.stdbit.stdc_first_trailing_zero_uc + libc.src.stdbit.stdc_first_trailing_zero_us + libc.src.stdbit.stdc_first_trailing_zero_ui + libc.src.stdbit.stdc_first_trailing_zero_ul + libc.src.stdbit.stdc_first_trailing_zero_ull + libc.src.stdbit.stdc_first_trailing_one_uc + libc.src.stdbit.stdc_first_trailing_one_us + libc.src.stdbit.stdc_first_trailing_one_ui + libc.src.stdbit.stdc_first_trailing_one_ul + libc.src.stdbit.stdc_first_trailing_one_ull # stdlib.h entrypoints libc.src.stdlib.abs diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index f5e50262371f26..7d11e7d5c497e0 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -248,6 +248,35 @@ template >> return value == cpp::numeric_limits::max() ? 0 : countr_zero(value) + 1; } +/// Count number of 1's aka population count or hamming weight. +/// +/// Only unsigned integral types are allowed. +template >> +[[nodiscard]] LIBC_INLINE constexpr int count_ones(T value) { + int count = 0; + for (int i = 0; i != cpp::numeric_limits::digits; ++i) + if ((value >> i) & 0x1) + ++count; + return count; +} +#define ADD_SPECIALIZATION(TYPE, BUILTIN) \ + template <> \ + [[nodiscard]] LIBC_INLINE constexpr int count_ones(TYPE value) { \ + return BUILTIN(value); \ + } +ADD_SPECIALIZATION(unsigned char, __builtin_popcount) +ADD_SPECIALIZATION(unsigned short, __builtin_popcount) +ADD_SPECIALIZATION(unsigned, __builtin_popcount) +ADD_SPECIALIZATION(unsigned long, __builtin_popcountl) +ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll) +// TODO: 128b specializations? +#undef ADD_SPECIALIZATION + +template >> +[[nodiscard]] LIBC_INLINE constexpr int count_zeros(T value) { + return count_ones(static_cast(~value)); +} + } // namespace LIBC_NAMESPACE::cpp #endif // LLVM_LIBC_SRC___SUPPORT_CPP_BIT_H diff --git a/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h b/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h index ea1fd68a5fcdfc..fd915373020ec9 100644 --- a/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h +++ b/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h @@ -161,8 +161,8 @@ LIBC_INLINE int set_except(int excepts) { LIBC_INLINE int raise_except(int excepts) { float zero = 0.0f; float one = 1.0f; - float large_value = FPBits::max_normal(); - float small_value = FPBits::min_normal(); + float large_value = FPBits::max_normal().get_val(); + float small_value = FPBits::min_normal().get_val(); auto divfunc = [](float a, float b) { __asm__ __volatile__("ldr s0, %0\n\t" "ldr s1, %1\n\t" @@ -277,8 +277,8 @@ LIBC_INLINE int set_env(const fenv_t *envp) { return 0; } const FEnv::FPState *state = reinterpret_cast(envp); - FEnv::set_control_word(state->ControlWord); - FEnv::set_status_word(state->StatusWord); + FEnv::set_control_word(static_cast(state->ControlWord)); + FEnv::set_status_word(static_cast(state->StatusWord)); return 0; } diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 5d1f451776a5fe..115a5d505c4b7a 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -232,4 +232,17 @@ TYPED_TEST(LlvmLibcBitTest, FirstTrailingOne, UnsignedTypes) { EXPECT_EQ(first_trailing_one(T(1) << i), i + 1); } +TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypes) { + EXPECT_EQ(count_zeros(T(0)), cpp::numeric_limits::digits); + for (int i = 0; i != cpp::numeric_limits::digits; ++i) + EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); +} + +TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypes) { + EXPECT_EQ(count_ones(T(0)), 0); + for (int i = 0; i != cpp::numeric_limits::digits; ++i) + EXPECT_EQ(count_ones(cpp::numeric_limits::max() >> i), + cpp::numeric_limits::digits - i); +} + } // namespace LIBC_NAMESPACE::cpp diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index fcca8c42b29b71..691ebfc074320f 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -310,6 +310,7 @@ struct Config { bool zInitfirst; bool zInterpose; bool zKeepTextSectionPrefix; + bool zLrodataAfterBss; bool zNodefaultlib; bool zNodelete; bool zNodlopen; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 01ca2e0ab0ff96..d94b8939de15bd 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1436,6 +1436,8 @@ static void readConfigs(opt::InputArgList &args) { config->zInterpose = hasZOption(args, "interpose"); config->zKeepTextSectionPrefix = getZFlag( args, "keep-text-section-prefix", "nokeep-text-section-prefix", false); + config->zLrodataAfterBss = + getZFlag(args, "lrodata-after-bss", "nolrodata-after-bss", false); config->zNodefaultlib = hasZOption(args, "nodefaultlib"); config->zNodelete = hasZOption(args, "nodelete"); config->zNodlopen = hasZOption(args, "nodlopen"); diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index bada394aa30d7d..b6bdc350bc0dd1 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -537,9 +537,11 @@ SmallVector EhFrameSection::getFdeData() const { for (EhSectionPiece *fde : rec->fdes) { uint64_t pc = getFdePc(buf, fde->outputOff, enc); uint64_t fdeVA = getParent()->addr + fde->outputOff; - if (!isInt<32>(pc - va)) - fatal(toString(fde->sec) + ": PC offset is too large: 0x" + - Twine::utohexstr(pc - va)); + if (!isInt<32>(pc - va)) { + errorOrWarn(toString(fde->sec) + ": PC offset is too large: 0x" + + Twine::utohexstr(pc - va)); + continue; + } ret.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)}); } } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5b7dfd358e764e..0bbf43ddf694aa 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -911,11 +911,12 @@ enum RankFlags { RF_NOT_ALLOC = 1 << 26, RF_PARTITION = 1 << 18, // Partition number (8 bits) RF_NOT_SPECIAL = 1 << 17, - RF_WRITE = 1 << 16, - RF_EXEC_WRITE = 1 << 15, - RF_EXEC = 1 << 14, - RF_RODATA = 1 << 13, - RF_LARGE = 1 << 12, + RF_LARGE_ALT = 1 << 15, + RF_WRITE = 1 << 14, + RF_EXEC_WRITE = 1 << 13, + RF_EXEC = 1 << 12, + RF_RODATA = 1 << 11, + RF_LARGE = 1 << 10, RF_NOT_RELRO = 1 << 9, RF_NOT_TLS = 1 << 8, RF_BSS = 1 << 7, @@ -974,8 +975,14 @@ static unsigned getSectionRank(OutputSection &osec) { if (osec.type == SHT_PROGBITS) rank |= RF_RODATA; // Among PROGBITS sections, place .lrodata further from .text. - if (!(osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64)) - rank |= RF_LARGE; + // For -z lrodata-after-bss, place .lrodata after .lbss like GNU ld. This + // layout has one extra PT_LOAD, but alleviates relocation overflow + // pressure for absolute relocations referencing small data from -fno-pic + // relocatable files. + if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64) + rank |= config->zLrodataAfterBss ? RF_LARGE_ALT : 0; + else + rank |= config->zLrodataAfterBss ? 0 : RF_LARGE; } else if (isExec) { rank |= isWrite ? RF_EXEC_WRITE : RF_EXEC; } else { @@ -988,10 +995,15 @@ static unsigned getSectionRank(OutputSection &osec) { osec.relro = true; else rank |= RF_NOT_RELRO; - // Place .ldata and .lbss after .bss. Making .bss closer to .text alleviates - // relocation overflow pressure. - if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64) - rank |= RF_LARGE; + // Place .ldata and .lbss after .bss. Making .bss closer to .text + // alleviates relocation overflow pressure. + // For -z lrodata-after-bss, place .lbss/.lrodata/.ldata after .bss. + // .bss/.lbss being adjacent reuses the NOBITS size optimization. + if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64) { + rank |= config->zLrodataAfterBss + ? (osec.type == SHT_NOBITS ? 1 : RF_LARGE_ALT) + : RF_LARGE; + } } // Within TLS sections, or within other RelRo sections, or within non-RelRo @@ -1103,7 +1115,7 @@ template void Writer::setReservedSymbolSections() { } PhdrEntry *last = nullptr; - PhdrEntry *lastRO = nullptr; + OutputSection *lastRO = nullptr; auto isLarge = [](OutputSection *osec) { return config->emachine == EM_X86_64 && osec->flags & SHF_X86_64_LARGE; }; @@ -1112,17 +1124,18 @@ template void Writer::setReservedSymbolSections() { if (p->p_type != PT_LOAD) continue; last = p; - if (!(p->p_flags & PF_W)) - lastRO = p; + if (!(p->p_flags & PF_W) && p->lastSec && !isLarge(p->lastSec)) + lastRO = p->lastSec; } } if (lastRO) { - // _etext is the first location after the last read-only loadable segment. + // _etext is the first location after the last read-only loadable segment + // that does not contain large sections. if (ElfSym::etext1) - ElfSym::etext1->section = lastRO->lastSec; + ElfSym::etext1->section = lastRO; if (ElfSym::etext2) - ElfSym::etext2->section = lastRO->lastSec; + ElfSym::etext2->section = lastRO; } if (last) { diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index efe6c256ca9d01..8dc402b7d7f49f 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -791,6 +791,9 @@ flag to indicate to the runtime linker that the object is an interposer. During symbol resolution interposers are searched after the application but before other dependencies. .Pp +.It Cm lrodata-after-bss +Place .lrodata after .bss. +.Pp .It Cm muldefs Do not error if a symbol is defined multiple times. The first definition will be used. diff --git a/lld/test/ELF/eh-frame-pcrel-overflow.s b/lld/test/ELF/eh-frame-pcrel-overflow.s index 78e804768dad63..3dfcf9ee1a7f9a 100644 --- a/lld/test/ELF/eh-frame-pcrel-overflow.s +++ b/lld/test/ELF/eh-frame-pcrel-overflow.s @@ -4,7 +4,9 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/eh-frame-pcrel-overflow.s -o %t1.o # RUN: ld.lld --eh-frame-hdr -Ttext=0x90000000 %t.o -o /dev/null # RUN: not ld.lld --eh-frame-hdr %t.o %t1.o -o /dev/null 2>&1 | FileCheck %s +# RUN: ld.lld --eh-frame-hdr %t.o %t1.o -o /dev/null --noinhibit-exec 2>&1 | FileCheck %s --check-prefix=WARN # CHECK: error: {{.*}}.o:(.eh_frame): PC offset is too large: 0x90001054 +# WARN: warning: {{.*}}.o:(.eh_frame): PC offset is too large: 0x90001054 .text .global _start diff --git a/lld/test/ELF/lto/codemodel.ll b/lld/test/ELF/lto/codemodel.ll index a35f87729411d7..cf7d0e409ec4b1 100644 --- a/lld/test/ELF/lto/codemodel.ll +++ b/lld/test/ELF/lto/codemodel.ll @@ -2,8 +2,8 @@ ; RUN: llvm-as %s -o %t.o ; RUN: ld.lld %t.o -o %ts -mllvm -code-model=small ; RUN: ld.lld %t.o -o %tl -mllvm -code-model=large -; RUN: llvm-objdump --no-print-imm-hex -d %ts | FileCheck %s --check-prefix=CHECK-SMALL -; RUN: llvm-objdump --no-print-imm-hex -d %tl | FileCheck %s --check-prefix=CHECK-LARGE +; RUN: llvm-objdump -d %ts | FileCheck %s --check-prefix=CHECK-SMALL +; RUN: llvm-objdump -d %tl | FileCheck %s --check-prefix=CHECK-LARGE target triple = "x86_64-unknown-linux-gnu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -13,8 +13,8 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define ptr @_start() nounwind readonly { entry: ; CHECK-SMALL-LABEL: <_start>: -; CHECK-SMALL: movl $2097440, %eax +; CHECK-SMALL: movl ${{.*}}, %eax ; CHECK-LARGE-LABEL: <_start>: -; CHECK-LARGE: movabsq $2097440, %rax +; CHECK-LARGE: movabsq ${{.*}}, %rax ret ptr @data } diff --git a/lld/test/ELF/x86-64-section-layout.s b/lld/test/ELF/x86-64-section-layout.s index 0ba60539389397..b03d3e6c2b9990 100644 --- a/lld/test/ELF/x86-64-section-layout.s +++ b/lld/test/ELF/x86-64-section-layout.s @@ -12,9 +12,12 @@ # RUN: ld.lld --section-start=.note=0x200300 a1.o -o a1 # RUN: llvm-readelf -S -sX a1 | FileCheck %s --check-prefix=CHECK1 -# RUN: ld.lld -T b.lds -z norelro a.o -o b +# RUN: ld.lld -T b.lds -z norelro a.o -z lrodata-after-bss -z nolrodata-after-bss -o b --fatal-warnings # RUN: llvm-readelf -S -l b | FileCheck %s --check-prefix=CHECK2 +# RUN: ld.lld --section-start=.note=0x200300 a.o -z lrodata-after-bss -o a3 +# RUN: llvm-readelf -S -l -sX a3 | FileCheck %s --check-prefix=CHECK3 + # CHECK: Name Type Address Off Size ES Flg Lk Inf Al # CHECK-NEXT: NULL 0000000000000000 000000 000000 00 0 0 0 # CHECK-NEXT: .note NOTE 0000000000200300 000300 000001 00 A 0 0 1 @@ -80,6 +83,39 @@ # CHECK2-NEXT: LOAD 0x000305 0x0000000000200305 0x0000000000200305 0x001805 0x002a06 RW 0x1000 # CHECK2-NEXT: TLS 0x000305 0x0000000000200305 0x0000000000200305 0x000001 0x000003 R 0x1 +# CHECK3: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK3-NEXT: NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK3-NEXT: .note NOTE 0000000000200300 000300 000001 00 A 0 0 1 +# CHECK3-NEXT: .rodata PROGBITS 0000000000200301 000301 000001 00 A 0 0 1 +# CHECK3-NEXT: .text PROGBITS 0000000000201304 000304 000001 00 AX 0 0 4 +# CHECK3-NEXT: .tdata PROGBITS 0000000000202305 000305 000001 00 WAT 0 0 1 +# CHECK3-NEXT: .tbss NOBITS 0000000000202306 000306 000002 00 WAT 0 0 1 +# CHECK3-NEXT: .relro_padding NOBITS 0000000000202306 000306 000cfa 00 WA 0 0 1 +# CHECK3-NEXT: .data PROGBITS 0000000000203306 000306 000001 00 WA 0 0 1 +# CHECK3-NEXT: .bss NOBITS 0000000000203307 000307 001800 00 WA 0 0 1 +## We spend (size(.bss) + size(.lbss)) % MAXPAGESIZE bytes. +# CHECK3-NEXT: .lbss NOBITS 0000000000204b07 000307 001201 00 WAl 0 0 1 +# CHECK3-NEXT: .lrodata PROGBITS 0000000000206d08 000d08 000002 00 Al 0 0 1 +# CHECK3-NEXT: .ldata PROGBITS 0000000000207d0a 000d0a 000002 00 WAl 0 0 1 +# CHECK3-NEXT: .ldata2 PROGBITS 0000000000207d0c 000d0c 000001 00 WAl 0 0 1 +# CHECK3-NEXT: .comment PROGBITS 0000000000000000 000d0d {{.*}} 01 MS 0 0 1 + +# CHECK3: Program Headers: +# CHECK3-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK3-NEXT: PHDR 0x000040 0x0000000000200040 0x0000000000200040 {{.*}} {{.*}} R 0x8 +# CHECK3-NEXT: LOAD 0x000000 0x0000000000200000 0x0000000000200000 0x000302 0x000302 R 0x1000 +# CHECK3-NEXT: LOAD 0x000304 0x0000000000201304 0x0000000000201304 0x000001 0x000001 R E 0x1000 +# CHECK3-NEXT: LOAD 0x000305 0x0000000000202305 0x0000000000202305 0x000001 0x000cfb RW 0x1000 +# CHECK3-NEXT: LOAD 0x000306 0x0000000000203306 0x0000000000203306 0x000001 0x002a02 RW 0x1000 +# CHECK3-NEXT: LOAD 0x000d08 0x0000000000206d08 0x0000000000206d08 0x000002 0x000002 R 0x1000 +# CHECK3-NEXT: LOAD 0x000d0a 0x0000000000207d0a 0x0000000000207d0a 0x000003 0x000003 RW 0x1000 +# CHECK3-NEXT: TLS 0x000305 0x0000000000202305 0x0000000000202305 0x000001 0x000003 R 0x1 + +# CHECK3: 0000000000201304 0 NOTYPE GLOBAL DEFAULT [[#]] (.text) _start +# CHECK3-NEXT: 0000000000201305 0 NOTYPE GLOBAL DEFAULT [[#]] (.text) _etext +# CHECK3-NEXT: 0000000000203307 0 NOTYPE GLOBAL DEFAULT [[#]] (.data) _edata +# CHECK3-NEXT: 0000000000207d0d 0 NOTYPE GLOBAL DEFAULT [[#]] (.ldata2) _end + #--- a.s .globl _start, _etext, _edata, _end _start: diff --git a/lldb/include/lldb/Interpreter/CommandObject.h b/lldb/include/lldb/Interpreter/CommandObject.h index b99de56f534469..a326c6dc38a37a 100644 --- a/lldb/include/lldb/Interpreter/CommandObject.h +++ b/lldb/include/lldb/Interpreter/CommandObject.h @@ -242,6 +242,13 @@ class CommandObject : public std::enable_shared_from_this { /// The completion request that needs to be answered. virtual void HandleCompletion(CompletionRequest &request); + /// The default version handles argument definitions that have only one + /// argument type, and use one of the argument types that have an entry in + /// the CommonCompletions. Override this if you have a more complex + /// argument setup. + /// FIXME: we should be able to extend this to more complex argument + /// definitions provided we have completers for all the argument types. + /// /// The input array contains a parsed version of the line. /// /// We've constructed the map of options and their arguments as well if that @@ -251,7 +258,7 @@ class CommandObject : public std::enable_shared_from_this { /// The completion request that needs to be answered. virtual void HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) {} + OptionElementVector &opt_element_vector); bool HelpTextContainsWord(llvm::StringRef search_word, bool search_short_help = true, diff --git a/lldb/include/lldb/Interpreter/CommandOptionArgumentTable.h b/lldb/include/lldb/Interpreter/CommandOptionArgumentTable.h index d0cf54c31ca73f..9248e2ac814461 100644 --- a/lldb/include/lldb/Interpreter/CommandOptionArgumentTable.h +++ b/lldb/include/lldb/Interpreter/CommandOptionArgumentTable.h @@ -243,7 +243,7 @@ static constexpr CommandObject::ArgumentTableEntry g_argument_table[] = { { lldb::eArgTypeLogCategory, "log-category", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The name of a category within a log channel, e.g. all (try \"log list\" to see a list of all channels and their categories." }, { lldb::eArgTypeLogChannel, "log-channel", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The name of a log channel, e.g. process.gdb-remote (try \"log list\" to see a list of all channels and their categories)." }, { lldb::eArgTypeMethod, "method", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "A C++ method name." }, - { lldb::eArgTypeName, "name", lldb::eTypeCategoryNameCompletion, {}, { nullptr, false }, "Help text goes here." }, + { lldb::eArgTypeName, "name", lldb::eTypeCategoryNameCompletion, {}, { nullptr, false }, "The name of a type category." }, { lldb::eArgTypeNewPathPrefix, "new-path-prefix", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, { lldb::eArgTypeNumLines, "num-lines", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The number of lines to use." }, { lldb::eArgTypeNumberPerLine, "number-per-line", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The number of items per line to display." }, @@ -260,9 +260,9 @@ static constexpr CommandObject::ArgumentTableEntry g_argument_table[] = { { lldb::eArgTypePythonFunction, "python-function", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The name of a Python function." }, { lldb::eArgTypePythonScript, "python-script", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "Source code written in Python." }, { lldb::eArgTypeQueueName, "queue-name", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The name of the thread queue." }, - { lldb::eArgTypeRegisterName, "register-name", lldb::CompletionType::eNoCompletion, {}, { RegisterNameHelpTextCallback, true }, nullptr }, + { lldb::eArgTypeRegisterName, "register-name", lldb::CompletionType::eRegisterCompletion, {}, { RegisterNameHelpTextCallback, true }, nullptr }, { lldb::eArgTypeRegularExpression, "regular-expression", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "A POSIX-compliant extended regular expression." }, - { lldb::eArgTypeRunArgs, "run-args", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "Arguments to be passed to the target program when it starts executing." }, + { lldb::eArgTypeRunArgs, "run-args", lldb::CompletionType::eDiskFileCompletion, {}, { nullptr, false }, "Arguments to be passed to the target program when it starts executing." }, { lldb::eArgTypeRunMode, "run-mode", lldb::CompletionType::eNoCompletion, g_running_mode, { nullptr, false }, "Help text goes here." }, { lldb::eArgTypeScriptedCommandSynchronicity, "script-cmd-synchronicity", lldb::CompletionType::eNoCompletion, g_script_synchro_type, { nullptr, false }, "The synchronicity to use to run scripted commands with regard to LLDB event system." }, { lldb::eArgTypeScriptLang, "script-language", lldb::CompletionType::eNoCompletion, g_script_option_enumeration, { nullptr, false }, "The scripting language to be used for script-based commands. Supported languages are python and lua." }, @@ -270,21 +270,21 @@ static constexpr CommandObject::ArgumentTableEntry g_argument_table[] = { { lldb::eArgTypeSelector, "selector", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "An Objective-C selector name." }, { lldb::eArgTypeSettingIndex, "setting-index", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "An index into a settings variable that is an array (try 'settings list' to see all the possible settings variables and their types)." }, { lldb::eArgTypeSettingKey, "setting-key", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "A key into a settings variables that is a dictionary (try 'settings list' to see all the possible settings variables and their types)." }, - { lldb::eArgTypeSettingPrefix, "setting-prefix", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The name of a settable internal debugger variable up to a dot ('.'), e.g. 'target.process.'" }, - { lldb::eArgTypeSettingVariableName, "setting-variable-name", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The name of a settable internal debugger variable. Type 'settings list' to see a complete list of such variables." }, - { lldb::eArgTypeShlibName, "shlib-name", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The name of a shared library." }, + { lldb::eArgTypeSettingPrefix, "setting-prefix", lldb::CompletionType::eSettingsNameCompletion, {}, { nullptr, false }, "The name of a settable internal debugger variable up to a dot ('.'), e.g. 'target.process.'" }, + { lldb::eArgTypeSettingVariableName, "setting-variable-name", lldb::CompletionType::eSettingsNameCompletion, {}, { nullptr, false }, "The name of a settable internal debugger variable. Type 'settings list' to see a complete list of such variables." }, + { lldb::eArgTypeShlibName, "shlib-name", lldb::CompletionType::eDiskFileCompletion, {}, { nullptr, false }, "The name of a shared library." }, { lldb::eArgTypeSourceFile, "source-file", lldb::eSourceFileCompletion, {}, { nullptr, false }, "The name of a source file.." }, { lldb::eArgTypeSortOrder, "sort-order", lldb::CompletionType::eNoCompletion, g_sort_option_enumeration, { nullptr, false }, "Specify a sort order when dumping lists." }, { lldb::eArgTypeStartAddress, "start-address", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, { lldb::eArgTypeSummaryString, "summary-string", lldb::CompletionType::eNoCompletion, {}, { SummaryStringHelpTextCallback, true }, nullptr }, { lldb::eArgTypeSymbol, "symbol", lldb::eSymbolCompletion, {}, { nullptr, false }, "Any symbol name (function name, variable, argument, etc.)" }, - { lldb::eArgTypeThreadID, "thread-id", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "Thread ID number." }, - { lldb::eArgTypeThreadIndex, "thread-index", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "Index into the process' list of threads." }, + { lldb::eArgTypeThreadID, "thread-id", lldb::CompletionType::eThreadIndexCompletion, {}, { nullptr, false }, "Thread ID number." }, + { lldb::eArgTypeThreadIndex, "thread-index", lldb::CompletionType::eThreadIndexCompletion, {}, { nullptr, false }, "Index into the process' list of threads." }, { lldb::eArgTypeThreadName, "thread-name", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The thread's name." }, { lldb::eArgTypeTypeName, "type-name", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "A type name." }, { lldb::eArgTypeUnsignedInteger, "unsigned-integer", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "An unsigned integer." }, { lldb::eArgTypeUnixSignal, "unix-signal", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "A valid Unix signal name or number (e.g. SIGKILL, KILL or 9)." }, - { lldb::eArgTypeVarName, "variable-name", lldb::CompletionType::eNoCompletion, {} ,{ nullptr, false }, "The name of a variable in your program." }, + { lldb::eArgTypeVarName, "variable-name", lldb::CompletionType::eVariablePathCompletion, {} ,{ nullptr, false }, "The name of a variable in your program." }, { lldb::eArgTypeValue, "value", lldb::CompletionType::eNoCompletion, g_dependents_enumeration, { nullptr, false }, "A value could be anything, depending on where and how it is used." }, { lldb::eArgTypeWidth, "width", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "Help text goes here." }, { lldb::eArgTypeNone, "none", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "No help available for this." }, @@ -302,8 +302,11 @@ static constexpr CommandObject::ArgumentTableEntry g_argument_table[] = { { lldb::eArgTypeRecognizerID, "frame-recognizer-id", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The ID for a stack frame recognizer." }, { lldb::eArgTypeConnectURL, "process-connect-url", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "A URL-style specification for a remote connection." }, { lldb::eArgTypeTargetID, "target-id", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The index ID for an lldb Target." }, - { lldb::eArgTypeStopHookID, "stop-hook-id", lldb::CompletionType::eNoCompletion, {}, { nullptr, false }, "The ID you receive when you create a stop-hook." }, + { lldb::eArgTypeStopHookID, "stop-hook-id", lldb::CompletionType::eStopHookIDCompletion, {}, { nullptr, false }, "The ID you receive when you create a stop-hook." }, { lldb::eArgTypeCompletionType, "completion-type", lldb::CompletionType::eNoCompletion, g_completion_type, { nullptr, false }, "The completion type to use when adding custom commands. If none is specified, the command won't use auto-completion." }, + { lldb::eArgTypeRemotePath, "remote-path", lldb::CompletionType::eRemoteDiskFileCompletion, {}, { nullptr, false }, "A path on the system managed by the current platform." }, + { lldb::eArgTypeRemoteFilename, "remote-filename", lldb::CompletionType::eRemoteDiskFileCompletion, {}, { nullptr, false }, "A file on the system managed by the current platform." }, + { lldb::eArgTypeModule, "module", lldb::CompletionType::eModuleCompletion, {}, { nullptr, false }, "The name of a module loaded into the current target." }, // clang-format on }; diff --git a/lldb/include/lldb/Utility/FileSpecList.h b/lldb/include/lldb/Utility/FileSpecList.h index 49edc667ddd5b6..6eb3bb9971f13a 100644 --- a/lldb/include/lldb/Utility/FileSpecList.h +++ b/lldb/include/lldb/Utility/FileSpecList.h @@ -238,6 +238,10 @@ class FileSpecList { const_iterator begin() const { return m_files.begin(); } const_iterator end() const { return m_files.end(); } + llvm::iterator_range files() const { + return llvm::make_range(begin(), end()); + } + protected: collection m_files; ///< A collection of FileSpec objects. }; diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 4640533047833b..85769071dae785 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -651,6 +651,9 @@ enum CommandArgumentType { eArgTypeTargetID, eArgTypeStopHookID, eArgTypeCompletionType, + eArgTypeRemotePath, + eArgTypeRemoteFilename, + eArgTypeModule, eArgTypeLastArg // Always keep this entry as the last entry in this // enumeration!! }; diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index b7cd65059b2214..7c459bdaf38022 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -63,13 +63,6 @@ class CommandObjectCommandsSource : public CommandObjectParsed { return std::string(""); } - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - Options *GetOptions() override { return &m_options; } protected: @@ -1968,13 +1961,6 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed { ~CommandObjectCommandsScriptImport() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - Options *GetOptions() override { return &m_options; } protected: diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 695f3d7931cd0a..fb2cc106ffd2dd 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -52,12 +52,6 @@ CommandObjectDWIMPrint::CommandObjectDWIMPrint(CommandInterpreter &interpreter) Options *CommandObjectDWIMPrint::GetOptions() { return &m_option_group; } -void CommandObjectDWIMPrint::HandleArgumentCompletion( - CompletionRequest &request, OptionElementVector &opt_element_vector) { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eVariablePathCompletion, request, nullptr); -} - void CommandObjectDWIMPrint::DoExecute(StringRef command, CommandReturnObject &result) { m_option_group.NotifyOptionParsingStarting(&m_exe_ctx); diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.h b/lldb/source/Commands/CommandObjectDWIMPrint.h index d868f8964c2ac5..01ba9c225e3301 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.h +++ b/lldb/source/Commands/CommandObjectDWIMPrint.h @@ -39,10 +39,6 @@ class CommandObjectDWIMPrint : public CommandObjectRaw { bool WantsCompletion() override { return true; } - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override; - private: void DoExecute(llvm::StringRef command, CommandReturnObject &result) override; diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index a4d3fb66e8b552..f092d54ffe9932 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -286,16 +286,6 @@ class CommandObjectFrameSelect : public CommandObjectParsed { ~CommandObjectFrameSelect() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - if (request.GetCursorIndex() != 0) - return; - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eFrameIndexCompletion, request, nullptr); - } - Options *GetOptions() override { return &m_options; } protected: @@ -446,15 +436,6 @@ may even involve JITing and running code in the target program.)"); Options *GetOptions() override { return &m_option_group; } - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - // Arguments are the standard source file completer. - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eVariablePathCompletion, request, - nullptr); - } - protected: llvm::StringRef GetScopeString(VariableSP var_sp) { if (!var_sp) diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp index 790f1dbb475358..b25c391bd4faa2 100644 --- a/lldb/source/Commands/CommandObjectPlatform.cpp +++ b/lldb/source/Commands/CommandObjectPlatform.cpp @@ -418,7 +418,7 @@ class CommandObjectPlatformMkDir : public CommandObjectParsed { : CommandObjectParsed(interpreter, "platform mkdir", "Make a new directory on the remote end.", nullptr, 0) { - CommandArgumentData thread_arg{eArgTypePath, eArgRepeatPlain}; + CommandArgumentData thread_arg{eArgTypeRemotePath, eArgRepeatPlain}; m_arguments.push_back({thread_arg}); } @@ -467,21 +467,12 @@ class CommandObjectPlatformFOpen : public CommandObjectParsed { CommandObjectPlatformFOpen(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "platform file open", "Open a file on the remote end.", nullptr, 0) { - CommandArgumentData path_arg{eArgTypePath, eArgRepeatPlain}; + CommandArgumentData path_arg{eArgTypeRemotePath, eArgRepeatPlain}; m_arguments.push_back({path_arg}); } ~CommandObjectPlatformFOpen() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - if (request.GetCursorIndex() == 0) - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eRemoteDiskFileCompletion, request, - nullptr); - } - void DoExecute(Args &args, CommandReturnObject &result) override { PlatformSP platform_sp( GetDebugger().GetPlatformList().GetSelectedPlatform()); @@ -795,7 +786,7 @@ class CommandObjectPlatformGetFile : public CommandObjectParsed { CommandArgumentData file_arg_remote, file_arg_host; // Define the first (and only) variant of this arg. - file_arg_remote.arg_type = eArgTypeFilename; + file_arg_remote.arg_type = eArgTypeRemoteFilename; file_arg_remote.arg_repetition = eArgRepeatPlain; // There is only one variant this argument could be; put it into the // argument entry. @@ -876,7 +867,7 @@ class CommandObjectPlatformGetSize : public CommandObjectParsed { CommandArgumentData file_arg_remote; // Define the first (and only) variant of this arg. - file_arg_remote.arg_type = eArgTypeFilename; + file_arg_remote.arg_type = eArgTypeRemoteFilename; file_arg_remote.arg_repetition = eArgRepeatPlain; // There is only one variant this argument could be; put it into the // argument entry. @@ -888,17 +879,6 @@ class CommandObjectPlatformGetSize : public CommandObjectParsed { ~CommandObjectPlatformGetSize() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - if (request.GetCursorIndex() != 0) - return; - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eRemoteDiskFileCompletion, request, - nullptr); - } - void DoExecute(Args &args, CommandReturnObject &result) override { // If the number of arguments is incorrect, issue an error message. if (args.GetArgumentCount() != 1) { @@ -946,7 +926,7 @@ class CommandObjectPlatformGetPermissions : public CommandObjectParsed { CommandArgumentData file_arg_remote; // Define the first (and only) variant of this arg. - file_arg_remote.arg_type = eArgTypeFilename; + file_arg_remote.arg_type = eArgTypeRemoteFilename; file_arg_remote.arg_repetition = eArgRepeatPlain; // There is only one variant this argument could be; put it into the // argument entry. @@ -958,17 +938,6 @@ class CommandObjectPlatformGetPermissions : public CommandObjectParsed { ~CommandObjectPlatformGetPermissions() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - if (request.GetCursorIndex() != 0) - return; - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eRemoteDiskFileCompletion, request, - nullptr); - } - void DoExecute(Args &args, CommandReturnObject &result) override { // If the number of arguments is incorrect, issue an error message. if (args.GetArgumentCount() != 1) { @@ -1015,7 +984,7 @@ class CommandObjectPlatformFileExists : public CommandObjectParsed { CommandArgumentData file_arg_remote; // Define the first (and only) variant of this arg. - file_arg_remote.arg_type = eArgTypeFilename; + file_arg_remote.arg_type = eArgTypeRemoteFilename; file_arg_remote.arg_repetition = eArgRepeatPlain; // There is only one variant this argument could be; put it into the // argument entry. @@ -1027,17 +996,6 @@ class CommandObjectPlatformFileExists : public CommandObjectParsed { ~CommandObjectPlatformFileExists() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - if (request.GetCursorIndex() != 0) - return; - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eRemoteDiskFileCompletion, request, - nullptr); - } - void DoExecute(Args &args, CommandReturnObject &result) override { // If the number of arguments is incorrect, issue an error message. if (args.GetArgumentCount() != 1) { @@ -1080,7 +1038,7 @@ class CommandObjectPlatformPutFile : public CommandObjectParsed { Omitting the destination places the file in the platform working directory.)"); CommandArgumentData source_arg{eArgTypePath, eArgRepeatPlain}; - CommandArgumentData path_arg{eArgTypePath, eArgRepeatOptional}; + CommandArgumentData path_arg{eArgTypeRemotePath, eArgRepeatOptional}; m_arguments.push_back({source_arg}); m_arguments.push_back({path_arg}); } @@ -1139,6 +1097,16 @@ class CommandObjectPlatformProcessLaunch : public CommandObjectParsed { m_arguments.push_back({run_arg_arg}); } + void + HandleArgumentCompletion(CompletionRequest &request, + OptionElementVector &opt_element_vector) override { + // I didn't make a type for RemoteRunArgs, but since we're going to run + // this on the remote system we should use the remote completer. + lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( + GetCommandInterpreter(), lldb::eRemoteDiskFileCompletion, request, + nullptr); + } + ~CommandObjectPlatformProcessLaunch() override = default; Options *GetOptions() override { return &m_all_options; } @@ -1552,13 +1520,6 @@ class CommandObjectPlatformProcessInfo : public CommandObjectParsed { ~CommandObjectPlatformProcessInfo() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eProcessIDCompletion, request, nullptr); - } - protected: void DoExecute(Args &args, CommandReturnObject &result) override { Target *target = GetDebugger().GetSelectedTarget().get(); @@ -1850,7 +1811,7 @@ class CommandObjectPlatformInstall : public CommandObjectParsed { "Install a target (bundle or executable file) to the remote end.", "platform target-install ", 0) { CommandArgumentData local_arg{eArgTypePath, eArgRepeatPlain}; - CommandArgumentData remote_arg{eArgTypePath, eArgRepeatPlain}; + CommandArgumentData remote_arg{eArgTypeRemotePath, eArgRepeatPlain}; m_arguments.push_back({local_arg}); m_arguments.push_back({remote_arg}); } diff --git a/lldb/source/Commands/CommandObjectPlugin.cpp b/lldb/source/Commands/CommandObjectPlugin.cpp index f22885144b09b3..da3b5f0518a690 100644 --- a/lldb/source/Commands/CommandObjectPlugin.cpp +++ b/lldb/source/Commands/CommandObjectPlugin.cpp @@ -36,13 +36,6 @@ class CommandObjectPluginLoad : public CommandObjectParsed { ~CommandObjectPluginLoad() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - protected: void DoExecute(Args &command, CommandReturnObject &result) override { size_t argc = command.GetArgumentCount(); diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 722b0e0c376be8..7cd5ad656f1b05 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -143,14 +143,6 @@ class CommandObjectProcessLaunch : public CommandObjectProcessLaunchOrAttach { ~CommandObjectProcessLaunch() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - Options *GetOptions() override { return &m_all_options; } std::optional GetRepeatCommand(Args ¤t_command_args, @@ -1015,9 +1007,7 @@ class CommandObjectProcessLoad : public CommandObjectParsed { OptionElementVector &opt_element_vector) override { if (!m_exe_ctx.HasProcessScope()) return; - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); + CommandObject::HandleArgumentCompletion(request, opt_element_vector); } Options *GetOptions() override { return &m_options; } @@ -1292,13 +1282,6 @@ class CommandObjectProcessSaveCore : public CommandObjectParsed { Options *GetOptions() override { return &m_options; } - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - class CommandOptions : public Options { public: CommandOptions() = default; diff --git a/lldb/source/Commands/CommandObjectRegister.cpp b/lldb/source/Commands/CommandObjectRegister.cpp index a4d53e5c8dd5f1..4ffdde1ee09f9f 100644 --- a/lldb/source/Commands/CommandObjectRegister.cpp +++ b/lldb/source/Commands/CommandObjectRegister.cpp @@ -80,9 +80,7 @@ class CommandObjectRegisterRead : public CommandObjectParsed { OptionElementVector &opt_element_vector) override { if (!m_exe_ctx.HasProcessScope()) return; - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eRegisterCompletion, request, nullptr); + CommandObject::HandleArgumentCompletion(request, opt_element_vector); } Options *GetOptions() override { return &m_option_group; } @@ -440,8 +438,7 @@ different for the same register when connected to different debug servers.)"); OptionElementVector &opt_element_vector) override { if (!m_exe_ctx.HasProcessScope() || request.GetCursorIndex() != 0) return; - CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eRegisterCompletion, request, nullptr); + CommandObject::HandleArgumentCompletion(request, opt_element_vector); } protected: diff --git a/lldb/source/Commands/CommandObjectSession.cpp b/lldb/source/Commands/CommandObjectSession.cpp index d140bdfdba57b3..28506d6c59512d 100644 --- a/lldb/source/Commands/CommandObjectSession.cpp +++ b/lldb/source/Commands/CommandObjectSession.cpp @@ -28,13 +28,6 @@ class CommandObjectSessionSave : public CommandObjectParsed { ~CommandObjectSessionSave() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - protected: void DoExecute(Args &args, CommandReturnObject &result) override { llvm::StringRef file_path; diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp index 5fb7dcc80279fd..0cf3d1daf7f528 100644 --- a/lldb/source/Commands/CommandObjectSettings.cpp +++ b/lldb/source/Commands/CommandObjectSettings.cpp @@ -262,14 +262,6 @@ class CommandObjectSettingsShow : public CommandObjectParsed { ~CommandObjectSettingsShow() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eSettingsNameCompletion, request, - nullptr); - } - protected: void DoExecute(Args &args, CommandReturnObject &result) override { result.SetStatus(eReturnStatusSuccessFinishResult); diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index c3ecdb7700c256..4e006e4bb0e0fc 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -257,13 +257,6 @@ class CommandObjectTargetCreate : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - protected: void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); @@ -2789,13 +2782,6 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { Options *GetOptions() override { return &m_option_group; } - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - protected: OptionGroupOptions m_option_group; OptionGroupUUID m_uuid_option_group; @@ -3233,7 +3219,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { : CommandObjectParsed( interpreter, "target modules list", "List current executable and dependent shared library images.") { - CommandArgumentData module_arg{eArgTypeShlibName, eArgRepeatStar}; + CommandArgumentData module_arg{eArgTypeModule, eArgRepeatStar}; m_arguments.push_back({module_arg}); } @@ -4343,13 +4329,6 @@ class CommandObjectTargetSymbolsAdd : public CommandObjectParsed { ~CommandObjectTargetSymbolsAdd() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eDiskFileCompletion, request, nullptr); - } - Options *GetOptions() override { return &m_option_group; } protected: @@ -5195,8 +5174,7 @@ class CommandObjectTargetStopHookDelete : public CommandObjectParsed { OptionElementVector &opt_element_vector) override { if (request.GetCursorIndex()) return; - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eStopHookIDCompletion, request, nullptr); + CommandObject::HandleArgumentCompletion(request, opt_element_vector); } protected: @@ -5251,8 +5229,7 @@ class CommandObjectTargetStopHookEnableDisable : public CommandObjectParsed { OptionElementVector &opt_element_vector) override { if (request.GetCursorIndex()) return; - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eStopHookIDCompletion, request, nullptr); + CommandObject::HandleArgumentCompletion(request, opt_element_vector); } protected: diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index a1e7e3f11361e7..52e493b13c61ca 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -403,10 +403,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed { OptionElementVector &opt_element_vector) override { if (request.GetCursorIndex()) return; - - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eThreadIndexCompletion, request, - nullptr); + CommandObject::HandleArgumentCompletion(request, opt_element_vector); } Options *GetOptions() override { return &m_all_options; } @@ -663,14 +660,6 @@ class CommandObjectThreadContinue : public CommandObjectParsed { ~CommandObjectThreadContinue() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eThreadIndexCompletion, request, - nullptr); - } - void DoExecute(Args &command, CommandReturnObject &result) override { bool synchronous_execution = m_interpreter.GetSynchronous(); diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp index f76420f3cc6837..036b8e9d9def13 100644 --- a/lldb/source/Commands/CommandObjectType.cpp +++ b/lldb/source/Commands/CommandObjectType.cpp @@ -1758,14 +1758,6 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed { ~CommandObjectTypeCategoryDefine() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eTypeCategoryNameCompletion, request, - nullptr); - } - protected: void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); @@ -1859,14 +1851,6 @@ class CommandObjectTypeCategoryEnable : public CommandObjectParsed { ~CommandObjectTypeCategoryEnable() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eTypeCategoryNameCompletion, request, - nullptr); - } - protected: void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); @@ -1926,14 +1910,6 @@ class CommandObjectTypeCategoryDelete : public CommandObjectParsed { ~CommandObjectTypeCategoryDelete() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eTypeCategoryNameCompletion, request, - nullptr); - } - protected: void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); @@ -2033,14 +2009,6 @@ class CommandObjectTypeCategoryDisable : public CommandObjectParsed { ~CommandObjectTypeCategoryDisable() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eTypeCategoryNameCompletion, request, - nullptr); - } - protected: void DoExecute(Args &command, CommandReturnObject &result) override { const size_t argc = command.GetArgumentCount(); diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp index 438a16c50bd67f..5b74b1ae43accc 100644 --- a/lldb/source/Commands/CommandObjectWatchpoint.cpp +++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp @@ -831,16 +831,6 @@ corresponding to the byte size of the data type."); ~CommandObjectWatchpointSetVariable() override = default; - void - HandleArgumentCompletion(CompletionRequest &request, - OptionElementVector &opt_element_vector) override { - if (request.GetCursorIndex() != 0) - return; - lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( - GetCommandInterpreter(), lldb::eVariablePathCompletion, request, - nullptr); - } - Options *GetOptions() override { return &m_option_group; } protected: diff --git a/lldb/source/Interpreter/CommandObject.cpp b/lldb/source/Interpreter/CommandObject.cpp index 6ed0fd1f1ddbd9..93c53e89f7d1ae 100644 --- a/lldb/source/Interpreter/CommandObject.cpp +++ b/lldb/source/Interpreter/CommandObject.cpp @@ -305,6 +305,43 @@ void CommandObject::HandleCompletion(CompletionRequest &request) { } } +void CommandObject::HandleArgumentCompletion( + CompletionRequest &request, OptionElementVector &opt_element_vector) { + size_t num_arg_entries = GetNumArgumentEntries(); + if (num_arg_entries != 1) + return; + + CommandArgumentEntry *entry_ptr = GetArgumentEntryAtIndex(0); + if (!entry_ptr) { + assert(entry_ptr && "We said there was one entry, but there wasn't."); + return; // Not worth crashing if asserts are off... + } + + CommandArgumentEntry &entry = *entry_ptr; + // For now, we only handle the simple case of one homogenous argument type. + if (entry.size() != 1) + return; + + // Look up the completion type, and if it has one, invoke it: + const CommandObject::ArgumentTableEntry *arg_entry = + FindArgumentDataByType(entry[0].arg_type); + const ArgumentRepetitionType repeat = entry[0].arg_repetition; + + if (arg_entry == nullptr || arg_entry->completion_type == lldb::eNoCompletion) + return; + + // FIXME: This should be handled higher in the Command Parser. + // Check the case where this command only takes one argument, and don't do + // the completion if we aren't on the first entry: + if (repeat == eArgRepeatPlain && request.GetCursorIndex() != 0) + return; + + lldb_private::CommandCompletions::InvokeCommonCompletionCallbacks( + GetCommandInterpreter(), arg_entry->completion_type, request, nullptr); + +} + + bool CommandObject::HelpTextContainsWord(llvm::StringRef search_word, bool search_short_help, bool search_long_help, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp index 6b063f3bd88d85..795355b57a06db 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp @@ -22,6 +22,7 @@ static constexpr Log::Category g_categories[] = { {{"map"}, {"log insertions of object files into DWARF debug maps"}, DWARFLog::DebugMap}, + {{"split"}, {"log split DWARF related activities"}, DWARFLog::SplitDwarf}, }; static Log::Channel g_channel(g_categories, DWARFLog::DebugInfo); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h index 662aa6757e2ffc..7f254a1162bd10 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h @@ -20,6 +20,7 @@ enum class DWARFLog : Log::MaskType { DebugMap = Log::ChannelFlag<2>, Lookups = Log::ChannelFlag<3>, TypeCompletion = Log::ChannelFlag<4>, + SplitDwarf = Log::ChannelFlag<5>, LLVM_MARK_AS_BITMASK_ENUM(TypeCompletion) }; LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 42211b9a21b0e3..84ff4c2565a050 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4349,26 +4349,60 @@ SymbolFileDWARFDebugMap *SymbolFileDWARF::GetDebugMapSymfile() { const std::shared_ptr &SymbolFileDWARF::GetDwpSymbolFile() { llvm::call_once(m_dwp_symfile_once_flag, [this]() { + // Create a list of files to try and append .dwp to. + FileSpecList symfiles; + // Append the module's object file path. + const FileSpec module_fspec = m_objfile_sp->GetModule()->GetFileSpec(); + symfiles.Append(module_fspec); + // Append the object file for this SymbolFile only if it is different from + // the module's file path. Our main module could be "a.out", our symbol file + // could be "a.debug" and our ".dwp" file might be "a.debug.dwp" instead of + // "a.out.dwp". + const FileSpec symfile_fspec(m_objfile_sp->GetFileSpec()); + if (symfile_fspec != module_fspec) { + symfiles.Append(symfile_fspec); + } else { + // If we don't have a separate debug info file, then try stripping the + // extension. The main module could be "a.debug" and the .dwp file could + // be "a.dwp" instead of "a.debug.dwp". + ConstString filename_no_ext = + module_fspec.GetFileNameStrippingExtension(); + if (filename_no_ext != module_fspec.GetFilename()) { + FileSpec module_spec_no_ext(module_fspec); + module_spec_no_ext.SetFilename(filename_no_ext); + symfiles.Append(module_spec_no_ext); + } + } + Log *log = GetLog(DWARFLog::SplitDwarf); + FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); ModuleSpec module_spec; module_spec.GetFileSpec() = m_objfile_sp->GetFileSpec(); - module_spec.GetSymbolFileSpec() = - FileSpec(m_objfile_sp->GetModule()->GetFileSpec().GetPath() + ".dwp"); - module_spec.GetUUID() = m_objfile_sp->GetUUID(); - FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); - FileSpec dwp_filespec = - PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); - if (FileSystem::Instance().Exists(dwp_filespec)) { - DataBufferSP dwp_file_data_sp; - lldb::offset_t dwp_file_data_offset = 0; - ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( - GetObjectFile()->GetModule(), &dwp_filespec, 0, - FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, - dwp_file_data_offset); - if (!dwp_obj_file) - return; - m_dwp_symfile = std::make_shared( - *this, dwp_obj_file, DIERef::k_file_index_mask); + for (const auto &symfile : symfiles.files()) { + module_spec.GetSymbolFileSpec() = + FileSpec(symfile.GetPath() + ".dwp", symfile.GetPathStyle()); + LLDB_LOG(log, "Searching for DWP using: \"{0}\"", + module_spec.GetSymbolFileSpec()); + FileSpec dwp_filespec = + PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); + if (FileSystem::Instance().Exists(dwp_filespec)) { + LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); + DataBufferSP dwp_file_data_sp; + lldb::offset_t dwp_file_data_offset = 0; + ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( + GetObjectFile()->GetModule(), &dwp_filespec, 0, + FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, + dwp_file_data_offset); + if (dwp_obj_file) { + m_dwp_symfile = std::make_shared( + *this, dwp_obj_file, DIERef::k_file_index_mask); + break; + } + } + } + if (!m_dwp_symfile) { + LLDB_LOG(log, "Unable to locate for DWP file for: \"{0}\"", + m_objfile_sp->GetModule()->GetFileSpec()); } }); return m_dwp_symfile; diff --git a/lldb/test/API/commands/help/TestHelp.py b/lldb/test/API/commands/help/TestHelp.py index 95ffdb3cc8b18b..f0f5bcb3218011 100644 --- a/lldb/test/API/commands/help/TestHelp.py +++ b/lldb/test/API/commands/help/TestHelp.py @@ -104,7 +104,7 @@ def test_help_image_du_line_should_work(self): def test_help_image_list_shows_positional_args(self): """Command 'help image list' should describe positional args.""" # 'image' is an alias for 'target modules'. - self.expect("help image list", substrs=[" [...]"]) + self.expect("help image list", substrs=[" [...]"]) @no_debug_info_test def test_help_target_variable_syntax(self): diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp index a47209931c3840..9a8149065b6e58 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp @@ -1,12 +1,16 @@ // REQUIRES: lld +// Now test with DWARF5 // RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.dwarf5.o // RUN: ld.lld %t.dwarf5.o -o %t.dwarf5 // RUN: llvm-dwp %t.dwarf5.dwo -o %t.dwarf5.dwp // RUN: rm %t.dwarf5.dwo // RUN: llvm-objcopy --only-keep-debug %t.dwarf5 %t.dwarf5.debug // RUN: llvm-objcopy --strip-all --add-gnu-debuglink=%t.dwarf5.debug %t.dwarf5 -// RUN: %lldb %t.dwarf5 -o "target variable a" -b | FileCheck %s +// RUN: %lldb \ +// RUN: -O "log enable dwarf split" \ +// RUN: -o "target variable a" \ +// RUN: -b %t.dwarf5 | FileCheck %s // Run one time with the index cache enabled to populate the index cache. When // we populate the index cache we have to parse all of the DWARF debug info @@ -34,6 +38,31 @@ // RUN: -o "statistics dump" \ // RUN: %t.dwarf5 -b | FileCheck %s -check-prefix=CACHED +// Make sure that if we load the "%t.dwarf5.debug" file, that we can find and +// load the .dwo file from the .dwp when it is "%t.dwarf5.dwp" +// RUN: %lldb %t.dwarf5.debug -o "b main" -b | FileCheck %s -check-prefix=DEBUG + +// Make sure that if we load the "%t.dwarf5" file, that we can find and +// load the .dwo file from the .dwp when it is "%t.dwarf5.debug.dwp" +// RUN: mv %t.dwarf5.dwp %t.dwarf5.debug.dwp +// RUN: %lldb %t.dwarf5 -o "b main" -b | FileCheck %s -check-prefix=DEBUG + +// Make sure that if we load the "%t.dwarf5.debug" file, that we can find and +// load the .dwo file from the .dwp when it is "%t.dwarf5.debug.dwp" +// RUN: %lldb %t.dwarf5.debug -o "b main" -b | FileCheck %s -check-prefix=DEBUG + +// Make sure that if we remove the .dwp file we see an appropriate error. +// RUN: rm %t.dwarf5.debug.dwp +// RUN: %lldb \ +// RUN: -O "log enable dwarf split" \ +// RUN: -o "b main" \ +// RUN: -b %t.dwarf5 2>&1 | FileCheck %s -check-prefix=NODWP + +// RUN: %lldb \ +// RUN: -O "log enable dwarf split" \ +// RUN: -o "b main" \ +// RUN: -b %t.dwarf5.debug 2>&1 | FileCheck %s -check-prefix=NODWP + // Now test with DWARF4 // RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-4 -c %s -o %t.dwarf4.o // RUN: ld.lld %t.dwarf4.o -o %t.dwarf4 @@ -41,7 +70,10 @@ // RUN: rm %t.dwarf4.dwo // RUN: llvm-objcopy --only-keep-debug %t.dwarf4 %t.dwarf4.debug // RUN: llvm-objcopy --strip-all --add-gnu-debuglink=%t.dwarf4.debug %t.dwarf4 -// RUN: %lldb %t.dwarf4 -o "target variable a" -b | FileCheck %s +// RUN: %lldb \ +// RUN: -O "log enable dwarf split" \ +// RUN: -o "target variable a" \ +// RUN: -b %t.dwarf4 | FileCheck %s // Run one time with the index cache enabled to populate the index cache. When // we populate the index cache we have to parse all of the DWARF debug info @@ -69,6 +101,46 @@ // RUN: -o "statistics dump" \ // RUN: %t.dwarf4 -b | FileCheck %s -check-prefix=CACHED +// Make sure that if we load the "%t.dwarf4.debug" file, that we can find and +// load the .dwo file from the .dwp when it is "%t.dwarf4.dwp" +// RUN: %lldb %t.dwarf4.debug -o "b main" -b | FileCheck %s -check-prefix=DEBUG + +// Make sure that if we load the "%t.dwarf4" file, that we can find and +// load the .dwo file from the .dwp when it is "%t.dwarf4.debug.dwp" +// RUN: mv %t.dwarf4.dwp %t.dwarf4.debug.dwp +// RUN: %lldb %t.dwarf4 -o "b main" -b | FileCheck %s -check-prefix=DEBUG + +// Make sure that if we load the "%t.dwarf4.debug" file, that we can find and +// load the .dwo file from the .dwp when it is "%t.dwarf4.debug.dwp" +// RUN: %lldb %t.dwarf4.debug -o "b main" -b | FileCheck %s -check-prefix=DEBUG + +// Make sure that if we remove the .dwp file we see an appropriate error. +// RUN: rm %t.dwarf4.debug.dwp +// RUN: %lldb \ +// RUN: -O "log enable dwarf split" \ +// RUN: -o "b main" \ +// RUN: -b %t.dwarf4 2>&1 | FileCheck %s -check-prefix=NODWP + +// RUN: %lldb \ +// RUN: -O "log enable dwarf split" \ +// RUN: -o "b main" \ +// RUN: -b %t.dwarf4.debug 2>&1 | FileCheck %s -check-prefix=NODWP + +// Test if we have a GNU build ID in our main executable and in our debug file, +// and we have a .dwp file that doesn't, that we can still load our .dwp file. +// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.o +// RUN: ld.lld %t.o --build-id=md5 -o %t +// RUN: llvm-dwp %t.dwo -o %t.dwp +// RUN: rm %t.dwo +// RUN: llvm-objcopy --only-keep-debug %t %t.debug +// RUN: llvm-objcopy --strip-all --add-gnu-debuglink=%t.debug %t +// RUN: %lldb \ +// RUN: -O "log enable dwarf split" \ +// RUN: -o "target variable a" \ +// RUN: -b %t | FileCheck %s + +// CHECK: Searching for DWP using: +// CHECK: Found DWP file: // CHECK: (A) a = (x = 47) // CACHE: script lldb.target.modules[0].FindTypes('::A').GetTypeAtIndex(0) @@ -83,6 +155,16 @@ // CACHED-NEXT: } // CACHED: "totalDebugInfoIndexLoadedFromCache": 1 +// Make sure debug information was loaded by verifying that the +// DEBUG: Breakpoint 1: where = dwp-separate-debug-file.cpp.tmp.dwarf{{[45]}}{{(\.debug)?}}`main + {{[0-9]+}} at dwp-separate-debug-file.cpp:{{[0-9]+}}:{{[0-9]+}}, address = {{0x[0-9a-fA-F]+}} + +// Make sure if we load the stripped binary or the debug info file with no .dwp +// nor any .dwo files that we are not able to fine the .dwp or .dwo files. +// NODWP: Searching for DWP using: +// NODWP: Searching for DWP using: +// NODWP: Unable to locate for DWP file for: +// NODWP: unable to locate separate debug file (dwo, dwp). Debugging will be degraded. + struct A { int x = 47; }; diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 075ff0eb28ac0a..02ec869725af2f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -224,12 +224,15 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) { Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || Encoding == dwarf::DW_ATE_complex_float || + Encoding == dwarf::DW_ATE_signed_fixed || + Encoding == dwarf::DW_ATE_unsigned_fixed || (Ty->getTag() == dwarf::DW_TAG_unspecified_type && Ty->getName() == "decltype(nullptr)")) && "Unsupported encoding"); return Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Encoding == llvm::dwarf::DW_ATE_unsigned_fixed || Ty->getTag() == dwarf::DW_TAG_unspecified_type; } diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 85bd33e4efbd0f..5b32b34079f44e 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -323,6 +323,9 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); } + bool isPackedFP16InputMods() const { + return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16); + } bool isVReg() const { return isRegClass(AMDGPU::VGPR_32RegClassID) || diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 8bf05682cbe7ea..d16d8ebd41a54f 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -219,8 +219,10 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const { default: return false; case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2BF16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: break; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index cd14c12a8a80c6..97c723752b70b9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1289,9 +1289,8 @@ def IntVRegInputMods : InputMods { class PackedFPInputModsMatchClass : AsmOperandClass { let Name = "PackedFP"#opSize#"InputMods"; - let ParserMethod = "parseRegOrImm"; - let PredicateMethod = "isRegOrImm"; -// let PredicateMethod = "isPackedFP"#opSize#"InputMods"; + let ParserMethod = "parseRegOrImmWithFPInputMods"; + let PredicateMethod = "isPackedFP"#opSize#"InputMods"; } class PackedIntInputModsMatchClass : AsmOperandClass { @@ -1305,7 +1304,7 @@ def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>; def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>; class PackedFPInputMods : InputMods { -// let PrintMethod = "printPackedFPInputMods"; + let PrintMethod = "printOperandAndFPInputMods"; } class PackedIntInputMods : InputMods { @@ -1606,8 +1605,11 @@ class getSrcMod { } class getOpSelMod { - Operand ret = !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), - FP16InputMods, IntOpSelMods); + Operand ret = !cond(!eq(VT, f16) : FP16InputMods, + !eq(VT, bf16) : FP16InputMods, + !eq(VT, v2f16) : PackedF16InputMods, + !eq(VT, v2bf16) : PackedF16InputMods, + 1 : IntOpSelMods); } // Return type of input modifiers operand specified input operand for DPP diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 0d4057b3ddd109..99f8e8ede4ace9 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1,3 +1,4 @@ + //===-- VOP1Instructions.td - Vector Instruction Definitions --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -565,7 +566,7 @@ class VOPProfile_Base_CVT_F32_F8 : VOPProfileI2F { def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 ; def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 ; -let SubtargetPredicate = HasFP8ConversionInsts, mayRaiseFPException = 0, +let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in { defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>; defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>; @@ -653,7 +654,7 @@ class Cvt_F32_F8_Pat_OpSel index, (inst_e32 $src)) >; -let SubtargetPredicate = isGFX12Plus in { +let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in { foreach Index = [0, 1, 2, 3] in { def : Cvt_F32_F8_Pat_OpSel; @@ -670,7 +671,7 @@ class Cvt_PK_F32_F8_Pat_OpSel; -let SubtargetPredicate = isGFX12Plus in { +let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in { foreach Index = [0, -1] in { def : Cvt_PK_F32_F8_Pat_OpSel; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 35cffa22f45929..396ae9c9d92eea 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -640,7 +640,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile>; -let SubtargetPredicate = HasFP8ConversionInsts, mayRaiseFPException = 0, +let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in { let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>; @@ -667,6 +667,7 @@ class Cvt_SR_F8_F32_Pat index, VOP3_Pseudo inst> !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0) >; +let OtherPredicates = [HasFP8ConversionInsts] in { foreach Index = [0, -1] in { def : Cvt_PK_F8_F32_Pat; def : Cvt_PK_F8_F32_Pat; @@ -676,6 +677,7 @@ foreach Index = [0, 1, 2, 3] in { def : Cvt_SR_F8_F32_Pat; def : Cvt_SR_F8_F32_Pat; } +} class ThreeOp_i32_Pats : GCNPat < // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions. @@ -866,20 +868,9 @@ def : DivFmasPat; def : DivFmasPat; } -class VOP3_DOT_Profile : VOP3_Profile { +class VOP3_DOT_Profile : VOP3_Profile { let HasClamp = 0; let HasOMod = 0; - // Override modifiers for bf16(i16) (same as float modifiers). - let HasSrc0Mods = 1; - let HasSrc1Mods = 1; - let HasSrc2Mods = 1; - let Src0ModVOP3DPP = FPVRegInputMods; - let Src1ModVOP3DPP = FPVRegInputMods; - let Src2ModVOP3DPP = FP16InputMods; - let InsVOP3OpSel = getInsVOP3OpSel.ret; - let AsmVOP3OpSel = getAsmVOP3OpSel.ret; } let SubtargetPredicate = isGFX11Plus in { diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index c47c13dbb84025..801afabbdb1401 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -210,7 +210,7 @@ class VOP3_Real : VOP3_Real { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1349,7 +1349,7 @@ class VOP3_DPP16_Gen op, VOP_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName> : VOP3_DPP16 { let AssemblerPredicate = Gen.AssemblerPredicate; - let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); + let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); let DecoderNamespace = "DPP"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 6c7e88fbe2eb86..619c7dc69f9b27 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2765,12 +2765,40 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::PS_vloadrw_nt_ai: case Hexagon::V6_vL32b_ai: case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vS32b_pred_ai: + case Hexagon::V6_vS32b_npred_ai: case Hexagon::V6_vS32b_qpred_ai: case Hexagon::V6_vS32b_nqpred_ai: + case Hexagon::V6_vS32b_new_ai: + case Hexagon::V6_vS32b_new_pred_ai: + case Hexagon::V6_vS32b_new_npred_ai: + case Hexagon::V6_vS32b_nt_pred_ai: + case Hexagon::V6_vS32b_nt_npred_ai: + case Hexagon::V6_vS32b_nt_new_ai: + case Hexagon::V6_vS32b_nt_new_pred_ai: + case Hexagon::V6_vS32b_nt_new_npred_ai: + case Hexagon::V6_vS32b_nt_qpred_ai: + case Hexagon::V6_vS32b_nt_nqpred_ai: case Hexagon::V6_vL32b_nt_ai: case Hexagon::V6_vS32b_nt_ai: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vS32Ub_ai: + case Hexagon::V6_vL32b_cur_ai: + case Hexagon::V6_vL32b_tmp_ai: + case Hexagon::V6_vL32b_pred_ai: + case Hexagon::V6_vL32b_npred_ai: + case Hexagon::V6_vL32b_cur_pred_ai: + case Hexagon::V6_vL32b_cur_npred_ai: + case Hexagon::V6_vL32b_tmp_pred_ai: + case Hexagon::V6_vL32b_tmp_npred_ai: + case Hexagon::V6_vL32b_nt_cur_ai: + case Hexagon::V6_vL32b_nt_tmp_ai: + case Hexagon::V6_vL32b_nt_pred_ai: + case Hexagon::V6_vL32b_nt_npred_ai: + case Hexagon::V6_vL32b_nt_cur_pred_ai: + case Hexagon::V6_vL32b_nt_cur_npred_ai: + case Hexagon::V6_vL32b_nt_tmp_pred_ai: + case Hexagon::V6_vL32b_nt_tmp_npred_ai: case Hexagon::V6_vgathermh_pseudo: case Hexagon::V6_vgathermw_pseudo: case Hexagon::V6_vgathermhw_pseudo: diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 7e3dcb3283caba..8bac41372b5a83 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -399,9 +399,9 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, // Optimize compile time offset case StackOffset Offset = StackOffset::getScalable(Amount); - if (STI.getRealMinVLen() == STI.getRealMaxVLen()) { + if (auto VLEN = STI.getRealVLen()) { // 1. Multiply the number of v-slots by the (constant) length of register - const int64_t VLENB = STI.getRealMinVLen() / 8; + const int64_t VLENB = *VLEN / 8; assert(Amount % 8 == 0 && "Reserve the stack by the multiple of one vector size."); const int64_t NumOfVReg = Amount / 8; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 80797e36ad40fe..904f1d7fdf9065 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -577,9 +577,8 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { SDValue VLOperand; unsigned Opcode = RISCV::PseudoVSETVLI; if (auto *C = dyn_cast(Node->getOperand(1))) { - const unsigned VLEN = Subtarget->getRealMinVLen(); - if (VLEN == Subtarget->getRealMaxVLen()) - if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) + if (auto VLEN = Subtarget->getRealVLen()) + if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) VLMax = true; } if (VLMax || isAllOnesConstant(Node->getOperand(1))) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 9ab6895aed521e..874c851cd9147a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8092,12 +8092,11 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, // If we're compiling for an exact VLEN value, we can always perform // the insert in m1 as we can determine the register corresponding to // the index in the register group. - const unsigned MinVLen = Subtarget.getRealMinVLen(); - const unsigned MaxVLen = Subtarget.getRealMaxVLen(); const MVT M1VT = getLMUL1VT(ContainerVT); - if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) { + if (auto VLEN = Subtarget.getRealVLen(); + VLEN && ContainerVT.bitsGT(M1VT)) { EVT ElemVT = VecVT.getVectorElementType(); - unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); + unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits(); unsigned RemIdx = OrigIdx % ElemsPerVReg; unsigned SubRegIdx = OrigIdx / ElemsPerVReg; unsigned ExtractIdx = diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index ca519dbc4c0359..9d1f01dffaaf47 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -283,8 +283,8 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); // Optimize for constant VLEN. - if (STI.getRealMinVLen() == STI.getRealMaxVLen()) { - const int64_t VLENB = STI.getRealMinVLen() / 8; + if (auto VLEN = STI.getRealVLen()) { + const int64_t VLENB = *VLEN / 8; int64_t Offset = VLENB * LMUL; STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); } else { @@ -360,8 +360,8 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); // Optimize for constant VLEN. - if (STI.getRealMinVLen() == STI.getRealMaxVLen()) { - const int64_t VLENB = STI.getRealMinVLen() / 8; + if (auto VLEN = STI.getRealVLen()) { + const int64_t VLENB = *VLEN / 8; int64_t Offset = VLENB * LMUL; STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); } else { diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 8c55efa69a6a5f..4b60d7aff22a0f 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -188,6 +188,14 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { unsigned VLen = getMaxRVVVectorSizeInBits(); return VLen == 0 ? 65536 : VLen; } + // If we know the exact VLEN, return it. Otherwise, return std::nullopt. + std::optional getRealVLen() const { + unsigned Min = getRealMinVLen(); + if (Min != getRealMaxVLen()) + return std::nullopt; + return Min; + } + RISCVABI::ABI getTargetABI() const { return TargetABI; } bool isSoftFPABI() const { return TargetABI == RISCVABI::ABI_LP64 || diff --git a/llvm/test/CodeGen/Hexagon/ldst_vector_offset.ll b/llvm/test/CodeGen/Hexagon/ldst_vector_offset.ll new file mode 100644 index 00000000000000..15695e83501652 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/ldst_vector_offset.ll @@ -0,0 +1,59 @@ +; REQUIRES: asserts +; RUN: llc -O3 -march=hexagon < %s -o /dev/null +; Make sure that this doesn't crash. +; This test validates that the compiler would not assert when analyzing the +; offset of V6_vS32b_pred_ai instruction + +%struct.pluto = type <{ ptr, i16, ptr }> + +@global = external hidden unnamed_addr constant [62 x i8], align 1 +@global.1 = external hidden unnamed_addr constant [47 x i8], align 1 +@global.2 = hidden local_unnamed_addr constant %struct.pluto <{ ptr @global, i16 892, ptr @global.1 }>, align 1 +@global.3 = local_unnamed_addr constant [1 x i32] zeroinitializer + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #0 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare <32 x i32> @llvm.hexagon.V6.vd0.128B() #1 + +; Function Attrs: noinline nounwind +declare hidden fastcc void @zot(i32, i32, i32, i32) unnamed_addr #2 + +; Function Attrs: noinline nounwind +define void @barney(ptr nocapture %arg, ptr nocapture readnone %arg1, i8 signext %arg2, i32 %arg3, ptr nocapture readnone %arg4, ptr nocapture readnone %arg5, i32 %arg6, i32 %arg7, ptr nocapture readnone %arg8, ptr nocapture readnone %arg9, ptr nocapture readnone %arg10, ptr nocapture readnone %arg11, ptr nocapture readnone %arg12, ptr nocapture readnone %arg13, ptr nocapture readnone %arg14, ptr nocapture readnone %arg15, ptr nocapture readnone %arg16, ptr nocapture readnone %arg17) local_unnamed_addr #2 { +bb: + %icmp = icmp ult i32 %arg3, 4 + tail call void @llvm.assume(i1 %icmp) #3 + %call = tail call <32 x i32> @llvm.hexagon.V6.vd0.128B() #3 + br label %bb18 + +bb18: ; preds = %bb22, %bb + %phi = phi i32 [ %and, %bb22 ], [ %arg3, %bb ] + %phi19 = phi i32 [ %add23, %bb22 ], [ 4, %bb ] + %icmp20 = icmp eq i32 %phi, 0 + br i1 %icmp20, label %bb21, label %bb22 + +bb21: ; preds = %bb18 + %shl = shl i32 %phi19, 8 + %getelementptr = getelementptr inbounds i8, ptr %arg, i32 %shl + %bitcast = bitcast ptr %getelementptr to ptr + store <32 x i32> %call, ptr %bitcast, align 128 + br label %bb22 + +bb22: ; preds = %bb21, %bb18 + %add = add nuw nsw i32 %phi, 1 + %and = and i32 %add, 3 + %add23 = add nuw nsw i32 %phi19, 1 + %icmp24 = icmp eq i32 %add23, 8 + br i1 %icmp24, label %bb25, label %bb18 + +bb25: ; preds = %bb22 + tail call fastcc void @zot(i32 %arg6, i32 %arg7, i32 0, i32 %arg3) + ret void +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { noinline nounwind "target-cpu"="hexagonv68" "target-features"="+hvx-length128b,+hvxv68,+v68,+hvx-ieee-fp,-long-calls,-small-data" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { nounwind } diff --git a/llvm/test/DebugInfo/fixed-point.ll b/llvm/test/DebugInfo/fixed-point.ll new file mode 100644 index 00000000000000..eaa67ead3f3136 --- /dev/null +++ b/llvm/test/DebugInfo/fixed-point.ll @@ -0,0 +1,32 @@ +;; This fixes https://github.com/llvm/llvm-project/issues/81555 +; REQUIRES: object-emission +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - -verify | FileCheck %s --check-prefix=VERIFY + +; VERIFY-NOT: error: + +; CHECK: {{.*}}: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name ("var") +; CHECK-NEXT: DW_AT_encoding (DW_ATE_signed_fixed) +define void @func() !dbg !26 { +entry: + %classifier = alloca i32, align 4 + tail call void @llvm.dbg.value(metadata i32 32768, metadata !37, metadata !DIExpression()), !dbg !39 + store i32 32768, ptr %classifier, align 4, !dbg !39 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!19} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "a", directory: "") +!6 = !DIBasicType(name: "var", size: 32, encoding: DW_ATE_signed_fixed) +!19 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !DISubroutineType(types: null) +!26 = distinct !DISubprogram(unit: !0, type: !3) +!37 = !DILocalVariable(name: "intercept", arg: 2, scope: !26, file: !1, line: 7, type: !6) +!39 = !DILocation(line: 0, scope: !26) + diff --git a/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test b/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test index 244827196f485e..33ad5515a6357a 100644 --- a/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test +++ b/llvm/test/ExecutionEngine/JITLink/Generic/sectcreate.test @@ -1,6 +1,6 @@ # RUN: llc -filetype=obj -o %t.o %S/Inputs/main-ret-0.ll # RUN: llvm-jitlink -noexec \ -# RUN: -sectcreate __data,%S/Inputs/sectcreate-data.txt:foo=0 \ +# RUN: -sectcreate __data,%S/Inputs/sectcreate-data.txt@foo=0 \ # RUN: %t.o # # Use -sectcreate to create a section from a data file. diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 9a94162005e1f7..d288c02a22c921 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -2116,6 +2116,12 @@ v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] // GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 +// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] + +v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 +// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] + v_dot2_f16_f16 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] @@ -2161,6 +2167,12 @@ v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] // GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v2, v0, 0x20004000, v2 +// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] + +v_dot2_f16_f16 v2, 0x20004000, v0, v2 +// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] + v_fma_dx9_zero_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 7674c02185b5f2..fc35a2e6b4f8f4 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1788,6 +1788,12 @@ # GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20 + +# GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20 + # GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00 @@ -1833,6 +1839,12 @@ # GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20] +0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20 + +# GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20] +0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20 + # GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 8ecae5dbe37202..4c05317036d1a3 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -415,10 +415,11 @@ def version_int(ver): config.available_features.add("llvm-dylib") config.substitutions.append( ( + # libLLVM.so.19.0git "%llvmdylib", - "{}/libLLVM-{}{}".format( - config.llvm_shlib_dir, config.llvm_dylib_version, config.llvm_shlib_ext - ), + "{}/libLLVM{}.{}".format( + config.llvm_shlib_dir, config.llvm_shlib_ext, config.llvm_dylib_version + ) ) ) diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 1138b2ccf7bce7..b6f255d472d16f 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -44,7 +44,7 @@ config.build_examples = @LLVM_BUILD_EXAMPLES@ config.enable_threads = @LLVM_ENABLE_THREADS@ config.build_shared_libs = @BUILD_SHARED_LIBS@ config.link_llvm_dylib = @LLVM_LINK_LLVM_DYLIB@ -config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@@LLVM_VERSION_SUFFIX@" +config.llvm_dylib_version = "@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@@LLVM_VERSION_SUFFIX@" config.llvm_host_triple = '@LLVM_HOST_TRIPLE@' config.host_arch = "@HOST_ARCH@" config.have_opt_viewer_modules = @LLVM_HAVE_OPT_VIEWER_MODULES@ diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index f6280779ded10f..f0b8310a32efd3 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -165,7 +165,7 @@ static cl::list static cl::list SectCreate("sectcreate", - cl::desc("given ,[:=,...] " + cl::desc("given ,[@=,...] " "add the content of to "), cl::cat(JITLinkCategory)); @@ -1683,7 +1683,7 @@ static Error addSectCreates(Session &S, StringRef SCArg(*SCItr); - auto [SectAndFileName, ExtraSymbolsString] = SCArg.split(':'); + auto [SectAndFileName, ExtraSymbolsString] = SCArg.split('@'); auto [SectName, FileName] = SectAndFileName.rsplit(','); if (SectName.empty()) return make_error("In -sectcreate=" + SCArg + diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index cafef5b5fad512..b73891b59f0264 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -1005,7 +1005,7 @@ TEST(STLExtras, Unique) { } TEST(STLExtras, UniqueNoPred) { - std::vector V = {1, 5, 5, 4, 3, 3, 3}; + std::vector V = {1, 5, 5, 4, 3, 3, 3}; auto I = llvm::unique(V); diff --git a/llvm/utils/gn/secondary/clang/lib/ExtractAPI/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/ExtractAPI/BUILD.gn index 62b4af0635841a..ee60eee0da0fb8 100644 --- a/llvm/utils/gn/secondary/clang/lib/ExtractAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/ExtractAPI/BUILD.gn @@ -6,6 +6,7 @@ static_library("ExtractAPI") { "//clang/lib/Basic", "//clang/lib/Frontend", "//clang/lib/Index", + "//clang/lib/InstallAPI", "//llvm/lib/Support", "//llvm/lib/TargetParser", ] diff --git a/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn index 4d79ac805ac193..6eae7e293dce61 100644 --- a/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn @@ -6,5 +6,9 @@ static_library("InstallAPI") { "//llvm/lib/Support", "//llvm/lib/TextAPI", ] - sources = [ "Context.cpp" ] + sources = [ + "Context.cpp", + "FileList.cpp", + "HeaderFile.cpp", + ] } diff --git a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn index b60c5264d60cce..354934f4b18ab1 100644 --- a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn @@ -13,6 +13,7 @@ group("unittests") { "Format:FormatTests", "Frontend:FrontendTests", "Index:IndexTests", + "InstallAPI:InstallAPITests", "Interpreter:ClangReplInterpreterTests", "Introspection:IntrospectionTests", "Lex:LexTests", diff --git a/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn new file mode 100644 index 00000000000000..e27659457474f2 --- /dev/null +++ b/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn @@ -0,0 +1,13 @@ +import("//third-party/unittest/unittest.gni") + +unittest("InstallAPITests") { + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang/lib/InstallAPI", + "//llvm/lib/Testing/Support", + ] + sources = [ + "HeaderFileTest.cpp", + "FileListTest.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn index a813bf3f508b2d..31f567d6df159d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn @@ -50,6 +50,7 @@ static_library("Orc") { "OrcABISupport.cpp", "OrcV2CBindings.cpp", "RTDyldObjectLinkingLayer.cpp", + "SectCreate.cpp", "SimpleRemoteEPC.cpp", "SpeculateAnalyses.cpp", "Speculation.cpp", diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index ab4fd8e6403e76..3257f4b5ff2363 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -74,6 +74,7 @@ write_lit_config("lit_site_cfg") { "LLVM_USE_INTEL_JITEVENTS=0", "LLVM_USE_SANITIZER=", "LLVM_VERSION_MAJOR=$llvm_version_major", + "LLVM_VERSION_MINOR=$llvm_version_minor", "LLVM_VERSION_SUFFIX=git", "Python3_EXECUTABLE=$python_path", "TARGETS_TO_BUILD=$llvm_targets_to_build_string", diff --git a/llvm/utils/release/github-upload-release.py b/llvm/utils/release/github-upload-release.py index a8bb569d2fc999..14ec05062d88c8 100755 --- a/llvm/utils/release/github-upload-release.py +++ b/llvm/utils/release/github-upload-release.py @@ -77,20 +77,28 @@ def upload_files(repo, release, files): parser.add_argument("--token", type=str) parser.add_argument("--release", type=str) parser.add_argument("--user", type=str) +parser.add_argument("--user-token", type=str) # Upload args parser.add_argument("--files", nargs="+", type=str) args = parser.parse_args() -github = github.Github(args.token) -llvm_org = github.get_organization("llvm") +gh = github.Github(args.token) +llvm_org = gh.get_organization("llvm") llvm_repo = llvm_org.get_repo("llvm-project") if args.user: + if not args.user_token: + print("--user-token option required when --user is used") + sys.exit(1) # Validate that this user is allowed to modify releases. - user = github.get_user(args.user) - team = llvm_org.get_team_by_slug("llvm-release-managers") + user = gh.get_user(args.user) + team = ( + github.Github(args.user_token) + .get_organization("llvm") + .get_team_by_slug("llvm-release-managers") + ) if not team.has_in_members(user): print("User {} is not a allowed to modify releases".format(args.user)) sys.exit(1) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index 272bc3116c5fdc..92d844eefb7207 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -245,7 +245,7 @@ def MapOp : LinalgStructuredBase_Op<"map", [ } ``` - Shortened print form is available. Applies to simple maps with one + Shortened print form is available. Applies to simple maps with one non-yield operation inside the body. The example above will be printed as: @@ -458,6 +458,7 @@ def TransposeOp : LinalgStructuredBase_Op<"transpose", [ ::mlir::OperationState & odsState); }]; + let hasFolder = 1; let hasCustomAssemblyFormat = 1; let hasVerifier = 1; } diff --git a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td index 8ba7c111aea6bb..b9cd15e2062669 100644 --- a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td +++ b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td @@ -16,6 +16,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/IR/BuiltinTypes.td" include "mlir/IR/CommonAttrConstraints.td" include "mlir/IR/CommonTypeConstraints.td" +include "mlir/IR/OpAsmInterface.td" include "mlir/IR/SymbolInterfaces.td" //===----------------------------------------------------------------------===// @@ -78,7 +79,10 @@ def Mesh_MeshOp : Mesh_Op<"mesh", [Symbol]> { } def Mesh_MeshShapeOp : Mesh_Op<"mesh_shape", [ - Pure, DeclareOpInterfaceMethods]> { + Pure, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods + ]> { let summary = "Get the shape of the mesh."; let arguments = (ins FlatSymbolRefAttr:$mesh, @@ -101,7 +105,11 @@ def Mesh_MeshShapeOp : Mesh_Op<"mesh_shape", [ ]; } -def Mesh_ShardOp : Mesh_Op<"shard", [Pure, SameOperandsAndResultType]> { +def Mesh_ShardOp : Mesh_Op<"shard", [ + Pure, + SameOperandsAndResultType, + DeclareOpInterfaceMethods + ]> { let summary = "Annotate on how a tensor is sharded across a mesh."; let description = [{ The mesh.shard operation is designed to specify and guide the sharding @@ -194,7 +202,8 @@ def Mesh_ShardOp : Mesh_Op<"shard", [Pure, SameOperandsAndResultType]> { def Mesh_ProcessMultiIndexOp : Mesh_Op<"process_multi_index", [ Pure, - DeclareOpInterfaceMethods + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods ]> { let summary = "Get the multi index of current device along specified mesh axes."; let description = [{ @@ -221,7 +230,8 @@ def Mesh_ProcessMultiIndexOp : Mesh_Op<"process_multi_index", [ def Mesh_ProcessLinearIndexOp : Mesh_Op<"process_linear_index", [ Pure, - DeclareOpInterfaceMethods + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods ]> { let summary = "Get the linear index of the current device."; let description = [{ @@ -248,7 +258,10 @@ class Mesh_CollectiveCommunicationOpBase< string mnemonic, list traits = []> : Mesh_Op])> { + [ + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods + ])> { dag commonArgs = (ins FlatSymbolRefAttr:$mesh, DefaultValuedAttr:$mesh_axes @@ -258,7 +271,7 @@ class Mesh_CollectiveCommunicationOpBase< def Mesh_AllGatherOp : Mesh_CollectiveCommunicationOpBase<"all_gather", [ Pure, SameOperandsAndResultElementType, - SameOperandsAndResultRank + SameOperandsAndResultRank, ]> { let summary = "All-gather over a device mesh."; let description = [{ diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h index 41a14575ed1054..a00c9c31256c96 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h @@ -283,7 +283,13 @@ struct LevelType { } bool operator!=(const LevelType lhs) const { return !(*this == lhs); } - LevelType stripProperties() const { return LevelType(lvlBits & ~0xffff); } + LevelType stripStorageIrrelevantProperties() const { + // Properties other than `SoA` do not change the storage scheme of the + // sparse tensor. + constexpr uint64_t mask = + 0xffff & ~static_cast(LevelPropNonDefault::SoA); + return LevelType(lvlBits & ~mask); + } /// Get N of NOutOfM level type. constexpr uint64_t getN() const { diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h index 24a5640d820e43..1a090ddb782fdb 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h @@ -24,6 +24,7 @@ struct COOSegment { std::pair lvlRange; // [low, high) bool isSoA; + bool isAoS() const { return !isSoA; } bool isSegmentStart(Level l) const { return l == lvlRange.first; } bool inSegment(Level l) const { return l >= lvlRange.first && l < lvlRange.second; @@ -337,7 +338,9 @@ class SparseTensorType { /// Returns the starting level of this sparse tensor type for a /// trailing COO region that spans **at least** two levels. If /// no such COO region is found, then returns the level-rank. - Level getCOOStart() const; + /// + /// DEPRECATED: use getCOOSegment instead; + Level getAoSCOOStart() const; /// Returns [un]ordered COO type for this sparse tensor type. RankedTensorType getCOOType(bool ordered) const; diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h index fe9b16cb44b3da..d09c9e36f6ff88 100644 --- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h @@ -32,13 +32,11 @@ FailureOr computeTransposedType(RankedTensorType rankedTensorType, ArrayRef transposeVector); -/// Given a tensor::PackOp, compute the permutation vector to shuffle the -/// packed shape into the shape before any outer or inner permutations have -/// been applied. -/// i.e. for a pack from an ABCD layout to an ABCDba: -/// The packed shape would be ABCDba. -/// The pre-permutation shape would be AaBbCD. -SmallVector getPackInverseDestPermutation(PackOp packOp); +SmallVector getPackInverseDestPerm(tensor::PackOp packOp); +SmallVector getUnPackInverseSrcPerm(tensor::UnPackOp unpackOp); + +SmallVector getUnPackInverseSrcPerm(tensor::UnPackOp, + PackingMetadata &metadata); /// A tensor.insert_slice is a cast-like operation if it merely rank-extends the /// source tensor or inserts the source tensor into a destination tensor with diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index a0f02f6a7f259d..919f5130e1760f 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1786,6 +1786,22 @@ void TransposeOp::getEffects( getDpsInits()); } +LogicalResult TransposeOp::fold(FoldAdaptor adaptor, + SmallVectorImpl &result) { + // Single dimension transpose. + if (getPermutation().size() == 0) { + result.push_back(getInput()); + return success(); + } + // Identity permutation. + if (isIdentityPermutation(getPermutation())) { + result.push_back(getInput()); + return success(); + } + + return failure(); +} + //===----------------------------------------------------------------------===// // BroadcastOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 4ef8859fd5c430..299965bcfc3ab3 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -3152,7 +3152,8 @@ DiagnosedSilenceableFailure transform::VectorizeOp::apply( // TODO: Check that the correct number of vectorSizes was provided. for (Operation *target : targets) { - if (!isa(target)) { + if (!isa( + target)) { return mlir::emitSilenceableFailure(target->getLoc()) << "Unsupported Op, cannot vectorize"; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 01b393644679c5..a17bc8e4cd318f 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -237,7 +237,7 @@ FailureOr linalg::lowerPack(RewriterBase &rewriter, PackingMetadata packingMetadata = computePackingMetadata( packedTensorType.getRank(), packOp.getInnerDimsPos()); SmallVector packedToStripMinedShapePerm = - tensor::getPackInverseDestPermutation(packOp); + tensor::getPackInverseDestPerm(packOp); // 3. Compute the stripMinedShape: this is the packed shape before any outer // or inner permutations have been applied. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 2bd6929fea6142..ac043e87223dfe 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1405,8 +1405,7 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state, /// permutations. static SmallVector getTiledPackShape(tensor::PackOp packOp, ArrayRef destShape) { - return applyPermutation(destShape, - tensor::getPackInverseDestPermutation(packOp)); + return applyPermutation(destShape, tensor::getPackInverseDestPerm(packOp)); } /// Create a TransferReadOp from `source` with static shape `readShape`. If the @@ -1547,7 +1546,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, // Create TransposeOp. auto destPermutation = - invertPermutationVector(tensor::getPackInverseDestPermutation(packOp)); + invertPermutationVector(tensor::getPackInverseDestPerm(packOp)); auto transposeOp = rewriter.create( loc, shapeCastOp.getResult(), destPermutation); @@ -1559,6 +1558,112 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, return success(); } +/// Vectorize a `tensor::UnPackOp` to these 4 Ops: +/// Vector::TransferReadOp - Reads a vector from the source tensor +/// vector::TransposeOp - Transpose the Source tensor +/// ShapeCastOp - Reshape the data based on the target. +/// vector::TransferWriteOp. - Write the result vector back to the destination +/// tensor +static LogicalResult +vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp, + ArrayRef inputVectorSizes, + SmallVectorImpl &newResults) { + + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(unpackOp); + + RankedTensorType unpackTensorType = unpackOp.getSourceType(); + + ArrayRef innerDimPos = unpackOp.getInnerDimsPos(); + ArrayRef innerTiles = unpackOp.getStaticInnerTiles(); + + SmallVector readMaskShape(inputVectorSizes.begin(), + inputVectorSizes.end()); + ArrayRef outerDimsPerm = unpackOp.getOuterDimsPerm(); + ArrayRef sourceShape = unpackTensorType.getShape(); + + // ReadMask is the size of tensor used to read and apply mask. It is + // set like this: Let's say the vectorSize (VS) array is size 'N' and + // the sourceShape(SS) is 'M' where M >= N and InnerTileSizes (IT) of + // size M-N + // Thus: + // - initially: ReadMaskShape = vectorInputSizes + // - Divide all the readMaskShape locations pointed by innerDimPos + // by the innerTileSize attribute value. + // - if outer_dims_perms is present: do that permutation on readMaskShape. + // - Append the remaining shape from SS + // E.g. let's say let's say unpackTensorType.getShape() = <8x8x32x16> + // inner Dim Pos = [0, 1] and Inner Tiles = [32, 16], vector_sizes are [512, + // 128] and outer_dims_perm is [1, 0] then read shape is: + // ReadMaskShape(initial): [512, 128] + // Final Value(after innerDim Adjustment): [512/32, 128/16] + // = [16, 8] + // After applying outer_dims_perm: [8, 16] + // After appending the rest of the sourceShape: [8, 16, 32, 16] + + for (auto [index, size] : enumerate(innerTiles)) { + readMaskShape[innerDimPos[index]] = + llvm::divideCeil(readMaskShape[innerDimPos[index]], size); + } + if (!outerDimsPerm.empty()) { + applyPermutationToVector(readMaskShape, outerDimsPerm); + } + readMaskShape.append(sourceShape.begin() + inputVectorSizes.size(), + sourceShape.end()); + + ReifiedRankedShapedTypeDims reifiedRetShapes; + LogicalResult status = + cast(unpackOp.getOperation()) + .reifyResultShapes(rewriter, reifiedRetShapes); + if (status.failed()) { + LDBG("Unable to reify result shapes of " << unpackOp); + return failure(); + } + Location loc = unpackOp->getLoc(); + + auto padValue = rewriter.create( + loc, rewriter.getZeroAttr(unpackOp.getSourceType().getElementType())); + + // Read result, mask if necessary. If transferReadOp shape is not equal + // to shape of source, then a mask is necessary. + Value readResult = createReadOrMaskedRead( + rewriter, loc, unpackOp.getSource(), + ArrayRef(readMaskShape.begin(), readMaskShape.end()), padValue); + + PackingMetadata packMetadata; + SmallVector lastDimToInsertPosPerm = + tensor::getUnPackInverseSrcPerm(unpackOp, packMetadata); + ShapedType maskedOpShapedType = cast(readResult.getType()); + SmallVector stripMineShape(maskedOpShapedType.getShape()); + mlir::Type stripMineElemType = maskedOpShapedType.getElementType(); + applyPermutationToVector(stripMineShape, lastDimToInsertPosPerm); + RankedTensorType stripMineTensorType = + RankedTensorType::get(stripMineShape, stripMineElemType); + // Transpose the appropriate rows to match output. + vector::TransposeOp transposeOp = rewriter.create( + loc, readResult, lastDimToInsertPosPerm); + + // Collapse the vector to the size required by result. + RankedTensorType collapsedType = tensor::CollapseShapeOp::inferCollapsedType( + stripMineTensorType, packMetadata.reassociations); + mlir::VectorType vecCollapsedType = + VectorType::get(collapsedType.getShape(), collapsedType.getElementType()); + vector::ShapeCastOp shapeCastOp = rewriter.create( + loc, vecCollapsedType, transposeOp->getResult(0)); + + // WriteMaskShape had to match the shapecast shape for dynamic sizes, + // otherwise the validator complains that the mask size is invalid. + SmallVector writeMaskShape( + unpackOp.getDestType().hasStaticShape() + ? inputVectorSizes + : shapeCastOp.getResultVectorType().getShape()); + Operation *write = + createWriteOrMaskedWrite(rewriter, loc, shapeCastOp.getResult(), + reifiedRetShapes[0], writeMaskShape); + newResults.push_back(write->getResult(0)); + return success(); +} + /// Vectorize a `padOp` with (1) static result type, (2) constant padding value /// and (3) all-zero lowPad to /// `transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value))`. @@ -1655,6 +1760,25 @@ isValidMaskedInputVector(ArrayRef shape, return success(); } +/// Need to check if the inner-tiles are static/constant. +static LogicalResult +vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp, + ArrayRef inputVectorSizes) { + + if (llvm::any_of(unpackOp.getInnerTiles(), [](OpFoldResult res) { + return !getConstantIntValue(res).has_value(); + })) { + LDBG("Inner-tiles must be constant: " << unpackOp << "\n"); + return failure(); + } + llvm::ArrayRef resultShape = unpackOp.getDestType().getShape(); + if (!inputVectorSizes.empty() && + failed(isValidMaskedInputVector(resultShape, inputVectorSizes))) + return failure(); + + return success(); +} + static LogicalResult vectorizeLinalgOpPrecondition(LinalgOp linalgOp, ArrayRef inputVectorSizes, @@ -1703,9 +1827,10 @@ vectorizeLinalgOpPrecondition(LinalgOp linalgOp, } if (isElementwise(linalgOp)) return success(); - // TODO: isaConvolutionOpInterface that can also infer from generic features. - // But we will still need stride/dilation attributes that will be annoying to - // reverse-engineer... + + // TODO: isaConvolutionOpInterface that can also infer from generic + // features. But we will still need stride/dilation attributes that will be + // annoying to reverse-engineer... if (isa(linalgOp.getOperation())) return success(); // TODO: the common vector shape is equal to the static loop sizes only when @@ -1810,6 +1935,9 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition( .Case([&](auto packOp) { return vectorizePackOpPrecondition(packOp, inputVectorSizes); }) + .Case([&](auto unpackOp) { + return vectorizeUnPackOpPrecondition(unpackOp, inputVectorSizes); + }) .Default([](auto) { return failure(); }); } @@ -1829,11 +1957,11 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) { } /// Emit a suitable vector form for an operation. If provided, -/// `inputVectorSizes` are used to vectorize this operation. `inputVectorSizes` -/// must match the rank of the iteration space of the operation and the input -/// vector sizes must be greater than or equal to their counterpart iteration -/// space sizes, if static. `inputVectorShapes` also allows the vectorization of -/// operations with dynamic shapes. +/// `inputVectorSizes` are used to vectorize this operation. +/// `inputVectorSizes` must match the rank of the iteration space of the +/// operation and the input vector sizes must be greater than or equal to +/// their counterpart iteration space sizes, if static. `inputVectorShapes` +/// also allows the vectorization of operations with dynamic shapes. LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op, ArrayRef inputVectorSizes, ArrayRef inputScalableVecDims, @@ -1867,8 +1995,9 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op, auto vectorizeResult = TypeSwitch(op) .Case([&](auto linalgOp) { - // TODO: isaConvolutionOpInterface that can also infer from generic - // features. Will require stride/dilation attributes inference. + // TODO: isaConvolutionOpInterface that can also infer from + // generic features. Will require stride/dilation attributes + // inference. if (isa(linalgOp.getOperation())) { FailureOr convOr = vectorizeConvolution( rewriter, linalgOp, flatten1DDepthwiseConv); @@ -1902,6 +2031,10 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op, return vectorizeAsTensorPackOp(rewriter, packOp, inputVectorSizes, results); }) + .Case([&](auto unpackOp) { + return vectorizeAsTensorUnpackOp(rewriter, unpackOp, + inputVectorSizes, results); + }) .Default([](auto) { return failure(); }); if (failed(vectorizeResult)) { @@ -1919,7 +2052,6 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op, LogicalResult mlir::linalg::vectorizeCopy(RewriterBase &rewriter, memref::CopyOp copyOp) { - auto srcType = cast(copyOp.getSource().getType()); auto dstType = cast(copyOp.getTarget().getType()); if (!srcType.hasStaticShape() || !dstType.hasStaticShape()) @@ -2833,8 +2965,8 @@ struct Conv1DGenerator Value res = rewriter.create(loc, resType, resShaped, resPadding); - // The base vectorization case for channeled convolution is input: {n,w,c}, - // weight: {kw,c,f}, output: {n,w,f}. To reuse the base pattern + // The base vectorization case for channeled convolution is input: + // {n,w,c}, weight: {kw,c,f}, output: {n,w,f}. To reuse the base pattern // vectorization case, we do pre transpose on input, weight, and output. switch (conv1DOpOrder) { case Conv1DOpOrder::W: @@ -2877,9 +3009,9 @@ struct Conv1DGenerator return kw * (wSize / wSizeStep) + w; }; - // Compute contraction: O{n, w, f} += I{n, sw * w + dw * kw, c} * F{c, f} or - // perform outerproduct for non-channeled convolution or - // perform simple arith operation for pooling + // Compute contraction: O{n, w, f} += I{n, sw * w + dw * kw, c} * F{c, f} + // or perform outerproduct for non-channeled convolution or perform simple + // arith operation for pooling for (int64_t kw = 0; kw < kwSize; ++kw) { for (int64_t w = 0; w < wSize; w += wSizeStep) { switch (oper) { @@ -2908,9 +3040,9 @@ struct Conv1DGenerator // End vector-only rewrite part //===------------------------------------------------------------------===// - // The base vectorization case for channeled convolution is output: {n,w,f} - // To reuse the result from base pattern vectorization case, we post - // transpose the base case result. + // The base vectorization case for channeled convolution is output: + // {n,w,f} To reuse the result from base pattern vectorization case, we + // post transpose the base case result. switch (conv1DOpOrder) { case Conv1DOpOrder::W: case Conv1DOpOrder::Nwc: @@ -3348,9 +3480,9 @@ static FailureOr vectorizeConvolution(RewriterBase &rewriter, LinalgOp op, bool flatten1DDepthwiseConv) { // The ConvolutionOpInterface gives us guarantees of existence for - // strides/dilations. However, we do not need to rely on those, we can simply - // use them if present, otherwise use the default and let the generic conv. - // matcher in the ConvGenerator succeed or fail. + // strides/dilations. However, we do not need to rely on those, we can + // simply use them if present, otherwise use the default and let the generic + // conv. matcher in the ConvGenerator succeed or fail. auto strides = op->getAttrOfType("strides"); auto dilations = op->getAttrOfType("dilations"); auto stride = strides ? *strides.getValues().begin() : 1; diff --git a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp index 838255cf5a5ba3..50163880e85f96 100644 --- a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp +++ b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp @@ -24,7 +24,6 @@ #include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -34,7 +33,6 @@ #include #include #include -#include #include #define DEBUG_TYPE "mesh-ops" @@ -244,6 +242,11 @@ void MeshShapeOp::build(OpBuilder &odsBuilder, OperationState &odsState, MeshAxesAttr::get(odsBuilder.getContext(), axes)); } +void MeshShapeOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResults()[0], "mesh_shape"); +} + //===----------------------------------------------------------------------===// // mesh.shard attr //===----------------------------------------------------------------------===// @@ -307,6 +310,15 @@ bool MeshShardingAttr::operator==(MeshShardingAttr rhs) const { std::mem_fn(&MeshAxesAttr::empty)); } +//===----------------------------------------------------------------------===// +// mesh.shard op +//===----------------------------------------------------------------------===// + +void ShardOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "sharding_annotated"); +} + //===----------------------------------------------------------------------===// // mesh.process_multi_index op //===----------------------------------------------------------------------===// @@ -345,6 +357,11 @@ void ProcessMultiIndexOp::build(OpBuilder &odsBuilder, OperationState &odsState, MeshAxesAttr::get(odsBuilder.getContext(), axes)); } +void ProcessMultiIndexOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResults()[0], "proc_linear_idx"); +} + //===----------------------------------------------------------------------===// // mesh.process_linear_index op //===----------------------------------------------------------------------===// @@ -363,6 +380,11 @@ void ProcessLinearIndexOp::build(OpBuilder &odsBuilder, build(odsBuilder, odsState, mesh.getSymName()); } +void ProcessLinearIndexOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "proc_linear_idx"); +} + //===----------------------------------------------------------------------===// // collective communication ops //===----------------------------------------------------------------------===// @@ -606,6 +628,11 @@ void AllGatherOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void AllGatherOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "all_gather"); +} + //===----------------------------------------------------------------------===// // mesh.all_reduce op //===----------------------------------------------------------------------===// @@ -620,6 +647,11 @@ void AllReduceOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void AllReduceOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "all_reduce"); +} + //===----------------------------------------------------------------------===// // mesh.all_slice op //===----------------------------------------------------------------------===// @@ -654,6 +686,11 @@ void AllSliceOp::build(OpBuilder &odsBuilder, OperationState &odsState, APInt(sizeof(sliceAxis) * CHAR_BIT, sliceAxis)); } +void AllSliceOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "all_slice"); +} + //===----------------------------------------------------------------------===// // mesh.all_to_all op //===----------------------------------------------------------------------===// @@ -674,6 +711,11 @@ void AllToAllOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void AllToAllOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "all_to_all"); +} + //===----------------------------------------------------------------------===// // mesh.broadcast op //===----------------------------------------------------------------------===// @@ -698,6 +740,11 @@ void BroadcastOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void BroadcastOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "broadcast"); +} + //===----------------------------------------------------------------------===// // mesh.gather op //===----------------------------------------------------------------------===// @@ -724,6 +771,11 @@ void GatherOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void GatherOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "gather"); +} + //===----------------------------------------------------------------------===// // mesh.recv op //===----------------------------------------------------------------------===// @@ -747,6 +799,10 @@ void RecvOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void RecvOp::getAsmResultNames(function_ref setNameFn) { + setNameFn(getResult(), "recv"); +} + //===----------------------------------------------------------------------===// // mesh.reduce op //===----------------------------------------------------------------------===// @@ -770,6 +826,11 @@ void ReduceOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void ReduceOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "reduce"); +} + //===----------------------------------------------------------------------===// // mesh.reduce_scatter op //===----------------------------------------------------------------------===// @@ -791,6 +852,11 @@ void ReduceScatterOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void ReduceScatterOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "reduce_scatter"); +} + //===----------------------------------------------------------------------===// // mesh.scatter op //===----------------------------------------------------------------------===// @@ -817,6 +883,11 @@ void ScatterOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void ScatterOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "scatter"); +} + //===----------------------------------------------------------------------===// // mesh.send op //===----------------------------------------------------------------------===// @@ -839,6 +910,10 @@ void SendOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add>(context); } +void SendOp::getAsmResultNames(function_ref setNameFn) { + setNameFn(getResult(), "send"); +} + //===----------------------------------------------------------------------===// // mesh.shift op //===----------------------------------------------------------------------===// @@ -865,6 +940,11 @@ void ShiftOp::getCanonicalizationPatterns(RewritePatternSet &patterns, // offset % shift_axis_mesh_dim_size == 0. } +void ShiftOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "shift"); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 53e78d2c28b1d7..af7b85d458774d 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -182,7 +182,7 @@ StorageLayout::getFieldIndexAndStride(SparseTensorFieldKind kind, unsigned stride = 1; if (kind == SparseTensorFieldKind::CrdMemRef) { assert(lvl.has_value()); - const Level cooStart = SparseTensorType(enc).getCOOStart(); + const Level cooStart = SparseTensorType(enc).getAoSCOOStart(); const Level lvlRank = enc.getLvlRank(); if (lvl.value() >= cooStart && lvl.value() < lvlRank) { lvl = cooStart; @@ -811,10 +811,10 @@ bool mlir::sparse_tensor::SparseTensorType::isCOOType(Level startLvl, return !isUnique || isUniqueLvl(lvlRank - 1); } -Level mlir::sparse_tensor::SparseTensorType::getCOOStart() const { +Level mlir::sparse_tensor::SparseTensorType::getAoSCOOStart() const { SmallVector coo = getCOOSegments(); - if (!coo.empty()) { - assert(coo.size() == 1); + assert(coo.size() == 1 || coo.empty()); + if (!coo.empty() && coo.front().isAoS()) { return coo.front().lvlRange.first; } return lvlRank; @@ -1051,7 +1051,7 @@ static SparseTensorEncodingAttr getNormalizedEncodingForSpecifier(SparseTensorEncodingAttr enc) { SmallVector lts; for (auto lt : enc.getLvlTypes()) - lts.push_back(lt.stripProperties()); + lts.push_back(lt.stripStorageIrrelevantProperties()); return SparseTensorEncodingAttr::get( enc.getContext(), lts, @@ -1137,7 +1137,7 @@ static LogicalResult verifyPackUnPack(Operation *op, bool requiresStaticShape, return op->emitError("the sparse-tensor must have an encoding attribute"); // Verifies the trailing COO. - Level cooStartLvl = stt.getCOOStart(); + Level cooStartLvl = stt.getAoSCOOStart(); if (cooStartLvl < stt.getLvlRank()) { // We only supports trailing COO for now, must be the last input. auto cooTp = llvm::cast(lvlTps.back()); @@ -1452,7 +1452,7 @@ LogicalResult ToCoordinatesOp::verify() { LogicalResult ToCoordinatesBufferOp::verify() { auto stt = getSparseTensorType(getTensor()); - if (stt.getCOOStart() >= stt.getLvlRank()) + if (stt.getAoSCOOStart() >= stt.getLvlRank()) return emitError("expected sparse tensor with a COO region"); return success(); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp index 9414d81e6bf5c6..cd6b9b49893731 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseAssembler.cpp @@ -22,13 +22,9 @@ using namespace sparse_tensor; // Helper methods. //===----------------------------------------------------------------------===// -// TODO: reuse StorageLayout::foreachField? - -// TODO: we need COO AoS and SoA - // Convert type range to new types range, with sparse tensors externalized. -void convTypes(TypeRange types, SmallVectorImpl &convTypes, - SmallVectorImpl *extraTypes = nullptr) { +static void convTypes(TypeRange types, SmallVectorImpl &convTypes, + SmallVectorImpl *extraTypes = nullptr) { for (auto type : types) { // All "dense" data passes through unmodified. if (!getSparseTensorEncoding(type)) { @@ -42,29 +38,30 @@ void convTypes(TypeRange types, SmallVectorImpl &convTypes, convTypes.push_back(vtp); if (extraTypes) extraTypes->push_back(vtp); - // Convert the external representations of the pos/crd arrays. - for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) { - const auto lt = stt.getLvlType(lvl); - if (isCompressedLT(lt) || isLooseCompressedLT(lt)) { - auto ptp = RankedTensorType::get(shape, stt.getPosType()); - auto ctp = RankedTensorType::get(shape, stt.getCrdType()); - convTypes.push_back(ptp); - convTypes.push_back(ctp); - if (extraTypes) { - extraTypes->push_back(ptp); - extraTypes->push_back(ctp); - } - } else { - assert(isDenseLT(lt)); // TODO: handle other cases + + // Convert the external representation of the position/coordinate array. + foreachFieldAndTypeInSparseTensor(stt, [&convTypes, extraTypes]( + Type t, FieldIndex, + SparseTensorFieldKind kind, + Level, LevelType) { + if (kind == SparseTensorFieldKind::CrdMemRef || + kind == SparseTensorFieldKind::PosMemRef) { + ShapedType st = t.cast(); + auto rtp = RankedTensorType::get(st.getShape(), st.getElementType()); + convTypes.push_back(rtp); + if (extraTypes) + extraTypes->push_back(rtp); } - } + return true; + }); } } // Convert input and output values to [dis]assemble ops for sparse tensors. -void convVals(OpBuilder &builder, Location loc, TypeRange types, - ValueRange fromVals, ValueRange extraVals, - SmallVectorImpl &toVals, unsigned extra, bool isIn) { +static void convVals(OpBuilder &builder, Location loc, TypeRange types, + ValueRange fromVals, ValueRange extraVals, + SmallVectorImpl &toVals, unsigned extra, + bool isIn) { unsigned idx = 0; for (auto type : types) { // All "dense" data passes through unmodified. @@ -85,29 +82,28 @@ void convVals(OpBuilder &builder, Location loc, TypeRange types, if (!isIn) { inputs.push_back(extraVals[extra++]); retTypes.push_back(RankedTensorType::get(shape, stt.getElementType())); - cntTypes.push_back(builder.getIndexType()); + cntTypes.push_back(builder.getIndexType()); // nnz } + // Collect the external representations of the pos/crd arrays. - for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) { - const auto lt = stt.getLvlType(lvl); - if (isCompressedLT(lt) || isLooseCompressedLT(lt)) { + foreachFieldAndTypeInSparseTensor(stt, [&, isIn](Type t, FieldIndex, + SparseTensorFieldKind kind, + Level, LevelType) { + if (kind == SparseTensorFieldKind::CrdMemRef || + kind == SparseTensorFieldKind::PosMemRef) { if (isIn) { inputs.push_back(fromVals[idx++]); - inputs.push_back(fromVals[idx++]); } else { - Type pTp = stt.getPosType(); - Type cTp = stt.getCrdType(); - inputs.push_back(extraVals[extra++]); + ShapedType st = t.cast(); + auto rtp = RankedTensorType::get(st.getShape(), st.getElementType()); inputs.push_back(extraVals[extra++]); - retTypes.push_back(RankedTensorType::get(shape, pTp)); - retTypes.push_back(RankedTensorType::get(shape, cTp)); - cntTypes.push_back(pTp); - cntTypes.push_back(cTp); + retTypes.push_back(rtp); + cntTypes.push_back(rtp.getElementType()); } - } else { - assert(isDenseLT(lt)); // TODO: handle other cases } - } + return true; + }); + if (isIn) { // Assemble multiple inputs into a single sparse tensor. auto a = builder.create(loc, rtp, inputs); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index d4459c6ea1e521..0ccb11f3a6b858 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -194,7 +194,7 @@ static void createAllocFields(OpBuilder &builder, Location loc, valHeuristic = builder.create(loc, valHeuristic, lvlSizesValues[lvl]); } else if (sizeHint) { - if (stt.getCOOStart() == 0) { + if (stt.getAoSCOOStart() == 0) { posHeuristic = constantIndex(builder, loc, 2); crdHeuristic = builder.create( loc, constantIndex(builder, loc, lvlRank), sizeHint); // AOS @@ -1316,7 +1316,7 @@ struct SparseAssembleOpConverter : public OpConversionPattern { Value posBack = c0; // index to the last value in the position array Value memSize = c1; // memory size for current array - Level trailCOOStart = stt.getCOOStart(); + Level trailCOOStart = stt.getAoSCOOStart(); Level trailCOORank = stt.getLvlRank() - trailCOOStart; // Sets up SparseTensorSpecifier. for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) { @@ -1453,7 +1453,7 @@ struct SparseNewConverter : public OpConversionPattern { const auto dstTp = getSparseTensorType(op.getResult()); // Creating COO with NewOp is handled by direct IR codegen. All other cases // are handled by rewriting. - if (!dstTp.hasEncoding() || dstTp.getCOOStart() != 0) + if (!dstTp.hasEncoding() || dstTp.getAoSCOOStart() != 0) return failure(); // Implement as follows: diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 7326a6a3811284..2ccb2361b5efe1 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -1180,7 +1180,7 @@ struct NewRewriter : public OpRewritePattern { PatternRewriter &rewriter) const override { Location loc = op.getLoc(); auto stt = getSparseTensorType(op.getResult()); - if (!stt.hasEncoding() || stt.getCOOStart() == 0) + if (!stt.hasEncoding() || stt.getAoSCOOStart() == 0) return failure(); // Implement the NewOp as follows: diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp index 75a43891491879..b888dfadb9c714 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp @@ -568,7 +568,7 @@ Value sparse_tensor::genToCoordinates(OpBuilder &builder, Location loc, const auto srcTp = getSparseTensorType(tensor); const Type crdTp = srcTp.getCrdType(); const Type memTp = - get1DMemRefType(crdTp, /*withLayout=*/lvl >= srcTp.getCOOStart()); + get1DMemRefType(crdTp, /*withLayout=*/lvl >= srcTp.getAoSCOOStart()); return builder.create(loc, memTp, tensor, builder.getIndexAttr(lvl)); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.cpp index 3ab4157475cd4c..6ac26ad550f9f3 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.cpp @@ -103,7 +103,7 @@ void SparseTensorSpecifier::setSpecifierField(OpBuilder &builder, Location loc, Value sparse_tensor::SparseTensorDescriptor::getCrdMemRefOrView( OpBuilder &builder, Location loc, Level lvl) const { - const Level cooStart = rType.getCOOStart(); + const Level cooStart = rType.getAoSCOOStart(); if (lvl < cooStart) return getMemRefField(SparseTensorFieldKind::CrdMemRef, lvl); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.h index 3a61ec7a2236f3..c2f631605bf4b2 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorDescriptor.h @@ -137,7 +137,7 @@ class SparseTensorDescriptorImpl { } Value getAOSMemRef() const { - const Level cooStart = rType.getCOOStart(); + const Level cooStart = rType.getAoSCOOStart(); assert(cooStart < rType.getLvlRank()); return getMemRefField(SparseTensorFieldKind::CrdMemRef, cooStart); } diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp index f20008a1ed2b2f..186f85d2ce20a6 100644 --- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -72,36 +72,73 @@ mlir::tensor::computeTransposedType(RankedTensorType rankedTensorType, RTTBuilder(rankedTensorType).setShape(transposedShape); return transposedTensorType; } - -SmallVector -mlir::tensor::getPackInverseDestPermutation(PackOp packOp) { - // The permutation can be obtained from two permutations: - // a) Compute the permutation vector to move the last `numPackedDims` into - // the `innerPosDims` of a shape of rank `packedRank`. - // b) Compute the permutation vector to move outer dims if the pack op - // has outer_dims_perm. - // Apply (b) permutation on (a) permutation to get the final permutation. - int64_t numPackedDims = packOp.getInnerDimsPos().size(); - int64_t packedRank = packOp.getDestType().getRank(); - auto lastDims = llvm::to_vector( - llvm::seq(packedRank - numPackedDims, packedRank)); - PackingMetadata packingMetadata = computePackingMetadata( - packOp.getDestType().getRank(), packOp.getInnerDimsPos()); - SmallVector innerPositionsPerm = computePermutationVector( - packedRank, lastDims, packingMetadata.insertPositions); +/// The permutation can be obtained from two permutations: +/// a) Compute the permutation vector to move the last `numPackedDims` into +/// the `innerPosDims` of a shape of rank `rank`. +/// b) Compute the permutation vector to move outer dims if the +/// `outerPerm` parameter is not empty. +/// Apply (b) permutation on (a) permutation to get the final permutation. +static SmallVector +computePackUnPackPerm(int64_t rank, ArrayRef &innerDimsPos, + ArrayRef &outerPerm, + PackingMetadata &packingMetadata) { + int64_t numPackedDims = innerDimsPos.size(); + auto lastDims = + llvm::to_vector(llvm::seq(rank - numPackedDims, rank)); + packingMetadata = computePackingMetadata(rank, innerDimsPos); + SmallVector innerPositionsPerm = + computePermutationVector(rank, lastDims, packingMetadata.insertPositions); SmallVector outerPos = packingMetadata.outerPositions; - ArrayRef outerPerm = packOp.getOuterDimsPerm(); if (!outerPerm.empty()) applyPermutationToVector(outerPos, outerPerm); - SmallVector outerPositionPerm = computePermutationVector( - packedRank, packingMetadata.outerPositions, outerPos); + SmallVector outerPositionPerm = + computePermutationVector(rank, packingMetadata.outerPositions, outerPos); SmallVector packInverseDestPermutation = innerPositionsPerm; applyPermutationToVector(packInverseDestPermutation, outerPositionPerm); return packInverseDestPermutation; } +/// Shell function to compute the Destination Permutation of PackOp +/// This function uses the helper function `computePackUnPackPerm` to get +/// the permutation vector. Only major difference between UnPack and Pack is +/// that packOp uses destination rank whereas unpack Uses source rank. +SmallVector mlir::tensor::getPackInverseDestPerm(PackOp packOp) { + + PackingMetadata pMetadata; + int64_t packedRank = packOp.getDestType().getRank(); + ArrayRef innerDimPos = packOp.getInnerDimsPos(); + ArrayRef outerPerm = packOp.getOuterDimsPerm(); + SmallVector packInvDestPerm = + computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata); + return packInvDestPerm; +} + +/// Shell function to compute the Source Permutation of unPackOp. +/// This function, like the getPackInverseDestPerm uses the helper function +/// computePackUnPackPerm` to get the permutation vector. +/// Only major difference between UnPack and Pack is that packOp uses +/// destination rank whereas unpack Uses source rank. +SmallVector mlir::tensor::getUnPackInverseSrcPerm(UnPackOp unpackOp) { + PackingMetadata metadata; + return mlir::tensor::getUnPackInverseSrcPerm(unpackOp, metadata); +} + +/// Shell function to compute the Source rank permutation for unpackOp +/// Unpack requires some packing metadata data information, so created +/// another function where this value is passed by reference. +SmallVector +mlir::tensor::getUnPackInverseSrcPerm(UnPackOp unpackOp, + PackingMetadata &metadata) { + int64_t unpackRank = unpackOp.getSourceType().getRank(); + ArrayRef innerDimPos = unpackOp.getInnerDimsPos(); + ArrayRef outerPerm = unpackOp.getOuterDimsPerm(); + SmallVector unpackInvSrcPerm = + computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata); + return unpackInvSrcPerm; +} + bool mlir::tensor::isCastLikeInsertSliceOp(InsertSliceOp op) { llvm::SmallBitVector droppedDims = op.getDroppedDims(); int64_t srcDim = 0; diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 721f35162ef867..7adde3117deeaa 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -1029,3 +1029,38 @@ func.func @broadcast_same_shape(%input: tensor<2x3xf32>, %init: tensor<2x3xf32>) %0 = linalg.broadcast ins(%input: tensor<2x3xf32>) outs(%init: tensor<2x3xf32>) dimensions = [] return %0 : tensor<2x3xf32> } + +// ---- + +func.func @transpose_1d(%input: tensor<16xf32>, + %init: tensor<16xf32>) -> tensor<16xf32> { + %transpose = linalg.transpose + ins(%input:tensor<16xf32>) + outs(%init:tensor<16xf32>) + permutation = [0] + func.return %transpose : tensor<16xf32> +} + +// CHECK-LABEL: func @transpose_1d( +// CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<16xf32>, +// CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: tensor<16xf32>) +// CHECK-NOT: linalg.transpose +// CHECK: return %[[INPUT]] : tensor<16xf32> + +// ----- + +func.func @transpose_identity_perm(%input: tensor<16x32x64xf32>, + %init: tensor<16x32x64xf32>) -> tensor<16x32x64xf32> { + %transpose = linalg.transpose + ins(%input:tensor<16x32x64xf32>) + outs(%init:tensor<16x32x64xf32>) + permutation = [0, 1, 2] + func.return %transpose : tensor<16x32x64xf32> +} + +// CHECK-LABEL: func @transpose_identity_perm( +// CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<16x32x64xf32>, +// CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: tensor<16x32x64xf32>) +// CHECK-NOT: linalg.transpose +// CHECK: return %[[INPUT]] : tensor<16x32x64xf32> + diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir index d63433248ab1e0..0a197a0ee9fa68 100644 --- a/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir +++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir @@ -48,12 +48,8 @@ func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %a // CHECK: %[[PAD:.+]] = tensor.pad %[[SRC_SLICE]] // CHECK: tensor.yield %[[PAD_VAL]] // CHECK: } : tensor to tensor<8x2xf32> -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[PAD]] : tensor<8x2xf32>) -// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>) -// CHECK-SAME: permutation = [0, 1] -// CHECK: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}} +// CHECK-NOT: linalg.transpose +// CHECK: %{{.+}} = tensor.insert_slice %[[PAD]] into %{{.+}} module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -81,12 +77,8 @@ func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) // CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]]) // CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC]] // CHECK-SAME: [%[[IN_K]], %[[IN_C]]] [32, 8] [1, 1] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[TILE]] -// CHECK-SAME: outs(%[[EMPTY]] -// CHECK-SAME: permutation = [0, 1] -// CHECK: %[[SUB_ITER:.+]] = tensor.insert_slice %[[TRANSP]] into %{{[a-zA-Z0-9]+}} +// CHECK-NOT: linalg.transpose +// CHECK: %[[SUB_ITER:.+]] = tensor.insert_slice %[[TILE]] into %{{[a-zA-Z0-9]+}} // CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<32x8xf32> into tensor<1x1x32x8xf32> // CHECK: %{{.+}} = tensor.insert_slice %[[SUB_ITER]] into %{{[a-zA-Z0-9]+}} // CHECK-SAME: [%[[C]], %[[K]], 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x32x8xf32> into tensor<32x4x32x8xf32> diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir index eaad6bd8270476..7d87a0994004fe 100644 --- a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir +++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir @@ -29,12 +29,8 @@ func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2x // CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]] // CHECK: %[[PAD:.+]] = tensor.pad %[[SRC]] low[0, 0] high[3, 1] // CHECK: tensor.yield %[[PAD_VAL]] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[PAD]] : tensor<8x2xf32>) -// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>) -// CHECK-SAME: permutation = [0, 1] -// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]] +// CHECK-NOT: linalg.transpose +// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[PAD]] into %[[DEST]] // CHECK-SAME: [0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1] // CHECK: return %[[INSERT]] @@ -47,12 +43,8 @@ func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32 // CHECK-LABEL: func.func @simple_NC_to_CNnc // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[SRC]] : tensor<32x8xf32>) -// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>) -// CHECK-SAME: permutation = [0, 1] -// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]] +// CHECK-NOT: linalg.transpose +// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[SRC]] into %[[DEST]] // CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] // CHECK: return %[[INSERT]] diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir index f0d4b790520e03..7d64331c987841 100644 --- a/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir +++ b/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir @@ -57,12 +57,8 @@ func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13 // CHECK-SAME: [%[[I]], %[[J]]] [%[[OUT_I_SZ]], %[[OUT_J_SZ]]] // CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC_SLICE]] // CHECK-SAME: [0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1] : tensor<1x1x8x2xf32> to tensor<8x2xf32> -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[TILE]] : tensor<8x2xf32>) -// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>) -// CHECK-SAME: permutation = [0, 1] -// CHECK: %[[UNPACK_TILE:.+]] = tensor.extract_slice %[[TRANSP]] +// CHECK-NOT: linalg.transpose +// CHECK: %[[UNPACK_TILE:.+]] = tensor.extract_slice %[[TILE]] // CHECK-SAME: [0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]]] [1, 1] // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[UNPACK_TILE]] into %[[ITER_SLICE]] // CHECK-SAME: [0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]]] [1, 1] @@ -96,12 +92,8 @@ func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) // CHECK-SAME: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] // CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC_SLICE]] // CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x32x8xf32> to tensor<32x8xf32> -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[TILE]] -// CHECK-SAME: outs(%[[EMPTY]] -// CHECK-SAME: permutation = [0, 1] -// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %{{[a-zA-Z0-9]+}} +// CHECK-NOT: linalg.transpose +// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TILE]] into %{{[a-zA-Z0-9]+}} // CHECK-SAME: [%[[K]], %[[C]]] [32, 8] [1, 1] diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir index 02376808865006..153ce68b8f086c 100644 --- a/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir +++ b/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir @@ -27,14 +27,10 @@ func.func @simple_unpack_and_extract_slice(%input: tensor<1x1x8x2xf32>, %output: // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC]][0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[TILE]] : tensor<8x2xf32>) -// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>) -// CHECK-SAME: permutation = [0, 1] +// CHECK-NOT: linalg.transpose // They have the same type, so the insert_slice op is folded // away. -// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[TRANSP]][0, 0] [5, 1] [1, 1] +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[TILE]][0, 0] [5, 1] [1, 1] // CHECK: return %[[SLICE]] // ----- @@ -47,14 +43,10 @@ func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32 // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC]][0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32> -// CHECK: %[[TRANSP:.+]] = linalg.transpose -// CHECK-SAME: ins(%[[TILE]] : tensor<32x8xf32>) -// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>) -// CHECK-SAME: permutation = [0, 1] +// CHECK-NOT: linalg.transpose // They have the same type, so the insert_slice op is folded // away. -// CHECK: return %[[TRANSP]] +// CHECK: return %[[TILE]] // ----- @@ -75,7 +67,6 @@ func.func @simple_NCHWc_to_NCHW(%arg0: tensor<2x1x16x8x32xf32>, %arg1: tensor<2x // away. // CHECK: return %[[TRANSP]] - // ----- func.func @simple_NHWC_to_NCHW(%arg0: tensor<1x16x8x32xf32>, %arg1: tensor<1x32x16x8xf32>) -> tensor<1x32x16x8xf32> { diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 0272ac599aa3db..2d01d57304013c 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -697,3 +697,118 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +// CHECK-LABEL: func @test_vectorize_dynamic_shapes_unpack +func.func @test_vectorize_dynamic_shapes_unpack(%arg0: tensor, %arg1: tensor) -> tensor { +// CHECK: %[[C0:.*]] = arith.constant 0 +// CHECK: %[[DIM:.*]] = tensor.dim %arg0, %[[C0]] : tensor +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM0:.*]] = tensor.dim %arg0, %[[C1]] : tensor +// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 +// CHECK: %[[C01:.*]] = arith.constant 0 +// CHECK: %[[C02:.*]] = arith.constant 0 +// CHECK: %[[DIM4:.*]] = tensor.dim %arg1, %[[C02]] : tensor +// CHECK: %[[CNST14:.*]] = arith.constant 1 +// CHECK: %[[DIM6:.*]] = tensor.dim %arg1, %[[CNST14]] : tensor +// CHECK: %[[CNST16:.*]] = arith.constant 16 : index +// CHECK: %[[CNST2:.*]] = arith.constant 2 : index +// CHECK: %[[readMsk0:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[CNST16]], %[[CNST2]] : vector<2x1x16x2xi1> +// CHECK: %[[read0:.*]] = vector.mask %[[readMsk0]] {{.*}} vector.transfer_read %{{.*}} : tensor, vector<2x1x16x2xf32> } : vector<2x1x16x2xi1> -> vector<2x1x16x2xf32> +// CHECK: %[[trans0:.*]] = vector.transpose %[[read0]], [0, 3, 1, 2] : vector<2x1x16x2xf32> to vector<2x2x1x16xf32> +// CHECK: %[[sc0:.*]] = vector.shape_cast %[[trans0]] : vector<2x2x1x16xf32> to vector<4x16xf32> +// CHECK: %[[empt0:.*]] = tensor.empty +// CHECK: %[[writeMsk0:.*]] = vector.create_mask {{.*}} : vector<4x16xi1> +// CHECK: %[[write0:.*]] = vector.mask %[[writeMsk0:.*]] {{.*}} vector.transfer_write %[[sc0]], %[[empt0]] +// CHECK: return %[[write0]] + %ret = tensor.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor -> tensor + return %ret : tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [4, 16] : !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @test_vectorize_unpack +func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { + // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 + // CHECK: %[[C0:.*]]= arith.constant 0 : index + // CHECK: %[[C8:.*]] = arith.constant 8 : index + // CHECK: %[[C80:.*]] = arith.constant 8 : index + // CHECK: %[[C32:.*]] = arith.constant 32 : index + // CHECK: %[[C16:.*]] = arith.constant 16 : index + // CHECK: %[[MSK0:.*]] = vector.create_mask %[[C8]], %[[C80]], %[[C32]], %[[C16]] : vector<16x8x32x16xi1> + // CHECK: %[[READ0:.*]] = vector.mask %[[MSK0]] {{.*}} : vector<16x8x32x16xi1> -> vector<16x8x32x16xf32> + // CHECK: %[[TRANSP0:.*]] = vector.transpose %[[READ0]], [0, 2, 1, 3] : vector<16x8x32x16xf32> to vector<16x32x8x16xf32> + // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP0]] : vector<16x32x8x16xf32> to vector<512x128xf32> + // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32> + // CHECK: %[[C01:.*]] = arith.constant 0 : index + // CHECK: %[[C256:.*]] = arith.constant 256 : index + // CHECK: %[[C128:.*]] = arith.constant 128 : index + // CHECK: %[[WRITEMSK:.*]] = vector.create_mask %[[C256]], %[[C128]] : vector<512x128xi1> + // CHECK: %[[WRIT:.*]] = vector.mask %[[WRITEMSK]] {{.*}} : vector<512x128xi1> -> tensor<256x128xf32> + // CHECK: return %[[WRIT]] : tensor<256x128xf32> + %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + return %0 : tensor<256x128xf32> + } + module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [512, 128] : !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @test_vectorize_unpack_no_masks +func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { + // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 + // CHECK: %[[C0:.*]] = arith.constant 0 : index + // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32> + // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32> + // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32> + // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32> + // CHECK: %[[C00:.*]] = arith.constant 0 : index + // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> + // CHECK: return %[[WRIT]] : tensor<256x128xf32> + %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + return %0 : tensor<256x128xf32> + } + module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op + transform.yield + } + } + + // ----- + + // CHECK-LABEL: test_vectorize_unpack_with_outer_perm + func.func @test_vectorize_unpack_with_outer_perm(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { + // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 + // CHECK: %[[C0:.*]] = arith.constant 0 : index + // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32> + // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32> + // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32> + // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32> + // CHECK: %[[C00:.*]] = arith.constant 0 : index + // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> + // CHECK: return %[[WRIT]] : tensor<256x128xf32> + %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + return %0 : tensor<256x128xf32> + } + module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Dialect/Mesh/process-multi-index-op-lowering.mlir b/mlir/test/Dialect/Mesh/process-multi-index-op-lowering.mlir index 677a5982ea2540..e23cfd79a42745 100644 --- a/mlir/test/Dialect/Mesh/process-multi-index-op-lowering.mlir +++ b/mlir/test/Dialect/Mesh/process-multi-index-op-lowering.mlir @@ -6,7 +6,7 @@ mesh.mesh @mesh2d(shape = ?x?) func.func @multi_index_2d_mesh() -> (index, index) { // CHECK: %[[LINEAR_IDX:.*]] = mesh.process_linear_index on @mesh2d : index // CHECK: %[[MESH_SHAPE:.*]]:2 = mesh.mesh_shape @mesh2d : index, index - // CHECK: %[[MULTI_IDX:.*]]:2 = affine.delinearize_index %0 into (%[[MESH_SHAPE]]#0, %[[MESH_SHAPE]]#1) : index, index + // CHECK: %[[MULTI_IDX:.*]]:2 = affine.delinearize_index %[[LINEAR_IDX]] into (%[[MESH_SHAPE]]#0, %[[MESH_SHAPE]]#1) : index, index %0:2 = mesh.process_multi_index on @mesh2d : index, index // CHECK: return %[[MULTI_IDX]]#0, %[[MULTI_IDX]]#1 : index, index return %0#0, %0#1 : index, index @@ -16,7 +16,7 @@ func.func @multi_index_2d_mesh() -> (index, index) { func.func @multi_index_2d_mesh_single_inner_axis() -> index { // CHECK: %[[LINEAR_IDX:.*]] = mesh.process_linear_index on @mesh2d : index // CHECK: %[[MESH_SHAPE:.*]]:2 = mesh.mesh_shape @mesh2d : index, index - // CHECK: %[[MULTI_IDX:.*]]:2 = affine.delinearize_index %0 into (%[[MESH_SHAPE]]#0, %[[MESH_SHAPE]]#1) : index, index + // CHECK: %[[MULTI_IDX:.*]]:2 = affine.delinearize_index %[[LINEAR_IDX]] into (%[[MESH_SHAPE]]#0, %[[MESH_SHAPE]]#1) : index, index %0 = mesh.process_multi_index on @mesh2d axes = [0] : index // CHECK: return %[[MULTI_IDX]]#0 : index return %0 : index diff --git a/mlir/test/Dialect/SparseTensor/external.mlir b/mlir/test/Dialect/SparseTensor/external.mlir index c17ba13e86c926..b5701ad2024264 100644 --- a/mlir/test/Dialect/SparseTensor/external.mlir +++ b/mlir/test/Dialect/SparseTensor/external.mlir @@ -100,3 +100,27 @@ func.func @sparse_out2(%arg0: tensor<64x64xf32>) -> (tensor<64x64xf32>, tensor<6 func.func @sparse_inout(%arg0: tensor<64x64xf32, #sparse>) -> tensor<64x64xf32, #sparse> { return %arg0 : tensor<64x64xf32, #sparse> } + +// ----- + +// CHECK-LABEL: func.func @sparse_inout_coo_soa( +// CHECK-SAME: %[[A:.*0]]: tensor, +// CHECK-SAME: %[[B:.*1]]: tensor, +// CHECK-SAME: %[[C:.*2]]: tensor, +// CHECK-SAME: %[[D:.*3]]: tensor, +// CHECK-SAME: %[[E:.*4]]: tensor, +// CHECK-SAME: %[[F:.*5]]: tensor, +// CHECK-SAME: %[[G:.*6]]: tensor, +// CHECK-SAME: %[[H:.*7]]: tensor) -> (tensor, tensor, tensor, tensor) { +// CHECK: %[[I:.*]] = sparse_tensor.assemble %[[A]], %[[B]], %[[C]], %[[D]] +// CHECK: %[[F:.*]] = call @_internal_sparse_inout_coo_soa(%[[I]]) +// CHECK: sparse_tensor.disassemble %[[F]] +// CHECK: return +// CHECK: } +// CHECK: func.func private @_internal_sparse_inout +#sparse = #sparse_tensor.encoding<{ + map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton(soa)) +}> +func.func @sparse_inout_coo_soa(%arg0: tensor<64x64xf32, #sparse>) -> tensor<64x64xf32, #sparse> { + return %arg0 : tensor<64x64xf32, #sparse> +} diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/mmt4d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/mmt4d.mlir new file mode 100644 index 00000000000000..8ee4e1fb48fef1 --- /dev/null +++ b/mlir/test/Integration/Dialect/Linalg/CPU/mmt4d.mlir @@ -0,0 +1,121 @@ +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: -transform-interpreter -test-transform-dialect-erase-schedule \ +// DEFINE: -one-shot-bufferize -func-bufferize -cse -canonicalize -convert-vector-to-scf -test-lower-to-llvm -o %t +// DEFINE: %{entry_point} = mmt4d +// DEFINE: %{run} = mlir-cpu-runner %t -e %{entry_point} -entry-point-result=void \ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils + +// RUN: %{compile} + +// RUN: %{run} | FileCheck %s + +func.func @mmt4d() { + // Allocate the matrices + %A_alloc = tensor.empty() : tensor<2x2x3x1xi32> + %B_alloc = tensor.empty() : tensor<2x2x3x1xi32> + %C_alloc = tensor.empty() : tensor<2x2x3x3xi32> + %C_in = arith.constant dense<[ + [[[ 1, 2, 3], + [ 4, 5, 6], + [ 7, 8, 9]], + [[ 11, 12, 13], + [ 14, 15, 16], + [ 17, 18, 19]]], + [[[ 21, 22, 23], + [ 24, 25, 26], + [ 27, 28, 29]], + [[ 31, 32, 33], + [ 34, 35, 36], + [ 37, 38, 39]]] + ]> : tensor<2x2x3x3xi32> + + // Initialise the matrices + %three = arith.constant 3 : i32 + %four = arith.constant 4 : i32 + %A = linalg.fill ins(%three : i32) outs(%A_alloc : tensor<2x2x3x1xi32>) -> tensor<2x2x3x1xi32> + %B = linalg.fill ins(%four : i32) outs(%B_alloc : tensor<2x2x3x1xi32>) -> tensor<2x2x3x1xi32> + + // Matmul + %C_out = linalg.mmt4d ins(%A, %B: tensor<2x2x3x1xi32>, tensor<2x2x3x1xi32>) outs(%C_in: tensor<2x2x3x3xi32>) -> tensor<2x2x3x3xi32> + + // Print and verify the output + // CHECK: Unranked Memref {{.*}} rank = 4 offset = 0 sizes = [2, 2, 3, 3] strides = [18, 9, 3, 1] data = + // C[0, 0] + // CHECK-NEXT: [25, 26, 27] + // CHECK-NEXT: [28, 29, 30] + // CHECK-NEXT: [31, 32, 33] + // C[0, 1] + // CHECK-NEXT: [35, 36, 37] + // CHECK-NEXT: [38, 39, 40] + // CHECK-NEXT: [41, 42, 43] + // C[1, 0] + // CHECK-NEXT: [45, 46, 47] + // CHECK-NEXT: [48, 49, 50] + // CHECK-NEXT: [51, 52, 53] + // C[1, 1] + // CHECK-NEXT: [55, 56, 57] + // CHECK-NEXT: [58, 59, 60] + // CHECK-NEXT: [61, 62, 63] + + %xf = tensor.cast %C_out : tensor<2x2x3x3xi32> to tensor<*xi32> + call @printMemrefI32(%xf) : (tensor<*xi32>) -> () + + return +} + +module @transforms attributes { transform.with_named_sequence } { + transform.named_sequence @__transform_main(%module: !transform.any_op {transform.readonly}) { + %mmt4d = transform.collect_matching @match_mmt4d in %module : (!transform.any_op) -> (!transform.any_op) + %func = transform.get_parent_op %mmt4d {isolated_from_above} : (!transform.any_op) -> !transform.op<"func.func"> + + // Step 1: Tile + // Tile parallel dims + %tiled_linalg_op_p, %loops:4 = transform.structured.tile_using_for %mmt4d[1, 1, 0, 3, 3, 0] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + // Tile reduction dims + %tiled_linalg_op_r, %loops2:2 = transform.structured.tile_using_for %tiled_linalg_op_p[0, 0, 1, 0, 0, 1] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + + // Step 2: Vectorize + transform.structured.vectorize %tiled_linalg_op_r : !transform.any_op + + // Step 3: Simplify + // vector.multi_reduction --> vector.contract + // Generates a 6-dim vector.contract with the dim matching the original MMT4D Op + // and with the following split into parallel and reduction dims: + // * parallel, parallel, reduction, parallel, parallel, reduction + transform.apply_patterns to %func { + transform.apply_patterns.vector.reduction_to_contract + // Reduce the rank of xfer ops. This transforms vector.contract to be + // more matmul-like and to enable the lowering to outer product Ops. + transform.apply_patterns.vector.transfer_permutation_patterns + } : !transform.op<"func.func"> + + // Hoisting and LICM - not strictly required + %func_h = transform.structured.hoist_redundant_vector_transfers %func + : (!transform.op<"func.func">) -> !transform.op<"func.func"> + %all_loops = transform.structured.match interface{LoopLikeInterface} in %func_h + : (!transform.op<"func.func">) -> !transform.any_op + transform.apply_licm to %all_loops : !transform.any_op + transform.loop.hoist_loop_invariant_subsets %all_loops : !transform.any_op + + // Simplify the 6-dim vector.contract into a 3-dim matmul-like + // vector.contract with the following split into parallel and reduction + // dims: + // * parallel, parallel, reduction + transform.apply_patterns to %func_h { + transform.apply_patterns.vector.reduction_to_contract + transform.apply_patterns.vector.cast_away_vector_leading_one_dim + transform.apply_patterns.canonicalization + } : !transform.op<"func.func"> + transform.yield + } + + transform.named_sequence @match_mmt4d( + %entry: !transform.any_op {transform.readonly}) -> !transform.any_op { + transform.match.operation_name %entry ["linalg.mmt4d"] : !transform.any_op + transform.yield %entry : !transform.any_op + } +} + +func.func private @printMemrefI32(%ptr : tensor<*xi32>) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir index aaf15ecc681fc2..16252c1005ebbb 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir @@ -34,6 +34,10 @@ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }> +#SortedCOOSoA = #sparse_tensor.encoding<{ + map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton(soa)) +}> + #CSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }> @@ -50,7 +54,7 @@ module { func.func @add_coo_csr(%arga: tensor<8x8xf32, #CSR>, - %argb: tensor<8x8xf32, #SortedCOO>) + %argb: tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32> { %empty = tensor.empty() : tensor<8x8xf32> %zero = arith.constant 0.000000e+00 : f32 @@ -59,7 +63,7 @@ module { outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32> %0 = linalg.generic #trait ins(%arga, %argb: tensor<8x8xf32, #CSR>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) outs(%init: tensor<8x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 @@ -69,7 +73,7 @@ module { } func.func @add_coo_coo(%arga: tensor<8x8xf32, #SortedCOO>, - %argb: tensor<8x8xf32, #SortedCOO>) + %argb: tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32> { %empty = tensor.empty() : tensor<8x8xf32> %zero = arith.constant 0.000000e+00 : f32 @@ -78,7 +82,7 @@ module { outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32> %0 = linalg.generic #trait ins(%arga, %argb: tensor<8x8xf32, #SortedCOO>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) outs(%init: tensor<8x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 @@ -88,12 +92,12 @@ module { } func.func @add_coo_coo_out_coo(%arga: tensor<8x8xf32, #SortedCOO>, - %argb: tensor<8x8xf32, #SortedCOO>) + %argb: tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32, #SortedCOO> { %init = tensor.empty() : tensor<8x8xf32, #SortedCOO> %0 = linalg.generic #trait ins(%arga, %argb: tensor<8x8xf32, #SortedCOO>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) outs(%init: tensor<8x8xf32, #SortedCOO>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 @@ -104,7 +108,7 @@ module { func.func @add_coo_dense(%arga: tensor<8x8xf32>, - %argb: tensor<8x8xf32, #SortedCOO>) + %argb: tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32> { %empty = tensor.empty() : tensor<8x8xf32> %zero = arith.constant 0.000000e+00 : f32 @@ -113,7 +117,7 @@ module { outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32> %0 = linalg.generic #trait ins(%arga, %argb: tensor<8x8xf32>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) outs(%init: tensor<8x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 @@ -154,19 +158,19 @@ module { %COO_A = sparse_tensor.convert %A : tensor<8x8xf32> to tensor<8x8xf32, #SortedCOO> %COO_B = sparse_tensor.convert %B - : tensor<8x8xf32> to tensor<8x8xf32, #SortedCOO> + : tensor<8x8xf32> to tensor<8x8xf32, #SortedCOOSoA> %C1 = call @add_coo_dense(%A, %COO_B) : (tensor<8x8xf32>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32> %C2 = call @add_coo_csr(%CSR_A, %COO_B) : (tensor<8x8xf32, #CSR>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32> %C3 = call @add_coo_coo(%COO_A, %COO_B) : (tensor<8x8xf32, #SortedCOO>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32> %COO_RET = call @add_coo_coo_out_coo(%COO_A, %COO_B) : (tensor<8x8xf32, #SortedCOO>, - tensor<8x8xf32, #SortedCOO>) + tensor<8x8xf32, #SortedCOOSoA>) -> tensor<8x8xf32, #SortedCOO> %C4 = sparse_tensor.convert %COO_RET : tensor<8x8xf32, #SortedCOO> to tensor<8x8xf32> // @@ -204,7 +208,7 @@ module { bufferization.dealloc_tensor %C4 : tensor<8x8xf32> bufferization.dealloc_tensor %CSR_A : tensor<8x8xf32, #CSR> bufferization.dealloc_tensor %COO_A : tensor<8x8xf32, #SortedCOO> - bufferization.dealloc_tensor %COO_B : tensor<8x8xf32, #SortedCOO> + bufferization.dealloc_tensor %COO_B : tensor<8x8xf32, #SortedCOOSoA> bufferization.dealloc_tensor %COO_RET : tensor<8x8xf32, #SortedCOO> diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 31ceb05ad1dbfd..eb8c0aba1d33b6 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -1414,7 +1414,7 @@ void OperationFormat::genElementParser(FormatElement *element, MethodBody &body, } body.unindent() << "}\n"; body.unindent(); - } else if (dyn_cast(element)) { + } else if (isa(element)) { body << " if (parseProperties(parser, result))\n" << " return ::mlir::failure();\n"; } else if (auto *customDir = dyn_cast(element)) { @@ -2239,7 +2239,7 @@ void OperationFormat::genElementPrinter(FormatElement *element, } // Emit the attribute dictionary. - if (dyn_cast(element)) { + if (isa(element)) { genPropDictPrinter(*this, op, body); lastWasPunctuation = false; return; diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp index 77c34cb03e987e..426a3482960be4 100644 --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -1785,7 +1785,7 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs( range); sizes.push_back(formatv("static_cast({0}.size())", range)); } else { - sizes.push_back("1"); + sizes.emplace_back("1"); os << formatv("tblgen_values.push_back("); if (node.isNestedDagArg(argIndex)) { os << symbolInfoMap.getValueAndRangeUse( diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp index bb7241c2d51969..a37a2afc226453 100644 --- a/mlir/unittests/Bytecode/BytecodeTest.cpp +++ b/mlir/unittests/Bytecode/BytecodeTest.cpp @@ -23,7 +23,7 @@ using namespace llvm; using namespace mlir; -StringLiteral IRWithResources = R"( +StringLiteral irWithResources = R"( module @TestDialectResources attributes { bytecode.test = dense_resource : tensor<4xi32> } {} @@ -42,7 +42,7 @@ TEST(Bytecode, MultiModuleWithResource) { Builder builder(&context); ParserConfig parseConfig(&context); OwningOpRef module = - parseSourceString(IRWithResources, parseConfig); + parseSourceString(irWithResources, parseConfig); ASSERT_TRUE(module); // Write the module to bytecode @@ -53,15 +53,15 @@ TEST(Bytecode, MultiModuleWithResource) { // Create copy of buffer which is aligned to requested resource alignment. constexpr size_t kAlignment = 0x20; - size_t buffer_size = buffer.size(); - buffer.reserve(buffer_size + kAlignment - 1); + size_t bufferSize = buffer.size(); + buffer.reserve(bufferSize + kAlignment - 1); size_t pad = ~(uintptr_t)buffer.data() + 1 & kAlignment - 1; buffer.insert(0, pad, ' '); - StringRef aligned_buffer(buffer.data() + pad, buffer_size); + StringRef alignedBuffer(buffer.data() + pad, bufferSize); // Parse it back OwningOpRef roundTripModule = - parseSourceString(aligned_buffer, parseConfig); + parseSourceString(alignedBuffer, parseConfig); ASSERT_TRUE(roundTripModule); // FIXME: Parsing external resources does not work on big-endian diff --git a/mlir/unittests/Debug/FileLineColLocBreakpointManagerTest.cpp b/mlir/unittests/Debug/FileLineColLocBreakpointManagerTest.cpp index 48c62ad20a04a6..5b48e80749c8b8 100644 --- a/mlir/unittests/Debug/FileLineColLocBreakpointManagerTest.cpp +++ b/mlir/unittests/Debug/FileLineColLocBreakpointManagerTest.cpp @@ -98,7 +98,7 @@ TEST(FileLineColLocBreakpointManager, OperationMatch) { // Set a breakpoint matching only the second operation in the list. auto *breakpoint = breakpointManager.addBreakpoint( fileNames[0], lineColLoc[0].first, lineColLoc[0].second); - auto checkMatchIdxs = [&](DenseSet idxs) { + auto checkMatchIdxs = [&](const DenseSet &idxs) { counter = 0; int reference = 0; for (int i = 0; i < (int)operations.size(); ++i) { diff --git a/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp b/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp index 56a98cc205ab43..3a6bcbd999a57a 100644 --- a/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp +++ b/mlir/unittests/Dialect/SPIRV/SerializationTest.cpp @@ -77,7 +77,7 @@ class SerializationTest : public ::testing::Test { } // Inserts an Integer or a Vector of Integers constant of value 'val'. - spirv::ConstantOp AddConstInt(Type type, APInt val) { + spirv::ConstantOp AddConstInt(Type type, const APInt &val) { OpBuilder builder(module->getRegion()); auto loc = UnknownLoc::get(&context);