From 008d5ed1816d1e1f87bc4a59e4c605fb33352971 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 11:17:56 +0200 Subject: [PATCH 01/20] SPMI: Improve speed significantly for large diffs This starts communicating more information about diffs back from superpmi and starts using it in the driver. The current information is the base size, diff size, base instructions, diff instructions and context size. The driver uses the base size/diff size to pick a small number of interesting contexts and only creates disassembly for these contexts. jit-analyze is then also invoked on only these contexts, but the contexts are picked so that they overlap with what jit-analyze would display. Additionally, we also pick the top 20 smallest contexts (in terms of context size) -- I frequently use the smallest contexts with diffs to investigate my change. The new behavior is only enabled when no custom metrics are specified. If custom metrics are specified we fall back to producing all disassembly files and leave it up to jit-analyze to extract and analyze the metrics specified. The net result is that we can now get SPMI diffs for changes with even hundreds of thousands of diffs in the same time as it takes to get diffs for a small change. Fix #76178 --- src/coreclr/scripts/superpmi.py | 106 ++++++++++++--- .../tools/superpmi/superpmi/CMakeLists.txt | 1 + .../tools/superpmi/superpmi/commandline.cpp | 8 +- .../tools/superpmi/superpmi/commandline.h | 4 +- .../tools/superpmi/superpmi/fileio.cpp | 115 ++++++++++++++++ src/coreclr/tools/superpmi/superpmi/fileio.h | 125 +++++++++++++++++ .../superpmi/superpmi/metricssummary.cpp | 19 ++- .../tools/superpmi/superpmi/metricssummary.h | 2 +- .../superpmi/superpmi/parallelsuperpmi.cpp | 64 +++++++-- .../tools/superpmi/superpmi/superpmi.cpp | 128 ++++++++++++------ 10 files changed, 479 insertions(+), 93 deletions(-) create mode 100644 src/coreclr/tools/superpmi/superpmi/fileio.cpp create mode 100644 src/coreclr/tools/superpmi/superpmi/fileio.h diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 45dfcfe8746e5..4f331a627c1bd 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -501,6 +501,11 @@ def read_csv_metrics(path): return dict +def read_csv_diffs(path): + with open(path) as csv_file: + reader = csv.DictReader(csv_file) + return list(reader) + def determine_clrjit_compiler_version(clrjit_path): """ Obtain the version of the compiler that was used to compile the clrjit at the specified path. @@ -1568,7 +1573,7 @@ def replay_with_asm_diffs(self): logging.info("Running asm diffs of %s", mch_file) fail_mcl_file = os.path.join(temp_location, os.path.basename(mch_file) + "_fail.mcl") - diff_mcl_file = os.path.join(temp_location, os.path.basename(mch_file) + "_diff.mcl") + diffs_info = os.path.join(temp_location, os.path.basename(mch_file) + "_diffs.csv") base_metrics_summary_file = os.path.join(temp_location, os.path.basename(mch_file) + "_base_metrics.csv") diff_metrics_summary_file = os.path.join(temp_location, os.path.basename(mch_file) + "_diff_metrics.csv") @@ -1577,7 +1582,7 @@ def replay_with_asm_diffs(self): "-a", # Asm diffs "-v", "ewmi", # display errors, warnings, missing, jit info "-f", fail_mcl_file, # Failing mc List - "-diffMCList", diff_mcl_file, # Create all of the diffs in an mcl file + "-diffsInfo", diffs_info, # Information about diffs "-r", os.path.join(temp_location, "repro"), # Repro name, create .mc repro files "-baseMetricsSummary", base_metrics_summary_file, # Create summary of metrics we can use to get total code size impact "-diffMetricsSummary", diff_metrics_summary_file, @@ -1633,8 +1638,10 @@ def replay_with_asm_diffs(self): repro_base_command_line = "{} {} {}".format(self.superpmi_path, " ".join(altjit_asm_diffs_flags), self.diff_jit_path) save_repro_mc_files(temp_location, self.coreclr_args, artifacts_base_name, repro_base_command_line) + diffs = read_csv_diffs(diffs_info) + # This file had asm diffs; keep track of that. - has_diffs = is_nonzero_length_file(diff_mcl_file) + has_diffs = len(diffs) > 0 if has_diffs: files_with_asm_diffs.append(mch_file) @@ -1649,12 +1656,6 @@ def replay_with_asm_diffs(self): if return_code == 0: logging.warning("Warning: SuperPMI returned a zero exit code, but generated a non-zero-sized mcl file") - self.diff_mcl_contents = None - with open(diff_mcl_file) as file_handle: - mcl_lines = file_handle.readlines() - mcl_lines = [item.strip() for item in mcl_lines] - self.diff_mcl_contents = mcl_lines - asm_root_dir = create_unique_directory_name(self.coreclr_args.spmi_location, "asm.{}".format(artifacts_base_name)) base_asm_location = os.path.join(asm_root_dir, "base") diff_asm_location = os.path.join(asm_root_dir, "diff") @@ -1672,13 +1673,13 @@ def replay_with_asm_diffs(self): text_differences = queue.Queue() jit_dump_differences = queue.Queue() - async def create_replay_artifacts(print_prefix, item, self, mch_file, env, jit_differences_queue, base_location, diff_location, extension): + async def create_replay_artifacts(print_prefix, context_index, self, mch_file, env, jit_differences_queue, base_location, diff_location, extension): """ Run superpmi over an MC to create JIT asm or JIT dumps for the method. """ # Setup flags to call SuperPMI for both the diff jit and the base jit flags = [ - "-c", item, + "-c", str(context_index), "-v", "q" # only log from the jit. ] flags += altjit_replay_flags @@ -1690,7 +1691,7 @@ async def create_replay_artifacts(print_prefix, item, self, mch_file, env, jit_d async def create_one_artifact(jit_path: str, location: str, flags) -> str: command = [self.superpmi_path] + flags + [jit_path, mch_file] - item_path = os.path.join(location, "{}{}".format(item, extension)) + item_path = os.path.join(location, "{}{}".format(context_index, extension)) modified_env = env.copy() modified_env['COMPlus_JitStdOutFile'] = item_path logging.debug("%sGenerating %s", print_prefix, item_path) @@ -1706,22 +1707,20 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: diff_txt = await create_one_artifact(self.diff_jit_path, diff_location, flags + diff_option_flags_for_diff_artifact) if base_txt != diff_txt: - jit_differences_queue.put_nowait(item) + jit_differences_queue.put_nowait(context_index) ################################################################################################ end of create_replay_artifacts() - diff_items = [] - for item in self.diff_mcl_contents: - diff_items.append(item) - logging.info("Creating dasm files: %s %s", base_asm_location, diff_asm_location) - subproc_helper = AsyncSubprocessHelper(diff_items, verbose=True) + dasm_contexts = self.pick_contexts_to_disassemble(diffs) + subproc_helper = AsyncSubprocessHelper(dasm_contexts, verbose=True) subproc_helper.run_to_completion(create_replay_artifacts, self, mch_file, asm_complus_vars_full_env, text_differences, base_asm_location, diff_asm_location, ".dasm") if self.coreclr_args.diff_jit_dump: logging.info("Creating JitDump files: %s %s", base_dump_location, diff_dump_location) subproc_helper.run_to_completion(create_replay_artifacts, self, mch_file, jit_dump_complus_vars_full_env, jit_dump_differences, base_dump_location, diff_dump_location, ".txt") - logging.info("Differences found. To replay SuperPMI use:") + logging.info("Contexts with differences found. To replay SuperPMI use:".format(len(diffs))) + logging.info("") for var, value in asm_complus_vars.items(): print_platform_specific_environment_vars(logging.INFO, self.coreclr_args, var, value) @@ -1736,9 +1735,10 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: logging.info("%s %s -c ### %s %s", self.superpmi_path, " ".join(altjit_replay_flags), self.diff_jit_path, mch_file) logging.info("") - logging.debug("Method numbers with binary differences:") - for item in self.diff_mcl_contents: - logging.debug(item) + smallest = sorted(diffs, key=lambda r: int(r["Context size"]))[20:] + logging.debug("Smallest {} contexts with binary differences:".format(len(smallest))) + for diff in smallest: + logging.debug(diff["Context"]) logging.debug("") if base_metrics is not None and diff_metrics is not None: @@ -1776,6 +1776,17 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: elif base_bytes is not None and diff_bytes is not None: command += [ "--override-total-base-metric", str(base_bytes), "--override-total-diff-metric", str(diff_bytes) ] + if len(dasm_contexts) < len(diffs): + # Avoid producing analysis output that assumes these are all contexts + # with diffs. When no custom metrics are specified we pick only a subset + # of interesting contexts to disassemble, to avoid spending too long. + # See pick_contexts_to_disassemble. + command += [ "--is-subset-of-diffs" ] + else: + # Ask jit-analyze to avoid producing analysis output that assumes these are + # all contexts (we never produce .dasm files for contexts without diffs). + command += [ "--is-diffs-only" ] + run_and_log(command, logging.INFO) ran_jit_analyze = True @@ -1800,6 +1811,14 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: else: logging.warning("No textual differences. Is this an issue with coredistools?") + # If we are not specifying custom metrics then print a summary here, otherwise leave the summarization up to jit-analyze. + if self.coreclr_args.metrics is None: + num_improvements = sum(1 for r in diffs if int(r["Diff size"]) < int(r["Base size"])) + num_regressions = sum(1 for r in diffs if int(r["Diff size"]) > int(r["Base size"])) + logging.info("{} contexts with differences found ({} improvements, {} regressions)".format(len(diffs), num_improvements, num_regressions)) + logging.info("") + logging.info("") + if self.coreclr_args.diff_jit_dump: try: current_jit_dump_diff = jit_dump_differences.get_nowait() @@ -1994,6 +2013,49 @@ def write_pivot_section(row): return result ################################################################################################ end of replay_with_asm_diffs() + def pick_contexts_to_disassemble(self, diffs): + """ Given information about diffs, pick the context numbers to create .dasm files for + + Returns: + List of method context numbers to create disassembly for. + """ + + # If there are non-default metrics then we need to disassemble + # everything so that jit-analyze can handle those. + if self.coreclr_args.metrics is not None: + contexts = diffs + else: + # In the default case we have size improvements/regressions + # available without needing to disassemble all, so pick a subset of + # interesting diffs to pass to jit-analyze. + + # 20 smallest method contexts with diffs + smallest_contexts = sorted(diffs, key=lambda r: int(r["Context size"]))[:20] + # Order by byte-wise improvement, largest improvements first + by_diff_size = sorted(diffs, key=lambda r: int(r["Diff size"]) - int(r["Base size"])) + # 20 top improvements, byte-wise + top_improvements_bytes = by_diff_size[:20] + # 20 top regressions, byte-wise + top_regressions_bytes = by_diff_size[-20:] + + # Order by percentage-wise size improvement, largest improvements first + def diff_pct(r): + base = int(r["Base size"]) + if base == 0: + return 0 + diff = int(r["Diff size"]) + return (diff - base) / base + + by_diff_size_pct = sorted(diffs, key=diff_pct) + top_improvements_pct = by_diff_size_pct[:20] + top_regressions_pct = by_diff_size_pct[-20:] + + contexts = smallest_contexts + top_improvements_bytes + top_regressions_bytes + top_improvements_pct + top_regressions_pct + + final_contexts_indices = list(set(int(r["Context"]) for r in contexts)) + final_contexts_indices.sort() + return final_contexts_indices + ################################################################################ # SuperPMI Replay/TP diff ################################################################################ diff --git a/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt b/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt index 63237450898e3..76ab73ffdabfe 100644 --- a/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt +++ b/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt @@ -26,6 +26,7 @@ set(SUPERPMI_SOURCES neardiffer.cpp parallelsuperpmi.cpp superpmi.cpp + fileio.cpp jithost.cpp ../superpmi-shared/callutils.cpp ../superpmi-shared/compileresult.cpp diff --git a/src/coreclr/tools/superpmi/superpmi/commandline.cpp b/src/coreclr/tools/superpmi/superpmi/commandline.cpp index 563d8cb16248c..ca407889e8723 100644 --- a/src/coreclr/tools/superpmi/superpmi/commandline.cpp +++ b/src/coreclr/tools/superpmi/superpmi/commandline.cpp @@ -318,7 +318,7 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) o->mclFilename = argv[i]; } - else if ((_strnicmp(&argv[i][1], "diffMCList", 10) == 0)) + else if ((_strnicmp(&argv[i][1], "diffsInfo", 9) == 0)) { if (++i >= argc) { @@ -326,7 +326,7 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) return false; } - o->diffMCLFilename = argv[i]; + o->diffsInfo = argv[i]; } else if ((_strnicmp(&argv[i][1], "target", 6) == 0)) { @@ -641,9 +641,9 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) DumpHelp(argv[0]); return false; } - if (o->diffMCLFilename != nullptr && !o->applyDiff) + if (o->diffsInfo != nullptr && !o->applyDiff) { - LogError("-diffMCList specified without -applyDiff."); + LogError("-diffsInfo specified without -applyDiff."); DumpHelp(argv[0]); return false; } diff --git a/src/coreclr/tools/superpmi/superpmi/commandline.h b/src/coreclr/tools/superpmi/superpmi/commandline.h index 055adf00295cd..e801f8cb4f752 100644 --- a/src/coreclr/tools/superpmi/superpmi/commandline.h +++ b/src/coreclr/tools/superpmi/superpmi/commandline.h @@ -39,7 +39,7 @@ class CommandLine , baseMetricsSummaryFile(nullptr) , diffMetricsSummaryFile(nullptr) , mclFilename(nullptr) - , diffMCLFilename(nullptr) + , diffsInfo(nullptr) , targetArchitecture(nullptr) , compileList(nullptr) , offset(-1) @@ -72,7 +72,7 @@ class CommandLine char* baseMetricsSummaryFile; char* diffMetricsSummaryFile; char* mclFilename; - char* diffMCLFilename; + char* diffsInfo; char* targetArchitecture; char* compileList; int offset; diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.cpp b/src/coreclr/tools/superpmi/superpmi/fileio.cpp new file mode 100644 index 0000000000000..ed16485a038e8 --- /dev/null +++ b/src/coreclr/tools/superpmi/superpmi/fileio.cpp @@ -0,0 +1,115 @@ +#include "standardpch.h" +#include "fileio.h" + +bool FileWriter::Printf(const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + + char stackBuffer[512]; + size_t bufferSize = sizeof(stackBuffer); + char* pBuffer = stackBuffer; + while (true) + { + va_list argsCopy; + va_copy(argsCopy, args); + int printed = _vsnprintf_s(pBuffer, bufferSize, _TRUNCATE, fmt, argsCopy); + va_end(argsCopy); + + if (printed < 0) + { + // buffer too small + if (pBuffer != stackBuffer) + delete[] pBuffer; + + bufferSize *= 2; + pBuffer = new char[bufferSize]; + } + else + { + DWORD numWritten; + bool result = + WriteFile(m_file.Get(), pBuffer, static_cast(printed), &numWritten, nullptr) && + (numWritten == static_cast(printed)); + + if (pBuffer != stackBuffer) + delete[] pBuffer; + + va_end(args); + return result; + } + } +} + +bool FileWriter::CreateNew(const char* path, FileWriter* fw) +{ + FileHandle handle(CreateFile(path, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)); + if (!handle.IsValid()) + { + return false; + } + + *fw = FileWriter(std::move(handle)); + return true; +} + +bool FileLineReader::Open(const char* path, FileLineReader* fr) +{ + FileHandle file(CreateFile(path, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)); + if (!file.IsValid()) + { + return false; + } + + LARGE_INTEGER size; + if (!GetFileSizeEx(file.Get(), &size)) + { + return false; + } + + if (static_cast(size.QuadPart) > SIZE_MAX) + { + return false; + } + + FileMappingHandle mapping(CreateFileMapping(file.Get(), nullptr, PAGE_READONLY, size.u.HighPart, size.u.LowPart, nullptr)); + if (!mapping.IsValid()) + { + return false; + } + + FileViewHandle view(MapViewOfFile(mapping.Get(), FILE_MAP_READ, 0, 0, 0)); + if (!view.IsValid()) + { + return false; + } + + *fr = FileLineReader(std::move(file), std::move(mapping), std::move(view), static_cast(size.QuadPart)); + return true; +} + +bool FileLineReader::AdvanceLine() +{ + if (m_cur >= m_end) + { + return false; + } + + char* end = m_cur; + while (end < m_end && *end != '\r' && *end != '\n') + { + end++; + } + + m_currentLine.resize(end - m_cur + 1); + memcpy(m_currentLine.data(), m_cur, end - m_cur); + m_currentLine[end - m_cur] = '\0'; + + m_cur = end; + if (m_cur < m_end && *m_cur == '\r') + m_cur++; + if (m_cur < m_end && *m_cur == '\n') + m_cur++; + + return true; +} diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.h b/src/coreclr/tools/superpmi/superpmi/fileio.h new file mode 100644 index 0000000000000..71727fbc03342 --- /dev/null +++ b/src/coreclr/tools/superpmi/superpmi/fileio.h @@ -0,0 +1,125 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _FileIO +#define _FileIO + +template +struct HandleWrapper +{ + HandleWrapper() + : m_handle(InvalidHandleValue) + { + } + + explicit HandleWrapper(HandleType handle) + : m_handle(handle) + { + } + + ~HandleWrapper() + { + if (m_handle != InvalidHandleValue) + { + FreeFunction{}(m_handle); + + m_handle = InvalidHandleValue; + } + } + + HandleWrapper(const HandleWrapper&) = delete; + HandleWrapper& operator=(HandleWrapper&) = delete; + + HandleWrapper(HandleWrapper&& hw) + : m_handle(hw.m_handle) + { + hw.m_handle = INVALID_HANDLE_VALUE; + } + + HandleWrapper& operator=(HandleWrapper&& hw) + { + if (m_handle != InvalidHandleValue) + CloseHandle(m_handle); + + m_handle = hw.m_handle; + hw.m_handle = InvalidHandleValue; + return *this; + } + + bool IsValid() { return m_handle != InvalidHandleValue; } + HandleType Get() { return m_handle; } + +private: + HandleType m_handle; +}; + +struct CloseHandleFunctor +{ + void operator()(HANDLE handle) + { + CloseHandle(handle); + } +}; + +struct UnmapViewOfFileFunctor +{ + void operator()(LPVOID view) + { + UnmapViewOfFile(view); + } +}; + +typedef HandleWrapper FileHandle; +typedef HandleWrapper FileMappingHandle; +typedef HandleWrapper FileViewHandle; + +class FileWriter +{ + FileHandle m_file; + + FileWriter(FileHandle file) + : m_file(std::move(file)) + { + } + +public: + FileWriter() + { + } + + bool Printf(const char* fmt, ...); + + static bool CreateNew(const char* path, FileWriter* fw); +}; + +class FileLineReader +{ + FileHandle m_file; + FileMappingHandle m_fileMapping; + FileViewHandle m_view; + + char* m_cur; + char* m_end; + std::vector m_currentLine; + + FileLineReader(FileHandle file, FileMappingHandle fileMapping, FileViewHandle view, size_t size) + : m_file(std::move(file)) + , m_fileMapping(std::move(fileMapping)) + , m_view(std::move(view)) + , m_cur(static_cast(m_view.Get())) + , m_end(static_cast(m_view.Get()) + size) + { + } + +public: + FileLineReader() + { + } + + bool AdvanceLine(); + const char* GetCurrentLine() { return m_currentLine.data(); } + + static bool Open(const char* path, FileLineReader* fr); +}; + +#endif diff --git a/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp b/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp index cb9c908ee72d8..5344a2b949138 100644 --- a/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp +++ b/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp @@ -4,6 +4,7 @@ #include "standardpch.h" #include "metricssummary.h" #include "logging.h" +#include "fileio.h" void MetricsSummary::AggregateFrom(const MetricsSummary& other) { @@ -60,14 +61,13 @@ static bool FilePrintf(HANDLE hFile, const char* fmt, ...) bool MetricsSummaries::SaveToFile(const char* path) { - FileHandleWrapper file(CreateFile(path, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)); - if (file.get() == INVALID_HANDLE_VALUE) + FileWriter file; + if (!FileWriter::CreateNew(path, &file)) { return false; } - if (!FilePrintf( - file.get(), + if (!file.Printf( "Successful compiles,Failing compiles,Missing compiles,Contexts with diffs," "Code bytes,Diffed code bytes,Executed instructions,Diff executed instructions,Name\n")) { @@ -75,16 +75,15 @@ bool MetricsSummaries::SaveToFile(const char* path) } return - WriteRow(file.get(), "Overall", Overall) && - WriteRow(file.get(), "MinOpts", MinOpts) && - WriteRow(file.get(), "FullOpts", FullOpts); + WriteRow(file, "Overall", Overall) && + WriteRow(file, "MinOpts", MinOpts) && + WriteRow(file, "FullOpts", FullOpts); } -bool MetricsSummaries::WriteRow(HANDLE hFile, const char* name, const MetricsSummary& summary) +bool MetricsSummaries::WriteRow(FileWriter& fw, const char* name, const MetricsSummary& summary) { return - FilePrintf( - hFile, + fw.Printf( "%d,%d,%d,%d,%lld,%lld,%lld,%lld,%s\n", summary.SuccessfulCompiles, summary.FailingCompiles, diff --git a/src/coreclr/tools/superpmi/superpmi/metricssummary.h b/src/coreclr/tools/superpmi/superpmi/metricssummary.h index 118d6aac22400..14e364cd9fcf3 100644 --- a/src/coreclr/tools/superpmi/superpmi/metricssummary.h +++ b/src/coreclr/tools/superpmi/superpmi/metricssummary.h @@ -39,7 +39,7 @@ class MetricsSummaries bool SaveToFile(const char* path); static bool LoadFromFile(const char* path, MetricsSummaries* metrics); private: - static bool WriteRow(HANDLE hFile, const char* name, const MetricsSummary& summary); + static bool WriteRow(class FileWriter& fw, const char* name, const MetricsSummary& summary); }; #endif diff --git a/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp b/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp index dfca5adcc376a..610ccdf732b73 100644 --- a/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp @@ -9,6 +9,7 @@ #include "commandline.h" #include "errorhandling.h" #include "metricssummary.h" +#include "fileio.h" #define MAX_LOG_LINE_SIZE 0x1000 // 4 KB @@ -349,7 +350,7 @@ struct PerWorkerData HANDLE hStdError; char* failingMCListPath; - char* diffMCListPath; + char* diffsInfoPath; char* stdOutputPath; char* stdErrorPath; char* baseMetricsSummaryPath; @@ -359,7 +360,7 @@ struct PerWorkerData : hStdOutput(INVALID_HANDLE_VALUE) , hStdError(INVALID_HANDLE_VALUE) , failingMCListPath(nullptr) - , diffMCListPath(nullptr) + , diffsInfoPath(nullptr) , stdOutputPath(nullptr) , stdErrorPath(nullptr) , baseMetricsSummaryPath(nullptr) @@ -368,7 +369,7 @@ struct PerWorkerData } }; -void MergeWorkerMCLs(char* mclFilename, PerWorkerData* workerData, int workerCount, char* PerWorkerData::*mclPath) +static void MergeWorkerMCLs(char* mclFilename, PerWorkerData* workerData, int workerCount, char* PerWorkerData::*mclPath) { int **MCL = new int *[workerCount], *MCLCount = new int[workerCount], totalCount = 0; @@ -396,6 +397,39 @@ void MergeWorkerMCLs(char* mclFilename, PerWorkerData* workerData, int workerCou LogError("Unable to write to MCL file %s.", mclFilename); } +static void MergeWorkerCsvs(char* csvFilename, PerWorkerData* workerData, int workerCount, char* PerWorkerData::* csvPath) +{ + FileWriter fw; + if (!FileWriter::CreateNew(csvFilename, &fw)) + { + LogError("Could not create file %s", csvFilename); + return; + } + + bool hasHeader = false; + for (int i = 0; i < workerCount; i++) + { + FileLineReader reader; + if (!FileLineReader::Open(workerData[i].*csvPath, &reader)) + { + LogError("Could not open child CSV file %s", workerData[i].*csvPath); + continue; + } + + if (hasHeader && !reader.AdvanceLine()) + { + continue; + } + + while (reader.AdvanceLine()) + { + fw.Printf("%s\n", reader.GetCurrentLine()); + } + + hasHeader = true; + } +} + #define MAX_CMDLINE_SIZE 0x1000 // 4 KB //------------------------------------------------------------- @@ -528,8 +562,8 @@ int doParallelSuperPMI(CommandLine::Options& o) LogVerbose("Using child (%s) with args (%s)", spmiFilename, spmiArgs); if (o.mclFilename != nullptr) LogVerbose(" failingMCList=%s", o.mclFilename); - if (o.diffMCLFilename != nullptr) - LogVerbose(" diffMCLFilename=%s", o.diffMCLFilename); + if (o.diffsInfo != nullptr) + LogVerbose(" diffsInfo=%s", o.diffsInfo); LogVerbose(" workerCount=%d, skipCleanup=%d.", o.workerCount, o.skipCleanup); PerWorkerData* perWorkerData = new PerWorkerData[o.workerCount]; @@ -551,10 +585,10 @@ int doParallelSuperPMI(CommandLine::Options& o) sprintf_s(wd.failingMCListPath, MAX_PATH, "%sParallelSuperPMI-%u-%d.mcl", tempPath, randNumber, i); } - if (o.diffMCLFilename != nullptr) + if (o.diffsInfo != nullptr) { - wd.diffMCListPath = new char[MAX_PATH]; - sprintf_s(wd.diffMCListPath, MAX_PATH, "%sParallelSuperPMI-Diff-%u-%d.mcl", tempPath, randNumber, i); + wd.diffsInfoPath = new char[MAX_PATH]; + sprintf_s(wd.diffsInfoPath, MAX_PATH, "%sParallelSuperPMI-Diff-%u-%d.mcl", tempPath, randNumber, i); } if (o.baseMetricsSummaryFile != nullptr) @@ -593,10 +627,10 @@ int doParallelSuperPMI(CommandLine::Options& o) wd.failingMCListPath); } - if (wd.diffMCListPath != nullptr) + if (wd.diffsInfoPath != nullptr) { - bytesWritten += sprintf_s(cmdLine + bytesWritten, MAX_CMDLINE_SIZE - bytesWritten, " -diffMCList %s", - wd.diffMCListPath); + bytesWritten += sprintf_s(cmdLine + bytesWritten, MAX_CMDLINE_SIZE - bytesWritten, " -diffsInfo %s", + wd.diffsInfoPath); } if (wd.baseMetricsSummaryPath != nullptr) @@ -719,10 +753,10 @@ int doParallelSuperPMI(CommandLine::Options& o) MergeWorkerMCLs(o.mclFilename, perWorkerData, o.workerCount, &PerWorkerData::failingMCListPath); } - if (o.diffMCLFilename != nullptr && !usageError) + if (o.diffsInfo != nullptr && !usageError) { // Concat the resulting diff .mcl files - MergeWorkerMCLs(o.diffMCLFilename, perWorkerData, o.workerCount, &PerWorkerData::diffMCListPath); + MergeWorkerCsvs(o.diffsInfo, perWorkerData, o.workerCount, &PerWorkerData::diffsInfoPath); } if (o.baseMetricsSummaryFile != nullptr && !usageError) @@ -761,9 +795,9 @@ int doParallelSuperPMI(CommandLine::Options& o) { DeleteFile(wd.failingMCListPath); } - if (wd.diffMCListPath != nullptr) + if (wd.diffsInfoPath != nullptr) { - DeleteFile(wd.diffMCListPath); + DeleteFile(wd.diffsInfoPath); } if (wd.baseMetricsSummaryPath != nullptr) { diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp index ddf595281691b..a6788f4d1108a 100644 --- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp @@ -19,6 +19,7 @@ #include "methodstatsemitter.h" #include "spmiutil.h" #include "metricssummary.h" +#include "fileio.h" extern int doParallelSuperPMI(CommandLine::Options& o); @@ -64,54 +65,47 @@ void SetSuperPmiTargetArchitecture(const char* targetArchitecture) } } +enum class NearDifferResult +{ + SuccessWithoutDiff, + SuccessWithDiff, + Failure, +}; + // This function uses PAL_TRY, so it can't be in the a function that requires object unwinding. Extracting it out here // avoids compiler error. // -static void InvokeNearDiffer(NearDiffer* nearDiffer, - CommandLine::Options* o, - MethodContext** mc, - CompileResult** crl, - bool* hasDiff, - MethodContextReader** reader, - MCList* failingMCL, - MCList* diffMCL) +static NearDifferResult InvokeNearDiffer(NearDiffer* nearDiffer, + MethodContext** mc, + CompileResult** crl, + MethodContextReader** reader) { + struct Param : FilterSuperPMIExceptionsParam_CaptureException { NearDiffer* nearDiffer; - CommandLine::Options* o; MethodContext** mc; CompileResult** crl; - bool* hasDiff; MethodContextReader** reader; - MCList* failingMCL; - MCList* diffMCL; + NearDifferResult result; } param; param.nearDiffer = nearDiffer; - param.o = o; param.mc = mc; param.crl = crl; - param.hasDiff = hasDiff; param.reader = reader; - param.failingMCL = failingMCL; - param.diffMCL = diffMCL; - *hasDiff = false; + param.result = NearDifferResult::Failure; PAL_TRY(Param*, pParam, ¶m) { if (!pParam->nearDiffer->compare(*pParam->mc, *pParam->crl, (*pParam->mc)->cr)) { - *pParam->hasDiff = true; + pParam->result = NearDifferResult::SuccessWithDiff; LogIssue(ISSUE_ASM_DIFF, "main method %d of size %d differs", (*pParam->reader)->GetMethodContextIndex(), (*pParam->mc)->methodSize); - - // This is a difference in ASM outputs from Jit1 & Jit2 and not a playback failure - // We will add this MC to the diffMCList if one is requested - // Otherwise this will end up in failingMCList - if ((*pParam->o).diffMCLFilename != nullptr) - (*pParam->diffMCL).AddMethodToMCL((*pParam->reader)->GetMethodContextIndex()); - else if ((*pParam->o).mclFilename != nullptr) - (*pParam->failingMCL).AddMethodToMCL((*pParam->reader)->GetMethodContextIndex()); + } + else + { + pParam->result = NearDifferResult::SuccessWithoutDiff; } } PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_CaptureExceptionAndStop) @@ -121,10 +115,26 @@ static void InvokeNearDiffer(NearDiffer* nearDiffer, LogError("main method %d of size %d failed to load and compile correctly.", (*reader)->GetMethodContextIndex(), (*mc)->methodSize); e.ShowAndDeleteMessage(); - if ((*o).mclFilename != nullptr) - (*failingMCL).AddMethodToMCL((*reader)->GetMethodContextIndex()); + param.result = NearDifferResult::Failure; } PAL_ENDTRY + + return param.result; +} + +static bool PrintDiffsCsvHeader(FileWriter& fw) +{ + return fw.Printf("Context,Base size,Diff size,Base instructions,Diff instructions,Context size\n"); +} + +static bool PrintDiffsCsvRow( + FileWriter& fw, + int context, + long long baseSize, long long diffSize, + long long baseInstructions, long long diffInstructions, + uint32_t contextSize) +{ + return fw.Printf("%d,%lld,%lld,%lld,%lld,%u\n", context, baseSize, diffSize, baseInstructions, diffInstructions, contextSize); } // Run superpmi. The return value is as follows: @@ -171,7 +181,8 @@ int __cdecl main(int argc, char* argv[]) #endif bool collectThroughput = false; - MCList failingToReplayMCL, diffMCL; + MCList failingToReplayMCL; + FileWriter diffCsv; CommandLine::Options o; if (!CommandLine::Parse(argc, argv, &o)) @@ -230,9 +241,13 @@ int __cdecl main(int argc, char* argv[]) { failingToReplayMCL.InitializeMCL(o.mclFilename); } - if (o.diffMCLFilename != nullptr) + if (o.diffsInfo != nullptr) { - diffMCL.InitializeMCL(o.diffMCLFilename); + if (!FileWriter::CreateNew(o.diffsInfo, &diffCsv)) + { + LogError("Could not create file %s", o.diffsInfo); + return (int)SpmiResult::GeneralFailure; + } } SetDebugDumpVariables(); @@ -264,6 +279,8 @@ int __cdecl main(int argc, char* argv[]) { return (int)SpmiResult::GeneralFailure; } + + PrintDiffsCsvHeader(diffCsv); } MetricsSummaries totalBaseMetrics; @@ -552,16 +569,53 @@ int __cdecl main(int argc, char* argv[]) } else { - bool hasDiff; - InvokeNearDiffer(&nearDiffer, &o, &mc, &crl, &hasDiff, &reader, &failingToReplayMCL, &diffMCL); + NearDifferResult result = InvokeNearDiffer(&nearDiffer, &mc, &crl, &reader); - if (hasDiff) + bool addDiffsCsvRow = false; + switch (result) { + case NearDifferResult::SuccessWithDiff: totalBaseMetrics.Overall.NumContextsWithDiffs++; totalDiffMetrics.Overall.NumContextsWithDiffs++; totalBaseMetricsOpts.NumContextsWithDiffs++; totalDiffMetricsOpts.NumContextsWithDiffs++; + + // This is a difference in ASM outputs from Jit1 & Jit2 and not a playback failure + // We will add this MC to the diffs info if there is one. + // Otherwise this will end up in failingMCList + if (o.diffsInfo != nullptr) + { + addDiffsCsvRow = true; + } + else if (o.mclFilename != nullptr) + { + failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); + } + + break; + case NearDifferResult::SuccessWithoutDiff: + if (o.diffsInfo != nullptr) + { + addDiffsCsvRow = true; + } + + break; + case NearDifferResult::Failure: + if (o.mclFilename != nullptr) + failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); + + break; + } + + if (addDiffsCsvRow) + { + PrintDiffsCsvRow( + diffCsv, + reader->GetMethodContextIndex(), + baseMetrics.NumCodeBytes, diffMetrics.NumCodeBytes, + baseMetrics.NumExecutedInstructions, diffMetrics.NumExecutedInstructions, + mcb.size); } totalBaseMetrics.Overall.NumDiffedCodeBytes += baseMetrics.NumCodeBytes; @@ -625,7 +679,7 @@ int __cdecl main(int argc, char* argv[]) if (o.breakOnError) { if (o.indexCount == -1) - LogInfo("HINT: to repro add '/c %d' to cmdline", reader->GetMethodContextIndex()); + LogInfo("HINT: to repro add '-c %d' to cmdline", reader->GetMethodContextIndex()); __debugbreak(); } } @@ -674,10 +728,6 @@ int __cdecl main(int argc, char* argv[]) { failingToReplayMCL.CloseMCL(); } - if (o.diffMCLFilename != nullptr) - { - diffMCL.CloseMCL(); - } Logger::Shutdown(); SpmiResult result = SpmiResult::Success; From 6d78190c8877b1e5cffff772bbf1e097c8edcd24 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 13:54:42 +0200 Subject: [PATCH 02/20] Remove --retainOnlyTopFiles This is no longer necessary since we avoid creating these disassembly files in the first place. Another benefit is that all contexts mentioned by jit-analyze output will now be part of the artifacts. --- src/coreclr/scripts/superpmi.py | 8 -------- src/coreclr/scripts/superpmi_diffs.py | 3 +-- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 4f331a627c1bd..1e79a1c77d5b4 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -342,7 +342,6 @@ def add_core_root_arguments(parser, build_type_default, build_type_help): asm_diff_parser.add_argument("--debuginfo", action="store_true", help="Include debug info after disassembly (sets COMPlus_JitDebugDump).") asm_diff_parser.add_argument("-tag", help="Specify a word to add to the directory name where the asm diffs will be placed") asm_diff_parser.add_argument("-metrics", action="append", help="Metrics option to pass to jit-analyze. Can be specified multiple times, or pass comma-separated values.") -asm_diff_parser.add_argument("-retainOnlyTopFiles", action="store_true", help="Retain only top .dasm files with largest improvements or regressions and delete remaining files.") asm_diff_parser.add_argument("--diff_with_release", action="store_true", help="Specify if this is asmdiff using release binaries.") asm_diff_parser.add_argument("--git_diff", action="store_true", help="Produce a '.diff' file from 'base' and 'diff' folders if there were any differences.") @@ -1769,8 +1768,6 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: # It appears we have a built jit-analyze on the path, so try to run it. jit_analyze_summary_file = os.path.join(asm_root_dir, "summary.md") command = [ jit_analyze_path, "--md", jit_analyze_summary_file, "-r", "--base", base_asm_location, "--diff", diff_asm_location ] - if self.coreclr_args.retainOnlyTopFiles: - command += [ "--retainOnlyTopFiles" ] if self.coreclr_args.metrics: command += [ "--metrics", ",".join(self.coreclr_args.metrics) ] elif base_bytes is not None and diff_bytes is not None: @@ -3970,11 +3967,6 @@ def verify_base_diff_args(): lambda unused: True, "Unable to set metrics.") - coreclr_args.verify(args, - "retainOnlyTopFiles", - lambda unused: True, - "Unable to set retainOnlyTopFiles.") - coreclr_args.verify(args, "diff_with_release", lambda unused: True, diff --git a/src/coreclr/scripts/superpmi_diffs.py b/src/coreclr/scripts/superpmi_diffs.py index 47243ad43d3a2..f8e24dcab9128 100644 --- a/src/coreclr/scripts/superpmi_diffs.py +++ b/src/coreclr/scripts/superpmi_diffs.py @@ -216,8 +216,7 @@ def main(main_args): "-spmi_location", spmi_location, "-error_limit", "100", "-log_level", "debug", - "-log_file", log_file, - "-retainOnlyTopFiles"]) + "-log_file", log_file) print("Running superpmi.py tpdiff") From c96e35cdfddbbea357ec84cc712728cacb6b6cf4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 13:56:11 +0200 Subject: [PATCH 03/20] Test change with tons of diffs --- src/coreclr/jit/morph.cpp | 500 +++++++++++++++++++------------------- 1 file changed, 250 insertions(+), 250 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ef932bbe2489c..0edd77e69eab1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1297,256 +1297,256 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) sortedArgs[argCount++] = &arg; } - // Set the beginning and end for the new argument table - unsigned curInx; - int regCount = 0; - unsigned begTab = 0; - unsigned endTab = argCount - 1; - unsigned argsRemaining = argCount; - - // First take care of arguments that are constants. - // [We use a backward iterator pattern] - // - curInx = argCount; - do - { - curInx--; - - CallArg* arg = sortedArgs[curInx]; - - if (arg->AbiInfo.GetRegNum() != REG_STK) - { - regCount++; - } - - assert(arg->GetLateNode() == nullptr); - - // Skip any already processed args - // - if (!arg->m_processed) - { - GenTree* argx = arg->GetEarlyNode(); - - assert(argx != nullptr); - // put constants at the end of the table - // - if (argx->gtOper == GT_CNS_INT) - { - noway_assert(curInx <= endTab); - - arg->m_processed = true; - - // place curArgTabEntry at the endTab position by performing a swap - // - if (curInx != endTab) - { - sortedArgs[curInx] = sortedArgs[endTab]; - sortedArgs[endTab] = arg; - } - - endTab--; - argsRemaining--; - } - } - } while (curInx > 0); - - if (argsRemaining > 0) - { - // Next take care of arguments that are calls. - // [We use a forward iterator pattern] - // - for (curInx = begTab; curInx <= endTab; curInx++) - { - CallArg* arg = sortedArgs[curInx]; - - // Skip any already processed args - // - if (!arg->m_processed) - { - GenTree* argx = arg->GetEarlyNode(); - assert(argx != nullptr); - - // put calls at the beginning of the table - // - if (argx->gtFlags & GTF_CALL) - { - arg->m_processed = true; - - // place curArgTabEntry at the begTab position by performing a swap - // - if (curInx != begTab) - { - sortedArgs[curInx] = sortedArgs[begTab]; - sortedArgs[begTab] = arg; - } - - begTab++; - argsRemaining--; - } - } - } - } - - if (argsRemaining > 0) - { - // Next take care arguments that are temps. - // These temps come before the arguments that are - // ordinary local vars or local fields - // since this will give them a better chance to become - // enregistered into their actual argument register. - // [We use a forward iterator pattern] - // - for (curInx = begTab; curInx <= endTab; curInx++) - { - CallArg* arg = sortedArgs[curInx]; - - // Skip any already processed args - // - if (!arg->m_processed) - { - if (arg->m_needTmp) - { - arg->m_processed = true; - - // place curArgTabEntry at the begTab position by performing a swap - // - if (curInx != begTab) - { - sortedArgs[curInx] = sortedArgs[begTab]; - sortedArgs[begTab] = arg; - } - - begTab++; - argsRemaining--; - } - } - } - } - - if (argsRemaining > 0) - { - // Next take care of local var and local field arguments. - // These are moved towards the end of the argument evaluation. - // [We use a backward iterator pattern] - // - curInx = endTab + 1; - do - { - curInx--; - - CallArg* arg = sortedArgs[curInx]; - - // Skip any already processed args - // - if (!arg->m_processed) - { - GenTree* argx = arg->GetEarlyNode(); - assert(argx != nullptr); - - // As a CQ heuristic, sort TYP_STRUCT args using the cost estimation below. - if (!argx->TypeIs(TYP_STRUCT) && argx->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - { - noway_assert(curInx <= endTab); - - arg->m_processed = true; - - // place curArgTabEntry at the endTab position by performing a swap - // - if (curInx != endTab) - { - sortedArgs[curInx] = sortedArgs[endTab]; - sortedArgs[endTab] = arg; - } - - endTab--; - argsRemaining--; - } - } - } while (curInx > begTab); - } - - // Finally, take care of all the remaining arguments. - // Note that we fill in one arg at a time using a while loop. - bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop - while (argsRemaining > 0) - { - /* Find the most expensive arg remaining and evaluate it next */ - - CallArg* expensiveArg = nullptr; - unsigned expensiveArgIndex = UINT_MAX; - unsigned expensiveArgCost = 0; - - // [We use a forward iterator pattern] - // - for (curInx = begTab; curInx <= endTab; curInx++) - { - CallArg* arg = sortedArgs[curInx]; - - // Skip any already processed args - // - if (!arg->m_processed) - { - GenTree* argx = arg->GetEarlyNode(); - assert(argx != nullptr); - - // We should have already handled these kinds of args - assert((!argx->OperIs(GT_LCL_VAR, GT_LCL_FLD) || argx->TypeIs(TYP_STRUCT)) && - !argx->OperIs(GT_CNS_INT)); - - // This arg should either have no persistent side effects or be the last one in our table - // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); - - if (argsRemaining == 1) - { - // This is the last arg to place - expensiveArgIndex = curInx; - expensiveArg = arg; - assert(begTab == endTab); - break; - } - else - { - if (!costsPrepared) - { - /* We call gtPrepareCost to measure the cost of evaluating this tree */ - comp->gtPrepareCost(argx); - } - - if (argx->GetCostEx() > expensiveArgCost) - { - // Remember this arg as the most expensive one that we have yet seen - expensiveArgCost = argx->GetCostEx(); - expensiveArgIndex = curInx; - expensiveArg = arg; - } - } - } - } - - noway_assert(expensiveArgIndex != UINT_MAX); - - // put the most expensive arg towards the beginning of the table - - expensiveArg->m_processed = true; - - // place expensiveArgTabEntry at the begTab position by performing a swap - // - if (expensiveArgIndex != begTab) - { - sortedArgs[expensiveArgIndex] = sortedArgs[begTab]; - sortedArgs[begTab] = expensiveArg; - } - - begTab++; - argsRemaining--; - - costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop - } - - // The table should now be completely filled and thus begTab should now be adjacent to endTab - // and regArgsRemaining should be zero - assert(begTab == (endTab + 1)); - assert(argsRemaining == 0); + //// Set the beginning and end for the new argument table + //unsigned curInx; + //int regCount = 0; + //unsigned begTab = 0; + //unsigned endTab = argCount - 1; + //unsigned argsRemaining = argCount; + + //// First take care of arguments that are constants. + //// [We use a backward iterator pattern] + //// + //curInx = argCount; + //do + //{ + // curInx--; + + // CallArg* arg = sortedArgs[curInx]; + + // if (arg->AbiInfo.GetRegNum() != REG_STK) + // { + // regCount++; + // } + + // assert(arg->GetLateNode() == nullptr); + + // // Skip any already processed args + // // + // if (!arg->m_processed) + // { + // GenTree* argx = arg->GetEarlyNode(); + + // assert(argx != nullptr); + // // put constants at the end of the table + // // + // if (argx->gtOper == GT_CNS_INT) + // { + // noway_assert(curInx <= endTab); + + // arg->m_processed = true; + + // // place curArgTabEntry at the endTab position by performing a swap + // // + // if (curInx != endTab) + // { + // sortedArgs[curInx] = sortedArgs[endTab]; + // sortedArgs[endTab] = arg; + // } + + // endTab--; + // argsRemaining--; + // } + // } + //} while (curInx > 0); + + //if (argsRemaining > 0) + //{ + // // Next take care of arguments that are calls. + // // [We use a forward iterator pattern] + // // + // for (curInx = begTab; curInx <= endTab; curInx++) + // { + // CallArg* arg = sortedArgs[curInx]; + + // // Skip any already processed args + // // + // if (!arg->m_processed) + // { + // GenTree* argx = arg->GetEarlyNode(); + // assert(argx != nullptr); + + // // put calls at the beginning of the table + // // + // if (argx->gtFlags & GTF_CALL) + // { + // arg->m_processed = true; + + // // place curArgTabEntry at the begTab position by performing a swap + // // + // if (curInx != begTab) + // { + // sortedArgs[curInx] = sortedArgs[begTab]; + // sortedArgs[begTab] = arg; + // } + + // begTab++; + // argsRemaining--; + // } + // } + // } + //} + + //if (argsRemaining > 0) + //{ + // // Next take care arguments that are temps. + // // These temps come before the arguments that are + // // ordinary local vars or local fields + // // since this will give them a better chance to become + // // enregistered into their actual argument register. + // // [We use a forward iterator pattern] + // // + // for (curInx = begTab; curInx <= endTab; curInx++) + // { + // CallArg* arg = sortedArgs[curInx]; + + // // Skip any already processed args + // // + // if (!arg->m_processed) + // { + // if (arg->m_needTmp) + // { + // arg->m_processed = true; + + // // place curArgTabEntry at the begTab position by performing a swap + // // + // if (curInx != begTab) + // { + // sortedArgs[curInx] = sortedArgs[begTab]; + // sortedArgs[begTab] = arg; + // } + + // begTab++; + // argsRemaining--; + // } + // } + // } + //} + + //if (argsRemaining > 0) + //{ + // // Next take care of local var and local field arguments. + // // These are moved towards the end of the argument evaluation. + // // [We use a backward iterator pattern] + // // + // curInx = endTab + 1; + // do + // { + // curInx--; + + // CallArg* arg = sortedArgs[curInx]; + + // // Skip any already processed args + // // + // if (!arg->m_processed) + // { + // GenTree* argx = arg->GetEarlyNode(); + // assert(argx != nullptr); + + // // As a CQ heuristic, sort TYP_STRUCT args using the cost estimation below. + // if (!argx->TypeIs(TYP_STRUCT) && argx->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + // { + // noway_assert(curInx <= endTab); + + // arg->m_processed = true; + + // // place curArgTabEntry at the endTab position by performing a swap + // // + // if (curInx != endTab) + // { + // sortedArgs[curInx] = sortedArgs[endTab]; + // sortedArgs[endTab] = arg; + // } + + // endTab--; + // argsRemaining--; + // } + // } + // } while (curInx > begTab); + //} + + //// Finally, take care of all the remaining arguments. + //// Note that we fill in one arg at a time using a while loop. + //bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop + //while (argsRemaining > 0) + //{ + // /* Find the most expensive arg remaining and evaluate it next */ + + // CallArg* expensiveArg = nullptr; + // unsigned expensiveArgIndex = UINT_MAX; + // unsigned expensiveArgCost = 0; + + // // [We use a forward iterator pattern] + // // + // for (curInx = begTab; curInx <= endTab; curInx++) + // { + // CallArg* arg = sortedArgs[curInx]; + + // // Skip any already processed args + // // + // if (!arg->m_processed) + // { + // GenTree* argx = arg->GetEarlyNode(); + // assert(argx != nullptr); + + // // We should have already handled these kinds of args + // assert((!argx->OperIs(GT_LCL_VAR, GT_LCL_FLD) || argx->TypeIs(TYP_STRUCT)) && + // !argx->OperIs(GT_CNS_INT)); + + // // This arg should either have no persistent side effects or be the last one in our table + // // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); + + // if (argsRemaining == 1) + // { + // // This is the last arg to place + // expensiveArgIndex = curInx; + // expensiveArg = arg; + // assert(begTab == endTab); + // break; + // } + // else + // { + // if (!costsPrepared) + // { + // /* We call gtPrepareCost to measure the cost of evaluating this tree */ + // comp->gtPrepareCost(argx); + // } + + // if (argx->GetCostEx() > expensiveArgCost) + // { + // // Remember this arg as the most expensive one that we have yet seen + // expensiveArgCost = argx->GetCostEx(); + // expensiveArgIndex = curInx; + // expensiveArg = arg; + // } + // } + // } + // } + + // noway_assert(expensiveArgIndex != UINT_MAX); + + // // put the most expensive arg towards the beginning of the table + + // expensiveArg->m_processed = true; + + // // place expensiveArgTabEntry at the begTab position by performing a swap + // // + // if (expensiveArgIndex != begTab) + // { + // sortedArgs[expensiveArgIndex] = sortedArgs[begTab]; + // sortedArgs[begTab] = expensiveArg; + // } + + // begTab++; + // argsRemaining--; + + // costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop + //} + + //// The table should now be completely filled and thus begTab should now be adjacent to endTab + //// and regArgsRemaining should be zero + //assert(begTab == (endTab + 1)); + //assert(argsRemaining == 0); } //------------------------------------------------------------------------------ From f079301aa18d9d04a119277a29e017fae94047a9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 14:22:18 +0200 Subject: [PATCH 04/20] DRY it up --- .../superpmi/superpmi/metricssummary.cpp | 76 ++----------------- 1 file changed, 5 insertions(+), 71 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp b/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp index 5344a2b949138..7967cf90293fb 100644 --- a/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp +++ b/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp @@ -25,40 +25,6 @@ void MetricsSummaries::AggregateFrom(const MetricsSummaries& other) FullOpts.AggregateFrom(other.FullOpts); } -struct FileHandleWrapper -{ - FileHandleWrapper(HANDLE hFile) - : hFile(hFile) - { - } - - ~FileHandleWrapper() - { - CloseHandle(hFile); - } - - HANDLE get() { return hFile; } - -private: - HANDLE hFile; -}; - -static bool FilePrintf(HANDLE hFile, const char* fmt, ...) -{ - va_list args; - va_start(args, fmt); - - char buffer[4096]; - int len = vsprintf_s(buffer, ARRAY_SIZE(buffer), fmt, args); - DWORD numWritten; - bool result = - WriteFile(hFile, buffer, static_cast(len), &numWritten, nullptr) && (numWritten == static_cast(len)); - - va_end(args); - - return result; -} - bool MetricsSummaries::SaveToFile(const char* path) { FileWriter file; @@ -98,59 +64,27 @@ bool MetricsSummaries::WriteRow(FileWriter& fw, const char* name, const MetricsS bool MetricsSummaries::LoadFromFile(const char* path, MetricsSummaries* metrics) { - FileHandleWrapper file(CreateFile(path, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)); - if (file.get() == INVALID_HANDLE_VALUE) - { - return false; - } - - LARGE_INTEGER len; - if (!GetFileSizeEx(file.get(), &len)) + FileLineReader reader; + if (!FileLineReader::Open(path, &reader)) { return false; } - DWORD stringLen = static_cast(len.QuadPart); - std::vector content(stringLen); - DWORD numRead; - if (!ReadFile(file.get(), content.data(), stringLen, &numRead, nullptr) || (numRead != stringLen)) + if (!reader.AdvanceLine()) { return false; } - std::vector line; - size_t index = 0; - auto nextLine = [&line, &content, &index]() - { - size_t end = index; - while ((end < content.size()) && (content[end] != '\r') && (content[end] != '\n')) - { - end++; - } - - line.resize(end - index + 1); - memcpy(line.data(), &content[index], end - index); - line[end - index] = '\0'; - - index = end; - if ((index < content.size()) && (content[index] == '\r')) - index++; - if ((index < content.size()) && (content[index] == '\n')) - index++; - }; - *metrics = MetricsSummaries(); - nextLine(); bool result = true; - while (index < content.size()) + while (reader.AdvanceLine()) { - nextLine(); MetricsSummary summary; char name[32]; int scanResult = sscanf_s( - line.data(), + reader.GetCurrentLine(), "%d,%d,%d,%d,%lld,%lld,%lld,%lld,%s", &summary.SuccessfulCompiles, &summary.FailingCompiles, From 67aca1a3092afcc94555dcbb2ae4bc27c729e345 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 14:22:29 +0200 Subject: [PATCH 05/20] Fix bad refactoring --- .../tools/superpmi/superpmi/superpmi.cpp | 25 +++++-------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp index a6788f4d1108a..c707f8f870844 100644 --- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp @@ -571,7 +571,6 @@ int __cdecl main(int argc, char* argv[]) { NearDifferResult result = InvokeNearDiffer(&nearDiffer, &mc, &crl, &reader); - bool addDiffsCsvRow = false; switch (result) { case NearDifferResult::SuccessWithDiff: @@ -586,20 +585,18 @@ int __cdecl main(int argc, char* argv[]) // Otherwise this will end up in failingMCList if (o.diffsInfo != nullptr) { - addDiffsCsvRow = true; + PrintDiffsCsvRow( + diffCsv, + reader->GetMethodContextIndex(), + baseMetrics.NumCodeBytes, diffMetrics.NumCodeBytes, + baseMetrics.NumExecutedInstructions, diffMetrics.NumExecutedInstructions, + mcb.size); } else if (o.mclFilename != nullptr) { failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); } - break; - case NearDifferResult::SuccessWithoutDiff: - if (o.diffsInfo != nullptr) - { - addDiffsCsvRow = true; - } - break; case NearDifferResult::Failure: if (o.mclFilename != nullptr) @@ -608,16 +605,6 @@ int __cdecl main(int argc, char* argv[]) break; } - if (addDiffsCsvRow) - { - PrintDiffsCsvRow( - diffCsv, - reader->GetMethodContextIndex(), - baseMetrics.NumCodeBytes, diffMetrics.NumCodeBytes, - baseMetrics.NumExecutedInstructions, diffMetrics.NumExecutedInstructions, - mcb.size); - } - totalBaseMetrics.Overall.NumDiffedCodeBytes += baseMetrics.NumCodeBytes; totalDiffMetrics.Overall.NumDiffedCodeBytes += diffMetrics.NumCodeBytes; From 95583e3ce710fe1a4915cc1afcdd3594bdeded89 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 14:22:42 +0200 Subject: [PATCH 06/20] Include utility for std::move --- src/coreclr/tools/superpmi/superpmi-shared/standardpch.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h b/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h index dc4ce235ffbf2..e0e83c41d03ab 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h @@ -62,6 +62,7 @@ // Getting STL to work with PAL is difficult, so reimplement STL functionality to not require it. #ifdef TARGET_UNIX +#include "clr_std/utility" #include "clr_std/string" #include "clr_std/algorithm" #include "clr_std/vector" @@ -69,6 +70,7 @@ #ifndef USE_STL #define USE_STL #endif // USE_STL +#include #include #include #include From be172a68e90e16b7583c7e517b3af56b4c26f3c0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 15:29:36 +0200 Subject: [PATCH 07/20] Fix Clang/GCC compilation Coercing integer to pointer in constexpr context is invalid. Clang happily miscompiles the 'invalid handle check' to a null pointer check. --- src/coreclr/tools/superpmi/superpmi/fileio.h | 60 +++++++++++--------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.h b/src/coreclr/tools/superpmi/superpmi/fileio.h index 71727fbc03342..a88e74d6ee00c 100644 --- a/src/coreclr/tools/superpmi/superpmi/fileio.h +++ b/src/coreclr/tools/superpmi/superpmi/fileio.h @@ -4,11 +4,13 @@ #ifndef _FileIO #define _FileIO -template +template struct HandleWrapper { + using HandleType = typename HandleSpec::Type; + HandleWrapper() - : m_handle(InvalidHandleValue) + : m_handle(HandleSpec::Invalid()) { } @@ -19,59 +21,63 @@ struct HandleWrapper ~HandleWrapper() { - if (m_handle != InvalidHandleValue) + if (m_handle != HandleSpec::Invalid()) { - FreeFunction{}(m_handle); - - m_handle = InvalidHandleValue; + HandleSpec::Close(m_handle); + m_handle = HandleSpec::Invalid(); } } HandleWrapper(const HandleWrapper&) = delete; HandleWrapper& operator=(HandleWrapper&) = delete; - HandleWrapper(HandleWrapper&& hw) + HandleWrapper(HandleWrapper&& hw) noexcept : m_handle(hw.m_handle) { - hw.m_handle = INVALID_HANDLE_VALUE; + hw.m_handle = HandleSpec::Invalid(); } - HandleWrapper& operator=(HandleWrapper&& hw) + HandleWrapper& operator=(HandleWrapper&& hw) noexcept { - if (m_handle != InvalidHandleValue) - CloseHandle(m_handle); + if (m_handle != HandleSpec::Invalid()) + HandleSpec::Close(m_handle); m_handle = hw.m_handle; - hw.m_handle = InvalidHandleValue; + hw.m_handle = HandleSpec::Invalid(); return *this; } - bool IsValid() { return m_handle != InvalidHandleValue; } + bool IsValid() { return m_handle != HandleSpec::Invalid(); } HandleType Get() { return m_handle; } private: HandleType m_handle; }; -struct CloseHandleFunctor +struct FileHandleSpec { - void operator()(HANDLE handle) - { - CloseHandle(handle); - } + using Type = HANDLE; + static HANDLE Invalid() { return INVALID_HANDLE_VALUE; } + static void Close(HANDLE h) { CloseHandle(h); } }; -struct UnmapViewOfFileFunctor +struct FileMappingHandleSpec { - void operator()(LPVOID view) - { - UnmapViewOfFile(view); - } + using Type = HANDLE; + static HANDLE Invalid() { return nullptr; } + static void Close(HANDLE h) { CloseHandle(h); } +}; + +struct FileViewHandleSpec +{ + using Type = LPVOID; + static LPVOID Invalid() { return nullptr; } + static void Close(LPVOID p) { UnmapViewOfFile(p); } }; -typedef HandleWrapper FileHandle; -typedef HandleWrapper FileMappingHandle; -typedef HandleWrapper FileViewHandle; +typedef HandleWrapper FileHandle; +typedef HandleWrapper FileMappingHandle; +typedef HandleWrapper FileViewHandle; class FileWriter { @@ -113,6 +119,8 @@ class FileLineReader public: FileLineReader() + : m_cur(nullptr) + , m_end(nullptr) { } From 22640fc63b91e0f482d63d1287015d074ddfd39f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 14:44:44 +0200 Subject: [PATCH 08/20] Fix build --- src/coreclr/tools/superpmi/superpmi/superpmi.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp index c707f8f870844..fa8f6b12f2bc2 100644 --- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp @@ -597,6 +597,8 @@ int __cdecl main(int argc, char* argv[]) failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); } + break; + case NearDifferResult::SuccessWithoutDiff: break; case NearDifferResult::Failure: if (o.mclFilename != nullptr) From 4a77ff5371d2ebe68ed318c8fc3c2fe87d177250 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 15:45:31 +0200 Subject: [PATCH 09/20] Proper move constructor for clr_std::vector, delete copy constructors --- src/coreclr/inc/clr_std/vector | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/coreclr/inc/clr_std/vector b/src/coreclr/inc/clr_std/vector index c10ecf6ba5a92..31efbcaf06ae0 100644 --- a/src/coreclr/inc/clr_std/vector +++ b/src/coreclr/inc/clr_std/vector @@ -215,6 +215,34 @@ namespace std } } + vector(const vector&) = delete; + vector& operator=(const vector&) = delete; + + vector(vector&& v) noexcept + : m_size(v.m_size) + , m_capacity(v.m_capacity) + , m_pelements(v.m_pelements) + , m_isBufferOwner(v.m_isBufferOwner) + { + v.m_isBufferOwner = false; + return *this; + } + + vector& operator=(vector&& v) noexcept + { + if (m_isBufferOwner) + { + erase(m_pelements, 0, m_size); + delete [] (BYTE*)m_pelements; + } + + m_size = v.m_size; + m_capacity = v.m_capacity; + m_pelements = v.m_pelements; + m_isBufferOwner = v.m_isBufferOwner; + v.m_isBufferOwner = false; + return *this; + } size_t size() const { From c17deeaef6c267ed3bcdcf87d7a85bb074f274dc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 15:47:11 +0200 Subject: [PATCH 10/20] Fix --- src/coreclr/inc/clr_std/vector | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/inc/clr_std/vector b/src/coreclr/inc/clr_std/vector index 31efbcaf06ae0..c2d1caba890aa 100644 --- a/src/coreclr/inc/clr_std/vector +++ b/src/coreclr/inc/clr_std/vector @@ -225,7 +225,6 @@ namespace std , m_isBufferOwner(v.m_isBufferOwner) { v.m_isBufferOwner = false; - return *this; } vector& operator=(vector&& v) noexcept From 504d36873ff036ab6d8eaf7da51da4c7d4997061 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 16:35:09 +0200 Subject: [PATCH 11/20] Fix superpmi_diffs.py --- src/coreclr/scripts/superpmi_diffs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/scripts/superpmi_diffs.py b/src/coreclr/scripts/superpmi_diffs.py index f8e24dcab9128..b7af1427fc481 100644 --- a/src/coreclr/scripts/superpmi_diffs.py +++ b/src/coreclr/scripts/superpmi_diffs.py @@ -216,7 +216,7 @@ def main(main_args): "-spmi_location", spmi_location, "-error_limit", "100", "-log_level", "debug", - "-log_file", log_file) + "-log_file", log_file]) print("Running superpmi.py tpdiff") From 30c0a8d6fd0bce2295efa04f378d83e6146adcd4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 19:38:21 +0200 Subject: [PATCH 12/20] Run jit-format and force new CI run --- src/coreclr/jit/morph.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0edd77e69eab1..9113e98af7310 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1298,17 +1298,17 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) } //// Set the beginning and end for the new argument table - //unsigned curInx; - //int regCount = 0; - //unsigned begTab = 0; - //unsigned endTab = argCount - 1; - //unsigned argsRemaining = argCount; + // unsigned curInx; + // int regCount = 0; + // unsigned begTab = 0; + // unsigned endTab = argCount - 1; + // unsigned argsRemaining = argCount; //// First take care of arguments that are constants. //// [We use a backward iterator pattern] //// - //curInx = argCount; - //do + // curInx = argCount; + // do //{ // curInx--; @@ -1350,7 +1350,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // } //} while (curInx > 0); - //if (argsRemaining > 0) + // if (argsRemaining > 0) //{ // // Next take care of arguments that are calls. // // [We use a forward iterator pattern] @@ -1387,7 +1387,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // } //} - //if (argsRemaining > 0) + // if (argsRemaining > 0) //{ // // Next take care arguments that are temps. // // These temps come before the arguments that are @@ -1423,7 +1423,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // } //} - //if (argsRemaining > 0) + // if (argsRemaining > 0) //{ // // Next take care of local var and local field arguments. // // These are moved towards the end of the argument evaluation. @@ -1467,8 +1467,8 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) //// Finally, take care of all the remaining arguments. //// Note that we fill in one arg at a time using a while loop. - //bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop - //while (argsRemaining > 0) + // bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop + // while (argsRemaining > 0) //{ // /* Find the most expensive arg remaining and evaluate it next */ @@ -1545,8 +1545,8 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) //// The table should now be completely filled and thus begTab should now be adjacent to endTab //// and regArgsRemaining should be zero - //assert(begTab == (endTab + 1)); - //assert(argsRemaining == 0); + // assert(begTab == (endTab + 1)); + // assert(argsRemaining == 0); } //------------------------------------------------------------------------------ From ccb37d8843f708b13f008d81fb4e4f2e60a9d0b1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 22:29:27 +0200 Subject: [PATCH 13/20] Minor fixes --- src/coreclr/scripts/superpmi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 1e79a1c77d5b4..ae45443620cd6 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -1718,7 +1718,7 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: logging.info("Creating JitDump files: %s %s", base_dump_location, diff_dump_location) subproc_helper.run_to_completion(create_replay_artifacts, self, mch_file, jit_dump_complus_vars_full_env, jit_dump_differences, base_dump_location, diff_dump_location, ".txt") - logging.info("Contexts with differences found. To replay SuperPMI use:".format(len(diffs))) + logging.info("Differences found. To replay SuperPMI use:".format(len(diffs))) logging.info("") for var, value in asm_complus_vars.items(): @@ -1734,7 +1734,7 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: logging.info("%s %s -c ### %s %s", self.superpmi_path, " ".join(altjit_replay_flags), self.diff_jit_path, mch_file) logging.info("") - smallest = sorted(diffs, key=lambda r: int(r["Context size"]))[20:] + smallest = sorted(diffs, key=lambda r: int(r["Context size"]))[:20] logging.debug("Smallest {} contexts with binary differences:".format(len(smallest))) for diff in smallest: logging.debug(diff["Context"]) From 55ae4488f7d20c3d2445f99bbf5b46db6195ff62 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 27 Sep 2022 22:37:46 +0200 Subject: [PATCH 14/20] Nitpicking order.. --- .../tools/superpmi/superpmi/superpmi.cpp | 67 ++++++++++--------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp index fa8f6b12f2bc2..7875d4ca11119 100644 --- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp @@ -124,17 +124,17 @@ static NearDifferResult InvokeNearDiffer(NearDiffer* nearDiffer, static bool PrintDiffsCsvHeader(FileWriter& fw) { - return fw.Printf("Context,Base size,Diff size,Base instructions,Diff instructions,Context size\n"); + return fw.Printf("Context,Context size,Base size,Diff size,Base instructions,Diff instructions\n"); } static bool PrintDiffsCsvRow( FileWriter& fw, int context, + uint32_t contextSize, long long baseSize, long long diffSize, - long long baseInstructions, long long diffInstructions, - uint32_t contextSize) + long long baseInstructions, long long diffInstructions) { - return fw.Printf("%d,%lld,%lld,%lld,%lld,%u\n", context, baseSize, diffSize, baseInstructions, diffInstructions, contextSize); + return fw.Printf("%d,%u,%lld,%lld,%lld,%lld\n", context, contextSize, baseSize, diffSize, baseInstructions, diffInstructions); } // Run superpmi. The return value is as follows: @@ -279,7 +279,10 @@ int __cdecl main(int argc, char* argv[]) { return (int)SpmiResult::GeneralFailure; } + } + if (o.diffsInfo != nullptr) + { PrintDiffsCsvHeader(diffCsv); } @@ -573,38 +576,38 @@ int __cdecl main(int argc, char* argv[]) switch (result) { - case NearDifferResult::SuccessWithDiff: - totalBaseMetrics.Overall.NumContextsWithDiffs++; - totalDiffMetrics.Overall.NumContextsWithDiffs++; + case NearDifferResult::SuccessWithDiff: + totalBaseMetrics.Overall.NumContextsWithDiffs++; + totalDiffMetrics.Overall.NumContextsWithDiffs++; - totalBaseMetricsOpts.NumContextsWithDiffs++; - totalDiffMetricsOpts.NumContextsWithDiffs++; + totalBaseMetricsOpts.NumContextsWithDiffs++; + totalDiffMetricsOpts.NumContextsWithDiffs++; - // This is a difference in ASM outputs from Jit1 & Jit2 and not a playback failure - // We will add this MC to the diffs info if there is one. - // Otherwise this will end up in failingMCList - if (o.diffsInfo != nullptr) - { - PrintDiffsCsvRow( - diffCsv, - reader->GetMethodContextIndex(), - baseMetrics.NumCodeBytes, diffMetrics.NumCodeBytes, - baseMetrics.NumExecutedInstructions, diffMetrics.NumExecutedInstructions, - mcb.size); - } - else if (o.mclFilename != nullptr) - { - failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); - } + // This is a difference in ASM outputs from Jit1 & Jit2 and not a playback failure + // We will add this MC to the diffs info if there is one. + // Otherwise this will end up in failingMCList + if (o.diffsInfo != nullptr) + { + PrintDiffsCsvRow( + diffCsv, + reader->GetMethodContextIndex(), + mcb.size, + baseMetrics.NumCodeBytes, diffMetrics.NumCodeBytes, + baseMetrics.NumExecutedInstructions, diffMetrics.NumExecutedInstructions); + } + else if (o.mclFilename != nullptr) + { + failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); + } - break; - case NearDifferResult::SuccessWithoutDiff: - break; - case NearDifferResult::Failure: - if (o.mclFilename != nullptr) - failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); + break; + case NearDifferResult::SuccessWithoutDiff: + break; + case NearDifferResult::Failure: + if (o.mclFilename != nullptr) + failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); - break; + break; } totalBaseMetrics.Overall.NumDiffedCodeBytes += baseMetrics.NumCodeBytes; From 1f3d9f07b0e9a01af45694bf5f6e033795ed01f2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 28 Sep 2022 11:33:55 +0200 Subject: [PATCH 15/20] Revert morph change --- src/coreclr/jit/morph.cpp | 500 +++++++++++++++++++------------------- 1 file changed, 250 insertions(+), 250 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 9113e98af7310..ef932bbe2489c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1297,256 +1297,256 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) sortedArgs[argCount++] = &arg; } - //// Set the beginning and end for the new argument table - // unsigned curInx; - // int regCount = 0; - // unsigned begTab = 0; - // unsigned endTab = argCount - 1; - // unsigned argsRemaining = argCount; - - //// First take care of arguments that are constants. - //// [We use a backward iterator pattern] - //// - // curInx = argCount; - // do - //{ - // curInx--; - - // CallArg* arg = sortedArgs[curInx]; - - // if (arg->AbiInfo.GetRegNum() != REG_STK) - // { - // regCount++; - // } - - // assert(arg->GetLateNode() == nullptr); - - // // Skip any already processed args - // // - // if (!arg->m_processed) - // { - // GenTree* argx = arg->GetEarlyNode(); - - // assert(argx != nullptr); - // // put constants at the end of the table - // // - // if (argx->gtOper == GT_CNS_INT) - // { - // noway_assert(curInx <= endTab); - - // arg->m_processed = true; - - // // place curArgTabEntry at the endTab position by performing a swap - // // - // if (curInx != endTab) - // { - // sortedArgs[curInx] = sortedArgs[endTab]; - // sortedArgs[endTab] = arg; - // } - - // endTab--; - // argsRemaining--; - // } - // } - //} while (curInx > 0); - - // if (argsRemaining > 0) - //{ - // // Next take care of arguments that are calls. - // // [We use a forward iterator pattern] - // // - // for (curInx = begTab; curInx <= endTab; curInx++) - // { - // CallArg* arg = sortedArgs[curInx]; - - // // Skip any already processed args - // // - // if (!arg->m_processed) - // { - // GenTree* argx = arg->GetEarlyNode(); - // assert(argx != nullptr); - - // // put calls at the beginning of the table - // // - // if (argx->gtFlags & GTF_CALL) - // { - // arg->m_processed = true; - - // // place curArgTabEntry at the begTab position by performing a swap - // // - // if (curInx != begTab) - // { - // sortedArgs[curInx] = sortedArgs[begTab]; - // sortedArgs[begTab] = arg; - // } - - // begTab++; - // argsRemaining--; - // } - // } - // } - //} - - // if (argsRemaining > 0) - //{ - // // Next take care arguments that are temps. - // // These temps come before the arguments that are - // // ordinary local vars or local fields - // // since this will give them a better chance to become - // // enregistered into their actual argument register. - // // [We use a forward iterator pattern] - // // - // for (curInx = begTab; curInx <= endTab; curInx++) - // { - // CallArg* arg = sortedArgs[curInx]; - - // // Skip any already processed args - // // - // if (!arg->m_processed) - // { - // if (arg->m_needTmp) - // { - // arg->m_processed = true; - - // // place curArgTabEntry at the begTab position by performing a swap - // // - // if (curInx != begTab) - // { - // sortedArgs[curInx] = sortedArgs[begTab]; - // sortedArgs[begTab] = arg; - // } - - // begTab++; - // argsRemaining--; - // } - // } - // } - //} - - // if (argsRemaining > 0) - //{ - // // Next take care of local var and local field arguments. - // // These are moved towards the end of the argument evaluation. - // // [We use a backward iterator pattern] - // // - // curInx = endTab + 1; - // do - // { - // curInx--; - - // CallArg* arg = sortedArgs[curInx]; - - // // Skip any already processed args - // // - // if (!arg->m_processed) - // { - // GenTree* argx = arg->GetEarlyNode(); - // assert(argx != nullptr); - - // // As a CQ heuristic, sort TYP_STRUCT args using the cost estimation below. - // if (!argx->TypeIs(TYP_STRUCT) && argx->OperIs(GT_LCL_VAR, GT_LCL_FLD)) - // { - // noway_assert(curInx <= endTab); - - // arg->m_processed = true; - - // // place curArgTabEntry at the endTab position by performing a swap - // // - // if (curInx != endTab) - // { - // sortedArgs[curInx] = sortedArgs[endTab]; - // sortedArgs[endTab] = arg; - // } - - // endTab--; - // argsRemaining--; - // } - // } - // } while (curInx > begTab); - //} - - //// Finally, take care of all the remaining arguments. - //// Note that we fill in one arg at a time using a while loop. - // bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop - // while (argsRemaining > 0) - //{ - // /* Find the most expensive arg remaining and evaluate it next */ - - // CallArg* expensiveArg = nullptr; - // unsigned expensiveArgIndex = UINT_MAX; - // unsigned expensiveArgCost = 0; - - // // [We use a forward iterator pattern] - // // - // for (curInx = begTab; curInx <= endTab; curInx++) - // { - // CallArg* arg = sortedArgs[curInx]; - - // // Skip any already processed args - // // - // if (!arg->m_processed) - // { - // GenTree* argx = arg->GetEarlyNode(); - // assert(argx != nullptr); - - // // We should have already handled these kinds of args - // assert((!argx->OperIs(GT_LCL_VAR, GT_LCL_FLD) || argx->TypeIs(TYP_STRUCT)) && - // !argx->OperIs(GT_CNS_INT)); - - // // This arg should either have no persistent side effects or be the last one in our table - // // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); - - // if (argsRemaining == 1) - // { - // // This is the last arg to place - // expensiveArgIndex = curInx; - // expensiveArg = arg; - // assert(begTab == endTab); - // break; - // } - // else - // { - // if (!costsPrepared) - // { - // /* We call gtPrepareCost to measure the cost of evaluating this tree */ - // comp->gtPrepareCost(argx); - // } - - // if (argx->GetCostEx() > expensiveArgCost) - // { - // // Remember this arg as the most expensive one that we have yet seen - // expensiveArgCost = argx->GetCostEx(); - // expensiveArgIndex = curInx; - // expensiveArg = arg; - // } - // } - // } - // } - - // noway_assert(expensiveArgIndex != UINT_MAX); - - // // put the most expensive arg towards the beginning of the table - - // expensiveArg->m_processed = true; - - // // place expensiveArgTabEntry at the begTab position by performing a swap - // // - // if (expensiveArgIndex != begTab) - // { - // sortedArgs[expensiveArgIndex] = sortedArgs[begTab]; - // sortedArgs[begTab] = expensiveArg; - // } - - // begTab++; - // argsRemaining--; - - // costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop - //} - - //// The table should now be completely filled and thus begTab should now be adjacent to endTab - //// and regArgsRemaining should be zero - // assert(begTab == (endTab + 1)); - // assert(argsRemaining == 0); + // Set the beginning and end for the new argument table + unsigned curInx; + int regCount = 0; + unsigned begTab = 0; + unsigned endTab = argCount - 1; + unsigned argsRemaining = argCount; + + // First take care of arguments that are constants. + // [We use a backward iterator pattern] + // + curInx = argCount; + do + { + curInx--; + + CallArg* arg = sortedArgs[curInx]; + + if (arg->AbiInfo.GetRegNum() != REG_STK) + { + regCount++; + } + + assert(arg->GetLateNode() == nullptr); + + // Skip any already processed args + // + if (!arg->m_processed) + { + GenTree* argx = arg->GetEarlyNode(); + + assert(argx != nullptr); + // put constants at the end of the table + // + if (argx->gtOper == GT_CNS_INT) + { + noway_assert(curInx <= endTab); + + arg->m_processed = true; + + // place curArgTabEntry at the endTab position by performing a swap + // + if (curInx != endTab) + { + sortedArgs[curInx] = sortedArgs[endTab]; + sortedArgs[endTab] = arg; + } + + endTab--; + argsRemaining--; + } + } + } while (curInx > 0); + + if (argsRemaining > 0) + { + // Next take care of arguments that are calls. + // [We use a forward iterator pattern] + // + for (curInx = begTab; curInx <= endTab; curInx++) + { + CallArg* arg = sortedArgs[curInx]; + + // Skip any already processed args + // + if (!arg->m_processed) + { + GenTree* argx = arg->GetEarlyNode(); + assert(argx != nullptr); + + // put calls at the beginning of the table + // + if (argx->gtFlags & GTF_CALL) + { + arg->m_processed = true; + + // place curArgTabEntry at the begTab position by performing a swap + // + if (curInx != begTab) + { + sortedArgs[curInx] = sortedArgs[begTab]; + sortedArgs[begTab] = arg; + } + + begTab++; + argsRemaining--; + } + } + } + } + + if (argsRemaining > 0) + { + // Next take care arguments that are temps. + // These temps come before the arguments that are + // ordinary local vars or local fields + // since this will give them a better chance to become + // enregistered into their actual argument register. + // [We use a forward iterator pattern] + // + for (curInx = begTab; curInx <= endTab; curInx++) + { + CallArg* arg = sortedArgs[curInx]; + + // Skip any already processed args + // + if (!arg->m_processed) + { + if (arg->m_needTmp) + { + arg->m_processed = true; + + // place curArgTabEntry at the begTab position by performing a swap + // + if (curInx != begTab) + { + sortedArgs[curInx] = sortedArgs[begTab]; + sortedArgs[begTab] = arg; + } + + begTab++; + argsRemaining--; + } + } + } + } + + if (argsRemaining > 0) + { + // Next take care of local var and local field arguments. + // These are moved towards the end of the argument evaluation. + // [We use a backward iterator pattern] + // + curInx = endTab + 1; + do + { + curInx--; + + CallArg* arg = sortedArgs[curInx]; + + // Skip any already processed args + // + if (!arg->m_processed) + { + GenTree* argx = arg->GetEarlyNode(); + assert(argx != nullptr); + + // As a CQ heuristic, sort TYP_STRUCT args using the cost estimation below. + if (!argx->TypeIs(TYP_STRUCT) && argx->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + noway_assert(curInx <= endTab); + + arg->m_processed = true; + + // place curArgTabEntry at the endTab position by performing a swap + // + if (curInx != endTab) + { + sortedArgs[curInx] = sortedArgs[endTab]; + sortedArgs[endTab] = arg; + } + + endTab--; + argsRemaining--; + } + } + } while (curInx > begTab); + } + + // Finally, take care of all the remaining arguments. + // Note that we fill in one arg at a time using a while loop. + bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop + while (argsRemaining > 0) + { + /* Find the most expensive arg remaining and evaluate it next */ + + CallArg* expensiveArg = nullptr; + unsigned expensiveArgIndex = UINT_MAX; + unsigned expensiveArgCost = 0; + + // [We use a forward iterator pattern] + // + for (curInx = begTab; curInx <= endTab; curInx++) + { + CallArg* arg = sortedArgs[curInx]; + + // Skip any already processed args + // + if (!arg->m_processed) + { + GenTree* argx = arg->GetEarlyNode(); + assert(argx != nullptr); + + // We should have already handled these kinds of args + assert((!argx->OperIs(GT_LCL_VAR, GT_LCL_FLD) || argx->TypeIs(TYP_STRUCT)) && + !argx->OperIs(GT_CNS_INT)); + + // This arg should either have no persistent side effects or be the last one in our table + // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); + + if (argsRemaining == 1) + { + // This is the last arg to place + expensiveArgIndex = curInx; + expensiveArg = arg; + assert(begTab == endTab); + break; + } + else + { + if (!costsPrepared) + { + /* We call gtPrepareCost to measure the cost of evaluating this tree */ + comp->gtPrepareCost(argx); + } + + if (argx->GetCostEx() > expensiveArgCost) + { + // Remember this arg as the most expensive one that we have yet seen + expensiveArgCost = argx->GetCostEx(); + expensiveArgIndex = curInx; + expensiveArg = arg; + } + } + } + } + + noway_assert(expensiveArgIndex != UINT_MAX); + + // put the most expensive arg towards the beginning of the table + + expensiveArg->m_processed = true; + + // place expensiveArgTabEntry at the begTab position by performing a swap + // + if (expensiveArgIndex != begTab) + { + sortedArgs[expensiveArgIndex] = sortedArgs[begTab]; + sortedArgs[begTab] = expensiveArg; + } + + begTab++; + argsRemaining--; + + costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop + } + + // The table should now be completely filled and thus begTab should now be adjacent to endTab + // and regArgsRemaining should be zero + assert(begTab == (endTab + 1)); + assert(argsRemaining == 0); } //------------------------------------------------------------------------------ From c36637d07f88c97f015bd01c0efaa971dedd00c8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 22 Sep 2022 16:02:37 +0200 Subject: [PATCH 16/20] JIT: Smarter ordering of late args based on register uses --- src/coreclr/jit/morph.cpp | 175 +++++++++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ef932bbe2489c..fcdacb2106b92 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1256,6 +1256,50 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) m_argsComplete = true; } +// Estimate registers used by a tree. +static regMaskTP EstimateRegisterUses(Compiler* comp, GenTree* tree) +{ + struct RegisterUsesVisitor : GenTreeVisitor + { + enum + { + DoPreOrder = true, + DoLclVarsOnly = true, + }; + + regMaskTP Registers = RBM_NONE; + + RegisterUsesVisitor(Compiler* comp) : GenTreeVisitor(comp) + { + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* parent) + { + GenTreeLclVarCommon* node = (*use)->AsLclVarCommon(); + LclVarDsc* desc = m_compiler->lvaGetDesc(node); + if (!desc->lvDoNotEnregister && desc->lvIsRegArg) + { + if (desc->GetArgReg() != REG_NA) + { + Registers |= genRegMask(desc->GetArgReg()); + } +#if FEATURE_MULTIREG_ARGS + if (desc->GetOtherArgReg() != REG_NA) + { + Registers |= genRegMask(desc->GetOtherArgReg()); + } +#endif + } + + return fgWalkResult::WALK_CONTINUE; + } + }; + + RegisterUsesVisitor visitor(comp); + visitor.WalkTree(&tree, nullptr); + return visitor.Registers; +} + //------------------------------------------------------------------------ // SortArgs: Sort arguments into a better passing order. // @@ -1275,9 +1319,8 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // idea is to move all "simple" arguments like constants and local vars to // the end, and move the complex arguments towards the beginning. This will // help prevent registers from being spilled by allowing us to evaluate the - // more complex arguments before the simpler arguments. We use the late - // list to keep the sorted result at this point, and the ordering ends up - // looking like: + // more complex arguments before the simpler arguments. The ordering ends + // up looking like: // +------------------------------------+ <--- end of sortedArgs // | constants | // +------------------------------------+ @@ -1338,6 +1381,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the endTab position by performing a swap // + JITDUMP(" [%06u] -> #%d (is constant)\n", argx->gtTreeID, endTab); if (curInx != endTab) { sortedArgs[curInx] = sortedArgs[endTab]; @@ -1374,6 +1418,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the begTab position by performing a swap // + JITDUMP(" [%06u] -> #%d (is call)\n", argx->gtTreeID, begTab); if (curInx != begTab) { sortedArgs[curInx] = sortedArgs[begTab]; @@ -1387,6 +1432,48 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) } } + //// Finally place arguments such that arguments nodes that use parameters + //// are placed before arguments that would clobber the registers those + //// parameters may be in. + // while (argsRemaining > 0) + //{ + // // Find arg that clobbers least amount of registers used by other args. + // unsigned bestNumClobbered = UINT_MAX; + // unsigned bestIndex = 0; + // for (unsigned i = begTab; i <= endTab; i++) + // { + // assert(!sortedArgs[i]->m_processed); + // regMaskTP clobbered = 0; + // for (unsigned j = 0; j < sortedArgs[i]->AbiInfo.NumRegs; j++) + // { + // clobbered |= genRegMask(sortedArgs[i]->AbiInfo.GetRegNum(j)); + // } + + // assert(clobbered != 0); + // unsigned clobberedLaterArgs = 0; + // for (unsigned j = begTab; j <= endTab; j++) + // { + // if (i == j) + // continue; + + // regMaskTP uses = EstimateRegisterUses(comp, sortedArgs[j]->GetNode()); + // if ((uses & clobbered) != 0) + // clobberedLaterArgs++; + // } + + // if (clobberedLaterArgs < bestNumClobbered) + // { + // bestNumClobbered = clobberedLaterArgs; + // bestIndex = i; + // } + // } + + // sortedArgs[bestIndex]->m_processed = true; + // std::swap(sortedArgs[begTab], sortedArgs[bestIndex]); + // begTab++; + // argsRemaining--; + //} + if (argsRemaining > 0) { // Next take care arguments that are temps. @@ -1410,6 +1497,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the begTab position by performing a swap // + JITDUMP(" [%06u] -> #%d (is temp)\n", arg->GetNode()->gtTreeID, begTab); if (curInx != begTab) { sortedArgs[curInx] = sortedArgs[begTab]; @@ -1452,6 +1540,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the endTab position by performing a swap // + JITDUMP(" [%06u] -> #%d (non-struct local)\n", arg->GetNode()->gtTreeID, endTab); if (curInx != endTab) { sortedArgs[curInx] = sortedArgs[endTab]; @@ -1465,6 +1554,27 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) } while (curInx > begTab); } + if (argsRemaining > 0) + { + // Place all arguments that do not clobber registers. + for (unsigned curIndex = begTab; curIndex <= endTab; curIndex++) + { + CallArg* arg = sortedArgs[curIndex]; + if (arg->m_processed) + continue; + + if (arg->AbiInfo.NumRegs <= 0) + { + arg->m_processed = true; + memmove(&sortedArgs[begTab + 1], &sortedArgs[begTab], (curIndex - begTab) * sizeof(CallArg*)); + JITDUMP(" [%06u] -> #%d (stack arg)\n", arg->GetNode()->gtTreeID, begTab); + sortedArgs[begTab] = arg; + begTab++; + argsRemaining--; + } + } + } + // Finally, take care of all the remaining arguments. // Note that we fill in one arg at a time using a while loop. bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop @@ -1475,6 +1585,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) CallArg* expensiveArg = nullptr; unsigned expensiveArgIndex = UINT_MAX; unsigned expensiveArgCost = 0; + unsigned expensiveArgNum = 0; // [We use a forward iterator pattern] // @@ -1494,6 +1605,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) !argx->OperIs(GT_CNS_INT)); // This arg should either have no persistent side effects or be the last one in our table + // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); if (argsRemaining == 1) @@ -1501,6 +1613,7 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // This is the last arg to place expensiveArgIndex = curInx; expensiveArg = arg; + expensiveArgNum = 1; assert(begTab == endTab); break; } @@ -1518,6 +1631,11 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) expensiveArgCost = argx->GetCostEx(); expensiveArgIndex = curInx; expensiveArg = arg; + expensiveArgNum = 1; + } + else if (argx->GetCostEx() == expensiveArgCost) + { + expensiveArgNum++; } } } @@ -1525,12 +1643,63 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) noway_assert(expensiveArgIndex != UINT_MAX); + if (expensiveArgNum > 1) + { + // If there are multiple args with the same cost then place arg + // that clobbers least amount of registers used by other args. + // Find arg that clobbers least amount of registers used by other args. + unsigned bestNumClobbered = UINT_MAX; + unsigned bestIndex = 0; + for (unsigned i = begTab; i <= endTab; i++) + { + if (sortedArgs[i]->m_processed) + continue; + + if (sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) + continue; + + regMaskTP clobbered = 0; + for (unsigned j = 0; j < sortedArgs[i]->AbiInfo.NumRegs; j++) + { + clobbered |= genRegMask(sortedArgs[i]->AbiInfo.GetRegNum(j)); + } + + assert(clobbered != 0); + unsigned clobberedLaterArgs = 0; + for (unsigned j = begTab; j <= endTab; j++) + { + if (i == j || sortedArgs[i]->m_processed || + sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) + continue; + + regMaskTP uses = EstimateRegisterUses(comp, sortedArgs[j]->GetNode()); + if ((uses & clobbered) != 0) + clobberedLaterArgs++; + } + + if (clobberedLaterArgs < bestNumClobbered) + { + bestNumClobbered = clobberedLaterArgs; + bestIndex = i; + } + } + + expensiveArg = sortedArgs[bestIndex]; + expensiveArgIndex = bestIndex; + } + // put the most expensive arg towards the beginning of the table expensiveArg->m_processed = true; // place expensiveArgTabEntry at the begTab position by performing a swap // + JITDUMP(" [%06u] -> #%d", expensiveArg->GetNode()->gtTreeID, begTab); + if (argsRemaining == 1) + JITDUMP(" (last arg)\n"); + else + JITDUMP(" (cost %u)\n", expensiveArgCost); + if (expensiveArgIndex != begTab) { sortedArgs[expensiveArgIndex] = sortedArgs[begTab]; From a76ef142c0ad93163c31c136fd85c1fd07a7bebb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 22 Sep 2022 16:33:15 +0200 Subject: [PATCH 17/20] Fix build --- src/coreclr/jit/morph.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index fcdacb2106b92..06405a5b4c7f2 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1696,9 +1696,13 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // JITDUMP(" [%06u] -> #%d", expensiveArg->GetNode()->gtTreeID, begTab); if (argsRemaining == 1) + { JITDUMP(" (last arg)\n"); + } else + { JITDUMP(" (cost %u)\n", expensiveArgCost); + } if (expensiveArgIndex != begTab) { From 472749f3a8f8ba36287be2034e10abc4af5792c9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 22 Sep 2022 23:09:13 +0200 Subject: [PATCH 18/20] Fix arg reg check --- src/coreclr/jit/morph.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 06405a5b4c7f2..04289f6193032 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1279,12 +1279,12 @@ static regMaskTP EstimateRegisterUses(Compiler* comp, GenTree* tree) LclVarDsc* desc = m_compiler->lvaGetDesc(node); if (!desc->lvDoNotEnregister && desc->lvIsRegArg) { - if (desc->GetArgReg() != REG_NA) + if ((desc->GetArgReg() != REG_NA) && (desc->GetArgReg() != REG_STK)) { Registers |= genRegMask(desc->GetArgReg()); } #if FEATURE_MULTIREG_ARGS - if (desc->GetOtherArgReg() != REG_NA) + if ((desc->GetOtherArgReg() != REG_NA) && (desc->GetOtherArgReg() != REG_STK)) { Registers |= genRegMask(desc->GetOtherArgReg()); } From f283a600351b450e30d68055ee9b9f280917b3ae Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 26 Sep 2022 19:02:21 +0200 Subject: [PATCH 19/20] Generalize --- src/coreclr/jit/morph.cpp | 197 +++++++++++++++++++++++++------------- 1 file changed, 132 insertions(+), 65 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 04289f6193032..bd563d958f0a0 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1340,6 +1340,11 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) sortedArgs[argCount++] = &arg; } + if (argCount <= 1) + { + return; + } + // Set the beginning and end for the new argument table unsigned curInx; int regCount = 0; @@ -1390,10 +1395,18 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) endTab--; argsRemaining--; + JITDUMP(" "); + for (unsigned i = 0; i < argCount; i++) + JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); + JITDUMP("\n"); } } } while (curInx > 0); + unsigned beforeFirstConstArgIndex = endTab; + + unsigned firstCallArgIndex = begTab; + if (argsRemaining > 0) { // Next take care of arguments that are calls. @@ -1427,11 +1440,17 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) begTab++; argsRemaining--; + JITDUMP(" "); + for (unsigned i = 0; i < argCount; i++) + JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); + JITDUMP("\n"); } } } } + unsigned afterCallIndex = begTab; + //// Finally place arguments such that arguments nodes that use parameters //// are placed before arguments that would clobber the registers those //// parameters may be in. @@ -1506,6 +1525,10 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) begTab++; argsRemaining--; + JITDUMP(" "); + for (unsigned i = 0; i < argCount; i++) + JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); + JITDUMP("\n"); } } } @@ -1549,32 +1572,15 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) endTab--; argsRemaining--; + JITDUMP(" "); + for (unsigned i = 0; i < argCount; i++) + JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); + JITDUMP("\n"); } } } while (curInx > begTab); } - if (argsRemaining > 0) - { - // Place all arguments that do not clobber registers. - for (unsigned curIndex = begTab; curIndex <= endTab; curIndex++) - { - CallArg* arg = sortedArgs[curIndex]; - if (arg->m_processed) - continue; - - if (arg->AbiInfo.NumRegs <= 0) - { - arg->m_processed = true; - memmove(&sortedArgs[begTab + 1], &sortedArgs[begTab], (curIndex - begTab) * sizeof(CallArg*)); - JITDUMP(" [%06u] -> #%d (stack arg)\n", arg->GetNode()->gtTreeID, begTab); - sortedArgs[begTab] = arg; - begTab++; - argsRemaining--; - } - } - } - // Finally, take care of all the remaining arguments. // Note that we fill in one arg at a time using a while loop. bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop @@ -1643,50 +1649,50 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) noway_assert(expensiveArgIndex != UINT_MAX); - if (expensiveArgNum > 1) - { - // If there are multiple args with the same cost then place arg - // that clobbers least amount of registers used by other args. - // Find arg that clobbers least amount of registers used by other args. - unsigned bestNumClobbered = UINT_MAX; - unsigned bestIndex = 0; - for (unsigned i = begTab; i <= endTab; i++) - { - if (sortedArgs[i]->m_processed) - continue; - - if (sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) - continue; - - regMaskTP clobbered = 0; - for (unsigned j = 0; j < sortedArgs[i]->AbiInfo.NumRegs; j++) - { - clobbered |= genRegMask(sortedArgs[i]->AbiInfo.GetRegNum(j)); - } - - assert(clobbered != 0); - unsigned clobberedLaterArgs = 0; - for (unsigned j = begTab; j <= endTab; j++) - { - if (i == j || sortedArgs[i]->m_processed || - sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) - continue; - - regMaskTP uses = EstimateRegisterUses(comp, sortedArgs[j]->GetNode()); - if ((uses & clobbered) != 0) - clobberedLaterArgs++; - } - - if (clobberedLaterArgs < bestNumClobbered) - { - bestNumClobbered = clobberedLaterArgs; - bestIndex = i; - } - } - - expensiveArg = sortedArgs[bestIndex]; - expensiveArgIndex = bestIndex; - } + // if (expensiveArgNum > 1) + //{ + // // If there are multiple args with the same cost then place arg + // // that clobbers least amount of registers used by other args. + // // Find arg that clobbers least amount of registers used by other args. + // unsigned bestNumClobbered = UINT_MAX; + // unsigned bestIndex = 0; + // for (unsigned i = begTab; i <= endTab; i++) + // { + // if (sortedArgs[i]->m_processed) + // continue; + + // if (sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) + // continue; + + // regMaskTP clobbered = 0; + // for (unsigned j = 0; j < sortedArgs[i]->AbiInfo.NumRegs; j++) + // { + // clobbered |= genRegMask(sortedArgs[i]->AbiInfo.GetRegNum(j)); + // } + + // assert(clobbered != 0); + // unsigned clobberedLaterArgs = 0; + // for (unsigned j = begTab; j <= endTab; j++) + // { + // if (i == j || sortedArgs[i]->m_processed || + // sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) + // continue; + + // regMaskTP uses = EstimateRegisterUses(comp, sortedArgs[j]->GetNode()); + // if ((uses & clobbered) != 0) + // clobberedLaterArgs++; + // } + + // if (clobberedLaterArgs < bestNumClobbered) + // { + // bestNumClobbered = clobberedLaterArgs; + // bestIndex = i; + // } + // } + + // expensiveArg = sortedArgs[bestIndex]; + // expensiveArgIndex = bestIndex; + //} // put the most expensive arg towards the beginning of the table @@ -1714,6 +1720,67 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) argsRemaining--; costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop + JITDUMP(" "); + for (unsigned i = 0; i < argCount; i++) + JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); + JITDUMP("\n"); + } + + unsigned insertionIndex = afterCallIndex; + // Move all arguments that do not clobber registers back to happen right after GTF_CALL args. + for (unsigned i = afterCallIndex; i != endTab + 1; i++) + { + CallArg* arg = sortedArgs[i]; + if (arg->AbiInfo.NumRegs <= 0) + { + memmove(&sortedArgs[insertionIndex + 1], &sortedArgs[insertionIndex], + (i - insertionIndex) * sizeof(CallArg*)); + JITDUMP(" [%06u] -> #%d (stack arg)\n", arg->GetNode()->gtTreeID, insertionIndex); + sortedArgs[insertionIndex] = arg; + JITDUMP(" "); + for (unsigned i = 0; i < argCount; i++) + JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); + JITDUMP("\n"); + + insertionIndex++; + } + } + + // Now move args whose placement will potentially clobber a later use to the end of the list. + unsigned max = beforeFirstConstArgIndex - firstCallArgIndex + 1; + unsigned curIndex = firstCallArgIndex; + for (unsigned count = 0; count < max; count++) + { + CallArg* arg = sortedArgs[curIndex]; + if (arg->AbiInfo.NumRegs <= 0) + { + curIndex++; + continue; + } + + regMaskTP clobbered = 0; + for (unsigned j = 0; j < arg->AbiInfo.NumRegs; j++) + { + clobbered |= genRegMask(arg->AbiInfo.GetRegNum(j)); + } + + unsigned nextIndex = curIndex + 1; + + for (unsigned i = curIndex + 1; i != beforeFirstConstArgIndex + 1; i++) + { + regMaskTP usedRegs = EstimateRegisterUses(comp, sortedArgs[i]->GetNode()); + if ((clobbered & usedRegs) != 0) + { + memmove(&sortedArgs[curIndex], &sortedArgs[curIndex + 1], + (beforeFirstConstArgIndex - curIndex) * sizeof(CallArg*)); + assert(sortedArgs[beforeFirstConstArgIndex - 1] == sortedArgs[beforeFirstConstArgIndex]); + sortedArgs[beforeFirstConstArgIndex] = arg; + nextIndex--; + break; + } + } + + curIndex = nextIndex; } // The table should now be completely filled and thus begTab should now be adjacent to endTab From 5536609a25aa723c6be7778c1c54bcfcbd39e9e5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 28 Sep 2022 16:31:25 +0200 Subject: [PATCH 20/20] Revert JIT changes --- src/coreclr/jit/morph.cpp | 246 +------------------------------------- 1 file changed, 3 insertions(+), 243 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index bd563d958f0a0..ef932bbe2489c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1256,50 +1256,6 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) m_argsComplete = true; } -// Estimate registers used by a tree. -static regMaskTP EstimateRegisterUses(Compiler* comp, GenTree* tree) -{ - struct RegisterUsesVisitor : GenTreeVisitor - { - enum - { - DoPreOrder = true, - DoLclVarsOnly = true, - }; - - regMaskTP Registers = RBM_NONE; - - RegisterUsesVisitor(Compiler* comp) : GenTreeVisitor(comp) - { - } - - fgWalkResult PreOrderVisit(GenTree** use, GenTree* parent) - { - GenTreeLclVarCommon* node = (*use)->AsLclVarCommon(); - LclVarDsc* desc = m_compiler->lvaGetDesc(node); - if (!desc->lvDoNotEnregister && desc->lvIsRegArg) - { - if ((desc->GetArgReg() != REG_NA) && (desc->GetArgReg() != REG_STK)) - { - Registers |= genRegMask(desc->GetArgReg()); - } -#if FEATURE_MULTIREG_ARGS - if ((desc->GetOtherArgReg() != REG_NA) && (desc->GetOtherArgReg() != REG_STK)) - { - Registers |= genRegMask(desc->GetOtherArgReg()); - } -#endif - } - - return fgWalkResult::WALK_CONTINUE; - } - }; - - RegisterUsesVisitor visitor(comp); - visitor.WalkTree(&tree, nullptr); - return visitor.Registers; -} - //------------------------------------------------------------------------ // SortArgs: Sort arguments into a better passing order. // @@ -1319,8 +1275,9 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // idea is to move all "simple" arguments like constants and local vars to // the end, and move the complex arguments towards the beginning. This will // help prevent registers from being spilled by allowing us to evaluate the - // more complex arguments before the simpler arguments. The ordering ends - // up looking like: + // more complex arguments before the simpler arguments. We use the late + // list to keep the sorted result at this point, and the ordering ends up + // looking like: // +------------------------------------+ <--- end of sortedArgs // | constants | // +------------------------------------+ @@ -1340,11 +1297,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) sortedArgs[argCount++] = &arg; } - if (argCount <= 1) - { - return; - } - // Set the beginning and end for the new argument table unsigned curInx; int regCount = 0; @@ -1386,7 +1338,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the endTab position by performing a swap // - JITDUMP(" [%06u] -> #%d (is constant)\n", argx->gtTreeID, endTab); if (curInx != endTab) { sortedArgs[curInx] = sortedArgs[endTab]; @@ -1395,18 +1346,10 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) endTab--; argsRemaining--; - JITDUMP(" "); - for (unsigned i = 0; i < argCount; i++) - JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); - JITDUMP("\n"); } } } while (curInx > 0); - unsigned beforeFirstConstArgIndex = endTab; - - unsigned firstCallArgIndex = begTab; - if (argsRemaining > 0) { // Next take care of arguments that are calls. @@ -1431,7 +1374,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the begTab position by performing a swap // - JITDUMP(" [%06u] -> #%d (is call)\n", argx->gtTreeID, begTab); if (curInx != begTab) { sortedArgs[curInx] = sortedArgs[begTab]; @@ -1440,59 +1382,11 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) begTab++; argsRemaining--; - JITDUMP(" "); - for (unsigned i = 0; i < argCount; i++) - JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); - JITDUMP("\n"); } } } } - unsigned afterCallIndex = begTab; - - //// Finally place arguments such that arguments nodes that use parameters - //// are placed before arguments that would clobber the registers those - //// parameters may be in. - // while (argsRemaining > 0) - //{ - // // Find arg that clobbers least amount of registers used by other args. - // unsigned bestNumClobbered = UINT_MAX; - // unsigned bestIndex = 0; - // for (unsigned i = begTab; i <= endTab; i++) - // { - // assert(!sortedArgs[i]->m_processed); - // regMaskTP clobbered = 0; - // for (unsigned j = 0; j < sortedArgs[i]->AbiInfo.NumRegs; j++) - // { - // clobbered |= genRegMask(sortedArgs[i]->AbiInfo.GetRegNum(j)); - // } - - // assert(clobbered != 0); - // unsigned clobberedLaterArgs = 0; - // for (unsigned j = begTab; j <= endTab; j++) - // { - // if (i == j) - // continue; - - // regMaskTP uses = EstimateRegisterUses(comp, sortedArgs[j]->GetNode()); - // if ((uses & clobbered) != 0) - // clobberedLaterArgs++; - // } - - // if (clobberedLaterArgs < bestNumClobbered) - // { - // bestNumClobbered = clobberedLaterArgs; - // bestIndex = i; - // } - // } - - // sortedArgs[bestIndex]->m_processed = true; - // std::swap(sortedArgs[begTab], sortedArgs[bestIndex]); - // begTab++; - // argsRemaining--; - //} - if (argsRemaining > 0) { // Next take care arguments that are temps. @@ -1516,7 +1410,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the begTab position by performing a swap // - JITDUMP(" [%06u] -> #%d (is temp)\n", arg->GetNode()->gtTreeID, begTab); if (curInx != begTab) { sortedArgs[curInx] = sortedArgs[begTab]; @@ -1525,10 +1418,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) begTab++; argsRemaining--; - JITDUMP(" "); - for (unsigned i = 0; i < argCount; i++) - JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); - JITDUMP("\n"); } } } @@ -1563,7 +1452,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // place curArgTabEntry at the endTab position by performing a swap // - JITDUMP(" [%06u] -> #%d (non-struct local)\n", arg->GetNode()->gtTreeID, endTab); if (curInx != endTab) { sortedArgs[curInx] = sortedArgs[endTab]; @@ -1572,10 +1460,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) endTab--; argsRemaining--; - JITDUMP(" "); - for (unsigned i = 0; i < argCount; i++) - JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); - JITDUMP("\n"); } } } while (curInx > begTab); @@ -1591,7 +1475,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) CallArg* expensiveArg = nullptr; unsigned expensiveArgIndex = UINT_MAX; unsigned expensiveArgCost = 0; - unsigned expensiveArgNum = 0; // [We use a forward iterator pattern] // @@ -1611,7 +1494,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) !argx->OperIs(GT_CNS_INT)); // This arg should either have no persistent side effects or be the last one in our table - // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1))); if (argsRemaining == 1) @@ -1619,7 +1501,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // This is the last arg to place expensiveArgIndex = curInx; expensiveArg = arg; - expensiveArgNum = 1; assert(begTab == endTab); break; } @@ -1637,11 +1518,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) expensiveArgCost = argx->GetCostEx(); expensiveArgIndex = curInx; expensiveArg = arg; - expensiveArgNum = 1; - } - else if (argx->GetCostEx() == expensiveArgCost) - { - expensiveArgNum++; } } } @@ -1649,67 +1525,12 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) noway_assert(expensiveArgIndex != UINT_MAX); - // if (expensiveArgNum > 1) - //{ - // // If there are multiple args with the same cost then place arg - // // that clobbers least amount of registers used by other args. - // // Find arg that clobbers least amount of registers used by other args. - // unsigned bestNumClobbered = UINT_MAX; - // unsigned bestIndex = 0; - // for (unsigned i = begTab; i <= endTab; i++) - // { - // if (sortedArgs[i]->m_processed) - // continue; - - // if (sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) - // continue; - - // regMaskTP clobbered = 0; - // for (unsigned j = 0; j < sortedArgs[i]->AbiInfo.NumRegs; j++) - // { - // clobbered |= genRegMask(sortedArgs[i]->AbiInfo.GetRegNum(j)); - // } - - // assert(clobbered != 0); - // unsigned clobberedLaterArgs = 0; - // for (unsigned j = begTab; j <= endTab; j++) - // { - // if (i == j || sortedArgs[i]->m_processed || - // sortedArgs[i]->GetNode()->GetCostEx() != expensiveArgCost) - // continue; - - // regMaskTP uses = EstimateRegisterUses(comp, sortedArgs[j]->GetNode()); - // if ((uses & clobbered) != 0) - // clobberedLaterArgs++; - // } - - // if (clobberedLaterArgs < bestNumClobbered) - // { - // bestNumClobbered = clobberedLaterArgs; - // bestIndex = i; - // } - // } - - // expensiveArg = sortedArgs[bestIndex]; - // expensiveArgIndex = bestIndex; - //} - // put the most expensive arg towards the beginning of the table expensiveArg->m_processed = true; // place expensiveArgTabEntry at the begTab position by performing a swap // - JITDUMP(" [%06u] -> #%d", expensiveArg->GetNode()->gtTreeID, begTab); - if (argsRemaining == 1) - { - JITDUMP(" (last arg)\n"); - } - else - { - JITDUMP(" (cost %u)\n", expensiveArgCost); - } - if (expensiveArgIndex != begTab) { sortedArgs[expensiveArgIndex] = sortedArgs[begTab]; @@ -1720,67 +1541,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) argsRemaining--; costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop - JITDUMP(" "); - for (unsigned i = 0; i < argCount; i++) - JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); - JITDUMP("\n"); - } - - unsigned insertionIndex = afterCallIndex; - // Move all arguments that do not clobber registers back to happen right after GTF_CALL args. - for (unsigned i = afterCallIndex; i != endTab + 1; i++) - { - CallArg* arg = sortedArgs[i]; - if (arg->AbiInfo.NumRegs <= 0) - { - memmove(&sortedArgs[insertionIndex + 1], &sortedArgs[insertionIndex], - (i - insertionIndex) * sizeof(CallArg*)); - JITDUMP(" [%06u] -> #%d (stack arg)\n", arg->GetNode()->gtTreeID, insertionIndex); - sortedArgs[insertionIndex] = arg; - JITDUMP(" "); - for (unsigned i = 0; i < argCount; i++) - JITDUMP(" [%06u]", sortedArgs[i]->GetNode()->gtTreeID); - JITDUMP("\n"); - - insertionIndex++; - } - } - - // Now move args whose placement will potentially clobber a later use to the end of the list. - unsigned max = beforeFirstConstArgIndex - firstCallArgIndex + 1; - unsigned curIndex = firstCallArgIndex; - for (unsigned count = 0; count < max; count++) - { - CallArg* arg = sortedArgs[curIndex]; - if (arg->AbiInfo.NumRegs <= 0) - { - curIndex++; - continue; - } - - regMaskTP clobbered = 0; - for (unsigned j = 0; j < arg->AbiInfo.NumRegs; j++) - { - clobbered |= genRegMask(arg->AbiInfo.GetRegNum(j)); - } - - unsigned nextIndex = curIndex + 1; - - for (unsigned i = curIndex + 1; i != beforeFirstConstArgIndex + 1; i++) - { - regMaskTP usedRegs = EstimateRegisterUses(comp, sortedArgs[i]->GetNode()); - if ((clobbered & usedRegs) != 0) - { - memmove(&sortedArgs[curIndex], &sortedArgs[curIndex + 1], - (beforeFirstConstArgIndex - curIndex) * sizeof(CallArg*)); - assert(sortedArgs[beforeFirstConstArgIndex - 1] == sortedArgs[beforeFirstConstArgIndex]); - sortedArgs[beforeFirstConstArgIndex] = arg; - nextIndex--; - break; - } - } - - curIndex = nextIndex; } // The table should now be completely filled and thus begTab should now be adjacent to endTab