From fc6d9dc66dd8e2d0867364c1d7e882954161f2eb Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Mon, 19 Aug 2024 20:24:40 +0100 Subject: [PATCH] Add dun_render_benchmark Results from a single run (a bit noisy) on my machine: ``` tools/build_and_run_benchmark.py dun_render_benchmark ``` ``` ------------------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations UserCounters... ------------------------------------------------------------------------------------------------------------------------ DunRenderBenchmark/LeftTriangle_Solid_FullyLit 98297 ns 98282 ns 8840 items_per_second=15.1096M/s DunRenderBenchmark/LeftTriangle_Solid_FullyDark 124727 ns 124701 ns 6973 items_per_second=11.9085M/s DunRenderBenchmark/LeftTriangle_Solid_PartiallyLit 514869 ns 514747 ns 1700 items_per_second=2.88491M/s DunRenderBenchmark/LeftTriangle_Transparent_FullyLit 520312 ns 520216 ns 1682 items_per_second=2.85458M/s DunRenderBenchmark/LeftTriangle_Transparent_FullyDark 524440 ns 524331 ns 1664 items_per_second=2.83218M/s DunRenderBenchmark/LeftTriangle_Transparent_PartiallyLit 532300 ns 532162 ns 1647 items_per_second=2.7905M/s DunRenderBenchmark/RightTriangle_Solid_FullyLit 92387 ns 92363 ns 8840 items_per_second=16.7275M/s DunRenderBenchmark/RightTriangle_Solid_FullyDark 85680 ns 85662 ns 9884 items_per_second=18.0361M/s DunRenderBenchmark/RightTriangle_Solid_PartiallyLit 538347 ns 538250 ns 1626 items_per_second=2.87041M/s DunRenderBenchmark/RightTriangle_Transparent_FullyLit 548800 ns 548760 ns 1598 items_per_second=2.81544M/s DunRenderBenchmark/RightTriangle_Transparent_FullyDark 540450 ns 540369 ns 1620 items_per_second=2.85916M/s DunRenderBenchmark/RightTriangle_Transparent_PartiallyLit 555061 ns 555003 ns 1575 items_per_second=2.78377M/s DunRenderBenchmark/TransparentSquare_Solid_FullyLit 700849 ns 700751 ns 1320 items_per_second=3.68176M/s DunRenderBenchmark/TransparentSquare_Solid_FullyDark 
664927 ns 664872 ns 1389 items_per_second=3.88045M/s DunRenderBenchmark/TransparentSquare_Solid_PartiallyLit 1131702 ns 1131559 ns 822 items_per_second=2.28004M/s DunRenderBenchmark/TransparentSquare_Transparent_FullyLit 1022384 ns 1022267 ns 916 items_per_second=2.5238M/s DunRenderBenchmark/TransparentSquare_Transparent_FullyDark 1023193 ns 1023057 ns 900 items_per_second=2.52185M/s DunRenderBenchmark/TransparentSquare_Transparent_PartiallyLit 1033573 ns 1033496 ns 895 items_per_second=2.49638M/s DunRenderBenchmark/Square_Solid_FullyLit 53532 ns 53524 ns 10000 items_per_second=30.8272M/s DunRenderBenchmark/Square_Solid_FullyDark 41993 ns 41987 ns 19794 items_per_second=47.1573M/s DunRenderBenchmark/Square_Solid_PartiallyLit 842772 ns 842615 ns 1108 items_per_second=1.56655M/s DunRenderBenchmark/Square_Transparent_FullyLit 834105 ns 834026 ns 1119 items_per_second=1.58269M/s DunRenderBenchmark/Square_Transparent_FullyDark 831912 ns 831823 ns 1122 items_per_second=1.58688M/s DunRenderBenchmark/Square_Transparent_PartiallyLit 924638 ns 924536 ns 1010 items_per_second=1.42774M/s DunRenderBenchmark/LeftTrapezoid_Solid_FullyLit 33728 ns 33725 ns 24962 items_per_second=18.8583M/s DunRenderBenchmark/LeftTrapezoid_Solid_FullyDark 31088 ns 31085 ns 27444 items_per_second=20.4601M/s DunRenderBenchmark/LeftTrapezoid_Solid_PartiallyLit 268792 ns 268768 ns 3254 items_per_second=1.97196M/s DunRenderBenchmark/LeftTrapezoid_Transparent_FullyLit 277990 ns 277965 ns 3140 items_per_second=1.90672M/s DunRenderBenchmark/LeftTrapezoid_Transparent_FullyDark 268952 ns 268912 ns 3250 items_per_second=1.9709M/s DunRenderBenchmark/LeftTrapezoid_Transparent_PartiallyLit 288869 ns 288826 ns 3056 items_per_second=1.83501M/s DunRenderBenchmark/RightTrapezoid_Solid_FullyLit 29581 ns 29576 ns 28347 items_per_second=21.0984M/s DunRenderBenchmark/RightTrapezoid_Solid_FullyDark 25315 ns 25312 ns 32250 items_per_second=24.6523M/s DunRenderBenchmark/RightTrapezoid_Solid_PartiallyLit 259977 ns 259960 ns 
3312 items_per_second=2.0003M/s DunRenderBenchmark/RightTrapezoid_Transparent_FullyLit 263079 ns 263056 ns 3322 items_per_second=1.97677M/s DunRenderBenchmark/RightTrapezoid_Transparent_FullyDark 259849 ns 259824 ns 3364 items_per_second=2.00136M/s DunRenderBenchmark/RightTrapezoid_Transparent_PartiallyLit 279623 ns 279594 ns 3127 items_per_second=1.85984M/s ``` --- Source/diablo.cpp | 1 + Source/lighting.cpp | 11 ++-- Source/lighting.h | 1 + Source/mpq/mpq_reader.cpp | 1 + test/CMakeLists.txt | 4 +- test/dun_render_benchmark.cpp | 112 ++++++++++++++++++++++++++++++++++ 6 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 test/dun_render_benchmark.cpp diff --git a/Source/diablo.cpp b/Source/diablo.cpp index f93ff6aa99c3..c0559cde1d63 100644 --- a/Source/diablo.cpp +++ b/Source/diablo.cpp @@ -2855,6 +2855,7 @@ void LoadGameLevel(bool firstflag, lvl_entry lvldir) } SetRndSeed(DungeonSeeds[currlevel]); IncProgress(); + LoadTrns(); MakeLightTable(); LoadLvlGFX(); SetDungeonMicros(); diff --git a/Source/lighting.cpp b/Source/lighting.cpp index 2345c88dde40..6b7ea2876dd1 100644 --- a/Source/lighting.cpp +++ b/Source/lighting.cpp @@ -260,6 +260,13 @@ void DoVision(Point position, uint8_t radius, MapExplorationType doAutomap, bool } } +void LoadTrns() +{ + LoadFileInMem("plrgfx\\infra.trn", InfravisionTable); + LoadFileInMem("plrgfx\\stone.trn", StoneTable); + LoadFileInMem("gendata\\pause.trn", PauseTable); +} + void MakeLightTable() { // Generate 16 gradually darker translation tables for doing lighting @@ -319,10 +326,6 @@ void MakeLightTable() assert((FullyLitLightTable != nullptr) == (LightTables[0][0] == 0 && std::adjacent_find(LightTables[0].begin(), LightTables[0].end() - 1, [](auto x, auto y) { return (x + 1) != y; }) == LightTables[0].end() - 1)); assert((FullyDarkLightTable != nullptr) == (std::all_of(LightTables[LightsMax].begin(), LightTables[LightsMax].end(), [](auto x) { return x == 0; }))); - LoadFileInMem("plrgfx\\infra.trn", 
InfravisionTable); - LoadFileInMem("plrgfx\\stone.trn", StoneTable); - LoadFileInMem("gendata\\pause.trn", PauseTable); - // Generate light falloffs ranges const float maxDarkness = 15; const float maxBrightness = 0; diff --git a/Source/lighting.h b/Source/lighting.h index f8888fbf6e50..69fc21aea939 100644 --- a/Source/lighting.h +++ b/Source/lighting.h @@ -65,6 +65,7 @@ void DoUnLight(Point position, uint8_t radius); void DoLighting(Point position, uint8_t radius, DisplacementOf offset); void DoUnVision(Point position, uint8_t radius); void DoVision(Point position, uint8_t radius, MapExplorationType doAutomap, bool visible); +void LoadTrns(); void MakeLightTable(); #ifdef _DEBUG void ToggleLighting(); diff --git a/Source/mpq/mpq_reader.cpp b/Source/mpq/mpq_reader.cpp index 23828cbecb42..63c1b32a9944 100644 --- a/Source/mpq/mpq_reader.cpp +++ b/Source/mpq/mpq_reader.cpp @@ -40,6 +40,7 @@ MpqArchive &MpqArchive::operator=(MpqArchive &&other) noexcept if (archive_ != nullptr) libmpq__archive_close(archive_); archive_ = other.archive_; + other.archive_ = nullptr; tmp_buf_ = std::move(other.tmp_buf_); return *this; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5a17aaf19132..6e9a18972f25 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -48,7 +48,8 @@ set(standalone_tests ) set(benchmarks clx_render_benchmark - crawl_benchmark) + crawl_benchmark + dun_render_benchmark) include(Fixtures.cmake) @@ -88,6 +89,7 @@ target_link_libraries(codec_test PRIVATE libdevilutionx_codec app_fatal_for_test target_link_libraries(clx_render_benchmark PRIVATE libdevilutionx_so) target_link_libraries(crawl_test PRIVATE libdevilutionx_crawl) target_link_libraries(crawl_benchmark PRIVATE libdevilutionx_crawl) +target_link_libraries(dun_render_benchmark PRIVATE libdevilutionx_so) target_link_libraries(file_util_test PRIVATE libdevilutionx_file_util app_fatal_for_testing) target_link_libraries(format_int_test PRIVATE libdevilutionx_format_int language_for_testing) 
target_link_libraries(parse_int_test PRIVATE libdevilutionx_parse_int) diff --git a/test/dun_render_benchmark.cpp b/test/dun_render_benchmark.cpp new file mode 100644 index 000000000000..31dd5ddc8a07 --- /dev/null +++ b/test/dun_render_benchmark.cpp @@ -0,0 +1,112 @@ +#include + +#include +#include + +#include "diablo.h" +#include "engine/clx_sprite.hpp" +#include "engine/displacement.hpp" +#include "engine/load_file.hpp" +#include "engine/render/dun_render.hpp" +#include "engine/surface.hpp" +#include "init.h" +#include "levels/dun_tile.hpp" +#include "levels/gendung.h" +#include "lighting.h" +#include "options.h" +#include "utils/log.hpp" +#include "utils/sdl_wrap.h" + +namespace devilution { +namespace { + +void InitOnce() +{ + [[maybe_unused]] static const bool GlobalInitDone = []() { + LoadCoreArchives(); + LoadGameArchives(); + if (!HaveSpawn() && !HaveDiabdat()) { + LogError("This benchmark needs spawn.mpq or diabdat.mpq"); + exit(1); + } + + leveltype = DTYPE_CATHEDRAL; + pDungeonCels = LoadFileInMem("levels\\l1data\\l1.cel"); + SetDungeonMicros(); + MakeLightTable(); + return true; + }(); +} + +class DunRenderBenchmark : public benchmark::Fixture { +public: + void SetUp(benchmark::State &st) override + { + InitOnce(); + sdl_surface_ = SDLWrap::CreateRGBSurfaceWithFormat( + /*flags=*/0, /*width=*/640, /*height=*/480, /*depth=*/8, SDL_PIXELFORMAT_INDEX8); + if (sdl_surface_ == nullptr) { + LogError("Failed to create SDL Surface: {}", SDL_GetError()); + exit(1); + } + + for (size_t i = 0; i < 700; ++i) { + for (size_t j = 0; j < 10; ++j) { + if (const LevelCelBlock levelCelBlock = DPieceMicros[i].mt[j]; levelCelBlock.hasValue()) { + tiles_[levelCelBlock.type()].push_back(levelCelBlock); + } + } + } + } + +protected: + void RunForTileMaskLight(benchmark::State &state, TileType tileType, MaskType maskType, const uint8_t *lightTable) + { + Surface out = Surface(sdl_surface_.get()); + size_t numItemsProcessed = 0; + const std::vector<LevelCelBlock> &tiles = tiles_[tileType]; +
for (auto _ : state) { + for (const LevelCelBlock &levelCelBlock : tiles) { + RenderTile(out, Point { 320, 240 }, levelCelBlock, maskType, lightTable); + uint8_t color = out[Point { 310, 200 }]; + benchmark::DoNotOptimize(color); + } + numItemsProcessed += tiles.size(); + } + state.SetItemsProcessed(numItemsProcessed); + } + + ankerl::unordered_dense::map<TileType, std::vector<LevelCelBlock>> tiles_; + SDLSurfaceUniquePtr sdl_surface_; +}; + +#define DEFINE_FOR_TILE_AND_MASK_TYPE(TILE_TYPE, MASK_TYPE) \ + BENCHMARK_F(DunRenderBenchmark, TILE_TYPE##_##MASK_TYPE##_FullyLit) \ + (benchmark::State & state) \ + { \ + RunForTileMaskLight(state, TileType::TILE_TYPE, MaskType::MASK_TYPE, FullyLitLightTable); \ + } \ + BENCHMARK_F(DunRenderBenchmark, TILE_TYPE##_##MASK_TYPE##_FullyDark) \ + (benchmark::State & state) \ + { \ + RunForTileMaskLight(state, TileType::TILE_TYPE, MaskType::MASK_TYPE, FullyDarkLightTable); \ + } \ + BENCHMARK_F(DunRenderBenchmark, TILE_TYPE##_##MASK_TYPE##_PartiallyLit) \ + (benchmark::State & state) \ + { \ + RunForTileMaskLight(state, TileType::TILE_TYPE, MaskType::MASK_TYPE, LightTables[5].data()); \ + } + +#define DEFINE_FOR_TILE_TYPE(TILE_TYPE) \ + DEFINE_FOR_TILE_AND_MASK_TYPE(TILE_TYPE, Solid) \ + DEFINE_FOR_TILE_AND_MASK_TYPE(TILE_TYPE, Transparent) + +DEFINE_FOR_TILE_TYPE(LeftTriangle) +DEFINE_FOR_TILE_TYPE(RightTriangle) +DEFINE_FOR_TILE_TYPE(TransparentSquare) +DEFINE_FOR_TILE_TYPE(Square) +DEFINE_FOR_TILE_TYPE(LeftTrapezoid) +DEFINE_FOR_TILE_TYPE(RightTrapezoid) + +} // namespace +} // namespace devilution