From fa7e96fc7630d09bfc0a18bbf171b884aec45e7a Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 15:37:51 +0100 Subject: [PATCH 01/19] Only record the datamodel definition once --- src/ROOTFrameWriter.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index 48a47b482..e140ffc4d 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -42,8 +42,6 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c for (const auto& name : catInfo.collsToWrite) { auto* coll = frame.getCollectionForWrite(name); collections.emplace_back(name, const_cast(coll)); - - m_datamodelCollector.registerDatamodelDefinition(coll, name); } // We will at least have a parameters branch, even if there are no @@ -73,6 +71,10 @@ void ROOTFrameWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetBuffers(); // For subset collections we only fill one references branch From 8fc5d815302d157a8282f6ef559cc8ea79edc079 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 16:08:59 +0100 Subject: [PATCH 02/19] Make sure the contents of Frames in one category are consistent --- src/ROOTFrameWriter.cc | 33 +++++++++++++++++++++++++++++ tests/unittests/unittest.cpp | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index e140ffc4d..3ae43f95c 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -11,6 +11,30 @@ namespace podio { +/** + * Check whether existingIds and candidateIds both contain the same collection + * Ids / hashes. Returns false if the two vectors differ in content. Inputs can + * have random order wrt each other, but the assumption is that all the ids are + * unique in each vector. + */ +bool checkConsistentColls(const std::vector& existingColls, + const std::vector& candidateColls) { + if (existingColls.size() != candidateColls.size()) { + return false; + } + + // Since we are guaranteed to have unique names here, we can just look for + // collisions brute force, which seems to be quickest approach for vector + // sizes we typically have here (few hundred) + for (const auto& id : candidateColls) { + if (std::find(existingColls.begin(), existingColls.end(), id) == existingColls.end()) { + return false; + } + } + + return true; +} + ROOTFrameWriter::ROOTFrameWriter(const std::string& filename) { m_file = std::make_unique(filename.c_str(), "recreate"); } @@ -41,6 +65,10 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c collections.reserve(catInfo.collsToWrite.size()); for (const auto& name : catInfo.collsToWrite) { auto* coll = frame.getCollectionForWrite(name); + if (!coll) { + // Make sure all collections that we want to write are actually available + throw std::runtime_error("Collection '" + name + "' in category '" + category + "' is not available in Frame"); + } collections.emplace_back(name, const_cast(coll)); } @@ -50,6 +78,11 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c initBranches(catInfo, collections, const_cast(frame.getParameters())); } else { + // Make sure that the category contents are consistent with the initial + // frame in the category + if (!checkConsistentColls(catInfo.collsToWrite, collsToWrite)) { + throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content"); + } resetBranches(catInfo.branches, collections, &const_cast(frame.getParameters())); } diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index 8a313de28..ebd808748 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -11,8 +11,10 @@ // podio specific includes #include "podio/EventStore.h" +#include "podio/Frame.h" #include "podio/GenericParameters.h" #include "podio/ROOTFrameReader.h" +#include "podio/ROOTFrameWriter.h" #include "podio/ROOTLegacyReader.h" #include "podio/ROOTReader.h" #include "podio/podioVersion.h" @@ -1134,3 +1136,42 @@ TEST_CASE("JSON", "[json]") { } #endif + +TEST_CASE("Consistency ROOTFrameWriter", "[basics]") { + podio::Frame frame; + + frame.put(ExampleClusterCollection(), "clusters"); + frame.put(ExampleClusterCollection(), "clusters2"); + frame.put(ExampleHitCollection(), "hits"); + + podio::ROOTFrameWriter writer("unittests_frame_consistency.root"); + writer.writeFrame(frame, "full"); + + // Write a frame with more collections + frame.put(ExampleHitCollection(), "hits2"); + REQUIRE_THROWS_AS(writer.writeFrame(frame, "full"), std::runtime_error); + + // Write a frame with less collections + podio::Frame frame2; + frame2.put(ExampleClusterCollection(), "clusters"); + frame2.put(ExampleClusterCollection(), "clusters2"); + REQUIRE_THROWS_AS(writer.writeFrame(frame2, "full"), std::runtime_error); + + // Write only a subset of collections + const std::vector collsToWrite = {"clusters", "hits"}; + writer.writeFrame(frame, "subset", collsToWrite); + + // Frame is missing a collection + REQUIRE_THROWS_AS(writer.writeFrame(frame2, "subset", collsToWrite), std::runtime_error); + + // Don't throw if frame contents are different, but the subset that is written + // is consistent + const std::vector otherCollsToWrite = {"clusters", "clusters2"}; + writer.writeFrame(frame, "subset2", otherCollsToWrite); + REQUIRE_NOTHROW(writer.writeFrame(frame2, "subset2", otherCollsToWrite)); + + // Make sure that restricting the second frame works. + // See https://github.com/AIDASoft/podio/issues/382 for the original issue + writer.writeFrame(frame2, "full_frame2"); + REQUIRE_NOTHROW(writer.writeFrame(frame, "full_frame2", frame2.getAvailableCollections())); +} From 9ecce262c4e773013dbdf9d9bc69e57600fb12e7 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 16:37:01 +0100 Subject: [PATCH 03/19] Ignore performance concerns on the path to an exception --- src/ROOTFrameWriter.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index 3ae43f95c..d939bfaf0 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -67,6 +67,7 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c auto* coll = frame.getCollectionForWrite(name); if (!coll) { // Make sure all collections that we want to write are actually available + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) throw std::runtime_error("Collection '" + name + "' in category '" + category + "' is not available in Frame"); } collections.emplace_back(name, const_cast(coll)); From ed99889e76ae35eb43c2216463cef20dce31daa1 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 19:46:36 +0100 Subject: [PATCH 04/19] Refactor RNTupleWriter internals in preparation --- include/podio/ROOTNTupleWriter.h | 4 +--- src/ROOTNTupleWriter.cc | 33 +++++++++++++++----------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/include/podio/ROOTNTupleWriter.h b/include/podio/ROOTNTupleWriter.h index 0f6f6d466..60d07b9dc 100644 --- a/include/podio/ROOTNTupleWriter.h +++ b/include/podio/ROOTNTupleWriter.h @@ -52,9 +52,7 @@ class ROOTNTupleWriter { std::vector schemaVersion{}; }; - std::unordered_map m_collectionInfo{}; - - std::set m_categories{}; + std::unordered_map m_categories{}; bool m_finished{false}; diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index 741af53b3..8cafa4c9e 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -72,11 +72,17 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& collections.emplace_back(name, const_cast(coll)); } - bool new_category = false; if (m_writers.find(category) == m_writers.end()) { - new_category = true; auto model = createModels(collections); m_writers[category] = ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); + + for (const auto& [name, coll] : collections) { + m_categories[category].id.emplace_back(coll->getID()); + m_categories[category].name.emplace_back(name); + m_categories[category].type.emplace_back(coll->getTypeName()); + m_categories[category].isSubsetCollection.emplace_back(coll->isSubsetCollection()); + m_categories[category].schemaVersion.emplace_back(coll->getSchemaVersion()); + } } auto entry = m_writers[category]->GetModel()->CreateBareEntry(); @@ -121,14 +127,6 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& // Not supported // entry->CaptureValueUnsafe(root_utils::paramBranchName, // &const_cast(frame.getParameters())); - - if (new_category) { - m_collectionInfo[category].id.emplace_back(coll->getID()); - m_collectionInfo[category].name.emplace_back(name); - m_collectionInfo[category].type.emplace_back(coll->getTypeName()); - m_collectionInfo[category].isSubsetCollection.emplace_back(coll->isSubsetCollection()); - m_collectionInfo[category].schemaVersion.emplace_back(coll->getSchemaVersion()); - } } auto params = frame.getParameters(); @@ -138,7 +136,6 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& fillParams(params, entry.get()); m_writers[category]->Fill(*entry); - m_categories.insert(category); } std::unique_ptr @@ -227,21 +224,21 @@ void ROOTNTupleWriter::finish() { *edmField = edmDefinitions; auto availableCategoriesField = m_metadata->MakeField>(root_utils::availableCategories); - for (auto& [c, _] : m_collectionInfo) { + for (auto& [c, _] : m_categories) { availableCategoriesField->push_back(c); } - for (auto& category : m_categories) { + for (auto& [category, collInfo] : m_categories) { auto idField = m_metadata->MakeField>({root_utils::idTableName(category)}); - *idField = m_collectionInfo[category].id; + *idField = collInfo.id; auto collectionNameField = m_metadata->MakeField>({root_utils::collectionName(category)}); - *collectionNameField = m_collectionInfo[category].name; + *collectionNameField = collInfo.name; auto collectionTypeField = m_metadata->MakeField>({root_utils::collInfoName(category)}); - *collectionTypeField = m_collectionInfo[category].type; + *collectionTypeField = collInfo.type; auto subsetCollectionField = m_metadata->MakeField>({root_utils::subsetCollection(category)}); - *subsetCollectionField = m_collectionInfo[category].isSubsetCollection; + *subsetCollectionField = collInfo.isSubsetCollection; auto schemaVersionField = m_metadata->MakeField>({"schemaVersion_" + category}); - *schemaVersionField = m_collectionInfo[category].schemaVersion; + *schemaVersionField = collInfo.schemaVersion; } m_metadata->Freeze(); From d470afb7988e9e21c10d39bbd80c37f4f5825803 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 19:54:26 +0100 Subject: [PATCH 05/19] Minimize number of internal maps in RNTuple writer --- include/podio/ROOTNTupleWriter.h | 2 +- src/ROOTNTupleWriter.cc | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/podio/ROOTNTupleWriter.h b/include/podio/ROOTNTupleWriter.h index 60d07b9dc..4ae43592d 100644 --- a/include/podio/ROOTNTupleWriter.h +++ b/include/podio/ROOTNTupleWriter.h @@ -37,7 +37,6 @@ class ROOTNTupleWriter { std::unique_ptr createModels(const std::vector& collections); std::unique_ptr m_metadata{}; - std::unordered_map> m_writers{}; std::unique_ptr m_metadataWriter{}; std::unique_ptr m_file{}; @@ -50,6 +49,7 @@ class ROOTNTupleWriter { std::vector type{}; std::vector isSubsetCollection{}; std::vector schemaVersion{}; + std::unique_ptr writer{nullptr}; }; std::unordered_map m_categories{}; diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index 8cafa4c9e..1f61bbb9d 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -72,9 +72,10 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& collections.emplace_back(name, const_cast(coll)); } - if (m_writers.find(category) == m_writers.end()) { + if (m_categories.find(category) == m_categories.end()) { auto model = createModels(collections); - m_writers[category] = ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); + m_categories[category].writer = + ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); for (const auto& [name, coll] : collections) { m_categories[category].id.emplace_back(coll->getID()); @@ -85,7 +86,7 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& } } - auto entry = m_writers[category]->GetModel()->CreateBareEntry(); + auto entry = m_categories[category].writer->GetModel()->CreateBareEntry(); ROOT::Experimental::RNTupleWriteOptions options; options.SetCompression(ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose); @@ -135,7 +136,7 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& fillParams(params, entry.get()); fillParams(params, entry.get()); - m_writers[category]->Fill(*entry); + m_categories[category].writer->Fill(*entry); } std::unique_ptr @@ -251,7 +252,9 @@ void ROOTNTupleWriter::finish() { // All the tuple writers must be deleted before the file so that they flush // unwritten output - m_writers.clear(); + for (auto& [_, catInfo] : m_categories) { + catInfo.writer.reset(); + } m_metadataWriter.reset(); m_finished = true; From e70375bd3671080770122565173f044766334f3f Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 20:02:07 +0100 Subject: [PATCH 06/19] Finish refactoring to make consistent Frame writing possible --- include/podio/ROOTNTupleWriter.h | 3 ++- src/ROOTNTupleWriter.cc | 39 ++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/include/podio/ROOTNTupleWriter.h b/include/podio/ROOTNTupleWriter.h index 4ae43592d..390b5c810 100644 --- a/include/podio/ROOTNTupleWriter.h +++ b/include/podio/ROOTNTupleWriter.h @@ -44,13 +44,14 @@ class ROOTNTupleWriter { DatamodelDefinitionCollector m_datamodelCollector{}; struct CollectionInfo { - std::vector id{}; + std::vector id{}; std::vector name{}; std::vector type{}; std::vector isSubsetCollection{}; std::vector schemaVersion{}; std::unique_ptr writer{nullptr}; }; + CollectionInfo& getCategoryInfo(const std::string& category); std::unordered_map m_categories{}; diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index 1f61bbb9d..04b683ac5 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -64,25 +64,35 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& category, const std::vector& collsToWrite) { + auto& catInfo = getCategoryInfo(category); + + // Use the writer as proxy to check whether this category has been initialized + // already and do so if not + const bool new_category = (catInfo.writer == nullptr); + if (new_category) { + // This is the minimal information that we need for now + catInfo.name = collsToWrite; + } std::vector collections; - collections.reserve(collsToWrite.size()); - for (const auto& name : collsToWrite) { + collections.reserve(catInfo.name.size()); + // Only loop over the collections that were requested in the first Frame of + // this category + for (const auto& name : catInfo.name) { auto* coll = frame.getCollectionForWrite(name); collections.emplace_back(name, const_cast(coll)); } - if (m_categories.find(category) == m_categories.end()) { + if (new_category) { + // Now we have enough info to populate the rest auto model = createModels(collections); - m_categories[category].writer = - ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); + catInfo.writer = ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); for (const auto& [name, coll] : collections) { - m_categories[category].id.emplace_back(coll->getID()); - m_categories[category].name.emplace_back(name); - m_categories[category].type.emplace_back(coll->getTypeName()); - m_categories[category].isSubsetCollection.emplace_back(coll->isSubsetCollection()); - m_categories[category].schemaVersion.emplace_back(coll->getSchemaVersion()); + catInfo.id.emplace_back(coll->getID()); + catInfo.type.emplace_back(coll->getTypeName()); + catInfo.isSubsetCollection.emplace_back(coll->isSubsetCollection()); + catInfo.schemaVersion.emplace_back(coll->getSchemaVersion()); } } @@ -213,6 +223,15 @@ ROOTNTupleWriter::createModels(const std::vector& collections) return model; } +ROOTNTupleWriter::CollectionInfo& ROOTNTupleWriter::getCategoryInfo(const std::string& category) { + if (auto it = m_categories.find(category); it != m_categories.end()) { + return it->second; + } + + auto [it, _] = m_categories.try_emplace(category, CollectionInfo{}); + return it->second; +} + void ROOTNTupleWriter::finish() { auto podioVersion = podio::version::build_version; From 8c7f0eef72ce0731f86a1edac3194e7dc4c939f2 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 20:08:40 +0100 Subject: [PATCH 07/19] Enforce consistency also for RNTuple writer --- src/ROOTFrameWriter.cc | 26 +------------------------- src/ROOTNTupleWriter.cc | 10 ++++++++++ src/rootUtils.h | 24 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index d939bfaf0..0d7c3572d 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -11,30 +11,6 @@ namespace podio { -/** - * Check whether existingIds and candidateIds both contain the same collection - * Ids / hashes. Returns false if the two vectors differ in content. Inputs can - * have random order wrt each other, but the assumption is that all the ids are - * unique in each vector. - */ -bool checkConsistentColls(const std::vector& existingColls, - const std::vector& candidateColls) { - if (existingColls.size() != candidateColls.size()) { - return false; - } - - // Since we are guaranteed to have unique names here, we can just look for - // collisions brute force, which seems to be quickest approach for vector - // sizes we typically have here (few hundred) - for (const auto& id : candidateColls) { - if (std::find(existingColls.begin(), existingColls.end(), id) == existingColls.end()) { - return false; - } - } - - return true; -} - ROOTFrameWriter::ROOTFrameWriter(const std::string& filename) { m_file = std::make_unique(filename.c_str(), "recreate"); } @@ -81,7 +57,7 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c } else { // Make sure that the category contents are consistent with the initial // frame in the category - if (!checkConsistentColls(catInfo.collsToWrite, collsToWrite)) { + if (!root_utils::checkConsistentColls(catInfo.collsToWrite, collsToWrite)) { throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content"); } resetBranches(catInfo.branches, collections, &const_cast(frame.getParameters())); diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index 04b683ac5..9e2b8798b 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -80,6 +80,12 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& // this category for (const auto& name : catInfo.name) { auto* coll = frame.getCollectionForWrite(name); + if (!coll) { + // Make sure all collections that we want to write are actually available + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + throw std::runtime_error("Collection '" + name + "' in category '" + category + "' is not available in Frame"); + } + collections.emplace_back(name, const_cast(coll)); } @@ -94,6 +100,10 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& catInfo.isSubsetCollection.emplace_back(coll->isSubsetCollection()); catInfo.schemaVersion.emplace_back(coll->getSchemaVersion()); } + } else { + if (!root_utils::checkConsistentColls(catInfo.name, collsToWrite)) { + throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content"); + } } auto entry = m_categories[category].writer->GetModel()->CreateBareEntry(); diff --git a/src/rootUtils.h b/src/rootUtils.h index 523d5c228..b16e807a6 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -272,6 +272,30 @@ inline std::vector sortAlphabeticaly(std::vector strin return strings; } +/** + * Check whether existingIds and candidateIds both contain the same collection + * Ids / hashes. Returns false if the two vectors differ in content. Inputs can + * have random order wrt each other, but the assumption is that all the ids are + * unique in each vector. + */ +inline bool checkConsistentColls(const std::vector& existingColls, + const std::vector& candidateColls) { + if (existingColls.size() != candidateColls.size()) { + return false; + } + + // Since we are guaranteed to have unique names here, we can just look for + // collisions brute force, which seems to be quickest approach for vector + // sizes we typically have here (few hundred) + for (const auto& id : candidateColls) { + if (std::find(existingColls.begin(), existingColls.end(), id) == existingColls.end()) { + return false; + } + } + + return true; +} + } // namespace podio::root_utils #endif From d0a87b5b553074ad483f4d3f8256adb59db527cb Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 20:08:54 +0100 Subject: [PATCH 08/19] Add RNTuple writer unittests --- src/CMakeLists.txt | 1 + tests/unittests/CMakeLists.txt | 2 +- tests/unittests/unittest.cpp | 18 ++++++++++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8cdd0d813..554c51d56 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -109,6 +109,7 @@ PODIO_ADD_LIB_AND_DICT(podioRootIO "${root_headers}" "${root_sources}" root_sele target_link_libraries(podioRootIO PUBLIC podio::podio ROOT::Core ROOT::RIO ROOT::Tree) if(ENABLE_RNTUPLE) target_link_libraries(podioRootIO PUBLIC ROOT::ROOTNTuple) + target_compile_definitions(podioRootIO PUBLIC PODIO_ENABLE_RNTUPLE=1) endif() diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt index 4988ab36d..c49157b3f 100644 --- a/tests/unittests/CMakeLists.txt +++ b/tests/unittests/CMakeLists.txt @@ -88,7 +88,7 @@ else() WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} TEST_PREFIX "UT_" # make it possible to filter easily with -R ^UT TEST_SPEC ${filter_tests} # discover only tests that are known to not fail - DL_PATHS ${CMAKE_CURRENT_BINARY_DIR}:${PROJECT_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + DL_PATHS ${CMAKE_CURRENT_BINARY_DIR}:${PROJECT_BINARY_DIR}/src:${PROJECT_BINARY_DIR}/tests:$:$<$:$>:$ENV{LD_LIBRARY_PATH} PROPERTIES ENVIRONMENT PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index ebd808748..050879ea7 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -7,6 +7,7 @@ #include #include +#include "catch2/catch_template_test_macros.hpp" #include "catch2/catch_test_macros.hpp" // podio specific includes @@ -28,6 +29,13 @@ #include "podio/SIOReader.h" #endif +#ifndef PODIO_ENABLE_RNTUPLE + #define PODIO_ENABLE_RNTUPLE 0 +#endif +#if PODIO_ENABLE_RNTUPLE + #include "podio/ROOTNTupleWriter.h" +#endif + // Test data types #include "datamodel/EventInfoCollection.h" #include "datamodel/ExampleClusterCollection.h" @@ -1137,14 +1145,20 @@ TEST_CASE("JSON", "[json]") { #endif -TEST_CASE("Consistency ROOTFrameWriter", "[basics]") { +#if PODIO_ENABLE_RNTUPLE +using ROOTWriterTypes = std::tuple; +#else +using ROOTWriterTypes = std::tuple; +#endif + +TEMPLATE_LIST_TEST_CASE("Consistent frame contents", "[basics][root]", ROOTWriterTypes) { podio::Frame frame; frame.put(ExampleClusterCollection(), "clusters"); frame.put(ExampleClusterCollection(), "clusters2"); frame.put(ExampleHitCollection(), "hits"); - podio::ROOTFrameWriter writer("unittests_frame_consistency.root"); + TestType writer("unittests_frame_consistency.root"); writer.writeFrame(frame, "full"); // Write a frame with more collections From 38233df5898a9f81a1fee699e91155a84417bfd0 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 8 Nov 2023 20:14:57 +0100 Subject: [PATCH 09/19] Correct docstring to match implementation --- src/rootUtils.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rootUtils.h b/src/rootUtils.h index b16e807a6..5713a9b08 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -273,10 +273,10 @@ inline std::vector sortAlphabeticaly(std::vector strin } /** - * Check whether existingIds and candidateIds both contain the same collection - * Ids / hashes. Returns false if the two vectors differ in content. Inputs can - * have random order wrt each other, but the assumption is that all the ids are - * unique in each vector. + * Check whether existingColls and candidateColls both contain the same + * collection names. Returns false if the two vectors differ in content. Inputs + * can have random order wrt each other, but the assumption is that each vector + * only contains unique names. */ inline bool checkConsistentColls(const std::vector& existingColls, const std::vector& candidateColls) { From 9dbd1353bc9d55c5e529883847988839c7dab280 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 9 Nov 2023 09:32:08 +0100 Subject: [PATCH 10/19] Split test cases into two to tag them differently --- tests/unittests/unittest.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index 050879ea7..a98677a5c 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -1145,20 +1145,18 @@ TEST_CASE("JSON", "[json]") { #endif -#if PODIO_ENABLE_RNTUPLE -using ROOTWriterTypes = std::tuple; -#else -using ROOTWriterTypes = std::tuple; -#endif - -TEMPLATE_LIST_TEST_CASE("Consistent frame contents", "[basics][root]", ROOTWriterTypes) { +// Write a template function that can be used with different writers in order to +// be able to tag the unittests differently. This is necessary because the +// ROOTFrameWriter fails with ASan, but the ROOTNTuple writer doesn't +template +void runConsistentFrameTest(const std::string& filename) { podio::Frame frame; frame.put(ExampleClusterCollection(), "clusters"); frame.put(ExampleClusterCollection(), "clusters2"); frame.put(ExampleHitCollection(), "hits"); - TestType writer("unittests_frame_consistency.root"); + WriterT writer(filename); writer.writeFrame(frame, "full"); // Write a frame with more collections @@ -1189,3 +1187,14 @@ TEMPLATE_LIST_TEST_CASE("Consistent frame contents", "[basics][root]", ROOTWrite writer.writeFrame(frame2, "full_frame2"); REQUIRE_NOTHROW(writer.writeFrame(frame, "full_frame2", frame2.getAvailableCollections())); } + +TEST_CASE("ROOTFrameWriter consistent frame contents", "[ASAN-FAIL][UBSAN-FAIL][basics][root]") { + // The UBSAN-FAIL only happens on clang12 in CI. + runConsistentFrameTest("unittests_frame_consistency.root"); +} + +#if PODIO_ENABLE_RNTUPLE +TEST_CASE("ROOTNTupleWriter consistent frame contents", "[basics][root]") { + runConsistentFrameTest("unittests_frame_consistency.root"); +} +#endif From 7719acff00820992c9c5a8f69e990e482e01d8ec Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 9 Nov 2023 10:36:00 +0100 Subject: [PATCH 11/19] Always define PODIO_ENABLE_RNTUPLE with appropriate value --- src/CMakeLists.txt | 2 ++ tests/unittests/unittest.cpp | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 554c51d56..a68b94bfb 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -110,6 +110,8 @@ target_link_libraries(podioRootIO PUBLIC podio::podio ROOT::Core ROOT::RIO ROOT: if(ENABLE_RNTUPLE) target_link_libraries(podioRootIO PUBLIC ROOT::ROOTNTuple) target_compile_definitions(podioRootIO PUBLIC PODIO_ENABLE_RNTUPLE=1) +else() + target_compile_definitions(podioRootIO PUBLIC PODIO_ENABLE_RNTUPLE=0) endif() diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index a98677a5c..93d058897 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -29,9 +29,6 @@ #include "podio/SIOReader.h" #endif -#ifndef PODIO_ENABLE_RNTUPLE - #define PODIO_ENABLE_RNTUPLE 0 -#endif #if PODIO_ENABLE_RNTUPLE #include "podio/ROOTNTupleWriter.h" #endif From 1d2766f732d9177bee92a8494dcc42f245323ff5 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 9 Nov 2023 11:33:15 +0100 Subject: [PATCH 12/19] Include more information in exception message --- src/ROOTFrameWriter.cc | 3 ++- src/ROOTNTupleWriter.cc | 3 ++- src/rootUtils.h | 46 ++++++++++++++++++++++++++++++++++++ tests/unittests/unittest.cpp | 12 ++++++++-- 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index 0d7c3572d..6e8a96892 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -58,7 +58,8 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c // Make sure that the category contents are consistent with the initial // frame in the category if (!root_utils::checkConsistentColls(catInfo.collsToWrite, collsToWrite)) { - throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content"); + throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content. " + + root_utils::getInconsistentCollsMsg(catInfo.collsToWrite, collsToWrite)); } resetBranches(catInfo.branches, collections, &const_cast(frame.getParameters())); } diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index 9e2b8798b..f6cded7e9 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -102,7 +102,8 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& } } else { if (!root_utils::checkConsistentColls(catInfo.name, collsToWrite)) { - throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content"); + throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content." + + root_utils::getInconsistentCollsMsg(catInfo.name, collsToWrite)); } } diff --git a/src/rootUtils.h b/src/rootUtils.h index 5713a9b08..9e9d5fcb7 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -296,6 +297,51 @@ inline bool checkConsistentColls(const std::vector& existingColls, return true; } +/** + * Get the differences in the existingColls and candidateColls collection names. + * Returns two vectors of collection names. The first one are the collections + * that only exist in the existingColls, the seconde one are the names that only + * exist in the candidateColls. + */ +inline std::tuple, std::vector> +getInconsistentColls(std::vector existingColls, std::vector candidateColls) { + // Need sorted ranges for set_difference + std::sort(existingColls.begin(), existingColls.end()); + std::sort(candidateColls.begin(), candidateColls.end()); + + std::vector onlyInExisting{}; + std::set_difference(existingColls.begin(), existingColls.end(), candidateColls.begin(), candidateColls.end(), + std::back_inserter(onlyInExisting)); + + std::vector onlyInCands{}; + std::set_difference(candidateColls.begin(), candidateColls.end(), existingColls.begin(), existingColls.end(), + std::back_inserter(onlyInCands)); + + return {std::move(onlyInExisting), std::move(onlyInCands)}; +} + +inline std::string getInconsistentCollsMsg(const std::vector& existingColls, + const std::vector& candidateColls) { + const auto& [onlyExisting, onlyCands] = getInconsistentColls(existingColls, candidateColls); + + std::stringstream sstr; + sstr << "missing: ["; + std::string sep = ""; + for (const auto& name : onlyExisting) { + sstr << sep << name; + sep = ","; + } + sep = ""; + sstr << "], superfluous: ["; + for (const auto& name : onlyCands) { + sstr << sep << name; + sep = ","; + } + sstr << "]"; + + return sstr.str(); +} + } // namespace podio::root_utils #endif diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index 93d058897..9b5654c36 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -9,6 +9,7 @@ #include "catch2/catch_template_test_macros.hpp" #include "catch2/catch_test_macros.hpp" +#include "catch2/matchers/catch_matchers_string.hpp" // podio specific includes #include "podio/EventStore.h" @@ -1147,6 +1148,8 @@ TEST_CASE("JSON", "[json]") { // ROOTFrameWriter fails with ASan, but the ROOTNTuple writer doesn't template void runConsistentFrameTest(const std::string& filename) { + using Catch::Matchers::ContainsSubstring; + podio::Frame frame; frame.put(ExampleClusterCollection(), "clusters"); @@ -1158,13 +1161,18 @@ void runConsistentFrameTest(const std::string& filename) { // Write a frame with more collections frame.put(ExampleHitCollection(), "hits2"); - REQUIRE_THROWS_AS(writer.writeFrame(frame, "full"), std::runtime_error); + REQUIRE_THROWS_WITH(writer.writeFrame(frame, "full"), + ContainsSubstring("Trying to write category") && + ContainsSubstring("inconsistent collection content") && + ContainsSubstring("missing: [], superfluous: [hits2]")); // Write a frame with less collections podio::Frame frame2; frame2.put(ExampleClusterCollection(), "clusters"); frame2.put(ExampleClusterCollection(), "clusters2"); - REQUIRE_THROWS_AS(writer.writeFrame(frame2, "full"), std::runtime_error); + REQUIRE_THROWS_WITH(writer.writeFrame(frame2, "full"), + ContainsSubstring("Collection 'hits' in category") && + ContainsSubstring("not available in Frame")); // Write only a subset of collections const std::vector collsToWrite = {"clusters", "hits"}; From 23423008fddd21fb9af8bf545025e97b0b513a75 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 9 Nov 2023 11:50:18 +0100 Subject: [PATCH 13/19] Add possibility to check consistency of frames before writing --- include/podio/ROOTFrameWriter.h | 16 +++++++++++++++ include/podio/ROOTNTupleWriter.h | 16 +++++++++++++++ src/ROOTFrameWriter.cc | 9 +++++++++ src/ROOTNTupleWriter.cc | 9 +++++++++ tests/unittests/unittest.cpp | 34 +++++++++++++++++++++++++++++++- 5 files changed, 83 insertions(+), 1 deletion(-) diff --git a/include/podio/ROOTFrameWriter.h b/include/podio/ROOTFrameWriter.h index addc22692..d6b7ecc8c 100644 --- a/include/podio/ROOTFrameWriter.h +++ b/include/podio/ROOTFrameWriter.h @@ -49,6 +49,22 @@ class ROOTFrameWriter { */ void finish(); + /** Check whether the collsToWrite are consistent with the state of the passed + * category. + * + * Return two vectors of collection names. The first one contains all the + * names that were missing from the collsToWrite but were present in the + * category. The second one contains the names that are present in the + * collsToWrite only. If both vectors are empty the category and the passed + * collsToWrite are consistent. + * + * NOTE: This will only be a meaningful check if the first Frame of the passed + * category has already been written. Also, this check is rather expensive as + * it has to effectively do two set differences. + */ + std::tuple, std::vector> + checkConsistency(const std::vector& collsToWrite, const std::string& category) const; + private: using StoreCollection = std::pair; diff --git a/include/podio/ROOTNTupleWriter.h b/include/podio/ROOTNTupleWriter.h index 390b5c810..6a1b4cd71 100644 --- a/include/podio/ROOTNTupleWriter.h +++ b/include/podio/ROOTNTupleWriter.h @@ -32,6 +32,22 @@ class ROOTNTupleWriter { void writeFrame(const podio::Frame& frame, const std::string& category, const std::vector& collsToWrite); void finish(); + /** Check whether the collsToWrite are consistent with the state of the passed + * category. + * + * Return two vectors of collection names. The first one contains all the + * names that were missing from the collsToWrite but were present in the + * category. The second one contains the names that are present in the + * collsToWrite only. If both vectors are empty the category and the passed + * collsToWrite are consistent. + * + * NOTE: This will only be a meaningful check if the first Frame of the passed + * category has already been written. Also, this check is rather expensive as + * it has to effectively do two set differences. + */ + std::tuple, std::vector> + checkConsistency(const std::vector& collsToWrite, const std::string& category) const; + private: using StoreCollection = std::pair; std::unique_ptr createModels(const std::vector& collections); diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index 6e8a96892..20e6bee53 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -166,4 +166,13 @@ void ROOTFrameWriter::finish() { m_finished = true; } +std::tuple, std::vector> +ROOTFrameWriter::checkConsistency(const std::vector& collsToWrite, const std::string& category) const { + if (const auto it = m_categories.find(category); it != m_categories.end()) { + return root_utils::getInconsistentColls(it->second.collsToWrite, collsToWrite); + } + + return {std::vector{}, collsToWrite}; +} + } // namespace podio diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index f6cded7e9..262b947a4 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -290,4 +290,13 @@ void ROOTNTupleWriter::finish() { m_finished = true; } +std::tuple, std::vector> +ROOTNTupleWriter::checkConsistency(const std::vector& collsToWrite, const std::string& category) const { + if (const auto it = m_categories.find(category); it != m_categories.end()) { + return root_utils::getInconsistentColls(it->second.name, collsToWrite); + } + + return {std::vector{}, collsToWrite}; +} + } // namespace podio diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index 9b5654c36..20a818f49 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -10,6 +10,7 @@ #include "catch2/catch_template_test_macros.hpp" #include "catch2/catch_test_macros.hpp" #include "catch2/matchers/catch_matchers_string.hpp" +#include "catch2/matchers/catch_matchers_vector.hpp" // podio specific includes #include "podio/EventStore.h" @@ -1193,13 +1194,44 @@ void runConsistentFrameTest(const std::string& filename) { REQUIRE_NOTHROW(writer.writeFrame(frame, "full_frame2", frame2.getAvailableCollections())); } +template +void runCheckConsistencyTest(const std::string& filename) { + using Catch::Matchers::UnorderedEquals; + + WriterT writer(filename); + podio::Frame frame; + frame.put(ExampleClusterCollection(), "clusters"); + frame.put(ExampleClusterCollection(), "clusters2"); + frame.put(ExampleHitCollection(), "hits"); + writer.writeFrame(frame, "frame"); + + // Cumbersome way to get the collections that are used for this category + const auto& [categoryColls, emptyVec] = writer.checkConsistency({}, "frame"); + REQUIRE_THAT(categoryColls, UnorderedEquals({"clusters", "clusters2", "hits"})); + REQUIRE(emptyVec.empty()); + + const std::vector collsToWrite = {"clusters", "clusters2", "non-existant"}; + const auto& [missing, superfluous] = writer.checkConsistency(collsToWrite, "frame"); + REQUIRE_THAT(missing, UnorderedEquals({"hits"})); + REQUIRE_THAT(superfluous, UnorderedEquals({"non-existant"})); +} + TEST_CASE("ROOTFrameWriter consistent frame contents", "[ASAN-FAIL][UBSAN-FAIL][basics][root]") { // The UBSAN-FAIL only happens on clang12 in CI. runConsistentFrameTest("unittests_frame_consistency.root"); } +TEST_CASE("ROOTFrameWriter check consistency", "[basics][root]") { + runCheckConsistencyTest("unittests_frame_check_consistency.root"); +} + #if PODIO_ENABLE_RNTUPLE TEST_CASE("ROOTNTupleWriter consistent frame contents", "[basics][root]") { - runConsistentFrameTest("unittests_frame_consistency.root"); + runConsistentFrameTest("unittests_frame_consistency_rntuple.root"); +} + +TEST_CASE("ROOTNTupleWriter check consistency", "[basics][root]") { + runCheckConsistencyTest("unittests_frame_check_consistency_rntuple.root"); } + #endif From 8be26e17b4a6c9dc6d8dd991686d518a0bdfd925 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 9 Nov 2023 13:15:54 +0100 Subject: [PATCH 14/19] Add more labels to exclude tests from running in sanitizer CI --- tests/unittests/unittest.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index 20a818f49..bbb2418d5 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -1216,12 +1216,12 @@ void runCheckConsistencyTest(const std::string& filename) { REQUIRE_THAT(superfluous, UnorderedEquals({"non-existant"})); } -TEST_CASE("ROOTFrameWriter consistent frame contents", "[ASAN-FAIL][UBSAN-FAIL][basics][root]") { - // The UBSAN-FAIL only happens on clang12 in CI. +TEST_CASE("ROOTFrameWriter consistent frame contents", "[ASAN-FAIL][UBSAN-FAIL][TSAN-FAIL][basics][root]") { + // The UBSAN-FAIL and TSAN-FAIL only happens on clang12 in CI. runConsistentFrameTest("unittests_frame_consistency.root"); } -TEST_CASE("ROOTFrameWriter check consistency", "[basics][root]") { +TEST_CASE("ROOTFrameWriter check consistency", "[ASAN-FAIL][UBSAN-FAIL][basics][root]") { runCheckConsistencyTest("unittests_frame_check_consistency.root"); } From 5d8599110c57503943373ac39902a7fa2c74333d Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 9 Nov 2023 13:17:10 +0100 Subject: [PATCH 15/19] Make exception messages consistent Co-authored-by: Andre Sailer --- src/ROOTNTupleWriter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index 262b947a4..dfa75badf 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -102,7 +102,7 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& } } else { if (!root_utils::checkConsistentColls(catInfo.name, collsToWrite)) { - throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content." + + throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content. " + root_utils::getInconsistentCollsMsg(catInfo.name, collsToWrite)); } } From af405fecbbffc21a6174b313e9a8dadb3509c933 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 9 Nov 2023 13:48:39 +0100 Subject: [PATCH 16/19] Use correct label for excluding Thread sanitizer --- tests/unittests/unittest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index bbb2418d5..ab4c3b32f 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -1216,7 +1216,7 @@ void runCheckConsistencyTest(const std::string& filename) { REQUIRE_THAT(superfluous, UnorderedEquals({"non-existant"})); } -TEST_CASE("ROOTFrameWriter consistent frame contents", "[ASAN-FAIL][UBSAN-FAIL][TSAN-FAIL][basics][root]") { +TEST_CASE("ROOTFrameWriter consistent frame contents", "[ASAN-FAIL][UBSAN-FAIL][THREAD-FAIL][basics][root]") { // The UBSAN-FAIL and TSAN-FAIL only happens on clang12 in CI. runConsistentFrameTest("unittests_frame_consistency.root"); } From 3cbcd14501102538ef8a764a6d523843970dc0e4 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Mon, 13 Nov 2023 11:08:17 +0100 Subject: [PATCH 17/19] Make exception message more succinct --- src/rootUtils.h | 25 +++++++++++++++---------- tests/unittests/unittest.cpp | 2 +- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/rootUtils.h b/src/rootUtils.h index 9e9d5fcb7..648a6f166 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -325,19 +325,24 @@ inline std::string getInconsistentCollsMsg(const std::vector& exist const auto& [onlyExisting, onlyCands] = getInconsistentColls(existingColls, candidateColls); std::stringstream sstr; - sstr << "missing: ["; std::string sep = ""; - for (const auto& name : onlyExisting) { - sstr << sep << name; - sep = ","; + if (!onlyExisting.empty()) { + sstr << "missing: ["; + for (const auto& name : onlyExisting) { + sstr << sep << name; + sep = ","; + } + sstr << "]"; } - sep = ""; - sstr << "], superfluous: ["; - for (const auto& name : onlyCands) { - sstr << sep << name; - sep = ","; + if (!onlyCands.empty()) { + sstr << sep << " superfluous: ["; + sep = ""; + for (const auto& name : onlyCands) { + sstr << sep << name; + sep = ","; + } + sstr << "]"; } - sstr << "]"; return sstr.str(); } diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index ab4c3b32f..f08765df5 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -1165,7 +1165,7 @@ void runConsistentFrameTest(const std::string& filename) { REQUIRE_THROWS_WITH(writer.writeFrame(frame, "full"), ContainsSubstring("Trying to write category") && ContainsSubstring("inconsistent collection content") && - ContainsSubstring("missing: [], superfluous: [hits2]")); + ContainsSubstring("superfluous: [hits2]")); // Write a frame with less collections podio::Frame frame2; From 6abff1dc67f22e1ab9237d9db387b614e6465b89 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 14 Nov 2023 08:58:56 +0100 Subject: [PATCH 18/19] Remove unnecessary header --- tests/unittests/unittest.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index f08765df5..c3f4eb6c7 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -7,7 +7,6 @@ #include #include -#include "catch2/catch_template_test_macros.hpp" #include "catch2/catch_test_macros.hpp" #include "catch2/matchers/catch_matchers_string.hpp" #include "catch2/matchers/catch_matchers_vector.hpp" From c480c02e0edac9f2097d977c7c1380b8b2c42477 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 14 Nov 2023 11:58:45 +0100 Subject: [PATCH 19/19] Switch to improved binary_search --- src/ROOTNTupleWriter.cc | 2 +- src/rootUtils.h | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc index dfa75badf..9cf3b9d8d 100644 --- a/src/ROOTNTupleWriter.cc +++ b/src/ROOTNTupleWriter.cc @@ -71,7 +71,7 @@ void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& const bool new_category = (catInfo.writer == nullptr); if (new_category) { // This is the minimal information that we need for now - catInfo.name = collsToWrite; + catInfo.name = root_utils::sortAlphabeticaly(collsToWrite); } std::vector collections; diff --git a/src/rootUtils.h b/src/rootUtils.h index 648a6f166..acc552f24 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -287,9 +287,16 @@ inline bool checkConsistentColls(const std::vector& existingColls, // Since we are guaranteed to have unique names here, we can just look for // collisions brute force, which seems to be quickest approach for vector - // sizes we typically have here (few hundred) + // sizes we typically have (few hundred). We can take advantage of the fact + // that the existingColls are ordered (alphabetically and case-insensitive), + // so we can do a binary_search for (const auto& id : candidateColls) { - if (std::find(existingColls.begin(), existingColls.end(), id) == existingColls.end()) { + if (!std::binary_search(existingColls.begin(), existingColls.end(), id, [](const auto& lhs, const auto& rhs) { + return lhs.size() == rhs.size() && + std::lexicographical_compare( + lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), + [](const auto cl, const auto cr) { return std::tolower(cl) < std::tolower(cr); }); + })) { return false; } }