diff --git a/src/ast/analysis/ProfileUse.cpp b/src/ast/analysis/ProfileUse.cpp index d6d3f7f659e..fe290ea5515 100644 --- a/src/ast/analysis/ProfileUse.cpp +++ b/src/ast/analysis/ProfileUse.cpp @@ -70,9 +70,12 @@ std::size_t ProfileUseAnalysis::getNonRecursiveUniqueKeys( return reader->getNonRecursiveCountUniqueKeys(rel, attributes, constants); } -std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys( - const std::string& rel, const std::string& attributes, const std::string& constants) const { - return reader->getRecursiveCountUniqueKeys(rel, attributes, constants); +std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes, + const std::string& constants, const std::string& iteration) const { + return reader->getRecursiveCountUniqueKeys(rel, attributes, constants, iteration); } +std::size_t ProfileUseAnalysis::getIterations(const std::string& rel) const { + return reader->getIterations(rel); +} } // namespace souffle::ast::analysis diff --git a/src/ast/analysis/ProfileUse.h b/src/ast/analysis/ProfileUse.h index 41310a32bb5..d23de41a2b1 100644 --- a/src/ast/analysis/ProfileUse.h +++ b/src/ast/analysis/ProfileUse.h @@ -59,8 +59,10 @@ class ProfileUseAnalysis : public Analysis { std::size_t getNonRecursiveUniqueKeys( const std::string& rel, const std::string& attributes, const std::string& constants) const; - std::size_t getRecursiveUniqueKeys( - const std::string& rel, const std::string& attributes, const std::string& constants) const; + std::size_t getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes, + const std::string& constants, const std::string& iteration) const; + + std::size_t getIterations(const std::string& rel) const; private: /** performance model of profile run */ diff --git a/src/ast/utility/SipsMetric.cpp b/src/ast/utility/SipsMetric.cpp index 84115e47b67..5a6efdeac3e 100644 --- a/src/ast/utility/SipsMetric.cpp +++ b/src/ast/utility/SipsMetric.cpp @@ -104,7 +104,8 @@ std::vector SelingerProfileSipsMetric::getReordering( auto* prof = profileUseAnalysis; auto getRelationSize = [&prof](bool isRecursive, const ast::QualifiedName& rel, const std::vector& joinColumns, - const std::map& constantsMap) { + const std::map& constantsMap, + const std::string& iteration) { std::set joinKeys(joinColumns.begin(), joinColumns.end()); for (auto& [k, _] : constantsMap) { joinKeys.insert(k); @@ -127,7 +128,7 @@ std::vector SelingerProfileSipsMetric::getReordering( constants[constants.size() - 1] = ']'; if (isRecursive) { - return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants); + return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants, iteration); } return prof->getNonRecursiveUniqueKeys(rel.toString(), attributes, constants); @@ -243,6 +244,17 @@ std::vector SelingerProfileSipsMetric::getReordering( std::unordered_map> atomToIdxConstants; + std::size_t iterations = 1; + for (std::size_t i = 0; i < atoms.size(); ++i) { + auto* atom = atoms[i]; + std::string name = getClauseAtomName(*clause, atom, sccAtoms, version, mode); + bool isRecursive = recursiveInCurrentStratum.count(i) > 0; + if (isRecursive) { + iterations = prof->getIterations(name); + break; + } + } + AtomIdx atomIdx = 0; for (auto* atom : atoms) { std::string name = getClauseAtomName(*clause, atom, sccAtoms, version, mode); @@ -271,11 +283,17 @@ std::vector SelingerProfileSipsMetric::getReordering( // start by storing the access cost for each individual relation std::vector empty; bool isRecursive = recursiveInCurrentStratum.count(atomIdx) > 0; - std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant); - double cost = static_cast(tuples * atom->getArity()); AtomSet singleton = {atomIdx}; std::vector plan = {atomIdx}; - cache[1].insert(std::make_pair(singleton, PlanTuplesCost(plan, tuples, cost))); + PlanTuplesCost p; + p.plan = plan; + for (std::size_t iter = 0; iter < iterations; ++iter) { + std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant, std::to_string(iter)); + double cost = static_cast(tuples * atom->getArity()); + p.tuplesPerIteration.push_back(tuples); + p.costsPerIteration.push_back(cost); + } + cache[1].insert(std::make_pair(singleton, p)); ++atomIdx; } @@ -295,12 +313,6 @@ std::vector SelingerProfileSipsMetric::getReordering( smallerSubset.insert(subset[j]); } - // lookup the cost in the cache - auto& planTuplesCost = cache[K - 1].at(smallerSubset); - auto& oldPlan = planTuplesCost.plan; - auto oldTuples = planTuplesCost.tuples; - auto oldCost = planTuplesCost.cost; - // compute the grounded variables from the subset VarSet groundedVariablesFromSubset; for (auto idx : smallerSubset) { @@ -350,49 +362,67 @@ std::vector SelingerProfileSipsMetric::getReordering( } } + // lookup the cost in the cache + auto& planTuplesCost = cache[K - 1].at(smallerSubset); + auto& oldPlan = planTuplesCost.plan; + auto oldTuples = planTuplesCost.tuplesPerIteration; + auto oldCost = planTuplesCost.costsPerIteration; + + PlanTuplesCost p; bool isRecursive = recursiveInCurrentStratum.count(atomIdx) > 0; std::vector empty; double expectedTuples = 0; - - if (numBound == atom->getArity()) { - expectedTuples = 1; - } else { - auto relSizeWithConstants = getRelationSize(isRecursive, - getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty, - atomToIdxConstants[atomIdx]); - - if (joinColumns.empty()) { - expectedTuples = static_cast(relSizeWithConstants); + double newTotalCost = 0.0; + for (std::size_t iter = 0; iter < iterations; ++iter) { + if (numBound == atom->getArity()) { + expectedTuples = 1; } else { - auto uniqueKeys = getRelationSize(isRecursive, - getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns, - atomToIdxConstants[atomIdx]); - - bool normalize = (uniqueKeys > 0); - expectedTuples = - static_cast(relSizeWithConstants) / (normalize ? uniqueKeys : 1); + auto relSizeWithConstants = getRelationSize(isRecursive, + getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty, + atomToIdxConstants[atomIdx], std::to_string(iter)); + + if (joinColumns.empty()) { + expectedTuples = static_cast(relSizeWithConstants); + } else { + auto uniqueKeys = getRelationSize(isRecursive, + getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns, + atomToIdxConstants[atomIdx], std::to_string(iter)); + + bool normalize = (uniqueKeys > 0); + expectedTuples = + static_cast(relSizeWithConstants) / (normalize ? uniqueKeys : 1); + } } - } - // calculate new number of tuples - std::size_t newTuples = static_cast(oldTuples * expectedTuples); + // calculate new number of tuples + std::size_t newTuples = static_cast(oldTuples[iter] * expectedTuples); + + // calculate new cost + double newCost = oldCost[iter] + newTuples * atom->getArity(); - // calculate new cost - double newCost = oldCost + newTuples * atom->getArity(); + // add to vector of costs/tuples + p.tuplesPerIteration.push_back(newTuples); + p.costsPerIteration.push_back(newCost); + newTotalCost += newCost; + } // calculate new plan std::vector newPlan(oldPlan.begin(), oldPlan.end()); newPlan.push_back(atomIdx); + p.plan = newPlan; // if no plan then insert it AtomSet currentSet(subset.begin(), subset.end()); if (cache[K].count(currentSet) == 0) { - cache[K].insert(std::make_pair(currentSet, PlanTuplesCost(newPlan, newTuples, newCost))); - } - // if we have a lower cost - else if (cache[K].at(currentSet).cost >= newCost) { - cache[K].erase(currentSet); - cache[K].insert(std::make_pair(currentSet, PlanTuplesCost(newPlan, newTuples, newCost))); + cache[K].insert(std::make_pair(currentSet, p)); + } else { + // if we have a lower cost + auto& costVector = cache[K].at(currentSet).costsPerIteration; + double oldTotalCost = std::accumulate(costVector.begin(), costVector.end(), 0.0); + if (oldTotalCost >= newTotalCost) { + cache[K].erase(currentSet); + cache[K].insert(std::make_pair(currentSet, p)); + } } } } diff --git a/src/ast/utility/SipsMetric.h b/src/ast/utility/SipsMetric.h index e2d5d490b7e..38934a0c522 100644 --- a/src/ast/utility/SipsMetric.h +++ b/src/ast/utility/SipsMetric.h @@ -74,12 +74,9 @@ class SelingerProfileSipsMetric : public SipsMetric { private: /* helper struct for Selinger */ struct PlanTuplesCost { - PlanTuplesCost(const std::vector& givenPlan, std::size_t givenTuples, double givenCost) - : plan(givenPlan), tuples(givenTuples), cost(givenCost) {} - std::vector plan; - std::size_t tuples; - double cost; + std::vector tuplesPerIteration; + std::vector costsPerIteration; }; const PowerSet& getSubsets(std::size_t N, std::size_t K) const; diff --git a/src/include/souffle/profile/Reader.h b/src/include/souffle/profile/Reader.h index ffc594f2e98..0dbbf35845b 100644 --- a/src/include/souffle/profile/Reader.h +++ b/src/include/souffle/profile/Reader.h @@ -428,16 +428,22 @@ class Reader { return countNonRecursiveUniqueKeysMap.at(key); } - std::size_t getRecursiveCountUniqueKeys( - const std::string& rel, const std::string& attributes, const std::string& constants) { + std::size_t getIterations(const std::string& rel) { + for (auto& [key, m] : countRecursiveUniqueKeysMap) { + std::string token = key.substr(0, key.find(" ")); + if (token == rel) { + return m.size(); + } + } + assert(false); + return 0; + } + + std::size_t getRecursiveCountUniqueKeys(const std::string& rel, const std::string& attributes, + const std::string& constants, const std::string& iteration) { auto key = rel + " " + attributes + " " + constants; auto& m = countRecursiveUniqueKeysMap.at(key); - double total = 0.0; - for (auto [_, count] : m) { - total += count; - } - double average = ceil(total / m.size()); - return static_cast(average); + return static_cast(m.at(iteration)); } void addRelation(const DirectoryEntry& relation) {