Skip to content

Commit

Permalink
Fixed auto-scheduler for recursive rules using Selinger w/ vector
Browse files Browse the repository at this point in the history
  • Loading branch information
Sam committed Apr 6, 2022
1 parent 3efd902 commit 418a194
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 57 deletions.
9 changes: 6 additions & 3 deletions src/ast/analysis/ProfileUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ std::size_t ProfileUseAnalysis::getNonRecursiveUniqueKeys(
return reader->getNonRecursiveCountUniqueKeys(rel, attributes, constants);
}

std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) const {
return reader->getRecursiveCountUniqueKeys(rel, attributes, constants);
/**
 * Look up the per-iteration unique-key count of a recursive relation.
 *
 * Pure forwarder: all four arguments are handed, unchanged and in the same
 * order, to the profile reader's getRecursiveCountUniqueKeys, and its result
 * is returned as-is.
 *
 * @param rel        relation name
 * @param attributes attribute list encoded as a string
 * @param constants  constant bindings encoded as a string
 * @param iteration  iteration identifier encoded as a string
 * @return the value reported by the reader
 */
std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
        const std::string& constants, const std::string& iteration) const {
    const std::size_t uniqueKeys = reader->getRecursiveCountUniqueKeys(rel, attributes, constants, iteration);
    return uniqueKeys;
}

/**
 * Ask the profile reader how many iterations it holds for a relation.
 *
 * Pure forwarder to the reader's getIterations.
 *
 * @param rel relation name
 * @return the value reported by the reader
 */
std::size_t ProfileUseAnalysis::getIterations(const std::string& rel) const {
    const std::size_t iterationCount = reader->getIterations(rel);
    return iterationCount;
}
} // namespace souffle::ast::analysis
6 changes: 4 additions & 2 deletions src/ast/analysis/ProfileUse.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ class ProfileUseAnalysis : public Analysis {
/**
 * Unique-key count for a relation evaluated non-recursively.
 * The implementation forwards the three arguments to the profile reader's
 * getNonRecursiveCountUniqueKeys.
 *
 * @param rel        relation name
 * @param attributes attribute list encoded as a string
 * @param constants  constant bindings encoded as a string
 */
std::size_t getNonRecursiveUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) const;

std::size_t getRecursiveUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) const;
/**
 * Unique-key count for a recursive relation in one particular iteration.
 * The implementation forwards the four arguments to the profile reader's
 * getRecursiveCountUniqueKeys.
 *
 * @param rel        relation name
 * @param attributes attribute list encoded as a string
 * @param constants  constant bindings encoded as a string
 * @param iteration  iteration identifier encoded as a string
 */
std::size_t getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
const std::string& constants, const std::string& iteration) const;

/**
 * Iteration count for the given relation, as reported by the profile
 * reader's getIterations.
 *
 * @param rel relation name
 */
std::size_t getIterations(const std::string& rel) const;

private:
/** performance model of profile run */
Expand Down
108 changes: 69 additions & 39 deletions src/ast/utility/SipsMetric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
auto* prof = profileUseAnalysis;
auto getRelationSize = [&prof](bool isRecursive, const ast::QualifiedName& rel,
const std::vector<std::size_t>& joinColumns,
const std::map<std::size_t, std::string>& constantsMap) {
const std::map<std::size_t, std::string>& constantsMap,
const std::string& iteration) {
std::set<std::size_t> joinKeys(joinColumns.begin(), joinColumns.end());
for (auto& [k, _] : constantsMap) {
joinKeys.insert(k);
Expand All @@ -127,7 +128,7 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
constants[constants.size() - 1] = ']';

if (isRecursive) {
return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants);
return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants, iteration);
}

return prof->getNonRecursiveUniqueKeys(rel.toString(), attributes, constants);
Expand Down Expand Up @@ -243,6 +244,17 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(

std::unordered_map<AtomIdx, std::map<ArgIdx, std::string>> atomToIdxConstants;

std::size_t iterations = 1;
for (std::size_t i = 0; i < atoms.size(); ++i) {
auto* atom = atoms[i];
std::string name = getClauseAtomName(*clause, atom, sccAtoms, version, mode);
bool isRecursive = recursiveInCurrentStratum.count(i) > 0;
if (isRecursive) {
iterations = prof->getIterations(name);
break;
}
}

AtomIdx atomIdx = 0;
for (auto* atom : atoms) {
std::string name = getClauseAtomName(*clause, atom, sccAtoms, version, mode);
Expand Down Expand Up @@ -271,11 +283,17 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
// start by storing the access cost for each individual relation
std::vector<AtomIdx> empty;
bool isRecursive = recursiveInCurrentStratum.count(atomIdx) > 0;
std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant);
double cost = static_cast<double>(tuples * atom->getArity());
AtomSet singleton = {atomIdx};
std::vector<AtomIdx> plan = {atomIdx};
cache[1].insert(std::make_pair(singleton, PlanTuplesCost(plan, tuples, cost)));
PlanTuplesCost p;
p.plan = plan;
for (std::size_t iter = 0; iter < iterations; ++iter) {
std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant, std::to_string(iter));
double cost = static_cast<double>(tuples * atom->getArity());
p.tuplesPerIteration.push_back(tuples);
p.costsPerIteration.push_back(cost);
}
cache[1].insert(std::make_pair(singleton, p));
++atomIdx;
}

Expand All @@ -295,12 +313,6 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
smallerSubset.insert(subset[j]);
}

// lookup the cost in the cache
auto& planTuplesCost = cache[K - 1].at(smallerSubset);
auto& oldPlan = planTuplesCost.plan;
auto oldTuples = planTuplesCost.tuples;
auto oldCost = planTuplesCost.cost;

// compute the grounded variables from the subset
VarSet groundedVariablesFromSubset;
for (auto idx : smallerSubset) {
Expand Down Expand Up @@ -350,49 +362,67 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
}
}

// lookup the cost in the cache
auto& planTuplesCost = cache[K - 1].at(smallerSubset);
auto& oldPlan = planTuplesCost.plan;
auto oldTuples = planTuplesCost.tuplesPerIteration;
auto oldCost = planTuplesCost.costsPerIteration;

PlanTuplesCost p;
bool isRecursive = recursiveInCurrentStratum.count(atomIdx) > 0;
std::vector<ArgIdx> empty;
double expectedTuples = 0;

if (numBound == atom->getArity()) {
expectedTuples = 1;
} else {
auto relSizeWithConstants = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty,
atomToIdxConstants[atomIdx]);

if (joinColumns.empty()) {
expectedTuples = static_cast<double>(relSizeWithConstants);
double newTotalCost = 0.0;
for (std::size_t iter = 0; iter < iterations; ++iter) {
if (numBound == atom->getArity()) {
expectedTuples = 1;
} else {
auto uniqueKeys = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx]);

bool normalize = (uniqueKeys > 0);
expectedTuples =
static_cast<double>(relSizeWithConstants) / (normalize ? uniqueKeys : 1);
auto relSizeWithConstants = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty,
atomToIdxConstants[atomIdx], std::to_string(iter));

if (joinColumns.empty()) {
expectedTuples = static_cast<double>(relSizeWithConstants);
} else {
auto uniqueKeys = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx], std::to_string(iter));

bool normalize = (uniqueKeys > 0);
expectedTuples =
static_cast<double>(relSizeWithConstants) / (normalize ? uniqueKeys : 1);
}
}
}

// calculate new number of tuples
std::size_t newTuples = static_cast<std::size_t>(oldTuples * expectedTuples);
// calculate new number of tuples
std::size_t newTuples = static_cast<std::size_t>(oldTuples[iter] * expectedTuples);

// calculate new cost
double newCost = oldCost[iter] + newTuples * atom->getArity();

// calculate new cost
double newCost = oldCost + newTuples * atom->getArity();
// add to vector of costs/tuples
p.tuplesPerIteration.push_back(newTuples);
p.costsPerIteration.push_back(newCost);
newTotalCost += newCost;
}

// calculate new plan
std::vector<AtomIdx> newPlan(oldPlan.begin(), oldPlan.end());
newPlan.push_back(atomIdx);
p.plan = newPlan;

// if no plan then insert it
AtomSet currentSet(subset.begin(), subset.end());
if (cache[K].count(currentSet) == 0) {
cache[K].insert(std::make_pair(currentSet, PlanTuplesCost(newPlan, newTuples, newCost)));
}
// if we have a lower cost
else if (cache[K].at(currentSet).cost >= newCost) {
cache[K].erase(currentSet);
cache[K].insert(std::make_pair(currentSet, PlanTuplesCost(newPlan, newTuples, newCost)));
cache[K].insert(std::make_pair(currentSet, p));
} else {
// if we have a lower cost
auto& costVector = cache[K].at(currentSet).costsPerIteration;
double oldTotalCost = std::accumulate(costVector.begin(), costVector.end(), 0.0);
if (oldTotalCost >= newTotalCost) {
cache[K].erase(currentSet);
cache[K].insert(std::make_pair(currentSet, p));
}
}
}
}
Expand Down
7 changes: 2 additions & 5 deletions src/ast/utility/SipsMetric.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,9 @@ class SelingerProfileSipsMetric : public SipsMetric {
private:
/* helper struct for Selinger */
/**
 * Helper record for the Selinger-style plan search: one candidate join order
 * plus its estimates, kept separately for every iteration.
 *
 * Deliberately a plain aggregate with no user-declared constructor, so it can
 * be default-constructed and then filled in field by field.
 */
struct PlanTuplesCost {
    /** atom indices, in the order the plan visits them */
    std::vector<std::size_t> plan;
    /** estimated tuple count; element i belongs to iteration i */
    std::vector<std::size_t> tuplesPerIteration;
    /** estimated cost; element i belongs to iteration i */
    std::vector<double> costsPerIteration;
};

const PowerSet& getSubsets(std::size_t N, std::size_t K) const;
Expand Down
22 changes: 14 additions & 8 deletions src/include/souffle/profile/Reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,16 +428,22 @@ class Reader {
return countNonRecursiveUniqueKeysMap.at(key);
}

std::size_t getRecursiveCountUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) {
/**
 * Number of per-iteration entries stored for a recursive relation.
 *
 * Scans countRecursiveUniqueKeysMap for the first key whose leading token
 * (everything before the first space, or the whole key if it has none) equals
 * `rel`, and returns the size of the map stored under that key.
 *
 * @param rel relation name to look for
 * @return entry count of the first matching key; if nothing matches, the
 *         assertion fires in debug builds and 0 is returned otherwise
 */
std::size_t getIterations(const std::string& rel) {
    for (const auto& [key, perIteration] : countRecursiveUniqueKeysMap) {
        // Compare the leading token in place instead of materialising it with
        // substr(): compare() clamps the count to the key length, so a key
        // without any space (find() == npos) is compared as a whole.
        if (key.compare(0, key.find(' '), rel) == 0) {
            return perIteration.size();
        }
    }
    assert(false && "no recursive profile entry for relation");
    return 0;
}

/**
 * Unique-key count recorded for a recursive relation in a single iteration.
 *
 * The lookup key is "<rel> <attributes> <constants>" (single spaces); the
 * map stored under that key is then indexed by `iteration`. No averaging
 * over iterations is performed: callers receive exactly the entry for the
 * iteration they ask for.
 *
 * Both lookups use at(), so a missing key or iteration is reported by the
 * container rather than silently defaulted (std::out_of_range for the
 * standard map types).
 *
 * @param rel        relation name
 * @param attributes attribute list encoded as a string
 * @param constants  constant bindings encoded as a string
 * @param iteration  iteration identifier used as the inner map key
 * @return the stored count for that iteration, converted to std::size_t
 */
std::size_t getRecursiveCountUniqueKeys(const std::string& rel, const std::string& attributes,
        const std::string& constants, const std::string& iteration) {
    const auto key = rel + " " + attributes + " " + constants;
    const auto& perIteration = countRecursiveUniqueKeysMap.at(key);
    return static_cast<std::size_t>(perIteration.at(iteration));
}

void addRelation(const DirectoryEntry& relation) {
Expand Down

0 comments on commit 418a194

Please sign in to comment.