Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Auto-Scheduler #1 #2284

Merged
merged 9 commits into from
Jun 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ set(SOUFFLE_SOURCES
ast/analysis/RecursiveClauses.cpp
ast/analysis/RedundantRelations.cpp
ast/analysis/RelationSchedule.cpp
ast/analysis/UniqueKeys.cpp
ast/analysis/JoinSize.cpp
ast/analysis/SCCGraph.cpp
ast/analysis/TopologicallySortedSCCGraph.cpp
ast/analysis/typesystem/PolymorphicObjects.cpp
Expand Down
54 changes: 27 additions & 27 deletions src/ast/analysis/UniqueKeys.cpp → src/ast/analysis/JoinSize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@

/************************************************************************
*
* @file UniqueKeys.cpp
* @file JoinSize.cpp
*
* CountUniqueKeys are used for accumulating selectivity statistics for the auto scheduler
* This analysis determines which CountUniqueKeys statements to emit in the RAM
* EstimateJoinSize statements are used to accumulate selectivity statistics for the auto-scheduler.
* This analysis determines which EstimateJoinSize statements to emit in the RAM.
*
***********************************************************************/

#include "ast/analysis/UniqueKeys.h"
#include "ast/analysis/JoinSize.h"
#include "Global.h"
#include "GraphUtils.h"
#include "ast/BinaryConstraint.h"
Expand Down Expand Up @@ -44,7 +44,7 @@

namespace souffle::ast::analysis {

const analysis::PowerSet& UniqueKeysAnalysis::getSubsets(std::size_t N, std::size_t K) const {
const analysis::PowerSet& JoinSizeAnalysis::getSubsets(std::size_t N, std::size_t K) const {
if (cache.count({N, K})) {
return cache.at({N, K});
}
Expand Down Expand Up @@ -78,7 +78,7 @@ const analysis::PowerSet& UniqueKeysAnalysis::getSubsets(std::size_t N, std::siz
return cache.at({N, K});
}

analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(const ast::RelationSet& scc,
analysis::StratumJoinSize JoinSizeAnalysis::computeRuleVersionStatements(const RelationSet& scc,
const ast::Clause& clause, std::optional<std::size_t> version, ast2ram::TranslationMode mode) {
auto* prog = program;
auto* poly = polyAnalysis;
Expand All @@ -105,7 +105,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
fatal("unaccounted-for constant");
};

analysis::StratumUniqueKeys statements;
analysis::StratumJoinSize statements;

auto getClauseAtomName = [&sccAtoms, &version](const ast::Clause& clause, const ast::Atom* atom,
bool isRecursive, ast2ram::TranslationMode mode) {
Expand Down Expand Up @@ -413,7 +413,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
}
}

// construct a CountUniqueKeys ram node
// construct an EstimateJoinSize ram node
bool isRecursive = recursiveInCurrentStratum.count(i) > 0;
auto relation = getClauseAtomName(clause, atom, isRecursive, mode);
auto& constantMap = atomToIdxConstants.at(i);
Expand All @@ -426,7 +426,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
ss << isRecursive;

if (seenNodes.count(ss.str()) == 0) {
auto node = mk<souffle::ram::CountUniqueKeys>(
auto node = mk<souffle::ram::EstimateJoinSize>(
relation, joinColumns, constantMap, isRecursive);
seenNodes.insert(ss.str());

Expand All @@ -440,7 +440,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
return statements;
}

std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeyStatements() {
std::vector<analysis::StratumJoinSize> JoinSizeAnalysis::computeJoinSizeStatements() {
auto* prog = program;
auto getSccAtoms = [prog](const ast::Clause* clause, const ast::RelationSet& scc) {
const auto& sccAtoms = filter(ast::getBodyLiterals<ast::Atom>(*clause),
Expand All @@ -450,17 +450,17 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta

const auto& sccOrdering = topsortSCCGraphAnalysis->order();

std::vector<analysis::StratumUniqueKeys> uniqueKeyStatements;
uniqueKeyStatements.resize(sccOrdering.size());
std::vector<analysis::StratumJoinSize> joinSizeStatements;
joinSizeStatements.resize(sccOrdering.size());

auto& config = Global::config();
if (!config.has("index-stats")) {
return uniqueKeyStatements;
return joinSizeStatements;
}

// for each stratum (formed from scc ordering)
for (std::size_t i = 0; i < sccOrdering.size(); i++) {
analysis::StratumUniqueKeys stratumNodes;
analysis::StratumJoinSize stratumNodes;

auto scc = sccOrdering[i];
const ast::RelationSet sccRelations = sccGraph->getInternalRelations(scc);
Expand Down Expand Up @@ -512,15 +512,15 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta
}
}
}
uniqueKeyStatements[scc] = std::move(stratumNodes);
joinSizeStatements[scc] = std::move(stratumNodes);
}

std::map<std::string, std::size_t> relationToCompletedStratum;

// first step is to compute the earliest stratum that a non-recursive relation completes
for (std::size_t i = 0; i < sccOrdering.size(); ++i) {
auto scc = sccOrdering[i];
for (const auto& statement : uniqueKeyStatements[scc]) {
for (const auto& statement : joinSizeStatements[scc]) {
const auto& rel = statement->getRelation();

if (statement->isRecursiveRelation()) {
Expand All @@ -536,7 +536,7 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta

for (std::size_t i = 0; i < sccOrdering.size(); ++i) {
auto scc = sccOrdering[i];
for (auto& statement : uniqueKeyStatements[scc]) {
for (auto& statement : joinSizeStatements[scc]) {
const auto& rel = statement->getRelation();
if (statement->isRecursiveRelation()) {
continue;
Expand All @@ -547,34 +547,34 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta
std::size_t newStratum = relationToCompletedStratum.at(rel);

// move the node into the new stratum
uniqueKeyStatements[newStratum].push_back(std::move(statement));
joinSizeStatements[newStratum].push_back(std::move(statement));
}

// erase-remove all nullptr entries from the vector, since moved-from unique_ptrs are guaranteed to be nullptr
auto& v = uniqueKeyStatements[scc];
auto& v = joinSizeStatements[scc];
v.erase(std::remove(v.begin(), v.end(), nullptr), v.end());
}
return uniqueKeyStatements;
return joinSizeStatements;
}

void UniqueKeysAnalysis::run(const TranslationUnit& translationUnit) {
void JoinSizeAnalysis::run(const TranslationUnit& translationUnit) {
program = &translationUnit.getProgram();
sccGraph = &translationUnit.getAnalysis<SCCGraphAnalysis>();
topsortSCCGraphAnalysis = &translationUnit.getAnalysis<TopologicallySortedSCCGraphAnalysis>();
recursiveClauses = &translationUnit.getAnalysis<RecursiveClausesAnalysis>();
polyAnalysis = &translationUnit.getAnalysis<ast::analysis::PolymorphicObjectsAnalysis>();
uniqueKeyStatements = computeUniqueKeyStatements();
joinSizeStatements = computeJoinSizeStatements();
}

void UniqueKeysAnalysis::print(std::ostream& os) const {
os << "Begin UniqueKeyStatements\n";
for (std::size_t i = 0; i < uniqueKeyStatements.size(); ++i) {
void JoinSizeAnalysis::print(std::ostream& os) const {
os << "Begin JoinSizeStatements\n";
for (std::size_t i = 0; i < joinSizeStatements.size(); ++i) {
os << "Stratum: " << i << "\n";
for (auto& s : uniqueKeyStatements[i]) {
for (auto& s : joinSizeStatements[i]) {
os << *s << "\n";
}
}
os << "End UniqueKeyStatements\n";
os << "End JoinSizeStatements\n";
}

} // namespace souffle::ast::analysis
28 changes: 14 additions & 14 deletions src/ast/analysis/UniqueKeys.h → src/ast/analysis/JoinSize.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

/************************************************************************
*
* @file UniqueKeys.h
* @file JoinSize.h

* Computes for every stratum, which CountUniqueKeys nodes to emit in the RAM
* Computes, for every stratum, which EstimateJoinSize nodes to emit in the RAM.
* This is useful for the auto-scheduler to accumulate selectivity statistics
*
***********************************************************************/
Expand All @@ -26,7 +26,7 @@
#include "ast/analysis/typesystem/PolymorphicObjects.h"
#include "ast/utility/Visitor.h"
#include "ast2ram/ClauseTranslator.h"
#include "ram/CountUniqueKeys.h"
#include "ram/EstimateJoinSize.h"
#include "ram/Expression.h"
#include <ostream>
#include <set>
Expand All @@ -40,25 +40,25 @@ namespace souffle::ast::analysis {
* Analysis pass computing, for every stratum, the EstimateJoinSize statements to emit.
*/
using PowerSet = std::vector<std::vector<std::size_t>>;
using StratumUniqueKeys = std::vector<Own<ram::CountUniqueKeys>>;
using StratumJoinSize = std::vector<Own<ram::EstimateJoinSize>>;

class UniqueKeysAnalysis : public Analysis {
class JoinSizeAnalysis : public Analysis {
public:
static constexpr const char* name = "unique-keys";
static constexpr const char* name = "join-size";

UniqueKeysAnalysis() : Analysis(name) {}
JoinSizeAnalysis() : Analysis(name) {}

void run(const TranslationUnit& translationUnit) override;

/** Print the join-size statements of every stratum to the given output stream. */
void print(std::ostream& os) const override;

const StratumUniqueKeys& getUniqueKeyStatementsInSCC(std::size_t scc) const {
return uniqueKeyStatements[scc];
const StratumJoinSize& getJoinSizeStatementsInSCC(std::size_t scc) const {
return joinSizeStatements[scc];
}

private:
std::vector<StratumUniqueKeys> uniqueKeyStatements;
std::vector<StratumJoinSize> joinSizeStatements;

std::set<std::string> seenNodes;
ast::Program* program = nullptr;
Expand All @@ -67,10 +67,10 @@ class UniqueKeysAnalysis : public Analysis {
RecursiveClausesAnalysis* recursiveClauses = nullptr;
PolymorphicObjectsAnalysis* polyAnalysis = nullptr;

// for each stratum compute the CountUniqueKeys nodes to emit
std::vector<StratumUniqueKeys> computeUniqueKeyStatements();
StratumUniqueKeys computeRuleVersionStatements(const ast::RelationSet& sccRelations,
const ast::Clause& clause, std::optional<std::size_t> version,
// for each stratum compute the EstimateJoinSize nodes to emit
std::vector<StratumJoinSize> computeJoinSizeStatements();
StratumJoinSize computeRuleVersionStatements(const RelationSet& sccRelations, const ast::Clause& clause,
std::optional<std::size_t> version,
ast2ram::TranslationMode mode = ast2ram::TranslationMode::DEFAULT);
const PowerSet& getSubsets(std::size_t N, std::size_t K) const;
mutable std::map<std::pair<std::size_t, std::size_t>, PowerSet> cache;
Expand Down
8 changes: 4 additions & 4 deletions src/ast/analysis/ProfileUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ bool ProfileUseAnalysis::hasAutoSchedulerStats() const {
return reader->hasAutoSchedulerStats();
}

std::size_t ProfileUseAnalysis::getNonRecursiveUniqueKeys(
double ProfileUseAnalysis::getNonRecursiveJoinSize(
const std::string& rel, const std::string& attributes, const std::string& constants) const {
return reader->getNonRecursiveCountUniqueKeys(rel, attributes, constants);
return reader->getNonRecursiveEstimateJoinSize(rel, attributes, constants);
}

std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
double ProfileUseAnalysis::getRecursiveJoinSize(const std::string& rel, const std::string& attributes,
const std::string& constants, const std::string& iteration) const {
return reader->getRecursiveCountUniqueKeys(rel, attributes, constants, iteration);
return reader->getRecursiveEstimateJoinSize(rel, attributes, constants, iteration);
}

std::size_t ProfileUseAnalysis::getIterations(const std::string& rel) const {
Expand Down
4 changes: 2 additions & 2 deletions src/ast/analysis/ProfileUse.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ class ProfileUseAnalysis : public Analysis {

bool hasAutoSchedulerStats() const;

std::size_t getNonRecursiveUniqueKeys(
double getNonRecursiveJoinSize(
const std::string& rel, const std::string& attributes, const std::string& constants) const;

std::size_t getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
double getRecursiveJoinSize(const std::string& rel, const std::string& attributes,
const std::string& constants, const std::string& iteration) const;

std::size_t getIterations(const std::string& rel) const;
Expand Down
35 changes: 12 additions & 23 deletions src/ast/utility/SipsMetric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,17 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
assert(profileUseAnalysis->hasAutoSchedulerStats() && "Must have stats in order to auto-schedule!");

auto* prof = profileUseAnalysis;
auto getRelationSize = [&prof](bool isRecursive, const ast::QualifiedName& rel,
const std::vector<std::size_t>& joinColumns,
const std::map<std::size_t, std::string>& constantsMap,
const std::string& iteration) {
auto getJoinSize = [&prof](bool isRecursive, const ast::QualifiedName& rel,
const std::vector<std::size_t>& joinColumns,
const std::map<std::size_t, std::string>& constantsMap,
const std::string& iteration) {
std::set<std::size_t> joinKeys(joinColumns.begin(), joinColumns.end());
for (auto& [k, _] : constantsMap) {
joinKeys.insert(k);
}

if (joinKeys.empty() && !isRecursive) {
return prof->getRelationSize(rel);
return static_cast<double>(prof->getRelationSize(rel));
}

std::stringstream ss;
Expand All @@ -133,10 +133,10 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
constants[constants.size() - 1] = ']';

if (isRecursive) {
return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants, iteration);
return prof->getRecursiveJoinSize(rel.toString(), attributes, constants, iteration);
}

return prof->getNonRecursiveUniqueKeys(rel.toString(), attributes, constants);
return prof->getNonRecursiveJoinSize(rel.toString(), attributes, constants);
};

using AtomIdx = std::size_t;
Expand Down Expand Up @@ -293,7 +293,7 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
PlanTuplesCost p;
p.plan = plan;
for (std::size_t iter = 0; iter < iterations; ++iter) {
std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant, std::to_string(iter));
double tuples = getJoinSize(isRecursive, name, empty, idxConstant, std::to_string(iter));
double cost = static_cast<double>(tuples * atom->getArity());
p.tuplesPerIteration.push_back(tuples);
p.costsPerIteration.push_back(cost);
Expand Down Expand Up @@ -382,25 +382,14 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
if (numBound == atom->getArity()) {
expectedTuples = 1;
} else {
auto relSizeWithConstants = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty,
// get the join size from the profile
expectedTuples = getJoinSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx], std::to_string(iter));

if (joinColumns.empty()) {
expectedTuples = static_cast<double>(relSizeWithConstants);
} else {
auto uniqueKeys = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx], std::to_string(iter));

bool normalize = (uniqueKeys > 0);
expectedTuples =
static_cast<double>(relSizeWithConstants) / (normalize ? uniqueKeys : 1);
}
}

// calculate new number of tuples
std::size_t newTuples = static_cast<std::size_t>(oldTuples[iter] * expectedTuples);
double newTuples = oldTuples[iter] * expectedTuples;

// calculate new cost
double newCost = oldCost[iter] + newTuples * atom->getArity();
Expand Down
2 changes: 1 addition & 1 deletion src/ast/utility/SipsMetric.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class SelingerProfileSipsMetric : public SipsMetric {
/* helper struct for Selinger */
struct PlanTuplesCost {
std::vector<std::size_t> plan;
std::vector<std::size_t> tuplesPerIteration;
std::vector<double> tuplesPerIteration;
std::vector<double> costsPerIteration;
};

Expand Down
2 changes: 1 addition & 1 deletion src/ast2ram/seminaive/ClauseTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@
#include "ram/Aggregate.h"
#include "ram/Break.h"
#include "ram/Constraint.h"
#include "ram/CountUniqueKeys.h"
#include "ram/DebugInfo.h"
#include "ram/EmptinessCheck.h"
#include "ram/EstimateJoinSize.h"
#include "ram/ExistenceCheck.h"
#include "ram/Filter.h"
#include "ram/FloatConstant.h"
Expand Down
Loading