Skip to content

Commit

Permalink
Merge pull request #2284 from SamArch27/master
Browse files Browse the repository at this point in the history
Refactor Auto-Scheduler #1
  • Loading branch information
SamArch27 committed Jun 16, 2022
2 parents 429e168 + 8979bdf commit 429676c
Show file tree
Hide file tree
Showing 24 changed files with 200 additions and 206 deletions.
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ set(SOUFFLE_SOURCES
ast/analysis/RecursiveClauses.cpp
ast/analysis/RedundantRelations.cpp
ast/analysis/RelationSchedule.cpp
ast/analysis/UniqueKeys.cpp
ast/analysis/JoinSize.cpp
ast/analysis/SCCGraph.cpp
ast/analysis/TopologicallySortedSCCGraph.cpp
ast/analysis/typesystem/PolymorphicObjects.cpp
Expand Down
54 changes: 27 additions & 27 deletions src/ast/analysis/UniqueKeys.cpp → src/ast/analysis/JoinSize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@

/************************************************************************
*
* @file UniqueKeys.cpp
* @file JoinSize.cpp
*
* CountUniqueKeys are used for accumulating selectivity statistics for the auto scheduler
* This analysis determines which CountUniqueKeys statements to emit in the RAM
* EstimateJoinSize statements are used to accumulate selectivity statistics for the auto-scheduler.
* This analysis determines which EstimateJoinSize statements to emit in the RAM.
*
***********************************************************************/

#include "ast/analysis/UniqueKeys.h"
#include "ast/analysis/JoinSize.h"
#include "Global.h"
#include "GraphUtils.h"
#include "ast/BinaryConstraint.h"
Expand Down Expand Up @@ -44,7 +44,7 @@

namespace souffle::ast::analysis {

const analysis::PowerSet& UniqueKeysAnalysis::getSubsets(std::size_t N, std::size_t K) const {
const analysis::PowerSet& JoinSizeAnalysis::getSubsets(std::size_t N, std::size_t K) const {
if (cache.count({N, K})) {
return cache.at({N, K});
}
Expand Down Expand Up @@ -78,7 +78,7 @@ const analysis::PowerSet& UniqueKeysAnalysis::getSubsets(std::size_t N, std::siz
return cache.at({N, K});
}

analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(const ast::RelationSet& scc,
analysis::StratumJoinSize JoinSizeAnalysis::computeRuleVersionStatements(const RelationSet& scc,
const ast::Clause& clause, std::optional<std::size_t> version, ast2ram::TranslationMode mode) {
auto* prog = program;
auto* poly = polyAnalysis;
Expand All @@ -105,7 +105,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
fatal("unaccounted-for constant");
};

analysis::StratumUniqueKeys statements;
analysis::StratumJoinSize statements;

auto getClauseAtomName = [&sccAtoms, &version](const ast::Clause& clause, const ast::Atom* atom,
bool isRecursive, ast2ram::TranslationMode mode) {
Expand Down Expand Up @@ -413,7 +413,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
}
}

// construct a CountUniqueKeys ram node
// construct an EstimateJoinSize RAM node
bool isRecursive = recursiveInCurrentStratum.count(i) > 0;
auto relation = getClauseAtomName(clause, atom, isRecursive, mode);
auto& constantMap = atomToIdxConstants.at(i);
Expand All @@ -426,7 +426,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
ss << isRecursive;

if (seenNodes.count(ss.str()) == 0) {
auto node = mk<souffle::ram::CountUniqueKeys>(
auto node = mk<souffle::ram::EstimateJoinSize>(
relation, joinColumns, constantMap, isRecursive);
seenNodes.insert(ss.str());

Expand All @@ -440,7 +440,7 @@ analysis::StratumUniqueKeys UniqueKeysAnalysis::computeRuleVersionStatements(con
return statements;
}

std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeyStatements() {
std::vector<analysis::StratumJoinSize> JoinSizeAnalysis::computeJoinSizeStatements() {
auto* prog = program;
auto getSccAtoms = [prog](const ast::Clause* clause, const ast::RelationSet& scc) {
const auto& sccAtoms = filter(ast::getBodyLiterals<ast::Atom>(*clause),
Expand All @@ -450,17 +450,17 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta

const auto& sccOrdering = topsortSCCGraphAnalysis->order();

std::vector<analysis::StratumUniqueKeys> uniqueKeyStatements;
uniqueKeyStatements.resize(sccOrdering.size());
std::vector<analysis::StratumJoinSize> joinSizeStatements;
joinSizeStatements.resize(sccOrdering.size());

auto& config = Global::config();
if (!config.has("index-stats")) {
return uniqueKeyStatements;
return joinSizeStatements;
}

// for each stratum (formed from scc ordering)
for (std::size_t i = 0; i < sccOrdering.size(); i++) {
analysis::StratumUniqueKeys stratumNodes;
analysis::StratumJoinSize stratumNodes;

auto scc = sccOrdering[i];
const ast::RelationSet sccRelations = sccGraph->getInternalRelations(scc);
Expand Down Expand Up @@ -512,15 +512,15 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta
}
}
}
uniqueKeyStatements[scc] = std::move(stratumNodes);
joinSizeStatements[scc] = std::move(stratumNodes);
}

std::map<std::string, std::size_t> relationToCompletedStratum;

// first step is to compute the earliest stratum that a non-recursive relation completes
for (std::size_t i = 0; i < sccOrdering.size(); ++i) {
auto scc = sccOrdering[i];
for (const auto& statement : uniqueKeyStatements[scc]) {
for (const auto& statement : joinSizeStatements[scc]) {
const auto& rel = statement->getRelation();

if (statement->isRecursiveRelation()) {
Expand All @@ -536,7 +536,7 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta

for (std::size_t i = 0; i < sccOrdering.size(); ++i) {
auto scc = sccOrdering[i];
for (auto& statement : uniqueKeyStatements[scc]) {
for (auto& statement : joinSizeStatements[scc]) {
const auto& rel = statement->getRelation();
if (statement->isRecursiveRelation()) {
continue;
Expand All @@ -547,34 +547,34 @@ std::vector<analysis::StratumUniqueKeys> UniqueKeysAnalysis::computeUniqueKeySta
std::size_t newStratum = relationToCompletedStratum.at(rel);

// move the node into the new stratum
uniqueKeyStatements[newStratum].push_back(std::move(statement));
joinSizeStatements[newStratum].push_back(std::move(statement));
}

// erase-remove all nullptr entries from the vector, since a moved-from unique_ptr is guaranteed to be nullptr
auto& v = uniqueKeyStatements[scc];
auto& v = joinSizeStatements[scc];
v.erase(std::remove(v.begin(), v.end(), nullptr), v.end());
}
return uniqueKeyStatements;
return joinSizeStatements;
}

void UniqueKeysAnalysis::run(const TranslationUnit& translationUnit) {
void JoinSizeAnalysis::run(const TranslationUnit& translationUnit) {
program = &translationUnit.getProgram();
sccGraph = &translationUnit.getAnalysis<SCCGraphAnalysis>();
topsortSCCGraphAnalysis = &translationUnit.getAnalysis<TopologicallySortedSCCGraphAnalysis>();
recursiveClauses = &translationUnit.getAnalysis<RecursiveClausesAnalysis>();
polyAnalysis = &translationUnit.getAnalysis<ast::analysis::PolymorphicObjectsAnalysis>();
uniqueKeyStatements = computeUniqueKeyStatements();
joinSizeStatements = computeJoinSizeStatements();
}

void UniqueKeysAnalysis::print(std::ostream& os) const {
os << "Begin UniqueKeyStatements\n";
for (std::size_t i = 0; i < uniqueKeyStatements.size(); ++i) {
void JoinSizeAnalysis::print(std::ostream& os) const {
os << "Begin JoinSizeStatements\n";
for (std::size_t i = 0; i < joinSizeStatements.size(); ++i) {
os << "Stratum: " << i << "\n";
for (auto& s : uniqueKeyStatements[i]) {
for (auto& s : joinSizeStatements[i]) {
os << *s << "\n";
}
}
os << "End UniqueKeyStatements\n";
os << "End JoinSizeStatements\n";
}

} // namespace souffle::ast::analysis
28 changes: 14 additions & 14 deletions src/ast/analysis/UniqueKeys.h → src/ast/analysis/JoinSize.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

/************************************************************************
*
* @file UniqueKeys.h
* @file JoinSize.h
* Computes for every stratum, which CountUniqueKeys nodes to emit in the RAM
* Computes, for every stratum, which EstimateJoinSize nodes to emit in the RAM.
* This is useful for the auto-scheduler to accumulate selectivity statistics
*
***********************************************************************/
Expand All @@ -26,7 +26,7 @@
#include "ast/analysis/typesystem/PolymorphicObjects.h"
#include "ast/utility/Visitor.h"
#include "ast2ram/ClauseTranslator.h"
#include "ram/CountUniqueKeys.h"
#include "ram/EstimateJoinSize.h"
#include "ram/Expression.h"
#include <ostream>
#include <set>
Expand All @@ -40,25 +40,25 @@ namespace souffle::ast::analysis {
* Analysis pass computing, for every stratum, which EstimateJoinSize nodes to emit.
*/
using PowerSet = std::vector<std::vector<std::size_t>>;
using StratumUniqueKeys = std::vector<Own<ram::CountUniqueKeys>>;
using StratumJoinSize = std::vector<Own<ram::EstimateJoinSize>>;

class UniqueKeysAnalysis : public Analysis {
class JoinSizeAnalysis : public Analysis {
public:
static constexpr const char* name = "unique-keys";
static constexpr const char* name = "join-size";

UniqueKeysAnalysis() : Analysis(name) {}
JoinSizeAnalysis() : Analysis(name) {}

void run(const TranslationUnit& translationUnit) override;

/** Print the join-size statements of every stratum to the given output stream. */
void print(std::ostream& os) const override;

const StratumUniqueKeys& getUniqueKeyStatementsInSCC(std::size_t scc) const {
return uniqueKeyStatements[scc];
const StratumJoinSize& getJoinSizeStatementsInSCC(std::size_t scc) const {
return joinSizeStatements[scc];
}

private:
std::vector<StratumUniqueKeys> uniqueKeyStatements;
std::vector<StratumJoinSize> joinSizeStatements;

std::set<std::string> seenNodes;
ast::Program* program = nullptr;
Expand All @@ -67,10 +67,10 @@ class UniqueKeysAnalysis : public Analysis {
RecursiveClausesAnalysis* recursiveClauses = nullptr;
PolymorphicObjectsAnalysis* polyAnalysis = nullptr;

// for each stratum compute the CountUniqueKeys nodes to emit
std::vector<StratumUniqueKeys> computeUniqueKeyStatements();
StratumUniqueKeys computeRuleVersionStatements(const ast::RelationSet& sccRelations,
const ast::Clause& clause, std::optional<std::size_t> version,
// for each stratum compute the EstimateJoinSize nodes to emit
std::vector<StratumJoinSize> computeJoinSizeStatements();
StratumJoinSize computeRuleVersionStatements(const RelationSet& sccRelations, const ast::Clause& clause,
std::optional<std::size_t> version,
ast2ram::TranslationMode mode = ast2ram::TranslationMode::DEFAULT);
const PowerSet& getSubsets(std::size_t N, std::size_t K) const;
mutable std::map<std::pair<std::size_t, std::size_t>, PowerSet> cache;
Expand Down
8 changes: 4 additions & 4 deletions src/ast/analysis/ProfileUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ bool ProfileUseAnalysis::hasAutoSchedulerStats() const {
return reader->hasAutoSchedulerStats();
}

std::size_t ProfileUseAnalysis::getNonRecursiveUniqueKeys(
double ProfileUseAnalysis::getNonRecursiveJoinSize(
const std::string& rel, const std::string& attributes, const std::string& constants) const {
return reader->getNonRecursiveCountUniqueKeys(rel, attributes, constants);
return reader->getNonRecursiveEstimateJoinSize(rel, attributes, constants);
}

std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
double ProfileUseAnalysis::getRecursiveJoinSize(const std::string& rel, const std::string& attributes,
const std::string& constants, const std::string& iteration) const {
return reader->getRecursiveCountUniqueKeys(rel, attributes, constants, iteration);
return reader->getRecursiveEstimateJoinSize(rel, attributes, constants, iteration);
}

std::size_t ProfileUseAnalysis::getIterations(const std::string& rel) const {
Expand Down
4 changes: 2 additions & 2 deletions src/ast/analysis/ProfileUse.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ class ProfileUseAnalysis : public Analysis {

bool hasAutoSchedulerStats() const;

std::size_t getNonRecursiveUniqueKeys(
double getNonRecursiveJoinSize(
const std::string& rel, const std::string& attributes, const std::string& constants) const;

std::size_t getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
double getRecursiveJoinSize(const std::string& rel, const std::string& attributes,
const std::string& constants, const std::string& iteration) const;

std::size_t getIterations(const std::string& rel) const;
Expand Down
35 changes: 12 additions & 23 deletions src/ast/utility/SipsMetric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,17 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
assert(profileUseAnalysis->hasAutoSchedulerStats() && "Must have stats in order to auto-schedule!");

auto* prof = profileUseAnalysis;
auto getRelationSize = [&prof](bool isRecursive, const ast::QualifiedName& rel,
const std::vector<std::size_t>& joinColumns,
const std::map<std::size_t, std::string>& constantsMap,
const std::string& iteration) {
auto getJoinSize = [&prof](bool isRecursive, const ast::QualifiedName& rel,
const std::vector<std::size_t>& joinColumns,
const std::map<std::size_t, std::string>& constantsMap,
const std::string& iteration) {
std::set<std::size_t> joinKeys(joinColumns.begin(), joinColumns.end());
for (auto& [k, _] : constantsMap) {
joinKeys.insert(k);
}

if (joinKeys.empty() && !isRecursive) {
return prof->getRelationSize(rel);
return static_cast<double>(prof->getRelationSize(rel));
}

std::stringstream ss;
Expand All @@ -133,10 +133,10 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
constants[constants.size() - 1] = ']';

if (isRecursive) {
return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants, iteration);
return prof->getRecursiveJoinSize(rel.toString(), attributes, constants, iteration);
}

return prof->getNonRecursiveUniqueKeys(rel.toString(), attributes, constants);
return prof->getNonRecursiveJoinSize(rel.toString(), attributes, constants);
};

using AtomIdx = std::size_t;
Expand Down Expand Up @@ -293,7 +293,7 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
PlanTuplesCost p;
p.plan = plan;
for (std::size_t iter = 0; iter < iterations; ++iter) {
std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant, std::to_string(iter));
double tuples = getJoinSize(isRecursive, name, empty, idxConstant, std::to_string(iter));
double cost = static_cast<double>(tuples * atom->getArity());
p.tuplesPerIteration.push_back(tuples);
p.costsPerIteration.push_back(cost);
Expand Down Expand Up @@ -382,25 +382,14 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
if (numBound == atom->getArity()) {
expectedTuples = 1;
} else {
auto relSizeWithConstants = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty,
// get the join size from the profile
expectedTuples = getJoinSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx], std::to_string(iter));

if (joinColumns.empty()) {
expectedTuples = static_cast<double>(relSizeWithConstants);
} else {
auto uniqueKeys = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx], std::to_string(iter));

bool normalize = (uniqueKeys > 0);
expectedTuples =
static_cast<double>(relSizeWithConstants) / (normalize ? uniqueKeys : 1);
}
}

// calculate new number of tuples
std::size_t newTuples = static_cast<std::size_t>(oldTuples[iter] * expectedTuples);
double newTuples = oldTuples[iter] * expectedTuples;

// calculate new cost
double newCost = oldCost[iter] + newTuples * atom->getArity();
Expand Down
2 changes: 1 addition & 1 deletion src/ast/utility/SipsMetric.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class SelingerProfileSipsMetric : public SipsMetric {
/* helper struct for Selinger */
struct PlanTuplesCost {
std::vector<std::size_t> plan;
std::vector<std::size_t> tuplesPerIteration;
std::vector<double> tuplesPerIteration;
std::vector<double> costsPerIteration;
};

Expand Down
2 changes: 1 addition & 1 deletion src/ast2ram/seminaive/ClauseTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@
#include "ram/Aggregate.h"
#include "ram/Break.h"
#include "ram/Constraint.h"
#include "ram/CountUniqueKeys.h"
#include "ram/DebugInfo.h"
#include "ram/EmptinessCheck.h"
#include "ram/EstimateJoinSize.h"
#include "ram/ExistenceCheck.h"
#include "ram/Filter.h"
#include "ram/FloatConstant.h"
Expand Down
Loading

0 comments on commit 429676c

Please sign in to comment.