diff --git a/CMakeLists.txt b/CMakeLists.txt index b143c728bd..9f321f65dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.11) -project(Kuzu VERSION 0.0.6.1 LANGUAGES CXX) +project(Kuzu VERSION 0.0.6.2 LANGUAGES CXX) find_package(Threads REQUIRED) diff --git a/src/catalog/CMakeLists.txt b/src/catalog/CMakeLists.txt index afa1db7c93..3e21b8ec76 100644 --- a/src/catalog/CMakeLists.txt +++ b/src/catalog/CMakeLists.txt @@ -1,7 +1,8 @@ add_library(kuzu_catalog OBJECT catalog.cpp - catalog_structs.cpp) + catalog_content.cpp + table_schema.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 328b6093df..769b375318 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -2,351 +2,15 @@ #include "common/ser_deser.h" #include "common/string_utils.h" -#include "spdlog/spdlog.h" #include "storage/storage_utils.h" using namespace kuzu::common; -using namespace kuzu::catalog; using namespace kuzu::storage; using namespace kuzu::transaction; namespace kuzu { -namespace common { - -/** - * Specialized serialize and deserialize functions used in Catalog. - * */ - -template<> -void SerDeser::serializeValue( - const Property& value, FileInfo* fileInfo, uint64_t& offset) { - SerDeser::serializeValue(value.name, fileInfo, offset); - SerDeser::serializeValue(value.dataType, fileInfo, offset); - SerDeser::serializeValue(value.propertyID, fileInfo, offset); - SerDeser::serializeValue(value.tableID, fileInfo, offset); -} - -template<> -void SerDeser::deserializeValue(Property& value, FileInfo* fileInfo, uint64_t& offset) { - SerDeser::deserializeValue(value.name, fileInfo, offset); - SerDeser::deserializeValue(value.dataType, fileInfo, offset); - SerDeser::deserializeValue(value.propertyID, fileInfo, offset); - SerDeser::deserializeValue(value.tableID, fileInfo, offset); -} - -template<> -void SerDeser::serializeValue( - const std::unordered_map& value, FileInfo* fileInfo, uint64_t& offset) { - SerDeser::serializeValue(value.size(), fileInfo, offset); - for (auto& entry : value) { - SerDeser::serializeValue(entry.first, fileInfo, offset); - SerDeser::serializeValue(entry.second, fileInfo, offset); - } -} - -template<> -void SerDeser::deserializeValue( - std::unordered_map& value, FileInfo* fileInfo, uint64_t& offset) { - uint64_t numEntries = 0; - deserializeValue(numEntries, fileInfo, offset); - for (auto i = 0u; i < numEntries; i++) { - table_id_t tableID; - uint64_t num; - deserializeValue(tableID, fileInfo, offset); - deserializeValue(num, fileInfo, offset); - value.emplace(tableID, num); - } -} - -template<> -void SerDeser::serializeVector>( - const std::vector>& values, FileInfo* fileInfo, uint64_t& offset) { - uint64_t vectorSize = values.size(); - SerDeser::serializeValue(vectorSize, fileInfo, offset); - for (auto& value : values) { - serializeVector(value, fileInfo, offset); - } -} - -template<> -void SerDeser::serializeValue( - const TableSchema& value, FileInfo* fileInfo, uint64_t& offset) { - serializeValue(value.tableName, fileInfo, offset); - serializeValue(value.tableID, fileInfo, offset); - serializeVector(value.properties, fileInfo, offset); - serializeValue(value.nextPropertyID, fileInfo, offset); -} - -template<> -void SerDeser::deserializeValue( - TableSchema& value, FileInfo* fileInfo, uint64_t& offset) { - deserializeValue(value.tableName, fileInfo, offset); - deserializeValue(value.tableID, fileInfo, offset); - deserializeVector(value.properties, fileInfo, offset); - deserializeValue(value.nextPropertyID, fileInfo, offset); -} - -template<> -void SerDeser::serializeValue( - const NodeTableSchema& value, FileInfo* fileInfo, uint64_t& offset) { - SerDeser::serializeValue((const TableSchema&)value, fileInfo, offset); - SerDeser::serializeValue(value.primaryKeyPropertyID, fileInfo, offset); - SerDeser::serializeUnorderedSet(value.fwdRelTableIDSet, fileInfo, offset); - SerDeser::serializeUnorderedSet(value.bwdRelTableIDSet, fileInfo, offset); -} - -template<> -void SerDeser::deserializeValue( - NodeTableSchema& value, FileInfo* fileInfo, uint64_t& offset) { - deserializeValue((TableSchema&)value, fileInfo, offset); - deserializeValue(value.primaryKeyPropertyID, fileInfo, offset); - deserializeUnorderedSet(value.fwdRelTableIDSet, fileInfo, offset); - deserializeUnorderedSet(value.bwdRelTableIDSet, fileInfo, offset); -} - -template<> -void SerDeser::serializeValue( - const RelTableSchema& value, FileInfo* fileInfo, uint64_t& offset) { - SerDeser::serializeValue((const TableSchema&)value, fileInfo, offset); - SerDeser::serializeValue(value.relMultiplicity, fileInfo, offset); - SerDeser::serializeValue(value.srcTableID, fileInfo, offset); - SerDeser::serializeValue(value.dstTableID, fileInfo, offset); - SerDeser::serializeValue(value.srcPKDataType, fileInfo, offset); - SerDeser::serializeValue(value.dstPKDataType, fileInfo, offset); -} - -template<> -void SerDeser::deserializeValue( - RelTableSchema& value, FileInfo* fileInfo, uint64_t& offset) { - deserializeValue((TableSchema&)value, fileInfo, offset); - deserializeValue(value.relMultiplicity, fileInfo, offset); - deserializeValue(value.srcTableID, fileInfo, offset); - deserializeValue(value.dstTableID, fileInfo, offset); - deserializeValue(value.srcPKDataType, fileInfo, offset); - deserializeValue(value.dstPKDataType, fileInfo, offset); -} - -} // namespace common - namespace catalog { -CatalogContent::CatalogContent() : nextTableID{0} { - registerBuiltInFunctions(); -} - -CatalogContent::CatalogContent(const std::string& directory) { - readFromFile(directory, DBFileType::ORIGINAL); - registerBuiltInFunctions(); -} - -CatalogContent::CatalogContent(const CatalogContent& other) { - for (auto& nodeTableSchema : other.nodeTableSchemas) { - auto newNodeTableSchema = std::make_unique(*nodeTableSchema.second); - nodeTableSchemas[newNodeTableSchema->tableID] = std::move(newNodeTableSchema); - } - for (auto& relTableSchema : other.relTableSchemas) { - auto newRelTableSchema = std::make_unique(*relTableSchema.second); - relTableSchemas[newRelTableSchema->tableID] = std::move(newRelTableSchema); - } - nodeTableNameToIDMap = other.nodeTableNameToIDMap; - relTableNameToIDMap = other.relTableNameToIDMap; - nextTableID = other.nextTableID; - registerBuiltInFunctions(); - for (auto& macro : other.macros) { - macros.emplace(macro.first, macro.second->copy()); - } -} - -table_id_t CatalogContent::addNodeTableSchema( - std::string tableName, property_id_t primaryKeyId, std::vector properties) { - table_id_t tableID = assignNextTableID(); - for (auto i = 0u; i < properties.size(); ++i) { - properties[i].propertyID = i; - properties[i].tableID = tableID; - } - auto nodeTableSchema = std::make_unique( - std::move(tableName), tableID, primaryKeyId, std::move(properties)); - nodeTableNameToIDMap[nodeTableSchema->tableName] = tableID; - nodeTableSchemas[tableID] = std::move(nodeTableSchema); - return tableID; -} - -table_id_t CatalogContent::addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity, - std::vector properties, table_id_t srcTableID, table_id_t dstTableID, - LogicalType srcPKDataType, LogicalType dstPKDataType) { - table_id_t tableID = assignNextTableID(); - nodeTableSchemas[srcTableID]->addFwdRelTableID(tableID); - nodeTableSchemas[dstTableID]->addBwdRelTableID(tableID); - auto relInternalIDProperty = - Property(InternalKeyword::ID, LogicalType{LogicalTypeID::INTERNAL_ID}); - properties.insert(properties.begin(), relInternalIDProperty); - for (auto i = 0u; i < properties.size(); ++i) { - properties[i].propertyID = i; - properties[i].tableID = tableID; - } - auto relTableSchema = - std::make_unique(std::move(tableName), tableID, relMultiplicity, - std::move(properties), srcTableID, dstTableID, srcPKDataType, dstPKDataType); - relTableNameToIDMap[relTableSchema->tableName] = tableID; - relTableSchemas[tableID] = std::move(relTableSchema); - return tableID; -} - -const Property& CatalogContent::getNodeProperty( - table_id_t tableID, const std::string& propertyName) const { - for (auto& property : nodeTableSchemas.at(tableID)->properties) { - if (propertyName == property.name) { - return property; - } - } - throw CatalogException("Cannot find node property " + propertyName + "."); -} - -const Property& CatalogContent::getRelProperty( - table_id_t tableID, const std::string& propertyName) const { - for (auto& property : relTableSchemas.at(tableID)->properties) { - if (propertyName == property.name) { - return property; - } - } - throw CatalogException("Cannot find rel property " + propertyName + "."); -} - -void CatalogContent::dropTableSchema(table_id_t tableID) { - auto tableSchema = getTableSchema(tableID); - if (tableSchema->isNodeTable) { - nodeTableNameToIDMap.erase(tableSchema->tableName); - nodeTableSchemas.erase(tableID); - } else { - relTableNameToIDMap.erase(tableSchema->tableName); - relTableSchemas.erase(tableID); - } -} - -void CatalogContent::renameTable(table_id_t tableID, const std::string& newName) { - auto tableSchema = getTableSchema(tableID); - auto& tableNameToIDMap = tableSchema->isNodeTable ? nodeTableNameToIDMap : relTableNameToIDMap; - tableNameToIDMap.erase(tableSchema->tableName); - tableNameToIDMap.emplace(newName, tableID); - tableSchema->tableName = newName; -} - -void CatalogContent::saveToFile(const std::string& directory, DBFileType dbFileType) { - auto catalogPath = StorageUtils::getCatalogFilePath(directory, dbFileType); - auto fileInfo = FileUtils::openFile(catalogPath, O_WRONLY | O_CREAT); - uint64_t offset = 0; - writeMagicBytes(fileInfo.get(), offset); - SerDeser::serializeValue( - StorageVersionInfo::getStorageVersion(), fileInfo.get(), offset); - SerDeser::serializeValue(nodeTableSchemas.size(), fileInfo.get(), offset); - SerDeser::serializeValue(relTableSchemas.size(), fileInfo.get(), offset); - for (auto& nodeTableSchema : nodeTableSchemas) { - SerDeser::serializeValue(nodeTableSchema.first, fileInfo.get(), offset); - SerDeser::serializeValue(*nodeTableSchema.second, fileInfo.get(), offset); - } - for (auto& relTableSchema : relTableSchemas) { - SerDeser::serializeValue(relTableSchema.first, fileInfo.get(), offset); - SerDeser::serializeValue(*relTableSchema.second, fileInfo.get(), offset); - } - SerDeser::serializeValue(nextTableID, fileInfo.get(), offset); - SerDeser::serializeUnorderedMap(macros, fileInfo.get(), offset); -} - -void CatalogContent::readFromFile(const std::string& directory, DBFileType dbFileType) { - auto catalogPath = StorageUtils::getCatalogFilePath(directory, dbFileType); - auto fileInfo = FileUtils::openFile(catalogPath, O_RDONLY); - uint64_t offset = 0; - validateMagicBytes(fileInfo.get(), offset); - storage::storage_version_t savedStorageVersion; - SerDeser::deserializeValue(savedStorageVersion, fileInfo.get(), offset); - validateStorageVersion(savedStorageVersion); - uint64_t numNodeTables, numRelTables; - SerDeser::deserializeValue(numNodeTables, fileInfo.get(), offset); - SerDeser::deserializeValue(numRelTables, fileInfo.get(), offset); - table_id_t tableID; - for (auto i = 0u; i < numNodeTables; i++) { - SerDeser::deserializeValue(tableID, fileInfo.get(), offset); - nodeTableSchemas[tableID] = std::make_unique(); - SerDeser::deserializeValue( - *nodeTableSchemas[tableID], fileInfo.get(), offset); - } - for (auto i = 0u; i < numRelTables; i++) { - SerDeser::deserializeValue(tableID, fileInfo.get(), offset); - relTableSchemas[tableID] = std::make_unique(); - SerDeser::deserializeValue( - *relTableSchemas[tableID], fileInfo.get(), offset); - } - // Construct the tableNameToIdMap. - for (auto& nodeTableSchema : nodeTableSchemas) { - nodeTableNameToIDMap[nodeTableSchema.second->tableName] = nodeTableSchema.second->tableID; - } - for (auto& relTableSchema : relTableSchemas) { - relTableNameToIDMap[relTableSchema.second->tableName] = relTableSchema.second->tableID; - } - SerDeser::deserializeValue(nextTableID, fileInfo.get(), offset); - SerDeser::deserializeUnorderedMap(macros, fileInfo.get(), offset); -} - -ExpressionType CatalogContent::getFunctionType(const std::string& name) const { - auto upperCaseName = StringUtils::getUpper(name); - if (builtInVectorFunctions->containsFunction(upperCaseName)) { - return FUNCTION; - } else if (builtInAggregateFunctions->containsFunction(upperCaseName)) { - return AGGREGATE_FUNCTION; - } else if (macros.contains(upperCaseName)) { - return MACRO; - } else { - throw CatalogException(name + " function does not exist."); - } -} - -void CatalogContent::addVectorFunction( - std::string name, function::vector_function_definitions definitions) { - StringUtils::toUpper(name); - builtInVectorFunctions->addFunction(std::move(name), std::move(definitions)); -} - -void CatalogContent::addScalarMacroFunction( - std::string name, std::unique_ptr macro) { - StringUtils::toUpper(name); - macros.emplace(std::move(name), std::move(macro)); -} - -void CatalogContent::validateStorageVersion(storage_version_t savedStorageVersion) const { - auto storageVersion = StorageVersionInfo::getStorageVersion(); - if (savedStorageVersion != storageVersion) { - throw common::RuntimeException(StringUtils::string_format( - "Trying to read a database file with a different version. " - "Database file version: {}, Current build storage version: {}", - savedStorageVersion, storageVersion)); - } -} - -void CatalogContent::validateMagicBytes(FileInfo* fileInfo, offset_t& offset) const { - auto numMagicBytes = strlen(StorageVersionInfo::MAGIC_BYTES); - uint8_t magicBytes[4]; - for (auto i = 0u; i < numMagicBytes; i++) { - SerDeser::deserializeValue(magicBytes[i], fileInfo, offset); - } - if (memcmp(magicBytes, StorageVersionInfo::MAGIC_BYTES, numMagicBytes) != 0) { - throw common::RuntimeException( - "This is not a valid Kuzu database directory for the current version of Kuzu."); - } -} - -void CatalogContent::writeMagicBytes(FileInfo* fileInfo, offset_t& offset) const { - auto numMagicBytes = strlen(StorageVersionInfo::MAGIC_BYTES); - for (auto i = 0u; i < numMagicBytes; i++) { - SerDeser::serializeValue(StorageVersionInfo::MAGIC_BYTES[i], fileInfo, offset); - } -} - -void CatalogContent::registerBuiltInFunctions() { - builtInVectorFunctions = std::make_unique(); - builtInAggregateFunctions = std::make_unique(); - builtInTableFunctions = std::make_unique(); -} - Catalog::Catalog() : wal{nullptr} { catalogContentForReadOnlyTrx = std::make_unique(); } @@ -389,7 +53,8 @@ table_id_t Catalog::addRelTableSchema(std::string tableName, RelMultiplicity rel LogicalType srcPKDataType, LogicalType dstPKDataType) { initCatalogContentForWriteTrxIfNecessary(); auto tableID = catalogContentForWriteTrx->addRelTableSchema(std::move(tableName), - relMultiplicity, propertyDefinitions, srcTableID, dstTableID, srcPKDataType, dstPKDataType); + relMultiplicity, propertyDefinitions, srcTableID, dstTableID, std::move(srcPKDataType), + std::move(dstPKDataType)); wal->logRelTableRecord(tableID); return tableID; } @@ -400,9 +65,9 @@ void Catalog::dropTableSchema(table_id_t tableID) { wal->logDropTableRecord(tableID); } -void Catalog::renameTable(table_id_t tableID, std::string newName) { +void Catalog::renameTable(table_id_t tableID, const std::string& newName) { initCatalogContentForWriteTrxIfNecessary(); - catalogContentForWriteTrx->renameTable(tableID, std::move(newName)); + catalogContentForWriteTrx->renameTable(tableID, newName); } void Catalog::addProperty( diff --git a/src/catalog/catalog_content.cpp b/src/catalog/catalog_content.cpp new file mode 100644 index 0000000000..802176a5be --- /dev/null +++ b/src/catalog/catalog_content.cpp @@ -0,0 +1,236 @@ +#include "catalog/catalog_content.h" + +#include "common/ser_deser.h" +#include "common/string_utils.h" +#include "storage/storage_utils.h" + +using namespace kuzu::common; +using namespace kuzu::storage; +using namespace kuzu::transaction; + +namespace kuzu { +namespace catalog { + +CatalogContent::CatalogContent() : nextTableID{0} { + registerBuiltInFunctions(); +} + +CatalogContent::CatalogContent(const std::string& directory) { + readFromFile(directory, DBFileType::ORIGINAL); + registerBuiltInFunctions(); +} + +CatalogContent::CatalogContent(const CatalogContent& other) { + for (auto& nodeTableSchema : other.nodeTableSchemas) { + auto newNodeTableSchema = std::make_unique(*nodeTableSchema.second); + nodeTableSchemas[newNodeTableSchema->tableID] = std::move(newNodeTableSchema); + } + for (auto& relTableSchema : other.relTableSchemas) { + auto newRelTableSchema = std::make_unique(*relTableSchema.second); + relTableSchemas[newRelTableSchema->tableID] = std::move(newRelTableSchema); + } + nodeTableNameToIDMap = other.nodeTableNameToIDMap; + relTableNameToIDMap = other.relTableNameToIDMap; + nextTableID = other.nextTableID; + registerBuiltInFunctions(); + for (auto& macro : other.macros) { + macros.emplace(macro.first, macro.second->copy()); + } +} + +table_id_t CatalogContent::addNodeTableSchema( + std::string tableName, property_id_t primaryKeyId, std::vector properties) { + table_id_t tableID = assignNextTableID(); + for (auto i = 0u; i < properties.size(); ++i) { + properties[i].propertyID = i; + properties[i].tableID = tableID; + } + auto nodeTableSchema = std::make_unique( + std::move(tableName), tableID, primaryKeyId, std::move(properties)); + nodeTableNameToIDMap[nodeTableSchema->tableName] = tableID; + nodeTableSchemas[tableID] = std::move(nodeTableSchema); + return tableID; +} + +table_id_t CatalogContent::addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity, + std::vector properties, table_id_t srcTableID, table_id_t dstTableID, + LogicalType srcPKDataType, LogicalType dstPKDataType) { + table_id_t tableID = assignNextTableID(); + nodeTableSchemas[srcTableID]->addFwdRelTableID(tableID); + nodeTableSchemas[dstTableID]->addBwdRelTableID(tableID); + auto relInternalIDProperty = + Property(InternalKeyword::ID, LogicalType{LogicalTypeID::INTERNAL_ID}); + properties.insert(properties.begin(), relInternalIDProperty); + for (auto i = 0u; i < properties.size(); ++i) { + properties[i].propertyID = i; + properties[i].tableID = tableID; + } + auto relTableSchema = std::make_unique(std::move(tableName), tableID, + relMultiplicity, std::move(properties), srcTableID, dstTableID, std::move(srcPKDataType), + std::move(dstPKDataType)); + relTableNameToIDMap[relTableSchema->tableName] = tableID; + relTableSchemas[tableID] = std::move(relTableSchema); + return tableID; +} + +const Property& CatalogContent::getNodeProperty( + table_id_t tableID, const std::string& propertyName) const { + for (auto& property : nodeTableSchemas.at(tableID)->properties) { + if (propertyName == property.name) { + return property; + } + } + throw CatalogException("Cannot find node property " + propertyName + "."); +} + +const Property& CatalogContent::getRelProperty( + table_id_t tableID, const std::string& propertyName) const { + for (auto& property : relTableSchemas.at(tableID)->properties) { + if (propertyName == property.name) { + return property; + } + } + throw CatalogException("Cannot find rel property " + propertyName + "."); +} + +void CatalogContent::dropTableSchema(table_id_t tableID) { + auto tableSchema = getTableSchema(tableID); + switch (tableSchema->tableType) { + case TableType::NODE: { + nodeTableNameToIDMap.erase(tableSchema->tableName); + nodeTableSchemas.erase(tableID); + } break; + case TableType::REL: { + relTableNameToIDMap.erase(tableSchema->tableName); + relTableSchemas.erase(tableID); + } break; + default: { + throw NotImplementedException("CatalogContent::dropTableSchema"); + } + } +} + +void CatalogContent::renameTable(table_id_t tableID, const std::string& newName) { + auto tableSchema = getTableSchema(tableID); + auto& tableNameToIDMap = + tableSchema->tableType == TableType::NODE ? nodeTableNameToIDMap : relTableNameToIDMap; + tableNameToIDMap.erase(tableSchema->tableName); + tableNameToIDMap.emplace(newName, tableID); + tableSchema->tableName = newName; +} + +void CatalogContent::saveToFile(const std::string& directory, DBFileType dbFileType) { + auto catalogPath = StorageUtils::getCatalogFilePath(directory, dbFileType); + auto fileInfo = FileUtils::openFile(catalogPath, O_WRONLY | O_CREAT); + uint64_t offset = 0; + writeMagicBytes(fileInfo.get(), offset); + SerDeser::serializeValue(StorageVersionInfo::getStorageVersion(), fileInfo.get(), offset); + SerDeser::serializeValue(nodeTableSchemas.size(), fileInfo.get(), offset); + SerDeser::serializeValue(relTableSchemas.size(), fileInfo.get(), offset); + for (auto& [tableID, nodeTableSchema] : nodeTableSchemas) { + SerDeser::serializeValue(tableID, fileInfo.get(), offset); + nodeTableSchema->serialize(fileInfo.get(), offset); + } + for (auto& [tableID, relTableSchema] : relTableSchemas) { + SerDeser::serializeValue(tableID, fileInfo.get(), offset); + relTableSchema->serialize(fileInfo.get(), offset); + } + SerDeser::serializeValue(nextTableID, fileInfo.get(), offset); + SerDeser::serializeUnorderedMap(macros, fileInfo.get(), offset); +} + +void CatalogContent::readFromFile(const std::string& directory, DBFileType dbFileType) { + auto catalogPath = StorageUtils::getCatalogFilePath(directory, dbFileType); + auto fileInfo = FileUtils::openFile(catalogPath, O_RDONLY); + uint64_t offset = 0; + validateMagicBytes(fileInfo.get(), offset); + storage_version_t savedStorageVersion; + SerDeser::deserializeValue(savedStorageVersion, fileInfo.get(), offset); + validateStorageVersion(savedStorageVersion); + uint64_t numNodeTables, numRelTables; + SerDeser::deserializeValue(numNodeTables, fileInfo.get(), offset); + SerDeser::deserializeValue(numRelTables, fileInfo.get(), offset); + table_id_t tableID; + for (auto i = 0u; i < numNodeTables; i++) { + SerDeser::deserializeValue(tableID, fileInfo.get(), offset); + nodeTableSchemas[tableID] = ku_static_unique_pointer_cast( + TableSchema::deserialize(fileInfo.get(), offset)); + } + for (auto i = 0u; i < numRelTables; i++) { + SerDeser::deserializeValue(tableID, fileInfo.get(), offset); + relTableSchemas[tableID] = ku_static_unique_pointer_cast( + TableSchema::deserialize(fileInfo.get(), offset)); + } + // Construct the tableNameToIdMap. + for (auto& nodeTableSchema : nodeTableSchemas) { + nodeTableNameToIDMap[nodeTableSchema.second->tableName] = nodeTableSchema.second->tableID; + } + for (auto& relTableSchema : relTableSchemas) { + relTableNameToIDMap[relTableSchema.second->tableName] = relTableSchema.second->tableID; + } + SerDeser::deserializeValue(nextTableID, fileInfo.get(), offset); + SerDeser::deserializeUnorderedMap(macros, fileInfo.get(), offset); +} + +ExpressionType CatalogContent::getFunctionType(const std::string& name) const { + auto upperCaseName = StringUtils::getUpper(name); + if (builtInVectorFunctions->containsFunction(upperCaseName)) { + return FUNCTION; + } else if (builtInAggregateFunctions->containsFunction(upperCaseName)) { + return AGGREGATE_FUNCTION; + } else if (macros.contains(upperCaseName)) { + return MACRO; + } else { + throw CatalogException(name + " function does not exist."); + } +} + +void CatalogContent::addVectorFunction( + std::string name, function::vector_function_definitions definitions) { + StringUtils::toUpper(name); + builtInVectorFunctions->addFunction(std::move(name), std::move(definitions)); +} + +void CatalogContent::addScalarMacroFunction( + std::string name, std::unique_ptr macro) { + StringUtils::toUpper(name); + macros.emplace(std::move(name), std::move(macro)); +} + +void CatalogContent::validateStorageVersion(storage_version_t savedStorageVersion) { + auto storageVersion = StorageVersionInfo::getStorageVersion(); + if (savedStorageVersion != storageVersion) { + throw common::RuntimeException(StringUtils::string_format( + "Trying to read a database file with a different version. " + "Database file version: {}, Current build storage version: {}", + savedStorageVersion, storageVersion)); + } +} + +void CatalogContent::validateMagicBytes(FileInfo* fileInfo, offset_t& offset) { + auto numMagicBytes = strlen(StorageVersionInfo::MAGIC_BYTES); + uint8_t magicBytes[4]; + for (auto i = 0u; i < numMagicBytes; i++) { + SerDeser::deserializeValue(magicBytes[i], fileInfo, offset); + } + if (memcmp(magicBytes, StorageVersionInfo::MAGIC_BYTES, numMagicBytes) != 0) { + throw common::RuntimeException( + "This is not a valid Kuzu database directory for the current version of Kuzu."); + } +} + +void CatalogContent::writeMagicBytes(FileInfo* fileInfo, offset_t& offset) { + auto numMagicBytes = strlen(StorageVersionInfo::MAGIC_BYTES); + for (auto i = 0u; i < numMagicBytes; i++) { + SerDeser::serializeValue(StorageVersionInfo::MAGIC_BYTES[i], fileInfo, offset); + } +} + +void CatalogContent::registerBuiltInFunctions() { + builtInVectorFunctions = std::make_unique(); + builtInAggregateFunctions = std::make_unique(); + builtInTableFunctions = std::make_unique(); +} + +} // namespace catalog +} // namespace kuzu diff --git a/src/catalog/catalog_structs.cpp b/src/catalog/catalog_structs.cpp deleted file mode 100644 index a5a8841fda..0000000000 --- a/src/catalog/catalog_structs.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "catalog/catalog_structs.h" - -#include "common/exception.h" -#include "common/string_utils.h" - -using namespace kuzu::common; - -namespace kuzu { -namespace catalog { - -RelMultiplicity getRelMultiplicityFromString(const std::string& relMultiplicityString) { - if ("ONE_ONE" == relMultiplicityString) { - return ONE_ONE; - } else if ("MANY_ONE" == relMultiplicityString) { - return MANY_ONE; - } else if ("ONE_MANY" == relMultiplicityString) { - return ONE_MANY; - } else if ("MANY_MANY" == relMultiplicityString) { - return MANY_MANY; - } - throw common::CatalogException( - "Invalid relMultiplicity string '" + relMultiplicityString + "'."); -} - -std::string getRelMultiplicityAsString(RelMultiplicity relMultiplicity) { - switch (relMultiplicity) { - case MANY_MANY: { - return "MANY_MANY"; - } - case MANY_ONE: { - return "MANY_ONE"; - } - case ONE_ONE: { - return "ONE_ONE"; - } - case ONE_MANY: { - return "ONE_MANY"; - } - default: - throw common::CatalogException("Cannot convert rel multiplicity to std::string."); - } -} - -bool TableSchema::isReservedPropertyName(const std::string& propertyName) { - return common::StringUtils::getUpper(propertyName) == common::InternalKeyword::ID; -} - -std::string TableSchema::getPropertyName(property_id_t propertyID) const { - for (auto& property : properties) { - if (property.propertyID == propertyID) { - return property.name; - } - } - throw common::RuntimeException(StringUtils::string_format( - "Table: {} doesn't have a property with propertyID={}.", tableName, propertyID)); -} - -property_id_t TableSchema::getPropertyID(const std::string& propertyName) const { - for (auto& property : properties) { - if (property.name == propertyName) { - return property.propertyID; - } - } - throw common::RuntimeException(StringUtils::string_format( - "Table: {} doesn't have a property with propertyName={}.", tableName, propertyName)); -} - -Property TableSchema::getProperty(property_id_t propertyID) const { - for (auto& property : properties) { - if (property.propertyID == propertyID) { - return property; - } - } - throw common::RuntimeException(StringUtils::string_format( - "Table: {} doesn't have a property with propertyID={}.", tableName, propertyID)); -} - -void TableSchema::renameProperty(property_id_t propertyID, const std::string& newName) { - for (auto& property : properties) { - if (property.propertyID == propertyID) { - property.name = newName; - return; - } - } - throw common::InternalException( - StringUtils::string_format("Property with id={} not found.", propertyID)); -} - -} // namespace catalog -} // namespace kuzu diff --git a/src/catalog/table_schema.cpp b/src/catalog/table_schema.cpp new file mode 100644 index 0000000000..70b782a30a --- /dev/null +++ b/src/catalog/table_schema.cpp @@ -0,0 +1,190 @@ +#include "catalog/table_schema.h" + +#include "common/exception.h" +#include "common/ser_deser.h" +#include "common/string_utils.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace catalog { + +RelMultiplicity getRelMultiplicityFromString(const std::string& relMultiplicityString) { + if ("ONE_ONE" == relMultiplicityString) { + return RelMultiplicity::ONE_ONE; + } else if ("MANY_ONE" == relMultiplicityString) { + return RelMultiplicity::MANY_ONE; + } else if ("ONE_MANY" == relMultiplicityString) { + return RelMultiplicity::ONE_MANY; + } else if ("MANY_MANY" == relMultiplicityString) { + return RelMultiplicity::MANY_MANY; + } + throw CatalogException("Invalid relMultiplicity string '" + relMultiplicityString + "'."); +} + +std::string getRelMultiplicityAsString(RelMultiplicity relMultiplicity) { + switch (relMultiplicity) { + case RelMultiplicity::MANY_MANY: { + return "MANY_MANY"; + } + case RelMultiplicity::MANY_ONE: { + return "MANY_ONE"; + } + case RelMultiplicity::ONE_ONE: { + return "ONE_ONE"; + } + case RelMultiplicity::ONE_MANY: { + return "ONE_MANY"; + } + default: + throw CatalogException("Cannot convert rel multiplicity to std::string."); + } +} + +void Property::serialize(FileInfo* fileInfo, uint64_t& offset) const { + SerDeser::serializeValue(name, fileInfo, offset); + SerDeser::serializeValue(dataType, fileInfo, offset); + SerDeser::serializeValue(propertyID, fileInfo, offset); + SerDeser::serializeValue(tableID, fileInfo, offset); +} + +std::unique_ptr Property::deserialize(FileInfo* fileInfo, uint64_t& offset) { + std::string name; + LogicalType dataType; + property_id_t propertyID; + table_id_t tableID; + SerDeser::deserializeValue(name, fileInfo, offset); + SerDeser::deserializeValue(dataType, fileInfo, offset); + SerDeser::deserializeValue(propertyID, fileInfo, offset); + SerDeser::deserializeValue(tableID, fileInfo, offset); + return std::make_unique(name, dataType, propertyID, tableID); +} + +bool TableSchema::isReservedPropertyName(const std::string& propertyName) { + return StringUtils::getUpper(propertyName) == InternalKeyword::ID; +} + +std::string TableSchema::getPropertyName(property_id_t propertyID) const { + for (auto& property : properties) { + if (property.propertyID == propertyID) { + return property.name; + } + } + throw RuntimeException(StringUtils::string_format( + "Table: {} doesn't have a property with propertyID={}.", tableName, propertyID)); +} + +property_id_t TableSchema::getPropertyID(const std::string& propertyName) const { + for (auto& property : properties) { + if (property.name == propertyName) { + return property.propertyID; + } + } + throw RuntimeException(StringUtils::string_format( + "Table: {} doesn't have a property with propertyName={}.", tableName, propertyName)); +} + +Property TableSchema::getProperty(property_id_t propertyID) const { + for (auto& property : properties) { + if (property.propertyID == propertyID) { + return property; + } + } + throw RuntimeException(StringUtils::string_format( + "Table: {} doesn't have a property with propertyID={}.", tableName, propertyID)); +} + +void TableSchema::renameProperty(property_id_t propertyID, const std::string& newName) { + for (auto& property : properties) { + if (property.propertyID == propertyID) { + property.name = newName; + return; + } + } + throw InternalException( + StringUtils::string_format("Property with id={} not found.", propertyID)); +} + +void TableSchema::serialize(FileInfo* fileInfo, uint64_t& offset) { + SerDeser::serializeValue(tableName, fileInfo, offset); + SerDeser::serializeValue(tableID, fileInfo, offset); + SerDeser::serializeValue(tableType, fileInfo, offset); + SerDeser::serializeVectorOfObjects(properties, fileInfo, offset); + SerDeser::serializeValue(nextPropertyID, fileInfo, offset); + serializeInternal(fileInfo, offset); +} + +std::unique_ptr TableSchema::deserialize(FileInfo* fileInfo, uint64_t& offset) { + std::string tableName; + table_id_t tableID; + TableType tableType; + std::vector properties; + property_id_t nextPropertyID; + SerDeser::deserializeValue(tableName, fileInfo, offset); + SerDeser::deserializeValue(tableID, fileInfo, offset); + SerDeser::deserializeValue(tableType, fileInfo, offset); + SerDeser::deserializeVectorOfObjects(properties, fileInfo, offset); + SerDeser::deserializeValue(nextPropertyID, fileInfo, offset); + std::unique_ptr result; + switch (tableType) { + case TableType::NODE: { + result = NodeTableSchema::deserialize(fileInfo, offset); + } break; + case TableType::REL: { + result = RelTableSchema::deserialize(fileInfo, offset); + } break; + default: { + throw common::NotImplementedException{"TableSchema::deserialize"}; + } + } + result->tableName = tableName; + result->tableID = tableID; + result->tableType = tableType; + result->properties = properties; + result->nextPropertyID = nextPropertyID; + return result; +} + +void NodeTableSchema::serializeInternal(FileInfo* fileInfo, uint64_t& offset) { + SerDeser::serializeValue(primaryKeyPropertyID, fileInfo, offset); + SerDeser::serializeUnorderedSet(fwdRelTableIDSet, fileInfo, offset); + SerDeser::serializeUnorderedSet(bwdRelTableIDSet, fileInfo, offset); +} + +std::unique_ptr NodeTableSchema::deserialize( + FileInfo* fileInfo, uint64_t& offset) { + property_id_t primaryKeyPropertyID; + std::unordered_set fwdRelTableIDSet; + std::unordered_set bwdRelTableIDSet; + SerDeser::deserializeValue(primaryKeyPropertyID, fileInfo, offset); + SerDeser::deserializeUnorderedSet(fwdRelTableIDSet, fileInfo, offset); + SerDeser::deserializeUnorderedSet(bwdRelTableIDSet, fileInfo, offset); + return std::make_unique( + primaryKeyPropertyID, fwdRelTableIDSet, bwdRelTableIDSet); +} + +void RelTableSchema::serializeInternal(FileInfo* fileInfo, uint64_t& offset) { + SerDeser::serializeValue(relMultiplicity, fileInfo, offset); + SerDeser::serializeValue(srcTableID, fileInfo, offset); + SerDeser::serializeValue(dstTableID, fileInfo, offset); + SerDeser::serializeValue(srcPKDataType, fileInfo, offset); + SerDeser::serializeValue(dstPKDataType, fileInfo, offset); +} + +std::unique_ptr RelTableSchema::deserialize(FileInfo* fileInfo, uint64_t& offset) { + RelMultiplicity relMultiplicity; + table_id_t srcTableID; + table_id_t dstTableID; + LogicalType srcPKDataType; + LogicalType dstPKDataType; + SerDeser::deserializeValue(relMultiplicity, fileInfo, offset); + SerDeser::deserializeValue(srcTableID, fileInfo, offset); + SerDeser::deserializeValue(dstTableID, fileInfo, offset); + SerDeser::deserializeValue(srcPKDataType, fileInfo, offset); + SerDeser::deserializeValue(dstPKDataType, fileInfo, offset); + return std::make_unique( + relMultiplicity, srcTableID, dstTableID, srcPKDataType, dstPKDataType); +} + +} // namespace catalog +} // namespace kuzu diff --git a/src/function/table_functions.cpp b/src/function/table_functions.cpp index c4e5dc9ff4..23c370fb15 100644 --- a/src/function/table_functions.cpp +++ b/src/function/table_functions.cpp @@ -2,24 +2,27 @@ #include "catalog/catalog.h" +using namespace kuzu::catalog; +using namespace kuzu::common; + namespace kuzu { namespace function { -void TableInfoFunction::tableFunc(std::pair morsel, - function::TableFuncBindData* bindData, std::vector outputVectors) { +void TableInfoFunction::tableFunc(std::pair morsel, + function::TableFuncBindData* bindData, std::vector outputVectors) { auto tableSchema = reinterpret_cast(bindData)->tableSchema; auto numPropertiesToOutput = morsel.second - morsel.first; auto outVectorPos = 0; for (auto i = 0u; i < numPropertiesToOutput; i++) { auto property = tableSchema->properties[morsel.first + i]; - if (!tableSchema->isNodeTable && property.name == common::InternalKeyword::ID) { + if (tableSchema->tableType == TableType::REL && property.name == InternalKeyword::ID) { continue; } outputVectors[0]->setValue(outVectorPos, (int64_t)property.propertyID); outputVectors[1]->setValue(outVectorPos, property.name); outputVectors[2]->setValue( - outVectorPos, common::LogicalTypeUtils::dataTypeToString(property.dataType)); - if (tableSchema->isNodeTable) { + outVectorPos, LogicalTypeUtils::dataTypeToString(property.dataType)); + if (tableSchema->tableType == TableType::NODE) { auto primaryKeyID = reinterpret_cast(tableSchema)->primaryKeyPropertyID; outputVectors[3]->setValue(outVectorPos, primaryKeyID == property.propertyID); @@ -34,29 +37,29 @@ void TableInfoFunction::tableFunc(std::pair std::unique_ptr TableInfoFunction::bindFunc(main::ClientContext* context, kuzu::function::TableFuncBindInput input, catalog::CatalogContent* catalog) { std::vector returnColumnNames; - std::vector returnTypes; + std::vector returnTypes; auto tableName = input.inputs[0].getValue(); auto tableID = catalog->getTableID(tableName); auto schema = catalog->getTableSchema(tableID); returnColumnNames.emplace_back("property id"); - returnTypes.emplace_back(common::LogicalTypeID::INT64); + returnTypes.emplace_back(LogicalTypeID::INT64); returnColumnNames.emplace_back("name"); - returnTypes.emplace_back(common::LogicalTypeID::STRING); + returnTypes.emplace_back(LogicalTypeID::STRING); returnColumnNames.emplace_back("type"); - returnTypes.emplace_back(common::LogicalTypeID::STRING); - if (schema->isNodeTable) { + returnTypes.emplace_back(LogicalTypeID::STRING); + if (schema->tableType == TableType::NODE) { returnColumnNames.emplace_back("primary key"); - returnTypes.emplace_back(common::LogicalTypeID::BOOL); + returnTypes.emplace_back(LogicalTypeID::BOOL); } return std::make_unique( schema, std::move(returnTypes), std::move(returnColumnNames), schema->getNumProperties()); } -void DBVersionFunction::tableFunc(std::pair morsel, - function::TableFuncBindData* bindData, std::vector outputVectors) { +void DBVersionFunction::tableFunc(std::pair morsel, + function::TableFuncBindData* bindData, std::vector outputVectors) { auto outputVector = outputVectors[0]; auto pos = outputVector->state->selVector->selectedPositions[0]; - outputVectors[0]->setValue(pos, std::string(common::KUZU_VERSION)); + outputVectors[0]->setValue(pos, std::string(KUZU_VERSION)); outputVectors[0]->setNull(pos, false); outputVector->state->selVector->selectedSize = 1; } @@ -64,15 +67,15 @@ void DBVersionFunction::tableFunc(std::pair std::unique_ptr DBVersionFunction::bindFunc(main::ClientContext* context, kuzu::function::TableFuncBindInput input, catalog::CatalogContent* catalog) { std::vector returnColumnNames; - std::vector returnTypes; + std::vector returnTypes; returnColumnNames.emplace_back("version"); - returnTypes.emplace_back(common::LogicalTypeID::STRING); + returnTypes.emplace_back(LogicalTypeID::STRING); return std::make_unique( std::move(returnTypes), std::move(returnColumnNames), 1 /* one row result */); } -void CurrentSettingFunction::tableFunc(std::pair morsel, - function::TableFuncBindData* bindData, std::vector outputVectors) { +void CurrentSettingFunction::tableFunc(std::pair morsel, + function::TableFuncBindData* bindData, std::vector outputVectors) { auto currentSettingBindData = reinterpret_cast(bindData); auto outputVector = outputVectors[0]; auto pos = outputVector->state->selVector->selectedPositions[0]; @@ -85,9 +88,9 @@ std::unique_ptr CurrentSettingFunction::bindFunc(main::Client kuzu::function::TableFuncBindInput input, catalog::CatalogContent* catalog) { auto optionName = input.inputs[0].getValue(); std::vector returnColumnNames; - std::vector returnTypes; + std::vector returnTypes; returnColumnNames.emplace_back(optionName); - returnTypes.emplace_back(common::LogicalTypeID::STRING); + returnTypes.emplace_back(LogicalTypeID::STRING); return std::make_unique(context->getCurrentSetting(optionName), std::move(returnTypes), std::move(returnColumnNames), 1 /* one row result */); } diff --git a/src/include/binder/copy/bound_copy.h b/src/include/binder/copy/bound_copy.h index 2ab86faa6d..408084b3e3 100644 --- a/src/include/binder/copy/bound_copy.h +++ b/src/include/binder/copy/bound_copy.h @@ -5,7 +5,7 @@ #include #include "binder/bound_statement.h" -#include "catalog/catalog_structs.h" +#include "catalog/table_schema.h" #include "common/copier_config/copier_config.h" namespace kuzu { diff --git a/src/include/binder/ddl/bound_create_table.h b/src/include/binder/ddl/bound_create_table.h index fba5e0fe05..6135f9c288 100644 --- a/src/include/binder/ddl/bound_create_table.h +++ b/src/include/binder/ddl/bound_create_table.h @@ -1,7 +1,7 @@ #pragma once #include "bound_ddl.h" -#include "catalog/catalog_structs.h" +#include "catalog/table_schema.h" namespace kuzu { namespace binder { diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index 9325476c11..776e1760b1 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -2,7 +2,7 @@ #include -#include "catalog_structs.h" +#include "catalog/catalog_content.h" #include "common/assert.h" #include "common/exception.h" #include "common/file_utils.h" @@ -18,157 +18,6 @@ namespace kuzu { namespace catalog { -class CatalogContent { - friend class Catalog; - -public: - CatalogContent(); - - explicit CatalogContent(const std::string& directory); - - CatalogContent(const CatalogContent& other); - - /** - * Node and Rel table functions. - */ - common::table_id_t addNodeTableSchema(std::string tableName, common::property_id_t primaryKeyId, - std::vector properties); - - common::table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity, - std::vector properties, common::table_id_t srcTableID, - common::table_id_t dstTableID, common::LogicalType srcPKDataType, - common::LogicalType dstPKDataType); - - inline bool hasNodeTable() const { return !nodeTableSchemas.empty(); } - inline bool hasRelTable() const { return !relTableSchemas.empty(); } - - inline bool containNodeTable(common::table_id_t tableID) const { - return nodeTableSchemas.contains(tableID); - } - inline bool containRelTable(common::table_id_t tableID) const { - return relTableSchemas.contains(tableID); - } - inline bool containTable(const std::string& name) const { - return containNodeTable(name) || containRelTable(name); - } - - inline std::string getTableName(common::table_id_t tableID) const { - return getTableSchema(tableID)->tableName; - } - - inline NodeTableSchema* getNodeTableSchema(common::table_id_t tableID) const { - assert(containNodeTable(tableID)); - return nodeTableSchemas.at(tableID).get(); - } - inline RelTableSchema* getRelTableSchema(common::table_id_t tableID) const { - assert(containRelTable(tableID)); - return relTableSchemas.at(tableID).get(); - } - inline TableSchema* getTableSchema(common::table_id_t tableID) const { - assert(containRelTable(tableID) || containNodeTable(tableID)); - return nodeTableSchemas.contains(tableID) ? - (TableSchema*)nodeTableSchemas.at(tableID).get() : - (TableSchema*)relTableSchemas.at(tableID).get(); - } - - inline bool containNodeTable(const std::string& tableName) const { - return nodeTableNameToIDMap.contains(tableName); - } - inline bool containRelTable(const std::string& tableName) const { - return relTableNameToIDMap.contains(tableName); - } - - inline common::table_id_t getTableID(const std::string& tableName) const { - return nodeTableNameToIDMap.contains(tableName) ? nodeTableNameToIDMap.at(tableName) : - relTableNameToIDMap.at(tableName); - } - inline bool isSingleMultiplicityInDirection( - common::table_id_t tableID, common::RelDataDirection direction) const { - return relTableSchemas.at(tableID)->isSingleMultiplicityInDirection(direction); - } - - /** - * Node and Rel property functions. - */ - // getNodeProperty and getRelProperty should be called after checking if property exists - // (containNodeProperty and containRelProperty). - const Property& getNodeProperty( - common::table_id_t tableID, const std::string& propertyName) const; - const Property& getRelProperty( - common::table_id_t tableID, const std::string& propertyName) const; - - inline const std::vector& getNodeProperties(common::table_id_t tableID) const { - return nodeTableSchemas.at(tableID)->getProperties(); - } - inline const std::vector& getRelProperties(common::table_id_t tableID) const { - return relTableSchemas.at(tableID)->getProperties(); - } - inline std::vector getNodeTableIDs() const { - std::vector nodeTableIDs; - for (auto& [tableID, _] : nodeTableSchemas) { - nodeTableIDs.push_back(tableID); - } - return nodeTableIDs; - } - inline std::vector getRelTableIDs() const { - std::vector relTableIDs; - for (auto& [tableID, _] : relTableSchemas) { - relTableIDs.push_back(tableID); - } - return relTableIDs; - } - inline std::unordered_map>& - getNodeTableSchemas() { - return nodeTableSchemas; - } - inline std::unordered_map>& - getRelTableSchemas() { - return relTableSchemas; - } - - inline bool containMacro(const std::string& macroName) const { - return macros.contains(macroName); - } - - void dropTableSchema(common::table_id_t tableID); - - void renameTable(common::table_id_t tableID, const std::string& newName); - - void saveToFile(const std::string& directory, common::DBFileType dbFileType); - void readFromFile(const std::string& directory, common::DBFileType dbFileType); - - common::ExpressionType getFunctionType(const std::string& name) const; - - void addVectorFunction(std::string name, function::vector_function_definitions definitions); - - void addScalarMacroFunction( - std::string name, std::unique_ptr macro); - -private: - inline common::table_id_t assignNextTableID() { return nextTableID++; } - - void validateStorageVersion(storage::storage_version_t savedStorageVersion) const; - - void validateMagicBytes(common::FileInfo* fileInfo, common::offset_t& offset) const; - - void writeMagicBytes(common::FileInfo* fileInfo, common::offset_t& offset) const; - - void registerBuiltInFunctions(); - -private: - std::unordered_map> nodeTableSchemas; - std::unordered_map> relTableSchemas; - // These two maps are maintained as caches. They are not serialized to the catalog file, but - // is re-constructed when reading from the catalog file. - std::unordered_map nodeTableNameToIDMap; - std::unordered_map relTableNameToIDMap; - common::table_id_t nextTableID; - std::unique_ptr builtInVectorFunctions; - std::unique_ptr builtInAggregateFunctions; - std::unique_ptr builtInTableFunctions; - std::unordered_map> macros; -}; - class Catalog { public: Catalog(); @@ -216,7 +65,7 @@ class Catalog { void dropTableSchema(common::table_id_t tableID); - void renameTable(common::table_id_t tableID, std::string newName); + void renameTable(common::table_id_t tableID, const std::string& newName); void addProperty( common::table_id_t tableID, const std::string& propertyName, common::LogicalType dataType); @@ -235,7 +84,7 @@ class Catalog { std::string name, std::unique_ptr macro); // TODO(Ziyi): pass transaction pointer here. - inline function::ScalarMacroFunction* getScalarMacroFunction(std::string name) const { + inline function::ScalarMacroFunction* getScalarMacroFunction(const std::string& name) const { return catalogContentForReadOnlyTrx->macros.at(name).get(); } diff --git a/src/include/catalog/catalog_content.h b/src/include/catalog/catalog_content.h new file mode 100644 index 0000000000..bbb57f94a7 --- /dev/null +++ b/src/include/catalog/catalog_content.h @@ -0,0 +1,166 @@ +#pragma once + +#include "catalog/table_schema.h" +#include "function/aggregate/built_in_aggregate_functions.h" +#include "function/built_in_table_functions.h" +#include "function/built_in_vector_functions.h" +#include "function/scalar_macro_function.h" +#include "storage/storage_info.h" + +namespace kuzu { +namespace catalog { + +class CatalogContent { + friend class Catalog; + +public: + CatalogContent(); + + explicit CatalogContent(const std::string& directory); + + CatalogContent(const CatalogContent& other); + + /** + * Node and Rel table functions. + */ + common::table_id_t addNodeTableSchema(std::string tableName, common::property_id_t primaryKeyId, + std::vector properties); + + common::table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity, + std::vector properties, common::table_id_t srcTableID, + common::table_id_t dstTableID, common::LogicalType srcPKDataType, + common::LogicalType dstPKDataType); + + inline bool hasNodeTable() const { return !nodeTableSchemas.empty(); } + inline bool hasRelTable() const { return !relTableSchemas.empty(); } + + inline bool containNodeTable(common::table_id_t tableID) const { + return nodeTableSchemas.contains(tableID); + } + inline bool containRelTable(common::table_id_t tableID) const { + return relTableSchemas.contains(tableID); + } + inline bool containTable(const std::string& name) const { + return containNodeTable(name) || containRelTable(name); + } + + inline std::string getTableName(common::table_id_t tableID) const { + return getTableSchema(tableID)->tableName; + } + + inline NodeTableSchema* getNodeTableSchema(common::table_id_t tableID) const { + assert(containNodeTable(tableID)); + return nodeTableSchemas.at(tableID).get(); + } + inline RelTableSchema* getRelTableSchema(common::table_id_t tableID) const { + assert(containRelTable(tableID)); + return relTableSchemas.at(tableID).get(); + } + inline TableSchema* getTableSchema(common::table_id_t tableID) const { + assert(containRelTable(tableID) || containNodeTable(tableID)); + return nodeTableSchemas.contains(tableID) ? + (TableSchema*)nodeTableSchemas.at(tableID).get() : + (TableSchema*)relTableSchemas.at(tableID).get(); + } + + inline bool containNodeTable(const std::string& tableName) const { + return nodeTableNameToIDMap.contains(tableName); + } + inline bool containRelTable(const std::string& tableName) const { + return relTableNameToIDMap.contains(tableName); + } + + inline common::table_id_t getTableID(const std::string& tableName) const { + return nodeTableNameToIDMap.contains(tableName) ? nodeTableNameToIDMap.at(tableName) : + relTableNameToIDMap.at(tableName); + } + inline bool isSingleMultiplicityInDirection( + common::table_id_t tableID, common::RelDataDirection direction) const { + return relTableSchemas.at(tableID)->isSingleMultiplicityInDirection(direction); + } + + /** + * Node and Rel property functions. + */ + // getNodeProperty and getRelProperty should be called after checking if property exists + // (containNodeProperty and containRelProperty). + const Property& getNodeProperty( + common::table_id_t tableID, const std::string& propertyName) const; + const Property& getRelProperty( + common::table_id_t tableID, const std::string& propertyName) const; + + inline const std::vector& getNodeProperties(common::table_id_t tableID) const { + return nodeTableSchemas.at(tableID)->getProperties(); + } + inline const std::vector& getRelProperties(common::table_id_t tableID) const { + return relTableSchemas.at(tableID)->getProperties(); + } + inline std::vector getNodeTableIDs() const { + std::vector nodeTableIDs; + for (auto& [tableID, _] : nodeTableSchemas) { + nodeTableIDs.push_back(tableID); + } + return nodeTableIDs; + } + inline std::vector getRelTableIDs() const { + std::vector relTableIDs; + for (auto& [tableID, _] : relTableSchemas) { + relTableIDs.push_back(tableID); + } + return relTableIDs; + } + inline std::unordered_map>& + getNodeTableSchemas() { + return nodeTableSchemas; + } + inline std::unordered_map>& + getRelTableSchemas() { + return relTableSchemas; + } + + inline bool containMacro(const std::string& macroName) const { + return macros.contains(macroName); + } + + void dropTableSchema(common::table_id_t tableID); + + void renameTable(common::table_id_t tableID, const std::string& newName); + + void saveToFile(const std::string& directory, common::DBFileType dbFileType); + void readFromFile(const std::string& directory, common::DBFileType dbFileType); + + common::ExpressionType getFunctionType(const std::string& name) const; + + void addVectorFunction(std::string name, function::vector_function_definitions definitions); + + void addScalarMacroFunction( + std::string name, std::unique_ptr macro); + +private: + inline common::table_id_t assignNextTableID() { return nextTableID++; } + + static void validateStorageVersion(storage::storage_version_t savedStorageVersion); + + static void validateMagicBytes(common::FileInfo* fileInfo, common::offset_t& offset); + + static void writeMagicBytes(common::FileInfo* fileInfo, common::offset_t& offset); + + void registerBuiltInFunctions(); + +private: + // TODO(Guodong): I don't think it's necessary to keep separate maps for node and rel tables. + std::unordered_map> nodeTableSchemas; + std::unordered_map> relTableSchemas; + // These two maps are maintained as caches. They are not serialized to the catalog file, but + // is re-constructed when reading from the catalog file. + std::unordered_map nodeTableNameToIDMap; + std::unordered_map relTableNameToIDMap; + common::table_id_t nextTableID; + std::unique_ptr builtInVectorFunctions; + std::unique_ptr builtInAggregateFunctions; + std::unique_ptr builtInTableFunctions; + std::unordered_map> macros; +}; + +} // namespace catalog +} // namespace kuzu diff --git a/src/include/catalog/catalog_structs.h b/src/include/catalog/table_schema.h similarity index 67% rename from src/include/catalog/catalog_structs.h rename to src/include/catalog/table_schema.h index 1e4ecf6335..97f89f2644 100644 --- a/src/include/catalog/catalog_structs.h +++ b/src/include/catalog/table_schema.h @@ -6,13 +6,16 @@ #include "common/constants.h" #include "common/exception.h" +#include "common/file_utils.h" #include "common/rel_direction.h" #include "common/types/types_include.h" namespace kuzu { namespace catalog { -enum RelMultiplicity : uint8_t { MANY_MANY, MANY_ONE, ONE_MANY, ONE_ONE }; +enum class TableType : uint8_t { NODE, REL, INVALID }; + +enum class RelMultiplicity : uint8_t { MANY_MANY, MANY_ONE, ONE_MANY, ONE_ONE }; RelMultiplicity getRelMultiplicityFromString(const std::string& relMultiplicityString); std::string getRelMultiplicityAsString(RelMultiplicity relMultiplicity); @@ -21,8 +24,6 @@ struct Property { static constexpr std::string_view REL_FROM_PROPERTY_NAME = "_FROM_"; static constexpr std::string_view REL_TO_PROPERTY_NAME = "_TO_"; - // This constructor is needed for ser/deser functions - Property() : Property{"", common::LogicalType{}} {}; Property(std::string name, common::LogicalType dataType) : Property{std::move(name), std::move(dataType), common::INVALID_PROPERTY_ID, common::INVALID_TABLE_ID} {} @@ -31,17 +32,21 @@ struct Property { : name{std::move(name)}, dataType{std::move(dataType)}, propertyID{propertyID}, tableID{tableID} {} + void serialize(common::FileInfo* fileInfo, uint64_t& offset) const; + static std::unique_ptr deserialize(common::FileInfo* fileInfo, uint64_t& offset); + +public: std::string name; common::LogicalType dataType; common::property_id_t propertyID; common::table_id_t tableID; }; -struct TableSchema { +class TableSchema { public: - TableSchema(std::string tableName, common::table_id_t tableID, bool isNodeTable, + TableSchema(std::string tableName, common::table_id_t tableID, TableType tableType, std::vector properties) - : tableName{std::move(tableName)}, tableID{tableID}, isNodeTable{isNodeTable}, + : tableName{std::move(tableName)}, tableID{tableID}, tableType{tableType}, properties{std::move(properties)}, nextPropertyID{ (common::property_id_t)this->properties.size()} {} @@ -77,24 +82,33 @@ struct TableSchema { void renameProperty(common::property_id_t propertyID, const std::string& newName); + void serialize(common::FileInfo* fileInfo, uint64_t& offset); + static std::unique_ptr deserialize(common::FileInfo* fileInfo, uint64_t& offset); + private: inline common::property_id_t increaseNextPropertyID() { return nextPropertyID++; } + virtual void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) = 0; + public: std::string tableName; common::table_id_t tableID; - bool isNodeTable; + TableType tableType; std::vector properties; common::property_id_t nextPropertyID; }; -struct NodeTableSchema : TableSchema { - NodeTableSchema() - : NodeTableSchema{ - "", common::INVALID_TABLE_ID, common::INVALID_PROPERTY_ID, std::vector{}} {} +class NodeTableSchema : public TableSchema { +public: + NodeTableSchema(common::property_id_t primaryPropertyId, + std::unordered_set fwdRelTableIDSet, + std::unordered_set bwdRelTableIDSet) + : TableSchema{"", common::INVALID_TABLE_ID, TableType::NODE, std::vector{}}, + primaryKeyPropertyID{primaryPropertyId}, fwdRelTableIDSet{std::move(fwdRelTableIDSet)}, + bwdRelTableIDSet{std::move(bwdRelTableIDSet)} {} NodeTableSchema(std::string tableName, common::table_id_t tableID, common::property_id_t primaryPropertyId, std::vector properties) - : TableSchema{std::move(tableName), tableID, true /* isNodeTable */, std::move(properties)}, + : TableSchema{std::move(tableName), tableID, TableType::NODE, std::move(properties)}, primaryKeyPropertyID{primaryPropertyId} {} inline void addFwdRelTableID(common::table_id_t tableID) { fwdRelTableIDSet.insert(tableID); } @@ -102,6 +116,13 @@ struct NodeTableSchema : TableSchema { inline Property getPrimaryKey() const { return properties[primaryKeyPropertyID]; } + static std::unique_ptr deserialize( + common::FileInfo* fileInfo, uint64_t& offset); + +private: + void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; + +public: // TODO(Semih): When we support updating the schemas, we need to update this or, we need // a more robust mechanism to keep track of which property is the primary key (e.g., store this // information with the property). This is an idx, not an ID, so as the columns/properties of @@ -111,29 +132,29 @@ struct NodeTableSchema : TableSchema { std::unordered_set bwdRelTableIDSet; // dstNode->rel }; -struct RelTableSchema : TableSchema { +class RelTableSchema : public TableSchema { public: static constexpr uint64_t INTERNAL_REL_ID_PROPERTY_ID = 0; - RelTableSchema() - : TableSchema{"", common::INVALID_TABLE_ID, false /* isNodeTable */, {} /* properties */}, - relMultiplicity{MANY_MANY}, srcTableID{common::INVALID_TABLE_ID}, - dstTableID{common::INVALID_TABLE_ID}, srcPKDataType{common::LogicalType{ - common::LogicalTypeID::ANY}}, - dstPKDataType{common::LogicalType{common::LogicalTypeID::ANY}} {} + RelTableSchema(RelMultiplicity relMultiplicity, common::table_id_t srcTableID, + common::table_id_t dstTableID, common::LogicalType srcPKDataType, + common::LogicalType dstPKDataType) + : TableSchema{"", common::INVALID_TABLE_ID, TableType::REL, {} /* properties */}, + relMultiplicity{relMultiplicity}, srcTableID{srcTableID}, dstTableID{dstTableID}, + srcPKDataType{std::move(srcPKDataType)}, dstPKDataType{std::move(dstPKDataType)} {} RelTableSchema(std::string tableName, common::table_id_t tableID, RelMultiplicity relMultiplicity, std::vector properties, common::table_id_t srcTableID, common::table_id_t dstTableID, common::LogicalType srcPKDataType, common::LogicalType dstPKDataType) - : TableSchema{std::move(tableName), tableID, false /* isNodeTable */, - std::move(properties)}, + : TableSchema{std::move(tableName), tableID, TableType::REL, std::move(properties)}, relMultiplicity{relMultiplicity}, srcTableID{srcTableID}, dstTableID{dstTableID}, srcPKDataType{std::move(srcPKDataType)}, dstPKDataType{std::move(dstPKDataType)} {} inline bool isSingleMultiplicityInDirection(common::RelDataDirection direction) const { - return relMultiplicity == ONE_ONE || - relMultiplicity == - (direction == common::RelDataDirection::FWD ? MANY_ONE : ONE_MANY); + return relMultiplicity == RelMultiplicity::ONE_ONE || + relMultiplicity == (direction == common::RelDataDirection::FWD ? + RelMultiplicity::MANY_ONE : + RelMultiplicity::ONE_MANY); } inline bool isSrcOrDstTable(common::table_id_t tableID) const { @@ -148,6 +169,13 @@ struct RelTableSchema : TableSchema { return relDirection == common::RelDataDirection::FWD ? dstTableID : srcTableID; } + static std::unique_ptr deserialize( + common::FileInfo* fileInfo, uint64_t& offset); + +private: + void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; + +public: RelMultiplicity relMultiplicity; common::table_id_t srcTableID; common::table_id_t dstTableID; diff --git a/src/include/common/ser_deser.h b/src/include/common/ser_deser.h index fbb6b47481..85823759ae 100644 --- a/src/include/common/ser_deser.h +++ b/src/include/common/ser_deser.h @@ -68,6 +68,16 @@ class SerDeser { } } + template + static void serializeVectorOfObjects( + const std::vector& values, FileInfo* fileInfo, uint64_t& offset) { + uint64_t vectorSize = values.size(); + serializeValue(vectorSize, fileInfo, offset); + for (auto& value : values) { + value.serialize(fileInfo, offset); + } + } + template static void serializeVectorOfPtrs( const std::vector>& values, FileInfo* fileInfo, uint64_t& offset) { @@ -102,6 +112,17 @@ class SerDeser { } } + template + static void deserializeVectorOfObjects( + std::vector& values, FileInfo* fileInfo, uint64_t& offset) { + uint64_t vectorSize; + deserializeValue(vectorSize, fileInfo, offset); + values.reserve(vectorSize); + for (auto i = 0u; i < vectorSize; i++) { + values.push_back(*T::deserialize(fileInfo, offset)); + } + } + template static void deserializeVectorOfPtrs( std::vector>& values, FileInfo* fileInfo, uint64_t& offset) { diff --git a/src/include/planner/logical_plan/logical_operator/logical_copy.h b/src/include/planner/logical_plan/logical_operator/logical_copy.h index f962cc59ca..2b66ad80db 100644 --- a/src/include/planner/logical_plan/logical_operator/logical_copy.h +++ b/src/include/planner/logical_plan/logical_operator/logical_copy.h @@ -1,7 +1,7 @@ #pragma once #include "base_logical_operator.h" -#include "catalog/catalog_structs.h" +#include "catalog/table_schema.h" #include "common/copier_config/copier_config.h" namespace kuzu { diff --git a/src/include/planner/logical_plan/logical_operator/logical_create_table.h b/src/include/planner/logical_plan/logical_operator/logical_create_table.h index 7c6e785316..ec591b61a5 100644 --- a/src/include/planner/logical_plan/logical_operator/logical_create_table.h +++ b/src/include/planner/logical_plan/logical_operator/logical_create_table.h @@ -1,6 +1,6 @@ #pragma once -#include "catalog/catalog_structs.h" +#include "catalog/table_schema.h" #include "logical_ddl.h" namespace kuzu { diff --git a/src/include/storage/storage_info.h b/src/include/storage/storage_info.h index 97f3ded051..05c5c8f5be 100644 --- a/src/include/storage/storage_info.h +++ b/src/include/storage/storage_info.h @@ -12,8 +12,9 @@ using storage_version_t = uint64_t; struct StorageVersionInfo { static std::unordered_map getStorageVersionInfo() { - return {{"0.0.6.1", 10}, {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3.5", 6}, - {"0.0.3.4", 5}, {"0.0.3.3", 4}, {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}}; + return {{"0.0.6.2", 11}, {"0.0.6.1", 10}, {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, + {"0.0.3.5", 6}, {"0.0.3.4", 5}, {"0.0.3.3", 4}, {"0.0.3.2", 3}, {"0.0.3.1", 2}, + {"0.0.3", 1}}; } static storage_version_t getStorageVersion(); diff --git a/src/include/storage/storage_structure/lists/lists_update_store.h b/src/include/storage/storage_structure/lists/lists_update_store.h index 8d3edbbd20..9ece166055 100644 --- a/src/include/storage/storage_structure/lists/lists_update_store.h +++ b/src/include/storage/storage_structure/lists/lists_update_store.h @@ -2,7 +2,7 @@ #include -#include "catalog/catalog_structs.h" +#include "catalog/table_schema.h" #include "common/data_chunk/data_chunk.h" #include "common/rel_direction.h" #include "common/types/types.h" diff --git a/src/include/storage/storage_utils.h b/src/include/storage/storage_utils.h index 11220221a3..730ade4ad4 100644 --- a/src/include/storage/storage_utils.h +++ b/src/include/storage/storage_utils.h @@ -3,7 +3,7 @@ #include #include -#include "catalog/catalog_structs.h" +#include "catalog/table_schema.h" #include "common/constants.h" #include "common/file_utils.h" #include "common/null_mask.h" diff --git a/src/include/storage/store/table_statistics.h b/src/include/storage/store/table_statistics.h index da8dd16caf..6f93f798d3 100644 --- a/src/include/storage/store/table_statistics.h +++ b/src/include/storage/store/table_statistics.h @@ -3,7 +3,7 @@ #include #include -#include "catalog/catalog_structs.h" +#include "catalog/table_schema.h" #include "common/ser_deser.h" #include "spdlog/spdlog.h" #include "transaction/transaction.h" diff --git a/src/processor/operator/ddl/drop_table.cpp b/src/processor/operator/ddl/drop_table.cpp index d4901dd007..e667f387b6 100644 --- a/src/processor/operator/ddl/drop_table.cpp +++ b/src/processor/operator/ddl/drop_table.cpp @@ -2,6 +2,7 @@ #include "common/string_utils.h" +using namespace kuzu::catalog; using namespace kuzu::common; namespace kuzu { @@ -14,7 +15,7 @@ void DropTable::executeDDLInternal() { std::string DropTable::getOutputMsg() { auto tableSchema = catalog->getReadOnlyVersion()->getTableSchema(tableID); return StringUtils::string_format("{}Table: {} has been dropped.", - tableSchema->isNodeTable ? "Node" : "Rel", tableSchema->tableName); + tableSchema->tableType == TableType::NODE ? "Node" : "Rel", tableSchema->tableName); } } // namespace processor diff --git a/src/storage/copier/table_copy_utils.cpp b/src/storage/copier/table_copy_utils.cpp index e83ef9f4b0..a906aefdee 100644 --- a/src/storage/copier/table_copy_utils.cpp +++ b/src/storage/copier/table_copy_utils.cpp @@ -132,7 +132,7 @@ std::shared_ptr TableCopyUtils::createCSVReader( // Only the empty string is treated as NULL. csvConvertOptions.null_values = {""}; csvConvertOptions.quoted_strings_can_be_null = false; - if (!tableSchema->isNodeTable) { + if (tableSchema->tableType == TableType::REL) { auto relTableSchema = (RelTableSchema*)tableSchema; csvConvertOptions.column_types[std::string(Property::REL_FROM_PROPERTY_NAME)] = toArrowDataType(relTableSchema->srcPKDataType); @@ -384,7 +384,7 @@ std::unique_ptr TableCopyUtils::convertStringToValue( std::vector TableCopyUtils::getColumnNamesToRead(catalog::TableSchema* tableSchema) { std::vector columnNamesToRead; - if (!tableSchema->isNodeTable) { + if (tableSchema->tableType == TableType::REL) { columnNamesToRead.emplace_back(Property::REL_FROM_PROPERTY_NAME); columnNamesToRead.emplace_back(Property::REL_TO_PROPERTY_NAME); } diff --git a/src/storage/storage_structure/lists/lists_update_store.cpp b/src/storage/storage_structure/lists/lists_update_store.cpp index 5cd01fadbd..4aa294b07a 100644 --- a/src/storage/storage_structure/lists/lists_update_store.cpp +++ b/src/storage/storage_structure/lists/lists_update_store.cpp @@ -27,7 +27,7 @@ bool ListsUpdatesForNodeOffset::hasAnyUpdatedPersistentListOffsets() const { } ListsUpdatesStore::ListsUpdatesStore(MemoryManager& memoryManager, RelTableSchema& relTableSchema) - : memoryManager{memoryManager} { + : memoryManager{memoryManager}, relTableSchema{relTableSchema} { updateSchema(relTableSchema); } diff --git a/test/graph_test/graph_test.cpp b/test/graph_test/graph_test.cpp index 9dbca5c872..fb139f30a3 100644 --- a/test/graph_test/graph_test.cpp +++ b/test/graph_test/graph_test.cpp @@ -55,7 +55,8 @@ void BaseGraphTest::validateNodeColumnFilesExistence( void BaseGraphTest::validateRelColumnAndListFilesExistence( RelTableSchema* relTableSchema, DBFileType dbFileType, bool existence) { for (auto relDirection : RelDataDirectionUtils::getRelDataDirections()) { - if (relTableSchema->relMultiplicity) { + if (relTableSchema->relMultiplicity == RelMultiplicity::MANY_ONE || + relTableSchema->relMultiplicity == RelMultiplicity::ONE_ONE) { validateColumnFilesExistence(StorageUtils::getAdjColumnFName(databasePath, relTableSchema->tableID, relDirection, dbFileType), existence, false /* hasOverflow */);