Skip to content

Commit

Permalink
Move parquet buffered serializer into common/serializer
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminwinger committed Oct 18, 2023
1 parent a439404 commit e73855d
Show file tree
Hide file tree
Showing 52 changed files with 481 additions and 349 deletions.
1 change: 0 additions & 1 deletion src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include "catalog/node_table_schema.h"
#include "catalog/rel_table_group_schema.h"
#include "catalog/rel_table_schema.h"
#include "common/ser_deser.h"
#include "storage/wal/wal.h"
#include "transaction/transaction_action.h"

Expand Down
4 changes: 3 additions & 1 deletion src/catalog/catalog_content.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
#include "catalog/rel_table_schema.h"
#include "common/exception/catalog.h"
#include "common/exception/runtime.h"
#include "common/ser_deser.h"
#include "common/string_format.h"
#include "common/serializer/buffered_file.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"
#include "common/string_utils.h"
#include "storage/storage_utils.h"

Expand Down
3 changes: 2 additions & 1 deletion src/catalog/node_table_schema.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "catalog/node_table_schema.h"

#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"

using namespace kuzu::common;

Expand Down
3 changes: 2 additions & 1 deletion src/catalog/property.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "catalog/property.h"

#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"

using namespace kuzu::common;

Expand Down
3 changes: 2 additions & 1 deletion src/catalog/rdf_graph_schema.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "catalog/rdf_graph_schema.h"

#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"

using namespace kuzu::common;

Expand Down
3 changes: 2 additions & 1 deletion src/catalog/rel_table_group_schema.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "catalog/rel_table_group_schema.h"

#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"

using namespace kuzu::common;

Expand Down
3 changes: 2 additions & 1 deletion src/catalog/rel_table_schema.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#include "catalog/rel_table_schema.h"

#include "common/exception/catalog.h"
#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"

using namespace kuzu::common;

Expand Down
3 changes: 2 additions & 1 deletion src/catalog/table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
#include "common/exception/internal.h"
#include "common/exception/not_implemented.h"
#include "common/exception/runtime.h"
#include "common/ser_deser.h"
#include "common/string_format.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"
#include "common/string_utils.h"

using namespace kuzu::common;
Expand Down
4 changes: 2 additions & 2 deletions src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ add_subdirectory(arrow)
add_subdirectory(copier_config)
add_subdirectory(data_chunk)
add_subdirectory(exception)
add_subdirectory(serializer)
add_subdirectory(task_system)
add_subdirectory(types)
add_subdirectory(vector)
Expand All @@ -20,8 +21,7 @@ add_library(kuzu_common
type_utils.cpp
utils.cpp
string_utils.cpp
table_type.cpp
ser_deser.cpp)
table_type.cpp)

target_link_libraries(kuzu_common Glob)

Expand Down
22 changes: 0 additions & 22 deletions src/common/ser_deser.cpp

This file was deleted.

10 changes: 10 additions & 0 deletions src/common/serializer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
add_library(kuzu_common_serializer
OBJECT
serializer.cpp
deserializer.cpp
buffered_file.cpp
buffered_serializer.cpp)

set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_common_serializer>
PARENT_SCOPE)
21 changes: 21 additions & 0 deletions src/common/serializer/buffered_file.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "common/serializer/buffered_file.h"

#include "common/file_utils.h"

namespace kuzu {
namespace common {
void BufferedFileWriter::flush() {
FileUtils::writeToFile(fileInfo.get(), buffer.get(), bufferOffset, fileOffset);
fileOffset += bufferOffset;
bufferOffset = 0;
memset(buffer.get(), 0, BUFFER_SIZE);
}

void BufferedFileReader::readNextPage() {
FileUtils::readFromFile(fileInfo.get(), buffer.get(), BUFFER_SIZE, fileOffset);
fileOffset += BUFFER_SIZE;
bufferOffset = 0;
}

} // namespace common
} // namespace kuzu
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#include "processor/operator/persistent/writer/parquet/buffered_serializer.h"
#include "common/serializer/buffered_serializer.h"

#include <cstring>

namespace kuzu {
namespace processor {
namespace common {

BufferedSerializer::BufferedSerializer(uint64_t maximum_size)
: BufferedSerializer(std::make_unique<uint8_t[]>(maximum_size), maximum_size) {}
Expand All @@ -14,7 +14,7 @@ BufferedSerializer::BufferedSerializer(std::unique_ptr<uint8_t[]> data, uint64_t
blob.data = std::move(data);
}

void BufferedSerializer::writeData(const uint8_t* buffer, uint64_t len) {
void BufferedSerializer::write(const uint8_t* buffer, uint64_t len) {
if (blob.size + len >= maximumSize) {
do {
maximumSize *= 2;
Expand All @@ -29,5 +29,5 @@ void BufferedSerializer::writeData(const uint8_t* buffer, uint64_t len) {
blob.size += len;
}

} // namespace processor
} // namespace common
} // namespace kuzu
15 changes: 15 additions & 0 deletions src/common/serializer/deserializer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "common/serializer/deserializer.h"

namespace kuzu {
namespace common {

template<>
void Deserializer::deserializeValue(std::string& value) {
uint64_t valueLength = 0;
deserializeValue(valueLength);
value.resize(valueLength);
reader->read((uint8_t*)value.data(), valueLength);
}

} // namespace common
} // namespace kuzu
21 changes: 21 additions & 0 deletions src/common/serializer/file_writer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "common/file_utils.h"
#include "common/serializer/buffered_file.h"

namespace kuzu {
namespace common {

void BufferedFileWriter::flush() {
FileUtils::writeToFile(fileInfo.get(), buffer.get(), bufferOffset, fileOffset);
fileOffset += bufferOffset;
bufferOffset = 0;
memset(buffer.get(), 0, BUFFER_SIZE);
}

void BufferedFileReader::readNextPage() {
FileUtils::readFromFile(fileInfo.get(), buffer.get(), BUFFER_SIZE, fileOffset);
fileOffset += BUFFER_SIZE;
bufferOffset = 0;
}

} // namespace common
} // namespace kuzu
14 changes: 14 additions & 0 deletions src/common/serializer/serializer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "common/serializer/serializer.h"

namespace kuzu {
namespace common {

template<>
void Serializer::serializeValue(const std::string& value) {
uint64_t valueLength = value.length();
writer->write((uint8_t*)&valueLength, sizeof(uint64_t));
writer->write((uint8_t*)value.data(), valueLength);
}

} // namespace common
} // namespace kuzu
3 changes: 2 additions & 1 deletion src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#include "common/exception/binder.h"
#include "common/exception/not_implemented.h"
#include "common/null_buffer.h"
#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"
#include "common/string_utils.h"
#include "common/types/interval_t.h"
#include "common/types/ku_list.h"
Expand Down
3 changes: 2 additions & 1 deletion src/common/types/value/value.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#include "common/types/value/value.h"

#include "common/null_buffer.h"
#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"
#include "common/types/blob.h"
#include "storage/storage_utils.h"

Expand Down
3 changes: 2 additions & 1 deletion src/function/scalar_macro_function.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "function/scalar_macro_function.h"

#include "common/ser_deser.h"
#include "common/serializer/deserializer.h"
#include "common/serializer/serializer.h"

using namespace kuzu::common;
using namespace kuzu::parser;
Expand Down
Loading

0 comments on commit e73855d

Please sign in to comment.