Update vendored sources to duckdb/duckdb@312b995
krlmlr committed Sep 13, 2023
1 parent 7f3ed11 commit 64a2f25
Showing 512 changed files with 4,209 additions and 9,666 deletions.
91 changes: 46 additions & 45 deletions src/duckdb/extension/parquet/column_writer.cpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/duckdb/extension/parquet/include/column_writer.hpp
@@ -12,7 +12,7 @@
 #include "parquet_types.h"
 
 namespace duckdb {
-class BufferedSerializer;
+class MemoryStream;
 class ParquetWriter;
 class ColumnWriterPageState;
 class BasicColumnWriterState;
@@ -113,7 +113,7 @@ class ColumnWriter {
 	                       uint16_t define_value, uint16_t null_value);
 	void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
 
-	void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
+	void CompressPage(MemoryStream &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
 	                  unique_ptr<data_t[]> &compressed_buf);
 };
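For code that buffered a page through the old BufferedSerializer, MemoryStream is the drop-in replacement. A minimal sketch, assuming the vendored duckdb/common/serializer/memory_stream.hpp at this commit (WriteData/GetData/GetPosition are taken from that header; BufferPage itself is illustrative):

#include "duckdb/common/serializer/memory_stream.hpp"

#include <string>

using namespace duckdb;

// Collect page bytes in memory, then hand them off for compression -- the
// shape CompressPage now expects for its temp_writer argument.
void BufferPage(const std::string &page_bytes) {
	MemoryStream temp_writer;
	temp_writer.WriteData(reinterpret_cast<const_data_ptr_t>(page_bytes.data()), page_bytes.size());
	data_ptr_t uncompressed_data = temp_writer.GetData(); // buffer owned by the stream
	idx_t uncompressed_size = temp_writer.GetPosition();
	// ... pass uncompressed_data / uncompressed_size to the codec ...
	(void)uncompressed_data;
	(void)uncompressed_size;
}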
7 changes: 2 additions & 5 deletions src/duckdb/extension/parquet/include/parquet_reader.hpp
@@ -74,11 +74,8 @@ struct ParquetOptions {
 	MultiFileReaderOptions file_options;
 
 public:
-	void Serialize(FieldWriter &writer) const;
-	void Deserialize(FieldReader &reader);
-
-	void FormatSerialize(FormatSerializer &serializer) const;
-	static ParquetOptions FormatDeserialize(FormatDeserializer &deserializer);
+	void Serialize(Serializer &serializer) const;
+	static ParquetOptions Deserialize(Deserializer &deserializer);
 };
 
 class ParquetReader {
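The two serialization code paths (the FieldWriter-based one and the "Format" one) collapse into a single Serializer/Deserializer pair. A sketch of a hypothetical options struct following the same convention -- the struct is illustrative, but the member shapes and the tagged WriteProperty/ReadProperty calls mirror serialize_parquet.cpp below:

#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"

namespace duckdb {

// Illustrative struct (not part of the commit): a const Serialize member plus
// a static Deserialize factory, with a stable numeric field id per property.
struct MyOptions {
	bool binary_as_string = false;
	bool file_row_number = false;

	void Serialize(Serializer &serializer) const {
		serializer.WriteProperty(100, "binary_as_string", binary_as_string);
		serializer.WriteProperty(101, "file_row_number", file_row_number);
	}

	static MyOptions Deserialize(Deserializer &deserializer) {
		MyOptions result;
		deserializer.ReadProperty(100, "binary_as_string", result.binary_as_string);
		deserializer.ReadProperty(101, "file_row_number", result.file_row_number);
		return result;
	}
};

} // namespace duckdb

Because properties are matched on the numeric field id rather than on position, fields can be renamed or reordered without invalidating previously serialized data.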
@@ -26,9 +26,9 @@ class RleBpEncoder {
 	void PrepareValue(uint32_t value);
 	void FinishPrepare();
 
-	void BeginWrite(Serializer &writer, uint32_t first_value);
-	void WriteValue(Serializer &writer, uint32_t value);
-	void FinishWrite(Serializer &writer);
+	void BeginWrite(WriteStream &writer, uint32_t first_value);
+	void WriteValue(WriteStream &writer, uint32_t value);
+	void FinishWrite(WriteStream &writer);
 
 	idx_t GetByteCount();
 
@@ -43,7 +43,7 @@ class RleBpEncoder {
 
 private:
 	void FinishRun();
-	void WriteRun(Serializer &writer);
+	void WriteRun(WriteStream &writer);
 };
 
 } // namespace duckdb
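The encoder now targets WriteStream, the plain byte-sink interface, instead of the heavier Serializer. A sketch of a helper in the same style -- WriteVarint is illustrative, and it assumes WriteStream's templated Write<T> convenience wrapper over WriteData from the vendored write_stream.hpp:

#include "duckdb/common/serializer/write_stream.hpp"

#include <cstdint>

namespace duckdb {

// Illustrative helper (not part of the commit): emit a ULEB128 varint, the
// encoding parquet's RLE/bit-packing hybrid uses for its run headers.
static void WriteVarint(WriteStream &writer, uint32_t value) {
	while (value >= 0x80) {
		writer.Write<uint8_t>(static_cast<uint8_t>((value & 0x7F) | 0x80));
		value >>= 7;
	}
	writer.Write<uint8_t>(static_cast<uint8_t>(value));
}

} // namespace duckdb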
131 changes: 75 additions & 56 deletions src/duckdb/extension/parquet/parquet_extension.cpp
@@ -18,11 +18,8 @@
 #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
 #include "duckdb/common/constants.hpp"
 #include "duckdb/common/enums/file_compression_type.hpp"
-#include "duckdb/common/field_writer.hpp"
 #include "duckdb/common/file_system.hpp"
 #include "duckdb/common/multi_file_reader.hpp"
-#include "duckdb/common/serializer/format_deserializer.hpp"
-#include "duckdb/common/serializer/format_serializer.hpp"
 #include "duckdb/common/types/chunk_collection.hpp"
 #include "duckdb/function/copy_function.hpp"
 #include "duckdb/function/table_function.hpp"
@@ -37,7 +34,8 @@
 #include "duckdb/planner/operator/logical_get.hpp"
 #include "duckdb/storage/statistics/base_statistics.hpp"
 #include "duckdb/storage/table/row_group.hpp"
-
+#include "duckdb/common/serializer/serializer.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
 #endif
 
 namespace duckdb {
@@ -120,7 +118,7 @@ struct ParquetWriteBindData : public TableFunctionData {
 	vector<LogicalType> sql_types;
 	vector<string> column_names;
 	duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
-	idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
+	idx_t row_group_size = Storage::ROW_GROUP_SIZE;
 
 	//! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
 	static constexpr const idx_t BYTES_PER_ROW = 1024;
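The default row-group size constant moves from RowGroup to the Storage constants struct; only the qualifying name changes. A sketch of the byte-size default described in the comment above, assuming Storage lives in duckdb/storage/storage_info.hpp (the constant was 122,880 rows in this era of DuckDB):

#include "duckdb/storage/storage_info.hpp" // assumed location of struct Storage

using namespace duckdb;

// Default row_group_size_bytes when the user does not set it explicitly,
// per the BYTES_PER_ROW comment in the diff above.
idx_t DefaultRowGroupSizeBytes() {
	constexpr idx_t BYTES_PER_ROW = 1024; // ParquetWriteBindData::BYTES_PER_ROW
	return Storage::ROW_GROUP_SIZE * BYTES_PER_ROW;
}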
@@ -143,18 +141,6 @@ struct ParquetWriteLocalState : public LocalFunctionData {
 	ColumnDataAppendState append_state;
 };
 
-void ParquetOptions::Serialize(FieldWriter &writer) const {
-	writer.WriteField<bool>(binary_as_string);
-	writer.WriteField<bool>(file_row_number);
-	writer.WriteSerializable(file_options);
-}
-
-void ParquetOptions::Deserialize(FieldReader &reader) {
-	binary_as_string = reader.ReadRequired<bool>();
-	file_row_number = reader.ReadRequired<bool>();
-	file_options = reader.ReadRequiredSerializable<MultiFileReaderOptions, MultiFileReaderOptions>();
-}
-
 BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) {
 	auto bind_info = BindInfo(ScanType::PARQUET);
 	auto &parquet_bind = bind_data->Cast<ParquetReadBindData>();
@@ -186,8 +172,6 @@ class ParquetScanFunction {
 		table_function.get_batch_index = ParquetScanGetBatchIndex;
 		table_function.serialize = ParquetScanSerialize;
 		table_function.deserialize = ParquetScanDeserialize;
-		table_function.format_serialize = ParquetScanFormatSerialize;
-		table_function.format_deserialize = ParquetScanFormatDeserialize;
 		table_function.get_batch_info = ParquetGetBatchInfo;
 		table_function.projection_pushdown = true;
 		table_function.filter_pushdown = true;
@@ -420,38 +404,16 @@ class ParquetScanFunction {
 		return data.batch_index;
 	}
 
-	static void ParquetScanSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
+	static void ParquetScanSerialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
 	                                 const TableFunction &function) {
 		auto &bind_data = bind_data_p->Cast<ParquetReadBindData>();
-		writer.WriteList<string>(bind_data.files);
-		writer.WriteRegularSerializableList(bind_data.types);
-		writer.WriteList<string>(bind_data.names);
-		bind_data.parquet_options.Serialize(writer);
-	}
-
-	static unique_ptr<FunctionData> ParquetScanDeserialize(PlanDeserializationState &state, FieldReader &reader,
-	                                                       TableFunction &function) {
-		auto &context = state.context;
-		auto files = reader.ReadRequiredList<string>();
-		auto types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
-		auto names = reader.ReadRequiredList<string>();
-		ParquetOptions options(context);
-		options.Deserialize(reader);
-
-		return ParquetScanBindInternal(context, files, types, names, options);
-	}
-
-	static void ParquetScanFormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
-	                                       const TableFunction &function) {
-		auto &bind_data = bind_data_p->Cast<ParquetReadBindData>();
 		serializer.WriteProperty(100, "files", bind_data.files);
 		serializer.WriteProperty(101, "types", bind_data.types);
 		serializer.WriteProperty(102, "names", bind_data.names);
 		serializer.WriteProperty(103, "parquet_options", bind_data.parquet_options);
 	}
 
-	static unique_ptr<FunctionData> ParquetScanFormatDeserialize(FormatDeserializer &deserializer,
-	                                                             TableFunction &function) {
+	static unique_ptr<FunctionData> ParquetScanDeserialize(Deserializer &deserializer, TableFunction &function) {
 		auto &context = deserializer.Get<ClientContext &>();
 		auto files = deserializer.ReadProperty<vector<string>>(100, "files");
 		auto types = deserializer.ReadProperty<vector<LogicalType>>(101, "types");
@@ -892,23 +854,80 @@ unique_ptr<LocalFunctionData> ParquetWriteInitializeLocal(ExecutionContext &cont
 }
 
 // LCOV_EXCL_START
 
-static void ParquetCopySerialize(FieldWriter &writer, const FunctionData &bind_data_p, const CopyFunction &function) {
-	auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
-	writer.WriteRegularSerializableList<LogicalType>(bind_data.sql_types);
-	writer.WriteList<string>(bind_data.column_names);
-	writer.WriteField<duckdb_parquet::format::CompressionCodec::type>(bind_data.codec);
-	writer.WriteField<idx_t>(bind_data.row_group_size);
-}
-
-static unique_ptr<FunctionData> ParquetCopyDeserialize(ClientContext &context, FieldReader &reader,
-                                                       CopyFunction &function) {
-	unique_ptr<ParquetWriteBindData> data = make_uniq<ParquetWriteBindData>();
-
-	data->sql_types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
-	data->column_names = reader.ReadRequiredList<string>();
-	data->codec = reader.ReadRequired<duckdb_parquet::format::CompressionCodec::type>();
-	data->row_group_size = reader.ReadRequired<idx_t>();
+// FIXME: Have these be generated instead
+template <>
+const char *EnumUtil::ToChars<duckdb_parquet::format::CompressionCodec::type>(
+    duckdb_parquet::format::CompressionCodec::type value) {
+	switch (value) {
+	case CompressionCodec::UNCOMPRESSED:
+		return "UNCOMPRESSED";
+		break;
+	case CompressionCodec::SNAPPY:
+		return "SNAPPY";
+		break;
+	case CompressionCodec::GZIP:
+		return "GZIP";
+		break;
+	case CompressionCodec::LZO:
+		return "LZO";
+		break;
+	case CompressionCodec::BROTLI:
+		return "BROTLI";
+		break;
+	case CompressionCodec::LZ4:
+		return "LZ4";
+		break;
+	case CompressionCodec::ZSTD:
+		return "ZSTD";
+		break;
+	default:
+		throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+	}
+}
+
+template <>
+duckdb_parquet::format::CompressionCodec::type
+EnumUtil::FromString<duckdb_parquet::format::CompressionCodec::type>(const char *value) {
+	if (StringUtil::Equals(value, "UNCOMPRESSED")) {
+		return CompressionCodec::UNCOMPRESSED;
+	}
+	if (StringUtil::Equals(value, "SNAPPY")) {
+		return CompressionCodec::SNAPPY;
+	}
+	if (StringUtil::Equals(value, "GZIP")) {
+		return CompressionCodec::GZIP;
+	}
+	if (StringUtil::Equals(value, "LZO")) {
+		return CompressionCodec::LZO;
+	}
+	if (StringUtil::Equals(value, "BROTLI")) {
+		return CompressionCodec::BROTLI;
+	}
+	if (StringUtil::Equals(value, "LZ4")) {
+		return CompressionCodec::LZ4;
+	}
+	if (StringUtil::Equals(value, "ZSTD")) {
+		return CompressionCodec::ZSTD;
+	}
+	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
+
+static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bind_data_p,
+                                 const CopyFunction &function) {
+	auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
+	serializer.WriteProperty(100, "sql_types", bind_data.sql_types);
+	serializer.WriteProperty(101, "column_names", bind_data.column_names);
+	serializer.WriteProperty(102, "codec", bind_data.codec);
+	serializer.WriteProperty(103, "row_group_size", bind_data.row_group_size);
+}
+
+static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) {
+	auto data = make_uniq<ParquetWriteBindData>();
+	data->sql_types = deserializer.ReadProperty<vector<LogicalType>>(100, "sql_types");
+	data->column_names = deserializer.ReadProperty<vector<string>>(101, "column_names");
+	data->codec = deserializer.ReadProperty<duckdb_parquet::format::CompressionCodec::type>(102, "codec");
+	data->row_group_size = deserializer.ReadProperty<idx_t>(103, "row_group_size");
 	return std::move(data);
 }
 // LCOV_EXCL_STOP
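The hand-written specializations above let the thrift codec enum round-trip through the new serializer by name. A usage sketch, assuming the vendored duckdb/common/enum_util.hpp and parquet thrift headers:

#include "duckdb/common/enum_util.hpp" // assumed location of EnumUtil
#include "parquet_types.h"

using namespace duckdb;
using duckdb_parquet::format::CompressionCodec;

// Round-trip the codec enum by name, as the property serializer does.
void RoundTripCodec() {
	const char *name = EnumUtil::ToChars<CompressionCodec::type>(CompressionCodec::ZSTD); // "ZSTD"
	CompressionCodec::type codec = EnumUtil::FromString<CompressionCodec::type>(name);
	(void)codec; // codec == CompressionCodec::ZSTD
}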
5 changes: 3 additions & 2 deletions src/duckdb/extension/parquet/parquet_writer.cpp
@@ -9,6 +9,7 @@
 #include "duckdb/function/table_function.hpp"
 #include "duckdb/main/client_context.hpp"
 #include "duckdb/main/connection.hpp"
+#include "duckdb/common/serializer/write_stream.hpp"
 #include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
 #include "duckdb/parser/parsed_data/create_table_function_info.hpp"
 #endif
@@ -55,7 +56,7 @@ FieldID FieldID::Copy() const {
 
 class MyTransport : public TTransport {
 public:
-	explicit MyTransport(Serializer &serializer) : serializer(serializer) {
+	explicit MyTransport(WriteStream &serializer) : serializer(serializer) {
 	}
 
 	bool isOpen() const override {
@@ -73,7 +74,7 @@ class MyTransport : public TTransport {
 	}
 
 private:
-	Serializer &serializer;
+	WriteStream &serializer;
 };
 
 bool ParquetWriter::DuckDBTypeToParquetTypeInternal(const LogicalType &duckdb_type, Type::type &parquet_type) {
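With MyTransport wrapping the abstract WriteStream, any byte sink can back the thrift protocol writer. A minimal illustrative sink, assuming WriteData is WriteStream's only pure-virtual member:

#include "duckdb/common/serializer/write_stream.hpp"

namespace duckdb {

// Illustrative sink (not part of the commit): count the bytes thrift would
// emit, e.g. to learn a footer's serialized size without writing it anywhere.
class ByteCountingStream : public WriteStream {
public:
	void WriteData(const_data_ptr_t buffer, idx_t write_size) override {
		(void)buffer; // discard the payload, keep only the length
		written += write_size;
	}

	idx_t written = 0;
};

} // namespace duckdb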
8 changes: 4 additions & 4 deletions src/duckdb/extension/parquet/serialize_parquet.cpp
@@ -3,19 +3,19 @@
 // Do not edit this file manually, your changes will be overwritten
 //===----------------------------------------------------------------------===//
 
-#include "duckdb/common/serializer/format_serializer.hpp"
-#include "duckdb/common/serializer/format_deserializer.hpp"
+#include "duckdb/common/serializer/serializer.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
 #include "parquet_reader.hpp"
 
 namespace duckdb {
 
-void ParquetOptions::FormatSerialize(FormatSerializer &serializer) const {
+void ParquetOptions::Serialize(Serializer &serializer) const {
 	serializer.WriteProperty(100, "binary_as_string", binary_as_string);
 	serializer.WriteProperty(101, "file_row_number", file_row_number);
 	serializer.WriteProperty(102, "file_options", file_options);
 }
 
-ParquetOptions ParquetOptions::FormatDeserialize(FormatDeserializer &deserializer) {
+ParquetOptions ParquetOptions::Deserialize(Deserializer &deserializer) {
 	ParquetOptions result;
 	deserializer.ReadProperty(100, "binary_as_string", result.binary_as_string);
 	deserializer.ReadProperty(101, "file_row_number", result.file_row_number);
13 changes: 2 additions & 11 deletions src/duckdb/src/catalog/catalog_entry.cpp
@@ -55,17 +55,8 @@ void CatalogEntry::Serialize(Serializer &serializer) const {
 	info->Serialize(serializer);
 }
 
-unique_ptr<CreateInfo> CatalogEntry::Deserialize(Deserializer &source) {
-	return CreateInfo::Deserialize(source);
-}
-
-void CatalogEntry::FormatSerialize(FormatSerializer &serializer) const {
-	const auto info = GetInfo();
-	info->FormatSerialize(serializer);
-}
-
-unique_ptr<CreateInfo> CatalogEntry::FormatDeserialize(FormatDeserializer &deserializer) {
-	return CreateInfo::FormatDeserialize(deserializer);
+unique_ptr<CreateInfo> CatalogEntry::Deserialize(Deserializer &deserializer) {
+	return CreateInfo::Deserialize(deserializer);
 }
 
 void CatalogEntry::Verify(Catalog &catalog_p) {
@@ -1,5 +1,4 @@
 #include "duckdb/catalog/catalog_entry/index_catalog_entry.hpp"
-#include "duckdb/common/field_writer.hpp"
 #include "duckdb/storage/index.hpp"
 
 namespace duckdb {
@@ -1,6 +1,5 @@
 #include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp"
 #include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp"
-#include "duckdb/common/field_writer.hpp"
 #include "duckdb/function/scalar_macro_function.hpp"
 
 namespace duckdb {
@@ -3,7 +3,6 @@
 #include "duckdb/catalog/catalog.hpp"
 #include "duckdb/common/algorithm.hpp"
 #include "duckdb/common/exception.hpp"
-#include "duckdb/common/field_writer.hpp"
 #include "duckdb/catalog/dependency_list.hpp"
 #include "duckdb/parser/parsed_data/create_schema_info.hpp"
 
@@ -2,7 +2,6 @@
 
 #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
 #include "duckdb/common/exception.hpp"
-#include "duckdb/common/field_writer.hpp"
 #include "duckdb/parser/parsed_data/create_sequence_info.hpp"
 #include "duckdb/catalog/dependency_manager.hpp"
 
2 changes: 0 additions & 2 deletions src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp
@@ -4,8 +4,6 @@
 #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
 #include "duckdb/common/algorithm.hpp"
 #include "duckdb/common/exception.hpp"
-#include "duckdb/common/field_writer.hpp"
-#include "duckdb/common/serializer.hpp"
 #include "duckdb/main/database.hpp"
 #include "duckdb/parser/constraints/list.hpp"
 #include "duckdb/parser/parsed_data/create_table_info.hpp"
4 changes: 1 addition & 3 deletions src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp
@@ -1,11 +1,9 @@
 #include "duckdb/catalog/catalog.hpp"
 #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
+
 #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
 #include "duckdb/common/exception.hpp"
 #include "duckdb/common/limits.hpp"
-#include "duckdb/common/field_writer.hpp"
 #include "duckdb/parser/keyword_helper.hpp"
 #include "duckdb/common/types/vector.hpp"
-#include <algorithm>
-#include <sstream>
 
@@ -2,7 +2,6 @@
 
 #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
 #include "duckdb/common/exception.hpp"
-#include "duckdb/common/field_writer.hpp"
 #include "duckdb/parser/parsed_data/alter_table_info.hpp"
 #include "duckdb/parser/parsed_data/create_view_info.hpp"
 #include "duckdb/common/limits.hpp"