From 9a79288f8356b337a2302218bccae247c13941a3 Mon Sep 17 00:00:00 2001 From: Zhang Lei Date: Mon, 8 Jul 2024 16:34:26 +0800 Subject: [PATCH] fix(interactive): Support `long_text` as primary key (#4011) Previously, only `varchar` was supported as a string primary key type. Using `long_text` as a primary key makes no difference in Interactive, so it is now supported as well. --- .github/workflows/interactive.yml | 10 +++ .../loader/abstract_arrow_fragment_loader.cc | 12 +++- .../mutable_property_fragment.cc | 3 +- .../modern_graph_string_pk.yaml | 71 +++++++++++++++++++ flex/utils/property/column.cc | 3 + flex/utils/pt_indexer.h | 3 + 6 files changed, 98 insertions(+), 4 deletions(-) create mode 100644 flex/tests/rt_mutable_graph/modern_graph_string_pk.yaml diff --git a/.github/workflows/interactive.yml b/.github/workflows/interactive.yml index 3a22335ef033..18929079486f 100644 --- a/.github/workflows/interactive.yml +++ b/.github/workflows/interactive.yml @@ -452,6 +452,16 @@ jobs: run: | git clone -b master --single-branch --depth=1 https://github.com/GraphScope/gstest.git ${GS_TEST_DIR} + - name: Test String primary key on modern graph + env: + FLEX_DATA_DIR: ${{ github.workspace }}/flex/interactive/examples/modern_graph/ + run: | + rm -rf /tmp/csr-data-dir/ + cd ${GITHUB_WORKSPACE}/flex/build/ + SCHEMA_FILE=../tests/rt_mutable_graph/modern_graph_string_pk.yaml + BULK_LOAD_FILE=../interactive/examples/modern_graph/bulk_load.yaml + GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d /tmp/csr-data-dir/ + - name: Test String edge property on modern graph env: FLEX_DATA_DIR: ${{ github.workspace }}/flex/interactive/examples/modern_graph/ diff --git a/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc b/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc index eb93989c6765..944760e298b1 100644 --- a/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc +++ b/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc 
@@ -95,14 +95,16 @@ void set_properties_column(gs::ColumnBase* col, set_column(col, array, offset); } else if (col_type == PropertyType::kFloat) { set_column(col, array, offset); - } else if (col_type == PropertyType::kStringMap) { - set_column_from_string_array(col, array, offset); } else if (col_type == PropertyType::kDate) { set_column_from_timestamp_array(col, array, offset); } else if (col_type == PropertyType::kDay) { set_column_from_timestamp_array_to_day(col, array, offset); + } else if (col_type == PropertyType::kStringMap) { + set_column_from_string_array(col, array, offset); } else if (col_type.type_enum == impl::PropertyTypeImpl::kVarChar) { set_column_from_string_array(col, array, offset); + } else if (col_type == PropertyType::kStringView) { + set_column_from_string_array(col, array, offset); } else { LOG(FATAL) << "Not support type: " << type->ToString(); } @@ -216,9 +218,13 @@ void AbstractArrowFragmentLoader::AddVerticesRecordBatch( addVertexRecordBatchImpl(v_label_id, v_files, supplier_creator); } else if (type == PropertyType::kUInt64) { addVertexRecordBatchImpl(v_label_id, v_files, supplier_creator); - } else if (type.type_enum == impl::PropertyTypeImpl::kVarChar) { + } else if (type.type_enum == impl::PropertyTypeImpl::kVarChar || + type.type_enum == impl::PropertyTypeImpl::kStringView) { addVertexRecordBatchImpl(v_label_id, v_files, supplier_creator); + } else { + LOG(FATAL) << "Unsupported primary key type for vertex, type: " << type + << ", label: " << v_label_name; } VLOG(10) << "Finish init vertices for label " << v_label_name; } diff --git a/flex/storages/rt_mutable_graph/mutable_property_fragment.cc b/flex/storages/rt_mutable_graph/mutable_property_fragment.cc index 360d16ad0b8f..5f914fa3f888 100644 --- a/flex/storages/rt_mutable_graph/mutable_property_fragment.cc +++ b/flex/storages/rt_mutable_graph/mutable_property_fragment.cc @@ -102,7 +102,8 @@ inline DualCsrBase* create_csr(EdgeStrategy oes, EdgeStrategy ies, return new DualCsr( 
oes, ies, properties[0].additional_type_info.max_length); } else if (properties[0] == PropertyType::kStringView) { - return new DualCsr(oes, ies, 256); + return new DualCsr( + oes, ies, gs::PropertyType::STRING_DEFAULT_MAX_LENGTH); } } else { // TODO: fix me, storage strategy not set diff --git a/flex/tests/rt_mutable_graph/modern_graph_string_pk.yaml b/flex/tests/rt_mutable_graph/modern_graph_string_pk.yaml new file mode 100644 index 000000000000..5c4b9494c11e --- /dev/null +++ b/flex/tests/rt_mutable_graph/modern_graph_string_pk.yaml @@ -0,0 +1,71 @@ +name: modern_graph # then must have a modern dir under ${data} directory +store_type: mutable_csr # v6d, groot, gart +schema: + vertex_types: + - type_id: 0 + type_name: person + x_csr_params: + max_vertex_num: 100 + properties: + - property_id: 0 + property_name: id + property_type: + string: + long_text: "" + - property_id: 1 + property_name: name + property_type: + varchar: + max_length: 64 + - property_id: 2 + property_name: age + property_type: + primitive_type: DT_SIGNED_INT32 + primary_keys: + - id + - type_id: 1 + type_name: software + x_csr_params: + max_vertex_num: 100 + properties: + - property_id: 0 + property_name: id + property_type: + varchar: + max_length: 64 + - property_id: 1 + property_name: name + property_type: + varchar: + max_length: 64 + - property_id: 2 + property_name: lang + property_type: + varchar: + max_length: 64 + primary_keys: + - id + edge_types: + - type_id: 0 + type_name: knows + vertex_type_pair_relations: + - source_vertex: person + destination_vertex: person + relation: MANY_TO_MANY + properties: + - property_id: 0 + property_name: weight + property_type: + primitive_type: DT_DOUBLE + - type_id: 1 + type_name: created + vertex_type_pair_relations: + - source_vertex: person + destination_vertex: software + relation: MANY_TO_MANY + properties: + - property_id: 0 + property_name: weight + property_type: + varchar: + max_length: 64 \ No newline at end of file diff --git 
a/flex/utils/property/column.cc b/flex/utils/property/column.cc index 8f2ec478266e..9858b5ed7cb4 100644 --- a/flex/utils/property/column.cc +++ b/flex/utils/property/column.cc @@ -132,6 +132,9 @@ std::shared_ptr CreateColumn(PropertyType type, return std::make_shared(); } else if (type == PropertyType::kStringMap) { return std::make_shared(); + } else if (type == PropertyType::kStringView) { + return std::make_shared( + gs::PropertyType::STRING_DEFAULT_MAX_LENGTH); } else if (type.type_enum == impl::PropertyTypeImpl::kVarChar) { return std::make_shared( type.additional_type_info.max_length); diff --git a/flex/utils/pt_indexer.h b/flex/utils/pt_indexer.h index b85a46510e4a..fe34fba2d71b 100644 --- a/flex/utils/pt_indexer.h +++ b/flex/utils/pt_indexer.h @@ -183,6 +183,9 @@ class PTIndexer { } else if (type.type_enum == impl::PropertyTypeImpl::kVarChar) { keys_ = new StringColumn(StorageStrategy::kMem, type.additional_type_info.max_length); + } else if (type == PropertyType::kStringView) { + keys_ = new StringColumn(StorageStrategy::kMem, + gs::PropertyType::STRING_DEFAULT_MAX_LENGTH); } else { LOG(FATAL) << "Not support type [" << type << "] as pk type .."; }