From c7df2ca763c41c970c1eb798ee174d55e47168d3 Mon Sep 17 00:00:00 2001 From: Zhang Lei Date: Wed, 18 Sep 2024 22:23:58 +0800 Subject: [PATCH] feat(interactive): Introduce new version of graph_algo example graph (#4153) Update the raw data and graph definition of `graph_algo`. - [x] Keep the old graph_algo graph. - [x] Move the csv files to gstest? --------- Co-authored-by: liulx20 --- .github/workflows/interactive.yml | 5 + flex/bin/load_plan_and_gen.sh | 6 +- .../examples/new_graph_algo/1.cypher | 6 + .../examples/new_graph_algo/2.cypher | 13 ++ .../examples/new_graph_algo/graph.yaml | 168 ++++++++++++++ .../examples/new_graph_algo/import.yaml | 210 ++++++++++++++++++ 6 files changed, 405 insertions(+), 3 deletions(-) create mode 100644 flex/interactive/examples/new_graph_algo/1.cypher create mode 100644 flex/interactive/examples/new_graph_algo/2.cypher create mode 100644 flex/interactive/examples/new_graph_algo/graph.yaml create mode 100644 flex/interactive/examples/new_graph_algo/import.yaml diff --git a/.github/workflows/interactive.yml b/.github/workflows/interactive.yml index 8610723964db..fc10b28cf220 100644 --- a/.github/workflows/interactive.yml +++ b/.github/workflows/interactive.yml @@ -102,6 +102,9 @@ jobs: mkdir -p ${INTERACTIVE_WORKSPACE}/data/graph_algo cp ${GITHUB_WORKSPACE}/flex/tests/interactive/graph_algo_test.yaml ${INTERACTIVE_WORKSPACE}/data/graph_algo/graph.yaml cp ${GITHUB_WORKSPACE}/flex/interactive/examples/graph_algo/import.yaml ${INTERACTIVE_WORKSPACE}/data/graph_algo/import.yaml + mkdir -p ${INTERACTIVE_WORKSPACE}/data/new_graph_algo + cp ${GITHUB_WORKSPACE}/flex/interactive/examples/new_graph_algo/graph.yaml ${INTERACTIVE_WORKSPACE}/data/new_graph_algo/graph.yaml + cp ${GITHUB_WORKSPACE}/flex/interactive/examples/new_graph_algo/import.yaml ${INTERACTIVE_WORKSPACE}/data/new_graph_algo/import.yaml mkdir -p ${INTERACTIVE_WORKSPACE}/data/modern_graph cp ${GITHUB_WORKSPACE}/flex/interactive/examples/modern_graph/graph.yaml 
${INTERACTIVE_WORKSPACE}/data/modern_graph/graph.yaml @@ -113,6 +116,8 @@ jobs: GLOG_v=10 ./bin/bulk_loader -g ${INTERACTIVE_WORKSPACE}/data/movies/graph.yaml -l ${INTERACTIVE_WORKSPACE}/data/movies/import.yaml -d ${INTERACTIVE_WORKSPACE}/data/movies/indices/ export FLEX_DATA_DIR=../interactive/examples/graph_algo GLOG_v=10 ./bin/bulk_loader -g ${INTERACTIVE_WORKSPACE}/data/graph_algo/graph.yaml -l ${INTERACTIVE_WORKSPACE}/data/graph_algo/import.yaml -d ${INTERACTIVE_WORKSPACE}/data/graph_algo/indices/ + export FLEX_DATA_DIR=${GS_TEST_DIR}/flex/new_graph_algo + GLOG_v=10 ./bin/bulk_loader -g ${INTERACTIVE_WORKSPACE}/data/new_graph_algo/graph.yaml -l ${INTERACTIVE_WORKSPACE}/data/new_graph_algo/import.yaml -d ${INTERACTIVE_WORKSPACE}/data/new_graph_algo/indices/ export FLEX_DATA_DIR=../interactive/examples/modern_graph GLOG_v=10 ./bin/bulk_loader -g ${INTERACTIVE_WORKSPACE}/data/modern_graph/graph.yaml -l ../interactive/examples/modern_graph/bulk_load.yaml -d ${INTERACTIVE_WORKSPACE}/data/modern_graph/indices/ diff --git a/flex/bin/load_plan_and_gen.sh b/flex/bin/load_plan_and_gen.sh index 93cb0116676c..5670c83ca079 100755 --- a/flex/bin/load_plan_and_gen.sh +++ b/flex/bin/load_plan_and_gen.sh @@ -368,19 +368,19 @@ compile_hqps_so() { ################### now copy ########################## # if dst_so_path eq output_so_path, skip copying. - if [ ${dst_so_path} == ${output_so_path} ]; then + if [[ "${dst_so_path}" == "${output_so_path}" ]]; then info "Output dir is same as work dir, skip copying." exit 0 fi # copy output to output_dir - if [ ! -z ${output_dir} ]; then + if [[ -n "${output_dir}" ]]; then mkdir -p ${output_dir} else info "Output dir not set, skip copying." exit 0 fi # check output_dir doesn't contains output_so_name - if [ -f ${dst_so_path} ]; then + if [[ -f "${dst_so_path}" ]]; then emph "Output dir ${output_dir} already contains ${procedure_name}.so,overriding it." 
fi cp ${output_so_path} ${output_dir} diff --git a/flex/interactive/examples/new_graph_algo/1.cypher b/flex/interactive/examples/new_graph_algo/1.cypher new file mode 100644 index 000000000000..b7611b659c7e --- /dev/null +++ b/flex/interactive/examples/new_graph_algo/1.cypher @@ -0,0 +1,6 @@ +MATCH (p: Paper)-[:WorkOn]->(t:Task), + (p)-[:Has]->(c:Challenge)-[:SolvedBy]->(s:Solution), + (p)-[:Use]->(s:Solution) +WHERE t.name = "Distributed Subgraph Matching Efficiency" +WITH t, c, count(p) AS num +RETURN t.name, c.name, num \ No newline at end of file diff --git a/flex/interactive/examples/new_graph_algo/2.cypher b/flex/interactive/examples/new_graph_algo/2.cypher new file mode 100644 index 000000000000..e72929de556f --- /dev/null +++ b/flex/interactive/examples/new_graph_algo/2.cypher @@ -0,0 +1,13 @@ +MATCH (p1:Paper)<-[:Cite]-(p2:Paper)<-[:Cite]-(p3:Paper), + (p1)-[:Has]->(c)-[:SolvedBy]->(s1:Solution) +WHERE c.name = "Optimizing Load Balance in Distributed Systems" and p1.title = "Parallel Subgraph Listing in a Large-Scale Graph" +WITH p1.title AS paper1, p2, p3, c, collect(s1.description) AS solutions1 +MATCH (p2)-[:Has]->(c)-[:SolvedBy]->(s2:Solution), + (p2)-[:Use]->(s2) +WHERE p2.title = "Scalable distributed subgraph enumeration" +WITH paper1, p2.title AS paper2, p3, c, solutions1, collect(s2.description) AS solutions2 +MATCH (p3)-[:Has]->(c)-[:SolvedBy]->(s3:Solution), + (p3)-[:Use]->(s3) +WHERE p3.title = "HUGE: An Efficient and Scalable Subgraph Enumeration System" +WITH paper1, paper2, p3.title as paper3, c.name as challenge, solutions1, solutions2, collect(s3.description) as solutions3 +RETURN paper1, paper2, paper3, challenge, solutions1, solutions2, solutions3 \ No newline at end of file diff --git a/flex/interactive/examples/new_graph_algo/graph.yaml b/flex/interactive/examples/new_graph_algo/graph.yaml new file mode 100644 index 000000000000..ca2631a42e40 --- /dev/null +++ b/flex/interactive/examples/new_graph_algo/graph.yaml @@ -0,0 +1,168 @@ 
+name: graph_algo # same value as the "graph:" key in import.yaml +version: v0.1 +schema: + vertex_types: # four vertex types (Challenge, Task, Solution, Paper); each lists the string property "id" as its primary key + - type_id: 0 + type_name: Challenge + properties: + - property_id: 0 + property_name: id + property_type: + string: + long_text: '' + - property_id: 1 + property_name: name + property_type: + string: + long_text: '' + - property_id: 2 + property_name: description + property_type: + string: + long_text: '' + primary_keys: + - id + - type_id: 1 + type_name: Task + properties: + - property_id: 0 + property_name: id + property_type: + string: + long_text: '' + - property_id: 1 + property_name: name + property_type: + string: + long_text: '' + - property_id: 2 + property_name: description + property_type: + string: + long_text: '' + primary_keys: + - id + - type_id: 2 + type_name: Solution + properties: + - property_id: 0 + property_name: id + property_type: + string: + long_text: '' + - property_id: 1 + property_name: name + property_type: + string: + long_text: '' + - property_id: 2 + property_name: description + property_type: + string: + long_text: '' + primary_keys: + - id + - type_id: 3 + type_name: Paper + properties: + - property_id: 0 + property_name: id + property_type: + string: + long_text: '' + - property_id: 1 + property_name: published + property_type: + string: + long_text: '' + - property_id: 2 + property_name: year + property_type: + primitive_type: DT_SIGNED_INT32 + - property_id: 3 + property_name: month + property_type: + primitive_type: DT_SIGNED_INT32 + - property_id: 4 + property_name: title + property_type: + string: + long_text: '' + - property_id: 5 + property_name: authors + property_type: + string: + long_text: '' + - property_id: 6 + property_name: summary + property_type: + string: + long_text: '' + - property_id: 7 + property_name: journal_ref + property_type: + string: + long_text: '' + - property_id: 8 + property_name: doi + property_type: + string: + long_text: '' + - property_id: 9 + property_name: primary_category + property_type: + string: + long_text: '' + - property_id: 
10 + property_name: categories + property_type: + string: + long_text: '' + - property_id: 11 + property_name: problem_def + property_type: + string: + long_text: '' + - property_id: 12 + property_name: keywords + property_type: + string: + long_text: '' + primary_keys: + - id + edge_types: # five edge types; all have empty properties and a MANY_TO_MANY relation + - type_id: 0 + type_name: SolvedBy + properties: [] + vertex_type_pair_relations: + - destination_vertex: Solution + relation: MANY_TO_MANY + source_vertex: Challenge + - type_id: 1 + type_name: Cite + properties: [] + vertex_type_pair_relations: + - destination_vertex: Paper + relation: MANY_TO_MANY + source_vertex: Paper + - type_id: 2 + type_name: Has + properties: [] + vertex_type_pair_relations: + - destination_vertex: Challenge + relation: MANY_TO_MANY + source_vertex: Paper + - type_id: 3 + type_name: WorkOn + properties: [] + vertex_type_pair_relations: + - destination_vertex: Task + relation: MANY_TO_MANY + source_vertex: Paper + - type_id: 4 + type_name: Use + properties: [] + vertex_type_pair_relations: + - destination_vertex: Solution + relation: MANY_TO_MANY + source_vertex: Paper diff --git a/flex/interactive/examples/new_graph_algo/import.yaml b/flex/interactive/examples/new_graph_algo/import.yaml new file mode 100644 index 000000000000..c280441b0084 --- /dev/null +++ b/flex/interactive/examples/new_graph_algo/import.yaml @@ -0,0 +1,210 @@ +graph: graph_algo # same value as the "name:" key in graph.yaml +loading_config: + data_source: # NOTE(review): location is commented out below; the CI workflow exports FLEX_DATA_DIR before running bulk_loader, presumably to resolve the relative input names - confirm + scheme: file # file, oss, s3, hdfs; only file is supported now +# location: /home/graphscope/graph_algo/ + import_option: init # append, overwrite, only init is supported now + format: + type: csv + metadata: + delimiter: '|' # other loading configuration places here + header_row: true # whether to use the first row as the header + quoting: true + quote_char: '"' + double_quote: true + escape_char: '\' + escaping: false + block_size: 4MB + batch_reader: false +vertex_mappings: # one CSV input per vertex type; every column entry gives both an index and a name + - type_name: Challenge + inputs: + - Challenge.csv + column_mappings: + - column: + index: 0 + 
name: id + property: id + - column: + index: 1 + name: name + property: name + - column: + index: 2 + name: description + property: description + - type_name: Task + inputs: + - Task.csv + column_mappings: + - column: + index: 0 + name: id + property: id + - column: + index: 1 + name: name + property: name + - column: + index: 2 + name: description + property: description + - type_name: Solution + inputs: + - Solution.csv + column_mappings: + - column: + index: 0 + name: id + property: id + - column: + index: 1 + name: name + property: name + - column: + index: 2 + name: description + property: description + - type_name: Paper + inputs: + - Paper.csv + column_mappings: + - column: + index: 0 + name: id + property: id + - column: + index: 1 + name: published + property: published + - column: + index: 2 + name: year + property: year + - column: + index: 3 + name: month + property: month + - column: + index: 4 + name: title + property: title + - column: + index: 5 + name: authors + property: authors + - column: + index: 6 + name: summary + property: summary + - column: + index: 7 + name: journal_ref + property: journal_ref + - column: + index: 8 + name: doi + property: doi + - column: + index: 9 + name: primary_category + property: primary_category + - column: + index: 10 + name: categories + property: categories + - column: + index: 11 + name: problem_def + property: problem_def + - column: + index: 12 + name: keywords + property: keywords +edge_mappings: # one CSV input per edge type; column 0 ("source") and column 1 ("target") map to the id of the source and destination vertex + - type_triplet: + edge: SolvedBy + source_vertex: Challenge + destination_vertex: Solution + inputs: + - Challenge_Solvedby_Solution.csv + column_mappings: [] + source_vertex_mappings: + - column: + index: 0 + name: source + property: id + destination_vertex_mappings: + - column: + index: 1 + name: target + property: id + - type_triplet: + edge: Cite + source_vertex: Paper + destination_vertex: Paper + inputs: + - Paper_Cite_Paper.csv + column_mappings: [] + source_vertex_mappings: + - column: + index: 0 + name: 
source + property: id + destination_vertex_mappings: + - column: + index: 1 + name: target + property: id + - type_triplet: + edge: Has + source_vertex: Paper + destination_vertex: Challenge + inputs: + - Paper_Has_Challenge.csv + column_mappings: [] + source_vertex_mappings: + - column: + index: 0 + name: source + property: id + destination_vertex_mappings: + - column: + index: 1 + name: target + property: id + - type_triplet: + edge: WorkOn + source_vertex: Paper + destination_vertex: Task + inputs: + - Paper_WorkOn_Task.csv + column_mappings: [] + source_vertex_mappings: + - column: + index: 0 + name: source + property: id + destination_vertex_mappings: + - column: + index: 1 + name: target + property: id + - type_triplet: + edge: Use + source_vertex: Paper + destination_vertex: Solution + inputs: + - Paper_Use_Solution.csv + column_mappings: [] + source_vertex_mappings: + - column: + index: 0 + name: source + property: id + destination_vertex_mappings: + - column: + index: 1 + name: target + property: id + 