Experiments

Prepare

#!/bin/bash

set -euo pipefail; shopt -s nullglob

root=$HOME/graphsurge/experiments

# Download Graphsurge
git clone https://github.com/dsg-uwaterloo/graphsurge $HOME/graphsurge

# Build Graphsurge
cd $root/docker
sed -e "s/UID/$(id -u)/g" -e "s/GID/$(id -g)/g" Dockerfile.sample > Dockerfile
docker build -t graphsurge .
mkdir -p $root/bin
cd $root/..
bash $root/build_gs_exe.sh
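# run_docker.sh (shipped with the repo) is used below to run each step inside
# the container. A minimal sketch of what such a wrapper might look like,
# assuming the image built above and a bind mount of the repo at
# /opt/graphsurge (the path used throughout this script); the real script also
# carries the IP settings needed for the distributed runs in 6.6:
#
#   #!/bin/bash
#   docker run --rm --network host \
#       -v "$HOME/graphsurge:/opt/graphsurge" \
#       graphsurge "$@"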

# Runtime configuration: execution-strategy names passed to run_computation.sh
# ("diffs" variants maintain results differentially across the views of a
# collection, "restart" re-runs each view from scratch, and "adaptive" lets
# Graphsurge choose between the two).
runtypediffs=adaptivealldiffs
runtypediffs2=adaptivealldiffs2
runtyperestart=adaptiveallrestart
adaptive=adaptive

6.2

current_root=$root/6.2

## Prepare raw dataset
mkdir -p ${current_root}/graph/serde
cd ${current_root}/graph
[[ -f edges.txt ]] ||  {
    wget https://snap.stanford.edu/data/sx-stackoverflow.txt.gz;
    gunzip sx-stackoverflow.txt.gz;
    ln -snf sx-stackoverflow.txt edges.txt;
    # Add required headers
    sed -i "1i :start_id :end_id ts:int" edges.txt;
}
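# Optional sanity check: the header added above should now be line 1.
head -n 1 edges.txt   # expected: ":start_id :end_id ts:int"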
cd ${current_root}

## Load graph into Graphsurge
bash $root/run_docker.sh /opt/graphsurge/experiments/generate_graph_edges_only_space.sh

for i in 1_30  2_18  3_7  4_6  5_4  6_16  7_9  8_5  9_3 10_3 ;do
    cd $i
    mkdir -p vcol_data/{vcol_best_order_serde,vcol_manual_order_serde}
    
    for order in manual ;do
        echo $order

        ## Create view collections
        bash $root/run_docker.sh /opt/graphsurge/experiments/bin/gs create_vcol_queries/vcol_${order}_order.txt | tee vcol_${order}_order.log

        ## Run computations
        for computation in wcc bfs sssp mpsp pr scc ;do
            echo $computation
            for runtype in $runtypediffs $runtyperestart $adaptive ;do
                echo $runtype
                # A failure aborts all remaining runs for this dataset (break 3
                # exits the runtype, computation, and order loops).
                $root/run_docker.sh ../run_computation.sh $computation $runtype $order || break 3
            done
        done
    done
    cd ..
done

6.3

current_root=$root/6.3

## Prepare raw dataset
mkdir -p ${current_root}/graph/serde
cd ${current_root}/graph
[[ -f edges.txt && -f vertices.txt ]] ||  {
    mkdir raw
    cd raw
    wget https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2019-10-01/manifest.txt
    wget -B https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/2019-10-01/ -i manifest.txt
    cd ..
    python $root/scripts/semanticscholar/generate_raw.py ${current_root}/graph/raw
    python $root/scripts/semanticscholar/generate_write.py
    ln -snf paper.csv vertices.txt
    ln -snf cite.csv edges.txt
    # Add required headers
    sed -i "1i id,year:int,authors:int" vertices.txt
    sed -i "1i src,dst" edges.txt
}
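# Optional sanity check (if the block above just ran): every file listed in the
# manifest should have been downloaded, and both header rows should be in place.
# Assumes manifest entries are paths whose basenames are the downloaded names.
while read -r f; do
    [[ -f raw/$(basename "$f") ]] || echo "missing: $f"
done < raw/manifest.txt
head -n 1 vertices.txt edges.txt   # expected: "id,year:int,authors:int" and "src,dst"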
cd ${current_root}

## Load graph into Graphsurge
bash $root/run_docker.sh /opt/graphsurge/experiments/generate_graph_edges_vertices_comma.sh

for i in 1_16_0 2_16_0 3_25_253 ;do
    cd $i
    
    mkdir -p vcol_data/{vcol_best_order_serde,vcol_manual_order_serde}
    
    for order in manual ;do
        echo $order

        ## Create view collections
        bash $root/run_docker.sh /opt/graphsurge/experiments/bin/gs create_vcol_queries/vcol_${order}_order.txt | tee vcol_${order}_order.log

        ## Run computations
        for computation in wcc bfs sssp mpsp pr scc ;do
            echo $computation
            for runtype in $runtypediffs $runtyperestart $adaptive ;do
                echo $runtype
                $root/run_docker.sh ../run_computation.sh $computation $runtype $order || break 3
            done
        done
    done
    cd ..
done

6.4

for dataset in comlj wikitopcats;do
    current_root=$root/6.4/$dataset

    ## Prepare raw dataset
    mkdir -p ${current_root}/graph/serde
    cd ${current_root}/graph
    [[ -f edges.txt && -f vertices.txt ]] ||  {
        if [[ $dataset == "comlj" ]];then
            filename=com-lj.ungraph.txt
            wget https://snap.stanford.edu/data/bigdata/communities/${filename}.gz
            gunzip ${filename}.gz;
            ln -snf ${filename} edges.txt

            filename2=com-lj.all.cmty.txt
            wget https://snap.stanford.edu/data/bigdata/communities/${filename2}.gz
            gunzip ${filename2}.gz
            ln -snf ${filename2} communities.txt
        else
            filename=wiki-topcats.txt
            wget https://snap.stanford.edu/data/${filename}.gz
            gunzip ${filename}.gz;
            ln -snf ${filename} edges.txt

            filename2=wiki-topcats-categories.txt
            wget https://snap.stanford.edu/data/${filename2}.gz
            gunzip ${filename2}.gz
            ln -snf ${filename2} communities.txt
        fi
        python $root/scripts/generate_community_graph.py ${dataset}
    }
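    # Optional sanity check: generate_community_graph.py is expected to emit
    # comma-separated vertices.txt and edges.txt with header rows (matching the
    # generate_graph_edges_vertices_comma.sh loader used below).
    head -n 1 vertices.txt edges.txt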
    cd ${current_root}

    ## Load graph into Graphsurge
    bash $root/run_docker.sh /opt/graphsurge/experiments/generate_graph_edges_vertices_comma.sh

    ### One ordered view collection
    for i in comm_10_5 comm_7_4 ;do
        cd $i
        
        mkdir -p vcol_data/{vcol_best_order_serde,vcol_manual_order_serde}
        
        for order in best ;do
            echo $order

            ## Create view collections
            bash $root/run_docker.sh /opt/graphsurge/experiments/bin/gs create_vcol_queries/vcol_${order}_order.txt | tee vcol_${order}_order.log

            ## Run computations
            for computation in wcc bfs sssp mpsp pr scc ;do
                echo $computation
                for runtype in $runtypediffs $adaptive ;do
                    echo $runtype
                    $root/run_docker.sh ../run_computation.sh $computation $runtype $order || break 3
                done
            done
        done
        cd ..
    done
    ### Three random view collections
    for i in comm_10_5 comm2_10_5 comm3_10_5 comm_7_4 comm2_7_4 comm3_7_4 ;do
        cd $i
        for order in manual ;do
            echo $order

            ## Create view collections
            bash $root/run_docker.sh /opt/graphsurge/experiments/bin/gs create_vcol_queries/vcol_${order}_order.txt | tee vcol_${order}_order.log

            ## Run computations
            for computation in wcc bfs sssp mpsp pr scc ;do
                echo $computation
                for runtype in $runtypediffs $adaptive ;do
                    echo $runtype
                    $root/run_docker.sh ../run_computation.sh $computation $runtype $order || break 3
                done
            done
        done
        cd ..
    done

done

6.5

## Build graph-map
git clone https://github.com/frankmcsherry/graph-map.git $root/graphmap
cd $root/graphmap
bash $root/run_docker.sh cargo build --release && cp target/release/parse $root/bin/

## Build GraphBolt
git clone https://github.com/sigmod-2021-195/graphbolt.git $root/graphbolt
cd $root/graphbolt
# Follow the README instructions to compile the GraphBolt apps and tools.

for dataset in soclj orkut twitter;do
    current_root=$root/6.5/$dataset

    ## Prepare raw dataset
    mkdir -p ${current_root}/graph/serde
    cd ${current_root}/graph
    [[ -f edges.txt ]] ||  {
        if [[ $dataset == "soclj" ]];then
            filename=soc-LiveJournal1.txt
            initial=34000000
            wget https://snap.stanford.edu/data/${filename}.gz;
            gunzip ${filename}.gz;
            ln -snf ${filename} edges.txt
            $root/bin/parse ${filename} ${dataset}.gmap
        elif [[ $dataset == "orkut" ]];then
            filename=com-orkut.ungraph.txt
            initial=58000000
            wget https://snap.stanford.edu/data/bigdata/communities/${filename}.gz
            gunzip ${filename}.gz;
            ln -snf ${filename} edges.txt
            $root/bin/parse ${filename} ${dataset}.gmap
        else
            filename=twitter_rv.net
            initial=700000000
            wget http://an.kaist.ac.kr/~haewoon/release/twitter_social_graph/twitter_rv.tar.gz
            tar xzf twitter_rv.tar.gz;
            ln -snf ${filename} edges.txt
            $root/bin/parse ${filename} ${dataset}.gmap
        fi
    }
    cd ${current_root}

    ## Create view collection
    for computation in sssp pr; do
        if [[ $computation == "sssp" ]];then
            i=5000
            comp=$runtypediffs2
            gb_computation=SSSP
        else
            i=1000
            comp=$runtypediffs
            gb_computation=PageRank
        fi

        ## Create view collection
        mkdir -p $i
        cd $i
        bash $root/6.5/create_vcol.sh 500 500 $initial $i ${current_root}/graph/${dataset}.gmap

        ## Run computation: Graphsurge
        $root/run_docker.sh ../run_computation.sh $computation $comp
        ## Run computation: GraphBolt
        ../run_computation.sh gb $gb_computation $i
    done

done
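# For concreteness: with dataset=orkut and computation=pr, the view-collection
# step inside the loop above expands to
#
#   bash $root/6.5/create_vcol.sh 500 500 58000000 1000 $root/6.5/orkut/graph/orkut.gmap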

6.6

current_root=$root/6.6

## Prepare raw dataset
mkdir -p ${current_root}/graph/serde
cd ${current_root}/graph
[[ -f edges.txt && -f vertices.txt ]] ||  {
    ln -s $root/6.5/twitter/graph/twitter_rv.net edges.txt
    python $root/scripts/generate_scalability_graph.py
}
cd ${current_root}

## Load graph into Graphsurge
bash $root/run_docker.sh /opt/graphsurge/experiments/generate_graph_edges_vertices_comma.sh

## Create view collection
bash $root/run_docker.sh /opt/graphsurge/experiments/bin/gs create_vcol_queries/vcol_manual_order.txt | tee vcol_manual_order.log

computation=wcc # Set to one of "wcc bfs sssp mpsp"
num_machines=1 # Set to one of "1 2 4 8 12"
machine_id=0 # Set to correct machine id ranging from 0 to num_machines-1
# Set IP values in $root/run_docker.sh

## Run computation on all machines
$root/run_docker.sh ../run_computation.sh $computation 2_stage_differential manual $num_machines $machine_id
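
# Example: a two-machine wcc run launches the same command on both hosts, with
# machine_id distinguishing them (and both IPs set in $root/run_docker.sh):
#
#   computation=wcc; num_machines=2; machine_id=0   # machine_id=1 on the second host
#   $root/run_docker.sh ../run_computation.sh $computation 2_stage_differential manual $num_machines $machine_id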