Skip to content

Commit

Permalink
Merge pull request #52 from bryanwongsz/sparkExtended
Browse files Browse the repository at this point in the history
add omni sparkSQL adaptor
  • Loading branch information
tushengxia authored Jun 29, 2022
2 parents 2a19c3d + 303733e commit 4c11524
Show file tree
Hide file tree
Showing 165 changed files with 23,302 additions and 0 deletions.
39 changes: 39 additions & 0 deletions omnioperator/omniop-spark-extension/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Top-level build script of the spark-thestral-plugin native code.
# It generates config.h from config.h.in, builds the library under src/ and,
# when BUILD_CPP_TESTS is enabled, the tests under test/.

# required cmake version
# Must precede project(): it establishes the policy defaults project() uses.
cmake_minimum_required(VERSION 3.10)

# Keep g++ as the default compiler, but only when the user has not selected one
# (-DCMAKE_CXX_COMPILER=... or a toolchain file). The choice has to be made
# before project() performs compiler detection; changing it afterwards is not
# reliable.
if(NOT DEFINED CMAKE_CXX_COMPILER)
  set(CMAKE_CXX_COMPILER "g++")
endif()

# project name
project(spark-thestral-plugin)

# configure cmake
set(CMAKE_CXX_STANDARD 17)
# Fail at configure time instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Read by src/CMakeLists.txt to place the built library.
set(root_directory ${PROJECT_BINARY_DIR})

# options
# Declared before configure_file() and add_subdirectory() so that their values
# (including the defaults) are defined wherever they are read.
option(DEBUG_RUNTIME "Define DEBUG_RUNTIME in the generated config.h" OFF)
option(TRACE_RUNTIME "Define TRACE_RUNTIME in the generated config.h" OFF)
option(BUILD_CPP_TESTS "Build the C++ tests in the test/ directory" ON)

message(STATUS "Build by ${CMAKE_BUILD_TYPE}")
message(STATUS "Option BUILD_CPP_TESTS: ${BUILD_CPP_TESTS}")
message(STATUS "Option DEBUG: ${DEBUG_RUNTIME}")
message(STATUS "Option TRACE: ${TRACE_RUNTIME}")

# configure file
# NOTE(review): the output is written into the source tree, overwriting the
# checked-in config.h. Generated files normally belong in the binary directory;
# the location is kept here because the sources' include path for config.h is
# not visible from this file.
configure_file(
  "${PROJECT_SOURCE_DIR}/config.h.in"
  "${PROJECT_SOURCE_DIR}/config.h"
)

# for header searching
include_directories(SYSTEM src)

# compile library
add_subdirectory(src)

# Test the variable by name; expanding it first would make if() evaluate the
# expanded text a second time.
if(BUILD_CPP_TESTS)
  enable_testing()
  add_subdirectory(test)
endif()
56 changes: 56 additions & 0 deletions omnioperator/omniop-spark-extension/cpp/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# Copyright (C) 2020-2022. Huawei Technologies Co., Ltd. All rights reserved.
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command and on use of an unset variable.
set -eu

# Build from the directory that contains this script, wherever it is invoked
# from. "cd ... && pwd" makes a failed cd abort the script (via set -e) instead
# of continuing in the caller's directory and deleting an unrelated ./build.
CURRENT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
echo "$CURRENT_DIR"
cd "${CURRENT_DIR}"

# Always start from an empty build tree.
if [ -d build ]; then
    rm -r build
fi
mkdir build
cd build

# options
# The first argument selects the build flavour: debug | trace | release | test.
# A missing, empty or unrecognised argument falls back to a release build.
case "${1:-release}" in
    debug)
        echo "-- Enable Debug"
        cmake_options=(-DCMAKE_BUILD_TYPE=Debug -DDEBUG_RUNTIME=ON)
        ;;
    trace)
        echo "-- Enable Trace"
        cmake_options=(-DCMAKE_BUILD_TYPE=Debug -DTRACE_RUNTIME=ON)
        ;;
    test)
        echo "-- Enable Test"
        # NOTE(review): "Test" is not one of CMake's predefined build types;
        # kept unchanged — confirm it is intentional.
        cmake_options=(-DCMAKE_BUILD_TYPE=Test -DBUILD_CPP_TESTS=TRUE)
        ;;
    *)
        echo "-- Enable Release"
        cmake_options=(-DCMAKE_BUILD_TYPE=Release)
        ;;
esac

cmake .. "${cmake_options[@]}"

make

set +eu
20 changes: 20 additions & 0 deletions omnioperator/omniop-spark-extension/cpp/config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright (C) 2020-2022. Huawei Technologies Co., Ltd. All rights reserved.
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// This file is the output path of the configure_file() call in the top-level
// CMakeLists.txt (template: config.h.in), so a CMake configure run overwrites it.
// Both switches below are commented out, i.e. neither macro is defined here.
//#cmakedefine DEBUG_RUNTIME
//#cmakedefine TRACE_RUNTIME
2 changes: 2 additions & 0 deletions omnioperator/omniop-spark-extension/cpp/config.h.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Template processed by CMake's configure_file() to produce config.h.
// Each #cmakedefine line is rewritten by CMake depending on the CMake variable
// of the same name; DEBUG_RUNTIME and TRACE_RUNTIME are the options declared in
// the top-level CMakeLists.txt.
#cmakedefine DEBUG_RUNTIME
#cmakedefine TRACE_RUNTIME
58 changes: 58 additions & 0 deletions omnioperator/omniop-spark-extension/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Builds the spark_columnar_plugin shared library from the sources listed below
# plus the C++ code generated from proto/vec_data.proto.

set(PROJ_TARGET spark_columnar_plugin)

set(SOURCE_FILES
    io/ColumnWriter.cc
    io/Compression.cc
    io/MemoryPool.cc
    io/OutputStream.cc
    io/SparkFile.cc
    io/WriterOptions.cc
    shuffle/splitter.cpp
    common/common.cpp
    jni/SparkJniWrapper.cpp
    jni/OrcColumnarBatchJniReader.cpp
)

# Find required protobuf package
# REQUIRED already stops the configure step with an error when protobuf is
# missing, so no explicit "not found" branch is needed afterwards.
find_package(Protobuf REQUIRED)
message(STATUS "protobuf library found")

# Generates the .pb.cc/.pb.h pair for vec_data.proto into
# ${CMAKE_CURRENT_BINARY_DIR}.
protobuf_generate_cpp(PROTO_SRCS_VB PROTO_HDRS_VB proto/vec_data.proto)

add_library(${PROJ_TARGET} SHARED
    ${SOURCE_FILES}
    ${PROTO_SRCS_VB}
    ${PROTO_HDRS_VB}
)

# Third-party header locations, attached to the target rather than to the whole
# directory. The second entry used to read "/user/local/include", which looks
# like a typo for the conventional prefix.
target_include_directories(${PROJ_TARGET} SYSTEM PUBLIC
    /opt/lib/include
    /usr/local/include
)

# Location of the generated protobuf headers.
target_include_directories(${PROJ_TARGET} PUBLIC ${CMAKE_CURRENT_BINARY_DIR})

# JNI
# $ENV{JAVA_HOME} is read at configure time only. Warn when it is unset instead
# of silently adding "/include" and "/include/linux" to the search path.
set(java_home "$ENV{JAVA_HOME}")
if("${java_home}" STREQUAL "")
  message(WARNING "JAVA_HOME is not set; the JNI headers may not be found")
else()
  target_include_directories(${PROJ_TARGET} PUBLIC
      "${java_home}/include"
      "${java_home}/include/linux"
  )
endif()

target_link_libraries(${PROJ_TARGET} PUBLIC
    orc
    crypto
    sasl2
    # Imported target from find_package(Protobuf): links the library that was
    # actually found and carries its include directories.
    protobuf::libprotobuf
    z
    snappy
    lz4
    zstd
    boostkit-omniop-runtime-1.0.0-aarch64
    boostkit-omniop-vector-1.0.0-aarch64
)

# root_directory is set by the parent CMakeLists.txt.
set_target_properties(${PROJ_TARGET} PROPERTIES
    LIBRARY_OUTPUT_DIRECTORY ${root_directory}/releases
)

# PUBLIC is kept so that targets linking this library continue to receive the
# same flags as before.
target_compile_options(${PROJ_TARGET} PUBLIC -g -O2 -fPIC)

install(TARGETS ${PROJ_TARGET} DESTINATION lib)
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright (C) 2020-2022. Huawei Technologies Co., Ltd. All rights reserved.
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef SPARK_THESTRAL_PLUGIN_BINARYLOCATION_H
#define SPARK_THESTRAL_PLUGIN_BINARYLOCATION_H

#include <cstdint>
#include <vector>

/**
 * Plain value type that bundles an address, a length and a null flag.
 * NOTE(review): presumably describes the location of one variable-length
 * value inside a buffer — confirm against the callers.
 *
 * No destructor is declared: the class owns nothing, and leaving it implicit
 * keeps the type trivially copyable and destructible.
 */
class VCLocation {
public:
    /**
     * @param vc_addr address stored in the location
     * @param vc_len  length stored in the location
     * @param isnull  null flag stored in the location
     */
    VCLocation(uint64_t vc_addr, uint32_t vc_len, bool isnull)
        : vc_len(vc_len), vc_addr(vc_addr), is_null(isnull) {
    }

    /** @return the stored length. */
    uint32_t get_vc_len() const {
        return vc_len;
    }

    /** @return the stored address. */
    uint64_t get_vc_addr() const {
        return vc_addr;
    }

    /** @return the stored null flag. */
    bool get_is_null() const {
        return is_null;
    }

public:
    // Members stay public because existing code may access them directly.
    uint32_t vc_len;
    uint64_t vc_addr;
    bool is_null;
};

/**
 * Holds a list of VCLocation entries together with a capacity and a running
 * total length.
 *
 * No destructor is declared: std::vector releases its storage itself, and
 * leaving the destructor implicit keeps the implicit move operations available.
 */
class VCBatchInfo {
public:
    /**
     * Creates an empty batch with a total length of 0.
     *
     * @param vcb_capacity capacity recorded for the batch; the list reserves
     *                     storage for this many entries up front
     */
    VCBatchInfo(uint32_t vcb_capacity)
        : vcb_capacity(vcb_capacity), vcb_total_len(0) {
        vc_list.reserve(vcb_capacity);
    }

    /** @return the capacity given at construction. */
    uint32_t getVcbCapacity() const {
        return vcb_capacity;
    }

    /** @return the current value of vcb_total_len. */
    uint32_t getVcbTotalLen() const {
        return vcb_total_len;
    }

    /** @return mutable access to the location list. */
    std::vector<VCLocation> &getVcList() {
        return vc_list;
    }

    /** @return read-only access to the location list. */
    const std::vector<VCLocation> &getVcList() const {
        return vc_list;
    }

public:
    // Members stay public because existing code may access them directly;
    // nothing in this class updates vcb_total_len after construction.
    uint32_t vcb_capacity;
    uint32_t vcb_total_len;
    std::vector<VCLocation> vc_list;
};
#endif //SPARK_THESTRAL_PLUGIN_BINARYLOCATION_H
42 changes: 42 additions & 0 deletions omnioperator/omniop-spark-extension/cpp/src/common/Buffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (C) 2020-2022. Huawei Technologies Co., Ltd. All rights reserved.
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef CPP_BUFFER_H
#define CPP_BUFFER_H

#include <cstring>
#include <vector>
#include <chrono>
#include <memory>
#include <list>

/**
 * Simple aggregate of a raw byte pointer with a size and a capacity.
 * The class only stores the three values; it does not allocate, copy or free
 * the memory that data_ points to.
 */
class Buffer {
public:
    /**
     * @param data     pointer stored in data_
     * @param size     value stored in size_
     * @param capacity value stored in capacity_
     */
    Buffer(uint8_t* data, int64_t size, int64_t capacity)
        : data_{data}, size_{size}, capacity_{capacity} {}

public:
    uint8_t* data_;
    int64_t size_;
    int64_t capacity_;
};

#endif //CPP_BUFFER_H
Loading

0 comments on commit 4c11524

Please sign in to comment.