Skip to content

Commit

Permalink
Add Spark connector reader support. (#11823)
Browse files Browse the repository at this point in the history
  • Loading branch information
JulianJaffePinterest authored Jan 14, 2022
1 parent 5895ebb commit f0888d3
Show file tree
Hide file tree
Showing 61 changed files with 6,442 additions and 26 deletions.
28 changes: 26 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ branches:
only:
- master
- /^\d+\.\d+\.\d+(-\S*)?$/ # release branches
- spark_druid_connector

language: java

Expand Down Expand Up @@ -94,7 +95,7 @@ jobs:
- sudo apt-get update && sudo apt-get install python3 python3-pip python3-setuptools -y
- ./check_test_suite.py && travis_terminate 0 || echo 'Continuing setup'
- pip3 install wheel # install wheel first explicitly
- pip3 install pyyaml
- pip3 install pyyaml==5.4.1
script:
- >
${MVN} apache-rat:check -Prat --fail-at-end
Expand Down Expand Up @@ -323,7 +324,12 @@ jobs:
<<: *test_processing_module
name: "(openjdk8) other modules test"
env:
- MAVEN_PROJECTS='!processing,!indexing-hadoop,!indexing-service,!extensions-core/kafka-indexing-service,!extensions-core/kinesis-indexing-service,!server,!web-console,!integration-tests'
- MAVEN_PROJECTS='!processing,!indexing-hadoop,!indexing-service,!extensions-core/kafka-indexing-service,!extensions-core/kinesis-indexing-service,!server,!web-console,!integration-tests,!spark'
after_failure:
- ls -alh
- ls -alh /home/travis/build/apache/druid/extensions-core/kafka-extraction-namespace/target/
- ls -alh /home/travis/build/apache/druid/extensions-core/kafka-extraction-namespace/target/surefire-reports/
- for d in /home/travis/build/apache/druid/extensions-core/kafka-extraction-namespace/target/surefire-reports/*.txt; do echo $d; cat $d; done

- <<: *test_other_modules
name: "(openjdk11) other modules test"
Expand All @@ -350,6 +356,24 @@ jobs:
stage: Tests - phase 2
jdk: openjdk15

# There's no need for separate SQL compatibility tests since null handling isn't controlled by a command-line flag.
# Instead, the spark module unit tests exercise both variations of null handling themselves.
# Template job for the spark module tests. The two jobs after it merge this
# template and set their own name, stage and jdk.
- &test_spark_module
  # Reuses the processing module job settings for now.
  <<: *test_processing_module
  name: "(openjdk8) spark module test"
  env:
    - MAVEN_PROJECTS='spark'

# Same spark module test job, run on openjdk11.
- <<: *test_spark_module
  name: "(openjdk11) spark module test"
  stage: Tests - phase 2
  jdk: openjdk11

# Same spark module test job, run on openjdk15.
- <<: *test_spark_module
  name: "(openjdk15) spark module test"
  stage: Tests - phase 2
  jdk: openjdk15

- name: "web console"
install: skip
stage: Tests - phase 1
Expand Down
2 changes: 1 addition & 1 deletion distribution/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#

ARG JDK_VERSION=8
FROM maven:3-jdk-11-slim as builder
FROM maven:3.8.1-jdk-11-slim as builder
# Rebuild from source in this stage
# This can be unset if the tarball was already built outside of Docker
ARG BUILD_FROM_SOURCE="true"
Expand Down
290 changes: 290 additions & 0 deletions docs/operations/spark.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ public class KafkaLookupExtractorFactoryTest
NamespaceExtractionCacheManager.class);
private final CacheHandler cacheHandler = PowerMock.createStrictMock(CacheHandler.class);


@Rule
public ExpectedException expectedException = ExpectedException.none();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ public class DruidSegmentInputEntity implements InputEntity
private final DataSegment segment;
private final Interval intervalFilter;

DruidSegmentInputEntity(SegmentLoader segmentLoader, DataSegment segment, Interval intervalFilter)
public DruidSegmentInputEntity(SegmentLoader segmentLoader, DataSegment segment, Interval intervalFilter)
{
this.segmentLoader = segmentLoader;
this.segment = segment;
this.intervalFilter = intervalFilter;
}

Interval getIntervalFilter()
public Interval getIntervalFilter()
{
return intervalFilter;
}
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG JDK_VERSION=8-slim
ARG JDK_VERSION=8-slim-buster
FROM openjdk:$JDK_VERSION as druidbase

# Bundle everything into one script so cleanup can reduce image size.
Expand Down
6 changes: 3 additions & 3 deletions integration-tests/script/docker_build_containers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ else
case "${DRUID_INTEGRATION_TEST_JVM_RUNTIME}" in
8)
echo "Build druid-cluster with Java 8"
docker build -t druid/cluster --build-arg JDK_VERSION=8-slim --build-arg ZK_VERSION --build-arg KAFKA_VERSION --build-arg CONFLUENT_VERSION --build-arg MYSQL_VERSION --build-arg MARIA_VERSION --build-arg MYSQL_DRIVER_CLASSNAME --build-arg APACHE_ARCHIVE_MIRROR_HOST $SHARED_DIR/docker
docker build -t druid/cluster --build-arg JDK_VERSION=8-slim-buster --build-arg ZK_VERSION --build-arg KAFKA_VERSION --build-arg CONFLUENT_VERSION --build-arg MYSQL_VERSION --build-arg MARIA_VERSION --build-arg MYSQL_DRIVER_CLASSNAME --build-arg APACHE_ARCHIVE_MIRROR_HOST $SHARED_DIR/docker
;;
11)
echo "Build druid-cluster with Java 11"
docker build -t druid/cluster --build-arg JDK_VERSION=11-slim --build-arg ZK_VERSION --build-arg KAFKA_VERSION --build-arg CONFLUENT_VERSION --build-arg MYSQL_VERSION --build-arg MARIA_VERSION --build-arg MYSQL_DRIVER_CLASSNAME --build-arg APACHE_ARCHIVE_MIRROR_HOST $SHARED_DIR/docker
docker build -t druid/cluster --build-arg JDK_VERSION=11-slim-buster --build-arg ZK_VERSION --build-arg KAFKA_VERSION --build-arg CONFLUENT_VERSION --build-arg MYSQL_VERSION --build-arg MARIA_VERSION --build-arg MYSQL_DRIVER_CLASSNAME --build-arg APACHE_ARCHIVE_MIRROR_HOST $SHARED_DIR/docker
;;
15)
echo "Build druid-cluster with Java 15"
docker build -t druid/cluster --build-arg JDK_VERSION=15-slim --build-arg ZK_VERSION --build-arg KAFKA_VERSION --build-arg CONFLUENT_VERSION --build-arg MYSQL_VERSION --build-arg MARIA_VERSION --build-arg USE_MARIA --build-arg APACHE_ARCHIVE_MIRROR_HOST $SHARED_DIR/docker
docker build -t druid/cluster --build-arg JDK_VERSION=15-slim-buster --build-arg ZK_VERSION --build-arg KAFKA_VERSION --build-arg CONFLUENT_VERSION --build-arg MYSQL_VERSION --build-arg MARIA_VERSION --build-arg USE_MARIA --build-arg APACHE_ARCHIVE_MIRROR_HOST $SHARED_DIR/docker
;;
*)
echo "Invalid JVM Runtime given. Stopping"
Expand Down
75 changes: 75 additions & 0 deletions licenses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5735,3 +5735,78 @@ copyright: Berkeley Martinez
version: 4.0.3
license_file_path: licenses/bin/warning.MIT
# Web console modules end

---

# License registration for the Apache Spark artifacts used by the spark module.
name: Apache Spark
license_category: binary
module: spark
license_name: Apache License version 2.0
version: 2.4.7
libraries:
  - org.apache.spark: spark-core_2.12
  - org.apache.spark: spark-sql_2.12
# The notice text is a literal block scalar: every line of it must be indented
# deeper than the `notice` key, otherwise the scalar is empty and the text
# that follows is parsed as YAML structure.
notice: |
  Apache Spark
  Copyright 2014 and onwards The Apache Software Foundation.
  This product includes software developed at
  The Apache Software Foundation (http://www.apache.org/).
  Export Control Notice
  ---------------------
  This distribution includes cryptographic software. The country in which you currently reside may have
  restrictions on the import, possession, use, and/or re-export to another country, of encryption software.
  BEFORE using any encryption software, please check your country's laws, regulations and policies concerning
  the import, possession, or use, and re-export of encryption software, to see if this is permitted. See
  <http://www.wassenaar.org/> for more information.
  The U.S. Government Department of Commerce, Bureau of Industry and Security (BIS), has classified this
  software as Export Commodity Control Number (ECCN) 5D002.C.1, which includes information security software
  using or performing cryptographic functions with asymmetric algorithms. The form and manner of this Apache
  Software Foundation distribution makes it eligible for export under the License Exception ENC Technology
  Software Unrestricted (TSU) exception (see the BIS Export Administration Regulations, Section 740.13) for
  both object code and source code.
  The following provides more details on the included cryptographic software:
  This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to
  support authentication, and encryption and decryption of data sent across the network between
  services.
---

# License registration for the Scala runtime artifacts listed under the spark module.
name: Scala Library
license_category: binary
module: spark
license_name: Apache License version 2.0
version: 2.12.12
libraries:
  - org.scala-lang: scala-library
  - org.scala-lang: scala-reflect
  - org.scala-lang: scalap

---

# check-license also reports these for the spark module even though they are not
# in `mvn dependency:tree` for that module; the reason has not been identified.
name: Paranamer
license_category: binary
module: spark
license_name: BSD-3-Clause License
# Quoted so the value loads as the string "2.8" rather than a float.
version: "2.8"
copyright: Paul Hammant & ThoughtWorks Inc
license_file_path: licenses/bin/paranamer.BSD3
libraries:
  - com.thoughtworks.paranamer: paranamer

---

# License registration for the Jackson Paranamer module listed under the spark module.
name: Jackson Paranamer
license_category: binary
module: spark
license_name: Apache License version 2.0
version: 2.10.5
libraries:
  - com.fasterxml.jackson.module: jackson-module-paranamer
Loading

0 comments on commit f0888d3

Please sign in to comment.