diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 48a0e0540db0..78005a722336 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,6 +4,7 @@ updates: directory: "/" schedule: interval: "daily" + open-pull-requests-limit: 20 ignore: - dependency-name: "com.google.guava:guava" # pin ZooKeeper dependencies to 3.5.x @@ -18,3 +19,6 @@ updates: # Even then this will involve significant effort. # See https://github.com/apache/druid/pull/12258 - dependency-name: "org.apache.calcite" + # jclouds 2.1 needs Guava 18+ + - dependency-name: "org.apache.jclouds" + versions: "[2.1,)" diff --git a/.github/labeler.yml b/.github/labeler.yml index b84f335b12b2..a9bfc45a86ec 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -18,9 +18,50 @@ # # -# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler +# Pull Request Labeler GitHub Action Configuration: https://github.com/marketplace/actions/labeler -"Area - Documentation": - - "docs/**/*" - - "website/**" - - "examples/quickstart/jupyter-notebooks/*" \ No newline at end of file +'Area - Batch Ingestion': + - 'indexing-hadoop/**' + - 'extensions-core/multi-stage-query/**' + +'Area - Dependencies': + - '**/pom.xml' + - 'licenses.yaml' + +'Area - Documentation': + - 'docs/**/*' + - 'website/**' + - 'examples/quickstart/jupyter-notebooks/**' + +'Area - Ingestion': + - 'indexing-service/**' + +'Area - Lookups': + - 'extensions-core/lookups-cached-global/**' + - 'extensions-core/lookups-cached-single/**' + - 'extensions-core/kafka-extraction-namespace/**' + +'Area - Metrics/Event Emitting': + - 'processing/src/main/java/org/apache/druid/java/util/metrics/**' + - 'processing/src/main/java/org/apache/druid/java/util/emitter/**' + - 'extensions-contrib/*-emitter/**' + +'Area - MSQ': + - 'extensions-core/multi-stage-query/**' + +'Area - Querying': + - 'sql/**' + - 'extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/**' + +'Area - Segment Format and Ser/De': + - 'processing/src/main/java/org/apache/druid/segment/**' + +'Area - Streaming Ingestion': + - 'extensions-core/kafka-indexing-service/**' + - 'extensions-core/kinesis-indexing-service/**' + +'Area - Web Console': + - 'web-console/**' + +'Kubernetes': + - 'extensions-contrib/kubernetes-overlord-extensions/**' diff --git a/.github/scripts/analyze_dependencies_script.sh b/.github/scripts/analyze_dependencies_script.sh index c92d90030f71..1212dbd1cd2b 100755 --- a/.github/scripts/analyze_dependencies_script.sh +++ b/.github/scripts/analyze_dependencies_script.sh @@ -15,7 +15,7 @@ #!bin/bash -${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true ${HADOOP_PROFILE} || +${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true || { echo " The dependency analysis has found a dependency that is either: diff --git a/.github/scripts/license_checks_script.sh b/.github/scripts/license_checks_script.sh index 410ac60375fa..163214f82988 100755 --- a/.github/scripts/license_checks_script.sh +++ b/.github/scripts/license_checks_script.sh @@ -20,7 +20,7 @@ set -e ./.github/scripts/setup_generate_license.sh ${MVN} apache-rat:check -Prat --fail-at-end \ -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \ --Drat.consoleOutput=true ${HADOOP_PROFILE} +-Drat.consoleOutput=true # Generate dependency reports and checks they are valid. mkdir -p target distribution/bin/generate-license-dependency-reports.py . target --clean-maven-artifact-transfer --parallel 2 diff --git a/.github/scripts/unit_tests_script.sh b/.github/scripts/unit_tests_script.sh index 1f5407b95844..569811a93940 100755 --- a/.github/scripts/unit_tests_script.sh +++ b/.github/scripts/unit_tests_script.sh @@ -20,7 +20,7 @@ set -e unset _JAVA_OPTIONS # Set MAVEN_OPTS for Surefire launcher. -MAVEN_OPTS='-Xmx2500m' ${MVN} test -pl ${MAVEN_PROJECTS} \ +MAVEN_OPTS='-Xmx2500m' ${MVN} test -B -pl ${MAVEN_PROJECTS} -Dmaven.test.failure.ignore=true \ ${MAVEN_SKIP} -Ddruid.generic.useDefaultValueForNull=${DRUID_USE_DEFAULT_VALUE_FOR_NULL} \ -DjfrProfilerArgLine="${JFR_PROFILER_ARG_LINE}" sh -c "dmesg | egrep -i '(oom|out of memory|kill process|killed).*' -C 1 || exit 0" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 4e13d31de0a8..262d7ad80f85 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -29,6 +29,12 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 + - uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '8' + cache: 'maven' + # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v2 @@ -42,7 +48,6 @@ jobs: # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs queries: +security-and-quality - - run: | echo "Building using custom commands" mvn clean package -f "pom.xml" -B -V -e -Dfindbugs.skip -Dcheckstyle.skip -Dpmd.skip=true -Denforcer.skip -Dmaven.javadoc.skip -DskipTests -Dmaven.test.skip.exec -Dlicense.skip=true -Dweb.console.skip=true -Dcyclonedx.skip=true diff --git a/.github/workflows/cron-job-its.yml b/.github/workflows/cron-job-its.yml index 3752b6c60eae..65471ad81b05 100644 --- a/.github/workflows/cron-job-its.yml +++ b/.github/workflows/cron-job-its.yml @@ -111,8 +111,6 @@ jobs: name: security vulnerabilities strategy: fail-fast: false - matrix: - HADOOP_PROFILE: [ '', '-Phadoop2' ] runs-on: ubuntu-latest steps: - name: Checkout branch @@ -129,10 +127,8 @@ jobs: run: mvn clean install dependency:go-offline -P dist -P skip-static-checks,skip-tests -Dmaven.javadoc.skip=true -Dcyclonedx.skip=true -Dweb.console.skip=true - name: security vulnerabilities check - env: - HADOOP_PROFILE: ${{ matrix.HADOOP_PROFILE }} run: | - mvn dependency-check:purge dependency-check:check ${HADOOP_PROFILE} || { echo " + mvn dependency-check:purge dependency-check:check || { echo " The OWASP dependency check has found security vulnerabilities. Please use a newer version of the dependency that does not have vulnerabilities. To see a report run `mvn dependency-check:check` diff --git a/.github/workflows/reusable-revised-its.yml b/.github/workflows/reusable-revised-its.yml index 49887db14c59..e59284b0c3b0 100644 --- a/.github/workflows/reusable-revised-its.yml +++ b/.github/workflows/reusable-revised-its.yml @@ -133,3 +133,35 @@ jobs: - name: Run IT run: ${{ inputs.script }} + + - name: Collect docker logs on failure + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + run: | + mkdir docker-logs + for c in $(docker ps -a --format="{{.Names}}") + do + docker logs $c > ./docker-logs/$c.log + done + + - name: Tar docker logs + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + run: tar cvzf ./docker-logs.tgz ./docker-logs + + - name: Upload docker logs to GitHub + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + uses: actions/upload-artifact@master + with: + name: IT-${{ inputs.it }} docker logs (Compile=jdk${{ inputs.build_jdk }}, Run=jdk${{ inputs.runtime_jdk }}, Indexer=${{ inputs.use_indexer }}, Mysql=${{ inputs.mysql_driver }}) + path: docker-logs.tgz + + - name: Collect service logs on failure + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + run: | + tar cvzf ./service-logs.tgz ~/shared/logs + + - name: Upload Druid service logs to GitHub + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + uses: actions/upload-artifact@master + with: + name: IT-${{ inputs.it }} service logs (Compile=jdk${{ inputs.build_jdk }}, Run=jdk${{ inputs.runtime_jdk }}, Indexer=${{ inputs.use_indexer }}, Mysql=${{ inputs.mysql_driver }}) + path: service-logs.tgz diff --git a/.github/workflows/reusable-standard-its.yml b/.github/workflows/reusable-standard-its.yml index d4b0de1c0cfd..b96bb4b4c495 100644 --- a/.github/workflows/reusable-standard-its.yml +++ b/.github/workflows/reusable-standard-its.yml @@ -88,13 +88,34 @@ jobs: echo "${MVN} verify -pl integration-tests -P integration-tests ${{ inputs.testing_groups }} -Djvm.runtime=${{ inputs.runtime_jdk }} -Dit.indexer=${{ inputs.use_indexer }} ${MAVEN_SKIP} -Doverride.config.path=${{ inputs.override_config_path }}" ${MVN} verify -pl integration-tests -P integration-tests ${{ inputs.testing_groups }} -Djvm.runtime=${{ inputs.runtime_jdk }} -Dit.indexer=${{ inputs.use_indexer }} ${MAVEN_SKIP} -Doverride.config.path=${{ inputs.override_config_path }} - - name: Debug IT + - name: Collect docker logs on failure if: ${{ failure() && steps.run-it.conclusion == 'failure' }} run: | - for v in broker router ${{ inputs.use_indexer }} historical coordinator overlord; do - echo "=======================druid-"$v"========================"; - echo "-----------------------docker logs-----------------------"; - sudo docker logs druid-"$v" 2>&1 | tail -1000 ||:; - echo "-----------------------service logs----------------------"; - sudo docker exec druid-"$v" tail -1000 /shared/logs/"$v".log 2>&1 ||:; + mkdir docker-logs + for c in $(docker ps -a --format="{{.Names}}") + do + docker logs $c > ./docker-logs/$c.log done + + - name: Tar docker logs + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + run: tar cvzf ./docker-logs.tgz ./docker-logs + + - name: Upload docker logs to GitHub + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + uses: actions/upload-artifact@master + with: + name: IT-${{ inputs.group }} docker logs (Compile=jdk${{ inputs.build_jdk }}, Run=jdk${{ inputs.runtime_jdk }}, Indexer=${{ inputs.use_indexer }}, Mysql=${{ inputs.mysql_driver }}) + path: docker-logs.tgz + + - name: Collect service logs on failure + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + run: | + tar cvzf ./service-logs.tgz ./shared/logs + + - name: Upload Druid service logs to GitHub + if: ${{ failure() && steps.run-it.conclusion == 'failure' }} + uses: actions/upload-artifact@master + with: + name: IT-${{ inputs.group }} service logs (Compile=jdk${{ inputs.build_jdk }}, Run=jdk${{ inputs.runtime_jdk }}, Indexer=${{ inputs.use_indexer }}, Mysql=${{ inputs.mysql_driver }}) + path: service-logs.tgz diff --git a/.github/workflows/reusable-unit-tests.yml b/.github/workflows/reusable-unit-tests.yml index 34d992c397c2..8769673b1e89 100644 --- a/.github/workflows/reusable-unit-tests.yml +++ b/.github/workflows/reusable-unit-tests.yml @@ -111,6 +111,15 @@ jobs: MAVEN_PROJECTS: ${{ inputs.maven_projects }} run: ./.github/scripts/unit_tests_script.sh + - name: Test Report + uses: dorny/test-reporter@v1 + if: always() + with: + name: Maven Tests + path: '**/target/**/surefire-reports/TEST-*.xml' + reporter: java-junit + fail-on-error: true + - name: set outputs on failure id: set_outputs if: ${{ failure() }} diff --git a/.github/workflows/standard-its.yml b/.github/workflows/standard-its.yml index f44dee04bdf8..2648dc0993b6 100644 --- a/.github/workflows/standard-its.yml +++ b/.github/workflows/standard-its.yml @@ -159,6 +159,7 @@ jobs: with: path: ~/.m2/repository key: maven-${{ runner.os }}-8-${{ github.sha }} + restore-keys: setup-java-Linux-maven-${{ hashFiles('**/pom.xml') }} - name: Maven build if: steps.maven-restore.outputs.cache-hit != 'true' diff --git a/.github/workflows/static-checks.yml b/.github/workflows/static-checks.yml index 8fc05d35bd87..4b1c4db0c68d 100644 --- a/.github/workflows/static-checks.yml +++ b/.github/workflows/static-checks.yml @@ -41,20 +41,17 @@ jobs: strategy: fail-fast: false matrix: - java: [ 'jdk8', 'jdk11', 'jdk17' ] + java: [ '8', '11', '17' ] runs-on: ubuntu-latest steps: - name: checkout branch uses: actions/checkout@v3 - - name: set java version - run: | - export jdk=${{ matrix.java }} - echo "java_version=${jdk:3}" >> $GITHUB_ENV - - - name: setup ${{ matrix.java }} - run: | - echo "JAVA_HOME=$JAVA_HOME_${{ env.java_version }}_X64" >> $GITHUB_ENV + - uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: ${{ matrix.java }} + cache: 'maven' - name: packaging check run: | @@ -66,60 +63,53 @@ jobs: - name: script checks # who watches the watchers? - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ./check_test_suite_test.py - name: (openjdk17) strict compilation - if: ${{ matrix.java == 'jdk17' }} + if: ${{ matrix.java == '17' }} # errorprone requires JDK 11+ # Strict compilation requires more than 2 GB run: ${MVN} clean -DstrictCompile compile test-compile --fail-at-end ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} - name: maven install - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: | echo 'Running Maven install...' && ${MVN} clean install -q -ff -pl '!distribution,!:druid-it-image,!:druid-it-cases' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C && ${MVN} install -q -ff -pl 'distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} - name: checkstyle - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ${MVN} checkstyle:checkstyle --fail-at-end - name: license checks - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ./.github/scripts/license_checks_script.sh - name: analyze dependencies - if: ${{ matrix.java == 'jdk8' }} - run: | - ./.github/scripts/analyze_dependencies_script.sh - - - name: analyze dependencies for hadoop2 - if: ${{ matrix.java == 'jdk8' }} - env: - HADOOP_PROFILE: -Phadoop2 + if: ${{ matrix.java == '8' }} run: | ./.github/scripts/analyze_dependencies_script.sh - name: animal sniffer checks - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ${MVN} animal-sniffer:check --fail-at-end - name: enforcer checks - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ${MVN} enforcer:enforce --fail-at-end - name: forbidden api checks - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ${MVN} forbiddenapis:check forbiddenapis:testCheck --fail-at-end - name: pmd checks - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ${MVN} pmd:check --fail-at-end # TODO: consider adding pmd:cpd-check - name: spotbugs checks - if: ${{ matrix.java == 'jdk8' }} + if: ${{ matrix.java == '8' }} run: ${MVN} spotbugs:check --fail-at-end -pl '!benchmarks' intellij-inspections: @@ -130,9 +120,11 @@ jobs: - name: checkout branch uses: actions/checkout@v3 - - name: setup JDK8 - run: | - echo "JAVA_HOME=$JAVA_HOME_8_X64" >> $GITHUB_ENV + - uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '8' + cache: 'maven' - name: maven install run: | @@ -160,9 +152,11 @@ jobs: - name: checkout branch uses: actions/checkout@v3 - - name: setup JDK17 - run: | - echo "JAVA_HOME=$JAVA_HOME_17_X64" >> $GITHUB_ENV + - uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '17' + cache: 'maven' - name: setup node uses: actions/setup-node@v3 @@ -173,7 +167,7 @@ jobs: run: | (cd website && npm install) cd website - npm run link-lint + npm run build npm run spellcheck - name: web console diff --git a/.github/workflows/unit-and-integration-tests-unified.yml b/.github/workflows/unit-and-integration-tests-unified.yml index ff963dac7718..6ff6c8bd6500 100644 --- a/.github/workflows/unit-and-integration-tests-unified.yml +++ b/.github/workflows/unit-and-integration-tests-unified.yml @@ -72,6 +72,7 @@ jobs: with: path: ~/.m2/repository key: maven-${{ runner.os }}-${{ matrix.jdk }}-${{ github.sha }} + restore-keys: setup-java-Linux-maven-${{ hashFiles('**/pom.xml') }} - name: Cache targets id: target diff --git a/.gitignore b/.gitignore index cd33e6271a1d..7d7cf0d5bd24 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,13 @@ integration-tests/gen-scripts/ /bin/ *.hprof **/.ipynb_checkpoints/ +website/.yarn/ +website/node_modules/ +website/.docusaurus/ +website/build/ + +# Local Netlify folder +.netlify *.pyc **/.ipython/ **/.jupyter/ diff --git a/README.md b/README.md index 6a2448228986..cfd87db66728 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,11 @@ Use the built-in query workbench to prototype [DruidSQL](https://druid.apache.or See the [latest documentation](https://druid.apache.org/docs/latest/) for the documentation for the current official release. If you need information on a previous release, you can browse [previous releases documentation](https://druid.apache.org/docs/). -Make documentation and tutorials updates in [`/docs`](https://github.com/apache/druid/tree/master/docs) using [MarkDown](https://www.markdownguide.org/) and contribute them using a pull request. +Make documentation and tutorials updates in [`/docs`](https://github.com/apache/druid/tree/master/docs) using [Markdown](https://www.markdownguide.org/) or extended Markdown [(MDX)](https://mdxjs.com/). Then, open a pull request. + +To build the site locally, you need Node 16.14 or higher and to install Docusaurus 2 with `npm|yarn install` in the `website` directory. Then you can run `npm|yarn start` to launch a local build of the docs. + +If you're looking to update non-doc pages like Use Cases, those files are in the [`druid-website-src`](https://github.com/apache/druid-website-src/tree/master) repo. ### Community diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml index 0d6319c3202b..fdada96d98a6 100644 --- a/benchmarks/pom.xml +++ b/benchmarks/pom.xml @@ -261,7 +261,7 @@ maven-source-plugin - 2.2.1 + 3.3.0 maven-surefire-plugin diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java index fe0e717ab18e..d09d501a050e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/FrontCodedIndexedBenchmark.java @@ -88,10 +88,10 @@ public class FrontCodedIndexedBenchmark @Param({ "generic", - "front-coded-4", - "front-coded-16", - "front-coded-incremental-buckets-4", - "front-coded-incremental-buckets-16" + "front-coded-v0-4", + "front-coded-v0-16", + "front-coded-v1-4", + "front-coded-v1-16" }) public String indexType; @@ -138,7 +138,7 @@ public void createIndex() throws IOException FrontCodedIndexedWriter frontCodedIndexedWriter = new FrontCodedIndexedWriter( new OnHeapMemorySegmentWriteOutMedium(), ByteOrder.nativeOrder(), - "front-coded-4".equals(indexType) ? 4 : 16, + "front-coded-v0-4".equals(indexType) ? 4 : 16, FrontCodedIndexed.V0 ); frontCodedIndexedWriter.open(); @@ -146,7 +146,7 @@ public void createIndex() throws IOException FrontCodedIndexedWriter frontCodedIndexedWriterIncrementalBuckets = new FrontCodedIndexedWriter( new OnHeapMemorySegmentWriteOutMedium(), ByteOrder.nativeOrder(), - "front-coded-incremental-buckets-4".equals(indexType) ? 4 : 16, + "front-coded-v1-4".equals(indexType) ? 4 : 16, FrontCodedIndexed.V1 ); frontCodedIndexedWriterIncrementalBuckets.open(); @@ -166,11 +166,11 @@ public void createIndex() throws IOException fileGeneric = File.createTempFile("genericIndexedBenchmark", "meta"); smooshDirFrontCodedIncrementalBuckets = FileUtils.createTempDir(); - fileFrontCodedIncrementalBuckets = File.createTempFile("frontCodedIndexedBenchmarkIncrementalBuckets", "meta"); + fileFrontCodedIncrementalBuckets = File.createTempFile("frontCodedIndexedBenchmarkv1Buckets", "meta"); EncodingSizeProfiler.encodedSize = (int) ("generic".equals(indexType) ? genericIndexedWriter.getSerializedSize() - : indexType.startsWith("front-coded-incremental-buckets") + : indexType.startsWith("front-coded-v1") ? frontCodedIndexedWriterIncrementalBuckets.getSerializedSize() : frontCodedIndexedWriter.getSerializedSize()); try ( @@ -286,7 +286,7 @@ public void createIndex() throws IOException } if ("generic".equals(indexType)) { indexed = genericIndexed.singleThreaded(); - } else if (indexType.startsWith("front-coded-incremental-buckets")) { + } else if (indexType.startsWith("front-coded-v1")) { indexed = frontCodedIndexedIncrementalBuckets; } else { indexed = frontCodedIndexed; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java index 87a421df5fcb..2d902c12163f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java @@ -52,13 +52,10 @@ import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; -import org.apache.druid.query.groupby.GroupByQueryEngine; import org.apache.druid.query.groupby.GroupByQueryQueryToolChest; import org.apache.druid.query.groupby.GroupByQueryRunnerFactory; +import org.apache.druid.query.groupby.GroupingEngine; import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.strategy.GroupByStrategySelector; -import org.apache.druid.query.groupby.strategy.GroupByStrategyV1; -import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.spec.QuerySegmentSpec; import org.apache.druid.segment.IndexIO; @@ -128,9 +125,6 @@ public class GroupByTypeInterfaceBenchmark @Param({"100000"}) private int rowsPerSegment; - @Param({"v2"}) - private String defaultStrategy; - @Param({"all"}) private String queryGranularity; @@ -346,11 +340,6 @@ public void setup() throws IOException ); final GroupByQueryConfig config = new GroupByQueryConfig() { - @Override - public String getDefaultStrategy() - { - return defaultStrategy; - } @Override public int getBufferGrouperInitialBuckets() @@ -365,8 +354,6 @@ public HumanReadableBytes getMaxOnDiskStorage() } }; config.setSingleThreaded(false); - config.setMaxIntermediateRows(Integer.MAX_VALUE); - config.setMaxResults(Integer.MAX_VALUE); DruidProcessingConfig druidProcessingConfig = new DruidProcessingConfig() { @@ -385,27 +372,19 @@ public String getFormatString() }; final Supplier configSupplier = Suppliers.ofInstance(config); - final GroupByStrategySelector strategySelector = new GroupByStrategySelector( + final GroupingEngine groupingEngine = new GroupingEngine( + druidProcessingConfig, configSupplier, - new GroupByStrategyV1( - configSupplier, - new GroupByQueryEngine(configSupplier, bufferPool), - QueryBenchmarkUtil.NOOP_QUERYWATCHER - ), - new GroupByStrategyV2( - druidProcessingConfig, - configSupplier, - bufferPool, - mergePool, - TestHelper.makeJsonMapper(), - new ObjectMapper(new SmileFactory()), - QueryBenchmarkUtil.NOOP_QUERYWATCHER - ) + bufferPool, + mergePool, + TestHelper.makeJsonMapper(), + new ObjectMapper(new SmileFactory()), + QueryBenchmarkUtil.NOOP_QUERYWATCHER ); factory = new GroupByQueryRunnerFactory( - strategySelector, - new GroupByQueryQueryToolChest(strategySelector) + groupingEngine, + new GroupByQueryQueryToolChest(groupingEngine) ); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java index 4b0d55c2c62a..5adfcee0de8e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/CachingClusteredClientBenchmark.java @@ -54,12 +54,13 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.BrokerParallelMergeConfig; import org.apache.druid.query.BySegmentQueryRunner; import org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate; import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.Druids; import org.apache.druid.query.FinalizeResultsQueryRunner; -import org.apache.druid.query.FluentQueryRunnerBuilder; +import org.apache.druid.query.FluentQueryRunner; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryPlus; @@ -76,14 +77,11 @@ import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; -import org.apache.druid.query.groupby.GroupByQueryEngine; import org.apache.druid.query.groupby.GroupByQueryQueryToolChest; import org.apache.druid.query.groupby.GroupByQueryRunnerFactory; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.GroupingEngine; import org.apache.druid.query.groupby.ResultRow; -import org.apache.druid.query.groupby.strategy.GroupByStrategySelector; -import org.apache.druid.query.groupby.strategy.GroupByStrategyV1; -import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; import org.apache.druid.query.planning.DataSourceAnalysis; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.spec.QuerySegmentSpec; @@ -103,7 +101,6 @@ import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; -import org.apache.druid.segment.join.JoinableFactoryWrapperTest; import org.apache.druid.server.QueryStackTests; import org.apache.druid.server.coordination.ServerType; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -258,12 +255,6 @@ public int getNumThreads() { return numProcessingThreads; } - - @Override - public boolean useParallelMergePool() - { - return true; - } }; conglomerate = new DefaultQueryRunnerFactoryConglomerate( @@ -293,11 +284,6 @@ public boolean useParallelMergePool() GroupByQueryRunnerTest.DEFAULT_MAPPER, new GroupByQueryConfig() { - @Override - public String getDefaultStrategy() - { - return GroupByStrategySelector.STRATEGY_V2; - } }, processingConfig ) @@ -339,10 +325,15 @@ public > QueryToolChest getToolChest new ForegroundCachePopulator(JSON_MAPPER, new CachePopulatorStats(), 0), new CacheConfig(), new DruidHttpClientConfig(), - processingConfig, + new BrokerParallelMergeConfig() { + @Override + public boolean useParallelMergePool() + { + return true; + } + }, forkJoinPool, QueryStackTests.DEFAULT_NOOP_SCHEDULER, - JoinableFactoryWrapperTest.NOOP_JOINABLE_FACTORY_WRAPPER, new NoopServiceEmitter() ); } @@ -365,25 +356,17 @@ private static GroupByQueryRunnerFactory makeGroupByQueryRunnerFactory( bufferSupplier, processingConfig.getNumMergeBuffers() ); - final GroupByStrategySelector strategySelector = new GroupByStrategySelector( + final GroupingEngine groupingEngine = new GroupingEngine( + processingConfig, configSupplier, - new GroupByStrategyV1( - configSupplier, - new GroupByQueryEngine(configSupplier, bufferPool), - QueryRunnerTestHelper.NOOP_QUERYWATCHER - ), - new GroupByStrategyV2( - processingConfig, - configSupplier, - bufferPool, - mergeBufferPool, - mapper, - mapper, - QueryRunnerTestHelper.NOOP_QUERYWATCHER - ) + bufferPool, + mergeBufferPool, + mapper, + mapper, + QueryRunnerTestHelper.NOOP_QUERYWATCHER ); - final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(strategySelector); - return new GroupByQueryRunnerFactory(strategySelector, toolChest); + final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(groupingEngine); + return new GroupByQueryRunnerFactory(groupingEngine, toolChest); } @TearDown(Level.Trial) @@ -480,8 +463,11 @@ public void groupByQuery(Blackhole blackhole) private List runQuery() { //noinspection unchecked - QueryRunner theRunner = new FluentQueryRunnerBuilder(toolChestWarehouse.getToolChest(query)) - .create(cachingClusteredClient.getQueryRunnerForIntervals(query, query.getIntervals())) + QueryRunner theRunner = FluentQueryRunner + .create( + cachingClusteredClient.getQueryRunnerForIntervals(query, query.getIntervals()), + toolChestWarehouse.getToolChest(query) + ) .applyPreMergeDecoration() .mergeResults() .applyPostMergeDecoration(); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java index 08c51b9edf2b..d355dd2d0058 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java @@ -63,15 +63,12 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; -import org.apache.druid.query.groupby.GroupByQueryEngine; import org.apache.druid.query.groupby.GroupByQueryQueryToolChest; import org.apache.druid.query.groupby.GroupByQueryRunnerFactory; +import org.apache.druid.query.groupby.GroupingEngine; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; -import org.apache.druid.query.groupby.strategy.GroupByStrategySelector; -import org.apache.druid.query.groupby.strategy.GroupByStrategyV1; -import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.spec.QuerySegmentSpec; @@ -139,9 +136,6 @@ public class GroupByBenchmark @Param({"basic.A", "basic.nested"}) private String schemaAndQuery; - @Param({"v1", "v2"}) - private String defaultStrategy; - @Param({"all", "day"}) private String queryGranularity; @@ -461,11 +455,6 @@ public void setup() ); final GroupByQueryConfig config = new GroupByQueryConfig() { - @Override - public String getDefaultStrategy() - { - return defaultStrategy; - } @Override public int getBufferGrouperInitialBuckets() @@ -480,8 +469,6 @@ public HumanReadableBytes getMaxOnDiskStorage() } }; config.setSingleThreaded(false); - config.setMaxIntermediateRows(Integer.MAX_VALUE); - config.setMaxResults(Integer.MAX_VALUE); DruidProcessingConfig druidProcessingConfig = new DruidProcessingConfig() { @@ -500,27 +487,19 @@ public String getFormatString() }; final Supplier configSupplier = Suppliers.ofInstance(config); - final GroupByStrategySelector strategySelector = new GroupByStrategySelector( + final GroupingEngine groupingEngine = new GroupingEngine( + druidProcessingConfig, configSupplier, - new GroupByStrategyV1( - configSupplier, - new GroupByQueryEngine(configSupplier, bufferPool), - QueryBenchmarkUtil.NOOP_QUERYWATCHER - ), - new GroupByStrategyV2( - druidProcessingConfig, - configSupplier, - bufferPool, - mergePool, - TestHelper.makeJsonMapper(), - new ObjectMapper(new SmileFactory()), - QueryBenchmarkUtil.NOOP_QUERYWATCHER - ) + bufferPool, + mergePool, + TestHelper.makeJsonMapper(), + new ObjectMapper(new SmileFactory()), + QueryBenchmarkUtil.NOOP_QUERYWATCHER ); factory = new GroupByQueryRunnerFactory( - strategySelector, - new GroupByQueryQueryToolChest(strategySelector) + groupingEngine, + new GroupByQueryQueryToolChest(groupingEngine) ); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 7733281908f0..498a9c2bdacd 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -84,7 +84,7 @@ public class SqlExpressionBenchmark static { NullHandling.initializeForTests(); - ExpressionProcessing.initializeForStrictBooleansTests(true); + ExpressionProcessing.initializeForTests(); } private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() @@ -106,13 +106,6 @@ public int getNumThreads() { return 1; } - - @Override - public boolean useParallelMergePoolConfigured() - { - return true; - } - @Override public String getFormatString() { @@ -204,7 +197,7 @@ public String getFormatString() "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", // 37: time shift + expr agg (group by), uniform distribution high cardinality "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", - // 38: LATEST aggregator + // 38: LATEST aggregator long "SELECT LATEST(long1) FROM foo", // 39: LATEST aggregator double "SELECT LATEST(double4) FROM foo", @@ -214,7 +207,13 @@ public String getFormatString() "SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM foo", // 42,43: filter numeric nulls "SELECT SUM(long5) FROM foo WHERE long5 IS NOT NULL", - "SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1" + "SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1", + // 44: EARLIEST aggregator long + "SELECT EARLIEST(long1) FROM foo", + // 45: EARLIEST aggregator double + "SELECT EARLIEST(double4) FROM foo", + // 46: EARLIEST aggregator float + "SELECT EARLIEST(float3) FROM foo" ); @Param({"5000000"}) @@ -272,7 +271,11 @@ public String getFormatString() "40", "41", "42", - "43" + "43", + "44", + "45", + "46", + "47" }) private String query; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java index 98514512e9ab..e1d866a4ba36 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlNestedDataBenchmark.java @@ -34,8 +34,8 @@ import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.NestedDataDimensionSchema; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.column.StringEncodingStrategy; import org.apache.druid.segment.data.FrontCodedIndexed; @@ -90,7 +90,7 @@ public class SqlNestedDataBenchmark static { NullHandling.initializeForTests(); - ExpressionProcessing.initializeForStrictBooleansTests(true); + ExpressionProcessing.initializeForTests(); } private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig() @@ -112,13 +112,7 @@ public int getNumThreads() { return 1; } - - @Override - public boolean useParallelMergePoolConfigured() - { - return true; - } - + @Override public String getFormatString() { @@ -304,7 +298,7 @@ public void setup() ); List dims = ImmutableList.builder() .addAll(schemaInfo.getDimensionsSpec().getDimensions()) - .add(new NestedDataDimensionSchema("nested")) + .add(new AutoTypeColumnSchema("nested")) .build(); DimensionsSpec dimsSpec = new DimensionsSpec(dims); diff --git a/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java b/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java index 531003352123..52a6e0d97570 100644 --- a/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/server/coordinator/NewestSegmentFirstPolicyBenchmark.java @@ -24,9 +24,9 @@ import org.apache.druid.client.DataSourcesSnapshot; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.server.coordinator.duty.CompactionSegmentIterator; -import org.apache.druid.server.coordinator.duty.CompactionSegmentSearchPolicy; -import org.apache.druid.server.coordinator.duty.NewestSegmentFirstPolicy; +import org.apache.druid.server.coordinator.compact.CompactionSegmentIterator; +import org.apache.druid.server.coordinator.compact.CompactionSegmentSearchPolicy; +import org.apache.druid.server.coordinator.compact.NewestSegmentFirstPolicy; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.partition.NumberedShardSpec; @@ -142,8 +142,7 @@ public void measureNewestSegmentFirstPolicy(Blackhole blackhole) { final CompactionSegmentIterator iterator = policy.reset(compactionConfigs, dataSources, Collections.emptyMap()); for (int i = 0; i < numCompactionTaskSlots && iterator.hasNext(); i++) { - final List segments = iterator.next(); - blackhole.consume(segments); + blackhole.consume(iterator.next()); } } } diff --git a/cloud/aws-common/pom.xml b/cloud/aws-common/pom.xml index 61bc6720016c..4bad5332ceef 100644 --- a/cloud/aws-common/pom.xml +++ b/cloud/aws-common/pom.xml @@ -76,8 +76,8 @@ aws-java-sdk-core - javax.validation - validation-api + jakarta.validation + jakarta.validation-api diff --git a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java index c587ac6580aa..939cdcbe5ecb 100644 --- a/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java +++ b/cloud/aws-common/src/main/java/org/apache/druid/common/aws/AWSClientUtil.java @@ -22,11 +22,50 @@ import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; import com.amazonaws.retry.RetryUtils; +import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import com.amazonaws.services.s3.model.MultiObjectDeleteException; +import com.google.common.collect.ImmutableSet; import java.io.IOException; +import java.util.Set; public class AWSClientUtil { + /** + * This list of error code come from {@link RetryUtils}, and + * .... At the moment, aws sdk + * does not expose a good way of retrying + * {@link com.amazonaws.services.s3.AmazonS3#deleteObjects(DeleteObjectsRequest)} requests. This request is used in + * org.apache.druid.storage.s3.S3DataSegmentKiller to delete a batch of segments from deep storage. + */ + private static final Set RECOVERABLE_ERROR_CODES = ImmutableSet.of( + "503 SlowDown", + "AuthFailure", + "BandwidthLimitExceeded", + "EC2ThrottledException", + "IDPCommunicationError", + "InternalError", + "InvalidSignatureException", + "PriorRequestNotComplete", + "ProvisionedThroughputExceededException", + "RequestExpired", + "RequestInTheFuture", + "RequestLimitExceeded", + "RequestThrottled", + "RequestThrottledException", + "RequestTimeTooSkewed", + "RequestTimeout", + "RequestTimeoutException", + "ServiceUnavailable", + "SignatureDoesNotMatch", + "SlowDown", + "ThrottledException", + "ThrottlingException", + "TooManyRequestsException", + "TransactionInProgressException", + "Throttling" + ); + /** * Checks whether an exception can be retried or not. Implementation is copied * from {@link com.amazonaws.retry.PredefinedRetryPolicies.SDKDefaultRetryCondition} except deprecated methods @@ -54,6 +93,19 @@ public static boolean isClientExceptionRecoverable(AmazonClientException excepti return true; } - return RetryUtils.isClockSkewError(exception); + if (RetryUtils.isClockSkewError(exception)) { + return true; + } + + if (exception instanceof MultiObjectDeleteException) { + MultiObjectDeleteException multiObjectDeleteException = (MultiObjectDeleteException) exception; + for (MultiObjectDeleteException.DeleteError error : multiObjectDeleteException.getErrors()) { + if (RECOVERABLE_ERROR_CODES.contains(error.getCode())) { + return true; + } + } + } + + return false; } } diff --git a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java index bb1c4cdca796..3d1ff9a20c19 100644 --- a/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java +++ b/cloud/aws-common/src/test/java/org/apache/druid/common/aws/AWSClientUtilTest.java @@ -21,6 +21,8 @@ import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.model.MultiObjectDeleteException; +import com.google.common.collect.ImmutableList; import org.junit.Assert; import org.junit.Test; @@ -82,6 +84,20 @@ public void testRecoverableException_ClockSkewedError() Assert.assertTrue(AWSClientUtil.isClientExceptionRecoverable(ex)); } + @Test + public void testRecoverableException_MultiObjectDeleteException() + { + MultiObjectDeleteException.DeleteError retryableError = new MultiObjectDeleteException.DeleteError(); + retryableError.setCode("RequestLimitExceeded"); + MultiObjectDeleteException.DeleteError nonRetryableError = new MultiObjectDeleteException.DeleteError(); + nonRetryableError.setCode("nonRetryableError"); + MultiObjectDeleteException ex = new MultiObjectDeleteException( + ImmutableList.of(retryableError, nonRetryableError), + ImmutableList.of() + ); + Assert.assertTrue(AWSClientUtil.isClientExceptionRecoverable(ex)); + } + @Test public void testNonRecoverableException_RuntimeException() { diff --git a/codestyle/guava16-forbidden-apis.txt b/codestyle/guava16-forbidden-apis.txt deleted file mode 100644 index 274a12696fb2..000000000000 --- a/codestyle/guava16-forbidden-apis.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Those signatures are only available in Guava 16: -com.google.common.util.concurrent.MoreExecutors#sameThreadExecutor() @ Use org.apache.druid.java.util.common.concurrent.Execs#directExecutor() -com.google.common.base.Objects#firstNonNull(java.lang.Object, java.lang.Object) @ Use org.apache.druid.common.guava.GuavaUtils#firstNonNull(java.lang.Object, java.lang.Object) instead (probably... the GuavaUtils method return object is nullable) diff --git a/dev/druid_intellij_formatting.xml b/dev/druid_intellij_formatting.xml index 6e0a33c9fd21..7771b2654721 100644 --- a/dev/druid_intellij_formatting.xml +++ b/dev/druid_intellij_formatting.xml @@ -74,6 +74,10 @@