Removes support for Hadoop 2 #14763

Merged
Changes from 1 commit
2 changes: 1 addition & 1 deletion .github/scripts/analyze_dependencies_script.sh
@@ -15,7 +15,7 @@

#!bin/bash

-${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true ${HADOOP_PROFILE} ||
+${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true ||
{ echo "
The dependency analysis has found a dependency that is either:

Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/license_checks_script.sh
@@ -20,7 +20,7 @@ set -e
./.github/scripts/setup_generate_license.sh
${MVN} apache-rat:check -Prat --fail-at-end \
-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \
--Drat.consoleOutput=true ${HADOOP_PROFILE}
+-Drat.consoleOutput=true
# Generate dependency reports and checks they are valid.
mkdir -p target
distribution/bin/generate-license-dependency-reports.py . target --clean-maven-artifact-transfer --parallel 2
5 changes: 1 addition & 4 deletions .github/workflows/cron-job-its.yml
@@ -112,7 +112,6 @@ jobs:
strategy:
fail-fast: false
matrix:
Contributor comment: this line can be removed as well?

-HADOOP_PROFILE: [ '', '-Phadoop2' ]
runs-on: ubuntu-latest
steps:
- name: Checkout branch
@@ -129,10 +128,8 @@
run: mvn clean install dependency:go-offline -P dist -P skip-static-checks,skip-tests -Dmaven.javadoc.skip=true -Dcyclonedx.skip=true -Dweb.console.skip=true

- name: security vulnerabilities check
-env:
-HADOOP_PROFILE: ${{ matrix.HADOOP_PROFILE }}
run: |
-mvn dependency-check:purge dependency-check:check ${HADOOP_PROFILE} || { echo "
+mvn dependency-check:purge dependency-check:check || { echo "
The OWASP dependency check has found security vulnerabilities. Please use a newer version
of the dependency that does not have vulnerabilities. To see a report run
`mvn dependency-check:check`
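Picking up the review comment above about the `matrix:` line: if `HADOOP_PROFILE` was the only remaining matrix dimension, the whole `strategy` block becomes redundant once the entry is gone. A minimal sketch of the trimmed job is shown below; the job name and the checkout action version are assumptions, while the step names and the `mvn` command are the ones already visible in this diff.

```yaml
# Hypothetical follow-up sketch, not part of this PR; workflow-level keys are
# omitted, and the job name plus actions/checkout version are assumed.
jobs:
  security-vulnerabilities:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout branch
        uses: actions/checkout@v3
      - name: security vulnerabilities check
        run: |
          mvn dependency-check:purge dependency-check:check
```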
7 changes: 0 additions & 7 deletions .github/workflows/static-checks.yml
@@ -95,13 +95,6 @@ jobs:
run: |
./.github/scripts/analyze_dependencies_script.sh

-- name: analyze dependencies for hadoop2
-if: ${{ matrix.java == 'jdk8' }}
-env:
-HADOOP_PROFILE: -Phadoop2
-run: |
-./.github/scripts/analyze_dependencies_script.sh

- name: animal sniffer checks
if: ${{ matrix.java == 'jdk8' }}
run: ${MVN} animal-sniffer:check --fail-at-end
185 changes: 0 additions & 185 deletions distribution/pom.xml
@@ -115,191 +115,6 @@
</build>

<profiles>
<profile>
<id>dist-hadoop2</id>
<activation>
<activeByDefault>false</activeByDefault>
<property>
<name>tar</name>
</property>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<id>generate-readme</id>
<phase>initialize</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>${project.basedir}/bin/build-textfile-readme.sh</executable>
<arguments>
<argument>${project.basedir}/../</argument>
<argument>${project.parent.version}</argument>
</arguments>
</configuration>
</execution>
<execution>
<id>generate-binary-license</id>
<phase>initialize</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>${project.basedir}/bin/generate-binary-license.py</executable>
<arguments>
<argument>${project.parent.basedir}/licenses/APACHE2</argument>
<argument>${project.parent.basedir}/licenses.yaml</argument>
<argument>${project.parent.basedir}/LICENSE.BINARY</argument>
</arguments>
</configuration>
</execution>
<execution>
<id>generate-binary-notice</id>
<phase>initialize</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>${project.basedir}/bin/generate-binary-notice.py</executable>
<arguments>
<argument>${project.parent.basedir}/NOTICE</argument>
<argument>${project.parent.basedir}/licenses.yaml</argument>
<argument>${project.parent.basedir}/NOTICE.BINARY</argument>
</arguments>
</configuration>
</execution>
<execution>
<id>pull-deps</id>
<phase>package</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>${project.parent.basedir}/examples/bin/run-java</executable>
<arguments>
<argument>-classpath</argument>
<classpath />
<argument>-Ddruid.extensions.loadList=[]</argument>
<argument>-Ddruid.extensions.directory=${project.build.directory}/extensions
</argument>
<argument>
-Ddruid.extensions.hadoopDependenciesDir=${project.build.directory}/hadoop-dependencies
</argument>
<argument>-Dhadoop2.enabled=true</argument>
<argument>org.apache.druid.cli.Main</argument>
<argument>tools</argument>
<argument>pull-deps</argument>
<argument>--clean</argument>
<argument>--defaultVersion</argument>
<argument>${project.parent.version}</argument>
<argument>-l</argument>
<argument>${settings.localRepository}</argument>
<argument>-h</argument>
<argument>org.apache.hadoop:hadoop-client:${hadoop.compile.version}</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-avro-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-azure-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-bloom-filter</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-datasketches</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-hdfs-storage</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-histogram</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-kafka-extraction-namespace</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-kafka-indexing-service</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-kinesis-indexing-service</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-lookups-cached-global</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-lookups-cached-single</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-multi-stage-query</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-protobuf-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:mysql-metadata-storage</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-orc-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-parquet-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:postgresql-metadata-storage</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-kerberos</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-s3-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-aws-rds-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-ec2-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-google-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-stats</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:simple-client-sslcontext</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-basic-security</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-pac4j</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-ranger-security</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-kubernetes-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions:druid-catalog</argument>
<argument>${druid.distribution.pulldeps.opts}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>distro-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<finalName>apache-druid-${project.parent.version}</finalName>
<tarLongFileMode>posix</tarLongFileMode>
<descriptors>
<descriptor>src/assembly/assembly.xml</descriptor>
</descriptors>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>license-maven-plugin</artifactId>
<executions>
<execution>
<id>download-licenses</id>
<goals>
<goal>download-licenses</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>dist</id>
<activation>
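The deleted `dist-hadoop2` profile above essentially wrapped the standard `pull-deps` tool plus the `-Dhadoop2.enabled=true` system property. For orientation only, here is a rough, hypothetical sketch of an equivalent manual invocation against the Hadoop 3 client artifacts this PR documents elsewhere; the flags (`-h`, `-c`, `-l`, `--clean`, `--defaultVersion`) are the same ones used in the removed profile, while the classpath, output directories, local repository path, and Druid version are placeholder assumptions.

```bash
# Illustrative sketch only, not a command taken from this PR.
# "lib/*", the target/ directories, the local repository path, and 28.0.0
# are placeholders; substitute values from your own installation.
java -classpath "lib/*" \
  -Ddruid.extensions.loadList='[]' \
  -Ddruid.extensions.directory=target/extensions \
  -Ddruid.extensions.hadoopDependenciesDir=target/hadoop-dependencies \
  org.apache.druid.cli.Main tools pull-deps \
  --clean \
  --defaultVersion 28.0.0 \
  -l "$HOME/.m2/repository" \
  -h org.apache.hadoop:hadoop-client-api:3.3.6 \
  -h org.apache.hadoop:hadoop-client-runtime:3.3.6 \
  -c org.apache.druid.extensions:druid-hdfs-storage
```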
4 changes: 2 additions & 2 deletions docs/configuration/index.md
@@ -1534,7 +1534,7 @@ Additional peon configs include:
|`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`|
|`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/task`|
|`druid.indexer.task.batchProcessingMode`| Batch ingestion tasks have three operating modes to control construction and tracking for intermediary segments: `OPEN_SEGMENTS`, `CLOSED_SEGMENTS`, and `CLOSED_SEGMENT_SINKS`. `OPEN_SEGMENTS` uses the streaming ingestion code path and performs a `mmap` on intermediary segments to build a timeline to make these segments available to realtime queries. Batch ingestion doesn't require intermediary segments, so the default mode, `CLOSED_SEGMENTS`, eliminates `mmap` of intermediary segments. `CLOSED_SEGMENTS` mode still tracks the entire set of segments in heap. The `CLOSED_SEGMENTS_SINKS` mode is the most aggressive configuration and should have the smallest memory footprint. It eliminates in-memory tracking and `mmap` of intermediary segments produced during segment creation. `CLOSED_SEGMENTS_SINKS` mode isn't as well tested as other modes so is currently considered experimental. You can use `OPEN_SEGMENTS` mode if problems occur with the 2 newer modes. |`CLOSED_SEGMENTS`|
-|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.5|
+|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client-api:3.3.6,org.apache.hadoop:hadoop-client-runtime:3.3.6|
Contributor @ektravel commented on Aug 7, 2023. Suggested change:

|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client-api:3.3.6,org.apache.hadoop:hadoop-client-runtime:3.3.6|
|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|`org.apache.hadoop:hadoop-client-api:3.3.6`, `org.apache.hadoop:hadoop-client-runtime:3.3.6`|

|`druid.indexer.task.defaultRowFlushBoundary`|Highest row count before persisting to disk. Used for indexing generating tasks.|75000|
|`druid.indexer.task.directoryLockTimeout`|Wait this long for zombie peons to exit before giving up on their replacements.|PT10M|
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M|
@@ -1605,7 +1605,7 @@ then the value from the configuration below is used:
|`druid.worker.numConcurrentMerges`|Maximum number of segment persist or merge operations that can run concurrently across all tasks.|`druid.worker.capacity` / 2, rounded down|
|`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`|
|`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/tasks`|
-|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.5|
+|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client-api:3.3.6,org.apache.hadoop:hadoop-client-runtime:3.3.6|
Contributor @ektravel commented on Aug 7, 2023. Suggested change:

|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client-api:3.3.6,org.apache.hadoop:hadoop-client-runtime:3.3.6|
|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|`org.apache.hadoop:hadoop-client-api:3.3.6`, `org.apache.hadoop:hadoop-client-runtime:3.3.6`|

|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on Indexer restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, the Indexer will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
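For operators, the practical effect of the rows above is that Hadoop ingestion now defaults to the Hadoop 3 client artifacts. A hypothetical `runtime.properties` entry that merely restates the new default explicitly might look like the following; it is shown only to illustrate where the setting lives, and the exact list syntax should be checked against your deployment before copying.

```properties
# Illustration only: pins the same Hadoop 3 client artifacts that this PR
# makes the default for HadoopIndexTasks that do not request a version.
# Some deployments express list-valued Druid properties in JSON-array form;
# match whatever convention your existing runtime.properties already uses.
druid.indexer.task.defaultHadoopCoordinates=org.apache.hadoop:hadoop-client-api:3.3.6,org.apache.hadoop:hadoop-client-runtime:3.3.6
```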
4 changes: 1 addition & 3 deletions docs/development/extensions-core/hdfs.md
@@ -112,7 +112,7 @@ example properties. Please follow the instructions at
[https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md](https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md)
for more details.
For more configurations, [GCS core default](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/v2.0.0/gcs/conf/gcs-core-default.xml)
-and [GCS core template](https://github.com/GoogleCloudPlatform/bdutil/blob/master/conf/hadoop2/gcs-core-template.xml).
+and [GCS core template](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcs/src/test/resources/core-site.xml).

```xml
<property>
@@ -147,8 +147,6 @@ and [GCS core template](https://github.com/GoogleCloudPlatform/bdutil/blob/maste
</property>
```

-Tested with Druid 0.17.0, Hadoop 2.8.5 and gcs-connector jar 2.0.0-hadoop2.

## Reading data from HDFS or Cloud Storage

### Native batch ingestion
2 changes: 1 addition & 1 deletion docs/ingestion/hadoop.md
@@ -180,7 +180,7 @@ Once you install the GCS Connector jar in all MiddleManager and Indexer processe
your Google Cloud Storage paths in the inputSpec with the below job properties.
For more configurations, see the [instructions to configure Hadoop](https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md#configure-hadoop),
[GCS core default](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/v2.0.0/gcs/conf/gcs-core-default.xml)
-and [GCS core template](https://github.com/GoogleCloudPlatform/bdutil/blob/master/conf/hadoop2/gcs-core-template.xml).
+and [GCS core template](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcs/src/test/resources/core-site.xml).

```
"paths" : "gs://billy-bucket/the/data/is/here/data.gz,gs://billy-bucket/the/data/is/here/moredata.gz,gs://billy-bucket/the/data/is/here/evenmoredata.gz"