From 9a16d4e21922af60b13463f4fd4c31f45761a5c0 Mon Sep 17 00:00:00 2001 From: Abhishek Radhakrishnan Date: Fri, 18 Oct 2024 14:39:01 -0700 Subject: [PATCH 01/12] Move SqlTaskStatus and SqlTaskStausTest from msq module to sql module. (#17380) - This is a non-functional change that moves SqlTaskStatus and its unit test SqlTaskStatusTest from the msq module to the sql module to help class reuse in other places. - This refactor is extracted from this PR to facilitate easier review. - Fix a minor spacing issue in the TaskStartTimeoutFault error message. --- .../druid/msq/indexing/error/TaskStartTimeoutFault.java | 2 +- .../org/apache/druid/msq/sql/resources/SqlTaskResource.java | 2 +- .../druid/testsEx/catalog/ITCatalogIngestAndQueryTest.java | 2 +- .../druid/testsEx/catalog/ITCatalogIngestErrorTest.java | 2 +- .../druid/testsEx/msq/ITKeyStatisticsSketchMergeMode.java | 2 +- .../testsEx/msq/ITMultiStageQueryWorkerFaultTolerance.java | 2 +- .../java/org/apache/druid/testsEx/msq/MultiStageQuery.java | 2 +- .../org/apache/druid/testing/utils/MsqTestQueryHelper.java | 2 +- .../main/java/org/apache/druid/sql/http}/SqlTaskStatus.java | 5 ++--- .../java/org/apache/druid/sql/http}/SqlTaskStatusTest.java | 2 +- 10 files changed, 11 insertions(+), 12 deletions(-) rename {extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql => sql/src/main/java/org/apache/druid/sql/http}/SqlTaskStatus.java (94%) rename {extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql => sql/src/test/java/org/apache/druid/sql/http}/SqlTaskStatusTest.java (98%) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/TaskStartTimeoutFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/TaskStartTimeoutFault.java index 37d464a324e7..1eba2e8dc779 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/TaskStartTimeoutFault.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/TaskStartTimeoutFault.java @@ -45,7 +45,7 @@ public TaskStartTimeoutFault( { super( CODE, - "Unable to launch [%d] workers out of the total [%d] worker tasks within [%,d] seconds of the last successful worker launch." + "Unable to launch [%d] workers out of the total [%d] worker tasks within [%,d] seconds of the last successful worker launch. " + "There might be insufficient available slots to start all worker tasks simultaneously. " + "Try lowering '%s' in your query context to a number that fits within your available task capacity, " + "or try increasing capacity.", diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/resources/SqlTaskResource.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/resources/SqlTaskResource.java index 7a51bc8d26a4..a07ef2c403fb 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/resources/SqlTaskResource.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/resources/SqlTaskResource.java @@ -33,7 +33,6 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.msq.guice.MultiStageQuery; import org.apache.druid.msq.sql.MSQTaskSqlEngine; -import org.apache.druid.msq.sql.SqlTaskStatus; import org.apache.druid.query.QueryException; import org.apache.druid.server.QueryResponse; import org.apache.druid.server.initialization.ServerConfig; @@ -48,6 +47,7 @@ import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.sql.http.SqlQuery; import org.apache.druid.sql.http.SqlResource; +import org.apache.druid.sql.http.SqlTaskStatus; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.Consumes; diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestAndQueryTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestAndQueryTest.java index 29dee7329cb0..1a756910bf24 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestAndQueryTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestAndQueryTest.java @@ -29,9 +29,9 @@ import org.apache.druid.catalog.model.table.TableBuilder; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.msq.sql.SqlTaskStatus; import org.apache.druid.query.QueryContexts; import org.apache.druid.sql.http.SqlQuery; +import org.apache.druid.sql.http.SqlTaskStatus; import org.apache.druid.testing.utils.DataLoaderHelper; import org.apache.druid.testing.utils.MsqTestQueryHelper; import org.apache.druid.testsEx.cluster.CatalogClient; diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestErrorTest.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestErrorTest.java index 87e35e2b6465..e075462d0bb6 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestErrorTest.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/catalog/ITCatalogIngestErrorTest.java @@ -27,8 +27,8 @@ import org.apache.druid.catalog.model.table.DatasourceDefn; import org.apache.druid.catalog.model.table.TableBuilder; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.msq.sql.SqlTaskStatus; import org.apache.druid.sql.http.SqlQuery; +import org.apache.druid.sql.http.SqlTaskStatus; import org.apache.druid.testing.utils.MsqTestQueryHelper; import org.apache.druid.testsEx.categories.Catalog; import org.apache.druid.testsEx.cluster.CatalogClient; diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITKeyStatisticsSketchMergeMode.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITKeyStatisticsSketchMergeMode.java index ad0b764e70fe..b0dee9e713eb 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITKeyStatisticsSketchMergeMode.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITKeyStatisticsSketchMergeMode.java @@ -23,9 +23,9 @@ import com.google.inject.Inject; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.msq.exec.ClusterStatisticsMergeMode; -import org.apache.druid.msq.sql.SqlTaskStatus; import org.apache.druid.msq.util.MultiStageQueryContext; import org.apache.druid.sql.http.SqlQuery; +import org.apache.druid.sql.http.SqlTaskStatus; import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.utils.DataLoaderHelper; import org.apache.druid.testing.utils.MsqTestQueryHelper; diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQueryWorkerFaultTolerance.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQueryWorkerFaultTolerance.java index 37d99b7d9b93..f45ce3ad0e96 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQueryWorkerFaultTolerance.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/ITMultiStageQueryWorkerFaultTolerance.java @@ -25,8 +25,8 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.msq.sql.SqlTaskStatus; import org.apache.druid.msq.util.MultiStageQueryContext; +import org.apache.druid.sql.http.SqlTaskStatus; import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.utils.DataLoaderHelper; import org.apache.druid.testing.utils.ITRetryUtil; diff --git a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/MultiStageQuery.java b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/MultiStageQuery.java index bda1c2434536..3cdc57c37bfa 100644 --- a/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/MultiStageQuery.java +++ b/integration-tests-ex/cases/src/test/java/org/apache/druid/testsEx/msq/MultiStageQuery.java @@ -29,7 +29,7 @@ import org.apache.druid.msq.indexing.report.MSQResultsReport; import org.apache.druid.msq.indexing.report.MSQTaskReport; import org.apache.druid.msq.indexing.report.MSQTaskReportPayload; -import org.apache.druid.msq.sql.SqlTaskStatus; +import org.apache.druid.sql.http.SqlTaskStatus; import org.apache.druid.storage.local.LocalFileExportStorageProvider; import org.apache.druid.testing.clients.CoordinatorResourceTestClient; import org.apache.druid.testing.utils.DataLoaderHelper; diff --git a/integration-tests/src/main/java/org/apache/druid/testing/utils/MsqTestQueryHelper.java b/integration-tests/src/main/java/org/apache/druid/testing/utils/MsqTestQueryHelper.java index 4efd5c7d42d8..2a4d04f42dbd 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/utils/MsqTestQueryHelper.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/utils/MsqTestQueryHelper.java @@ -37,8 +37,8 @@ import org.apache.druid.msq.indexing.report.MSQResultsReport; import org.apache.druid.msq.indexing.report.MSQTaskReport; import org.apache.druid.msq.indexing.report.MSQTaskReportPayload; -import org.apache.druid.msq.sql.SqlTaskStatus; import org.apache.druid.sql.http.SqlQuery; +import org.apache.druid.sql.http.SqlTaskStatus; import org.apache.druid.testing.IntegrationTestingConfig; import org.apache.druid.testing.clients.OverlordResourceTestClient; import org.apache.druid.testing.clients.SqlResourceTestClient; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/SqlTaskStatus.java b/sql/src/main/java/org/apache/druid/sql/http/SqlTaskStatus.java similarity index 94% rename from extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/SqlTaskStatus.java rename to sql/src/main/java/org/apache/druid/sql/http/SqlTaskStatus.java index 9c92a2589f2d..765891ec20a1 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/SqlTaskStatus.java +++ b/sql/src/main/java/org/apache/druid/sql/http/SqlTaskStatus.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.msq.sql; +package org.apache.druid.sql.http; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonInclude; @@ -25,13 +25,12 @@ import com.google.common.base.Preconditions; import org.apache.druid.error.ErrorResponse; import org.apache.druid.indexer.TaskState; -import org.apache.druid.msq.sql.resources.SqlTaskResource; import javax.annotation.Nullable; import java.util.Objects; /** - * Response object for {@link SqlTaskResource#doPost}. + * Response object for org.apache.druid.msq.sql.resources.SqlTaskResource#doPost. */ public class SqlTaskStatus { diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql/SqlTaskStatusTest.java b/sql/src/test/java/org/apache/druid/sql/http/SqlTaskStatusTest.java similarity index 98% rename from extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql/SqlTaskStatusTest.java rename to sql/src/test/java/org/apache/druid/sql/http/SqlTaskStatusTest.java index 301f91ce8d3d..c73e8d3cddbd 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/sql/SqlTaskStatusTest.java +++ b/sql/src/test/java/org/apache/druid/sql/http/SqlTaskStatusTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.msq.sql; +package org.apache.druid.sql.http; import com.fasterxml.jackson.databind.ObjectMapper; import nl.jqno.equalsverifier.EqualsVerifier; From 5da99499926395c7c5fe2cc23b56c8e358c04fd9 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Sat, 19 Oct 2024 13:33:33 +0530 Subject: [PATCH 02/12] Fail MSQ compaction if multi-valued partition dimensions are found (#17344) MSQ currently supports only single-valued string dimensions as partition keys. This patch adds a check to ensure that partition keys are single-valued in case this info is available by virtue of segment download for schema inference. During compaction, if MSQ finds multi-valued dimensions (MVDs) declared as part of `range` partitionsSpec, it switches partitioning type to dynamic, ending up in repeated compactions of the same interval. To avoid this scenario, the segment download logic is also updated to always download segments if info on multi-valued dimensions is required. --- .../msq/indexing/MSQCompactionRunner.java | 43 ++++++- .../msq/indexing/MSQCompactionRunnerTest.java | 58 +++++++-- .../indexing/common/task/CompactionTask.java | 66 ++++++++-- .../common/task/CompactionTaskTest.java | 117 ++++++++++++++++++ .../indexing/ClientCompactionRunnerInfo.java | 9 +- .../ClientCompactionRunnerInfoTest.java | 29 ++++- 6 files changed, 296 insertions(+), 26 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java index e20188d58294..d05ab12ea3fe 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java @@ -29,6 +29,7 @@ import com.google.inject.Injector; import org.apache.druid.client.indexing.ClientCompactionRunnerInfo; import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; @@ -84,6 +85,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -130,7 +132,7 @@ public MSQCompactionRunner( * The following configs aren't supported: *