From 1f575d4d816032e136b44b15ea940a19c67a9466 Mon Sep 17 00:00:00 2001 From: Yi Hu Date: Mon, 14 Oct 2024 11:38:19 -0400 Subject: [PATCH] Avoid repeated run of setDefaultPipelineOptionsOnce in TestPipelineOptions.create (#32723) --- .../org/apache/beam/sdk/io/FileSystems.java | 23 +++++++++++++++++-- .../apache/beam/sdk/testing/TestPipeline.java | 4 ++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java index fb25cac6262f..5ca22749b163 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileSystems.java @@ -565,11 +565,13 @@ static FileSystem getFileSystemInternal(String scheme) { * *

It will be used in {@link FileSystemRegistrar FileSystemRegistrars} for all schemes. * - *

This is expected only to be used by runners after {@code Pipeline.run}, or in tests. + *

Outside of workers where Beam FileSystem API is used (e.g. test methods, user code executed + * during pipeline submission), consider use {@link #registerFileSystemsOnce} if initialize + * FIleSystem of supported schema is the main goal. */ @Internal public static void setDefaultPipelineOptions(PipelineOptions options) { - checkNotNull(options, "options"); + checkNotNull(options, "options cannot be null"); long id = options.getOptionsId(); int nextRevision = options.revision(); @@ -593,6 +595,23 @@ public static void setDefaultPipelineOptions(PipelineOptions options) { } } + /** + * Register file systems once if never done before. + * + *

This method executes {@link #setDefaultPipelineOptions} only if it has never been run, + * otherwise it returns immediately. + * + *

It is internally used by test setup to avoid repeated filesystem registrations (involves + * expensive ServiceLoader calls) when there are multiple pipeline and PipelineOptions object + * initialized, which is commonly seen in test execution. + */ + @Internal + public static synchronized void registerFileSystemsOnce(PipelineOptions options) { + if (FILESYSTEM_REVISION.get() == null) { + setDefaultPipelineOptions(options); + } + } + @VisibleForTesting static Map verifySchemesAreUnique( PipelineOptions options, Set registrars) { diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java index ed61f7f3d6f2..328bf19c466c 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java @@ -373,7 +373,7 @@ public PipelineResult runWithAdditionalOptionArgs(List additionalArgs) { } newOptions.setStableUniqueNames(CheckEnabled.ERROR); - FileSystems.setDefaultPipelineOptions(options); + FileSystems.registerFileSystemsOnce(options); return run(newOptions); } catch (IOException e) { throw new RuntimeException( @@ -515,7 +515,7 @@ public static PipelineOptions testingPipelineOptions() { } options.setStableUniqueNames(CheckEnabled.ERROR); - FileSystems.setDefaultPipelineOptions(options); + FileSystems.registerFileSystemsOnce(options); return options; } catch (IOException e) { throw new RuntimeException(