From b17c26a66f0f24859029f40ff7b36560a750d798 Mon Sep 17 00:00:00 2001 From: Hang Zheng Date: Wed, 9 Oct 2024 05:31:14 -0400 Subject: [PATCH] expose the tls option on the s3options,only start the minio with tls in linux as the minio with mingw could NOT work well --- cpp/src/arrow/filesystem/filesystem.h | 3 ++ cpp/src/arrow/filesystem/s3_internal.h | 7 +++-- cpp/src/arrow/filesystem/s3_test_util.cc | 29 +++++++++--------- cpp/src/arrow/filesystem/s3_test_util.h | 6 ++++ cpp/src/arrow/filesystem/s3fs.cc | 23 +++++++++++++-- cpp/src/arrow/filesystem/s3fs.h | 14 +++++++++ cpp/src/arrow/filesystem/s3fs_benchmark.cc | 16 +++++----- cpp/src/arrow/filesystem/s3fs_test.cc | 34 +++++++++++++++------- 8 files changed, 93 insertions(+), 39 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index d4f62f86a7482..4ee5d8bbb15fa 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -710,6 +710,9 @@ struct FileSystemGlobalOptions { /// /// If empty, the underlying TLS library's defaults will be used. std::string tls_ca_dir_path; + + /// Controls whether to verify TLS certificates. Defaults to true. + bool tls_verify_certificates = true; }; /// EXPERIMENTAL: optional global initialization routine diff --git a/cpp/src/arrow/filesystem/s3_internal.h b/cpp/src/arrow/filesystem/s3_internal.h index 3c2e94c119663..fc4b4288ae6dd 100644 --- a/cpp/src/arrow/filesystem/s3_internal.h +++ b/cpp/src/arrow/filesystem/s3_internal.h @@ -323,14 +323,15 @@ Status SetSSECustomerKey(S3RequestType& request, const std::string& sse_customer if (sse_customer_key.empty()) { return Status::OK(); // do nothing if the sse_customer_key is not configured } -#ifndef ARROW_S3_SUPPORT_SSEC - return Status::NotImplemented("SSE-C is not supported"); -#endif +#ifdef ARROW_S3_SUPPORT_SSEC ARROW_ASSIGN_OR_RAISE(auto md5, internal::CalculateSSECustomerKeyMD5(sse_customer_key)); request.SetSSECustomerKeyMD5(md5); request.SetSSECustomerKey(arrow::util::base64_encode(sse_customer_key)); request.SetSSECustomerAlgorithm("AES256"); return Status::OK(); +#else + return Status::NotImplemented("SSE-C is not supported"); +#endif } } // namespace internal diff --git a/cpp/src/arrow/filesystem/s3_test_util.cc b/cpp/src/arrow/filesystem/s3_test_util.cc index 4617bb6f9d937..8b181df9ffe42 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.cc +++ b/cpp/src/arrow/filesystem/s3_test_util.cc @@ -61,6 +61,7 @@ struct MinioTestServer::Impl { std::string access_key_ = kMinioAccessKey; std::string secret_key_ = kMinioSecretKey; std::unique_ptr server_process_; + std::string scheme_ = "http"; }; MinioTestServer::MinioTestServer() : impl_(new Impl) {} @@ -80,6 +81,8 @@ std::string MinioTestServer::ca_path() const { return impl_->temp_dir_ca_->path().ToString(); } +std::string MinioTestServer::scheme() const { return impl_->scheme_; } + Status MinioTestServer::GenerateCertificateFile() { // create the dedicated folder for certificate file, rather than reuse the data // folder, since there is test case to check whether the folder is empty. @@ -101,19 +104,10 @@ Status MinioTestServer::GenerateCertificateFile() { strlen(kMinioPrivateKey))); ARROW_RETURN_NOT_OK(private_key_fd.Close()); - // Set the trusted CA certificate -#if defined(__linux__) arrow::fs::FileSystemGlobalOptions global_options; - global_options.tls_ca_dir_path = ca_path(); + global_options.tls_verify_certificates = false; ARROW_RETURN_NOT_OK(arrow::fs::Initialize(global_options)); -#elif defined(_WIN32) - // Windows does not have a standard location for CA certificates - auto import_cert_process = std::make_unique(); - ARROW_RETURN_NOT_OK(import_cert_process->SetExecutable("certutil")); - import_cert_process->SetArgs( - {"-addstore", "-f", "ArrowTest", public_crt_file.ToString()}); - ARROW_RETURN_NOT_OK(import_cert_process->Execute()); -#endif + return Status::OK(); } @@ -137,12 +131,19 @@ Status MinioTestServer::Start() { // Disable the embedded console (one less listening address to care about) impl_->server_process_->SetEnv("MINIO_BROWSER", "off"); impl_->connect_string_ = GenerateConnectString(); + std::vector minio_args({"server", "--quiet", "--compat", "--address", + impl_->connect_string_, + impl_->temp_dir_->path().ToString()}); +#ifdef MINIO_SERVER_WITH_TLS ARROW_RETURN_NOT_OK(GenerateCertificateFile()); + minio_args.push_back("--certs-dir"); + minio_args.push_back(ca_path()); + impl_->scheme_ = "https"; +#endif // MINIO_SERVER_WITH_TLS + ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName)); // NOTE: --quiet makes startup faster by suppressing remote version check - impl_->server_process_->SetArgs({"server", "--quiet", "--compat", "--certs-dir", - ca_path(), "--address", impl_->connect_string_, - impl_->temp_dir_->path().ToString()}); + impl_->server_process_->SetArgs(minio_args); ARROW_RETURN_NOT_OK(impl_->server_process_->Execute()); return Status::OK(); } diff --git a/cpp/src/arrow/filesystem/s3_test_util.h b/cpp/src/arrow/filesystem/s3_test_util.h index 6c1a1675c2904..0dfdcf9d783ee 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.h +++ b/cpp/src/arrow/filesystem/s3_test_util.h @@ -30,6 +30,10 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/macros.h" +#if defined(__linux__) +# define MINIO_SERVER_WITH_TLS +#endif // Linux + namespace arrow { namespace fs { @@ -52,6 +56,8 @@ class MinioTestServer { std::string ca_path() const; + std::string scheme() const; + private: Status GenerateCertificateFile(); struct Impl; diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index aea17581ecdf5..13229cc9a83bf 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -408,6 +408,13 @@ Result S3Options::FromUri(const Uri& uri, std::string* out_path) { } else if (kv.first == "allow_bucket_deletion") { ARROW_ASSIGN_OR_RAISE(options.allow_bucket_deletion, ::arrow::internal::ParseBoolean(kv.second)); + } else if (kv.first == "tls_ca_file_path") { + options.tls_ca_file_path = kv.second; + } else if (kv.first == "tls_ca_dir_path") { + options.tls_ca_dir_path = kv.second; + } else if (kv.first == "tls_verify_certificates") { + ARROW_ASSIGN_OR_RAISE(options.tls_verify_certificates, + ::arrow::internal::ParseBoolean(kv.second)); } else { return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'"); } @@ -444,6 +451,9 @@ bool S3Options::Equals(const S3Options& other) const { background_writes == other.background_writes && allow_bucket_creation == other.allow_bucket_creation && allow_bucket_deletion == other.allow_bucket_deletion && + tls_ca_file_path == other.tls_ca_file_path && + tls_ca_dir_path == other.tls_ca_dir_path && + tls_verify_certificates == other.tls_verify_certificates && sse_customer_key == other.sse_customer_key && default_metadata_equals && GetAccessKey() == other.GetAccessKey() && GetSecretKey() == other.GetSecretKey() && @@ -1131,12 +1141,21 @@ class ClientBuilder { } else { client_config_.retryStrategy = std::make_shared(); } - if (!internal::global_options.tls_ca_file_path.empty()) { + if (!options_.tls_ca_file_path.empty()) { + client_config_.caFile = ToAwsString(options_.tls_ca_file_path); + } else if (!internal::global_options.tls_ca_file_path.empty()) { client_config_.caFile = ToAwsString(internal::global_options.tls_ca_file_path); } - if (!internal::global_options.tls_ca_dir_path.empty()) { + if (!options_.tls_ca_dir_path.empty()) { + client_config_.caPath = ToAwsString(options_.tls_ca_dir_path); + } else if (!internal::global_options.tls_ca_dir_path.empty()) { client_config_.caPath = ToAwsString(internal::global_options.tls_ca_dir_path); } + if (!options_.tls_verify_certificates) { + client_config_.verifySSL = options_.tls_verify_certificates; + } else if (!internal::global_options.tls_verify_certificates) { + client_config_.verifySSL = internal::global_options.tls_verify_certificates; + } // Set proxy options if provided if (!options_.proxy_options.scheme.empty()) { diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 46aa5430ed914..a1c0a98a8001e 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -199,6 +199,20 @@ struct ARROW_EXPORT S3Options { /// the SSE-C customized key (raw 32 bytes key). std::string sse_customer_key; + /// Path to a single PEM file holding all TLS CA certificates + /// + /// If empty, the underlying TLS library's defaults will be used. + std::string tls_ca_file_path; + + /// Path to a directory holding TLS CA certificates in individual PEM files + /// named along the OpenSSL "hashed" format. + /// + /// If empty, the underlying TLS library's defaults will be used. + std::string tls_ca_dir_path; + + /// Controls whether to verify TLS certificates. Defaults to true. + bool tls_verify_certificates = true; + S3Options(); /// Configure with the default AWS credentials provider chain. diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc b/cpp/src/arrow/filesystem/s3fs_benchmark.cc index d5e2e78d61dfa..9dd2bf0228b4c 100644 --- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc +++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc @@ -81,14 +81,12 @@ class MinioFixture : public benchmark::Fixture { } client_config_.endpointOverride = ToAwsString(minio_->connect_string()); - client_config_.scheme = Aws::Http::Scheme::HTTPS; -// The caPath only take effect on linux according to the AWS SDK documentation -// https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/client-config.html -#if defined(__linux__) - client_config_.caPath = ToAwsString(minio_->ca_path()); -#else - client_config_.verifySSL = false; -#endif + if (minio_->scheme() == "https") { + client_config_.scheme = Aws::Http::Scheme::HTTPS; + client_config_.verifySSL = false; + } else { + client_config_.scheme = Aws::Http::Scheme::HTTP; + } if (!region_.empty()) { client_config_.region = ToAwsString(region_); } @@ -117,7 +115,7 @@ class MinioFixture : public benchmark::Fixture { void MakeFileSystem() { options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); - options_.scheme = "https"; + options_.scheme = minio_->scheme(); if (!region_.empty()) { options_.region = region_; } diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index bd35ffe0a987b..6afbd9045bd1a 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -206,14 +206,12 @@ class S3TestMixin : public AwsTestMixin { ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv()->GetOneServer()); client_config_.reset(new Aws::Client::ClientConfiguration()); client_config_->endpointOverride = ToAwsString(minio_->connect_string()); - client_config_->scheme = Aws::Http::Scheme::HTTPS; -// The caPath only take effect on linux according to the AWS SDK documentation -// https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/client-config.html -#if defined(__linux__) - client_config_->caPath = ToAwsString(minio_->ca_path()); -#else - client_config_->verifySSL = false; -#endif + if (minio_->scheme() == "https") { + client_config_->scheme = Aws::Http::Scheme::HTTPS; + client_config_->verifySSL = false; + } else { + client_config_->scheme = Aws::Http::Scheme::HTTP; + } client_config_->retryStrategy = std::make_shared(kRetryInterval, kMaxRetryDuration); credentials_ = {ToAwsString(minio_->access_key()), ToAwsString(minio_->secret_key())}; @@ -309,6 +307,16 @@ TEST_F(S3OptionsTest, FromUri) { ASSERT_EQ(options.endpoint_override, "localhost"); ASSERT_EQ(path, "mybucket/foo/bar"); + // Explicit tls related configuration + ASSERT_OK_AND_ASSIGN( + options, + S3Options::FromUri("s3://mybucket/foo/bar/?tls_ca_dir_path=/test&tls_ca_file_path=/" + "test/test.pem&tls_verify_certificates=false", + &path)); + ASSERT_EQ(options.tls_ca_dir_path, "/test"); + ASSERT_EQ(options.tls_ca_file_path, "/test/test.pem"); + ASSERT_EQ(options.tls_verify_certificates, false); + // Missing bucket name ASSERT_RAISES(Invalid, S3Options::FromUri("s3:///foo/bar/", &path)); @@ -451,6 +459,7 @@ class TestS3FS : public S3TestMixin { // Most tests will create buckets options_.allow_bucket_creation = true; options_.allow_bucket_deletion = true; + options_.tls_verify_certificates = false; MakeFileSystem(); // Set up test bucket { @@ -540,7 +549,7 @@ class TestS3FS : public S3TestMixin { Result> MakeNewFileSystem( io::IOContext io_context = io::default_io_context()) { options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); - options_.scheme = "https"; + options_.scheme = minio_->scheme(); options_.endpoint_override = minio_->connect_string(); if (!options_.retry_strategy) { options_.retry_strategy = std::make_shared(); @@ -1305,6 +1314,7 @@ TEST_F(TestS3FS, OpenInputFile) { ASSERT_RAISES(IOError, file->Seek(10)); } +#ifdef MINIO_SERVER_WITH_TLS TEST_F(TestS3FS, SSECustomerKeyMatch) { // normal write/read with correct SSEC key std::shared_ptr stream; @@ -1332,6 +1342,7 @@ TEST_F(TestS3FS, SSECustomerKeyMismatch) { ASSERT_RAISES(IOError, fs_->OpenInputFile("bucket/newfile_with_sse_c")); ASSERT_OK(RestoreTestBucket()); } +#endif // MINIO_SERVER_WITH_TLS struct S3OptionsTestParameters { bool background_writes{false}; @@ -1455,7 +1466,8 @@ TEST_F(TestS3FS, FileSystemFromUri) { std::stringstream ss; ss << "s3://" << minio_->access_key() << ":" << minio_->secret_key() << "@bucket/somedir/subdir/subfile" - << "?scheme=https&endpoint_override=" << UriEscape(minio_->connect_string()); + << "?scheme=" << minio_->scheme() + << "&endpoint_override=" << UriEscape(minio_->connect_string()); std::string path; ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri(ss.str(), &path)); @@ -1557,7 +1569,7 @@ class TestS3FSGeneric : public S3TestMixin, public GenericFileSystemTest { } options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key()); - options_.scheme = "https"; + options_.scheme = minio_->scheme(); options_.endpoint_override = minio_->connect_string(); options_.retry_strategy = std::make_shared(); ASSERT_OK_AND_ASSIGN(s3fs_, S3FileSystem::Make(options_));