Skip to content

Commit

Permalink
expose the tls option on the s3options,only start the minio with tls…
Browse files Browse the repository at this point in the history
… in linux as the minio with mingw could NOT work well
  • Loading branch information
Hang Zheng committed Oct 9, 2024
1 parent 44d759a commit b17c26a
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 39 deletions.
3 changes: 3 additions & 0 deletions cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,9 @@ struct FileSystemGlobalOptions {
///
/// If empty, the underlying TLS library's defaults will be used.
std::string tls_ca_dir_path;

/// Controls whether to verify TLS certificates. Defaults to true.
bool tls_verify_certificates = true;
};

/// EXPERIMENTAL: optional global initialization routine
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/filesystem/s3_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,14 +323,15 @@ Status SetSSECustomerKey(S3RequestType& request, const std::string& sse_customer
if (sse_customer_key.empty()) {
return Status::OK(); // do nothing if the sse_customer_key is not configured
}
#ifndef ARROW_S3_SUPPORT_SSEC
return Status::NotImplemented("SSE-C is not supported");
#endif
#ifdef ARROW_S3_SUPPORT_SSEC
ARROW_ASSIGN_OR_RAISE(auto md5, internal::CalculateSSECustomerKeyMD5(sse_customer_key));
request.SetSSECustomerKeyMD5(md5);
request.SetSSECustomerKey(arrow::util::base64_encode(sse_customer_key));
request.SetSSECustomerAlgorithm("AES256");
return Status::OK();
#else
return Status::NotImplemented("SSE-C is not supported");
#endif
}

} // namespace internal
Expand Down
29 changes: 15 additions & 14 deletions cpp/src/arrow/filesystem/s3_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct MinioTestServer::Impl {
std::string access_key_ = kMinioAccessKey;
std::string secret_key_ = kMinioSecretKey;
std::unique_ptr<util::Process> server_process_;
std::string scheme_ = "http";
};

MinioTestServer::MinioTestServer() : impl_(new Impl) {}
Expand All @@ -80,6 +81,8 @@ std::string MinioTestServer::ca_path() const {
return impl_->temp_dir_ca_->path().ToString();
}

std::string MinioTestServer::scheme() const { return impl_->scheme_; }

Status MinioTestServer::GenerateCertificateFile() {
// create the dedicated folder for certificate file, rather than reuse the data
// folder, since there is test case to check whether the folder is empty.
Expand All @@ -101,19 +104,10 @@ Status MinioTestServer::GenerateCertificateFile() {
strlen(kMinioPrivateKey)));
ARROW_RETURN_NOT_OK(private_key_fd.Close());

// Set the trusted CA certificate
#if defined(__linux__)
arrow::fs::FileSystemGlobalOptions global_options;
global_options.tls_ca_dir_path = ca_path();
global_options.tls_verify_certificates = false;
ARROW_RETURN_NOT_OK(arrow::fs::Initialize(global_options));
#elif defined(_WIN32)
// Windows does not have a standard location for CA certificates
auto import_cert_process = std::make_unique<util::Process>();
ARROW_RETURN_NOT_OK(import_cert_process->SetExecutable("certutil"));
import_cert_process->SetArgs(
{"-addstore", "-f", "ArrowTest", public_crt_file.ToString()});
ARROW_RETURN_NOT_OK(import_cert_process->Execute());
#endif

return Status::OK();
}

Expand All @@ -137,12 +131,19 @@ Status MinioTestServer::Start() {
// Disable the embedded console (one less listening address to care about)
impl_->server_process_->SetEnv("MINIO_BROWSER", "off");
impl_->connect_string_ = GenerateConnectString();
std::vector<std::string> minio_args({"server", "--quiet", "--compat", "--address",
impl_->connect_string_,
impl_->temp_dir_->path().ToString()});
#ifdef MINIO_SERVER_WITH_TLS
ARROW_RETURN_NOT_OK(GenerateCertificateFile());
minio_args.push_back("--certs-dir");
minio_args.push_back(ca_path());
impl_->scheme_ = "https";
#endif // MINIO_SERVER_WITH_TLS

ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName));
// NOTE: --quiet makes startup faster by suppressing remote version check
impl_->server_process_->SetArgs({"server", "--quiet", "--compat", "--certs-dir",
ca_path(), "--address", impl_->connect_string_,
impl_->temp_dir_->path().ToString()});
impl_->server_process_->SetArgs(minio_args);
ARROW_RETURN_NOT_OK(impl_->server_process_->Execute());
return Status::OK();
}
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/filesystem/s3_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"

#if defined(__linux__)
# define MINIO_SERVER_WITH_TLS
#endif // Linux

namespace arrow {
namespace fs {

Expand All @@ -52,6 +56,8 @@ class MinioTestServer {

std::string ca_path() const;

std::string scheme() const;

private:
Status GenerateCertificateFile();
struct Impl;
Expand Down
23 changes: 21 additions & 2 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,13 @@ Result<S3Options> S3Options::FromUri(const Uri& uri, std::string* out_path) {
} else if (kv.first == "allow_bucket_deletion") {
ARROW_ASSIGN_OR_RAISE(options.allow_bucket_deletion,
::arrow::internal::ParseBoolean(kv.second));
} else if (kv.first == "tls_ca_file_path") {
options.tls_ca_file_path = kv.second;
} else if (kv.first == "tls_ca_dir_path") {
options.tls_ca_dir_path = kv.second;
} else if (kv.first == "tls_verify_certificates") {
ARROW_ASSIGN_OR_RAISE(options.tls_verify_certificates,
::arrow::internal::ParseBoolean(kv.second));
} else {
return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'");
}
Expand Down Expand Up @@ -444,6 +451,9 @@ bool S3Options::Equals(const S3Options& other) const {
background_writes == other.background_writes &&
allow_bucket_creation == other.allow_bucket_creation &&
allow_bucket_deletion == other.allow_bucket_deletion &&
tls_ca_file_path == other.tls_ca_file_path &&
tls_ca_dir_path == other.tls_ca_dir_path &&
tls_verify_certificates == other.tls_verify_certificates &&
sse_customer_key == other.sse_customer_key && default_metadata_equals &&
GetAccessKey() == other.GetAccessKey() &&
GetSecretKey() == other.GetSecretKey() &&
Expand Down Expand Up @@ -1131,12 +1141,21 @@ class ClientBuilder {
} else {
client_config_.retryStrategy = std::make_shared<ConnectRetryStrategy>();
}
if (!internal::global_options.tls_ca_file_path.empty()) {
if (!options_.tls_ca_file_path.empty()) {
client_config_.caFile = ToAwsString(options_.tls_ca_file_path);
} else if (!internal::global_options.tls_ca_file_path.empty()) {
client_config_.caFile = ToAwsString(internal::global_options.tls_ca_file_path);
}
if (!internal::global_options.tls_ca_dir_path.empty()) {
if (!options_.tls_ca_dir_path.empty()) {
client_config_.caPath = ToAwsString(options_.tls_ca_dir_path);
} else if (!internal::global_options.tls_ca_dir_path.empty()) {
client_config_.caPath = ToAwsString(internal::global_options.tls_ca_dir_path);
}
if (!options_.tls_verify_certificates) {
client_config_.verifySSL = options_.tls_verify_certificates;
} else if (!internal::global_options.tls_verify_certificates) {
client_config_.verifySSL = internal::global_options.tls_verify_certificates;
}

// Set proxy options if provided
if (!options_.proxy_options.scheme.empty()) {
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/filesystem/s3fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,20 @@ struct ARROW_EXPORT S3Options {
/// the SSE-C customized key (raw 32 bytes key).
std::string sse_customer_key;

/// Path to a single PEM file holding all TLS CA certificates
///
/// If empty, the underlying TLS library's defaults will be used.
std::string tls_ca_file_path;

/// Path to a directory holding TLS CA certificates in individual PEM files
/// named along the OpenSSL "hashed" format.
///
/// If empty, the underlying TLS library's defaults will be used.
std::string tls_ca_dir_path;

/// Controls whether to verify TLS certificates. Defaults to true.
bool tls_verify_certificates = true;

S3Options();

/// Configure with the default AWS credentials provider chain.
Expand Down
16 changes: 7 additions & 9 deletions cpp/src/arrow/filesystem/s3fs_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,12 @@ class MinioFixture : public benchmark::Fixture {
}

client_config_.endpointOverride = ToAwsString(minio_->connect_string());
client_config_.scheme = Aws::Http::Scheme::HTTPS;
// The caPath only take effect on linux according to the AWS SDK documentation
// https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/client-config.html
#if defined(__linux__)
client_config_.caPath = ToAwsString(minio_->ca_path());
#else
client_config_.verifySSL = false;
#endif
if (minio_->scheme() == "https") {
client_config_.scheme = Aws::Http::Scheme::HTTPS;
client_config_.verifySSL = false;
} else {
client_config_.scheme = Aws::Http::Scheme::HTTP;
}
if (!region_.empty()) {
client_config_.region = ToAwsString(region_);
}
Expand Down Expand Up @@ -117,7 +115,7 @@ class MinioFixture : public benchmark::Fixture {

void MakeFileSystem() {
options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key());
options_.scheme = "https";
options_.scheme = minio_->scheme();
if (!region_.empty()) {
options_.region = region_;
}
Expand Down
34 changes: 23 additions & 11 deletions cpp/src/arrow/filesystem/s3fs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,14 +206,12 @@ class S3TestMixin : public AwsTestMixin {
ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv()->GetOneServer());
client_config_.reset(new Aws::Client::ClientConfiguration());
client_config_->endpointOverride = ToAwsString(minio_->connect_string());
client_config_->scheme = Aws::Http::Scheme::HTTPS;
// The caPath only take effect on linux according to the AWS SDK documentation
// https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/client-config.html
#if defined(__linux__)
client_config_->caPath = ToAwsString(minio_->ca_path());
#else
client_config_->verifySSL = false;
#endif
if (minio_->scheme() == "https") {
client_config_->scheme = Aws::Http::Scheme::HTTPS;
client_config_->verifySSL = false;
} else {
client_config_->scheme = Aws::Http::Scheme::HTTP;
}
client_config_->retryStrategy =
std::make_shared<ConnectRetryStrategy>(kRetryInterval, kMaxRetryDuration);
credentials_ = {ToAwsString(minio_->access_key()), ToAwsString(minio_->secret_key())};
Expand Down Expand Up @@ -309,6 +307,16 @@ TEST_F(S3OptionsTest, FromUri) {
ASSERT_EQ(options.endpoint_override, "localhost");
ASSERT_EQ(path, "mybucket/foo/bar");

// Explicit tls related configuration
ASSERT_OK_AND_ASSIGN(
options,
S3Options::FromUri("s3://mybucket/foo/bar/?tls_ca_dir_path=/test&tls_ca_file_path=/"
"test/test.pem&tls_verify_certificates=false",
&path));
ASSERT_EQ(options.tls_ca_dir_path, "/test");
ASSERT_EQ(options.tls_ca_file_path, "/test/test.pem");
ASSERT_EQ(options.tls_verify_certificates, false);

// Missing bucket name
ASSERT_RAISES(Invalid, S3Options::FromUri("s3:///foo/bar/", &path));

Expand Down Expand Up @@ -451,6 +459,7 @@ class TestS3FS : public S3TestMixin {
// Most tests will create buckets
options_.allow_bucket_creation = true;
options_.allow_bucket_deletion = true;
options_.tls_verify_certificates = false;
MakeFileSystem();
// Set up test bucket
{
Expand Down Expand Up @@ -540,7 +549,7 @@ class TestS3FS : public S3TestMixin {
Result<std::shared_ptr<S3FileSystem>> MakeNewFileSystem(
io::IOContext io_context = io::default_io_context()) {
options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key());
options_.scheme = "https";
options_.scheme = minio_->scheme();
options_.endpoint_override = minio_->connect_string();
if (!options_.retry_strategy) {
options_.retry_strategy = std::make_shared<ShortRetryStrategy>();
Expand Down Expand Up @@ -1305,6 +1314,7 @@ TEST_F(TestS3FS, OpenInputFile) {
ASSERT_RAISES(IOError, file->Seek(10));
}

#ifdef MINIO_SERVER_WITH_TLS
TEST_F(TestS3FS, SSECustomerKeyMatch) {
// normal write/read with correct SSEC key
std::shared_ptr<io::OutputStream> stream;
Expand Down Expand Up @@ -1332,6 +1342,7 @@ TEST_F(TestS3FS, SSECustomerKeyMismatch) {
ASSERT_RAISES(IOError, fs_->OpenInputFile("bucket/newfile_with_sse_c"));
ASSERT_OK(RestoreTestBucket());
}
#endif // MINIO_SERVER_WITH_TLS

struct S3OptionsTestParameters {
bool background_writes{false};
Expand Down Expand Up @@ -1455,7 +1466,8 @@ TEST_F(TestS3FS, FileSystemFromUri) {
std::stringstream ss;
ss << "s3://" << minio_->access_key() << ":" << minio_->secret_key()
<< "@bucket/somedir/subdir/subfile"
<< "?scheme=https&endpoint_override=" << UriEscape(minio_->connect_string());
<< "?scheme=" << minio_->scheme()
<< "&endpoint_override=" << UriEscape(minio_->connect_string());

std::string path;
ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri(ss.str(), &path));
Expand Down Expand Up @@ -1557,7 +1569,7 @@ class TestS3FSGeneric : public S3TestMixin, public GenericFileSystemTest {
}

options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key());
options_.scheme = "https";
options_.scheme = minio_->scheme();
options_.endpoint_override = minio_->connect_string();
options_.retry_strategy = std::make_shared<ShortRetryStrategy>();
ASSERT_OK_AND_ASSIGN(s3fs_, S3FileSystem::Make(options_));
Expand Down

0 comments on commit b17c26a

Please sign in to comment.