From a7f355f5c2eded19ad7a023feeb05416218ba329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:15:01 +0200 Subject: [PATCH 01/11] build(conan): add optional aws-crt-cpp dependency Needed for AWS SigV4 request signing in the REST catalog client (AWS Glue's Iceberg REST endpoint rejects OAuth bearer tokens and requires SigV4). The CMake resolver is QUIET so the dep stays optional: when found, ICEBERG_REST_HAVE_SIGV4 is exported for downstream targets to gate the SigV4 code path; when absent, the REST client still builds. --- .../IcebergThirdpartyToolchain.cmake | 26 +++++++++++++++++++ conanfile.py | 5 ++++ 2 files changed, 31 insertions(+) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8b5ab67c8..58ea7545a 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -570,6 +570,32 @@ if(ICEBERG_BUILD_BUNDLE) resolve_zstd_dependency() endif() +# ---------------------------------------------------------------------- +# aws-crt-cpp (optional, used by REST catalog SigV4 AuthManager). +# If not found, the REST client still builds and SigV4 returns NotImplemented +# at runtime. Install via Conan or vcpkg to enable AWS Glue integration. + +function(resolve_aws_crt_cpp_dependency) + find_package(aws-crt-cpp CONFIG QUIET) + if(aws-crt-cpp_FOUND) + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES aws-crt-cpp) + set(ICEBERG_REST_HAVE_SIGV4 + TRUE + PARENT_SCOPE) + message(STATUS "Found aws-crt-cpp (${aws-crt-cpp_VERSION_STRING}); enabling REST " + "SigV4 auth") + set(ICEBERG_SYSTEM_DEPENDENCIES + ${ICEBERG_SYSTEM_DEPENDENCIES} + PARENT_SCOPE) + else() + set(ICEBERG_REST_HAVE_SIGV4 + FALSE + PARENT_SCOPE) + message(STATUS "aws-crt-cpp not found; REST SigV4 auth will return NotImplemented") + endif() +endfunction() + if(ICEBERG_BUILD_REST) resolve_cpr_dependency() + resolve_aws_crt_cpp_dependency() endif() diff --git a/conanfile.py b/conanfile.py index a0a650566..4b7116ec6 100644 --- a/conanfile.py +++ b/conanfile.py @@ -65,6 +65,10 @@ def requirements(self): self.requires("zlib/[>=1.2.11 <2]") self.requires("snappy/[>=1.1 <2]") self.requires("zstd/[>=1.5 <2]") + # AWS CRT (C++) — used by the REST catalog's SigV4 AuthManager to sign + # requests for AWS Glue's Iceberg REST endpoint. Pulls aws-c-auth, + # aws-c-common, aws-c-cal, aws-c-http, aws-c-io transitively. + self.requires("aws-crt-cpp/[>=0.26 <1]") def generate(self): tc = CMakeToolchain(self) @@ -152,6 +156,7 @@ def package_info(self): self.cpp_info.components["iceberg_rest"].requires = [ "iceberg", "vendored_cpr", + "aws-crt-cpp::aws-crt-cpp" # Conan component; CMake target is AWS::aws-crt-cpp, ] # Bundle (Arrow/Parquet integration) From 7d0fc329ab3f133c2431889d12042e5847b71d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:15:08 +0200 Subject: [PATCH 02/11] feat(rest/auth): extend AuthSession with SignableRequest overload Adds a SignableRequest struct (method, URL, query params, body) and a virtual Authenticate(SignableRequest, headers) overload that by default forwards to the existing headers-only method. Needed for SigV4, which must hash the body and include the method / URL / query in its canonical request. Existing None/Basic/OAuth2 sessions ignore the extra context and need no changes. --- src/iceberg/catalog/rest/auth/auth_session.h | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/iceberg/catalog/rest/auth/auth_session.h b/src/iceberg/catalog/rest/auth/auth_session.h index 89f0c442e..8f890644f 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.h +++ b/src/iceberg/catalog/rest/auth/auth_session.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "iceberg/catalog/rest/iceberg_rest_export.h" @@ -33,6 +34,24 @@ namespace iceberg::rest::auth { +/// \brief A description of an outgoing HTTP request that an AuthSession may +/// need to inspect in order to authenticate (e.g., SigV4 needs the +/// method, URL, query params and body hash to build its canonical +/// request). Header-only auth schemes ignore it. +struct SignableRequest { + /// HTTP method, uppercase (e.g., "GET", "POST"). + std::string_view method; + /// Full request URL including scheme, host, path, but *without* query + /// string — query parameters are passed separately so callers don't need + /// to re-encode them. + std::string_view url; + /// Query parameters (pre-encoding). May be null for requests that have + /// none. Not owned. + const std::unordered_map* query_params = nullptr; + /// Raw request body. Empty for GET/HEAD/DELETE without a body. + std::string_view body; +}; + /// \brief An authentication session that can authenticate outgoing HTTP requests. class ICEBERG_REST_EXPORT AuthSession { public: @@ -53,6 +72,18 @@ class ICEBERG_REST_EXPORT AuthSession { /// - RestError: HTTP errors from authentication service virtual Status Authenticate(std::unordered_map& headers) = 0; + /// \brief Authenticate using full request context. + /// + /// Overloaded form used by auth schemes that need to see the request method, + /// URL, query parameters and body in order to compute their header value + /// (e.g., SigV4). The default implementation ignores the request context and + /// forwards to the headers-only Authenticate(), which is what every non- + /// signing auth scheme wants. + virtual Status Authenticate(const SignableRequest& /*request*/, + std::unordered_map& headers) { + return Authenticate(headers); + } + /// \brief Close the session and release any resources. /// /// This method is called when the session is no longer needed. For stateful From d9614b53d947e19de44c62604413bf0ec4c090ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:15:24 +0200 Subject: [PATCH 03/11] feat(rest/http): plumb request context through BuildHeaders Every HttpClient method (Get/Post/PostForm/Head/Delete) now forwards method, URL, query params, and body to BuildHeaders so the AuthSession's richer Authenticate overload can see what's being sent. PostForm pre-encodes the form body once via UrlEncoder so the bytes that get signed match what libcurl puts on the wire. No-op for existing auth schemes (they don't override the new overload); unlocks SigV4 signing in a follow-up. --- src/iceberg/catalog/rest/http_client.cc | 57 +++++++++++++++++++++---- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index d45cc5b7c..79a0154b4 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -30,6 +30,7 @@ #include "iceberg/json_serde_internal.h" #include "iceberg/result.h" #include "iceberg/util/macros.h" +#include "iceberg/util/url_encoder.h" namespace iceberg::rest { @@ -69,7 +70,16 @@ namespace { constexpr std::string_view kRestExceptionType = "RESTException"; /// \brief Prepare headers for an HTTP request. +/// +/// Merges default + per-request headers and then lets the AuthSession +/// authenticate the request. The full request context (method, URL, query +/// params, body) is forwarded to the session so auth schemes that need to +/// sign over the request (e.g., SigV4) see what they need; header-only +/// schemes ignore the extra context. Result BuildHeaders( + std::string_view method, std::string_view url, + const std::unordered_map* query_params, + std::string_view body, const std::unordered_map& request_headers, const std::unordered_map& default_headers, auth::AuthSession& session) { @@ -77,10 +87,32 @@ Result BuildHeaders( for (const auto& [key, val] : request_headers) { headers.insert_or_assign(key, val); } - ICEBERG_RETURN_UNEXPECTED(session.Authenticate(headers)); + auth::SignableRequest signable{.method = method, + .url = url, + .query_params = query_params, + .body = body}; + ICEBERG_RETURN_UNEXPECTED(session.Authenticate(signable, headers)); return cpr::Header(headers.begin(), headers.end()); } +/// \brief Serialize a form-data map to an application/x-www-form-urlencoded +/// body string. Used so that SigV4 can hash the payload it will +/// actually see on the wire. +std::string EncodeFormBody(const std::unordered_map& form_data) { + std::string out; + bool first = true; + for (const auto& [key, val] : form_data) { + if (!first) { + out.push_back('&'); + } + first = false; + out.append(UrlEncoder::Encode(key)); + out.push_back('='); + out.append(UrlEncoder::Encode(val)); + } + return out; +} + cpr::SslOptions BuildSslOptions(const SslConfig& config) { cpr::SslOptions opts; opts.verify_host = config.verify; @@ -174,8 +206,9 @@ Result HttpClient::Get( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, + BuildHeaders("GET", path, ¶ms, /*body=*/{}, headers, default_headers_, session)); cpr::Response response = cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -190,7 +223,8 @@ Result HttpClient::Post( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + BuildHeaders("POST", path, /*query_params=*/nullptr, body, + headers, default_headers_, session)); cpr::Response response = cpr::Post(cpr::Url{path}, cpr::Body{body}, all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -207,8 +241,12 @@ Result HttpClient::PostForm( const ErrorHandler& error_handler, auth::AuthSession& session) { std::unordered_map form_headers(headers); form_headers.insert_or_assign(kHeaderContentType, kMimeTypeFormUrlEncoded); + // Encode the form body ourselves so that the same bytes we'll send on the + // wire are available to sign (SigV4 hashes the payload). + std::string form_body = EncodeFormBody(form_data); ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(form_headers, default_headers_, session)); + BuildHeaders("POST", path, /*query_params=*/nullptr, form_body, + form_headers, default_headers_, session)); std::vector pair_list; pair_list.reserve(form_data.size()); for (const auto& [key, val] : form_data) { @@ -228,7 +266,8 @@ Result HttpClient::Head( const std::string& path, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + BuildHeaders("HEAD", path, /*query_params=*/nullptr, /*body=*/{}, + headers, default_headers_, session)); cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -242,8 +281,10 @@ Result HttpClient::Delete( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, + BuildHeaders("DELETE", path, ¶ms, /*body=*/{}, headers, default_headers_, + session)); cpr::Response response = cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, BuildSslOptions(ssl_config_), *connection_pool_); From ac89826c4bfadf97bcb01dfe3e1bbd2a3bf2216e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:15:32 +0200 Subject: [PATCH 04/11] feat(rest/auth): add SigV4 credential property keys Declares property keys for SigV4 static credentials (access-key-id, secret-access-key, session-token) and a credentials-provider selector that toggles between static keys and the aws-crt-cpp default credential chain. Also adds the execute-api signing-service default that matches the Java Iceberg client. Consumed by the SigV4 manager in a follow-up; no behaviour change on its own. --- .../catalog/rest/auth/auth_properties.h | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/iceberg/catalog/rest/auth/auth_properties.h b/src/iceberg/catalog/rest/auth/auth_properties.h index f6a20baa3..442c2d1ab 100644 --- a/src/iceberg/catalog/rest/auth/auth_properties.h +++ b/src/iceberg/catalog/rest/auth/auth_properties.h @@ -58,6 +58,27 @@ class ICEBERG_REST_EXPORT AuthProperties : public ConfigBase { inline static const std::string kSigV4Service = "rest.auth.sigv4.service"; inline static const std::string kSigV4DelegateAuthType = "rest.auth.sigv4.delegate-auth-type"; + inline static const std::string kSigV4AccessKeyId = + "rest.auth.sigv4.access-key-id"; + inline static const std::string kSigV4SecretAccessKey = + "rest.auth.sigv4.secret-access-key"; + inline static const std::string kSigV4SessionToken = + "rest.auth.sigv4.session-token"; + /// Selects which credential source drives SigV4 signing. Values: + /// "static" — use the access-key-id/secret-access-key/session-token + /// properties. Best for tests and short-lived scripts. + /// "default" — aws-crt-cpp's cached default chain: + /// Environment → Profile → STS Web Identity (IRSA) → IMDSv2/ECS. + /// Best for EC2/EKS/ECS deployments where creds rotate. + /// If unset, we infer: "static" when an access-key-id is configured, + /// "default" otherwise. + inline static const std::string kSigV4CredentialsProvider = + "rest.auth.sigv4.credentials-provider"; + inline static constexpr std::string_view kSigV4ProviderStatic = "static"; + inline static constexpr std::string_view kSigV4ProviderDefault = "default"; + /// Default service name when rest.auth.sigv4.service is unset — matches the + /// Java Iceberg client, which defaults to AWS API Gateway's signing name. + inline static constexpr std::string_view kSigV4DefaultService = "execute-api"; // ---- OAuth2 entries ---- From b28948689330084138c72818783422c21d4f9eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:15:54 +0200 Subject: [PATCH 05/11] feat(rest/auth): implement SigV4 AuthManager via aws-crt-cpp Wraps aws-crt-cpp's Sigv4HttpRequestSigner + AwsSigningConfig to sign outgoing REST requests for AWS Glue's Iceberg REST endpoint. - SigV4Signer holds a shared ICredentialsProvider built from config: either static keys (rest.auth.sigv4.access-key-id/...) or the aws-crt-cpp default chain (Environment -> Profile -> STS Web Identity -> IMDS/ECS), with auto-detection when the provider type is unset. - Signing options mirror the SigV4 spec for non-S3 services: double URI encoding, path normalization, explicit sha256 payload hash via x-amz-content-sha256 (worked around the Java Iceberg empty-body SDK bug). - SigV4AuthSession runs an optional delegate session first so its Authorization header is covered by the signature; the delegate's Authorization is renamed to X-Iceberg-Access-Delegation before signing to avoid aws-c-auth's reserved-header rejection. - MakeSigV4Manager is registered in the auth-manager registry when ICEBERG_REST_WITH_SIGV4 is defined; otherwise the registry keeps returning NotImplemented. - CMakeLists gates the sigv4_signer.cc source + AWS::aws-crt-cpp link on the cache variable set by the toolchain resolver. --- src/iceberg/catalog/rest/CMakeLists.txt | 18 + .../catalog/rest/auth/auth_manager_internal.h | 6 + .../catalog/rest/auth/auth_managers.cc | 6 +- src/iceberg/catalog/rest/auth/sigv4_signer.cc | 532 ++++++++++++++++++ src/iceberg/catalog/rest/auth/sigv4_signer.h | 112 ++++ 5 files changed, 673 insertions(+), 1 deletion(-) create mode 100644 src/iceberg/catalog/rest/auth/sigv4_signer.cc create mode 100644 src/iceberg/catalog/rest/auth/sigv4_signer.h diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index e91b12962..f66a105ac 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -33,6 +33,10 @@ set(ICEBERG_REST_SOURCES rest_util.cc types.cc) +if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_SOURCES auth/sigv4_signer.cc) +endif() + set(ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS) @@ -51,9 +55,23 @@ list(APPEND "$,iceberg::iceberg_shared,iceberg::iceberg_static>" "$,iceberg::cpr,cpr::cpr>") +if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS AWS::aws-crt-cpp) + list(APPEND ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS AWS::aws-crt-cpp) + list(APPEND ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS AWS::aws-crt-cpp) + list(APPEND ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS AWS::aws-crt-cpp) +endif() + +set(ICEBERG_REST_DEFINITIONS) +if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_DEFINITIONS ICEBERG_REST_WITH_SIGV4) +endif() + add_iceberg_lib(iceberg_rest SOURCES ${ICEBERG_REST_SOURCES} + DEFINITIONS + ${ICEBERG_REST_DEFINITIONS} SHARED_LINK_LIBS ${ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS} STATIC_LINK_LIBS diff --git a/src/iceberg/catalog/rest/auth/auth_manager_internal.h b/src/iceberg/catalog/rest/auth/auth_manager_internal.h index 051d05505..08b3eab67 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/auth_manager_internal.h @@ -47,4 +47,10 @@ Result> MakeOAuth2Manager( std::string_view name, const std::unordered_map& properties); +/// \brief Create a SigV4 authentication manager for AWS services such as +/// the AWS Glue Iceberg REST endpoint. +Result> MakeSigV4Manager( + std::string_view name, + const std::unordered_map& properties); + } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/auth_managers.cc b/src/iceberg/catalog/rest/auth/auth_managers.cc index f55885d75..55dd43b74 100644 --- a/src/iceberg/catalog/rest/auth/auth_managers.cc +++ b/src/iceberg/catalog/rest/auth/auth_managers.cc @@ -62,11 +62,15 @@ std::string InferAuthType( } AuthManagerRegistry CreateDefaultRegistry() { - return { + AuthManagerRegistry registry = { {AuthProperties::kAuthTypeNone, MakeNoopAuthManager}, {AuthProperties::kAuthTypeBasic, MakeBasicAuthManager}, {AuthProperties::kAuthTypeOAuth2, MakeOAuth2Manager}, }; +#ifdef ICEBERG_REST_WITH_SIGV4 + registry.emplace(AuthProperties::kAuthTypeSigV4, MakeSigV4Manager); +#endif + return registry; } // Get the global registry of auth manager factories. diff --git a/src/iceberg/catalog/rest/auth/sigv4_signer.cc b/src/iceberg/catalog/rest/auth/sigv4_signer.cc new file mode 100644 index 000000000..0018871f2 --- /dev/null +++ b/src/iceberg/catalog/rest/auth/sigv4_signer.cc @@ -0,0 +1,532 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/rest/auth/sigv4_signer.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "iceberg/catalog/rest/auth/auth_manager.h" +#include "iceberg/catalog/rest/auth/auth_manager_internal.h" +#include "iceberg/catalog/rest/auth/auth_managers.h" +#include "iceberg/catalog/rest/auth/auth_properties.h" +#include "iceberg/util/macros.h" + +namespace iceberg::rest::auth { + +namespace { + +/// aws-crt-cpp requires a single ApiHandle alive for the lifetime of any CRT +/// user — it initializes aws-c-common / aws-c-io / etc. Use a function-local +/// static so it's lazily constructed on first use and destroyed at exit. +Aws::Crt::ApiHandle& GlobalApiHandle() { + static Aws::Crt::ApiHandle handle; + return handle; +} + +std::string Sha256Hex(std::string_view data) { + unsigned char digest[SHA256_DIGEST_LENGTH]; + ::SHA256(reinterpret_cast(data.data()), data.size(), digest); + static constexpr char kHex[] = "0123456789abcdef"; + std::string out(SHA256_DIGEST_LENGTH * 2, '\0'); + for (size_t i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + out[2 * i] = kHex[digest[i] >> 4]; + out[2 * i + 1] = kHex[digest[i] & 0x0F]; + } + return out; +} + +/// Extracts host[:port] from a URL like "https://host:port/path?q=1". +std::string_view ExtractHost(std::string_view url) { + auto scheme_end = url.find("://"); + std::string_view rest = + (scheme_end == std::string_view::npos) ? url : url.substr(scheme_end + 3); + auto slash = rest.find('/'); + if (slash != std::string_view::npos) rest = rest.substr(0, slash); + auto q = rest.find('?'); + if (q != std::string_view::npos) rest = rest.substr(0, q); + return rest; +} + +/// Extracts the path (and any inline query string) from a URL; returns "/" if +/// none is present. The caller must append its own query_params separately. +std::string_view ExtractPath(std::string_view url) { + auto scheme_end = url.find("://"); + std::string_view rest = + (scheme_end == std::string_view::npos) ? url : url.substr(scheme_end + 3); + auto slash = rest.find('/'); + if (slash == std::string_view::npos) { + return std::string_view("/"); + } + return rest.substr(slash); +} + +bool EqualsIgnoreCase(std::string_view a, std::string_view b) { + if (a.size() != b.size()) return false; + for (size_t i = 0; i < a.size(); ++i) { + auto lower = [](char c) { + return (c >= 'A' && c <= 'Z') ? static_cast(c + 32) : c; + }; + if (lower(a[i]) != lower(b[i])) return false; + } + return true; +} + +std::string ByteCursorToStdString(Aws::Crt::ByteCursor bc) { + return std::string(reinterpret_cast(bc.ptr), bc.len); +} + +} // namespace + +class SigV4Signer::Impl { + public: + Impl(SigV4Config config, + std::shared_ptr provider, + std::optional fixed_time) + : config_(std::move(config)), + provider_(std::move(provider)), + fixed_time_(fixed_time) {} + + const SigV4Config& config() const { return config_; } + const std::shared_ptr& provider() const { + return provider_; + } + const std::optional& fixed_time() const { + return fixed_time_; + } + + private: + SigV4Config config_; + std::shared_ptr provider_; + /// When set, overrides the signing timestamp (tests only). + std::optional fixed_time_; +}; + +SigV4Signer::SigV4Signer(std::unique_ptr impl) : impl_(std::move(impl)) {} +SigV4Signer::~SigV4Signer() = default; + +namespace { + +/// Builds the aws-crt-cpp credentials provider matching the SigV4Config. +/// For the default chain we explicitly pass the static default ClientBootstrap +/// so aws-crt-cpp can make network calls for IMDS / STS Web Identity. +Result> BuildCredentialsProvider( + const SigV4Config& config) { + namespace Auth = Aws::Crt::Auth; + switch (config.provider) { + case SigV4CredentialsProvider::kStatic: { + if (config.access_key_id.empty() || config.secret_access_key.empty()) { + return InvalidArgument( + "SigV4: static provider requires access-key-id and secret-access-key"); + } + Auth::CredentialsProviderStaticConfig static_cfg; + static_cfg.AccessKeyId = Aws::Crt::ByteCursorFromCString(config.access_key_id.c_str()); + static_cfg.SecretAccessKey = + Aws::Crt::ByteCursorFromCString(config.secret_access_key.c_str()); + if (!config.session_token.empty()) { + static_cfg.SessionToken = + Aws::Crt::ByteCursorFromCString(config.session_token.c_str()); + } + auto provider = Auth::CredentialsProvider::CreateCredentialsProviderStatic(static_cfg); + if (!provider) { + return AuthenticationFailed("SigV4: failed to build static credentials provider"); + } + return provider; + } + case SigV4CredentialsProvider::kDefault: { + Auth::CredentialsProviderChainDefaultConfig chain_cfg; + // The API handle we keep alive for the process owns the default + // ClientBootstrap / EventLoopGroup / HostResolver. The default chain + // needs them to reach IMDS and STS. + chain_cfg.Bootstrap = Aws::Crt::ApiHandle::GetOrCreateStaticDefaultClientBootstrap(); + if (chain_cfg.Bootstrap == nullptr) { + return IOError( + "SigV4: could not create default ClientBootstrap for credentials chain"); + } + auto provider = + Auth::CredentialsProvider::CreateCredentialsProviderChainDefault(chain_cfg); + if (!provider) { + return AuthenticationFailed( + "SigV4: failed to build default credentials provider chain"); + } + return provider; + } + } + return InvalidArgument("SigV4: unknown credentials provider"); +} + +} // namespace + +Result> SigV4Signer::Make(SigV4Config config) { + if (config.region.empty()) { + return InvalidArgument("SigV4: region is required"); + } + if (config.service.empty()) { + return InvalidArgument("SigV4: service is required"); + } + GlobalApiHandle(); + ICEBERG_ASSIGN_OR_RAISE(auto provider, BuildCredentialsProvider(config)); + return std::shared_ptr(new SigV4Signer( + std::make_unique(std::move(config), std::move(provider), std::nullopt))); +} + +Result> SigV4Signer::MakeForTests( + SigV4Config config, std::chrono::system_clock::time_point signing_time) { + if (config.region.empty()) { + return InvalidArgument("SigV4: region is required"); + } + if (config.service.empty()) { + return InvalidArgument("SigV4: service is required"); + } + GlobalApiHandle(); + ICEBERG_ASSIGN_OR_RAISE(auto provider, BuildCredentialsProvider(config)); + return std::shared_ptr(new SigV4Signer( + std::make_unique(std::move(config), std::move(provider), signing_time))); +} + +Result> SigV4Signer::Sign( + const SignableRequest& request, + const std::unordered_map& existing_headers) const { + namespace Crt = Aws::Crt; + namespace Auth = Aws::Crt::Auth; + namespace Http = Aws::Crt::Http; + + const SigV4Config& cfg = impl_->config(); + + auto req = std::make_shared(); + if (!*req) { + return IOError("SigV4: failed to allocate HttpRequest"); + } + + // Build "path?k=v&k=v" from the URL and any query_params. aws-crt-cpp will + // URI-encode per its double-encode setting, so we pass raw values here. + std::string path_and_query(ExtractPath(request.url)); + if (request.query_params != nullptr && !request.query_params->empty()) { + bool already_has_query = path_and_query.find('?') != std::string::npos; + path_and_query.push_back(already_has_query ? '&' : '?'); + bool first = true; + for (const auto& [k, v] : *request.query_params) { + if (!first) path_and_query.push_back('&'); + first = false; + path_and_query.append(k).push_back('='); + path_and_query.append(v); + } + } + + const std::string method(request.method); + req->SetMethod(Crt::ByteCursorFromCString(method.c_str())); + req->SetPath(Crt::ByteCursorFromCString(path_and_query.c_str())); + + // Keep header name/value storage alive until AddHeader has copied them. + // (aws-c-http copies internally, but we keep the strings around during the + // loop to be safe against future library changes.) + // + // aws-c-auth rejects requests that already contain any header it plans to + // add itself (Authorization, X-Amz-Date, X-Amz-Content-Sha256, + // X-Amz-Security-Token). When a delegate auth session has added + // "Authorization: Bearer ...", we rename it to a non-conflicting header so + // (a) the delegate's credential is still covered by the SigV4 signature, + // and (b) aws-c-auth can add its own Authorization header with the + // signature. This mirrors the Java Iceberg RESTSigV4AuthSession's rename + // to X-Iceberg-Access-Delegation. + static constexpr std::string_view kDelegatedAuthHeader = + "X-Iceberg-Access-Delegation"; + std::vector> header_storage; + header_storage.reserve(existing_headers.size() + 1); + bool has_host = false; + for (const auto& [k, v] : existing_headers) { + if (EqualsIgnoreCase(k, "Authorization")) { + header_storage.emplace_back(std::string(kDelegatedAuthHeader), v); + } else { + header_storage.emplace_back(k, v); + } + if (EqualsIgnoreCase(k, "host")) has_host = true; + } + if (!has_host) { + header_storage.emplace_back("Host", std::string(ExtractHost(request.url))); + } + + std::unordered_set pre_signed_header_names; + pre_signed_header_names.reserve(header_storage.size()); + for (const auto& [k, v] : header_storage) { + Http::HttpHeader h{}; + h.name = Crt::ByteCursorFromCString(k.c_str()); + h.value = Crt::ByteCursorFromCString(v.c_str()); + req->AddHeader(h); + pre_signed_header_names.insert(k); + } + + // Build signing config. + Auth::AwsSigningConfig signing_config; + signing_config.SetSigningAlgorithm(Auth::SigningAlgorithm::SigV4); + signing_config.SetSignatureType(Auth::SignatureType::HttpRequestViaHeaders); + signing_config.SetRegion(Crt::String(cfg.region.begin(), cfg.region.end())); + signing_config.SetService(Crt::String(cfg.service.begin(), cfg.service.end())); + // Glue and most AWS services (non-S3) double-encode the URI path in the + // canonical request; S3 is the sole exception. We target non-S3 so set true. + signing_config.SetUseDoubleUriEncode(true); + signing_config.SetShouldNormalizeUriPath(true); + // Explicit payload hash — always emit the header, compute it ourselves so we + // don't depend on the library's payload-stream handling (Java Iceberg hit + // correctness bugs around empty-body SDK defaults; PR apache/iceberg#6951). + const std::string payload_hash = Sha256Hex(request.body); + signing_config.SetSignedBodyValue( + Crt::String(payload_hash.begin(), payload_hash.end())); + signing_config.SetSignedBodyHeader(Auth::SignedBodyHeaderType::XAmzContentSha256); + + // Test-only: pin the signing timestamp so we can reproduce AWS's + // canonical test vectors bytewise. In production the default is + // "now", which is what we want. + if (impl_->fixed_time().has_value()) { + signing_config.SetSigningTimepoint(Aws::Crt::DateTime(*impl_->fixed_time())); + } + + // Hand aws-crt-cpp the credentials provider. For the default chain this is + // where Environment / Profile / STS Web Identity / IMDS resolution happens, + // transparently cached and refreshed inside aws-c-auth. + signing_config.SetCredentialsProvider(impl_->provider()); + + // Sign. For a static provider the callback fires synchronously; for IMDS / + // STS the first call blocks while the provider fetches credentials (then + // caches them until near expiry). Either way we wrap in a promise so the + // public API stays sync. + Auth::Sigv4HttpRequestSigner signer; + std::promise promise; + auto future = promise.get_future(); + const bool scheduled = signer.SignRequest( + req, signing_config, + [&promise](const std::shared_ptr& /*signed_req*/, + int error_code) { promise.set_value(error_code); }); + if (!scheduled) { + return AuthenticationFailed("SigV4: Sigv4HttpRequestSigner::SignRequest failed to " + "schedule signing"); + } + const int error_code = future.get(); + if (error_code != 0) { + return AuthenticationFailed("SigV4: signing failed (aws error {})", error_code); + } + + // Collect headers that the signer added (anything not already in + // `existing_headers`). These are what the HTTP client needs to append. + std::unordered_map out; + const size_t header_count = req->GetHeaderCount(); + for (size_t i = 0; i < header_count; ++i) { + auto header_opt = req->GetHeader(i); + if (!header_opt.has_value()) continue; + const Http::HttpHeader& h = header_opt.value(); + std::string name = ByteCursorToStdString(h.name); + if (pre_signed_header_names.contains(name)) { + continue; + } + out.emplace(std::move(name), ByteCursorToStdString(h.value)); + } + return out; +} + +// --------------------------------------------------------------------------- +// SigV4 AuthManager + AuthSession (registered below via MakeSigV4Manager). + +namespace { + +/// \brief AuthSession that signs each outgoing request with AWS SigV4. +/// +/// If ``delegate`` is non-null, it is invoked first so its headers (e.g., +/// ``Authorization: Bearer `` added by OAuth2) participate in the +/// signed header set — matching the behaviour of Java Iceberg's +/// ``RESTSigV4AuthSession``. SigV4 then overwrites ``Authorization`` with +/// its own signature, so the delegate header is covered by the signature +/// but not visible to the server as a second auth scheme. +class SigV4AuthSession : public AuthSession { + public: + SigV4AuthSession(std::shared_ptr signer, + std::shared_ptr delegate) + : signer_(std::move(signer)), delegate_(std::move(delegate)) {} + + Status Authenticate( + [[maybe_unused]] std::unordered_map& headers) override { + // Header-only signing is impossible for SigV4 (we need method/url/body). + // Callers routed through HttpClient always use the SignableRequest + // overload below; reaching here indicates a caller that does not plumb + // request context. + return AuthenticationFailed( + "SigV4 requires request context; call Authenticate(SignableRequest, headers)."); + } + + Status Authenticate(const SignableRequest& request, + std::unordered_map& headers) override { + if (delegate_) { + ICEBERG_RETURN_UNEXPECTED(delegate_->Authenticate(request, headers)); + } + ICEBERG_ASSIGN_OR_RAISE(auto signed_headers, signer_->Sign(request, headers)); + for (auto& [k, v] : signed_headers) { + headers.insert_or_assign(std::move(k), std::move(v)); + } + return {}; + } + + Status Close() override { + if (delegate_) { + return delegate_->Close(); + } + return {}; + } + + private: + std::shared_ptr signer_; + std::shared_ptr delegate_; +}; + +/// \brief Manager that constructs SigV4AuthSessions for a REST catalog. +class SigV4Manager : public AuthManager { + public: + SigV4Manager(std::shared_ptr signer, + std::unique_ptr delegate) + : signer_(std::move(signer)), delegate_(std::move(delegate)) {} + + Result> InitSession( + HttpClient& init_client, + const std::unordered_map& properties) override { + std::shared_ptr delegate_session; + if (delegate_) { + ICEBERG_ASSIGN_OR_RAISE(delegate_session, + delegate_->InitSession(init_client, properties)); + } + return std::make_shared(signer_, std::move(delegate_session)); + } + + Result> CatalogSession( + HttpClient& client, + const std::unordered_map& properties) override { + std::shared_ptr delegate_session; + if (delegate_) { + ICEBERG_ASSIGN_OR_RAISE(delegate_session, + delegate_->CatalogSession(client, properties)); + } + return std::make_shared(signer_, std::move(delegate_session)); + } + + Status Close() override { + if (delegate_) { + return delegate_->Close(); + } + return {}; + } + + private: + std::shared_ptr signer_; + std::unique_ptr delegate_; +}; + +std::string GetOr(const std::unordered_map& properties, + const std::string& key, std::string_view fallback) { + auto it = properties.find(key); + if (it == properties.end() || it->second.empty()) { + return std::string(fallback); + } + return it->second; +} + +/// Resolve the credentials provider enum from the property value. +/// Auto-detects when the property is unset: static if an access key is +/// configured, default chain otherwise. +Result ResolveCredentialsProvider( + std::string_view raw, bool have_static_keys) { + if (raw.empty()) { + return have_static_keys ? SigV4CredentialsProvider::kStatic + : SigV4CredentialsProvider::kDefault; + } + if (raw == AuthProperties::kSigV4ProviderStatic) { + return SigV4CredentialsProvider::kStatic; + } + if (raw == AuthProperties::kSigV4ProviderDefault) { + return SigV4CredentialsProvider::kDefault; + } + return InvalidArgument( + "SigV4: unsupported credentials-provider '{}' (expected 'static' or 'default')", + raw); +} + +} // namespace + +Result> MakeSigV4Manager( + std::string_view name, + const std::unordered_map& properties) { + SigV4Config sigv4_config; + sigv4_config.region = GetOr(properties, AuthProperties::kSigV4Region, ""); + sigv4_config.service = + GetOr(properties, AuthProperties::kSigV4Service, AuthProperties::kSigV4DefaultService); + sigv4_config.access_key_id = GetOr(properties, AuthProperties::kSigV4AccessKeyId, ""); + sigv4_config.secret_access_key = + GetOr(properties, AuthProperties::kSigV4SecretAccessKey, ""); + sigv4_config.session_token = GetOr(properties, AuthProperties::kSigV4SessionToken, ""); + + const std::string provider_raw = + GetOr(properties, AuthProperties::kSigV4CredentialsProvider, ""); + // Catch the common typo where a user sets one of the static-key pair but + // not the other — without this check we'd silently fall through to the + // default chain and the user would spend a while wondering why their + // configured secret is being ignored. + const bool has_ak = !sigv4_config.access_key_id.empty(); + const bool has_sk = !sigv4_config.secret_access_key.empty(); + if (provider_raw.empty() && has_ak != has_sk) { + return InvalidArgument( + "SigV4: access-key-id and secret-access-key must be set together"); + } + ICEBERG_ASSIGN_OR_RAISE(sigv4_config.provider, + ResolveCredentialsProvider(provider_raw, has_ak)); + + ICEBERG_ASSIGN_OR_RAISE(auto signer, SigV4Signer::Make(std::move(sigv4_config))); + + // Optional delegate auth type: the inner manager (e.g., OAuth2) produces a + // session whose headers are then SigV4-signed on top. Matches the Java + // RESTSigV4AuthManager behaviour. + std::unique_ptr delegate; + auto delegate_it = properties.find(AuthProperties::kSigV4DelegateAuthType); + if (delegate_it != properties.end() && !delegate_it->second.empty()) { + // Avoid infinite recursion if a user sets the delegate type back to sigv4. + if (delegate_it->second == AuthProperties::kAuthTypeSigV4) { + return InvalidArgument( + "SigV4 delegate auth type cannot itself be 'sigv4' (would recurse)"); + } + // Build a properties map with rest.auth.type set to the delegate type, so + // AuthManagers::Load resolves the inner manager. + std::unordered_map delegate_properties = properties; + delegate_properties[AuthProperties::kAuthType] = delegate_it->second; + ICEBERG_ASSIGN_OR_RAISE(delegate, + AuthManagers::Load(name, delegate_properties)); + } + + return std::make_unique(std::move(signer), std::move(delegate)); +} + +} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/sigv4_signer.h b/src/iceberg/catalog/rest/auth/sigv4_signer.h new file mode 100644 index 000000000..088349a6f --- /dev/null +++ b/src/iceberg/catalog/rest/auth/sigv4_signer.h @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "iceberg/catalog/rest/auth/auth_session.h" +#include "iceberg/catalog/rest/iceberg_rest_export.h" +#include "iceberg/result.h" + +/// \file iceberg/catalog/rest/auth/sigv4_signer.h +/// \brief AWS Signature V4 request signer for the REST catalog. +/// +/// Wraps aws-crt-cpp's Sigv4HttpRequestSigner so callers can ask "what +/// headers do I need to add to this HTTP request so AWS accepts it?" +/// without depending on aws-crt-cpp types directly. + +namespace iceberg::rest::auth { + +/// \brief Which credential source the signer should use. +enum class SigV4CredentialsProvider { + /// Static access-key / secret-access-key (/ optional session-token) from + /// config. Best for tests and short-lived scripts. + kStatic, + /// aws-crt-cpp's SDK-standard default chain, cached: + /// Environment → Profile → STS Web Identity (IRSA) → IMDSv2 / ECS. + /// Credentials are refreshed automatically by aws-crt-cpp before expiry. + /// Best for EC2/EKS/ECS deployments and AWS Glue integration. + kDefault, +}; + +/// \brief SigV4 signing configuration. +struct SigV4Config { + /// AWS region (e.g., "us-east-1"). Required. + std::string region; + /// AWS signing service name. "glue" for AWS Glue's Iceberg REST endpoint; + /// "execute-api" for API Gateway; "s3tables" for S3 Tables. Required. + std::string service; + /// Which credential source the signer should consult. + SigV4CredentialsProvider provider = SigV4CredentialsProvider::kStatic; + /// Static AWS access key ID. Required when ``provider == kStatic``. + std::string access_key_id; + /// Static AWS secret access key. Required when ``provider == kStatic``. + std::string secret_access_key; + /// Optional STS session token. When present, X-Amz-Security-Token is + /// added to the request and included in the signed header set. + /// Only consulted when ``provider == kStatic``. + std::string session_token; +}; + +/// \brief Computes SigV4 headers for outgoing HTTP requests. +/// +/// Thread-safe after construction. +class ICEBERG_REST_EXPORT SigV4Signer { + public: + /// \brief Construct a signer. Validates that required config fields are set. + static Result> Make(SigV4Config config); + + /// \brief Construct a signer that pins every Sign() call to a fixed UTC + /// timestamp. Intended strictly for deterministic unit tests + /// (canonical SigV4 test vectors) — production callers MUST use + /// ``Make`` so the signing time stays in sync with the host clock. + static Result> MakeForTests( + SigV4Config config, std::chrono::system_clock::time_point signing_time); + + ~SigV4Signer(); + SigV4Signer(const SigV4Signer&) = delete; + SigV4Signer& operator=(const SigV4Signer&) = delete; + + /// \brief Compute the SigV4 headers for a request. + /// + /// \param request The method/URL/query/body to sign. + /// \param existing_headers Headers already set on the request (e.g., a + /// delegate auth manager may have added "Authorization: Bearer ..."; + /// that header MUST be part of the signed header set). + /// \return The headers that the caller should insert_or_assign onto the + /// outgoing request to carry the signature. Typically + /// ``Authorization``, ``X-Amz-Date``, ``X-Amz-Content-Sha256``, + /// ``Host`` (if it wasn't in ``existing_headers``), and + /// ``X-Amz-Security-Token`` (if session_token is configured). + Result> Sign( + const SignableRequest& request, + const std::unordered_map& existing_headers) const; + + private: + class Impl; + explicit SigV4Signer(std::unique_ptr impl); + std::unique_ptr impl_; +}; + +} // namespace iceberg::rest::auth From 72958c79649899641b27da20b6cf9f7c5c5d4672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:16:07 +0200 Subject: [PATCH 06/11] test(rest/auth): add SigV4 manager + canonical vector tests - auth_manager_test.cc: 12 SigV4 manager cases covering case-insensitive auth-type, required-field validation, header emission, session token pass-through, OAuth2-delegate wrapping, recursion rejection, and the default / static / unknown credentials-provider paths. - sigv4_signer_test.cc (new): canonical SigV4 vectors with the signing time pinned via SigV4Signer::MakeForTests. Expected Authorization headers were generated with Python botocore (SigV4Auth with the same parameters as our signer) and are asserted bytewise so any regression in our aws-crt-cpp wrapping is caught. - CMakeLists gates sigv4_signer_test.cc on ICEBERG_REST_HAVE_SIGV4 and defines ICEBERG_REST_WITH_SIGV4 for the whole rest_catalog_test target so conditional tests compile. --- src/iceberg/test/CMakeLists.txt | 19 ++- src/iceberg/test/auth_manager_test.cc | 208 ++++++++++++++++++++++++++ src/iceberg/test/sigv4_signer_test.cc | 200 +++++++++++++++++++++++++ 3 files changed, 421 insertions(+), 6 deletions(-) create mode 100644 src/iceberg/test/sigv4_signer_test.cc diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 2dc90da64..1c30c25b3 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -223,12 +223,19 @@ if(ICEBERG_BUILD_REST) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() - add_rest_iceberg_test(rest_catalog_test - SOURCES - auth_manager_test.cc - endpoint_test.cc - rest_json_serde_test.cc - rest_util_test.cc) + set(ICEBERG_REST_TEST_SOURCES + auth_manager_test.cc + endpoint_test.cc + rest_json_serde_test.cc + rest_util_test.cc) + if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_TEST_SOURCES sigv4_signer_test.cc) + endif() + + add_rest_iceberg_test(rest_catalog_test SOURCES ${ICEBERG_REST_TEST_SOURCES}) + if(ICEBERG_REST_HAVE_SIGV4) + target_compile_definitions(rest_catalog_test PRIVATE ICEBERG_REST_WITH_SIGV4) + endif() if(ICEBERG_BUILD_REST_INTEGRATION_TESTS) add_rest_iceberg_test(rest_catalog_integration_test diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index bd06fee3f..aa79b672f 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -33,6 +33,7 @@ #include "iceberg/catalog/rest/http_client.h" #include "iceberg/catalog/rest/json_serde_internal.h" #include "iceberg/json_serde_internal.h" +#include "iceberg/result.h" #include "iceberg/test/matchers.h" namespace iceberg::rest::auth { @@ -358,4 +359,211 @@ TEST_F(AuthManagerTest, OAuthTokenResponseNATokenType) { EXPECT_EQ(result->token_type, "N_A"); } +// ---- SigV4 ---- + +#ifdef ICEBERG_REST_WITH_SIGV4 + +namespace { + +std::unordered_map MinimalSigV4Properties() { + return { + {AuthProperties::kAuthType, AuthProperties::kAuthTypeSigV4}, + {AuthProperties::kSigV4Region, "us-east-1"}, + {AuthProperties::kSigV4Service, "glue"}, + {AuthProperties::kSigV4AccessKeyId, "AKIAIOSFODNN7EXAMPLE"}, + {AuthProperties::kSigV4SecretAccessKey, + "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"}, + }; +} + +SignableRequest MakeGetRequest(std::string_view url) { + return SignableRequest{.method = "GET", .url = url, .query_params = nullptr, .body = {}}; +} + +} // namespace + +// Verifies that auth type "sigv4" resolves to a real manager (no longer +// "NotImplemented") once credentials are supplied. +TEST_F(AuthManagerTest, LoadSigV4AuthManager) { + auto properties = MinimalSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + ASSERT_NE(manager_result.value(), nullptr); +} + +// Verifies that "sigv4" auth type is case-insensitive. +TEST_F(AuthManagerTest, SigV4AuthTypeCaseInsensitive) { + for (const auto& auth_type : {"SIGV4", "SigV4", "sIgV4"}) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kAuthType] = auth_type; + EXPECT_THAT(AuthManagers::Load("test-catalog", properties), IsOk()) + << "Failed for auth type: " << auth_type; + } +} + +// Required fields: missing region. +TEST_F(AuthManagerTest, SigV4MissingRegion) { + auto properties = MinimalSigV4Properties(); + properties.erase(AuthProperties::kSigV4Region); + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// When the user explicitly requests the static provider but omits the +// access-key-id, we must reject the config (not silently fall back to the +// default chain). +TEST_F(AuthManagerTest, SigV4ExplicitStaticMissingAccessKeyId) { + auto properties = MinimalSigV4Properties(); + properties.erase(AuthProperties::kSigV4AccessKeyId); + properties[AuthProperties::kSigV4CredentialsProvider] = + std::string(AuthProperties::kSigV4ProviderStatic); + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// Same for a missing secret: explicit static without the secret is an error. +TEST_F(AuthManagerTest, SigV4ExplicitStaticMissingSecretAccessKey) { + auto properties = MinimalSigV4Properties(); + properties.erase(AuthProperties::kSigV4SecretAccessKey); + properties[AuthProperties::kSigV4CredentialsProvider] = + std::string(AuthProperties::kSigV4ProviderStatic); + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// Verifies that SigV4 signs a simple GET request and populates the expected +// AWS headers. +TEST_F(AuthManagerTest, SigV4SignsRequestHeaders) { + auto properties = MinimalSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + auto request = + MakeGetRequest("https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"); + ASSERT_THAT(session_result.value()->Authenticate(request, headers), IsOk()); + + EXPECT_TRUE(headers.contains("Authorization")); + EXPECT_THAT(headers["Authorization"], + ::testing::StartsWith("AWS4-HMAC-SHA256 Credential=")); + EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("/us-east-1/glue/aws4_request")); + EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("SignedHeaders=")); + EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("Signature=")); + EXPECT_TRUE(headers.contains("X-Amz-Date")); + // aws-c-auth emits this header lowercase (SigV4 spec uses lowercase in + // canonical form; HTTP treats it case-insensitively on the wire). + EXPECT_TRUE(headers.contains("x-amz-content-sha256")); + // No session token supplied, so X-Amz-Security-Token must NOT be set. + EXPECT_FALSE(headers.contains("X-Amz-Security-Token")); +} + +// Verifies that the session_token surfaces as an X-Amz-Security-Token header +// on the signed request. +TEST_F(AuthManagerTest, SigV4IncludesSessionToken) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4SessionToken] = "SESSION-TOKEN-12345"; + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + auto request = + MakeGetRequest("https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"); + ASSERT_THAT(session_result.value()->Authenticate(request, headers), IsOk()); + + ASSERT_TRUE(headers.contains("X-Amz-Security-Token")); + EXPECT_EQ(headers["X-Amz-Security-Token"], "SESSION-TOKEN-12345"); +} + +// A SigV4 session rejects the headers-only Authenticate() overload — signing +// is impossible without request context. +TEST_F(AuthManagerTest, SigV4RejectsHeadersOnlyAuthenticate) { + auto properties = MinimalSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + std::unordered_map headers; + auto status = session_result.value()->Authenticate(headers); + EXPECT_THAT(status, IsError(ErrorKind::kAuthenticationFailed)); +} + +// Using oauth2 as a SigV4 delegate: both the Bearer token (from OAuth) and +// the SigV4 signature flow through; SigV4 overwrites Authorization (matches +// the expected behaviour documented in the design). +TEST_F(AuthManagerTest, SigV4WithOAuth2Delegate) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4DelegateAuthType] = AuthProperties::kAuthTypeOAuth2; + properties[AuthProperties::kToken.key()] = "my-oauth-token"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + auto request = + MakeGetRequest("https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"); + ASSERT_THAT(session_result.value()->Authenticate(request, headers), IsOk()); + + // SigV4 replaces Authorization with its own AWS4 signature. + EXPECT_THAT(headers["Authorization"], + ::testing::StartsWith("AWS4-HMAC-SHA256 Credential=")); + EXPECT_TRUE(headers.contains("X-Amz-Date")); +} + +// Recursive delegate (sigv4 wrapping sigv4) is rejected. +TEST_F(AuthManagerTest, SigV4RejectsRecursiveDelegate) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4DelegateAuthType] = AuthProperties::kAuthTypeSigV4; + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// Default credentials chain (env → profile → STS Web Identity → IMDS) can be +// selected explicitly. We only verify that the manager loads and returns a +// session — actually signing with the default chain would require real AWS +// credentials in the test environment. +TEST_F(AuthManagerTest, SigV4DefaultCredentialsProviderLoads) { + std::unordered_map properties = { + {AuthProperties::kAuthType, AuthProperties::kAuthTypeSigV4}, + {AuthProperties::kSigV4Region, "us-east-1"}, + {AuthProperties::kSigV4Service, "glue"}, + {AuthProperties::kSigV4CredentialsProvider, + std::string(AuthProperties::kSigV4ProviderDefault)}, + }; + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); +} + +// Without an access-key-id and without an explicit provider, auto-detection +// picks the default chain — catalog loads successfully. +TEST_F(AuthManagerTest, SigV4AutoSelectsDefaultWhenNoStaticKeys) { + std::unordered_map properties = { + {AuthProperties::kAuthType, AuthProperties::kAuthTypeSigV4}, + {AuthProperties::kSigV4Region, "us-east-1"}, + {AuthProperties::kSigV4Service, "glue"}, + }; + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); +} + +// Unknown credentials-provider value is rejected. +TEST_F(AuthManagerTest, SigV4UnknownCredentialsProvider) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4CredentialsProvider] = "iam"; + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +#endif // ICEBERG_REST_WITH_SIGV4 + } // namespace iceberg::rest::auth diff --git a/src/iceberg/test/sigv4_signer_test.cc b/src/iceberg/test/sigv4_signer_test.cc new file mode 100644 index 000000000..78f559013 --- /dev/null +++ b/src/iceberg/test/sigv4_signer_test.cc @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +#include +#include + +#include "iceberg/catalog/rest/auth/auth_session.h" +#include "iceberg/catalog/rest/auth/sigv4_signer.h" +#include "iceberg/test/matchers.h" + +/// SigV4 canonical-vector tests. +/// +/// The expected ``Authorization`` headers in these tests were computed by an +/// independent reference implementation (Python botocore 1.37's +/// ``SigV4Auth``) against the same inputs our signer sees, with the signing +/// time pinned to 2015-08-30T12:36:00Z. The generator script lives at +/// ``/tmp/gen_sigv4_vectors.py`` in the development setup; rerunning it with +/// the same inputs reproduces these exact signatures. +/// +/// Purpose: catch regressions in how we wrap aws-crt-cpp (wrong service +/// name, wrong region, wrong double-URI-encoding flag, missing +/// x-amz-content-sha256 header, etc.) — such regressions would change the +/// ``Authorization`` header bytes and fail these tests, whereas the +/// "headers present + start with AWS4-HMAC-SHA256" assertions in +/// auth_manager_test.cc would still pass. + +namespace iceberg::rest::auth { + +namespace { + +constexpr const char* kAccessKey = "AKIDEXAMPLE"; +constexpr const char* kSecretKey = "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"; +constexpr const char* kRegion = "us-east-1"; +constexpr const char* kService = "glue"; +constexpr const char* kHost = "glue.us-east-1.amazonaws.com"; + +/// 2015-08-30T12:36:00Z — matches the timestamp used when generating the +/// expected Authorization headers. +std::chrono::system_clock::time_point FixedSigningTime() { + return std::chrono::system_clock::time_point(std::chrono::seconds(1440938160)); +} + +SigV4Config DefaultConfig() { + return SigV4Config{ + .region = kRegion, + .service = kService, + .provider = SigV4CredentialsProvider::kStatic, + .access_key_id = kAccessKey, + .secret_access_key = kSecretKey, + .session_token = "", + }; +} + +} // namespace + +// ----------------------------------------------------------------------------- +// GET, empty body, no query params. +TEST(SigV4SignerTest, CanonicalVectorGetVanilla) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/config"; + SignableRequest request{ + .method = "GET", + .url = url, + .query_params = nullptr, + .body = {}, + }; + std::unordered_map existing{{"Host", kHost}}; + + auto result = signer_result.value()->Sign(request, existing); + ASSERT_THAT(result, IsOk()); + + EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); + EXPECT_EQ(result->at("x-amz-content-sha256"), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + EXPECT_EQ( + result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "029d943d26726020757da405ee50a23ab8e28649d845d5bf69717d7246cd3f87"); +} + +// ----------------------------------------------------------------------------- +// GET with query parameters. +TEST(SigV4SignerTest, CanonicalVectorGetWithQuery) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"; + std::unordered_map query_params{ + {"parent", "default"}, + {"pageSize", "10"}, + }; + SignableRequest request{ + .method = "GET", + .url = url, + .query_params = &query_params, + .body = {}, + }; + std::unordered_map existing{{"Host", kHost}}; + + auto result = signer_result.value()->Sign(request, existing); + ASSERT_THAT(result, IsOk()); + + EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); + EXPECT_EQ(result->at("x-amz-content-sha256"), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + EXPECT_EQ( + result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "e82f9da81e1f789f7d55aae636026b6df73a0c23fd3ba08b6d4a7d4e01c1062c"); +} + +// ----------------------------------------------------------------------------- +// POST with a JSON body and a Content-Type header in the signed set. +TEST(SigV4SignerTest, CanonicalVectorPostJsonBody) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"; + std::string body = R"({"namespace":["default"],"properties":{}})"; + SignableRequest request{ + .method = "POST", + .url = url, + .query_params = nullptr, + .body = body, + }; + std::unordered_map existing{ + {"Host", kHost}, + {"Content-Type", "application/json"}, + }; + + auto result = signer_result.value()->Sign(request, existing); + ASSERT_THAT(result, IsOk()); + + EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); + EXPECT_EQ(result->at("x-amz-content-sha256"), + "6a2d107daab05b4284e65fac424da4093a29ad3c9726c6f3e8bf85d5daeb0e34"); + EXPECT_EQ( + result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=content-type;host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "d4c36f6140f60082617e086c3945701678f24db257724d9c6392962885ecb099"); +} + +// ----------------------------------------------------------------------------- +// Sanity: two successive signs of the same request with the same pinned +// timestamp produce byte-identical output. Proves our impl is deterministic +// given fixed inputs (the credentials provider is not re-fetched, the +// timestamp is honored, nothing ambient leaks in). +TEST(SigV4SignerTest, DeterministicWithFixedTimestamp) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/config"; + SignableRequest request{ + .method = "GET", + .url = url, + .query_params = nullptr, + .body = {}, + }; + std::unordered_map existing{{"Host", kHost}}; + + auto r1 = signer_result.value()->Sign(request, existing); + auto r2 = signer_result.value()->Sign(request, existing); + ASSERT_THAT(r1, IsOk()); + ASSERT_THAT(r2, IsOk()); + EXPECT_EQ(r1->at("Authorization"), r2->at("Authorization")); + EXPECT_EQ(r1->at("X-Amz-Date"), r2->at("X-Amz-Date")); +} + +} // namespace iceberg::rest::auth From 76352153a90aa309494debd84dacbaab551a6d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:18:57 +0200 Subject: [PATCH 07/11] style: apply clang-format Whitespace-only reformats across the SigV4 additions; no behaviour change. Tests still pass. --- .../catalog/rest/auth/auth_properties.h | 6 +-- src/iceberg/catalog/rest/auth/sigv4_signer.cc | 35 ++++++++-------- src/iceberg/catalog/rest/http_client.cc | 28 ++++++------- src/iceberg/test/CMakeLists.txt | 7 +--- src/iceberg/test/auth_manager_test.cc | 9 ++-- src/iceberg/test/sigv4_signer_test.cc | 42 +++++++++---------- 6 files changed, 59 insertions(+), 68 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/auth_properties.h b/src/iceberg/catalog/rest/auth/auth_properties.h index 442c2d1ab..556655e1b 100644 --- a/src/iceberg/catalog/rest/auth/auth_properties.h +++ b/src/iceberg/catalog/rest/auth/auth_properties.h @@ -58,12 +58,10 @@ class ICEBERG_REST_EXPORT AuthProperties : public ConfigBase { inline static const std::string kSigV4Service = "rest.auth.sigv4.service"; inline static const std::string kSigV4DelegateAuthType = "rest.auth.sigv4.delegate-auth-type"; - inline static const std::string kSigV4AccessKeyId = - "rest.auth.sigv4.access-key-id"; + inline static const std::string kSigV4AccessKeyId = "rest.auth.sigv4.access-key-id"; inline static const std::string kSigV4SecretAccessKey = "rest.auth.sigv4.secret-access-key"; - inline static const std::string kSigV4SessionToken = - "rest.auth.sigv4.session-token"; + inline static const std::string kSigV4SessionToken = "rest.auth.sigv4.session-token"; /// Selects which credential source drives SigV4 signing. Values: /// "static" — use the access-key-id/secret-access-key/session-token /// properties. Best for tests and short-lived scripts. diff --git a/src/iceberg/catalog/rest/auth/sigv4_signer.cc b/src/iceberg/catalog/rest/auth/sigv4_signer.cc index 0018871f2..06fe75feb 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_signer.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_signer.cc @@ -33,7 +33,6 @@ #include #include #include - #include #include "iceberg/catalog/rest/auth/auth_manager.h" @@ -110,8 +109,7 @@ std::string ByteCursorToStdString(Aws::Crt::ByteCursor bc) { class SigV4Signer::Impl { public: - Impl(SigV4Config config, - std::shared_ptr provider, + Impl(SigV4Config config, std::shared_ptr provider, std::optional fixed_time) : config_(std::move(config)), provider_(std::move(provider)), @@ -150,14 +148,16 @@ Result> BuildCredentialsPr "SigV4: static provider requires access-key-id and secret-access-key"); } Auth::CredentialsProviderStaticConfig static_cfg; - static_cfg.AccessKeyId = Aws::Crt::ByteCursorFromCString(config.access_key_id.c_str()); + static_cfg.AccessKeyId = + Aws::Crt::ByteCursorFromCString(config.access_key_id.c_str()); static_cfg.SecretAccessKey = Aws::Crt::ByteCursorFromCString(config.secret_access_key.c_str()); if (!config.session_token.empty()) { static_cfg.SessionToken = Aws::Crt::ByteCursorFromCString(config.session_token.c_str()); } - auto provider = Auth::CredentialsProvider::CreateCredentialsProviderStatic(static_cfg); + auto provider = + Auth::CredentialsProvider::CreateCredentialsProviderStatic(static_cfg); if (!provider) { return AuthenticationFailed("SigV4: failed to build static credentials provider"); } @@ -168,7 +168,8 @@ Result> BuildCredentialsPr // The API handle we keep alive for the process owns the default // ClientBootstrap / EventLoopGroup / HostResolver. The default chain // needs them to reach IMDS and STS. - chain_cfg.Bootstrap = Aws::Crt::ApiHandle::GetOrCreateStaticDefaultClientBootstrap(); + chain_cfg.Bootstrap = + Aws::Crt::ApiHandle::GetOrCreateStaticDefaultClientBootstrap(); if (chain_cfg.Bootstrap == nullptr) { return IOError( "SigV4: could not create default ClientBootstrap for credentials chain"); @@ -259,8 +260,7 @@ Result> SigV4Signer::Sign( // and (b) aws-c-auth can add its own Authorization header with the // signature. This mirrors the Java Iceberg RESTSigV4AuthSession's rename // to X-Iceberg-Access-Delegation. - static constexpr std::string_view kDelegatedAuthHeader = - "X-Iceberg-Access-Delegation"; + static constexpr std::string_view kDelegatedAuthHeader = "X-Iceberg-Access-Delegation"; std::vector> header_storage; header_storage.reserve(existing_headers.size() + 1); bool has_host = false; @@ -328,8 +328,9 @@ Result> SigV4Signer::Sign( [&promise](const std::shared_ptr& /*signed_req*/, int error_code) { promise.set_value(error_code); }); if (!scheduled) { - return AuthenticationFailed("SigV4: Sigv4HttpRequestSigner::SignRequest failed to " - "schedule signing"); + return AuthenticationFailed( + "SigV4: Sigv4HttpRequestSigner::SignRequest failed to " + "schedule signing"); } const int error_code = future.get(); if (error_code != 0) { @@ -409,8 +410,7 @@ class SigV4AuthSession : public AuthSession { /// \brief Manager that constructs SigV4AuthSessions for a REST catalog. class SigV4Manager : public AuthManager { public: - SigV4Manager(std::shared_ptr signer, - std::unique_ptr delegate) + SigV4Manager(std::shared_ptr signer, std::unique_ptr delegate) : signer_(std::move(signer)), delegate_(std::move(delegate)) {} Result> InitSession( @@ -459,8 +459,8 @@ std::string GetOr(const std::unordered_map& properties /// Resolve the credentials provider enum from the property value. /// Auto-detects when the property is unset: static if an access key is /// configured, default chain otherwise. -Result ResolveCredentialsProvider( - std::string_view raw, bool have_static_keys) { +Result ResolveCredentialsProvider(std::string_view raw, + bool have_static_keys) { if (raw.empty()) { return have_static_keys ? SigV4CredentialsProvider::kStatic : SigV4CredentialsProvider::kDefault; @@ -483,8 +483,8 @@ Result> MakeSigV4Manager( const std::unordered_map& properties) { SigV4Config sigv4_config; sigv4_config.region = GetOr(properties, AuthProperties::kSigV4Region, ""); - sigv4_config.service = - GetOr(properties, AuthProperties::kSigV4Service, AuthProperties::kSigV4DefaultService); + sigv4_config.service = GetOr(properties, AuthProperties::kSigV4Service, + AuthProperties::kSigV4DefaultService); sigv4_config.access_key_id = GetOr(properties, AuthProperties::kSigV4AccessKeyId, ""); sigv4_config.secret_access_key = GetOr(properties, AuthProperties::kSigV4SecretAccessKey, ""); @@ -522,8 +522,7 @@ Result> MakeSigV4Manager( // AuthManagers::Load resolves the inner manager. std::unordered_map delegate_properties = properties; delegate_properties[AuthProperties::kAuthType] = delegate_it->second; - ICEBERG_ASSIGN_OR_RAISE(delegate, - AuthManagers::Load(name, delegate_properties)); + ICEBERG_ASSIGN_OR_RAISE(delegate, AuthManagers::Load(name, delegate_properties)); } return std::make_unique(std::move(signer), std::move(delegate)); diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index 79a0154b4..ca248124d 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -87,10 +87,8 @@ Result BuildHeaders( for (const auto& [key, val] : request_headers) { headers.insert_or_assign(key, val); } - auth::SignableRequest signable{.method = method, - .url = url, - .query_params = query_params, - .body = body}; + auth::SignableRequest signable{ + .method = method, .url = url, .query_params = query_params, .body = body}; ICEBERG_RETURN_UNEXPECTED(session.Authenticate(signable, headers)); return cpr::Header(headers.begin(), headers.end()); } @@ -98,7 +96,8 @@ Result BuildHeaders( /// \brief Serialize a form-data map to an application/x-www-form-urlencoded /// body string. Used so that SigV4 can hash the payload it will /// actually see on the wire. -std::string EncodeFormBody(const std::unordered_map& form_data) { +std::string EncodeFormBody( + const std::unordered_map& form_data) { std::string out; bool first = true; for (const auto& [key, val] : form_data) { @@ -206,9 +205,9 @@ Result HttpClient::Get( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE( - auto all_headers, - BuildHeaders("GET", path, ¶ms, /*body=*/{}, headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE(auto all_headers, + BuildHeaders("GET", path, ¶ms, /*body=*/{}, headers, + default_headers_, session)); cpr::Response response = cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -265,9 +264,9 @@ Result HttpClient::PostForm( Result HttpClient::Head( const std::string& path, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders("HEAD", path, /*query_params=*/nullptr, /*body=*/{}, - headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, BuildHeaders("HEAD", path, /*query_params=*/nullptr, /*body=*/{}, + headers, default_headers_, session)); cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -281,10 +280,9 @@ Result HttpClient::Delete( const std::string& path, const std::unordered_map& params, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE( - auto all_headers, - BuildHeaders("DELETE", path, ¶ms, /*body=*/{}, headers, default_headers_, - session)); + ICEBERG_ASSIGN_OR_RAISE(auto all_headers, + BuildHeaders("DELETE", path, ¶ms, /*body=*/{}, headers, + default_headers_, session)); cpr::Response response = cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, BuildSslOptions(ssl_config_), *connection_pool_); diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 1c30c25b3..3f706f5c9 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -223,11 +223,8 @@ if(ICEBERG_BUILD_REST) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() - set(ICEBERG_REST_TEST_SOURCES - auth_manager_test.cc - endpoint_test.cc - rest_json_serde_test.cc - rest_util_test.cc) + set(ICEBERG_REST_TEST_SOURCES auth_manager_test.cc endpoint_test.cc + rest_json_serde_test.cc rest_util_test.cc) if(ICEBERG_REST_HAVE_SIGV4) list(APPEND ICEBERG_REST_TEST_SOURCES sigv4_signer_test.cc) endif() diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index aa79b672f..91c241e03 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -371,13 +371,13 @@ std::unordered_map MinimalSigV4Properties() { {AuthProperties::kSigV4Region, "us-east-1"}, {AuthProperties::kSigV4Service, "glue"}, {AuthProperties::kSigV4AccessKeyId, "AKIAIOSFODNN7EXAMPLE"}, - {AuthProperties::kSigV4SecretAccessKey, - "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"}, + {AuthProperties::kSigV4SecretAccessKey, "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"}, }; } SignableRequest MakeGetRequest(std::string_view url) { - return SignableRequest{.method = "GET", .url = url, .query_params = nullptr, .body = {}}; + return SignableRequest{ + .method = "GET", .url = url, .query_params = nullptr, .body = {}}; } } // namespace @@ -449,7 +449,8 @@ TEST_F(AuthManagerTest, SigV4SignsRequestHeaders) { EXPECT_TRUE(headers.contains("Authorization")); EXPECT_THAT(headers["Authorization"], ::testing::StartsWith("AWS4-HMAC-SHA256 Credential=")); - EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("/us-east-1/glue/aws4_request")); + EXPECT_THAT(headers["Authorization"], + ::testing::HasSubstr("/us-east-1/glue/aws4_request")); EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("SignedHeaders=")); EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("Signature=")); EXPECT_TRUE(headers.contains("X-Amz-Date")); diff --git a/src/iceberg/test/sigv4_signer_test.cc b/src/iceberg/test/sigv4_signer_test.cc index 78f559013..27b372c6f 100644 --- a/src/iceberg/test/sigv4_signer_test.cc +++ b/src/iceberg/test/sigv4_signer_test.cc @@ -17,6 +17,8 @@ * under the License. */ +#include "iceberg/catalog/rest/auth/sigv4_signer.h" + #include #include #include @@ -25,7 +27,6 @@ #include #include "iceberg/catalog/rest/auth/auth_session.h" -#include "iceberg/catalog/rest/auth/sigv4_signer.h" #include "iceberg/test/matchers.h" /// SigV4 canonical-vector tests. @@ -94,13 +95,12 @@ TEST(SigV4SignerTest, CanonicalVectorGetVanilla) { EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); EXPECT_EQ(result->at("x-amz-content-sha256"), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); - EXPECT_EQ( - result->at("Authorization"), - "AWS4-HMAC-SHA256 " - "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " - "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " - "Signature=" - "029d943d26726020757da405ee50a23ab8e28649d845d5bf69717d7246cd3f87"); + EXPECT_EQ(result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "029d943d26726020757da405ee50a23ab8e28649d845d5bf69717d7246cd3f87"); } // ----------------------------------------------------------------------------- @@ -128,13 +128,12 @@ TEST(SigV4SignerTest, CanonicalVectorGetWithQuery) { EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); EXPECT_EQ(result->at("x-amz-content-sha256"), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); - EXPECT_EQ( - result->at("Authorization"), - "AWS4-HMAC-SHA256 " - "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " - "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " - "Signature=" - "e82f9da81e1f789f7d55aae636026b6df73a0c23fd3ba08b6d4a7d4e01c1062c"); + EXPECT_EQ(result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "e82f9da81e1f789f7d55aae636026b6df73a0c23fd3ba08b6d4a7d4e01c1062c"); } // ----------------------------------------------------------------------------- @@ -162,13 +161,12 @@ TEST(SigV4SignerTest, CanonicalVectorPostJsonBody) { EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); EXPECT_EQ(result->at("x-amz-content-sha256"), "6a2d107daab05b4284e65fac424da4093a29ad3c9726c6f3e8bf85d5daeb0e34"); - EXPECT_EQ( - result->at("Authorization"), - "AWS4-HMAC-SHA256 " - "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " - "SignedHeaders=content-type;host;x-amz-content-sha256;x-amz-date, " - "Signature=" - "d4c36f6140f60082617e086c3945701678f24db257724d9c6392962885ecb099"); + EXPECT_EQ(result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=content-type;host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "d4c36f6140f60082617e086c3945701678f24db257724d9c6392962885ecb099"); } // ----------------------------------------------------------------------------- From 59b00eb471a4007741c253a710595d3807d41b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:52:02 +0200 Subject: [PATCH 08/11] fix(rest/auth): address clang-tidy findings in sigv4_signer - Replace the C-style ``unsigned char digest[]`` scratch buffer with ``std::array`` and feed ``digest.data()`` to ``SHA256`` (modernize- avoid-c-arrays). - Swap the hex lookup table to a ``constexpr std::string_view`` instead of a C-string array (modernize-avoid-c-arrays). - Use braced-init return in ``ExtractPath`` (modernize-return-braced- init-list). No behaviour change; the canonical-vector tests still produce the same Authorization headers. --- src/iceberg/catalog/rest/auth/sigv4_signer.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/iceberg/catalog/rest/auth/sigv4_signer.cc b/src/iceberg/catalog/rest/auth/sigv4_signer.cc index 06fe75feb..f43a840cf 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_signer.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_signer.cc @@ -19,6 +19,7 @@ #include "iceberg/catalog/rest/auth/sigv4_signer.h" +#include #include #include #include @@ -54,9 +55,10 @@ Aws::Crt::ApiHandle& GlobalApiHandle() { } std::string Sha256Hex(std::string_view data) { - unsigned char digest[SHA256_DIGEST_LENGTH]; - ::SHA256(reinterpret_cast(data.data()), data.size(), digest); - static constexpr char kHex[] = "0123456789abcdef"; + std::array digest{}; + ::SHA256(reinterpret_cast(data.data()), data.size(), + digest.data()); + static constexpr std::string_view kHex = "0123456789abcdef"; std::string out(SHA256_DIGEST_LENGTH * 2, '\0'); for (size_t i = 0; i < SHA256_DIGEST_LENGTH; ++i) { out[2 * i] = kHex[digest[i] >> 4]; @@ -85,7 +87,7 @@ std::string_view ExtractPath(std::string_view url) { (scheme_end == std::string_view::npos) ? url : url.substr(scheme_end + 3); auto slash = rest.find('/'); if (slash == std::string_view::npos) { - return std::string_view("/"); + return {"/"}; } return rest.substr(slash); } From 7fed41857457599503d2e2410b31ee62ca542f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 09:52:11 +0200 Subject: [PATCH 09/11] ci(cpp-linter): install aws-crt-cpp via Conan Without aws-crt-cpp on the lint runner, clang-tidy reports sigv4_signer.cc's ``#include `` as file-not-found and fails the job even though the rest of the file is conceptually clean. Switch the lint job to resolve dependencies via Conan (``conan install . -s build_type=Release --build=missing``) and configure with the resulting ``conan-release`` preset. aws-crt-cpp comes in as a Conan package and the generated compile_commands.json now carries the right include paths for the SigV4 source. Updated the cpp-linter ``database`` and ``extra-args`` inputs to point at the new ``build/Release`` layout. --- .github/workflows/cpp-linter.yml | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index 26324479c..d095468e0 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -38,14 +38,31 @@ jobs: - name: Install dependencies shell: bash run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev + - name: Set up Python for Conan + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install Conan + shell: bash + run: | + pip install 'conan>=2.1.0' + conan profile detect --force + - name: Resolve Conan dependencies + # Pulls aws-crt-cpp so the SigV4 source compiles under clang-tidy. + # Other REST deps (cpr, arrow, …) still come via FetchContent below. + shell: bash + env: + CC: gcc-14 + CXX: g++-14 + run: | + conan install . -s build_type=Release --build=missing - name: Run build env: CC: gcc-14 CXX: g++-14 run: | - mkdir build && cd build - cmake .. -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON - cmake --build . + cmake --preset conan-release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + cmake --build build/Release - uses: cpp-linter/cpp-linter-action@0f6d1b8d7e38b584cbee606eb23d850c217d54f8 id: linter continue-on-error: true @@ -59,10 +76,10 @@ jobs: lines-changed-only: true thread-comments: true ignore: 'build|cmake_modules|ci' - database: build + database: build/Release verbosity: 'debug' # need '-fno-builtin-std-forward_like', see https://github.com/llvm/llvm-project/issues/101614 - extra-args: '-std=c++23 -I$PWD/src -I$PWD/build/src -fno-builtin-std-forward_like' + extra-args: '-std=c++23 -I$PWD/src -I$PWD/build/Release/src -fno-builtin-std-forward_like' - name: Fail fast?! if: steps.linter.outputs.checks-failed != 0 run: | From 681a834f9c16aa82c3e9939e3f1c2c14fef3d81b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 10:23:59 +0200 Subject: [PATCH 10/11] fix: resolve remaining cpp-linter findings - sigv4_signer.cc: ByteCursorToStdString now uses braced-init return to satisfy modernize-return-braced-init-list (missed in the previous sweep). - cpp-linter.yml: override ICEBERG_BUILD_TESTS=ON in the lint build so the gtest/gmock test targets land in compile_commands.json. Without this, clang-tidy can't resolve in auth_manager_test or sigv4_signer_test because the conanfile disables tests for the package build by default. --- .github/workflows/cpp-linter.yml | 7 ++++++- src/iceberg/catalog/rest/auth/sigv4_signer.cc | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index d095468e0..d276dc292 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -61,7 +61,12 @@ jobs: CC: gcc-14 CXX: g++-14 run: | - cmake --preset conan-release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + # The conanfile.py disables tests for the package build; override + # here so the test targets land in compile_commands.json and + # clang-tidy can resolve their includes (gmock/gtest). + cmake --preset conan-release \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DICEBERG_BUILD_TESTS=ON cmake --build build/Release - uses: cpp-linter/cpp-linter-action@0f6d1b8d7e38b584cbee606eb23d850c217d54f8 id: linter diff --git a/src/iceberg/catalog/rest/auth/sigv4_signer.cc b/src/iceberg/catalog/rest/auth/sigv4_signer.cc index f43a840cf..262803116 100644 --- a/src/iceberg/catalog/rest/auth/sigv4_signer.cc +++ b/src/iceberg/catalog/rest/auth/sigv4_signer.cc @@ -104,7 +104,7 @@ bool EqualsIgnoreCase(std::string_view a, std::string_view b) { } std::string ByteCursorToStdString(Aws::Crt::ByteCursor bc) { - return std::string(reinterpret_cast(bc.ptr), bc.len); + return {reinterpret_cast(bc.ptr), bc.len}; } } // namespace From dd28bad82ac1af5cb8b919b8e9c9f8d0dbb2291d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 21 Apr 2026 10:57:37 +0200 Subject: [PATCH 11/11] ci(cpp-linter): scope build to rest_catalog_test only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling ICEBERG_BUILD_TESTS with the default bundle ON was pulling in arrow-backed test targets (avro/arrow/parquet/scan), which Conan had to compile from source for gcc-14 — pushing the lint job past 30 minutes. Since the lint only needs compile_commands.json entries for our sigv4_signer / rest auth / rest test files, configure with BUNDLE=OFF and build only the ``rest_catalog_test`` target. cpp-linter still gets full coverage of every file touched in this PR. --- .github/workflows/cpp-linter.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index d276dc292..4de52357e 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -63,11 +63,14 @@ jobs: run: | # The conanfile.py disables tests for the package build; override # here so the test targets land in compile_commands.json and - # clang-tidy can resolve their includes (gmock/gtest). + # clang-tidy can resolve their includes (gmock/gtest). We also + # skip the arrow-heavy bundle so the lint job stays fast: the + # REST targets are what we need for this PR's lint coverage. cmake --preset conan-release \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DICEBERG_BUILD_TESTS=ON - cmake --build build/Release + -DICEBERG_BUILD_TESTS=ON \ + -DICEBERG_BUILD_BUNDLE=OFF + cmake --build build/Release --target rest_catalog_test - uses: cpp-linter/cpp-linter-action@0f6d1b8d7e38b584cbee606eb23d850c217d54f8 id: linter continue-on-error: true