diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml index 26324479c..4de52357e 100644 --- a/.github/workflows/cpp-linter.yml +++ b/.github/workflows/cpp-linter.yml @@ -38,14 +38,39 @@ jobs: - name: Install dependencies shell: bash run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev + - name: Set up Python for Conan + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install Conan + shell: bash + run: | + pip install 'conan>=2.1.0' + conan profile detect --force + - name: Resolve Conan dependencies + # Pulls aws-crt-cpp so the SigV4 source compiles under clang-tidy. + # Other REST deps (cpr, arrow, …) still come via FetchContent below. + shell: bash + env: + CC: gcc-14 + CXX: g++-14 + run: | + conan install . -s build_type=Release --build=missing - name: Run build env: CC: gcc-14 CXX: g++-14 run: | - mkdir build && cd build - cmake .. -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON - cmake --build . + # The conanfile.py disables tests for the package build; override + # here so the test targets land in compile_commands.json and + # clang-tidy can resolve their includes (gmock/gtest). We also + # skip the arrow-heavy bundle so the lint job stays fast: the + # REST targets are what we need for this PR's lint coverage. + cmake --preset conan-release \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DICEBERG_BUILD_TESTS=ON \ + -DICEBERG_BUILD_BUNDLE=OFF + cmake --build build/Release --target rest_catalog_test - uses: cpp-linter/cpp-linter-action@0f6d1b8d7e38b584cbee606eb23d850c217d54f8 id: linter continue-on-error: true @@ -59,10 +84,10 @@ jobs: lines-changed-only: true thread-comments: true ignore: 'build|cmake_modules|ci' - database: build + database: build/Release verbosity: 'debug' # need '-fno-builtin-std-forward_like', see https://github.com/llvm/llvm-project/issues/101614 - extra-args: '-std=c++23 -I$PWD/src -I$PWD/build/src -fno-builtin-std-forward_like' + extra-args: '-std=c++23 -I$PWD/src -I$PWD/build/Release/src -fno-builtin-std-forward_like' - name: Fail fast?! if: steps.linter.outputs.checks-failed != 0 run: | diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8b5ab67c8..58ea7545a 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -570,6 +570,32 @@ if(ICEBERG_BUILD_BUNDLE) resolve_zstd_dependency() endif() +# ---------------------------------------------------------------------- +# aws-crt-cpp (optional, used by REST catalog SigV4 AuthManager). +# If not found, the REST client still builds and SigV4 returns NotImplemented +# at runtime. Install via Conan or vcpkg to enable AWS Glue integration. + +function(resolve_aws_crt_cpp_dependency) + find_package(aws-crt-cpp CONFIG QUIET) + if(aws-crt-cpp_FOUND) + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES aws-crt-cpp) + set(ICEBERG_REST_HAVE_SIGV4 + TRUE + PARENT_SCOPE) + message(STATUS "Found aws-crt-cpp (${aws-crt-cpp_VERSION_STRING}); enabling REST " + "SigV4 auth") + set(ICEBERG_SYSTEM_DEPENDENCIES + ${ICEBERG_SYSTEM_DEPENDENCIES} + PARENT_SCOPE) + else() + set(ICEBERG_REST_HAVE_SIGV4 + FALSE + PARENT_SCOPE) + message(STATUS "aws-crt-cpp not found; REST SigV4 auth will return NotImplemented") + endif() +endfunction() + if(ICEBERG_BUILD_REST) resolve_cpr_dependency() + resolve_aws_crt_cpp_dependency() endif() diff --git a/conanfile.py b/conanfile.py index a0a650566..4b7116ec6 100644 --- a/conanfile.py +++ b/conanfile.py @@ -65,6 +65,10 @@ def requirements(self): self.requires("zlib/[>=1.2.11 <2]") self.requires("snappy/[>=1.1 <2]") self.requires("zstd/[>=1.5 <2]") + # AWS CRT (C++) — used by the REST catalog's SigV4 AuthManager to sign + # requests for AWS Glue's Iceberg REST endpoint. Pulls aws-c-auth, + # aws-c-common, aws-c-cal, aws-c-http, aws-c-io transitively. + self.requires("aws-crt-cpp/[>=0.26 <1]") def generate(self): tc = CMakeToolchain(self) @@ -152,6 +156,7 @@ def package_info(self): self.cpp_info.components["iceberg_rest"].requires = [ "iceberg", "vendored_cpr", + "aws-crt-cpp::aws-crt-cpp" # Conan component; CMake target is AWS::aws-crt-cpp, ] # Bundle (Arrow/Parquet integration) diff --git a/src/iceberg/catalog/rest/CMakeLists.txt b/src/iceberg/catalog/rest/CMakeLists.txt index e91b12962..f66a105ac 100644 --- a/src/iceberg/catalog/rest/CMakeLists.txt +++ b/src/iceberg/catalog/rest/CMakeLists.txt @@ -33,6 +33,10 @@ set(ICEBERG_REST_SOURCES rest_util.cc types.cc) +if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_SOURCES auth/sigv4_signer.cc) +endif() + set(ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS) set(ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS) @@ -51,9 +55,23 @@ list(APPEND "$,iceberg::iceberg_shared,iceberg::iceberg_static>" "$,iceberg::cpr,cpr::cpr>") +if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS AWS::aws-crt-cpp) + list(APPEND ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS AWS::aws-crt-cpp) + list(APPEND ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS AWS::aws-crt-cpp) + list(APPEND ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS AWS::aws-crt-cpp) +endif() + +set(ICEBERG_REST_DEFINITIONS) +if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_DEFINITIONS ICEBERG_REST_WITH_SIGV4) +endif() + add_iceberg_lib(iceberg_rest SOURCES ${ICEBERG_REST_SOURCES} + DEFINITIONS + ${ICEBERG_REST_DEFINITIONS} SHARED_LINK_LIBS ${ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS} STATIC_LINK_LIBS diff --git a/src/iceberg/catalog/rest/auth/auth_manager_internal.h b/src/iceberg/catalog/rest/auth/auth_manager_internal.h index 051d05505..08b3eab67 100644 --- a/src/iceberg/catalog/rest/auth/auth_manager_internal.h +++ b/src/iceberg/catalog/rest/auth/auth_manager_internal.h @@ -47,4 +47,10 @@ Result> MakeOAuth2Manager( std::string_view name, const std::unordered_map& properties); +/// \brief Create a SigV4 authentication manager for AWS services such as +/// the AWS Glue Iceberg REST endpoint. +Result> MakeSigV4Manager( + std::string_view name, + const std::unordered_map& properties); + } // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/auth_managers.cc b/src/iceberg/catalog/rest/auth/auth_managers.cc index f55885d75..55dd43b74 100644 --- a/src/iceberg/catalog/rest/auth/auth_managers.cc +++ b/src/iceberg/catalog/rest/auth/auth_managers.cc @@ -62,11 +62,15 @@ std::string InferAuthType( } AuthManagerRegistry CreateDefaultRegistry() { - return { + AuthManagerRegistry registry = { {AuthProperties::kAuthTypeNone, MakeNoopAuthManager}, {AuthProperties::kAuthTypeBasic, MakeBasicAuthManager}, {AuthProperties::kAuthTypeOAuth2, MakeOAuth2Manager}, }; +#ifdef ICEBERG_REST_WITH_SIGV4 + registry.emplace(AuthProperties::kAuthTypeSigV4, MakeSigV4Manager); +#endif + return registry; } // Get the global registry of auth manager factories. diff --git a/src/iceberg/catalog/rest/auth/auth_properties.h b/src/iceberg/catalog/rest/auth/auth_properties.h index f6a20baa3..556655e1b 100644 --- a/src/iceberg/catalog/rest/auth/auth_properties.h +++ b/src/iceberg/catalog/rest/auth/auth_properties.h @@ -58,6 +58,25 @@ class ICEBERG_REST_EXPORT AuthProperties : public ConfigBase { inline static const std::string kSigV4Service = "rest.auth.sigv4.service"; inline static const std::string kSigV4DelegateAuthType = "rest.auth.sigv4.delegate-auth-type"; + inline static const std::string kSigV4AccessKeyId = "rest.auth.sigv4.access-key-id"; + inline static const std::string kSigV4SecretAccessKey = + "rest.auth.sigv4.secret-access-key"; + inline static const std::string kSigV4SessionToken = "rest.auth.sigv4.session-token"; + /// Selects which credential source drives SigV4 signing. Values: + /// "static" — use the access-key-id/secret-access-key/session-token + /// properties. Best for tests and short-lived scripts. + /// "default" — aws-crt-cpp's cached default chain: + /// Environment → Profile → STS Web Identity (IRSA) → IMDSv2/ECS. + /// Best for EC2/EKS/ECS deployments where creds rotate. + /// If unset, we infer: "static" when an access-key-id is configured, + /// "default" otherwise. + inline static const std::string kSigV4CredentialsProvider = + "rest.auth.sigv4.credentials-provider"; + inline static constexpr std::string_view kSigV4ProviderStatic = "static"; + inline static constexpr std::string_view kSigV4ProviderDefault = "default"; + /// Default service name when rest.auth.sigv4.service is unset — matches the + /// Java Iceberg client, which defaults to AWS API Gateway's signing name. + inline static constexpr std::string_view kSigV4DefaultService = "execute-api"; // ---- OAuth2 entries ---- diff --git a/src/iceberg/catalog/rest/auth/auth_session.h b/src/iceberg/catalog/rest/auth/auth_session.h index 89f0c442e..8f890644f 100644 --- a/src/iceberg/catalog/rest/auth/auth_session.h +++ b/src/iceberg/catalog/rest/auth/auth_session.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "iceberg/catalog/rest/iceberg_rest_export.h" @@ -33,6 +34,24 @@ namespace iceberg::rest::auth { +/// \brief A description of an outgoing HTTP request that an AuthSession may +/// need to inspect in order to authenticate (e.g., SigV4 needs the +/// method, URL, query params and body hash to build its canonical +/// request). Header-only auth schemes ignore it. +struct SignableRequest { + /// HTTP method, uppercase (e.g., "GET", "POST"). + std::string_view method; + /// Full request URL including scheme, host, path, but *without* query + /// string — query parameters are passed separately so callers don't need + /// to re-encode them. + std::string_view url; + /// Query parameters (pre-encoding). May be null for requests that have + /// none. Not owned. + const std::unordered_map* query_params = nullptr; + /// Raw request body. Empty for GET/HEAD/DELETE without a body. + std::string_view body; +}; + /// \brief An authentication session that can authenticate outgoing HTTP requests. class ICEBERG_REST_EXPORT AuthSession { public: @@ -53,6 +72,18 @@ class ICEBERG_REST_EXPORT AuthSession { /// - RestError: HTTP errors from authentication service virtual Status Authenticate(std::unordered_map& headers) = 0; + /// \brief Authenticate using full request context. + /// + /// Overloaded form used by auth schemes that need to see the request method, + /// URL, query parameters and body in order to compute their header value + /// (e.g., SigV4). The default implementation ignores the request context and + /// forwards to the headers-only Authenticate(), which is what every non- + /// signing auth scheme wants. + virtual Status Authenticate(const SignableRequest& /*request*/, + std::unordered_map& headers) { + return Authenticate(headers); + } + /// \brief Close the session and release any resources. /// /// This method is called when the session is no longer needed. For stateful diff --git a/src/iceberg/catalog/rest/auth/sigv4_signer.cc b/src/iceberg/catalog/rest/auth/sigv4_signer.cc new file mode 100644 index 000000000..262803116 --- /dev/null +++ b/src/iceberg/catalog/rest/auth/sigv4_signer.cc @@ -0,0 +1,533 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/rest/auth/sigv4_signer.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "iceberg/catalog/rest/auth/auth_manager.h" +#include "iceberg/catalog/rest/auth/auth_manager_internal.h" +#include "iceberg/catalog/rest/auth/auth_managers.h" +#include "iceberg/catalog/rest/auth/auth_properties.h" +#include "iceberg/util/macros.h" + +namespace iceberg::rest::auth { + +namespace { + +/// aws-crt-cpp requires a single ApiHandle alive for the lifetime of any CRT +/// user — it initializes aws-c-common / aws-c-io / etc. Use a function-local +/// static so it's lazily constructed on first use and destroyed at exit. +Aws::Crt::ApiHandle& GlobalApiHandle() { + static Aws::Crt::ApiHandle handle; + return handle; +} + +std::string Sha256Hex(std::string_view data) { + std::array digest{}; + ::SHA256(reinterpret_cast(data.data()), data.size(), + digest.data()); + static constexpr std::string_view kHex = "0123456789abcdef"; + std::string out(SHA256_DIGEST_LENGTH * 2, '\0'); + for (size_t i = 0; i < SHA256_DIGEST_LENGTH; ++i) { + out[2 * i] = kHex[digest[i] >> 4]; + out[2 * i + 1] = kHex[digest[i] & 0x0F]; + } + return out; +} + +/// Extracts host[:port] from a URL like "https://host:port/path?q=1". +std::string_view ExtractHost(std::string_view url) { + auto scheme_end = url.find("://"); + std::string_view rest = + (scheme_end == std::string_view::npos) ? url : url.substr(scheme_end + 3); + auto slash = rest.find('/'); + if (slash != std::string_view::npos) rest = rest.substr(0, slash); + auto q = rest.find('?'); + if (q != std::string_view::npos) rest = rest.substr(0, q); + return rest; +} + +/// Extracts the path (and any inline query string) from a URL; returns "/" if +/// none is present. The caller must append its own query_params separately. +std::string_view ExtractPath(std::string_view url) { + auto scheme_end = url.find("://"); + std::string_view rest = + (scheme_end == std::string_view::npos) ? url : url.substr(scheme_end + 3); + auto slash = rest.find('/'); + if (slash == std::string_view::npos) { + return {"/"}; + } + return rest.substr(slash); +} + +bool EqualsIgnoreCase(std::string_view a, std::string_view b) { + if (a.size() != b.size()) return false; + for (size_t i = 0; i < a.size(); ++i) { + auto lower = [](char c) { + return (c >= 'A' && c <= 'Z') ? static_cast(c + 32) : c; + }; + if (lower(a[i]) != lower(b[i])) return false; + } + return true; +} + +std::string ByteCursorToStdString(Aws::Crt::ByteCursor bc) { + return {reinterpret_cast(bc.ptr), bc.len}; +} + +} // namespace + +class SigV4Signer::Impl { + public: + Impl(SigV4Config config, std::shared_ptr provider, + std::optional fixed_time) + : config_(std::move(config)), + provider_(std::move(provider)), + fixed_time_(fixed_time) {} + + const SigV4Config& config() const { return config_; } + const std::shared_ptr& provider() const { + return provider_; + } + const std::optional& fixed_time() const { + return fixed_time_; + } + + private: + SigV4Config config_; + std::shared_ptr provider_; + /// When set, overrides the signing timestamp (tests only). + std::optional fixed_time_; +}; + +SigV4Signer::SigV4Signer(std::unique_ptr impl) : impl_(std::move(impl)) {} +SigV4Signer::~SigV4Signer() = default; + +namespace { + +/// Builds the aws-crt-cpp credentials provider matching the SigV4Config. +/// For the default chain we explicitly pass the static default ClientBootstrap +/// so aws-crt-cpp can make network calls for IMDS / STS Web Identity. +Result> BuildCredentialsProvider( + const SigV4Config& config) { + namespace Auth = Aws::Crt::Auth; + switch (config.provider) { + case SigV4CredentialsProvider::kStatic: { + if (config.access_key_id.empty() || config.secret_access_key.empty()) { + return InvalidArgument( + "SigV4: static provider requires access-key-id and secret-access-key"); + } + Auth::CredentialsProviderStaticConfig static_cfg; + static_cfg.AccessKeyId = + Aws::Crt::ByteCursorFromCString(config.access_key_id.c_str()); + static_cfg.SecretAccessKey = + Aws::Crt::ByteCursorFromCString(config.secret_access_key.c_str()); + if (!config.session_token.empty()) { + static_cfg.SessionToken = + Aws::Crt::ByteCursorFromCString(config.session_token.c_str()); + } + auto provider = + Auth::CredentialsProvider::CreateCredentialsProviderStatic(static_cfg); + if (!provider) { + return AuthenticationFailed("SigV4: failed to build static credentials provider"); + } + return provider; + } + case SigV4CredentialsProvider::kDefault: { + Auth::CredentialsProviderChainDefaultConfig chain_cfg; + // The API handle we keep alive for the process owns the default + // ClientBootstrap / EventLoopGroup / HostResolver. The default chain + // needs them to reach IMDS and STS. + chain_cfg.Bootstrap = + Aws::Crt::ApiHandle::GetOrCreateStaticDefaultClientBootstrap(); + if (chain_cfg.Bootstrap == nullptr) { + return IOError( + "SigV4: could not create default ClientBootstrap for credentials chain"); + } + auto provider = + Auth::CredentialsProvider::CreateCredentialsProviderChainDefault(chain_cfg); + if (!provider) { + return AuthenticationFailed( + "SigV4: failed to build default credentials provider chain"); + } + return provider; + } + } + return InvalidArgument("SigV4: unknown credentials provider"); +} + +} // namespace + +Result> SigV4Signer::Make(SigV4Config config) { + if (config.region.empty()) { + return InvalidArgument("SigV4: region is required"); + } + if (config.service.empty()) { + return InvalidArgument("SigV4: service is required"); + } + GlobalApiHandle(); + ICEBERG_ASSIGN_OR_RAISE(auto provider, BuildCredentialsProvider(config)); + return std::shared_ptr(new SigV4Signer( + std::make_unique(std::move(config), std::move(provider), std::nullopt))); +} + +Result> SigV4Signer::MakeForTests( + SigV4Config config, std::chrono::system_clock::time_point signing_time) { + if (config.region.empty()) { + return InvalidArgument("SigV4: region is required"); + } + if (config.service.empty()) { + return InvalidArgument("SigV4: service is required"); + } + GlobalApiHandle(); + ICEBERG_ASSIGN_OR_RAISE(auto provider, BuildCredentialsProvider(config)); + return std::shared_ptr(new SigV4Signer( + std::make_unique(std::move(config), std::move(provider), signing_time))); +} + +Result> SigV4Signer::Sign( + const SignableRequest& request, + const std::unordered_map& existing_headers) const { + namespace Crt = Aws::Crt; + namespace Auth = Aws::Crt::Auth; + namespace Http = Aws::Crt::Http; + + const SigV4Config& cfg = impl_->config(); + + auto req = std::make_shared(); + if (!*req) { + return IOError("SigV4: failed to allocate HttpRequest"); + } + + // Build "path?k=v&k=v" from the URL and any query_params. aws-crt-cpp will + // URI-encode per its double-encode setting, so we pass raw values here. + std::string path_and_query(ExtractPath(request.url)); + if (request.query_params != nullptr && !request.query_params->empty()) { + bool already_has_query = path_and_query.find('?') != std::string::npos; + path_and_query.push_back(already_has_query ? '&' : '?'); + bool first = true; + for (const auto& [k, v] : *request.query_params) { + if (!first) path_and_query.push_back('&'); + first = false; + path_and_query.append(k).push_back('='); + path_and_query.append(v); + } + } + + const std::string method(request.method); + req->SetMethod(Crt::ByteCursorFromCString(method.c_str())); + req->SetPath(Crt::ByteCursorFromCString(path_and_query.c_str())); + + // Keep header name/value storage alive until AddHeader has copied them. + // (aws-c-http copies internally, but we keep the strings around during the + // loop to be safe against future library changes.) + // + // aws-c-auth rejects requests that already contain any header it plans to + // add itself (Authorization, X-Amz-Date, X-Amz-Content-Sha256, + // X-Amz-Security-Token). When a delegate auth session has added + // "Authorization: Bearer ...", we rename it to a non-conflicting header so + // (a) the delegate's credential is still covered by the SigV4 signature, + // and (b) aws-c-auth can add its own Authorization header with the + // signature. This mirrors the Java Iceberg RESTSigV4AuthSession's rename + // to X-Iceberg-Access-Delegation. + static constexpr std::string_view kDelegatedAuthHeader = "X-Iceberg-Access-Delegation"; + std::vector> header_storage; + header_storage.reserve(existing_headers.size() + 1); + bool has_host = false; + for (const auto& [k, v] : existing_headers) { + if (EqualsIgnoreCase(k, "Authorization")) { + header_storage.emplace_back(std::string(kDelegatedAuthHeader), v); + } else { + header_storage.emplace_back(k, v); + } + if (EqualsIgnoreCase(k, "host")) has_host = true; + } + if (!has_host) { + header_storage.emplace_back("Host", std::string(ExtractHost(request.url))); + } + + std::unordered_set pre_signed_header_names; + pre_signed_header_names.reserve(header_storage.size()); + for (const auto& [k, v] : header_storage) { + Http::HttpHeader h{}; + h.name = Crt::ByteCursorFromCString(k.c_str()); + h.value = Crt::ByteCursorFromCString(v.c_str()); + req->AddHeader(h); + pre_signed_header_names.insert(k); + } + + // Build signing config. + Auth::AwsSigningConfig signing_config; + signing_config.SetSigningAlgorithm(Auth::SigningAlgorithm::SigV4); + signing_config.SetSignatureType(Auth::SignatureType::HttpRequestViaHeaders); + signing_config.SetRegion(Crt::String(cfg.region.begin(), cfg.region.end())); + signing_config.SetService(Crt::String(cfg.service.begin(), cfg.service.end())); + // Glue and most AWS services (non-S3) double-encode the URI path in the + // canonical request; S3 is the sole exception. We target non-S3 so set true. + signing_config.SetUseDoubleUriEncode(true); + signing_config.SetShouldNormalizeUriPath(true); + // Explicit payload hash — always emit the header, compute it ourselves so we + // don't depend on the library's payload-stream handling (Java Iceberg hit + // correctness bugs around empty-body SDK defaults; PR apache/iceberg#6951). + const std::string payload_hash = Sha256Hex(request.body); + signing_config.SetSignedBodyValue( + Crt::String(payload_hash.begin(), payload_hash.end())); + signing_config.SetSignedBodyHeader(Auth::SignedBodyHeaderType::XAmzContentSha256); + + // Test-only: pin the signing timestamp so we can reproduce AWS's + // canonical test vectors bytewise. In production the default is + // "now", which is what we want. + if (impl_->fixed_time().has_value()) { + signing_config.SetSigningTimepoint(Aws::Crt::DateTime(*impl_->fixed_time())); + } + + // Hand aws-crt-cpp the credentials provider. For the default chain this is + // where Environment / Profile / STS Web Identity / IMDS resolution happens, + // transparently cached and refreshed inside aws-c-auth. + signing_config.SetCredentialsProvider(impl_->provider()); + + // Sign. For a static provider the callback fires synchronously; for IMDS / + // STS the first call blocks while the provider fetches credentials (then + // caches them until near expiry). Either way we wrap in a promise so the + // public API stays sync. + Auth::Sigv4HttpRequestSigner signer; + std::promise promise; + auto future = promise.get_future(); + const bool scheduled = signer.SignRequest( + req, signing_config, + [&promise](const std::shared_ptr& /*signed_req*/, + int error_code) { promise.set_value(error_code); }); + if (!scheduled) { + return AuthenticationFailed( + "SigV4: Sigv4HttpRequestSigner::SignRequest failed to " + "schedule signing"); + } + const int error_code = future.get(); + if (error_code != 0) { + return AuthenticationFailed("SigV4: signing failed (aws error {})", error_code); + } + + // Collect headers that the signer added (anything not already in + // `existing_headers`). These are what the HTTP client needs to append. + std::unordered_map out; + const size_t header_count = req->GetHeaderCount(); + for (size_t i = 0; i < header_count; ++i) { + auto header_opt = req->GetHeader(i); + if (!header_opt.has_value()) continue; + const Http::HttpHeader& h = header_opt.value(); + std::string name = ByteCursorToStdString(h.name); + if (pre_signed_header_names.contains(name)) { + continue; + } + out.emplace(std::move(name), ByteCursorToStdString(h.value)); + } + return out; +} + +// --------------------------------------------------------------------------- +// SigV4 AuthManager + AuthSession (registered below via MakeSigV4Manager). + +namespace { + +/// \brief AuthSession that signs each outgoing request with AWS SigV4. +/// +/// If ``delegate`` is non-null, it is invoked first so its headers (e.g., +/// ``Authorization: Bearer `` added by OAuth2) participate in the +/// signed header set — matching the behaviour of Java Iceberg's +/// ``RESTSigV4AuthSession``. SigV4 then overwrites ``Authorization`` with +/// its own signature, so the delegate header is covered by the signature +/// but not visible to the server as a second auth scheme. +class SigV4AuthSession : public AuthSession { + public: + SigV4AuthSession(std::shared_ptr signer, + std::shared_ptr delegate) + : signer_(std::move(signer)), delegate_(std::move(delegate)) {} + + Status Authenticate( + [[maybe_unused]] std::unordered_map& headers) override { + // Header-only signing is impossible for SigV4 (we need method/url/body). + // Callers routed through HttpClient always use the SignableRequest + // overload below; reaching here indicates a caller that does not plumb + // request context. + return AuthenticationFailed( + "SigV4 requires request context; call Authenticate(SignableRequest, headers)."); + } + + Status Authenticate(const SignableRequest& request, + std::unordered_map& headers) override { + if (delegate_) { + ICEBERG_RETURN_UNEXPECTED(delegate_->Authenticate(request, headers)); + } + ICEBERG_ASSIGN_OR_RAISE(auto signed_headers, signer_->Sign(request, headers)); + for (auto& [k, v] : signed_headers) { + headers.insert_or_assign(std::move(k), std::move(v)); + } + return {}; + } + + Status Close() override { + if (delegate_) { + return delegate_->Close(); + } + return {}; + } + + private: + std::shared_ptr signer_; + std::shared_ptr delegate_; +}; + +/// \brief Manager that constructs SigV4AuthSessions for a REST catalog. +class SigV4Manager : public AuthManager { + public: + SigV4Manager(std::shared_ptr signer, std::unique_ptr delegate) + : signer_(std::move(signer)), delegate_(std::move(delegate)) {} + + Result> InitSession( + HttpClient& init_client, + const std::unordered_map& properties) override { + std::shared_ptr delegate_session; + if (delegate_) { + ICEBERG_ASSIGN_OR_RAISE(delegate_session, + delegate_->InitSession(init_client, properties)); + } + return std::make_shared(signer_, std::move(delegate_session)); + } + + Result> CatalogSession( + HttpClient& client, + const std::unordered_map& properties) override { + std::shared_ptr delegate_session; + if (delegate_) { + ICEBERG_ASSIGN_OR_RAISE(delegate_session, + delegate_->CatalogSession(client, properties)); + } + return std::make_shared(signer_, std::move(delegate_session)); + } + + Status Close() override { + if (delegate_) { + return delegate_->Close(); + } + return {}; + } + + private: + std::shared_ptr signer_; + std::unique_ptr delegate_; +}; + +std::string GetOr(const std::unordered_map& properties, + const std::string& key, std::string_view fallback) { + auto it = properties.find(key); + if (it == properties.end() || it->second.empty()) { + return std::string(fallback); + } + return it->second; +} + +/// Resolve the credentials provider enum from the property value. +/// Auto-detects when the property is unset: static if an access key is +/// configured, default chain otherwise. +Result ResolveCredentialsProvider(std::string_view raw, + bool have_static_keys) { + if (raw.empty()) { + return have_static_keys ? SigV4CredentialsProvider::kStatic + : SigV4CredentialsProvider::kDefault; + } + if (raw == AuthProperties::kSigV4ProviderStatic) { + return SigV4CredentialsProvider::kStatic; + } + if (raw == AuthProperties::kSigV4ProviderDefault) { + return SigV4CredentialsProvider::kDefault; + } + return InvalidArgument( + "SigV4: unsupported credentials-provider '{}' (expected 'static' or 'default')", + raw); +} + +} // namespace + +Result> MakeSigV4Manager( + std::string_view name, + const std::unordered_map& properties) { + SigV4Config sigv4_config; + sigv4_config.region = GetOr(properties, AuthProperties::kSigV4Region, ""); + sigv4_config.service = GetOr(properties, AuthProperties::kSigV4Service, + AuthProperties::kSigV4DefaultService); + sigv4_config.access_key_id = GetOr(properties, AuthProperties::kSigV4AccessKeyId, ""); + sigv4_config.secret_access_key = + GetOr(properties, AuthProperties::kSigV4SecretAccessKey, ""); + sigv4_config.session_token = GetOr(properties, AuthProperties::kSigV4SessionToken, ""); + + const std::string provider_raw = + GetOr(properties, AuthProperties::kSigV4CredentialsProvider, ""); + // Catch the common typo where a user sets one of the static-key pair but + // not the other — without this check we'd silently fall through to the + // default chain and the user would spend a while wondering why their + // configured secret is being ignored. + const bool has_ak = !sigv4_config.access_key_id.empty(); + const bool has_sk = !sigv4_config.secret_access_key.empty(); + if (provider_raw.empty() && has_ak != has_sk) { + return InvalidArgument( + "SigV4: access-key-id and secret-access-key must be set together"); + } + ICEBERG_ASSIGN_OR_RAISE(sigv4_config.provider, + ResolveCredentialsProvider(provider_raw, has_ak)); + + ICEBERG_ASSIGN_OR_RAISE(auto signer, SigV4Signer::Make(std::move(sigv4_config))); + + // Optional delegate auth type: the inner manager (e.g., OAuth2) produces a + // session whose headers are then SigV4-signed on top. Matches the Java + // RESTSigV4AuthManager behaviour. + std::unique_ptr delegate; + auto delegate_it = properties.find(AuthProperties::kSigV4DelegateAuthType); + if (delegate_it != properties.end() && !delegate_it->second.empty()) { + // Avoid infinite recursion if a user sets the delegate type back to sigv4. + if (delegate_it->second == AuthProperties::kAuthTypeSigV4) { + return InvalidArgument( + "SigV4 delegate auth type cannot itself be 'sigv4' (would recurse)"); + } + // Build a properties map with rest.auth.type set to the delegate type, so + // AuthManagers::Load resolves the inner manager. + std::unordered_map delegate_properties = properties; + delegate_properties[AuthProperties::kAuthType] = delegate_it->second; + ICEBERG_ASSIGN_OR_RAISE(delegate, AuthManagers::Load(name, delegate_properties)); + } + + return std::make_unique(std::move(signer), std::move(delegate)); +} + +} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/auth/sigv4_signer.h b/src/iceberg/catalog/rest/auth/sigv4_signer.h new file mode 100644 index 000000000..088349a6f --- /dev/null +++ b/src/iceberg/catalog/rest/auth/sigv4_signer.h @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "iceberg/catalog/rest/auth/auth_session.h" +#include "iceberg/catalog/rest/iceberg_rest_export.h" +#include "iceberg/result.h" + +/// \file iceberg/catalog/rest/auth/sigv4_signer.h +/// \brief AWS Signature V4 request signer for the REST catalog. +/// +/// Wraps aws-crt-cpp's Sigv4HttpRequestSigner so callers can ask "what +/// headers do I need to add to this HTTP request so AWS accepts it?" +/// without depending on aws-crt-cpp types directly. + +namespace iceberg::rest::auth { + +/// \brief Which credential source the signer should use. +enum class SigV4CredentialsProvider { + /// Static access-key / secret-access-key (/ optional session-token) from + /// config. Best for tests and short-lived scripts. + kStatic, + /// aws-crt-cpp's SDK-standard default chain, cached: + /// Environment → Profile → STS Web Identity (IRSA) → IMDSv2 / ECS. + /// Credentials are refreshed automatically by aws-crt-cpp before expiry. + /// Best for EC2/EKS/ECS deployments and AWS Glue integration. + kDefault, +}; + +/// \brief SigV4 signing configuration. +struct SigV4Config { + /// AWS region (e.g., "us-east-1"). Required. + std::string region; + /// AWS signing service name. "glue" for AWS Glue's Iceberg REST endpoint; + /// "execute-api" for API Gateway; "s3tables" for S3 Tables. Required. + std::string service; + /// Which credential source the signer should consult. + SigV4CredentialsProvider provider = SigV4CredentialsProvider::kStatic; + /// Static AWS access key ID. Required when ``provider == kStatic``. + std::string access_key_id; + /// Static AWS secret access key. Required when ``provider == kStatic``. + std::string secret_access_key; + /// Optional STS session token. When present, X-Amz-Security-Token is + /// added to the request and included in the signed header set. + /// Only consulted when ``provider == kStatic``. + std::string session_token; +}; + +/// \brief Computes SigV4 headers for outgoing HTTP requests. +/// +/// Thread-safe after construction. +class ICEBERG_REST_EXPORT SigV4Signer { + public: + /// \brief Construct a signer. Validates that required config fields are set. + static Result> Make(SigV4Config config); + + /// \brief Construct a signer that pins every Sign() call to a fixed UTC + /// timestamp. Intended strictly for deterministic unit tests + /// (canonical SigV4 test vectors) — production callers MUST use + /// ``Make`` so the signing time stays in sync with the host clock. + static Result> MakeForTests( + SigV4Config config, std::chrono::system_clock::time_point signing_time); + + ~SigV4Signer(); + SigV4Signer(const SigV4Signer&) = delete; + SigV4Signer& operator=(const SigV4Signer&) = delete; + + /// \brief Compute the SigV4 headers for a request. + /// + /// \param request The method/URL/query/body to sign. + /// \param existing_headers Headers already set on the request (e.g., a + /// delegate auth manager may have added "Authorization: Bearer ..."; + /// that header MUST be part of the signed header set). + /// \return The headers that the caller should insert_or_assign onto the + /// outgoing request to carry the signature. Typically + /// ``Authorization``, ``X-Amz-Date``, ``X-Amz-Content-Sha256``, + /// ``Host`` (if it wasn't in ``existing_headers``), and + /// ``X-Amz-Security-Token`` (if session_token is configured). + Result> Sign( + const SignableRequest& request, + const std::unordered_map& existing_headers) const; + + private: + class Impl; + explicit SigV4Signer(std::unique_ptr impl); + std::unique_ptr impl_; +}; + +} // namespace iceberg::rest::auth diff --git a/src/iceberg/catalog/rest/http_client.cc b/src/iceberg/catalog/rest/http_client.cc index d45cc5b7c..ca248124d 100644 --- a/src/iceberg/catalog/rest/http_client.cc +++ b/src/iceberg/catalog/rest/http_client.cc @@ -30,6 +30,7 @@ #include "iceberg/json_serde_internal.h" #include "iceberg/result.h" #include "iceberg/util/macros.h" +#include "iceberg/util/url_encoder.h" namespace iceberg::rest { @@ -69,7 +70,16 @@ namespace { constexpr std::string_view kRestExceptionType = "RESTException"; /// \brief Prepare headers for an HTTP request. +/// +/// Merges default + per-request headers and then lets the AuthSession +/// authenticate the request. The full request context (method, URL, query +/// params, body) is forwarded to the session so auth schemes that need to +/// sign over the request (e.g., SigV4) see what they need; header-only +/// schemes ignore the extra context. Result BuildHeaders( + std::string_view method, std::string_view url, + const std::unordered_map* query_params, + std::string_view body, const std::unordered_map& request_headers, const std::unordered_map& default_headers, auth::AuthSession& session) { @@ -77,10 +87,31 @@ Result BuildHeaders( for (const auto& [key, val] : request_headers) { headers.insert_or_assign(key, val); } - ICEBERG_RETURN_UNEXPECTED(session.Authenticate(headers)); + auth::SignableRequest signable{ + .method = method, .url = url, .query_params = query_params, .body = body}; + ICEBERG_RETURN_UNEXPECTED(session.Authenticate(signable, headers)); return cpr::Header(headers.begin(), headers.end()); } +/// \brief Serialize a form-data map to an application/x-www-form-urlencoded +/// body string. Used so that SigV4 can hash the payload it will +/// actually see on the wire. +std::string EncodeFormBody( + const std::unordered_map& form_data) { + std::string out; + bool first = true; + for (const auto& [key, val] : form_data) { + if (!first) { + out.push_back('&'); + } + first = false; + out.append(UrlEncoder::Encode(key)); + out.push_back('='); + out.append(UrlEncoder::Encode(val)); + } + return out; +} + cpr::SslOptions BuildSslOptions(const SslConfig& config) { cpr::SslOptions opts; opts.verify_host = config.verify; @@ -175,7 +206,8 @@ Result HttpClient::Get( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + BuildHeaders("GET", path, ¶ms, /*body=*/{}, headers, + default_headers_, session)); cpr::Response response = cpr::Get(cpr::Url{path}, GetParameters(params), all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -190,7 +222,8 @@ Result HttpClient::Post( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + BuildHeaders("POST", path, /*query_params=*/nullptr, body, + headers, default_headers_, session)); cpr::Response response = cpr::Post(cpr::Url{path}, cpr::Body{body}, all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -207,8 +240,12 @@ Result HttpClient::PostForm( const ErrorHandler& error_handler, auth::AuthSession& session) { std::unordered_map form_headers(headers); form_headers.insert_or_assign(kHeaderContentType, kMimeTypeFormUrlEncoded); + // Encode the form body ourselves so that the same bytes we'll send on the + // wire are available to sign (SigV4 hashes the payload). + std::string form_body = EncodeFormBody(form_data); ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(form_headers, default_headers_, session)); + BuildHeaders("POST", path, /*query_params=*/nullptr, form_body, + form_headers, default_headers_, session)); std::vector pair_list; pair_list.reserve(form_data.size()); for (const auto& [key, val] : form_data) { @@ -227,8 +264,9 @@ Result HttpClient::PostForm( Result HttpClient::Head( const std::string& path, const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { - ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + ICEBERG_ASSIGN_OR_RAISE( + auto all_headers, BuildHeaders("HEAD", path, /*query_params=*/nullptr, /*body=*/{}, + headers, default_headers_, session)); cpr::Response response = cpr::Head(cpr::Url{path}, all_headers, BuildSslOptions(ssl_config_), *connection_pool_); @@ -243,7 +281,8 @@ Result HttpClient::Delete( const std::unordered_map& headers, const ErrorHandler& error_handler, auth::AuthSession& session) { ICEBERG_ASSIGN_OR_RAISE(auto all_headers, - BuildHeaders(headers, default_headers_, session)); + BuildHeaders("DELETE", path, ¶ms, /*body=*/{}, headers, + default_headers_, session)); cpr::Response response = cpr::Delete(cpr::Url{path}, GetParameters(params), all_headers, BuildSslOptions(ssl_config_), *connection_pool_); diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 2dc90da64..3f706f5c9 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -223,12 +223,16 @@ if(ICEBERG_BUILD_REST) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() - add_rest_iceberg_test(rest_catalog_test - SOURCES - auth_manager_test.cc - endpoint_test.cc - rest_json_serde_test.cc - rest_util_test.cc) + set(ICEBERG_REST_TEST_SOURCES auth_manager_test.cc endpoint_test.cc + rest_json_serde_test.cc rest_util_test.cc) + if(ICEBERG_REST_HAVE_SIGV4) + list(APPEND ICEBERG_REST_TEST_SOURCES sigv4_signer_test.cc) + endif() + + add_rest_iceberg_test(rest_catalog_test SOURCES ${ICEBERG_REST_TEST_SOURCES}) + if(ICEBERG_REST_HAVE_SIGV4) + target_compile_definitions(rest_catalog_test PRIVATE ICEBERG_REST_WITH_SIGV4) + endif() if(ICEBERG_BUILD_REST_INTEGRATION_TESTS) add_rest_iceberg_test(rest_catalog_integration_test diff --git a/src/iceberg/test/auth_manager_test.cc b/src/iceberg/test/auth_manager_test.cc index bd06fee3f..91c241e03 100644 --- a/src/iceberg/test/auth_manager_test.cc +++ b/src/iceberg/test/auth_manager_test.cc @@ -33,6 +33,7 @@ #include "iceberg/catalog/rest/http_client.h" #include "iceberg/catalog/rest/json_serde_internal.h" #include "iceberg/json_serde_internal.h" +#include "iceberg/result.h" #include "iceberg/test/matchers.h" namespace iceberg::rest::auth { @@ -358,4 +359,212 @@ TEST_F(AuthManagerTest, OAuthTokenResponseNATokenType) { EXPECT_EQ(result->token_type, "N_A"); } +// ---- SigV4 ---- + +#ifdef ICEBERG_REST_WITH_SIGV4 + +namespace { + +std::unordered_map MinimalSigV4Properties() { + return { + {AuthProperties::kAuthType, AuthProperties::kAuthTypeSigV4}, + {AuthProperties::kSigV4Region, "us-east-1"}, + {AuthProperties::kSigV4Service, "glue"}, + {AuthProperties::kSigV4AccessKeyId, "AKIAIOSFODNN7EXAMPLE"}, + {AuthProperties::kSigV4SecretAccessKey, "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"}, + }; +} + +SignableRequest MakeGetRequest(std::string_view url) { + return SignableRequest{ + .method = "GET", .url = url, .query_params = nullptr, .body = {}}; +} + +} // namespace + +// Verifies that auth type "sigv4" resolves to a real manager (no longer +// "NotImplemented") once credentials are supplied. +TEST_F(AuthManagerTest, LoadSigV4AuthManager) { + auto properties = MinimalSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + ASSERT_NE(manager_result.value(), nullptr); +} + +// Verifies that "sigv4" auth type is case-insensitive. +TEST_F(AuthManagerTest, SigV4AuthTypeCaseInsensitive) { + for (const auto& auth_type : {"SIGV4", "SigV4", "sIgV4"}) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kAuthType] = auth_type; + EXPECT_THAT(AuthManagers::Load("test-catalog", properties), IsOk()) + << "Failed for auth type: " << auth_type; + } +} + +// Required fields: missing region. +TEST_F(AuthManagerTest, SigV4MissingRegion) { + auto properties = MinimalSigV4Properties(); + properties.erase(AuthProperties::kSigV4Region); + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// When the user explicitly requests the static provider but omits the +// access-key-id, we must reject the config (not silently fall back to the +// default chain). +TEST_F(AuthManagerTest, SigV4ExplicitStaticMissingAccessKeyId) { + auto properties = MinimalSigV4Properties(); + properties.erase(AuthProperties::kSigV4AccessKeyId); + properties[AuthProperties::kSigV4CredentialsProvider] = + std::string(AuthProperties::kSigV4ProviderStatic); + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// Same for a missing secret: explicit static without the secret is an error. +TEST_F(AuthManagerTest, SigV4ExplicitStaticMissingSecretAccessKey) { + auto properties = MinimalSigV4Properties(); + properties.erase(AuthProperties::kSigV4SecretAccessKey); + properties[AuthProperties::kSigV4CredentialsProvider] = + std::string(AuthProperties::kSigV4ProviderStatic); + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// Verifies that SigV4 signs a simple GET request and populates the expected +// AWS headers. +TEST_F(AuthManagerTest, SigV4SignsRequestHeaders) { + auto properties = MinimalSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + auto request = + MakeGetRequest("https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"); + ASSERT_THAT(session_result.value()->Authenticate(request, headers), IsOk()); + + EXPECT_TRUE(headers.contains("Authorization")); + EXPECT_THAT(headers["Authorization"], + ::testing::StartsWith("AWS4-HMAC-SHA256 Credential=")); + EXPECT_THAT(headers["Authorization"], + ::testing::HasSubstr("/us-east-1/glue/aws4_request")); + EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("SignedHeaders=")); + EXPECT_THAT(headers["Authorization"], ::testing::HasSubstr("Signature=")); + EXPECT_TRUE(headers.contains("X-Amz-Date")); + // aws-c-auth emits this header lowercase (SigV4 spec uses lowercase in + // canonical form; HTTP treats it case-insensitively on the wire). + EXPECT_TRUE(headers.contains("x-amz-content-sha256")); + // No session token supplied, so X-Amz-Security-Token must NOT be set. + EXPECT_FALSE(headers.contains("X-Amz-Security-Token")); +} + +// Verifies that the session_token surfaces as an X-Amz-Security-Token header +// on the signed request. +TEST_F(AuthManagerTest, SigV4IncludesSessionToken) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4SessionToken] = "SESSION-TOKEN-12345"; + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + auto request = + MakeGetRequest("https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"); + ASSERT_THAT(session_result.value()->Authenticate(request, headers), IsOk()); + + ASSERT_TRUE(headers.contains("X-Amz-Security-Token")); + EXPECT_EQ(headers["X-Amz-Security-Token"], "SESSION-TOKEN-12345"); +} + +// A SigV4 session rejects the headers-only Authenticate() overload — signing +// is impossible without request context. +TEST_F(AuthManagerTest, SigV4RejectsHeadersOnlyAuthenticate) { + auto properties = MinimalSigV4Properties(); + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + std::unordered_map headers; + auto status = session_result.value()->Authenticate(headers); + EXPECT_THAT(status, IsError(ErrorKind::kAuthenticationFailed)); +} + +// Using oauth2 as a SigV4 delegate: both the Bearer token (from OAuth) and +// the SigV4 signature flow through; SigV4 overwrites Authorization (matches +// the expected behaviour documented in the design). +TEST_F(AuthManagerTest, SigV4WithOAuth2Delegate) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4DelegateAuthType] = AuthProperties::kAuthTypeOAuth2; + properties[AuthProperties::kToken.key()] = "my-oauth-token"; + + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); + + std::unordered_map headers; + auto request = + MakeGetRequest("https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"); + ASSERT_THAT(session_result.value()->Authenticate(request, headers), IsOk()); + + // SigV4 replaces Authorization with its own AWS4 signature. + EXPECT_THAT(headers["Authorization"], + ::testing::StartsWith("AWS4-HMAC-SHA256 Credential=")); + EXPECT_TRUE(headers.contains("X-Amz-Date")); +} + +// Recursive delegate (sigv4 wrapping sigv4) is rejected. +TEST_F(AuthManagerTest, SigV4RejectsRecursiveDelegate) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4DelegateAuthType] = AuthProperties::kAuthTypeSigV4; + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +// Default credentials chain (env → profile → STS Web Identity → IMDS) can be +// selected explicitly. We only verify that the manager loads and returns a +// session — actually signing with the default chain would require real AWS +// credentials in the test environment. +TEST_F(AuthManagerTest, SigV4DefaultCredentialsProviderLoads) { + std::unordered_map properties = { + {AuthProperties::kAuthType, AuthProperties::kAuthTypeSigV4}, + {AuthProperties::kSigV4Region, "us-east-1"}, + {AuthProperties::kSigV4Service, "glue"}, + {AuthProperties::kSigV4CredentialsProvider, + std::string(AuthProperties::kSigV4ProviderDefault)}, + }; + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); + auto session_result = manager_result.value()->CatalogSession(client_, properties); + ASSERT_THAT(session_result, IsOk()); +} + +// Without an access-key-id and without an explicit provider, auto-detection +// picks the default chain — catalog loads successfully. +TEST_F(AuthManagerTest, SigV4AutoSelectsDefaultWhenNoStaticKeys) { + std::unordered_map properties = { + {AuthProperties::kAuthType, AuthProperties::kAuthTypeSigV4}, + {AuthProperties::kSigV4Region, "us-east-1"}, + {AuthProperties::kSigV4Service, "glue"}, + }; + auto manager_result = AuthManagers::Load("test-catalog", properties); + ASSERT_THAT(manager_result, IsOk()); +} + +// Unknown credentials-provider value is rejected. +TEST_F(AuthManagerTest, SigV4UnknownCredentialsProvider) { + auto properties = MinimalSigV4Properties(); + properties[AuthProperties::kSigV4CredentialsProvider] = "iam"; + auto result = AuthManagers::Load("test-catalog", properties); + EXPECT_THAT(result, IsError(ErrorKind::kInvalidArgument)); +} + +#endif // ICEBERG_REST_WITH_SIGV4 + } // namespace iceberg::rest::auth diff --git a/src/iceberg/test/sigv4_signer_test.cc b/src/iceberg/test/sigv4_signer_test.cc new file mode 100644 index 000000000..27b372c6f --- /dev/null +++ b/src/iceberg/test/sigv4_signer_test.cc @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/rest/auth/sigv4_signer.h" + +#include +#include +#include + +#include +#include + +#include "iceberg/catalog/rest/auth/auth_session.h" +#include "iceberg/test/matchers.h" + +/// SigV4 canonical-vector tests. +/// +/// The expected ``Authorization`` headers in these tests were computed by an +/// independent reference implementation (Python botocore 1.37's +/// ``SigV4Auth``) against the same inputs our signer sees, with the signing +/// time pinned to 2015-08-30T12:36:00Z. The generator script lives at +/// ``/tmp/gen_sigv4_vectors.py`` in the development setup; rerunning it with +/// the same inputs reproduces these exact signatures. +/// +/// Purpose: catch regressions in how we wrap aws-crt-cpp (wrong service +/// name, wrong region, wrong double-URI-encoding flag, missing +/// x-amz-content-sha256 header, etc.) — such regressions would change the +/// ``Authorization`` header bytes and fail these tests, whereas the +/// "headers present + start with AWS4-HMAC-SHA256" assertions in +/// auth_manager_test.cc would still pass. + +namespace iceberg::rest::auth { + +namespace { + +constexpr const char* kAccessKey = "AKIDEXAMPLE"; +constexpr const char* kSecretKey = "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"; +constexpr const char* kRegion = "us-east-1"; +constexpr const char* kService = "glue"; +constexpr const char* kHost = "glue.us-east-1.amazonaws.com"; + +/// 2015-08-30T12:36:00Z — matches the timestamp used when generating the +/// expected Authorization headers. +std::chrono::system_clock::time_point FixedSigningTime() { + return std::chrono::system_clock::time_point(std::chrono::seconds(1440938160)); +} + +SigV4Config DefaultConfig() { + return SigV4Config{ + .region = kRegion, + .service = kService, + .provider = SigV4CredentialsProvider::kStatic, + .access_key_id = kAccessKey, + .secret_access_key = kSecretKey, + .session_token = "", + }; +} + +} // namespace + +// ----------------------------------------------------------------------------- +// GET, empty body, no query params. +TEST(SigV4SignerTest, CanonicalVectorGetVanilla) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/config"; + SignableRequest request{ + .method = "GET", + .url = url, + .query_params = nullptr, + .body = {}, + }; + std::unordered_map existing{{"Host", kHost}}; + + auto result = signer_result.value()->Sign(request, existing); + ASSERT_THAT(result, IsOk()); + + EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); + EXPECT_EQ(result->at("x-amz-content-sha256"), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + EXPECT_EQ(result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "029d943d26726020757da405ee50a23ab8e28649d845d5bf69717d7246cd3f87"); +} + +// ----------------------------------------------------------------------------- +// GET with query parameters. +TEST(SigV4SignerTest, CanonicalVectorGetWithQuery) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"; + std::unordered_map query_params{ + {"parent", "default"}, + {"pageSize", "10"}, + }; + SignableRequest request{ + .method = "GET", + .url = url, + .query_params = &query_params, + .body = {}, + }; + std::unordered_map existing{{"Host", kHost}}; + + auto result = signer_result.value()->Sign(request, existing); + ASSERT_THAT(result, IsOk()); + + EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); + EXPECT_EQ(result->at("x-amz-content-sha256"), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); + EXPECT_EQ(result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "e82f9da81e1f789f7d55aae636026b6df73a0c23fd3ba08b6d4a7d4e01c1062c"); +} + +// ----------------------------------------------------------------------------- +// POST with a JSON body and a Content-Type header in the signed set. +TEST(SigV4SignerTest, CanonicalVectorPostJsonBody) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/namespaces"; + std::string body = R"({"namespace":["default"],"properties":{}})"; + SignableRequest request{ + .method = "POST", + .url = url, + .query_params = nullptr, + .body = body, + }; + std::unordered_map existing{ + {"Host", kHost}, + {"Content-Type", "application/json"}, + }; + + auto result = signer_result.value()->Sign(request, existing); + ASSERT_THAT(result, IsOk()); + + EXPECT_EQ(result->at("X-Amz-Date"), "20150830T123600Z"); + EXPECT_EQ(result->at("x-amz-content-sha256"), + "6a2d107daab05b4284e65fac424da4093a29ad3c9726c6f3e8bf85d5daeb0e34"); + EXPECT_EQ(result->at("Authorization"), + "AWS4-HMAC-SHA256 " + "Credential=AKIDEXAMPLE/20150830/us-east-1/glue/aws4_request, " + "SignedHeaders=content-type;host;x-amz-content-sha256;x-amz-date, " + "Signature=" + "d4c36f6140f60082617e086c3945701678f24db257724d9c6392962885ecb099"); +} + +// ----------------------------------------------------------------------------- +// Sanity: two successive signs of the same request with the same pinned +// timestamp produce byte-identical output. Proves our impl is deterministic +// given fixed inputs (the credentials provider is not re-fetched, the +// timestamp is honored, nothing ambient leaks in). +TEST(SigV4SignerTest, DeterministicWithFixedTimestamp) { + auto signer_result = SigV4Signer::MakeForTests(DefaultConfig(), FixedSigningTime()); + ASSERT_THAT(signer_result, IsOk()); + + std::string url = "https://glue.us-east-1.amazonaws.com/iceberg/v1/config"; + SignableRequest request{ + .method = "GET", + .url = url, + .query_params = nullptr, + .body = {}, + }; + std::unordered_map existing{{"Host", kHost}}; + + auto r1 = signer_result.value()->Sign(request, existing); + auto r2 = signer_result.value()->Sign(request, existing); + ASSERT_THAT(r1, IsOk()); + ASSERT_THAT(r2, IsOk()); + EXPECT_EQ(r1->at("Authorization"), r2->at("Authorization")); + EXPECT_EQ(r1->at("X-Amz-Date"), r2->at("X-Amz-Date")); +} + +} // namespace iceberg::rest::auth