From 647fbbd0430beb92ec07938223e2461dcfd3bab5 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Thu, 20 Jun 2024 15:55:27 +0100 Subject: [PATCH] Remove metrics endpoints (#6283) --- CHANGELOG.md | 4 + doc/schemas/app_openapi.json | 73 +----------- doc/schemas/gov_openapi.json | 73 +----------- doc/schemas/node_openapi.json | 85 ++------------ include/ccf/app_interface.h | 15 +++ include/ccf/base_endpoint_registry.h | 5 - include/ccf/endpoint_registry.h | 48 ++++---- samples/apps/logging/logging.cpp | 2 +- .../apps/programmability/programmability.cpp | 2 +- src/endpoints/base_endpoint_registry.cpp | 21 ---- src/endpoints/common_endpoint_registry.cpp | 24 ---- src/endpoints/endpoint_registry.cpp | 48 -------- src/node/gov/gov_endpoint_registry.h | 20 ++++ src/node/rpc/frontend.h | 93 ++++++++------- src/node/rpc/jwt_management.h | 9 +- src/node/rpc/member_frontend.h | 2 +- src/node/rpc/node_frontend.h | 67 ++++++----- src/node/rpc/test/frontend_test.cpp | 109 +++++++++++++----- tests/e2e_logging.py | 63 ---------- tests/jwt_test.py | 53 +++------ 20 files changed, 267 insertions(+), 549 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1db6cd265f3c..5ae03a8e12b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - `::consensus` is now `ccf::consensus` - `::tls` is now `ccf::tls` +### Removed + +- Removed the existing metrics endpoint and API (`GET /api/metrics`, `get_metrics_v1`). Stats for request execution can instead be gathered by overriding the `EndpointRegistry::handle_event_request_completed()` method. + ## [5.0.0-dev18] [5.0.0-dev18]: https://github.com/microsoft/CCF/releases/tag/ccf-5.0.0-dev18 diff --git a/doc/schemas/app_openapi.json b/doc/schemas/app_openapi.json index 76281742aaa9..e711e6655b81 100644 --- a/doc/schemas/app_openapi.json +++ b/doc/schemas/app_openapi.json @@ -37,54 +37,6 @@ ], "type": "string" }, - "EndpointMetrics": { - "properties": { - "metrics": { - "$ref": "#/components/schemas/EndpointMetricsEntry_array" - } - }, - "required": [ - "metrics" - ], - "type": "object" - }, - "EndpointMetricsEntry": { - "properties": { - "calls": { - "$ref": "#/components/schemas/uint64" - }, - "errors": { - "$ref": "#/components/schemas/uint64" - }, - "failures": { - "$ref": "#/components/schemas/uint64" - }, - "method": { - "$ref": "#/components/schemas/string" - }, - "path": { - "$ref": "#/components/schemas/string" - }, - "retries": { - "$ref": "#/components/schemas/uint64" - } - }, - "required": [ - "path", - "method", - "calls", - "errors", - "failures", - "retries" - ], - "type": "object" - }, - "EndpointMetricsEntry_array": { - "items": { - "$ref": "#/components/schemas/EndpointMetricsEntry" - }, - "type": "array" - }, "GetCode__Out": { "properties": { "versions": { @@ -311,7 +263,7 @@ "info": { "description": "This CCF sample app implements a simple logging application, securely recording messages at client-specified IDs. It demonstrates most of the features available to CCF apps.", "title": "CCF Sample Logging App", - "version": "2.3.0" + "version": "2.3.1" }, "openapi": "3.0.0", "paths": { @@ -338,29 +290,6 @@ } } }, - "/app/api/metrics": { - "get": { - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EndpointMetrics" - } - } - }, - "description": "Default response description" - }, - "default": { - "$ref": "#/components/responses/default" - } - }, - "summary": "Usage metrics for endpoints", - "x-ccf-forwarding": { - "$ref": "#/components/x-ccf-forwarding/sometimes" - } - } - }, "/app/code": { "get": { "deprecated": true, diff --git a/doc/schemas/gov_openapi.json b/doc/schemas/gov_openapi.json index e9d9cf5b3523..9ce989aa261c 100644 --- a/doc/schemas/gov_openapi.json +++ b/doc/schemas/gov_openapi.json @@ -109,54 +109,6 @@ }, "type": "array" }, - "EndpointMetrics": { - "properties": { - "metrics": { - "$ref": "#/components/schemas/EndpointMetricsEntry_array" - } - }, - "required": [ - "metrics" - ], - "type": "object" - }, - "EndpointMetricsEntry": { - "properties": { - "calls": { - "$ref": "#/components/schemas/uint64" - }, - "errors": { - "$ref": "#/components/schemas/uint64" - }, - "failures": { - "$ref": "#/components/schemas/uint64" - }, - "method": { - "$ref": "#/components/schemas/string" - }, - "path": { - "$ref": "#/components/schemas/string" - }, - "retries": { - "$ref": "#/components/schemas/uint64" - } - }, - "required": [ - "path", - "method", - "calls", - "errors", - "failures", - "retries" - ], - "type": "object" - }, - "EndpointMetricsEntry_array": { - "items": { - "$ref": "#/components/schemas/EndpointMetricsEntry" - }, - "type": "array" - }, "EndpointProperties": { "properties": { "authn_policies": { @@ -1366,7 +1318,7 @@ "info": { "description": "This API is used to submit and query proposals which affect CCF's public governance tables.", "title": "CCF Governance API", - "version": "4.1.7" + "version": "4.1.8" }, "openapi": "3.0.0", "paths": { @@ -1452,29 +1404,6 @@ } } }, - "/gov/api/metrics": { - "get": { - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EndpointMetrics" - } - } - }, - "description": "Default response description" - }, - "default": { - "$ref": "#/components/responses/default" - } - }, - "summary": "Usage metrics for endpoints", - "x-ccf-forwarding": { - "$ref": "#/components/x-ccf-forwarding/sometimes" - } - } - }, "/gov/code": { "get": { "deprecated": true, diff --git a/doc/schemas/node_openapi.json b/doc/schemas/node_openapi.json index 152e8237d079..6ad3058a4009 100644 --- a/doc/schemas/node_openapi.json +++ b/doc/schemas/node_openapi.json @@ -183,54 +183,6 @@ ], "type": "object" }, - "EndpointMetrics": { - "properties": { - "metrics": { - "$ref": "#/components/schemas/EndpointMetricsEntry_array" - } - }, - "required": [ - "metrics" - ], - "type": "object" - }, - "EndpointMetricsEntry": { - "properties": { - "calls": { - "$ref": "#/components/schemas/uint64" - }, - "errors": { - "$ref": "#/components/schemas/uint64" - }, - "failures": { - "$ref": "#/components/schemas/uint64" - }, - "method": { - "$ref": "#/components/schemas/string" - }, - "path": { - "$ref": "#/components/schemas/string" - }, - "retries": { - "$ref": "#/components/schemas/uint64" - } - }, - "required": [ - "path", - "method", - "calls", - "errors", - "failures", - "retries" - ], - "type": "object" - }, - "EndpointMetricsEntry_array": { - "items": { - "$ref": "#/components/schemas/EndpointMetricsEntry" - }, - "type": "array" - }, "GetCode__Out": { "properties": { "versions": { @@ -449,18 +401,22 @@ ], "type": "object" }, - "JWTMetrics": { + "JWTRefreshMetrics": { "properties": { "attempts": { "$ref": "#/components/schemas/uint64" }, + "failures": { + "$ref": "#/components/schemas/uint64" + }, "successes": { "$ref": "#/components/schemas/uint64" } }, "required": [ "attempts", - "successes" + "successes", + "failures" ], "type": "object" }, @@ -937,7 +893,7 @@ "info": { "description": "This API provides public, uncredentialed access to service and node state.", "title": "CCF Public Node API", - "version": "4.9.1" + "version": "4.9.2" }, "openapi": "3.0.0", "paths": { @@ -964,29 +920,6 @@ } } }, - "/node/api/metrics": { - "get": { - "responses": { - "200": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EndpointMetrics" - } - } - }, - "description": "Default response description" - }, - "default": { - "$ref": "#/components/responses/default" - } - }, - "summary": "Usage metrics for endpoints", - "x-ccf-forwarding": { - "$ref": "#/components/x-ccf-forwarding/sometimes" - } - } - }, "/node/backup": { "get": { "responses": { @@ -1157,14 +1090,14 @@ } } }, - "/node/jwt_metrics": { + "/node/jwt_keys/refresh/metrics": { "get": { "responses": { "200": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/JWTMetrics" + "$ref": "#/components/schemas/JWTRefreshMetrics" } } }, diff --git a/include/ccf/app_interface.h b/include/ccf/app_interface.h index 150592242efc..f5c9ed702f9e 100644 --- a/include/ccf/app_interface.h +++ b/include/ccf/app_interface.h @@ -30,6 +30,21 @@ namespace ccf UserEndpointRegistry(ccfapp::AbstractNodeContext& context) : CommonEndpointRegistry(get_actor_prefix(ActorsType::users), context) {} + + // Default behaviour is to do nothing - do NOT log summary of every request + // as it completes. Apps may override this if they wish. + void handle_event_request_completed( + const ccf::endpoints::RequestCompletedEvent& event) override + {} + + void handle_event_dispatch_failed( + const ccf::endpoints::DispatchFailedEvent& event) override + { + // Log dispatch failures, as a coarse metric of some user errors, but do + // not log the raw path, which may contain confidential fields + // misformatted into the wrong url + CCF_APP_INFO("DispatchFailedEvent: {} {}", event.method, event.status); + } }; } diff --git a/include/ccf/base_endpoint_registry.h b/include/ccf/base_endpoint_registry.h index f7f0a0834054..17487562b145 100644 --- a/include/ccf/base_endpoint_registry.h +++ b/include/ccf/base_endpoint_registry.h @@ -186,10 +186,5 @@ namespace ccf /** Get untrusted time from the host of the currently executing node. */ ApiResult get_untrusted_host_time_v1(::timespec& time); - - /** Get usage metrics from endpoints under the registry, including - * number of calls, errors, failures and retries. - */ - ApiResult get_metrics_v1(EndpointMetrics& endpoint_metrics); }; } diff --git a/include/ccf/endpoint_registry.h b/include/ccf/endpoint_registry.h index 8be93dbb3746..d9575399018e 100644 --- a/include/ccf/endpoint_registry.h +++ b/include/ccf/endpoint_registry.h @@ -39,6 +39,25 @@ namespace ccf::endpoints PathTemplateSpec spec; }; + struct RequestCompletedEvent + { + std::string method = ""; + // This contains the path template against which the request matched. For + // instance `/user/{user_id}` rather than `/user/Bob`. This should be safe + // to log, though doing so still reveals (to anyone with stdout access) + // exactly which endpoints were executed and when. + std::string dispatch_path = ""; + int status = 0; + std::chrono::milliseconds exec_time{0}; + size_t attempts = 0; + }; + + struct DispatchFailedEvent + { + std::string method = ""; + int status = 0; + }; + void default_locally_committed_func( CommandEndpointContext& ctx, const TxID& tx_id); @@ -113,14 +132,6 @@ namespace ccf::endpoints std::string document_version = "0.0.1"; } openapi_info; - struct Metrics - { - size_t calls = 0; - size_t errors = 0; - size_t failures = 0; - size_t retries = 0; - }; - template bool get_path_param( const ccf::PathParams& params, @@ -151,12 +162,6 @@ namespace ccf::endpoints std::map>> templated_endpoints; - ccf::pal::Mutex metrics_lock; - std::map> metrics; - - EndpointRegistry::Metrics& get_metrics_for_request( - const std::string& method, const std::string& verb); - kv::Consensus* consensus = nullptr; kv::TxHistory* history = nullptr; @@ -278,13 +283,14 @@ namespace ccf::endpoints void set_history(kv::TxHistory* h); - virtual void increment_metrics_calls(const EndpointDefinitionPtr& endpoint); - virtual void increment_metrics_errors( - const EndpointDefinitionPtr& endpoint); - virtual void increment_metrics_failures( - const EndpointDefinitionPtr& endpoint); - virtual void increment_metrics_retries( - const EndpointDefinitionPtr& endpoint); + // Override these methods to log or report request metrics. + virtual void handle_event_request_completed( + const ccf::endpoints::RequestCompletedEvent& event) + {} + + virtual void handle_event_dispatch_failed( + const ccf::endpoints::DispatchFailedEvent& event) + {} virtual bool apply_uncommitted_tx_backpressure() const { diff --git a/samples/apps/logging/logging.cpp b/samples/apps/logging/logging.cpp index 729e86994d8f..f00867f69f15 100644 --- a/samples/apps/logging/logging.cpp +++ b/samples/apps/logging/logging.cpp @@ -454,7 +454,7 @@ namespace loggingapp "recording messages at client-specified IDs. It demonstrates most of " "the features available to CCF apps."; - openapi_info.document_version = "2.3.0"; + openapi_info.document_version = "2.3.1"; index_per_public_key = std::make_shared( PUBLIC_RECORDS, context, 10000, 20); diff --git a/samples/apps/programmability/programmability.cpp b/samples/apps/programmability/programmability.cpp index b93aff4c9d45..5c00f887fb39 100644 --- a/samples/apps/programmability/programmability.cpp +++ b/samples/apps/programmability/programmability.cpp @@ -76,7 +76,7 @@ namespace programmabilityapp // public:custom_endpoints.* ) { - openapi_info.title = "CCF Programmabilit App"; + openapi_info.title = "CCF Programmability App"; openapi_info.description = "Lightweight application demonstrating app-space JS programmability"; openapi_info.document_version = "0.0.1"; diff --git a/src/endpoints/base_endpoint_registry.cpp b/src/endpoints/base_endpoint_registry.cpp index 983dcf7b3703..4d96f5b62a99 100644 --- a/src/endpoints/base_endpoint_registry.cpp +++ b/src/endpoints/base_endpoint_registry.cpp @@ -319,25 +319,4 @@ namespace ccf return ApiResult::OK; } - - ApiResult BaseEndpointRegistry::get_metrics_v1( - EndpointMetrics& endpoint_metrics) - { - endpoint_metrics.metrics.clear(); - std::lock_guard guard(metrics_lock); - for (const auto& [path, verb_metrics] : metrics) - { - for (const auto& [verb, metric] : verb_metrics) - { - endpoint_metrics.metrics.push_back( - {path, - verb, - metric.calls, - metric.errors, - metric.failures, - metric.retries}); - } - } - return ApiResult::OK; - } } diff --git a/src/endpoints/common_endpoint_registry.cpp b/src/endpoints/common_endpoint_registry.cpp index 447215a69cff..832f3b6cea7e 100644 --- a/src/endpoints/common_endpoint_registry.cpp +++ b/src/endpoints/common_endpoint_registry.cpp @@ -277,30 +277,6 @@ namespace ccf .set_openapi_summary("OpenAPI schema") .install(); - auto endpoint_metrics_fn = [this](auto&, nlohmann::json&&) { - EndpointMetrics out; - const auto result = get_metrics_v1(out); - if (result == ccf::ApiResult::OK) - { - return make_success(out); - } - else - { - return make_error( - HTTP_STATUS_INTERNAL_SERVER_ERROR, - ccf::errors::InternalError, - fmt::format("Error code: {}", ccf::api_result_to_str(result))); - } - }; - make_command_endpoint( - "/api/metrics", - HTTP_GET, - json_command_adapter(endpoint_metrics_fn), - no_auth_required) - .set_auto_schema() - .set_openapi_summary("Usage metrics for endpoints") - .install(); - auto is_tx_committed = [this](ccf::View view, ccf::SeqNo seqno, std::string& error_reason) { return ccf::historical::is_tx_committed_v2( diff --git a/src/endpoints/endpoint_registry.cpp b/src/endpoints/endpoint_registry.cpp index a0d42c1b81b5..b477764a2f62 100644 --- a/src/endpoints/endpoint_registry.cpp +++ b/src/endpoints/endpoint_registry.cpp @@ -173,18 +173,6 @@ namespace ccf::endpoints return spec; } - EndpointRegistry::Metrics& EndpointRegistry::get_metrics_for_request( - const std::string& method_, const std::string& verb) - { - auto substr_start = method_.find_first_not_of('/'); - if (substr_start == std::string::npos) - { - substr_start = 0; - } - auto method = method_.substr(substr_start); - return metrics[method][verb]; - } - void default_locally_committed_func( CommandEndpointContext& ctx, const TxID& tx_id) { @@ -576,40 +564,4 @@ namespace ccf::endpoints { history = h; } - - void EndpointRegistry::increment_metrics_calls( - const endpoints::EndpointDefinitionPtr& endpoint) - { - std::lock_guard guard(metrics_lock); - get_metrics_for_request( - endpoint->dispatch.uri_path, endpoint->dispatch.verb.c_str()) - .calls++; - } - - void EndpointRegistry::increment_metrics_errors( - const endpoints::EndpointDefinitionPtr& endpoint) - { - std::lock_guard guard(metrics_lock); - get_metrics_for_request( - endpoint->dispatch.uri_path, endpoint->dispatch.verb.c_str()) - .errors++; - } - - void EndpointRegistry::increment_metrics_failures( - const endpoints::EndpointDefinitionPtr& endpoint) - { - std::lock_guard guard(metrics_lock); - get_metrics_for_request( - endpoint->dispatch.uri_path, endpoint->dispatch.verb.c_str()) - .failures++; - } - - void EndpointRegistry::increment_metrics_retries( - const endpoints::EndpointDefinitionPtr& endpoint) - { - std::lock_guard guard(metrics_lock); - get_metrics_for_request( - endpoint->dispatch.uri_path, endpoint->dispatch.verb.c_str()) - .retries++; - } } diff --git a/src/node/gov/gov_endpoint_registry.h b/src/node/gov/gov_endpoint_registry.h index 52fc3d2fd811..4a67504e6265 100644 --- a/src/node/gov/gov_endpoint_registry.h +++ b/src/node/gov/gov_endpoint_registry.h @@ -50,6 +50,26 @@ namespace ccf rpc_ctx.get_request_path() == "/gov/members/proposals:create"); } + // Log these events on /gov frontend. Everything here is public, so + // safe to display in clear in the log + void handle_event_request_completed( + const ccf::endpoints::RequestCompletedEvent& event) override + { + GOV_INFO_FMT( + "RequestCompletedEvent: {} {} {} {}ms {} attempt(s)", + event.method, + event.dispatch_path, + event.status, + event.exec_time.count(), + event.attempts); + } + + void handle_event_dispatch_failed( + const ccf::endpoints::DispatchFailedEvent& event) override + { + GOV_INFO_FMT("DispatchFailedEvent: {} {}", event.method, event.status); + } + void api_endpoint(ccf::endpoints::ReadOnlyEndpointContext& ctx) override { using namespace ccf::gov::endpoints; diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index ba547f2a7ae5..7368a4f80021 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -13,6 +13,7 @@ #include "ccf/service/tables/nodes.h" #include "ccf/service/tables/service.h" #include "common/configuration.h" +#include "enclave/enclave_time.h" #include "enclave/rpc_handler.h" #include "forwarder.h" #include "http/http_jwt.h" @@ -70,22 +71,6 @@ namespace ccf endpoints.set_history(history); } - void update_metrics( - const std::shared_ptr& ctx, - const endpoints::EndpointDefinitionPtr& endpoint) - { - int cat = ctx->get_response_status() / 100; - switch (cat) - { - case 4: - endpoints.increment_metrics_errors(endpoint); - return; - case 5: - endpoints.increment_metrics_failures(endpoint); - return; - } - } - endpoints::EndpointDefinitionPtr find_endpoint( std::shared_ptr ctx, kv::CommittableTx& tx) { @@ -442,7 +427,6 @@ namespace ccf ccf::errors::InvalidAuthenticationInfo, "Invalid authentication credentials.", std::move(json_details)); - update_metrics(ctx, endpoint); } return identity; @@ -482,7 +466,7 @@ namespace ccf HTTP_STATUS_NOT_IMPLEMENTED, ccf::errors::NotImplemented, "Request cannot be forwarded to primary on HTTP/2 interface."); - update_metrics(ctx, endpoint); + return; } @@ -492,7 +476,7 @@ namespace ccf HTTP_STATUS_INTERNAL_SERVER_ERROR, ccf::errors::InternalError, "No consensus or forwarder to forward request."); - update_metrics(ctx, endpoint); + return; } @@ -504,7 +488,7 @@ namespace ccf HTTP_STATUS_SERVICE_UNAVAILABLE, ccf::errors::RequestAlreadyForwarded, "RPC was already forwarded."); - update_metrics(ctx, endpoint); + return; } @@ -522,7 +506,7 @@ namespace ccf HTTP_STATUS_SERVICE_UNAVAILABLE, ccf::errors::InternalError, "RPC could not be forwarded to unknown primary."); - update_metrics(ctx, endpoint); + return; } @@ -536,7 +520,7 @@ namespace ccf HTTP_STATUS_SERVICE_UNAVAILABLE, ccf::errors::InternalError, "Unable to establish channel to forward to primary."); - update_metrics(ctx, endpoint); + return; } @@ -555,9 +539,46 @@ namespace ccf void process_command(std::shared_ptr ctx) { size_t attempts = 0; - constexpr auto max_attempts = 30; endpoints::EndpointDefinitionPtr endpoint = nullptr; + const auto start_time = ccf::get_enclave_time(); + + process_command_inner(ctx, endpoint, attempts); + + const auto end_time = ccf::get_enclave_time(); + + if (endpoint != nullptr) + { + endpoints::RequestCompletedEvent rce; + rce.method = endpoint->dispatch.verb.c_str(); + rce.dispatch_path = endpoint->dispatch.uri_path; + rce.status = ctx->get_response_status(); + // Although enclave time returns a microsecond value, the actual + // precision/granularity depends on the host's TimeUpdater. By default + // this only advances each millisecond. Avoid implying more precision + // than that, by rounding to milliseconds + rce.exec_time = std::chrono::duration_cast( + end_time - start_time); + rce.attempts = attempts; + + endpoints.handle_event_request_completed(rce); + } + else + { + endpoints::DispatchFailedEvent dfe; + dfe.method = ctx->get_method(); + dfe.status = ctx->get_response_status(); + + endpoints.handle_event_dispatch_failed(dfe); + } + } + + void process_command_inner( + std::shared_ptr ctx, + endpoints::EndpointDefinitionPtr& endpoint, + size_t& attempts) + { + constexpr auto max_attempts = 30; while (attempts < max_attempts) { if (consensus != nullptr) @@ -601,18 +622,6 @@ namespace ccf { return; } - else - { - // Only register calls to existing endpoints - if (attempts == 1) - { - endpoints.increment_metrics_calls(endpoint); - } - else - { - endpoints.increment_metrics_retries(endpoint); - } - } try { @@ -701,7 +710,6 @@ namespace ccf if (!ctx->should_apply_writes()) { - update_metrics(ctx, endpoint); return; } @@ -778,7 +786,6 @@ namespace ccf history->try_emit_signature(); } - update_metrics(ctx, endpoint); return; } @@ -794,7 +801,7 @@ namespace ccf HTTP_STATUS_SERVICE_UNAVAILABLE, ccf::errors::TransactionReplicationFailed, "Transaction failed to replicate."); - update_metrics(ctx, endpoint); + return; } } @@ -811,7 +818,7 @@ namespace ccf { ctx->clear_response_headers(); ctx->set_error(std::move(e.error)); - update_metrics(ctx, endpoint); + return; } catch (const ccf::JsonParseError& e) @@ -819,7 +826,7 @@ namespace ccf ctx->clear_response_headers(); ctx->set_error( HTTP_STATUS_BAD_REQUEST, ccf::errors::InvalidInput, e.describe()); - update_metrics(ctx, endpoint); + return; } catch (const nlohmann::json::exception& e) @@ -827,7 +834,7 @@ namespace ccf ctx->clear_response_headers(); ctx->set_error( HTTP_STATUS_BAD_REQUEST, ccf::errors::InvalidInput, e.what()); - update_metrics(ctx, endpoint); + return; } catch (const kv::KvSerialiserException& e) @@ -846,7 +853,7 @@ namespace ccf HTTP_STATUS_INTERNAL_SERVER_ERROR, ccf::errors::InternalError, e.what()); - update_metrics(ctx, endpoint); + return; } } // end of while loop @@ -859,7 +866,7 @@ namespace ccf "Transaction continued to conflict after {} attempts. Retry " "later.", max_attempts)); - update_metrics(ctx, endpoint); + static constexpr size_t retry_after_seconds = 3; ctx->set_response_header(http::headers::RETRY_AFTER, retry_after_seconds); diff --git a/src/node/rpc/jwt_management.h b/src/node/rpc/jwt_management.h index 9fcd94777d04..1e630381d53f 100644 --- a/src/node/rpc/jwt_management.h +++ b/src/node/rpc/jwt_management.h @@ -124,22 +124,17 @@ namespace ccf static bool set_jwt_public_signing_keys( kv::Tx& tx, - const ProposalId& proposal_id, + const std::string& log_prefix, std::string issuer, const JwtIssuerMetadata& issuer_metadata, const JsonWebKeySet& jwks) { auto keys = tx.rw(Tables::JWT_PUBLIC_SIGNING_KEYS_METADATA); - - auto log_prefix = proposal_id.empty() ? - "JWT key auto-refresh" : - fmt::format("Proposal {}", proposal_id); - // add keys if (jwks.keys.empty()) { - LOG_FAIL_FMT("{}: JWKS has no keys", log_prefix, proposal_id); + LOG_FAIL_FMT("{}: JWKS has no keys", log_prefix); return false; } std::map> new_keys; diff --git a/src/node/rpc/member_frontend.h b/src/node/rpc/member_frontend.h index b0f525d8d5e5..344ec13df8ec 100644 --- a/src/node/rpc/member_frontend.h +++ b/src/node/rpc/member_frontend.h @@ -591,7 +591,7 @@ namespace ccf openapi_info.description = "This API is used to submit and query proposals which affect CCF's " "public governance tables."; - openapi_info.document_version = "4.1.7"; + openapi_info.document_version = "4.1.8"; } static std::optional get_caller_member_id( diff --git a/src/node/rpc/node_frontend.h b/src/node/rpc/node_frontend.h index ab3fbd3a892b..6ef867a53add 100644 --- a/src/node/rpc/node_frontend.h +++ b/src/node/rpc/node_frontend.h @@ -86,14 +86,15 @@ namespace ccf max_execution_time, max_cached_interpreters); - struct JWTMetrics + struct JWTRefreshMetrics { - size_t attempts; - size_t successes; + size_t attempts = 0; + size_t successes = 0; + size_t failures = 0; }; - DECLARE_JSON_TYPE(JWTMetrics) - DECLARE_JSON_REQUIRED_FIELDS(JWTMetrics, attempts, successes) + DECLARE_JSON_TYPE(JWTRefreshMetrics) + DECLARE_JSON_REQUIRED_FIELDS(JWTRefreshMetrics, attempts, successes, failures) struct SetJwtPublicSigningKeys { @@ -373,6 +374,25 @@ namespace ccf return make_success(rep); } + JWTRefreshMetrics jwt_refresh_metrics; + void handle_event_request_completed( + const ccf::endpoints::RequestCompletedEvent& event) override + { + if (event.method == "POST" && event.dispatch_path == "/jwt_keys/refresh") + { + jwt_refresh_metrics.attempts += 1; + int status_category = event.status / 100; + if (status_category >= 4) + { + jwt_refresh_metrics.failures += 1; + } + else if (status_category == 2) + { + jwt_refresh_metrics.successes += 1; + } + } + } + public: NodeEndpoints( NetworkState& network_, ccfapp::AbstractNodeContext& context_) : @@ -384,7 +404,7 @@ namespace ccf openapi_info.description = "This API provides public, uncredentialed access to service and node " "state."; - openapi_info.document_version = "4.9.1"; + openapi_info.document_version = "4.9.2"; } void init_handlers() override @@ -1448,27 +1468,6 @@ namespace ccf .set_auto_schema() .install(); - auto jwt_metrics = [this](auto& ctx, nlohmann::json&&) { - JWTMetrics m; - // Attempts are recorded by the key refresh code itself, registering - // before each call to each issuer's keys - m.attempts = node_operation.get_jwt_attempts(); - // Success is marked by the fact that the key succeeded and called - // our internal "jwt_keys/refresh" endpoint. - auto metric = get_metrics_for_request( - "/jwt_keys/refresh", llhttp_method_name(HTTP_POST)); - m.successes = metric.calls - (metric.failures + metric.errors); - return m; - }; - - make_read_only_endpoint( - "/jwt_metrics", - HTTP_GET, - json_read_only_adapter(jwt_metrics), - no_auth_required) - .set_auto_schema() - .install(); - auto version = [this](auto&, nlohmann::json&&) { GetVersion::Out result; result.ccf_version = ccf::ccf_version; @@ -1636,7 +1635,7 @@ namespace ccf "Unable to parse body."); } - auto issuers = ctx.tx.rw(this->network.jwt_issuers); + auto issuers = ctx.tx.ro(this->network.jwt_issuers); auto issuer_metadata_ = issuers->get(parsed.issuer); if (!issuer_metadata_.has_value()) { @@ -1690,11 +1689,21 @@ namespace ccf .set_openapi_hidden(true) .install(); + auto get_jwt_metrics = [this](auto& args, const nlohmann::json& params) { + return make_success(jwt_refresh_metrics); + }; + make_read_only_endpoint( + "/jwt_keys/refresh/metrics", + HTTP_GET, + json_read_only_adapter(get_jwt_metrics), + no_auth_required) + .set_auto_schema() + .install(); + auto service_config_handler = [this](auto& args, const nlohmann::json& params) { return make_success(args.tx.ro(network.config)->get()); }; - make_endpoint( "/service/configuration", HTTP_GET, diff --git a/src/node/rpc/test/frontend_test.cpp b/src/node/rpc/test/frontend_test.cpp index 9b1146f846ec..42e741dae104 100644 --- a/src/node/rpc/test/frontend_test.cpp +++ b/src/node/rpc/test/frontend_test.cpp @@ -36,19 +36,20 @@ namespace threading using namespace ccf; using namespace std; -class SimpleUserRpcFrontend : public RpcFrontend +template +class TSimpleFrontend : public RpcFrontend { -protected: - UserEndpointRegistry common_handlers; - public: - SimpleUserRpcFrontend( - kv::Store& tables, ccfapp::AbstractNodeContext& context) : - RpcFrontend(tables, common_handlers, context), - common_handlers(context) + Registry registry; + + TSimpleFrontend(kv::Store& tables, ccfapp::AbstractNodeContext& context) : + RpcFrontend(tables, registry, context), + registry(context) {} }; +using SimpleUserRpcFrontend = TSimpleFrontend; + class BaseTestFrontend : public SimpleUserRpcFrontend { public: @@ -1422,7 +1423,7 @@ TEST_CASE("Retry on conflict") } } -class TestManualConflictsFrontend : public BaseTestFrontend +class TestManualConflictsRegistry : public UserEndpointRegistry { public: using MyVals = kv::Map; @@ -1458,10 +1459,33 @@ class TestManualConflictsFrontend : public BaseTestFrontend WaitPoint before_write; WaitPoint after_write; - TestManualConflictsFrontend(kv::Store& tables) : BaseTestFrontend(tables) + struct Metrics { - open(); + size_t calls = 0; + size_t retries = 0; + size_t errors = 0; + }; + + Metrics pausable_metrics; + + void handle_event_request_completed( + const ccf::endpoints::RequestCompletedEvent& event) override + { + if (event.method == "POST" && event.dispatch_path == "/pausable") + { + pausable_metrics.calls += 1; + pausable_metrics.retries += event.attempts - 1; + if (event.status / 100 == 4) + { + pausable_metrics.errors += 1; + } + } + } + + TestManualConflictsRegistry(ccfapp::AbstractNodeContext& context) : + UserEndpointRegistry(context) + { auto pausable = [this](auto& ctx) { auto src_handle = ctx.tx.template rw(SRC); auto dst_handle = ctx.tx.template rw(DST); @@ -1478,6 +1502,36 @@ class TestManualConflictsFrontend : public BaseTestFrontend }; make_endpoint("/pausable", HTTP_POST, pausable, {user_cert_auth_policy}) .install(); + + auto get_pausable_metrics = [this](auto& ctx) { + auto j = nlohmann::json::object(); + j["calls"] = pausable_metrics.calls; + j["retries"] = pausable_metrics.retries; + j["errors"] = pausable_metrics.errors; + + ctx.rpc_ctx->set_response_body(j.dump(2)); + ctx.rpc_ctx->set_response_status(HTTP_STATUS_OK); + }; + make_endpoint("/pausable/metrics", HTTP_GET, get_pausable_metrics, {}) + .install(); + } +}; + +class TestManualConflictsFrontend + : public TSimpleFrontend +{ +public: + using MyVals = TestManualConflictsRegistry::MyVals; + static constexpr auto SRC = TestManualConflictsRegistry::SRC; + static constexpr auto DST = TestManualConflictsRegistry::DST; + static constexpr auto KEY = TestManualConflictsRegistry::KEY; + + ccf::StubNodeContext context; + + TestManualConflictsFrontend(kv::Store& tables) : + TSimpleFrontend(tables, context) + { + open(); } }; @@ -1504,7 +1558,7 @@ TEST_CASE("Manual conflicts") auto get_metrics = [&]() { crypto::openssl_sha256_init(); - auto req = create_simple_request("/api/metrics"); + auto req = create_simple_request("/pausable/metrics"); req.set_method(HTTP_GET); auto serialized_call = req.build_request(); auto rpc_ctx = ccf::make_rpc_context(user_session, serialized_call); @@ -1512,17 +1566,10 @@ TEST_CASE("Manual conflicts") auto response = parse_response(rpc_ctx->serialise_response()); CHECK(response.status == HTTP_STATUS_OK); auto body = nlohmann::json::parse(response.body); - auto& element = body["metrics"]; - ccf::EndpointMetricsEntry ret; - for (const auto& j : element) - { - if (j["path"] == "pausable") - { - ret = j.get(); - break; - } - } - + TestManualConflictsRegistry::Metrics ret; + ret.calls = body["calls"].get(); + ret.retries = body["retries"].get(); + ret.errors = body["errors"].get(); crypto::openssl_sha256_shutdown(); return ret; }; @@ -1553,20 +1600,20 @@ TEST_CASE("Manual conflicts") std::shared_ptr session = user_session, http_status expected_status = HTTP_STATUS_OK) { crypto::openssl_sha256_init(); - frontend.before_read.ready = false; - frontend.after_read.ready = false; - frontend.before_write.ready = false; - frontend.after_write.ready = false; + frontend.registry.before_read.ready = false; + frontend.registry.after_read.ready = false; + frontend.registry.before_write.ready = false; + frontend.registry.after_write.ready = false; std::thread worker(call_pausable, session, expected_status); - frontend.before_read.notify(); - frontend.after_read.wait(); + frontend.registry.before_read.notify(); + frontend.registry.after_read.wait(); read_write_op(); - frontend.before_write.notify(); - frontend.after_write.wait(); + frontend.registry.before_write.notify(); + frontend.registry.after_write.wait(); worker.join(); crypto::openssl_sha256_shutdown(); diff --git a/tests/e2e_logging.py b/tests/e2e_logging.py index e719e49f3ec0..64b1acc1da9a 100644 --- a/tests/e2e_logging.py +++ b/tests/e2e_logging.py @@ -755,68 +755,6 @@ def test_raw_text(network, args): r = network.txs.request(log_id, priv=True) assert msg in r.body.json()["msg"], r - primary, _ = network.find_primary() - with primary.client("user0") as c: - r = c.get("/app/api/metrics") - assert get_metrics(r, "log/private/raw_text/{id}", "POST")["calls"] > 0 - - return network - - -@reqs.description("Read metrics") -@reqs.supports_methods("/app/api/metrics") -def test_metrics(network, args): - primary, _ = network.find_primary() - - calls = 0 - errors = 0 - with primary.client("user0") as c: - r = c.get("/app/api/metrics") - m = get_metrics(r, "api/metrics", "GET") - calls = m["calls"] - errors = m["errors"] - - with primary.client("user0") as c: - r = c.get("/app/api/metrics") - assert get_metrics(r, "api/metrics", "GET")["calls"] == calls + 1 - r = c.get("/app/api/metrics") - assert get_metrics(r, "api/metrics", "GET")["calls"] == calls + 2 - - with primary.client() as c: - r = c.get("/app/api/metrics", headers={"accept": "nonsense"}) - assert r.status_code == http.HTTPStatus.BAD_REQUEST.value - - with primary.client() as c: - r = c.get("/app/api/metrics") - assert get_metrics(r, "api/metrics", "GET")["errors"] == errors + 1 - - calls = 0 - with primary.client("user0") as c: - r = c.get("/app/api/metrics") - calls = get_metrics(r, "log/public", "POST", {"calls": 0})["calls"] - - network.txs.issue( - network=network, - number_txs=1, - ) - - with primary.client("user0") as c: - r = c.get("/app/api/metrics") - assert get_metrics(r, "log/public", "POST")["calls"] == calls + 1 - - with primary.client("user0") as c: - r = c.get("/app/no_such_endpoint") - assert r.status_code == http.HTTPStatus.NOT_FOUND.value - r = c.get("/app/api/metrics") - assert ( - get_metrics( - r, - "no_such_endpoint", - "GET", - ) - is None - ) - return network @@ -2072,7 +2010,6 @@ def run_main_tests(network, args): test_historical_query(network, args) test_historical_query_range(network, args) test_view_history(network, args) - test_metrics(network, args) test_empty_path(network, args) if args.package == "samples/apps/logging/liblogging": # Local-commit lambda is currently only supported in C++ diff --git a/tests/jwt_test.py b/tests/jwt_test.py index 85aa054af381..83771dca1904 100644 --- a/tests/jwt_test.py +++ b/tests/jwt_test.py @@ -487,24 +487,13 @@ def check_kv_jwt_keys_not_empty(args, network, issuer): assert False, "No keys for issuer" -def get_jwt_refresh_endpoint_metrics(network) -> dict: - primary, _ = network.find_nodes() - with primary.client(network.consortium.get_any_active_member().local_id) as c: - r = c.get("/node/api/metrics") - m = next( - v - for v in r.body.json()["metrics"] - if v["path"] == "jwt_keys/refresh" and v["method"] == "POST" - ) - assert m["errors"] == 0, m["errors"] # not used in jwt refresh endpoint - m["successes"] = m["calls"] - m["failures"] - return m - - -def get_jwt_metrics(network) -> dict: - primary, _ = network.find_nodes() - with primary.client(network.consortium.get_any_active_member().local_id) as c: - r = c.get("/node/jwt_metrics") +def get_jwt_refresh_endpoint_metrics(primary) -> dict: + # Note that these metrics are local to a node. So if the primary changes, or + # a different node has processed jwt_keys/refresh, you may not see the values + # you expect + with primary.client() as c: + r = c.get("/node/jwt_keys/refresh/metrics") + assert r.status_code == 200, r return r.body.json() @@ -561,15 +550,11 @@ def test_jwt_key_auto_refresh(network, args): timeout=5, ) - LOG.info("Check that JWT refresh has attempts and successes") - m = get_jwt_metrics(network) - assert m["attempts"] > 0, m["attempts"] - assert m["successes"] > 0, m["successes"] - - LOG.info("Check that JWT refresh endpoint has no failures") - m = get_jwt_refresh_endpoint_metrics(network) - assert m["failures"] == 0, m["failures"] - assert m["successes"] > 0, m["successes"] + LOG.info("Check that JWT refresh has attempts and successes and no failures") + m = get_jwt_refresh_endpoint_metrics(primary) + assert m["attempts"] > 0, m + assert m["successes"] > 0, m + assert m["failures"] == 0, m LOG.info("Serve invalid JWKS") server.jwks = {"foo": "bar"} @@ -577,14 +562,14 @@ def test_jwt_key_auto_refresh(network, args): LOG.info("Check that JWT refresh endpoint has some failures") def check_has_failures(): - m = get_jwt_refresh_endpoint_metrics(network) - assert m["failures"] > 0, m["failures"] + m = get_jwt_refresh_endpoint_metrics(primary) + assert m["failures"] > 0, m with_timeout(check_has_failures, timeout=5) LOG.info("Check that JWT refresh has fewer successes than attempts") - m = get_jwt_metrics(network) - assert m["attempts"] > m["successes"], m["attempts"] + m = get_jwt_refresh_endpoint_metrics(primary) + assert m["attempts"] > m["successes"], m LOG.info("Restart OpenID endpoint server with new keys") kid2 = "the_kid_2" @@ -643,7 +628,7 @@ def test_jwt_key_auto_refresh_entries(network, args): ) LOG.info("Check that JWT refresh has attempts and successes") - m = get_jwt_metrics(network) + m = get_jwt_refresh_endpoint_metrics(primary) attempts = m["attempts"] successes = m["successes"] assert attempts > 0, attempts @@ -652,7 +637,7 @@ def test_jwt_key_auto_refresh_entries(network, args): # Wait long enough for at least one refresh to take place time.sleep(args.jwt_key_refresh_interval_s) - m = get_jwt_metrics(network) + m = get_jwt_refresh_endpoint_metrics(primary) assert m["attempts"] > attempts, m["attempts"] assert m["successes"] > successes, m["successes"] @@ -728,7 +713,7 @@ def test_jwt_key_initial_refresh(network, args): ) LOG.info("Check that JWT refresh endpoint has no failures") - m = get_jwt_refresh_endpoint_metrics(network) + m = get_jwt_refresh_endpoint_metrics(primary) assert m["failures"] == 0, m["failures"] assert m["successes"] > 0, m["successes"]