From 671edf243dcb88d5e69d16a4c01a23ba64bef090 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Thu, 12 Dec 2024 15:51:09 -0800 Subject: [PATCH 01/28] Add agent control health checks When the agent recognizes it is running in an agent control environment, it will start automatic health checks that will create a new file at a configured destination at a given frequency that provides details about the last reported status of the agent. When the agent is not seen within an agent control environment, files will not be created. --- CHANGELOG.md | 4 + lib/new_relic/agent/agent.rb | 4 + lib/new_relic/agent/agent_helpers/connect.rb | 1 + lib/new_relic/agent/agent_helpers/harvest.rb | 3 + lib/new_relic/agent/agent_helpers/shutdown.rb | 1 + .../agent_helpers/start_worker_thread.rb | 1 + lib/new_relic/agent/agent_helpers/startup.rb | 5 + .../agent/configuration/default_source.rb | 24 ++ .../agent/configuration/yaml_source.rb | 2 + lib/new_relic/agent/health_check.rb | 125 +++++++ lib/new_relic/agent/new_relic_service.rb | 10 +- test/agent_helper.rb | 7 + test/new_relic/agent/agent/start_test.rb | 3 + .../orphan_configuration_test.rb | 10 +- test/new_relic/agent/health_check_test.rb | 320 ++++++++++++++++++ 15 files changed, 517 insertions(+), 3 deletions(-) create mode 100644 lib/new_relic/agent/health_check.rb create mode 100644 test/new_relic/agent/health_check_test.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 2851296782..cf5bbc74dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The agent now supports Ruby 3.4.0. We've made incremental changes throughout the preview stage to reach compatibility. This release includes an update to the Thread Profiler for compatibility with Ruby 3.4.0's new backtrace format. [Issue#2992](https://github.com/newrelic/newrelic-ruby-agent/issues/2992) [PR#2997](https://github.com/newrelic/newrelic-ruby-agent/pull/2997) +- **Feature: Add health checks when the agent runs within Agent Control** + + When the agent is started with a within an agent control environment, automatic health check files will be created within the configured file destination at the configured frequency. [PR#2995](https://github.com/newrelic/newrelic-ruby-agent/pull/2995) + - **Bugfix: Stop emitting inaccurate debug-level log about deprecated configuration options** In the previous major release, we dropped support for `disable_` configuration options in favor of `instrumentation.`. Previously, a DEBUG level log warning appeared whenever `disable_*` options were set to `true`, even for libraries (e.g. Action Dispatch) without equivalent `instrumentation.*` options: diff --git a/lib/new_relic/agent/agent.rb b/lib/new_relic/agent/agent.rb index 6c22862f80..7e80a130dc 100644 --- a/lib/new_relic/agent/agent.rb +++ b/lib/new_relic/agent/agent.rb @@ -13,6 +13,7 @@ require 'new_relic/coerce' require 'new_relic/agent/autostart' require 'new_relic/agent/harvester' +require 'new_relic/agent/health_check' require 'new_relic/agent/hostname' require 'new_relic/agent/new_relic_service' require 'new_relic/agent/pipe_service' @@ -88,6 +89,7 @@ def init_basics end def init_components + @health_check = HealthCheck.new @service = NewRelicService.new @events = EventListener.new @stats_engine = StatsEngine.new @@ -139,6 +141,8 @@ def instance # Holds all the methods defined on NewRelic::Agent::Agent # instances module InstanceMethods + # the agent control health check file generator + attr_reader :health_check # the statistics engine that holds all the timeslice data attr_reader :stats_engine # the transaction sampler that handles recording transactions diff --git a/lib/new_relic/agent/agent_helpers/connect.rb b/lib/new_relic/agent/agent_helpers/connect.rb index 7cae31c2bb..75fc02d49e 100644 --- a/lib/new_relic/agent/agent_helpers/connect.rb +++ b/lib/new_relic/agent/agent_helpers/connect.rb @@ -198,6 +198,7 @@ def connect(options = {}) rescue NewRelic::Agent::UnrecoverableAgentException => e handle_unrecoverable_agent_error(e) rescue StandardError, Timeout::Error, NewRelic::Agent::ServerConnectionException => e + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT) retry if retry_from_error?(e, opts) rescue Exception => e ::NewRelic::Agent.logger.error('Exception of unexpected type during Agent#connect():', e) diff --git a/lib/new_relic/agent/agent_helpers/harvest.rb b/lib/new_relic/agent/agent_helpers/harvest.rb index af5596ad21..893d8e1778 100644 --- a/lib/new_relic/agent/agent_helpers/harvest.rb +++ b/lib/new_relic/agent/agent_helpers/harvest.rb @@ -119,6 +119,7 @@ def send_data_to_endpoint(endpoint, payload, container) rescue UnrecoverableServerException => e NewRelic::Agent.logger.warn("#{endpoint} data was rejected by remote service, discarding. Error: ", e) rescue ServerConnectionException => e + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT) log_remote_unavailable(endpoint, e) container.merge!(payload) rescue => e @@ -133,9 +134,11 @@ def check_for_and_handle_agent_commands rescue ForceRestartException, ForceDisconnectException raise rescue UnrecoverableServerException => e + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT) NewRelic::Agent.logger.warn('get_agent_commands message was rejected by remote service, discarding. ' \ 'Error: ', e) rescue ServerConnectionException => e + NewRelic::Agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT) log_remote_unavailable(:get_agent_commands, e) rescue => e NewRelic::Agent.logger.info('Error during check_for_and_handle_agent_commands, will retry later: ', e) diff --git a/lib/new_relic/agent/agent_helpers/shutdown.rb b/lib/new_relic/agent/agent_helpers/shutdown.rb index 9bc36393bc..7746769e0b 100644 --- a/lib/new_relic/agent/agent_helpers/shutdown.rb +++ b/lib/new_relic/agent/agent_helpers/shutdown.rb @@ -19,6 +19,7 @@ def shutdown revert_to_default_configuration @started = nil + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::SHUTDOWN) Control.reset end diff --git a/lib/new_relic/agent/agent_helpers/start_worker_thread.rb b/lib/new_relic/agent/agent_helpers/start_worker_thread.rb index 006d492967..e10a020b2b 100644 --- a/lib/new_relic/agent/agent_helpers/start_worker_thread.rb +++ b/lib/new_relic/agent/agent_helpers/start_worker_thread.rb @@ -86,6 +86,7 @@ def handle_force_restart(error) # is the worker thread that gathers data and talks to the # server. def handle_force_disconnect(error) + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FORCED_DISCONNECT) ::NewRelic::Agent.logger.warn('Agent received a ForceDisconnectException from the server, disconnecting. ' \ "(#{error.message})") disconnect diff --git a/lib/new_relic/agent/agent_helpers/startup.rb b/lib/new_relic/agent/agent_helpers/startup.rb index f683a1e917..bc1a98a57b 100644 --- a/lib/new_relic/agent/agent_helpers/startup.rb +++ b/lib/new_relic/agent/agent_helpers/startup.rb @@ -48,6 +48,7 @@ def check_config_and_start_agent # Treatment of @started and env report is important to get right. def setup_and_start_agent(options = {}) @started = true + @health_check.create_and_run_health_check_loop @harvester.mark_started unless in_resque_child_process? @@ -129,6 +130,7 @@ def monitoring? if Agent.config[:monitor_mode] true else + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::AGENT_DISABLED) ::NewRelic::Agent.logger.warn('Agent configured not to send data in this environment.') false end @@ -140,6 +142,7 @@ def has_license_key? if Agent.config[:license_key] && Agent.config[:license_key].length > 0 true else + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::MISSING_LICENSE_KEY) ::NewRelic::Agent.logger.warn('No license key found. ' + 'This often means your newrelic.yml file was not found, or it lacks a section for the running ' \ "environment, '#{NewRelic::Control.instance.env}'. You may also want to try linting your newrelic.yml " \ @@ -160,6 +163,7 @@ def correct_license_length if key.length == 40 true else + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) ::NewRelic::Agent.logger.error("Invalid license key: #{key}") false end @@ -180,6 +184,7 @@ def agent_should_start? end unless app_name_configured? + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::MISSING_APP_NAME) NewRelic::Agent.logger.error('No application name configured.', 'The agent cannot start without at least one. Please check your ', 'newrelic.yml and ensure that it is valid and has at least one ', diff --git a/lib/new_relic/agent/configuration/default_source.rb b/lib/new_relic/agent/configuration/default_source.rb index b379719c62..707c77eaf6 100644 --- a/lib/new_relic/agent/configuration/default_source.rb +++ b/lib/new_relic/agent/configuration/default_source.rb @@ -2188,6 +2188,30 @@ def self.notify :transform => DefaultSource.method(:convert_to_constant_list), :description => 'Specify a list of exceptions you do not want the agent to strip when [strip_exception_messages](#strip_exception_messages-enabled) is `true`. Separate exceptions with a comma. For example, `"ImportantException,PreserveMessageException"`.' }, + # Agent Control + :'agent_control.fleet_id' => { + :default => nil, + :allow_nil => true, + :public => true, + :type => String, + :allowed_from_server => false, + :description => 'This assigns a fleet id to the language agent. This id is generated by agent control. If this setting is present, it indicates the agent is running in a super agent/fleet environment and health file(s) will be generated.' + }, + :'agent_control.health.delivery_location' => { + :default => nil, + :allow_nil => true, + :public => true, + :type => String, + :allowed_from_server => false, + :description => 'A `file:` URI that specifies the fully qualified directory path for health file(s) to be written to. For example: `file:///var/lib/newrelic-super-agent/fleet/agents.d/`. This configuration will be set by agent control, or one of its components, prior to agent startup.' + }, + :'agent_control.health.frequency' => { + :default => 5, + :public => true, + :type => Integer, + :allowed_from_server => false, + :description => 'The interval, in seconds, of how often the health file(s) will be written to. This configuration will be set by agent control, or one of its components, prior to agent startup.' + }, # Thread profiler :'thread_profiler.enabled' => { :default => DefaultSource.thread_profiler_enabled, diff --git a/lib/new_relic/agent/configuration/yaml_source.rb b/lib/new_relic/agent/configuration/yaml_source.rb index 574d29ef83..ca9b8b212d 100644 --- a/lib/new_relic/agent/configuration/yaml_source.rb +++ b/lib/new_relic/agent/configuration/yaml_source.rb @@ -36,6 +36,7 @@ def initialize(path, env) erb_file = process_erb(raw_file) config = process_yaml(erb_file, env, config, @file_path) rescue ScriptError, StandardError => e + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_PARSE_CONFIG) log_failure("Failed to read or parse configuration file at #{path}", e) end @@ -99,6 +100,7 @@ def process_erb(file) file.gsub!(/^\s*#.*$/, '#') ERB.new(file).result(binding) rescue ScriptError, StandardError => e + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_PARSE_CONFIG) message = 'Failed ERB processing configuration file. This is typically caused by a Ruby error in <% %> templating blocks in your newrelic.yml file.' failure_array = [message, e] failure_array << e.backtrace[0] if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('3.4.0') diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb new file mode 100644 index 0000000000..f7788ed28c --- /dev/null +++ b/lib/new_relic/agent/health_check.rb @@ -0,0 +1,125 @@ +# This file is distributed under New Relic's license terms. +# See https://github.com/newrelic/newrelic-ruby-agent/blob/main/LICENSE for complete details. +# frozen_string_literal: true + +module NewRelic + module Agent + class HealthCheck + def initialize + @start_time = nano_time + @fleet_id = ENV['NEW_RELIC_AGENT_CONTROL_FLEET_ID'] + # The spec states file paths for the delivery location will begin with file:// + # This does not create a valid path in Ruby, so remove the prefix when present + @delivery_location = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION']&.gsub('file://', '') + @frequency = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'].to_i + @continue = true + @status = HEALTHY + end + + HEALTHY = {healthy: true, last_error: 'NR-APM-000', message: 'Healthy'} + INVALID_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-001', message: 'Invalid liense key (HTTP status code 401)'} + MISSING_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-002', message: 'License key missing in configuration'} + FORCED_DISCONNECT = {healthy: false, last_error: 'NR-APM-003', message: 'Forced disconnect received from New Relic (HTTP status code 410)'} + HTTP_ERROR = {healthy: false, last_error: 'NR-APM-004', message: 'HTTP error response code [%s] recevied from New Relic while sending data type [%s]'} + MISSING_APP_NAME = {healthy: false, last_error: 'NR-APM-005', message: 'Missing application name in agent configuration'} + APP_NAME_EXCEEDED = {healthy: false, last_error: 'NR-APM-006', message: 'The maximum number of configured app names (3) exceeded'} + PROXY_CONFIG_ERROR = {healthy: false, last_error: 'NR-APM-007', message: 'HTTP Proxy configuration error; response code [%s]'} + AGENT_DISABLED = {healthy: false, last_error: 'NR-APM-008', message: 'Agent is disabled via configuration'} + FAILED_TO_CONNECT = {healthy: false, last_error: 'NR-APM-009', message: 'Failed to connect to New Relic data collector'} + FAILED_TO_PARSE_CONFIG = {healthy: false, last_error: 'NR-APM-010', message: 'Agent config file is not able to be parsed'} + SHUTDOWN = {healthy: true, last_error: 'NR-APM-099', message: 'Agent has shutdown'} + + def create_and_run_health_check_loop + unless health_check_enabled? + @continue = false + end + + return NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_FLEET_ID not found, skipping health checks') unless @fleet_id + return NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found, skipping health checks') unless @delivery_location + return NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less, skipping health checks') unless @frequency > 0 + + NewRelic::Agent.logger.debug('Agent control health check conditions met. Starting health checks.') + NewRelic::Agent.record_metric('Supportability/AgentControl/Health/enabled', 1) + + Thread.new do + while @continue + begin + sleep @frequency + write_file + @continue = false if @status == SHUTDOWN + rescue StandardError => e + NewRelic::Agent.logger.error("Aborting agent control health check. Error raised: #{e}") + @continue = false + end + end + end + end + + def update_status(status, options = []) + return unless @continue + + @status = status + update_message(options) unless options.empty? + end + + private + + def contents + <<~CONTENTS + healthy: #{@status[:healthy]} + status: #{@status[:message]}#{last_error} + start_time_unix_nano: #{@start_time} + status_time_unix_nano: #{nano_time} + CONTENTS + end + + def last_error + @status[:healthy] ? '' : "\nlast_error: #{@status[:last_error]}" + end + + def nano_time + Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond) + end + + def file_name + "health-#{NewRelic::Agent::GuidGenerator.generate_guid(32)}.yml" + end + + def write_file + @path ||= create_file_path + + File.write("#{@path}/#{file_name}", contents) + rescue StandardError => e + NewRelic::Agent.logger.error("Agent control health check raised an error while writing a file: #{e}") + @continue = false + end + + def create_file_path + for abs_path in [File.expand_path(@delivery_location), + File.expand_path(File.join('', @delivery_location))] do + if File.directory?(abs_path) || (Dir.mkdir(abs_path) rescue nil) + return abs_path[%r{^(.*?)/?$}] + end + end + nil + rescue StandardError => e + NewRelic::Agent.logger.error( + 'Agent control health check raised an error while finding or creating the file path defined in ' \ + "NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION: #{e}" + ) + @continue = false + end + + def health_check_enabled? + @fleet_id && @delivery_location && (@frequency > 0) + end + + def update_message(options) + @status[:message] = sprintf(@status[:message], **options) + rescue StandardError => e + NewRelic::Agent.logger.debug("Error raised while updating agent control health check message: #{e}." \ + "Reverting to original message.\noptions = #{options}, @status[:message] = #{@status[:message]}") + end + end + end +end diff --git a/lib/new_relic/agent/new_relic_service.rb b/lib/new_relic/agent/new_relic_service.rb index 467188d455..ec99e98902 100644 --- a/lib/new_relic/agent/new_relic_service.rb +++ b/lib/new_relic/agent/new_relic_service.rb @@ -455,6 +455,8 @@ def attempt_request(request, opts) end def handle_error_response(response, endpoint) + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::HTTP_ERROR, [response.code, endpoint]) + case response when Net::HTTPRequestTimeOut, Net::HTTPTooManyRequests, @@ -637,9 +639,13 @@ def check_post_size(post_string, endpoint) def send_request(opts) request = prep_request(opts) response = relay_request(request, opts) - return response if response.is_a?(Net::HTTPSuccess) || response.is_a?(Net::HTTPAccepted) - handle_error_response(response, opts[:endpoint]) + if response.is_a?(Net::HTTPSuccess) || response.is_a?(Net::HTTPAccepted) + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::HEALTHY) + response + else + handle_error_response(response, opts[:endpoint]) + end end def log_response(response) diff --git a/test/agent_helper.rb b/test/agent_helper.rb index ae7dfea555..4a32cae7b0 100644 --- a/test/agent_helper.rb +++ b/test/agent_helper.rb @@ -115,6 +115,13 @@ def assert_log_contains(log, message) "Could not find message: '#{message.inspect}'. Log contained: #{lines.join("\n")}" end +def refute_log_contains(log, message) + lines = log.array + + refute (lines.any? { |line| line.match(message) }), + "Found message: '#{message.inspect}'. Log contained: #{lines.join("\n")}" +end + def assert_audit_log_contains(audit_log_contents, needle) # Original request bodies dumped to the log have symbol keys, but once # they go through a dump/load, they're strings again, so we strip diff --git a/test/new_relic/agent/agent/start_test.rb b/test/new_relic/agent/agent/start_test.rb index 4c245066a3..54b80914d8 100644 --- a/test/new_relic/agent/agent/start_test.rb +++ b/test/new_relic/agent/agent/start_test.rb @@ -11,6 +11,7 @@ class NewRelic::Agent::Agent::StartTest < Minitest::Test def setup @harvester = stub('dummy harvester') + @health_check = stub('dummy health check') @harvest_samplers = stub('dummy sampler collection') end @@ -62,6 +63,7 @@ def test_check_config_and_start_agent_forking def test_check_config_and_start_agent_normal @harvester.expects(:mark_started) @harvest_samplers.expects(:load_samplers) + @health_check.expects(:create_and_run_health_check_loop) self.expects(:start_worker_thread) self.expects(:install_exit_handler) self.expects(:environment_for_connect) @@ -74,6 +76,7 @@ def test_check_config_and_start_agent_normal def test_check_config_and_start_agent_sync @harvester.expects(:mark_started) @harvest_samplers.expects(:load_samplers) + @health_check.expects(:create_and_run_health_check_loop) self.expects(:connect_in_foreground) self.expects(:start_worker_thread) self.expects(:install_exit_handler) diff --git a/test/new_relic/agent/configuration/orphan_configuration_test.rb b/test/new_relic/agent/configuration/orphan_configuration_test.rb index d85827235d..1788f63303 100644 --- a/test/new_relic/agent/configuration/orphan_configuration_test.rb +++ b/test/new_relic/agent/configuration/orphan_configuration_test.rb @@ -9,7 +9,15 @@ class OrphanedConfigTest < Minitest::Test include NewRelic::TestHelpers::ConfigScanning # :automatic_custom_instrumentation_method_list - the tranform proc handles all processing, no other reference exists - IGNORED_KEYS = %i[automatic_custom_instrumentation_method_list] + # :'agent_control.fleet_id' - the config is set by environment variable in agent control, the symbol config is not used + # :'agent_control.health.delivery_location - the config is set by environment variable in agent control, the symbol config is not used + # :'agent_control.health.frequency' - the config is set by environment variable in agent control, the symbol config is not used + IGNORED_KEYS = %i[ + automatic_custom_instrumentation_method_list + agent_control.fleet_id + agent_control.health.delivery_location + agent_control.health.frequency + ] def setup @default_keys = ::NewRelic::Agent::Configuration::DEFAULTS.keys diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb new file mode 100644 index 0000000000..10b58c168d --- /dev/null +++ b/test/new_relic/agent/health_check_test.rb @@ -0,0 +1,320 @@ +# This file is distributed under New Relic's license terms. +# See https://github.com/newrelic/newrelic-ruby-agent/blob/main/LICENSE for complete details. +# frozen_string_literal: true + +require 'fileutils' +require_relative '../../test_helper' + +class NewRelicHealthCheckTest < Minitest::Test + # example + # file name: health-bc21b5891f5e44fc9272caef924611a8.yml + # healthy: true + # status: Agent has shutdown + # last_error: NR-APM-099 + # status_time_unix_nano: 1724953624761000000 + # start_time_unix_nano: 1724953587605000000 + + def teardown + mocha_teardown + end + + def test_yaml_health_file_written_to_delivery_location + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.send(:write_file) + + assert File.directory?('health'), 'Directory not found' + assert File.exist?('health/health-abc123.yml'), 'File not found' # rubocop:disable Minitest/AssertPathExists + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_health_file_written_to_delivery_location_with_file_path_prefix + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'file://health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.send(:write_file) + + assert File.directory?('./health'), 'Directory not found' + assert File.exist?('./health/health-abc123.yml'), 'File not found' # rubocop:disable Minitest/AssertPathExists + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_file_name_has_health_plus_uuid_without_hyphens + health_check = NewRelic::Agent::HealthCheck.new + + # ex: health-bc21b5891f5e44fc9272caef924611a8.yml + assert_match(/health-(.*){32}\.ya?ml/, health_check.send(:file_name)) + end + + def test_write_file_called_on_interval + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '3', + 'NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'abc', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.stub(:write_file, nil) do + health_check.expects(:sleep).with(3).times(3) + health_check.expects(:write_file).times(3).then.returns(nil).then.returns(nil).then.raises('whoa!') + health_check.create_and_run_health_check_loop.join + end + end + end + + def test_create_and_run_health_check_loop_exits_after_shutdown + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '3', + 'NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'abc', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.stub(:write_file, nil) do + health_check.expects(:sleep).with(3).times(1) + health_check.expects(:write_file).times(1).then.returns(nil) + health_check.update_status(NewRelic::Agent::HealthCheck::SHUTDOWN) + health_check.create_and_run_health_check_loop.join + end + end + end + + def test_write_file_sets_continue_false_when_error + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + File.stub(:write, ->(arg1, arg2) { raise 'boom!' }) do + health_check = NewRelic::Agent::HealthCheck.new + + assert(health_check.instance_variable_get(:@continue)) + health_check.send(:write_file) + + refute(health_check.instance_variable_get(:@continue)) + end + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_create_file_path_sets_continue_false_when_error_raised + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + File.stub(:directory?, ->(arg1) { raise 'boom!' }) do + health_check = NewRelic::Agent::HealthCheck.new + + assert(health_check.instance_variable_get(:@continue)) + health_check.send(:create_file_path) + + refute(health_check.instance_variable_get(:@continue)) + end + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_file_has_healthy_field + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.send(:write_file) + + assert_predicate File.readlines('health/health-abc123.yml').grep(/healthy:/), :any? + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_file_has_status_field + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.send(:write_file) + + assert_predicate File.readlines('health/health-abc123.yml').grep(/status:/), :any? + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_file_has_last_error_field_when_status_not_healthy + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.update_status(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) + health_check.send(:write_file) + + assert_predicate File.readlines('health/health-abc123.yml').grep(/last_error:/), :any? + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_file_does_not_have_last_error_field_when_status_healthy + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.update_status(NewRelic::Agent::HealthCheck::HEALTHY) + health_check.send(:write_file) + + refute_predicate File.readlines('health/health-abc123.yml').grep(/last_error:/), :any? + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_nano_time_in_correct_format + health_check = NewRelic::Agent::HealthCheck.new + time = health_check.send(:nano_time) + + assert_instance_of(Integer, time) + assert(time.to_s.length >= 19) + end + + def test_yaml_file_has_same_start_time_unix_nano_for_all_files + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + health_check = NewRelic::Agent::HealthCheck.new + start_time = health_check.instance_variable_get(:@start_time_unix_nano) + health_check.expects(:file_name).times(2).then.returns('health-1.yml').then.returns('health-2.yml') + health_check.send(:write_file) + + assert_predicate File.readlines('health/health-1.yml').grep(/start_time_unix_nano: #{start_time}/), :any? + + health_check.send(:write_file) + + assert_predicate File.readlines('health/health-2.yml').grep(/start_time_unix_nano: #{start_time}/), :any? + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_file_has_status_time_unix_nano + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.send(:write_file) + + assert_predicate File.readlines('health/health-abc123.yml').grep(/status_time_unix_nano:/), :any? + end + end + ensure + FileUtils.rm_rf('health') + end + + def test_yaml_file_has_new_status_time_each_file + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.expects(:file_name).times(2).then.returns('health-1.yml').then.returns('health-2.yml') + health_check.send(:write_file) + # on a healthy file, the third index/fourth line should hold the status_time_unix_nano data + first_status_time = File.readlines('health/health-1.yml')[3] + health_check.send(:write_file) + second_status_time = File.readlines('health/health-2.yml')[3] + + refute_equal(first_status_time, second_status_time) + end + ensure + FileUtils.rm_rf('health') + end + + def test_agent_health_started_if_required_info_present + with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'landslide', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do + log = with_array_logger(:debug) do + health_check = NewRelic::Agent::HealthCheck.new + health_check.create_and_run_health_check_loop + end + + assert_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_FLEET_ID not found') + refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found') + refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less') + end + end + + def test_agent_health_not_generated_if_agent_control_fleet_id_absent + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do + log = with_array_logger(:debug) do + health_check = NewRelic::Agent::HealthCheck.new + # loop should exit before write_file is called + # raise an error if it's invoked + health_check.stub(:write_file, -> { raise 'kaboom!' }) do + health_check.create_and_run_health_check_loop + end + end + + assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_FLEET_ID not found') + refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + end + end + + def test_agent_health_not_generated_if_delivery_location_absent + with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'mykonos', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do + log = with_array_logger(:debug) do + health_check = NewRelic::Agent::HealthCheck.new + # loop should exit before write_file is called + # raise an error if it's invoked + health_check.stub(:write_file, -> { raise 'kaboom!' }) do + health_check.create_and_run_health_check_loop + end + end + + assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found') + refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + end + end + + def test_agent_health_not_generated_if_frequency_is_zero + with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'anchors-away', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '0') do + log = with_array_logger(:debug) do + health_check = NewRelic::Agent::HealthCheck.new + # loop should exit before write_file is called + # raise an error if it's invoked + health_check.stub(:write_file, -> { raise 'kaboom!' }) do + health_check.create_and_run_health_check_loop + end + end + + assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less') + refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + end + end + + def test_agent_health_supportability_metric_generated_recorded_when_health_check_loop_starts + NewRelic::Agent.instance.stats_engine.clear_stats + + with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'landslide', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', + 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.create_and_run_health_check_loop + + assert_metrics_recorded({'Supportability/AgentControl/Health/enabled' => {call_count: 1}}) + end + end + + def test_update_status_is_a_no_op_when_health_checks_disabled + with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => nil, + 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => nil, + 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '0') do + health_check = NewRelic::Agent::HealthCheck.new + + assert_equal NewRelic::Agent::HealthCheck::HEALTHY, health_check.instance_variable_get(:@status) + + health_check.create_and_run_health_check_loop + health_check.update_status(NewRelic::Agent::HealthCheck::SHUTDOWN) + + assert_equal NewRelic::Agent::HealthCheck::HEALTHY, health_check.instance_variable_get(:@status) + end + end +end From a510eb287ae8e3d5850b9d5effba180951bd3bb4 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 6 Jan 2025 16:17:51 -0800 Subject: [PATCH 02/28] Indent HEREDOC contents --- lib/new_relic/agent/health_check.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index f7788ed28c..7e4d088566 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -66,10 +66,10 @@ def update_status(status, options = []) def contents <<~CONTENTS - healthy: #{@status[:healthy]} - status: #{@status[:message]}#{last_error} - start_time_unix_nano: #{@start_time} - status_time_unix_nano: #{nano_time} + healthy: #{@status[:healthy]} + status: #{@status[:message]}#{last_error} + start_time_unix_nano: #{@start_time} + status_time_unix_nano: #{nano_time} CONTENTS end From e5ad0844a035ddfa77ca1fe47cb57cc4018d9e18 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 7 Jan 2025 10:22:40 -0800 Subject: [PATCH 03/28] Set frequency default to 5 on HealthCheck init --- lib/new_relic/agent/health_check.rb | 2 +- test/new_relic/agent/health_check_test.rb | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index 7e4d088566..ca18b8dafc 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -11,7 +11,7 @@ def initialize # The spec states file paths for the delivery location will begin with file:// # This does not create a valid path in Ruby, so remove the prefix when present @delivery_location = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION']&.gsub('file://', '') - @frequency = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'].to_i + @frequency = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'] ? ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'].to_i : 5 @continue = true @status = HEALTHY end diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 10b58c168d..ab1572186c 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -97,6 +97,12 @@ def test_write_file_sets_continue_false_when_error FileUtils.rm_rf('health') end + def test_frequency_defaults_to_five + # deliberately not setting the `NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY` env var + health_check = NewRelic::Agent::HealthCheck.new + assert_equal 5, health_check.instance_variable_get(:@frequency) + end + def test_create_file_path_sets_continue_false_when_error_raised with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do From f333e684f0f478b0f69fd07daeb2bc464929eda2 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 7 Jan 2025 11:42:04 -0800 Subject: [PATCH 04/28] Write to a single file per agent instance Instead of creating a new file at the interval, reuse the same file for the life of the process. --- lib/new_relic/agent/health_check.rb | 4 +-- test/new_relic/agent/health_check_test.rb | 38 ++++++++++++----------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index ca18b8dafc..ee54f2da3f 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -86,9 +86,9 @@ def file_name end def write_file - @path ||= create_file_path + @file ||= "#{create_file_path}/#{file_name}" - File.write("#{@path}/#{file_name}", contents) + File.write(@file, contents) rescue StandardError => e NewRelic::Agent.logger.error("Agent control health check raised an error while writing a file: #{e}") @continue = false diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index ab1572186c..31d77e4c3c 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -182,18 +182,19 @@ def test_nano_time_in_correct_format assert(time.to_s.length >= 19) end - def test_yaml_file_has_same_start_time_unix_nano_for_all_files + def test_yaml_file_has_same_start_time_unix_every_write with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do - health_check = NewRelic::Agent::HealthCheck.new - start_time = health_check.instance_variable_get(:@start_time_unix_nano) - health_check.expects(:file_name).times(2).then.returns('health-1.yml').then.returns('health-2.yml') - health_check.send(:write_file) + NewRelic::Agent::GuidGenerator.stub(:generate_guid, '1') do + health_check = NewRelic::Agent::HealthCheck.new + start_time = health_check.instance_variable_get(:@start_time_unix_nano) + health_check.send(:write_file) - assert_predicate File.readlines('health/health-1.yml').grep(/start_time_unix_nano: #{start_time}/), :any? + assert_predicate File.readlines('health/health-1.yml').grep(/start_time_unix_nano: #{start_time}/), :any? - health_check.send(:write_file) + health_check.send(:write_file) - assert_predicate File.readlines('health/health-2.yml').grep(/start_time_unix_nano: #{start_time}/), :any? + assert_predicate File.readlines('health/health-1.yml').grep(/start_time_unix_nano: #{start_time}/), :any? + end end ensure FileUtils.rm_rf('health') @@ -212,17 +213,18 @@ def test_yaml_file_has_status_time_unix_nano FileUtils.rm_rf('health') end - def test_yaml_file_has_new_status_time_each_file + def test_yaml_file_has_new_status_time_each_write with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do - health_check = NewRelic::Agent::HealthCheck.new - health_check.expects(:file_name).times(2).then.returns('health-1.yml').then.returns('health-2.yml') - health_check.send(:write_file) - # on a healthy file, the third index/fourth line should hold the status_time_unix_nano data - first_status_time = File.readlines('health/health-1.yml')[3] - health_check.send(:write_file) - second_status_time = File.readlines('health/health-2.yml')[3] - - refute_equal(first_status_time, second_status_time) + NewRelic::Agent::GuidGenerator.stub(:generate_guid, '1') do + health_check = NewRelic::Agent::HealthCheck.new + health_check.send(:write_file) + # on a healthy file, the third index/fourth line should hold the status_time_unix_nano data + first_status_time = File.readlines('health/health-1.yml')[3] + health_check.send(:write_file) + second_status_time = File.readlines('health/health-1.yml')[3] + + refute_equal(first_status_time, second_status_time) + end end ensure FileUtils.rm_rf('health') From aa4409d790049147b987fc8fef04cc12ac635e51 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 7 Jan 2025 11:43:15 -0800 Subject: [PATCH 05/28] Update array destructure for update_message The array was not being correctly destructured, which would raise an error when the status was HTTP_ERROR --- lib/new_relic/agent/health_check.rb | 4 ++-- test/new_relic/agent/health_check_test.rb | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index ee54f2da3f..b08301bbd0 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -115,10 +115,10 @@ def health_check_enabled? end def update_message(options) - @status[:message] = sprintf(@status[:message], **options) + @status[:message] = sprintf(@status[:message], *options) rescue StandardError => e NewRelic::Agent.logger.debug("Error raised while updating agent control health check message: #{e}." \ - "Reverting to original message.\noptions = #{options}, @status[:message] = #{@status[:message]}") + "Reverting to original message. options = #{options}, @status[:message] = #{@status[:message]}") end end end diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 31d77e4c3c..817b548f64 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -325,4 +325,12 @@ def test_update_status_is_a_no_op_when_health_checks_disabled assert_equal NewRelic::Agent::HealthCheck::HEALTHY, health_check.instance_variable_get(:@status) end end + + def test_update_message_works_with_http_arrays + health_check = NewRelic::Agent::HealthCheck.new + health_check.update_status(NewRelic::Agent::HealthCheck::HTTP_ERROR, ['401', :preconnect]) + result = health_check.instance_variable_get(:@status)[:message] + + assert_equal "HTTP error response code [401] recevied from New Relic while sending data type [preconnect]", result + end end From 95808e8f80c31f1760159caa938882188c7a2a0b Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 7 Jan 2025 11:43:59 -0800 Subject: [PATCH 06/28] Rubocop --- test/new_relic/agent/health_check_test.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 817b548f64..e5dbf0e7ee 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -100,6 +100,7 @@ def test_write_file_sets_continue_false_when_error def test_frequency_defaults_to_five # deliberately not setting the `NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY` env var health_check = NewRelic::Agent::HealthCheck.new + assert_equal 5, health_check.instance_variable_get(:@frequency) end @@ -331,6 +332,6 @@ def test_update_message_works_with_http_arrays health_check.update_status(NewRelic::Agent::HealthCheck::HTTP_ERROR, ['401', :preconnect]) result = health_check.instance_variable_get(:@status)[:message] - assert_equal "HTTP error response code [401] recevied from New Relic while sending data type [preconnect]", result + assert_equal 'HTTP error response code [401] recevied from New Relic while sending data type [preconnect]', result end end From 0d55f7d91350f8a249682830a7ec7cc5d9014c23 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 7 Jan 2025 12:08:00 -0800 Subject: [PATCH 07/28] Update test expectation The health check status may be updated for other reasons on the CI, which may cause the message to be inaccurate by the time the result is accessed from the hash --- test/new_relic/agent/health_check_test.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index e5dbf0e7ee..e6780f3dd2 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -329,8 +329,7 @@ def test_update_status_is_a_no_op_when_health_checks_disabled def test_update_message_works_with_http_arrays health_check = NewRelic::Agent::HealthCheck.new - health_check.update_status(NewRelic::Agent::HealthCheck::HTTP_ERROR, ['401', :preconnect]) - result = health_check.instance_variable_get(:@status)[:message] + result = health_check.update_status(NewRelic::Agent::HealthCheck::HTTP_ERROR, ['401', :preconnect]) assert_equal 'HTTP error response code [401] recevied from New Relic while sending data type [preconnect]', result end From 20cdad81fb958db09c9512b16b8655b25cefa694 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 7 Jan 2025 14:57:55 -0800 Subject: [PATCH 08/28] Freeze and dup HealthCheck constants There was a bug related to the HTTP_ERROR constant, where the sprintf string manipulation changed the constant to equal the first value it came across. By freezing the constants and dup'ing the status before assigning it, we can avoid this problem. --- lib/new_relic/agent/health_check.rb | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index b08301bbd0..df7bbd36b0 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -16,18 +16,18 @@ def initialize @status = HEALTHY end - HEALTHY = {healthy: true, last_error: 'NR-APM-000', message: 'Healthy'} - INVALID_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-001', message: 'Invalid liense key (HTTP status code 401)'} - MISSING_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-002', message: 'License key missing in configuration'} - FORCED_DISCONNECT = {healthy: false, last_error: 'NR-APM-003', message: 'Forced disconnect received from New Relic (HTTP status code 410)'} - HTTP_ERROR = {healthy: false, last_error: 'NR-APM-004', message: 'HTTP error response code [%s] recevied from New Relic while sending data type [%s]'} - MISSING_APP_NAME = {healthy: false, last_error: 'NR-APM-005', message: 'Missing application name in agent configuration'} - APP_NAME_EXCEEDED = {healthy: false, last_error: 'NR-APM-006', message: 'The maximum number of configured app names (3) exceeded'} - PROXY_CONFIG_ERROR = {healthy: false, last_error: 'NR-APM-007', message: 'HTTP Proxy configuration error; response code [%s]'} - AGENT_DISABLED = {healthy: false, last_error: 'NR-APM-008', message: 'Agent is disabled via configuration'} - FAILED_TO_CONNECT = {healthy: false, last_error: 'NR-APM-009', message: 'Failed to connect to New Relic data collector'} - FAILED_TO_PARSE_CONFIG = {healthy: false, last_error: 'NR-APM-010', message: 'Agent config file is not able to be parsed'} - SHUTDOWN = {healthy: true, last_error: 'NR-APM-099', message: 'Agent has shutdown'} + HEALTHY = {healthy: true, last_error: 'NR-APM-000', message: 'Healthy'}.freeze + INVALID_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-001', message: 'Invalid liense key (HTTP status code 401)'}.freeze + MISSING_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-002', message: 'License key missing in configuration'}.freeze + FORCED_DISCONNECT = {healthy: false, last_error: 'NR-APM-003', message: 'Forced disconnect received from New Relic (HTTP status code 410)'}.freeze + HTTP_ERROR = {healthy: false, last_error: 'NR-APM-004', message: 'HTTP error response code [%s] recevied from New Relic while sending data type [%s]'}.freeze + MISSING_APP_NAME = {healthy: false, last_error: 'NR-APM-005', message: 'Missing application name in agent configuration'}.freeze + APP_NAME_EXCEEDED = {healthy: false, last_error: 'NR-APM-006', message: 'The maximum number of configured app names (3) exceeded'}.freeze + PROXY_CONFIG_ERROR = {healthy: false, last_error: 'NR-APM-007', message: 'HTTP Proxy configuration error; response code [%s]'}.freeze + AGENT_DISABLED = {healthy: false, last_error: 'NR-APM-008', message: 'Agent is disabled via configuration'}.freeze + FAILED_TO_CONNECT = {healthy: false, last_error: 'NR-APM-009', message: 'Failed to connect to New Relic data collector'}.freeze + FAILED_TO_PARSE_CONFIG = {healthy: false, last_error: 'NR-APM-010', message: 'Agent config file is not able to be parsed'}.freeze + SHUTDOWN = {healthy: true, last_error: 'NR-APM-099', message: 'Agent has shutdown'}.freeze def create_and_run_health_check_loop unless health_check_enabled? @@ -58,7 +58,7 @@ def create_and_run_health_check_loop def update_status(status, options = []) return unless @continue - @status = status + @status = status.dup update_message(options) unless options.empty? end From 9538c385df8755f8207cd8b4ef4b0fce8afbfda9 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle <87386821+kaylareopelle@users.noreply.github.com> Date: Tue, 7 Jan 2025 16:45:00 -0800 Subject: [PATCH 09/28] Update lib/new_relic/agent/configuration/default_source.rb --- lib/new_relic/agent/configuration/default_source.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/new_relic/agent/configuration/default_source.rb b/lib/new_relic/agent/configuration/default_source.rb index 707c77eaf6..954af9b075 100644 --- a/lib/new_relic/agent/configuration/default_source.rb +++ b/lib/new_relic/agent/configuration/default_source.rb @@ -2195,7 +2195,7 @@ def self.notify :public => true, :type => String, :allowed_from_server => false, - :description => 'This assigns a fleet id to the language agent. This id is generated by agent control. If this setting is present, it indicates the agent is running in a super agent/fleet environment and health file(s) will be generated.' + :description => 'This assigns a fleet ID to the language agent. This ID is generated by agent control. If this setting is present, it indicates the agent is running in an agent control/fleet environment and health file(s) will be generated. This configuration will be set by agent control, or one of its components, prior to agent startup.' }, :'agent_control.health.delivery_location' => { :default => nil, From 9d01044a5790eaa80ccf12f336a114db86aa32a4 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Thu, 9 Jan 2025 14:00:13 -0800 Subject: [PATCH 10/28] Start health check loop before license key checked Previously the invalid license key status would be set, but the health check files would not be written until the first request to the web app was made. Now, health checks will begin before the agent officially starts. --- lib/new_relic/agent/agent_helpers/connect.rb | 24 +++++++++++--------- lib/new_relic/agent/agent_helpers/startup.rb | 4 +++- test/new_relic/agent/agent/start_test.rb | 3 +++ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/new_relic/agent/agent_helpers/connect.rb b/lib/new_relic/agent/agent_helpers/connect.rb index 75fc02d49e..193007d2f1 100644 --- a/lib/new_relic/agent/agent_helpers/connect.rb +++ b/lib/new_relic/agent/agent_helpers/connect.rb @@ -66,13 +66,14 @@ def log_error(error) # no longer try to connect to the server, saving the # application and the server load def handle_license_error(error) - ::NewRelic::Agent.logger.error(error.message, + NewRelic::Agent.logger.error(error.message, 'Visit newrelic.com to obtain a valid license key, or to upgrade your account.') + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) disconnect end def handle_unrecoverable_agent_error(error) - ::NewRelic::Agent.logger.error(error.message) + NewRelic::Agent.logger.error(error.message) disconnect shutdown end @@ -96,7 +97,7 @@ def event_harvest_config # connects, then configures the agent using the response from # the connect service def connect_to_server - request_builder = ::NewRelic::Agent::Connect::RequestBuilder.new( + request_builder = NewRelic::Agent::Connect::RequestBuilder.new( @service, Agent.config, event_harvest_config, @@ -104,7 +105,7 @@ def connect_to_server ) connect_response = @service.connect(request_builder.connect_payload) - response_handler = ::NewRelic::Agent::Connect::ResponseHandler.new(self, Agent.config) + response_handler = NewRelic::Agent::Connect::ResponseHandler.new(self, Agent.config) response_handler.configure_agent(connect_response) log_connection(connect_response) if connect_response @@ -114,9 +115,9 @@ def connect_to_server # Logs when we connect to the server, for debugging purposes # - makes sure we know if an agent has not connected def log_connection(config_data) - ::NewRelic::Agent.logger.debug("Connected to NewRelic Service at #{@service.collector.name}") - ::NewRelic::Agent.logger.debug("Agent Run = #{@service.agent_id}.") - ::NewRelic::Agent.logger.debug("Connection data = #{config_data.inspect}") + NewRelic::Agent.logger.debug("Connected to NewRelic Service at #{@service.collector.name}") + NewRelic::Agent.logger.debug("Agent Run = #{@service.agent_id}.") + NewRelic::Agent.logger.debug("Connection data = #{config_data.inspect}") if config_data['messages']&.any? log_collector_messages(config_data['messages']) end @@ -124,7 +125,7 @@ def log_connection(config_data) def log_collector_messages(messages) messages.each do |message| - ::NewRelic::Agent.logger.send(message['level'].downcase, message['message']) + NewRelic::Agent.logger.send(message['level'].downcase, message['message']) end end @@ -186,11 +187,12 @@ def connect(options = {}) opts = connect_options(options) return unless should_connect?(opts[:force_reconnect]) - ::NewRelic::Agent.logger.debug("Connecting Process to New Relic: #$0") + NewRelic::Agent.logger.debug("Connecting Process to New Relic: #$0") connect_to_server @connected_pid = $$ @connect_state = :connected signal_connected + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::HEALTHY) rescue NewRelic::Agent::ForceDisconnectException => e handle_force_disconnect(e) rescue NewRelic::Agent::LicenseException => e @@ -201,7 +203,7 @@ def connect(options = {}) NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT) retry if retry_from_error?(e, opts) rescue Exception => e - ::NewRelic::Agent.logger.error('Exception of unexpected type during Agent#connect():', e) + NewRelic::Agent.logger.error('Exception of unexpected type during Agent#connect():', e) raise end @@ -215,7 +217,7 @@ def retry_from_error?(e, opts) return false unless opts[:keep_retrying] note_connect_failure - ::NewRelic::Agent.logger.info("Will re-attempt in #{connect_retry_period} seconds") + NewRelic::Agent.logger.info("Will re-attempt in #{connect_retry_period} seconds") sleep(connect_retry_period) true end diff --git a/lib/new_relic/agent/agent_helpers/startup.rb b/lib/new_relic/agent/agent_helpers/startup.rb index bc1a98a57b..4fe4828daf 100644 --- a/lib/new_relic/agent/agent_helpers/startup.rb +++ b/lib/new_relic/agent/agent_helpers/startup.rb @@ -36,6 +36,9 @@ def start # setting up the worker thread and the exit handler to shut # down the agent def check_config_and_start_agent + # some health statuses, such as invalid license key, are ran before + # the agent officially starts + @health_check.create_and_run_health_check_loop return unless monitoring? && has_correct_license_key? return if using_forking_dispatcher? @@ -48,7 +51,6 @@ def check_config_and_start_agent # Treatment of @started and env report is important to get right. def setup_and_start_agent(options = {}) @started = true - @health_check.create_and_run_health_check_loop @harvester.mark_started unless in_resque_child_process? diff --git a/test/new_relic/agent/agent/start_test.rb b/test/new_relic/agent/agent/start_test.rb index 54b80914d8..a2ae867429 100644 --- a/test/new_relic/agent/agent/start_test.rb +++ b/test/new_relic/agent/agent/start_test.rb @@ -44,12 +44,14 @@ def test_disabled_negative def test_check_config_and_start_agent_disabled self.expects(:monitoring?).returns(false) + @health_check.expects(:create_and_run_health_check_loop) check_config_and_start_agent end def test_check_config_and_start_agent_incorrect_key self.expects(:monitoring?).returns(true) self.expects(:has_correct_license_key?).returns(false) + @health_check.expects(:create_and_run_health_check_loop) check_config_and_start_agent end @@ -57,6 +59,7 @@ def test_check_config_and_start_agent_forking self.expects(:monitoring?).returns(true) self.expects(:has_correct_license_key?).returns(true) self.expects(:using_forking_dispatcher?).returns(true) + @health_check.expects(:create_and_run_health_check_loop) check_config_and_start_agent end From 549905457525acb69125d418f510ff8c82fe7271 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 16:51:16 -0800 Subject: [PATCH 11/28] Add tests for update_status --- test/new_relic/agent/agent/start_test.rb | 28 +++++++++++ test/new_relic/agent/agent_test.rb | 47 +++++++++++++++++++ .../new_relic/agent/new_relic_service_test.rb | 33 +++++++++++++ 3 files changed, 108 insertions(+) diff --git a/test/new_relic/agent/agent/start_test.rb b/test/new_relic/agent/agent/start_test.rb index a2ae867429..adb82de108 100644 --- a/test/new_relic/agent/agent/start_test.rb +++ b/test/new_relic/agent/agent/start_test.rb @@ -48,6 +48,27 @@ def test_check_config_and_start_agent_disabled check_config_and_start_agent end + def test_monitoring_false_updates_health_status + with_config(:monitoring => false) do + NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::AGENT_DISABLED) + monitoring? + end + end + + def test_missing_app_name_updates_health_status + with_config(:app_name => nil) do + NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::MISSING_APP_NAME) + agent_should_start? + end + end + + def test_missing_license_key_updates_health_status + with_config(:license_key => nil) do + NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::MISSING_LICENSE_KEY) + has_license_key? + end + end + def test_check_config_and_start_agent_incorrect_key self.expects(:monitoring?).returns(true) self.expects(:has_correct_license_key?).returns(false) @@ -193,6 +214,13 @@ def test_correct_license_length_negative end end + def test_correct_license_length_negative_updates_health_status + with_config(:license_key => 'a' * 30) do + NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) + correct_license_length + end + end + def test_using_forking_dispatcher_positive with_config(:dispatcher => :passenger) do assert_predicate self, :using_forking_dispatcher? diff --git a/test/new_relic/agent/agent_test.rb b/test/new_relic/agent/agent_test.rb index 05910c6511..e2107adae4 100644 --- a/test/new_relic/agent/agent_test.rb +++ b/test/new_relic/agent/agent_test.rb @@ -335,6 +335,9 @@ def test_connect_retries_on_server_connection_exception @agent.send(:connect) assert_predicate(@agent, :connected?) + + # status should be healthy because the retry successfully connects + assert_equal NewRelic::Agent::HealthCheck::HEALTHY, @agent.health_check.instance_variable_get(:@status) end def test_connect_does_not_retry_if_keep_retrying_false @@ -342,6 +345,13 @@ def test_connect_does_not_retry_if_keep_retrying_false @agent.send(:connect, :keep_retrying => false) end + def test_agent_health_status_set_to_failed_to_connect + @agent.service.expects(:connect).once.raises(Timeout::Error) + @agent.send(:connect, :keep_retrying => false) + + assert_equal NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT, @agent.health_check.instance_variable_get(:@status) + end + def test_connect_does_not_retry_on_license_error @agent.service.expects(:connect).raises(NewRelic::Agent::LicenseException) @agent.send(:connect) @@ -562,6 +572,14 @@ def test_defer_start_if_no_application_name_configured assert_match(/No application name configured/i, logmsg) end + def test_health_status_updated_if_no_app_name_configured + with_config(:app_name => false) do + @agent.start + end + + assert_equal NewRelic::Agent::HealthCheck::MISSING_APP_NAME, @agent.health_check.instance_variable_get(:@status) + end + def test_harvest_from_container container = mock harvested_items = %w[foo bar baz] @@ -750,6 +768,15 @@ def test_force_disconnect_logs_message refute_empty matching_lines, 'logs should say the agent is disconnecting' end + def test_force_disconnect_sets_health_status + @agent.instance_variable_set(:@service, nil) + @agent.stubs(:sleep) + error = NewRelic::Agent::ForceDisconnectException.new + @agent.handle_force_disconnect(error) + + assert_equal NewRelic::Agent::HealthCheck::FORCED_DISCONNECT, @agent.health_check.instance_variable_get(:@status) + end + def test_discarding_logs_message @agent.service.stubs(:send).raises(UnrecoverableServerException) @@ -801,6 +828,26 @@ def test_exit_connecting_worker_thread assert worker.join(1), 'Worker thread hang on shutdown' end + + def test_agent_health_status_set_to_shutdown_when_healthy + @agent.setup_and_start_agent + + assert_equal NewRelic::Agent::HealthCheck::HEALTHY, @agent.health_check.instance_variable_get(:@status) + + @agent.shutdown + + assert_equal NewRelic::Agent::HealthCheck::SHUTDOWN, @agent.health_check.instance_variable_get(:@status) + end + + def test_agent_health_status_when_not_healthy_is_same_after_shutdown + @agent.setup_and_start_agent + + @agent.health_check.instance_variable_set(:@status, NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) + + @agent.shutdown + + assert_equal NewRelic::Agent::HealthCheck::SHUTDOWN, @agent.health_check.instance_variable_get(:@status) + end end class AgentStartingTest < Minitest::Test diff --git a/test/new_relic/agent/new_relic_service_test.rb b/test/new_relic/agent/new_relic_service_test.rb index 36772c7ac2..90257479e6 100644 --- a/test/new_relic/agent/new_relic_service_test.rb +++ b/test/new_relic/agent/new_relic_service_test.rb @@ -585,6 +585,7 @@ def self.check_status_code_handling(expected_exceptions) method_name = "test_#{status_code}_raises_#{exception_type.name.split('::').last}" define_method(method_name) do @http_handle.respond_to(:metric_data, 'payload', :code => status_code) + assert_raises exception_type do stats_hash = NewRelic::Agent::StatsHash.new @service.metric_data(stats_hash) @@ -608,6 +609,38 @@ def self.check_status_code_handling(expected_exceptions) 429 => NewRelic::Agent::ServerConnectionException, 431 => NewRelic::Agent::UnrecoverableServerException) + def self.check_agent_health_status_updates(expected_exceptions) + expected_exceptions.each do |status_code| + method_name = "test_#{status_code}_updates_agent_health_check" + define_method(method_name) do + NewRelic::Agent.agent.health_check.instance_variable_set(:@continue, true) + NewRelic::Agent.agent.health_check.instance_variable_set(:@status, NewRelic::Agent::HealthCheck::HEALTHY) + + begin + @http_handle.respond_to(:metric_data, 'payload', :code => status_code) + stats_hash = NewRelic::Agent::StatsHash.new + @service.metric_data(stats_hash) + rescue + # no-op, raise the error + end + + expected = { + healthy: false, + last_error: 'NR-APM-004', + message: "HTTP error response code [#{status_code}] recevied from New Relic while sending data type [metric_data]" + } + + assert_equal expected, NewRelic::Agent.agent.health_check.instance_variable_get(:@status) + end + end + end + + # Some status codes may also eventually report other health checks + # Status code 401 is also invalid license key, but that will return a different health check value if that's the reason for the 401 + # Status code 410 is also for forced disconnect, but that forced disconnect is handled where forced disconnect is rescued + # In this method, however, they should report HTTP_ERROR + check_agent_health_status_updates([400, 401, 403, 405, 407, 408, 409, 410, 411, 413, 415, 417, 429, 431]) + # protocol 17 def test_supportability_metrics_for_http_error_responses NewRelic::Agent.drop_buffered_data From 8f257f274d24e274d457e55af124834357a6833c Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 16:59:11 -0800 Subject: [PATCH 12/28] Remove remaining `::` before NewRelic::Agent --- lib/new_relic/agent/agent_helpers/connect.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/new_relic/agent/agent_helpers/connect.rb b/lib/new_relic/agent/agent_helpers/connect.rb index 193007d2f1..c8a493bdaa 100644 --- a/lib/new_relic/agent/agent_helpers/connect.rb +++ b/lib/new_relic/agent/agent_helpers/connect.rb @@ -54,7 +54,7 @@ def note_connect_failure # to tell the user what happened, since this is not an error # we can handle gracefully. def log_error(error) - ::NewRelic::Agent.logger.error("Error establishing connection with New Relic Service at #{control.server}:", + NewRelic::Agent.logger.error("Error establishing connection with New Relic Service at #{control.server}:", error) end From 3b45b9dc6290ecbf3c1ccf98508ecdbb94a87cb4 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 17:27:54 -0800 Subject: [PATCH 13/28] Enable health checks in monitoring? test --- test/new_relic/agent/agent/start_test.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/new_relic/agent/agent/start_test.rb b/test/new_relic/agent/agent/start_test.rb index adb82de108..2568d0ef92 100644 --- a/test/new_relic/agent/agent/start_test.rb +++ b/test/new_relic/agent/agent/start_test.rb @@ -50,6 +50,9 @@ def test_check_config_and_start_agent_disabled def test_monitoring_false_updates_health_status with_config(:monitoring => false) do + # make sure the health checks are set up to run + NewRelic::Agent.agent.health_check.instance_variable_get(:@continue, true) + NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::AGENT_DISABLED) monitoring? end From eb1e8fb75cf81375623315640aaeb19e8bf2f4e5 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 17:36:10 -0800 Subject: [PATCH 14/28] get => set --- test/new_relic/agent/agent/start_test.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/new_relic/agent/agent/start_test.rb b/test/new_relic/agent/agent/start_test.rb index 2568d0ef92..8efbb16739 100644 --- a/test/new_relic/agent/agent/start_test.rb +++ b/test/new_relic/agent/agent/start_test.rb @@ -51,7 +51,7 @@ def test_check_config_and_start_agent_disabled def test_monitoring_false_updates_health_status with_config(:monitoring => false) do # make sure the health checks are set up to run - NewRelic::Agent.agent.health_check.instance_variable_get(:@continue, true) + NewRelic::Agent.agent.health_check.instance_variable_set(:@continue, true) NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::AGENT_DISABLED) monitoring? From 267a58f11191142b227301472caa723d5bf87c10 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 17:39:38 -0800 Subject: [PATCH 15/28] Undo :: change --- lib/new_relic/agent/agent_helpers/connect.rb | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/new_relic/agent/agent_helpers/connect.rb b/lib/new_relic/agent/agent_helpers/connect.rb index c8a493bdaa..09277d8fbc 100644 --- a/lib/new_relic/agent/agent_helpers/connect.rb +++ b/lib/new_relic/agent/agent_helpers/connect.rb @@ -54,7 +54,7 @@ def note_connect_failure # to tell the user what happened, since this is not an error # we can handle gracefully. def log_error(error) - NewRelic::Agent.logger.error("Error establishing connection with New Relic Service at #{control.server}:", + ::NewRelic::Agent.logger.error("Error establishing connection with New Relic Service at #{control.server}:", error) end @@ -66,14 +66,14 @@ def log_error(error) # no longer try to connect to the server, saving the # application and the server load def handle_license_error(error) - NewRelic::Agent.logger.error(error.message, + ::NewRelic::Agent.logger.error(error.message, 'Visit newrelic.com to obtain a valid license key, or to upgrade your account.') NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) disconnect end def handle_unrecoverable_agent_error(error) - NewRelic::Agent.logger.error(error.message) + ::NewRelic::Agent.logger.error(error.message) disconnect shutdown end @@ -97,7 +97,7 @@ def event_harvest_config # connects, then configures the agent using the response from # the connect service def connect_to_server - request_builder = NewRelic::Agent::Connect::RequestBuilder.new( + request_builder = ::NewRelic::Agent::Connect::RequestBuilder.new( @service, Agent.config, event_harvest_config, @@ -105,7 +105,7 @@ def connect_to_server ) connect_response = @service.connect(request_builder.connect_payload) - response_handler = NewRelic::Agent::Connect::ResponseHandler.new(self, Agent.config) + response_handler = ::NewRelic::Agent::Connect::ResponseHandler.new(self, Agent.config) response_handler.configure_agent(connect_response) log_connection(connect_response) if connect_response @@ -115,9 +115,9 @@ def connect_to_server # Logs when we connect to the server, for debugging purposes # - makes sure we know if an agent has not connected def log_connection(config_data) - NewRelic::Agent.logger.debug("Connected to NewRelic Service at #{@service.collector.name}") - NewRelic::Agent.logger.debug("Agent Run = #{@service.agent_id}.") - NewRelic::Agent.logger.debug("Connection data = #{config_data.inspect}") + ::NewRelic::Agent.logger.debug("Connected to NewRelic Service at #{@service.collector.name}") + ::NewRelic::Agent.logger.debug("Agent Run = #{@service.agent_id}.") + ::NewRelic::Agent.logger.debug("Connection data = #{config_data.inspect}") if config_data['messages']&.any? log_collector_messages(config_data['messages']) end @@ -125,7 +125,7 @@ def log_connection(config_data) def log_collector_messages(messages) messages.each do |message| - NewRelic::Agent.logger.send(message['level'].downcase, message['message']) + ::NewRelic::Agent.logger.send(message['level'].downcase, message['message']) end end @@ -187,7 +187,7 @@ def connect(options = {}) opts = connect_options(options) return unless should_connect?(opts[:force_reconnect]) - NewRelic::Agent.logger.debug("Connecting Process to New Relic: #$0") + ::NewRelic::Agent.logger.debug("Connecting Process to New Relic: #$0") connect_to_server @connected_pid = $$ @connect_state = :connected @@ -203,7 +203,7 @@ def connect(options = {}) NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT) retry if retry_from_error?(e, opts) rescue Exception => e - NewRelic::Agent.logger.error('Exception of unexpected type during Agent#connect():', e) + ::NewRelic::Agent.logger.error('Exception of unexpected type during Agent#connect():', e) raise end @@ -217,7 +217,7 @@ def retry_from_error?(e, opts) return false unless opts[:keep_retrying] note_connect_failure - NewRelic::Agent.logger.info("Will re-attempt in #{connect_retry_period} seconds") + ::NewRelic::Agent.logger.info("Will re-attempt in #{connect_retry_period} seconds") sleep(connect_retry_period) true end From ac1bc317dbb02f430054f5b34237fe65fc98b518 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 17:53:55 -0800 Subject: [PATCH 16/28] Add debugging for CI failure --- lib/new_relic/agent/health_check.rb | 3 ++- test/new_relic/agent/agent/start_test.rb | 4 ++++ test/new_relic/agent/new_relic_service_test.rb | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index df7bbd36b0..6961fbdf61 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -56,8 +56,9 @@ def create_and_run_health_check_loop end def update_status(status, options = []) + puts '***** update_status AGENT_DISABLED *******' if status == AGENT_DISABLED return unless @continue - + puts '***** update_status @continue = true *****' if status == AGENT_DISABLED @status = status.dup update_message(options) unless options.empty? end diff --git a/test/new_relic/agent/agent/start_test.rb b/test/new_relic/agent/agent/start_test.rb index 8efbb16739..c141a05a08 100644 --- a/test/new_relic/agent/agent/start_test.rb +++ b/test/new_relic/agent/agent/start_test.rb @@ -49,12 +49,16 @@ def test_check_config_and_start_agent_disabled end def test_monitoring_false_updates_health_status + puts '*************** monitoring false test ***************' + puts '*************** TEST START ******************' with_config(:monitoring => false) do + puts NewRelic::Agent.agent.health_check.inspect # make sure the health checks are set up to run NewRelic::Agent.agent.health_check.instance_variable_set(:@continue, true) NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::AGENT_DISABLED) monitoring? + puts '************ TEST END *********************' end end diff --git a/test/new_relic/agent/new_relic_service_test.rb b/test/new_relic/agent/new_relic_service_test.rb index 90257479e6..c05ed09322 100644 --- a/test/new_relic/agent/new_relic_service_test.rb +++ b/test/new_relic/agent/new_relic_service_test.rb @@ -639,7 +639,9 @@ def self.check_agent_health_status_updates(expected_exceptions) # Status code 401 is also invalid license key, but that will return a different health check value if that's the reason for the 401 # Status code 410 is also for forced disconnect, but that forced disconnect is handled where forced disconnect is rescued # In this method, however, they should report HTTP_ERROR - check_agent_health_status_updates([400, 401, 403, 405, 407, 408, 409, 410, 411, 413, 415, 417, 429, 431]) + check_agent_health_status_updates([ + 400, 401, 403, 405, 407, 408, 409, 410, 411, 413, 415, 417, 429, 431 + ]) # protocol 17 def test_supportability_metrics_for_http_error_responses From 53d1b1d44e8decb6fdbe55f141bc8fd55f4f4249 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 17:55:02 -0800 Subject: [PATCH 17/28] rubocop --- lib/new_relic/agent/health_check.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index 6961fbdf61..b61daac3b1 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -58,6 +58,7 @@ def create_and_run_health_check_loop def update_status(status, options = []) puts '***** update_status AGENT_DISABLED *******' if status == AGENT_DISABLED return unless @continue + puts '***** update_status @continue = true *****' if status == AGENT_DISABLED @status = status.dup update_message(options) unless options.empty? From 21e65c9babd4c436ca02eb6f7d28f1703fc2f11a Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Mon, 13 Jan 2025 19:13:42 -0800 Subject: [PATCH 18/28] Add check for else --- lib/new_relic/agent/agent_helpers/startup.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/new_relic/agent/agent_helpers/startup.rb b/lib/new_relic/agent/agent_helpers/startup.rb index 4fe4828daf..fd04d1c7e1 100644 --- a/lib/new_relic/agent/agent_helpers/startup.rb +++ b/lib/new_relic/agent/agent_helpers/startup.rb @@ -127,11 +127,13 @@ def connect_in_foreground # Warn the user if they have configured their agent not to # send data, that way we can see this clearly in the log file def monitoring? + puts '********** monitoring? **************' return false if Agent.config[:'serverless_mode.enabled'] if Agent.config[:monitor_mode] true else + puts '********* else reached! *********' NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::AGENT_DISABLED) ::NewRelic::Agent.logger.warn('Agent configured not to send data in this environment.') false From 29067eaf3b42c34d928cefd01e1bbd876ddafab9 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 14 Jan 2025 14:35:27 -0800 Subject: [PATCH 19/28] Remove puts, use correct config name in test --- lib/new_relic/agent/agent_helpers/startup.rb | 2 -- lib/new_relic/agent/health_check.rb | 2 -- test/new_relic/agent/agent/start_test.rb | 8 ++------ 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/lib/new_relic/agent/agent_helpers/startup.rb b/lib/new_relic/agent/agent_helpers/startup.rb index fd04d1c7e1..4fe4828daf 100644 --- a/lib/new_relic/agent/agent_helpers/startup.rb +++ b/lib/new_relic/agent/agent_helpers/startup.rb @@ -127,13 +127,11 @@ def connect_in_foreground # Warn the user if they have configured their agent not to # send data, that way we can see this clearly in the log file def monitoring? - puts '********** monitoring? **************' return false if Agent.config[:'serverless_mode.enabled'] if Agent.config[:monitor_mode] true else - puts '********* else reached! *********' NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::AGENT_DISABLED) ::NewRelic::Agent.logger.warn('Agent configured not to send data in this environment.') false diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index b61daac3b1..df7bbd36b0 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -56,10 +56,8 @@ def create_and_run_health_check_loop end def update_status(status, options = []) - puts '***** update_status AGENT_DISABLED *******' if status == AGENT_DISABLED return unless @continue - puts '***** update_status @continue = true *****' if status == AGENT_DISABLED @status = status.dup update_message(options) unless options.empty? end diff --git a/test/new_relic/agent/agent/start_test.rb b/test/new_relic/agent/agent/start_test.rb index c141a05a08..c40cf94e6e 100644 --- a/test/new_relic/agent/agent/start_test.rb +++ b/test/new_relic/agent/agent/start_test.rb @@ -49,16 +49,12 @@ def test_check_config_and_start_agent_disabled end def test_monitoring_false_updates_health_status - puts '*************** monitoring false test ***************' - puts '*************** TEST START ******************' - with_config(:monitoring => false) do - puts NewRelic::Agent.agent.health_check.inspect + with_config(:monitor_mode => false) do # make sure the health checks are set up to run NewRelic::Agent.agent.health_check.instance_variable_set(:@continue, true) - NewRelic::Agent.agent.health_check.expects(:update_status).with(NewRelic::Agent::HealthCheck::AGENT_DISABLED) + monitoring? - puts '************ TEST END *********************' end end From 05ef5e47e830cbaf336e636f414d4b29a096031a Mon Sep 17 00:00:00 2001 From: Kayla Reopelle <87386821+kaylareopelle@users.noreply.github.com> Date: Tue, 14 Jan 2025 16:31:57 -0800 Subject: [PATCH 20/28] Apply suggestions from code review Co-authored-by: Hannah Ramadan <76922290+hannahramadan@users.noreply.github.com> --- CHANGELOG.md | 2 +- lib/new_relic/agent/health_check.rb | 2 +- test/new_relic/agent/health_check_test.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf5bbc74dd..69cfb3483e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ - **Feature: Add health checks when the agent runs within Agent Control** - When the agent is started with a within an agent control environment, automatic health check files will be created within the configured file destination at the configured frequency. [PR#2995](https://github.com/newrelic/newrelic-ruby-agent/pull/2995) + When the agent is started within an agent control environment, a health check file will be created at the configured file destination for every agent process. The health check files will be updated at the configured frequency. [PR#2995](https://github.com/newrelic/newrelic-ruby-agent/pull/2995) - **Bugfix: Stop emitting inaccurate debug-level log about deprecated configuration options** diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index df7bbd36b0..1755f52d03 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -17,7 +17,7 @@ def initialize end HEALTHY = {healthy: true, last_error: 'NR-APM-000', message: 'Healthy'}.freeze - INVALID_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-001', message: 'Invalid liense key (HTTP status code 401)'}.freeze + INVALID_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-001', message: 'Invalid license key (HTTP status code 401)'}.freeze MISSING_LICENSE_KEY = {healthy: false, last_error: 'NR-APM-002', message: 'License key missing in configuration'}.freeze FORCED_DISCONNECT = {healthy: false, last_error: 'NR-APM-003', message: 'Forced disconnect received from New Relic (HTTP status code 410)'}.freeze HTTP_ERROR = {healthy: false, last_error: 'NR-APM-004', message: 'HTTP error response code [%s] recevied from New Relic while sending data type [%s]'}.freeze diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index e6780f3dd2..b88d0f5e8e 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -50,7 +50,7 @@ def test_yaml_file_name_has_health_plus_uuid_without_hyphens health_check = NewRelic::Agent::HealthCheck.new # ex: health-bc21b5891f5e44fc9272caef924611a8.yml - assert_match(/health-(.*){32}\.ya?ml/, health_check.send(:file_name)) + assert_match(/^health-[0-9a-f]{32}\.ya?ml$/, health_check.send(:file_name)) end def test_write_file_called_on_interval From b3b676615f18c4e7433daaa93502dba7957098f8 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 14 Jan 2025 16:43:13 -0800 Subject: [PATCH 21/28] Add healthy? to health check Make sure the SHUTDOWN status is only applied when the agent is healthy --- lib/new_relic/agent/agent_helpers/shutdown.rb | 4 +++- lib/new_relic/agent/health_check.rb | 4 ++++ test/new_relic/agent/agent_test.rb | 2 +- test/new_relic/agent/health_check_test.rb | 18 ++++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/lib/new_relic/agent/agent_helpers/shutdown.rb b/lib/new_relic/agent/agent_helpers/shutdown.rb index 7746769e0b..d9469065e8 100644 --- a/lib/new_relic/agent/agent_helpers/shutdown.rb +++ b/lib/new_relic/agent/agent_helpers/shutdown.rb @@ -19,7 +19,9 @@ def shutdown revert_to_default_configuration @started = nil - NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::SHUTDOWN) + + NewRelic::Agent.agent.health_check.update_status(NewRelic::Agent::HealthCheck::SHUTDOWN) if NewRelic::Agent.agent.health_check.healthy? + Control.reset end diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index df7bbd36b0..aa5e22be1c 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -62,6 +62,10 @@ def update_status(status, options = []) update_message(options) unless options.empty? end + def healthy? + @status == HEALTHY + end + private def contents diff --git a/test/new_relic/agent/agent_test.rb b/test/new_relic/agent/agent_test.rb index e2107adae4..9b3da72135 100644 --- a/test/new_relic/agent/agent_test.rb +++ b/test/new_relic/agent/agent_test.rb @@ -846,7 +846,7 @@ def test_agent_health_status_when_not_healthy_is_same_after_shutdown @agent.shutdown - assert_equal NewRelic::Agent::HealthCheck::SHUTDOWN, @agent.health_check.instance_variable_get(:@status) + assert_equal NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY, @agent.health_check.instance_variable_get(:@status) end end diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index e6780f3dd2..f711d5a111 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -333,4 +333,22 @@ def test_update_message_works_with_http_arrays assert_equal 'HTTP error response code [401] recevied from New Relic while sending data type [preconnect]', result end + + def test_healthy_true_when_healthy + health_check = NewRelic::Agent::HealthCheck.new + # stub a valid health check, by setting @continue = true + health_check.instance_variable_set(:@continue, true) + health_check.update_status(NewRelic::Agent::HealthCheck::HEALTHY) + + assert_predicate health_check, :healthy? + end + + def test_healthy_false_when_invalid_license_key + health_check = NewRelic::Agent::HealthCheck.new + # stub a valid health check, by setting @continue = true + health_check.instance_variable_set(:@continue, true) + health_check.update_status(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) + + refute_predicate health_check, :healthy? + end end From a7e87cb753d1475830629ad5d0aca1dff2b1e147 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 14 Jan 2025 16:51:03 -0800 Subject: [PATCH 22/28] Set @continue = false on init when checks disabled Instead of waiting until create_and_run_health_check_loop is run to verify the necessary config options are present, evaluate whether checks are enabled on initialization. In addition, log the status of the config-related instance variables during initialization. --- lib/new_relic/agent/health_check.rb | 52 +++++++++++++++++------ test/new_relic/agent/agent_test.rb | 12 +++++- test/new_relic/agent/health_check_test.rb | 11 +++-- 3 files changed, 56 insertions(+), 19 deletions(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index 35953b2956..c7b1dc764e 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -7,13 +7,12 @@ module Agent class HealthCheck def initialize @start_time = nano_time - @fleet_id = ENV['NEW_RELIC_AGENT_CONTROL_FLEET_ID'] - # The spec states file paths for the delivery location will begin with file:// - # This does not create a valid path in Ruby, so remove the prefix when present - @delivery_location = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION']&.gsub('file://', '') - @frequency = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'] ? ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'].to_i : 5 @continue = true @status = HEALTHY + # the following assignments may set @continue = false if they are invalid + set_fleet_id + set_delivery_location + set_frequency end HEALTHY = {healthy: true, last_error: 'NR-APM-000', message: 'Healthy'}.freeze @@ -30,13 +29,7 @@ def initialize SHUTDOWN = {healthy: true, last_error: 'NR-APM-099', message: 'Agent has shutdown'}.freeze def create_and_run_health_check_loop - unless health_check_enabled? - @continue = false - end - - return NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_FLEET_ID not found, skipping health checks') unless @fleet_id - return NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found, skipping health checks') unless @delivery_location - return NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less, skipping health checks') unless @frequency > 0 + return unless health_checks_enabled? && @continue NewRelic::Agent.logger.debug('Agent control health check conditions met. Starting health checks.') NewRelic::Agent.record_metric('Supportability/AgentControl/Health/enabled', 1) @@ -68,6 +61,37 @@ def healthy? private + def set_fleet_id + @fleet_id = if ENV['NEW_RELIC_AGENT_CONTROL_FLEET_ID'] + ENV['NEW_RELIC_AGENT_CONTROL_FLEET_ID'] + else + NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_FLEET_ID not found, disabling health checks') + @continue = false + nil + end + end + + def set_delivery_location + @delivery_location = if ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION'] + # The spec states file paths for the delivery location will begin with file:// + # This does not create a valid path in Ruby, so remove the prefix when present + ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION']&.gsub('file://', '') + else + NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found, disabling health checks') + @continue = false + nil + end + end + + def set_frequency + @frequency = ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'] ? ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY'].to_i : 5 + + if @frequency <= 0 + NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less, disabling health checks') + @continue = false + end + end + def contents <<~CONTENTS healthy: #{@status[:healthy]} @@ -114,8 +138,8 @@ def create_file_path @continue = false end - def health_check_enabled? - @fleet_id && @delivery_location && (@frequency > 0) + def health_checks_enabled? + @fleet_id && @delivery_location && @frequency > 0 end def update_message(options) diff --git a/test/new_relic/agent/agent_test.rb b/test/new_relic/agent/agent_test.rb index 9b3da72135..b5fde3845b 100644 --- a/test/new_relic/agent/agent_test.rb +++ b/test/new_relic/agent/agent_test.rb @@ -346,6 +346,8 @@ def test_connect_does_not_retry_if_keep_retrying_false end def test_agent_health_status_set_to_failed_to_connect + # stub a valid health check, by setting @continue = true + @agent.health_check.instance_variable_set(:@continue, true) @agent.service.expects(:connect).once.raises(Timeout::Error) @agent.send(:connect, :keep_retrying => false) @@ -574,6 +576,8 @@ def test_defer_start_if_no_application_name_configured def test_health_status_updated_if_no_app_name_configured with_config(:app_name => false) do + # stub a valid health check, by setting @continue = true + @agent.health_check.instance_variable_set(:@continue, true) @agent.start end @@ -772,6 +776,8 @@ def test_force_disconnect_sets_health_status @agent.instance_variable_set(:@service, nil) @agent.stubs(:sleep) error = NewRelic::Agent::ForceDisconnectException.new + # stub a valid health check, by setting @continue = true + @agent.health_check.instance_variable_set(:@continue, true) @agent.handle_force_disconnect(error) assert_equal NewRelic::Agent::HealthCheck::FORCED_DISCONNECT, @agent.health_check.instance_variable_get(:@status) @@ -830,6 +836,8 @@ def test_exit_connecting_worker_thread end def test_agent_health_status_set_to_shutdown_when_healthy + # stub a valid health check, by setting @continue = true + @agent.health_check.instance_variable_set(:@continue, true) @agent.setup_and_start_agent assert_equal NewRelic::Agent::HealthCheck::HEALTHY, @agent.health_check.instance_variable_get(:@status) @@ -840,10 +848,10 @@ def test_agent_health_status_set_to_shutdown_when_healthy end def test_agent_health_status_when_not_healthy_is_same_after_shutdown + # stub a valid health check, by setting @continue = true + @agent.health_check.instance_variable_set(:@continue, true) @agent.setup_and_start_agent - @agent.health_check.instance_variable_set(:@status, NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) - @agent.shutdown assert_equal NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY, @agent.health_check.instance_variable_get(:@status) diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 24fa399fef..9a4334226a 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -85,8 +85,9 @@ def test_write_file_sets_continue_false_when_error NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do File.stub(:write, ->(arg1, arg2) { raise 'boom!' }) do health_check = NewRelic::Agent::HealthCheck.new + # stub a valid health check, by setting @continue = true + health_check.instance_variable_set(:@continue, true) - assert(health_check.instance_variable_get(:@continue)) health_check.send(:write_file) refute(health_check.instance_variable_get(:@continue)) @@ -109,8 +110,8 @@ def test_create_file_path_sets_continue_false_when_error_raised NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do File.stub(:directory?, ->(arg1) { raise 'boom!' }) do health_check = NewRelic::Agent::HealthCheck.new - - assert(health_check.instance_variable_get(:@continue)) + # stub a valid health check, by setting @continue = true + health_check.instance_variable_set(:@continue, true) health_check.send(:create_file_path) refute(health_check.instance_variable_get(:@continue)) @@ -151,6 +152,8 @@ def test_yaml_file_has_last_error_field_when_status_not_healthy with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new + # stub a valid health check, by setting @continue = true + health_check.instance_variable_set(:@continue, true) health_check.update_status(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY) health_check.send(:write_file) @@ -329,6 +332,8 @@ def test_update_status_is_a_no_op_when_health_checks_disabled def test_update_message_works_with_http_arrays health_check = NewRelic::Agent::HealthCheck.new + # stub a valid health check, by setting @continue = true + health_check.instance_variable_set(:@continue, true) result = health_check.update_status(NewRelic::Agent::HealthCheck::HTTP_ERROR, ['401', :preconnect]) assert_equal 'HTTP error response code [401] recevied from New Relic while sending data type [preconnect]', result From 9dc3cd4bdc6e17a26a7306c12dbf979769fa2bcd Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Thu, 16 Jan 2025 17:05:58 -0800 Subject: [PATCH 23/28] agent_control.fleet_id => agent_control.enabled The fleet ID setting has been replaced by agent_control.enabled. At this time, enabled will only refer to health checks being enabled. The value is a Boolean and defaults to false. --- .../agent/configuration/default_source.rb | 9 ++++----- lib/new_relic/agent/health_check.rb | 14 ++++++------- .../orphan_configuration_test.rb | 4 ++-- test/new_relic/agent/health_check_test.rb | 20 +++++++++---------- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/lib/new_relic/agent/configuration/default_source.rb b/lib/new_relic/agent/configuration/default_source.rb index 08576e84a9..3f2d31102b 100644 --- a/lib/new_relic/agent/configuration/default_source.rb +++ b/lib/new_relic/agent/configuration/default_source.rb @@ -2189,13 +2189,12 @@ def self.notify :description => 'Specify a list of exceptions you do not want the agent to strip when [strip_exception_messages](#strip_exception_messages-enabled) is `true`. Separate exceptions with a comma. For example, `"ImportantException,PreserveMessageException"`.' }, # Agent Control - :'agent_control.fleet_id' => { - :default => nil, - :allow_nil => true, + :'agent_control.enabled' => { + :default => false, :public => true, - :type => String, + :type => Boolean, :allowed_from_server => false, - :description => 'This assigns a fleet ID to the language agent. This ID is generated by agent control. If this setting is present, it indicates the agent is running in an agent control/fleet environment and health file(s) will be generated. This configuration will be set by agent control, or one of its components, prior to agent startup.' + :description => 'Boolean value that denotes whether agent control functionality should be enabled. At the moment, this functionality is limited to whether agent health should be reported. This configuration will be set using an environment variable by agent control, or one of its components, prior to agent startup.' }, :'agent_control.health.delivery_location' => { :default => nil, diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index c7b1dc764e..ae2a4e38db 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -10,7 +10,7 @@ def initialize @continue = true @status = HEALTHY # the following assignments may set @continue = false if they are invalid - set_fleet_id + set_enabled set_delivery_location set_frequency end @@ -61,13 +61,13 @@ def healthy? private - def set_fleet_id - @fleet_id = if ENV['NEW_RELIC_AGENT_CONTROL_FLEET_ID'] - ENV['NEW_RELIC_AGENT_CONTROL_FLEET_ID'] + def set_enabled + @enabled = if ENV['NEW_RELIC_AGENT_CONTROL_ENABLED'] == 'true' + true else - NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_FLEET_ID not found, disabling health checks') + NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_ENABLED not true, disabling health checks') @continue = false - nil + false end end @@ -139,7 +139,7 @@ def create_file_path end def health_checks_enabled? - @fleet_id && @delivery_location && @frequency > 0 + @enabled && @delivery_location && @frequency > 0 end def update_message(options) diff --git a/test/new_relic/agent/configuration/orphan_configuration_test.rb b/test/new_relic/agent/configuration/orphan_configuration_test.rb index 1788f63303..1bbb756a32 100644 --- a/test/new_relic/agent/configuration/orphan_configuration_test.rb +++ b/test/new_relic/agent/configuration/orphan_configuration_test.rb @@ -9,12 +9,12 @@ class OrphanedConfigTest < Minitest::Test include NewRelic::TestHelpers::ConfigScanning # :automatic_custom_instrumentation_method_list - the tranform proc handles all processing, no other reference exists - # :'agent_control.fleet_id' - the config is set by environment variable in agent control, the symbol config is not used + # :'agent_control.enabled' - the config is set by environment variable in agent control, the symbol config is not used # :'agent_control.health.delivery_location - the config is set by environment variable in agent control, the symbol config is not used # :'agent_control.health.frequency' - the config is set by environment variable in agent control, the symbol config is not used IGNORED_KEYS = %i[ automatic_custom_instrumentation_method_list - agent_control.fleet_id + agent_control.enabled agent_control.health.delivery_location agent_control.health.frequency ] diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 9a4334226a..3f78353f89 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -55,7 +55,7 @@ def test_yaml_file_name_has_health_plus_uuid_without_hyphens def test_write_file_called_on_interval with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '3', - 'NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'abc', + 'NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do health_check = NewRelic::Agent::HealthCheck.new health_check.stub(:write_file, nil) do @@ -68,7 +68,7 @@ def test_write_file_called_on_interval def test_create_and_run_health_check_loop_exits_after_shutdown with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '3', - 'NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'abc', + 'NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do health_check = NewRelic::Agent::HealthCheck.new health_check.stub(:write_file, nil) do @@ -235,7 +235,7 @@ def test_yaml_file_has_new_status_time_each_write end def test_agent_health_started_if_required_info_present - with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'landslide', + with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do log = with_array_logger(:debug) do @@ -244,13 +244,13 @@ def test_agent_health_started_if_required_info_present end assert_log_contains(log, 'Agent control health check conditions met. Starting health checks.') - refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_FLEET_ID not found') + refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_ENABLED not true') refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found') refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less') end end - def test_agent_health_not_generated_if_agent_control_fleet_id_absent + def test_agent_health_not_generated_if_agent_control_enabled_missing with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do log = with_array_logger(:debug) do @@ -262,13 +262,13 @@ def test_agent_health_not_generated_if_agent_control_fleet_id_absent end end - assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_FLEET_ID not found') + assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_ENABLED not true') refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.') end end def test_agent_health_not_generated_if_delivery_location_absent - with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'mykonos', + with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do log = with_array_logger(:debug) do health_check = NewRelic::Agent::HealthCheck.new @@ -285,7 +285,7 @@ def test_agent_health_not_generated_if_delivery_location_absent end def test_agent_health_not_generated_if_frequency_is_zero - with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'anchors-away', + with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '0') do log = with_array_logger(:debug) do @@ -305,7 +305,7 @@ def test_agent_health_not_generated_if_frequency_is_zero def test_agent_health_supportability_metric_generated_recorded_when_health_check_loop_starts NewRelic::Agent.instance.stats_engine.clear_stats - with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'landslide', + with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do health_check = NewRelic::Agent::HealthCheck.new @@ -316,7 +316,7 @@ def test_agent_health_supportability_metric_generated_recorded_when_health_check end def test_update_status_is_a_no_op_when_health_checks_disabled - with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => nil, + with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'false', 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => nil, 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '0') do health_check = NewRelic::Agent::HealthCheck.new From 5ab814b897099a5265518313c179367a73e91a44 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Thu, 16 Jan 2025 17:34:36 -0800 Subject: [PATCH 24/28] Set default value for delivery location The default value is '/newrelic/apm/health' in the code, though the spec default is 'file:///newrelic/apm/health'. Since we need to remove the 'file://' anyway, take it out when defining the string to avoid unncessary gsubs --- .../agent/configuration/default_source.rb | 5 ++--- lib/new_relic/agent/health_check.rb | 5 ++--- test/new_relic/agent/health_check_test.rb | 18 ++++-------------- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/lib/new_relic/agent/configuration/default_source.rb b/lib/new_relic/agent/configuration/default_source.rb index 3f2d31102b..4f346751b1 100644 --- a/lib/new_relic/agent/configuration/default_source.rb +++ b/lib/new_relic/agent/configuration/default_source.rb @@ -2197,12 +2197,11 @@ def self.notify :description => 'Boolean value that denotes whether agent control functionality should be enabled. At the moment, this functionality is limited to whether agent health should be reported. This configuration will be set using an environment variable by agent control, or one of its components, prior to agent startup.' }, :'agent_control.health.delivery_location' => { - :default => nil, - :allow_nil => true, + :default => 'file:///newrelic/apm/health', :public => true, :type => String, :allowed_from_server => false, - :description => 'A `file:` URI that specifies the fully qualified directory path for health file(s) to be written to. For example: `file:///var/lib/newrelic-super-agent/fleet/agents.d/`. This configuration will be set by agent control, or one of its components, prior to agent startup.' + :description => 'A `file:` URI that specifies the fully qualified directory path for health file(s) to be written to. This defaults to: `file:///newrelic/apm/health`. This configuration will be set using an environment variable by agent control, or one of its components, prior to agent startup.' }, :'agent_control.health.frequency' => { :default => 5, diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index ae2a4e38db..c8c52d2b11 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -77,9 +77,8 @@ def set_delivery_location # This does not create a valid path in Ruby, so remove the prefix when present ENV['NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION']&.gsub('file://', '') else - NewRelic::Agent.logger.debug('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found, disabling health checks') - @continue = false - nil + # The spec default is 'file:///newrelic/apm/health', but since we're just going to remove it anyway... + '/newrelic/apm/health' end end diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 3f78353f89..299238140e 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -245,7 +245,6 @@ def test_agent_health_started_if_required_info_present assert_log_contains(log, 'Agent control health check conditions met. Starting health checks.') refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_ENABLED not true') - refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found') refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less') end end @@ -267,20 +266,11 @@ def test_agent_health_not_generated_if_agent_control_enabled_missing end end - def test_agent_health_not_generated_if_delivery_location_absent + def test_agent_health_falls_back_to_default_value_when_env_var_missing with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do - log = with_array_logger(:debug) do - health_check = NewRelic::Agent::HealthCheck.new - # loop should exit before write_file is called - # raise an error if it's invoked - health_check.stub(:write_file, -> { raise 'kaboom!' }) do - health_check.create_and_run_health_check_loop - end - end - - assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION not found') - refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + health_check = NewRelic::Agent::HealthCheck.new + assert_equal '/newrelic/apm/health', health_check.instance_variable_get(:@delivery_location) end end @@ -317,7 +307,7 @@ def test_agent_health_supportability_metric_generated_recorded_when_health_check def test_update_status_is_a_no_op_when_health_checks_disabled with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'false', - 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => nil, + 'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'whatev', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '0') do health_check = NewRelic::Agent::HealthCheck.new From a1bcd2a39ea8ca50dd026cf4cf92bddb85cb0753 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Thu, 16 Jan 2025 17:44:08 -0800 Subject: [PATCH 25/28] Update documentation * Add link to Agent Control documentation * Add environment variable detail to config description * Capitalize Agent Control in logs and docs --- CHANGELOG.md | 2 +- lib/new_relic/agent/configuration/default_source.rb | 8 ++++---- lib/new_relic/agent/health_check.rb | 10 +++++----- test/new_relic/agent/health_check_test.rb | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa7f685d54..01a2c95eac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ - **Feature: Add health checks when the agent runs within Agent Control** - When the agent is started within an agent control environment, a health check file will be created at the configured file destination for every agent process. The health check files will be updated at the configured frequency. [PR#2995](https://github.com/newrelic/newrelic-ruby-agent/pull/2995) + When the agent is started within an [Agent Control](https://docs-preview.newrelic.com/docs/new-relic-agent-control) environment, a health check file will be created at the configured file location for every agent process. By default, this location is: '/newrelic/apm/health'. The health check files will be updated at the configured frequency, which defaults to every five seconds. [PR#2995](https://github.com/newrelic/newrelic-ruby-agent/pull/2995) - **Bugfix: Stop emitting inaccurate debug-level log about deprecated configuration options** diff --git a/lib/new_relic/agent/configuration/default_source.rb b/lib/new_relic/agent/configuration/default_source.rb index 4f346751b1..2945329c53 100644 --- a/lib/new_relic/agent/configuration/default_source.rb +++ b/lib/new_relic/agent/configuration/default_source.rb @@ -2194,21 +2194,21 @@ def self.notify :public => true, :type => Boolean, :allowed_from_server => false, - :description => 'Boolean value that denotes whether agent control functionality should be enabled. At the moment, this functionality is limited to whether agent health should be reported. This configuration will be set using an environment variable by agent control, or one of its components, prior to agent startup.' + :description => 'Boolean value that denotes whether Agent Control functionality should be enabled. At the moment, this functionality is limited to whether agent health should be reported. This configuration will be set using an environment variable by Agent Control, or one of its components, prior to agent startup.' }, :'agent_control.health.delivery_location' => { - :default => 'file:///newrelic/apm/health', + :default => '/newrelic/apm/health', :public => true, :type => String, :allowed_from_server => false, - :description => 'A `file:` URI that specifies the fully qualified directory path for health file(s) to be written to. This defaults to: `file:///newrelic/apm/health`. This configuration will be set using an environment variable by agent control, or one of its components, prior to agent startup.' + :description => 'A `file:` URI that specifies the fully qualified directory path for health file(s) to be written to. This defaults to: `file:///newrelic/apm/health`. This configuration will be set using an environment variable by Agent Control, or one of its components, prior to agent startup.' }, :'agent_control.health.frequency' => { :default => 5, :public => true, :type => Integer, :allowed_from_server => false, - :description => 'The interval, in seconds, of how often the health file(s) will be written to. This configuration will be set by agent control, or one of its components, prior to agent startup.' + :description => 'The interval, in seconds, of how often the health file(s) will be written to. This configuration will be set using an environment variable by Agent Control, or one of its components, prior to agent startup.' }, # Thread profiler :'thread_profiler.enabled' => { diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index c8c52d2b11..3424aeb7e3 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -31,7 +31,7 @@ def initialize def create_and_run_health_check_loop return unless health_checks_enabled? && @continue - NewRelic::Agent.logger.debug('Agent control health check conditions met. Starting health checks.') + NewRelic::Agent.logger.debug('Agent Control health check conditions met. Starting health checks.') NewRelic::Agent.record_metric('Supportability/AgentControl/Health/enabled', 1) Thread.new do @@ -41,7 +41,7 @@ def create_and_run_health_check_loop write_file @continue = false if @status == SHUTDOWN rescue StandardError => e - NewRelic::Agent.logger.error("Aborting agent control health check. Error raised: #{e}") + NewRelic::Agent.logger.error("Aborting Agent Control health check. Error raised: #{e}") @continue = false end end @@ -117,7 +117,7 @@ def write_file File.write(@file, contents) rescue StandardError => e - NewRelic::Agent.logger.error("Agent control health check raised an error while writing a file: #{e}") + NewRelic::Agent.logger.error("Agent Control health check raised an error while writing a file: #{e}") @continue = false end @@ -131,7 +131,7 @@ def create_file_path nil rescue StandardError => e NewRelic::Agent.logger.error( - 'Agent control health check raised an error while finding or creating the file path defined in ' \ + 'Agent Control health check raised an error while finding or creating the file path defined in ' \ "NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION: #{e}" ) @continue = false @@ -144,7 +144,7 @@ def health_checks_enabled? def update_message(options) @status[:message] = sprintf(@status[:message], *options) rescue StandardError => e - NewRelic::Agent.logger.debug("Error raised while updating agent control health check message: #{e}." \ + NewRelic::Agent.logger.debug("Error raised while updating Agent Control health check message: #{e}." \ "Reverting to original message. options = #{options}, @status[:message] = #{@status[:message]}") end end diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 299238140e..736ddc8b14 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -243,7 +243,7 @@ def test_agent_health_started_if_required_info_present health_check.create_and_run_health_check_loop end - assert_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + assert_log_contains(log, 'Agent Control health check conditions met. Starting health checks.') refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_ENABLED not true') refute_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less') end @@ -262,7 +262,7 @@ def test_agent_health_not_generated_if_agent_control_enabled_missing end assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_ENABLED not true') - refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + refute_log_contains(log, 'Agent Control health check conditions met. Starting health checks.') end end @@ -288,7 +288,7 @@ def test_agent_health_not_generated_if_frequency_is_zero end assert_log_contains(log, 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY zero or less') - refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.') + refute_log_contains(log, 'Agent Control health check conditions met. Starting health checks.') end end From 0c0688902736b34ca2261adcfe8955a078f987ff Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Fri, 17 Jan 2025 14:39:09 -0800 Subject: [PATCH 26/28] Rubocop - Minitest/EmptyLineBeforeAssertionMethods --- test/new_relic/agent/health_check_test.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index 736ddc8b14..e36212f624 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -270,6 +270,7 @@ def test_agent_health_falls_back_to_default_value_when_env_var_missing with_environment('NEW_RELIC_AGENT_CONTROL_ENABLED' => 'true', 'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5') do health_check = NewRelic::Agent::HealthCheck.new + assert_equal '/newrelic/apm/health', health_check.instance_variable_get(:@delivery_location) end end From c38bff3bd705371744641cc11e4f6362f59a9064 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 21 Jan 2025 11:37:38 -0800 Subject: [PATCH 27/28] Make agent_control settings private --- lib/new_relic/agent/configuration/default_source.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/new_relic/agent/configuration/default_source.rb b/lib/new_relic/agent/configuration/default_source.rb index 2945329c53..071fe4f5fa 100644 --- a/lib/new_relic/agent/configuration/default_source.rb +++ b/lib/new_relic/agent/configuration/default_source.rb @@ -2191,21 +2191,21 @@ def self.notify # Agent Control :'agent_control.enabled' => { :default => false, - :public => true, + :public => false, :type => Boolean, :allowed_from_server => false, :description => 'Boolean value that denotes whether Agent Control functionality should be enabled. At the moment, this functionality is limited to whether agent health should be reported. This configuration will be set using an environment variable by Agent Control, or one of its components, prior to agent startup.' }, :'agent_control.health.delivery_location' => { :default => '/newrelic/apm/health', - :public => true, + :public => false, :type => String, :allowed_from_server => false, :description => 'A `file:` URI that specifies the fully qualified directory path for health file(s) to be written to. This defaults to: `file:///newrelic/apm/health`. This configuration will be set using an environment variable by Agent Control, or one of its components, prior to agent startup.' }, :'agent_control.health.frequency' => { :default => 5, - :public => true, + :public => false, :type => Integer, :allowed_from_server => false, :description => 'The interval, in seconds, of how often the health file(s) will be written to. This configuration will be set using an environment variable by Agent Control, or one of its components, prior to agent startup.' From 87a09094a0cf66e4619cbbd771f981aed9a22f19 Mon Sep 17 00:00:00 2001 From: Kayla Reopelle Date: Tue, 21 Jan 2025 14:56:06 -0800 Subject: [PATCH 28/28] Do not create the health directory --- lib/new_relic/agent/health_check.rb | 20 ++---------- test/new_relic/agent/health_check_test.rb | 37 ++++++++++++----------- 2 files changed, 22 insertions(+), 35 deletions(-) diff --git a/lib/new_relic/agent/health_check.rb b/lib/new_relic/agent/health_check.rb index 3424aeb7e3..73bb9bd7c7 100644 --- a/lib/new_relic/agent/health_check.rb +++ b/lib/new_relic/agent/health_check.rb @@ -113,7 +113,7 @@ def file_name end def write_file - @file ||= "#{create_file_path}/#{file_name}" + @file ||= "#{@delivery_location}/#{file_name}" File.write(@file, contents) rescue StandardError => e @@ -121,22 +121,6 @@ def write_file @continue = false end - def create_file_path - for abs_path in [File.expand_path(@delivery_location), - File.expand_path(File.join('', @delivery_location))] do - if File.directory?(abs_path) || (Dir.mkdir(abs_path) rescue nil) - return abs_path[%r{^(.*?)/?$}] - end - end - nil - rescue StandardError => e - NewRelic::Agent.logger.error( - 'Agent Control health check raised an error while finding or creating the file path defined in ' \ - "NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION: #{e}" - ) - @continue = false - end - def health_checks_enabled? @enabled && @delivery_location && @frequency > 0 end @@ -145,7 +129,7 @@ def update_message(options) @status[:message] = sprintf(@status[:message], *options) rescue StandardError => e NewRelic::Agent.logger.debug("Error raised while updating Agent Control health check message: #{e}." \ - "Reverting to original message. options = #{options}, @status[:message] = #{@status[:message]}") + "options = #{options}, @status[:message] = #{@status[:message]}") end end end diff --git a/test/new_relic/agent/health_check_test.rb b/test/new_relic/agent/health_check_test.rb index e36212f624..cdf9bb093c 100644 --- a/test/new_relic/agent/health_check_test.rb +++ b/test/new_relic/agent/health_check_test.rb @@ -19,6 +19,8 @@ def teardown end def test_yaml_health_file_written_to_delivery_location + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new @@ -33,6 +35,8 @@ def test_yaml_health_file_written_to_delivery_location end def test_yaml_health_file_written_to_delivery_location_with_file_path_prefix + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'file://health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new @@ -81,6 +85,8 @@ def test_create_and_run_health_check_loop_exits_after_shutdown end def test_write_file_sets_continue_false_when_error + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do File.stub(:write, ->(arg1, arg2) { raise 'boom!' }) do @@ -105,24 +111,9 @@ def test_frequency_defaults_to_five assert_equal 5, health_check.instance_variable_get(:@frequency) end - def test_create_file_path_sets_continue_false_when_error_raised - with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do - NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do - File.stub(:directory?, ->(arg1) { raise 'boom!' }) do - health_check = NewRelic::Agent::HealthCheck.new - # stub a valid health check, by setting @continue = true - health_check.instance_variable_set(:@continue, true) - health_check.send(:create_file_path) - - refute(health_check.instance_variable_get(:@continue)) - end - end - end - ensure - FileUtils.rm_rf('health') - end - def test_yaml_file_has_healthy_field + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new @@ -136,6 +127,8 @@ def test_yaml_file_has_healthy_field end def test_yaml_file_has_status_field + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new @@ -149,6 +142,8 @@ def test_yaml_file_has_status_field end def test_yaml_file_has_last_error_field_when_status_not_healthy + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new @@ -165,6 +160,8 @@ def test_yaml_file_has_last_error_field_when_status_not_healthy end def test_yaml_file_does_not_have_last_error_field_when_status_healthy + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new @@ -187,6 +184,8 @@ def test_nano_time_in_correct_format end def test_yaml_file_has_same_start_time_unix_every_write + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, '1') do health_check = NewRelic::Agent::HealthCheck.new @@ -205,6 +204,8 @@ def test_yaml_file_has_same_start_time_unix_every_write end def test_yaml_file_has_status_time_unix_nano + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do health_check = NewRelic::Agent::HealthCheck.new @@ -218,6 +219,8 @@ def test_yaml_file_has_status_time_unix_nano end def test_yaml_file_has_new_status_time_each_write + Dir.mkdir('health') + with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => 'health/') do NewRelic::Agent::GuidGenerator.stub(:generate_guid, '1') do health_check = NewRelic::Agent::HealthCheck.new