diff --git a/keylime/cloud_verifier_common.py b/keylime/cloud_verifier_common.py
index 0fd6f4acf..e2fa1171b 100644
--- a/keylime/cloud_verifier_common.py
+++ b/keylime/cloud_verifier_common.py
@@ -19,6 +19,7 @@
 from keylime import ca_util
 from keylime import revocation_notifier
 from keylime.agentstates import AgentAttestStates
+from keylime.failure import Failure, Component
 from keylime.tpm.tpm_main import tpm
 from keylime.tpm.tpm_abstract import TPM_Utilities
 from keylime.common import algorithms
@@ -133,11 +134,12 @@ def init_mtls(section='cloud_verifier', generatedir='cv_ca'):
     return context
 
 
-def process_quote_response(agent, json_response, agentAttestState):
+def process_quote_response(agent, json_response, agentAttestState) -> Failure:
     """Validates the response from the Cloud agent.
 
     This method invokes an Registrar Server call to register, and then check the quote.
     """
+    failure = Failure(Component.QUOTE_VALIDATION)
     received_public_key = None
     quote = None
     # in case of failure in response content do not continue
@@ -157,9 +159,11 @@ def process_quote_response(agent, json_response, agentAttestState):
         logger.debug("received ima_measurement_list_entry: %d", ima_measurement_list_entry)
         logger.debug("received boottime: %s", boottime)
         logger.debug("received boot log %s", (mb_measurement_list is not None))
-    except Exception:
-        return None
+    except Exception as e:
+        failure.add_event("invalid_data", {"message": "parsing agent's get quote response failed", "data": e}, False)
+        return failure
 
+    # TODO: Are those separate failures?
     if not isinstance(ima_measurement_list_entry, int):
         raise Exception("ima_measurement_list_entry parameter must be an integer")
 
@@ -170,7 +174,8 @@ def process_quote_response(agent, json_response, agentAttestState):
     if received_public_key is None:
         if agent.get('public_key', "") == "" or agent.get('b64_encrypted_V', "") == "":
             logger.error("agent did not provide public key and no key or encrypted_v was cached at CV")
-            return False
+            failure.add_event("no_pubkey", "agent did not provide public key and no key or encrypted_v was cached at CV", False)
+            return failure
         agent['provide_V'] = False
         received_public_key = agent['public_key']
 
@@ -180,7 +185,8 @@ def process_quote_response(agent, json_response, agentAttestState):
                 "cloud_verifier", "registrar_port"), agent['agent_id'])
         if registrar_data is None:
             logger.warning("AIK not found in registrar, quote not validated")
-            return False
+            failure.add_event("no_aik", "AIK not found in registrar, quote not validated", False)
+            return failure
         agent['registrar_data'] = registrar_data
 
     hash_alg = json_response.get('hash_alg')
@@ -194,38 +200,50 @@ def process_quote_response(agent, json_response, agentAttestState):
     # Ensure hash_alg is in accept_tpm_hash_alg list
     if not algorithms.is_accepted(hash_alg, agent['accept_tpm_hash_algs']):
-        raise Exception(
-            "TPM Quote is using an unaccepted hash algorithm: %s" % hash_alg)
+        logger.error(f"TPM Quote is using an unaccepted hash algorithm: {hash_alg}")
+        failure.add_event("invalid_hash_alg",
+                          {"message": f"TPM Quote is using an unaccepted hash algorithm: {hash_alg}", "data": hash_alg},
+                          False)
+        return failure
 
     # Ensure enc_alg is in accept_tpm_encryption_algs list
     if not algorithms.is_accepted(enc_alg, agent['accept_tpm_encryption_algs']):
-        raise Exception(
-            "TPM Quote is using an unaccepted encryption algorithm: %s" % enc_alg)
+        logger.error(f"TPM Quote is using an unaccepted encryption algorithm: {enc_alg}")
+        failure.add_event("invalid_enc_alg",
+                          {"message": f"TPM Quote is using an unaccepted encryption algorithm: {enc_alg}", "data": enc_alg},
+                          False)
+        return failure
 
     # Ensure sign_alg is in accept_tpm_encryption_algs list
     if not algorithms.is_accepted(sign_alg, agent['accept_tpm_signing_algs']):
-        raise Exception(
-            "TPM Quote is using an unaccepted signing algorithm: %s" % sign_alg)
+        logger.error(f"TPM Quote is using an unaccepted signing algorithm: {sign_alg}")
+        failure.add_event("invalid_sign_alg",
+                          {"message": f"TPM Quote is using an unaccepted signing algorithm: {sign_alg}", "data": sign_alg},
+                          False)
+        return failure
 
     if ima_measurement_list_entry == 0:
         agentAttestState.reset_ima_attestation()
     elif ima_measurement_list_entry != agentAttestState.get_next_ima_ml_entry():
         # If we requested a particular entry number then the agent must return either
         # starting at 0 (handled above) or with the requested number.
-        raise Exception(
-            "Agent did not respond with requested next IMA measurement list entry %d but started at %d" %
-            (agentAttestState.get_next_ima_ml_entry(), ima_measurement_list_entry))
+        logger.error("Agent did not respond with requested next IMA measurement list entry "
+                     f"{agentAttestState.get_next_ima_ml_entry()} but started at {ima_measurement_list_entry}")
+        failure.add_event("invalid_ima_entry_nb",
+                          {"message": "Agent did not respond with requested next IMA measurement list entry",
+                           "got": ima_measurement_list_entry, "expected": agentAttestState.get_next_ima_ml_entry()},
+                          False)
     elif not agentAttestState.is_expected_boottime(boottime):
         # agent sent a list not starting at 0 and provided a boottime that doesn't
         # match the expected boottime, so it must have been rebooted; we would fail
         # attestation this time so we retry with a full attestation next time.
         agentAttestState.reset_ima_attestation()
-        return True
+        return failure
 
     agentAttestState.set_boottime(boottime)
 
     ima_keyring = ima_file_signatures.ImaKeyring.from_string(agent['ima_sign_verification_keys'])
-    validQuote = get_tpm_instance().check_quote(
+    quote_validation_failure = get_tpm_instance().check_quote(
         agentAttestState,
         agent['nonce'],
         received_public_key,
@@ -238,21 +256,21 @@ def process_quote_response(agent, json_response, agentAttestState):
         ima_keyring,
         mb_measurement_list,
         agent['mb_refstate'])
-    if not validQuote:
-        return False
+    failure.merge(quote_validation_failure)
 
-    # set a flag so that we know that the agent was verified once.
-    # we only issue notifications for agents that were at some point good
-    agent['first_verified'] = True
+    if not failure:
+        # set a flag so that we know that the agent was verified once.
+        # we only issue notifications for agents that were at some point good
+        agent['first_verified'] = True
 
-    # has public key changed? if so, clear out b64_encrypted_V, it is no longer valid
-    if received_public_key != agent.get('public_key', ""):
-        agent['public_key'] = received_public_key
-        agent['b64_encrypted_V'] = ""
-        agent['provide_V'] = True
+        # has public key changed? if so, clear out b64_encrypted_V, it is no longer valid
+        if received_public_key != agent.get('public_key', ""):
+            agent['public_key'] = received_public_key
+            agent['b64_encrypted_V'] = ""
+            agent['provide_V'] = True
 
     # ok we're done
-    return validQuote
+    return failure
 
 
 def prepare_v(agent):
@@ -333,6 +351,8 @@ def process_get_status(agent):
                 'verifier_id' : agent.verifier_id,
                 'verifier_ip' : agent.verifier_ip,
                 'verifier_port' : agent.verifier_port,
+                'severity_level': agent.severity_level,
+                'last_event_id': agent.last_event_id
                 }
     return response
 
@@ -340,7 +360,7 @@ def process_get_status(agent):
 
 
 # sign a message with revocation key. telling of verification problem
-def notify_error(agent, msgtype='revocation'):
+def notify_error(agent, msgtype='revocation', event=None):
     if not config.getboolean('cloud_verifier', 'revocation_notifier'):
         return
 
@@ -353,6 +373,10 @@ def notify_error(agent, msgtype='revocation'):
                   'vtpm_policy': agent['vtpm_policy'],
                   'meta_data': agent['meta_data'],
                   'event_time': time.asctime()}
+    if event:
+        revocation['event_id'] = event.event_id
+        revocation['severity_label'] = event.severity_label.name
+        revocation['context'] = event.context
 
     tosend = {'msg': json.dumps(revocation).encode('utf-8')}
 
diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py
index f13e0fc45..1b4f6e798 100644
--- a/keylime/cloud_verifier_tornado.py
+++ b/keylime/cloud_verifier_tornado.py
@@ -27,6 +27,7 @@
 from keylime import tornado_requests
 from keylime import api_version as keylime_api_version
 from keylime.ima_ast import START_HASH
+from keylime.failure import MAX_SEVERITY_LABEL, Failure, Component
 
 logger = keylime_logging.init_logging('cloudverifier')
 
@@ -702,6 +703,7 @@ def data_received(self, chunk):
 
 
 async def invoke_get_quote(agent, need_pubkey):
+    failure = Failure(Component.INTERNAL, ["verifier"])
     if agent is None:
         raise Exception("agent deleted while being processed")
     params = cloud_verifier_common.prepare_get_quote(agent)
@@ -724,7 +726,8 @@ async def invoke_get_quote(agent, need_pubkey):
         else:
             # catastrophic error, do not continue
             logger.critical("Unexpected Get Quote response error for cloud agent %s, Error: %s", agent['agent_id'], response.status_code)
-            asyncio.ensure_future(process_agent(agent, states.FAILED))
+            failure.add_event("no_quote", "Unexpected Get Quote response from agent", False)
+            asyncio.ensure_future(process_agent(agent, states.FAILED, failure))
     else:
         try:
             json_response = json.loads(response.body)
@@ -733,13 +736,14 @@ async def invoke_get_quote(agent, need_pubkey):
             if 'provide_V' not in agent :
                 agent['provide_V'] = True
             agentAttestState = get_AgentAttestStates().get_by_agent_id(agent['agent_id'])
-            if cloud_verifier_common.process_quote_response(agent, json_response['results'], agentAttestState):
+            failure = cloud_verifier_common.process_quote_response(agent, json_response['results'], agentAttestState)
+            if not failure:
                 if agent['provide_V']:
                     asyncio.ensure_future(process_agent(agent, states.PROVIDE_V))
                 else:
                     asyncio.ensure_future(process_agent(agent, states.GET_QUOTE))
             else:
-                asyncio.ensure_future(process_agent(agent, states.INVALID_QUOTE))
+                asyncio.ensure_future(process_agent(agent, states.INVALID_QUOTE, failure))
 
             # store the attestation state
             store_attestation_state(agentAttestState)
@@ -749,6 +753,7 @@ async def invoke_get_quote(agent, need_pubkey):
 
 
 async def invoke_provide_v(agent):
+    failure = Failure(Component.INTERNAL, ["verifier"])
     if agent is None:
         raise Exception("Agent deleted while being processed")
     try:
@@ -769,18 +774,19 @@ async def invoke_provide_v(agent):
         else:
             # catastrophic error, do not continue
             logger.critical("Unexpected Provide V response error for cloud agent %s, Error: %s", agent['agent_id'], response.status_code)
-            asyncio.ensure_future(process_agent(agent, states.FAILED))
+            failure.add_event("no_v", {"message": "Unexpected provide V response", "data": response.error}, False)
+            asyncio.ensure_future(process_agent(agent, states.FAILED, failure))
     else:
         asyncio.ensure_future(process_agent(agent, states.GET_QUOTE))
 
 
-async def process_agent(agent, new_operational_state):
+async def process_agent(agent, new_operational_state, failure=Failure(Component.INTERNAL, ["verifier"])):
     # Convert to dict if the agent arg is a db object
     if not isinstance(agent, dict):
         agent = _from_db_obj(agent)
 
     session = get_session()
-    try:
+    try: # pylint: disable=R1702
         main_agent_operational_state = agent['operational_state']
         try:
             stored_agent = session.query(VerfierMain).filter_by(
@@ -808,24 +814,28 @@ async def process_agent(agent, new_operational_state):
         # If failed during processing, log regardless and drop it on the floor
         # The administration application (tenant) can GET the status and act accordingly (delete/retry/etc).
         if new_operational_state in (states.FAILED, states.INVALID_QUOTE):
-            agent['operational_state'] = new_operational_state
-
-            # issue notification for invalid quotes
-            if new_operational_state == states.INVALID_QUOTE:
-                cloud_verifier_common.notify_error(agent)
-
-            if agent['pending_event'] is not None:
-                tornado.ioloop.IOLoop.current().remove_timeout(
-                    agent['pending_event'])
-            for key in exclude_db:
-                if key in agent:
-                    del agent[key]
-            session.query(VerfierMain).filter_by(
-                agent_id=agent['agent_id']).update(agent)
-            session.commit()
-
-            logger.warning("Agent %s failed, stopping polling", agent['agent_id'])
-            return
+            assert failure, "States FAILED and INVALID QUOTE should only be reached with a failure message"
+
+            if agent.get('severity_level') is None or agent['severity_level'] < failure.highest_severity.severity:
+                agent['severity_level'] = failure.highest_severity.severity
+                agent['last_event_id'] = failure.highest_severity_event.event_id
+                agent['operational_state'] = new_operational_state
+
+                # issue notification for invalid quotes
+                if new_operational_state == states.INVALID_QUOTE:
+                    cloud_verifier_common.notify_error(agent, event=failure.highest_severity_event)
+
+                # When the failure is irrecoverable we stop polling the agent
+                if not failure.recoverable or failure.highest_severity == MAX_SEVERITY_LABEL:
+                    if agent['pending_event'] is not None:
+                        tornado.ioloop.IOLoop.current().remove_timeout(
+                            agent['pending_event'])
+                    for key in exclude_db:
+                        if key in agent:
+                            del agent[key]
+                    session.query(VerfierMain).filter_by(
+                        agent_id=agent['agent_id']).update(agent)
+                    session.commit()
 
         # propagate all state, but remove none DB keys first (using exclude_db)
         try:
@@ -840,6 +850,16 @@ async def process_agent(agent, new_operational_state):
         except SQLAlchemyError as e:
             logger.error('SQLAlchemy Error: %s', e)
 
+        # If agent was in a failed state we check if we either stop polling
+        # or just add it again to the event loop
+        if new_operational_state in [states.FAILED, states.INVALID_QUOTE]:
+            if not failure.recoverable or failure.highest_severity == MAX_SEVERITY_LABEL:
+                logger.warning("Agent %s failed, stopping polling", agent['agent_id'])
+                return
+
+            await invoke_get_quote(agent, False)
+            return
+
         # if new, get a quote
         if (main_agent_operational_state == states.START and
                 new_operational_state == states.GET_QUOTE):
@@ -876,12 +896,13 @@ async def process_agent(agent, new_operational_state):
                 new_operational_state == states.GET_QUOTE_RETRY):
             if agent['num_retries'] >= maxr:
                 logger.warning("Agent %s was not reachable for quote in %d tries, setting state to FAILED", agent['agent_id'], maxr)
+                failure.add_event("not_reachable", "agent was not reachable from verifier", False)
                 if agent['first_verified']: # only notify on previously good agents
                     cloud_verifier_common.notify_error(
-                        agent, msgtype='comm_error')
+                        agent, msgtype='comm_error', event=failure.highest_severity_event)
                 else:
                     logger.debug("Communication error for new agent. No notification will be sent")
-                await process_agent(agent, states.FAILED)
+                await process_agent(agent, states.FAILED, failure)
             else:
                 agent['operational_state'] = states.GET_QUOTE
                 cb = functools.partial(invoke_get_quote, agent, True)
@@ -894,9 +915,10 @@ async def process_agent(agent, new_operational_state):
                 new_operational_state == states.PROVIDE_V_RETRY):
             if agent['num_retries'] >= maxr:
                 logger.warning("Agent %s was not reachable to provide v in %d tries, setting state to FAILED", agent['agent_id'], maxr)
+                failure.add_event("not_reachable_v", "agent was not reachable to provide V", False)
                 cloud_verifier_common.notify_error(
-                    agent, msgtype='comm_error')
-                await process_agent(agent, states.FAILED)
+                    agent, msgtype='comm_error', event=failure.highest_severity_event)
+                await process_agent(agent, states.FAILED, failure)
             else:
                 agent['operational_state'] = states.PROVIDE_V
                 cb = functools.partial(invoke_provide_v, agent)
diff --git a/keylime/db/verifier_db.py b/keylime/db/verifier_db.py
index e01f13257..f5ddd6c86 100644
--- a/keylime/db/verifier_db.py
+++ b/keylime/db/verifier_db.py
@@ -46,6 +46,8 @@ class VerfierMain(Base):
     ima_pcrs = Column(JSONPickleType(pickler=json))
     pcr10 = Column(LargeBinary)
     next_ima_ml_entry = Column(Integer)
+    severity_level = Column(Integer, nullable=True)
+    last_event_id = Column(String(200), nullable=True)
 
 
 class VerifierAllowlist(Base):
diff --git a/keylime/migrations/versions/257fe0f0c039_add_fields_for_revocation_context_to_.py b/keylime/migrations/versions/257fe0f0c039_add_fields_for_revocation_context_to_.py
new file mode 100644
index 000000000..c5cb3e466
--- /dev/null
+++ b/keylime/migrations/versions/257fe0f0c039_add_fields_for_revocation_context_to_.py
@@ -0,0 +1,45 @@
+"""Add fields for revocation context to verifier
+
+Revision ID: 257fe0f0c039
+Revises: f35cdd35eb83
+Create Date: 2021-08-20 12:42:30.427138
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '257fe0f0c039'
+down_revision = 'f35cdd35eb83'
+branch_labels = None
+depends_on = None
+
+
+def upgrade(engine_name):
+    globals()["upgrade_%s" % engine_name]()
+
+
+def downgrade(engine_name):
+    globals()["downgrade_%s" % engine_name]()
+
+
+
+
+
+def upgrade_registrar():
+    pass
+
+
+def downgrade_registrar():
+    pass
+
+
+def upgrade_cloud_verifier():
+    op.add_column('verifiermain', sa.Column('severity_level', sa.Integer(), nullable=True))
+    op.add_column('verifiermain', sa.Column('last_event_id', sa.String(length=200), nullable=True))
+
+
+def downgrade_cloud_verifier():
+    op.drop_column('verifiermain', 'severity_level')
+    op.drop_column('verifiermain', 'last_event_id')
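
The hunks above depend on the new keylime.failure module, which this patch does not include. A minimal sketch of the API surface the diff relies on — Component, MAX_SEVERITY_LABEL, a Failure that is falsy while it holds no events, and the add_event/merge/highest_severity/highest_severity_event members — might look as follows; the class internals, severity values, and event-id scheme here are illustrative assumptions, not the shipped implementation:

# Hypothetical sketch of the keylime.failure API used by the diff above;
# for illustration only -- the real module ships with Keylime and differs
# in detail (e.g. severity labels are policy-configurable there).
from enum import Enum
from typing import Optional, Union


class Component(Enum):
    QUOTE_VALIDATION = "quote_validation"
    INTERNAL = "internal"


class SeverityLabel:
    """A named severity; a higher `severity` value outranks a lower one."""
    def __init__(self, name: str, severity: int):
        self.name = name
        self.severity = severity


# Assumption: a single fixed top severity used for the "stop polling" check.
MAX_SEVERITY_LABEL = SeverityLabel("emergency", 10)


class Event:
    """A single failure event with an id, context and recoverability flag."""
    def __init__(self, component: Component, event_id: str,
                 context: Union[str, dict], recoverable: bool):
        self.event_id = f"{component.value}.{event_id}"
        self.context = context
        self.recoverable = recoverable
        # Assumption: severity would normally be derived from policy.
        self.severity_label = MAX_SEVERITY_LABEL


class Failure:
    """Collects events for one component; falsy while no event was added."""
    def __init__(self, component: Component, sub_components: Optional[list] = None):
        self.component = component
        self.sub_components = sub_components or []
        self.events = []
        self.recoverable = True

    def add_event(self, event_id: str, context: Union[str, dict], recoverable: bool) -> None:
        # Record the event; one irrecoverable event marks the whole Failure.
        self.events.append(Event(self.component, event_id, context, recoverable))
        if not recoverable:
            self.recoverable = False

    def merge(self, other: "Failure") -> None:
        # Used to fold the check_quote() result into the caller's Failure.
        self.events.extend(other.events)
        self.recoverable = self.recoverable and other.recoverable

    @property
    def highest_severity_event(self) -> Optional[Event]:
        return max(self.events, key=lambda e: e.severity_label.severity, default=None)

    @property
    def highest_severity(self) -> Optional[SeverityLabel]:
        event = self.highest_severity_event
        return event.severity_label if event else None

    def __bool__(self) -> bool:
        return len(self.events) > 0

Under this sketch, process_quote_response() returning an empty Failure keeps `if not failure:` on the success path, while any add_event call routes the agent into the FAILED/INVALID_QUOTE handling in process_agent. One behavioral note on the patch as written: the `failure=Failure(Component.INTERNAL, ["verifier"])` default on process_agent is evaluated once at function definition, so retry paths that reach add_event without an explicit failure argument mutate a single shared instance across calls; a None default replaced inside the function body would avoid carrying events between unrelated attestations.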