@staged(logger, "Computing heuristics: certificate versions")
def _compute_cert_versions(self) -> None:
    """
    Computes the previous/next certificate versions for every certificate in the dataset.

    First builds a mapping from certificate digest to its parsed ``CertificateId``
    (``None`` when the certificate has no ``heuristics.cert_id``), then hands that
    mapping to each certificate's ``compute_heuristics_cert_versions``.
    """
    ids_by_digest = {}
    for cert in self:
        raw_id = cert.heuristics.cert_id
        # Certificates without an extracted ID are kept in the mapping as None.
        ids_by_digest[cert.dgst] = None if raw_id is None else CertificateId(cert.scheme, raw_id)

    for cert in self:
        cert.compute_heuristics_cert_versions(ids_by_digest)
def compute_heuristics_cert_versions(self, cert_ids: dict[str, CertificateId | None]) -> None:  # noqa: C901
    """
    Fills in the previous and next certificate versions based on the cert ID.

    Another certificate is treated as a different version of this one when it has a
    cert ID of the same scheme and every parsed field of this certificate's ID, except
    ``version`` (and ``year`` for the DE scheme), equals the same field of the other ID.
    Matches are stored as canonical ID strings, kept sorted, in
    ``heuristics.prev_certificates`` and ``heuristics.next_certificates``. Both lists
    are always reset to ``[]`` first.

    :param cert_ids: mapping of certificate digest to its parsed certificate ID, or
                     ``None`` for certificates without one. Must contain ``self.dgst``.
    :raises KeyError: if ``self.dgst`` is missing from ``cert_ids``.
    """

    def version_key(raw: str) -> tuple:
        # Natural ordering: digit runs compare as integers, so "9" sorts before "10";
        # a plain string comparison would put "10" first. Text runs (e.g. the "a" in
        # "1a") compare as strings and sort after any number at the same position.
        # The raw string is the final tie-break, so "01" vs "1" keeps its old order.
        # Assumes versions are strings, as produced by the cert ID regex groups.
        chunks = re.findall(r"(\d+)|(\D+)", raw)
        natural = tuple((0, int(digits), "") if digits else (1, 0, text) for digits, text in chunks)
        return natural, raw

    self.heuristics.prev_certificates = []
    self.heuristics.next_certificates = []
    own = cert_ids[self.dgst]
    if own is None:
        return
    if self.scheme not in ("DE", "FR", "ES", "NL", "MY"):
        # There is no version in the cert_id, so skip it
        return

    version = own.meta.get("version")
    own_key = None if version is None else version_key(version)
    # Fields that are allowed to differ between two versions of the same certificate.
    # For German certs we also ignore the year in the comparison.
    ignored = {"version", "year"} if self.scheme == "DE" else {"version"}
    # Hoisted out of the loop: the fields of our own ID that the other ID has to match.
    own_fields = [(key, value) for key, value in own.meta.items() if key not in ignored]

    for other_dgst, other in cert_ids.items():
        if other_dgst == self.dgst:
            # Skip ourselves
            continue
        if other is None or other.scheme != own.scheme:
            # The other does not have a cert ID or is from a different scheme.
            continue
        other_meta = other.meta
        if any(value != other_meta.get(key) for key, value in own_fields):
            # Some field other than the ignored ones differs, so this is another certificate.
            continue

        other_version = other_meta.get("version")
        if version is None and other_version is None:
            # This means a duplicate ID is present, and it has no version.
            # Just pass silently.
            continue
        if version is None:
            # We are the unversioned (base) certificate; any versioned one comes after us.
            target = self.heuristics.next_certificates
        elif other_version is None:
            target = self.heuristics.prev_certificates
        elif version_key(other_version) < own_key:
            target = self.heuristics.prev_certificates
        else:
            # NOTE(review): an identical version also lands here, as it did before this
            # change - confirm whether same-version IDs should really count as "next".
            target = self.heuristics.next_certificates
        insort(target, str(other))
def canonicalize_n(n, cert_id_str, scheme):
    """Return ``cert_id_str`` after passing it through ``canonicalize`` ``n`` times for ``scheme``.

    With ``n == 0`` the input comes back unchanged. Running with ``n > 1`` lets the tests
    check that canonicalizing an already canonical ID yields the same ID again.
    """
    current = cert_id_str
    for _round in range(n):
        current = canonicalize(current, scheme)
    return current


def _check_canonical(n, scheme, expectations):
    """Assert that each ``(raw, expected)`` pair canonicalizes to ``expected`` after ``n`` rounds."""
    for raw, expected in expectations:
        assert canonicalize_n(n, raw, scheme) == expected


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_fr(n):
    _check_canonical(
        n,
        "FR",
        (
            ("Rapport de certification 2001/02v2", "ANSSI-CC-2001/02v2"),
            ("ANSSI-CC 2001/02-R01", "ANSSI-CC-2001/02-R01"),
            ("ANSSI-CC 2001_02-M01", "ANSSI-CC-2001/02-M01"),
            ("ANSSI-CC-PP-2013/58", "ANSSI-CC-PP-2013/58"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_de(n):
    _check_canonical(
        n,
        "DE",
        (
            ("BSI-DSZ-CC-0420-2007", "BSI-DSZ-CC-0420-2007"),
            ("BSI-DSZ-CC-1004", "BSI-DSZ-CC-1004"),
            ("BSI-DSZ-CC-0831-V4-2021", "BSI-DSZ-CC-0831-V4-2021"),
            ("BSI-DSZ-CC-0837-V2-2014-MA-01", "BSI-DSZ-CC-0837-V2-2014-MA-01"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_us(n):
    _check_canonical(
        n,
        "US",
        (
            ("CCEVS-VR-VID10015", "CCEVS-VR-VID-10015"),
            ("CCEVS-VR-VID10015-2008", "CCEVS-VR-VID-10015-2008"),
            ("CCEVS-VR-10880-2018", "CCEVS-VR-10880-2018"),
            ("CCEVS-VR-04-0082", "CCEVS-VR-0082-2004"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_my(n):
    _check_canonical(
        n,
        "MY",
        (
            ("ISCB-5-RPT-C075-CR-v2", "ISCB-5-RPT-C075-CR-v2"),
            ("ISCB-5-RPT-C046-CR-V1a", "ISCB-5-RPT-C046-CR-v1a"),
            ("ISCB-3-RPT-C068-CR-1-v1", "ISCB-3-RPT-C068-CR-v1"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_es(n):
    _check_canonical(n, "ES", (("2011-14-INF-1095-v1", "2011-14-INF-1095"),))


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_sg(n):
    _check_canonical(n, "SG", (("CSA_CC_21005", "CSA_CC_21005"),))


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_in(n):
    _check_canonical(
        n,
        "IN",
        (("IC3S/KOL01/ADVA/EAL2/0520/0021 /CR", "IC3S/KOL01/ADVA/EAL2/0520/0021"),),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_it(n):
    _check_canonical(n, "IT", (("OCSI/CERT/TEC/02/2009/RC", "OCSI/CERT/TEC/02/2009/RC"),))


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_se(n):
    _check_canonical(
        n,
        "SE",
        (
            ("CSEC2017020", "CSEC2017020"),
            ("CSEC 2017020", "CSEC2017020"),
            ("CSEC201003", "CSEC2010003"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_uk(n):
    _check_canonical(
        n,
        "UK",
        (
            ("CERTIFICATION REPORT No. P123", "CRP123"),
            ("CRP123A", "CRP123A"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_au(n):
    _check_canonical(
        n,
        "AU",
        (
            ("Certification Report 2007/02", "Certificate Number: 2007/02"),
            ("Certificate Number: 37/2006", "Certificate Number: 2006/37"),
            ("Certificate Number: 2011/73", "Certificate Number: 2011/73"),
            ("Certification Report 97/76", "Certificate Number: 1997/76"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_ca(n):
    _check_canonical(
        n,
        "CA",
        (
            ("383-4-123-CR", "383-4-123"),
            ("383-4-123P", "383-4-123"),
            ("522 EWA 2020", "522-EWA-2020"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_jp(n):
    _check_canonical(
        n,
        "JP",
        (
            ("Certification No. C01234", "JISEC-CC-CRP-C01234"),
            ("CRP-C01234-01", "JISEC-CC-CRP-C01234-01"),
            ("JISEC-CC-CRP-C0689-01-2020", "JISEC-CC-CRP-C0689-01-2020"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_kr(n):
    _check_canonical(
        n,
        "KR",
        (
            ("KECS-ISIS-0579-2015", "KECS-ISIS-0579-2015"),
            ("KECS-CISS-10-2023", "KECS-CISS-0010-2023"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_no(n):
    _check_canonical(n, "NO", (("SERTIT-12", "SERTIT-012"),))


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_tr(n):
    _check_canonical(
        n,
        "TR",
        (
            ("21.0.03.0.00.00/TSE-CCCS-85", "21.0.03.0.00.00/TSE-CCCS-85"),
            ("21.0.03/TSE-CCCS-33", "21.0.03/TSE-CCCS-33"),
        ),
    )


@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_nl(n):
    _check_canonical(
        n,
        "NL",
        (
            ("NSCIB-CC-22-0428888-CR2", "NSCIB-CC-22-0428888-CR2"),
            ("NSCIB-CC-22-0428888", "NSCIB-CC-22-0428888-CR"),
            ("CC-22-0428888", "NSCIB-CC-22-0428888-CR"),
        ),
    )