Skip to content

Commit

Permalink
Add cert_id extraction from certificate files.
Browse files Browse the repository at this point in the history
  • Loading branch information
J08nY committed Feb 13, 2024
1 parent fffefed commit 05afc93
Showing 1 changed file with 61 additions and 49 deletions.
110 changes: 61 additions & 49 deletions src/sec_certs/sample/cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,52 +277,59 @@ def frontpage_cert_id(self, scheme: str) -> dict[str, float]:

def filename_cert_id(self, scheme: str) -> dict[str, float]:
    """
    Get cert_id candidates from the matches in the report filename and cert filename.

    Each rule in ``rules["cc_filename_cert_id"][scheme]`` is searched for in
    every available filename. The named groups of a match are handed to
    ``schemes[scheme]`` to build the candidate cert_id. Within one filename a
    candidate's weight is its match count divided by the highest match count
    seen for that filename (so at most 1.0); weights from the two filenames
    are summed, meaning a candidate found in both can score up to 2.0.

    :param scheme: Key used to index ``rules["cc_filename_cert_id"]`` and ``schemes``.
    :return: Mapping of candidate cert_id to its accumulated weight. Empty when
        the scheme has no filename rules, no filename is set, or nothing matched.
    """
    scheme_filename_rules = rules["cc_filename_cert_id"][scheme]
    if not scheme_filename_rules:
        return {}
    scheme_meta = schemes[scheme]
    results: dict[str, float] = {}
    # NOTE(review): the Codecov check on this commit reported the per-filename
    # matching and accumulation code below as not covered by tests.
    for fname in (self.report_filename, self.cert_filename):
        if not fname:
            continue

        matches: Counter = Counter()
        for rule in scheme_filename_rules:
            match = re.search(rule, fname)
            if not match:
                continue
            try:
                cert_id = scheme_meta(match.groupdict())
                matches[cert_id] += 1
            except Exception:
                # Deliberate best effort: a rule whose groups cannot be turned
                # into a cert_id is skipped rather than aborting the lookup.
                continue
        if not matches:
            continue
        # Normalise by the most frequent candidate for this filename.
        total = max(matches.values())

        for candidate, count in matches.items():
            results[candidate] = results.get(candidate, 0.0) + count / total
    # TODO count length in weight
    return results

def keywords_cert_id(self, scheme: str) -> dict[str, float]:
    """
    Get cert_id candidates from the keywords matches in the report and cert.

    For each of ``self.report_keywords`` and ``self.cert_keywords`` that is
    set, the entry under ``"cc_cert_id"`` for the given scheme is counted.
    Within one keywords source a candidate's weight is its count divided by
    the highest count in that source (so at most 1.0); weights from the two
    sources are summed, meaning a candidate present in both can score up to 2.0.

    :param scheme: Key selecting the scheme inside the ``"cc_cert_id"`` matches.
    :return: Mapping of candidate cert_id to its accumulated weight. Empty when
        neither source has cert_id matches for the scheme.
    """
    results: dict[str, float] = {}
    # NOTE(review): the Codecov check on this commit reported the three
    # ``continue`` branches after the first one as not covered by tests.
    for keywords in (self.report_keywords, self.cert_keywords):
        if not keywords:
            continue
        cert_id_matches = keywords.get("cc_cert_id")
        if not cert_id_matches:
            continue

        if scheme not in cert_id_matches:
            continue
        matches: Counter = Counter(cert_id_matches[scheme])
        if not matches:
            continue
        # Normalise by the most frequent candidate for this source.
        total = max(matches.values())

        for candidate, count in matches.items():
            results[candidate] = results.get(candidate, 0.0) + count / total
    # TODO count length in weight
    return results

Expand All @@ -332,22 +339,27 @@ def metadata_cert_id(self, scheme: str) -> dict[str, float]:
"""
scheme_rules = rules["cc_cert_id"][scheme]
fields = ("/Title", "/Subject")
matches: Counter = Counter()
for meta_field in fields:
field_val = self.report_metadata.get(meta_field) if self.report_metadata else None
if not field_val:
results: dict[str, float] = {}
for metadata in (self.report_metadata, self.cert_metadata):
if not metadata:
continue
for rule in scheme_rules:
match = re.search(rule, field_val)
if match:
cert_id = normalize_match_string(match.group())
matches[cert_id] += 1
if not matches:
return {}
total = max(matches.values())
results = {}
for candidate, count in matches.items():
results[candidate] = count / total
matches: Counter = Counter()
for meta_field in fields:
field_val = metadata.get(meta_field)
if not field_val:
continue

Check warning on line 350 in src/sec_certs/sample/cc.py

View check run for this annotation

Codecov / codecov/patch

src/sec_certs/sample/cc.py#L350

Added line #L350 was not covered by tests
for rule in scheme_rules:
match = re.search(rule, field_val)
if match:
cert_id = normalize_match_string(match.group())
matches[cert_id] += 1
if not matches:
continue

Check warning on line 357 in src/sec_certs/sample/cc.py

View check run for this annotation

Codecov / codecov/patch

src/sec_certs/sample/cc.py#L357

Added line #L357 was not covered by tests
total = max(matches.values())

for candidate, count in matches.items():
results.setdefault(candidate, 0)
results[candidate] += count / total
# TODO count length in weight
return results

Expand Down

0 comments on commit 05afc93

Please sign in to comment.