diff --git a/grayskull/license/discovery.py b/grayskull/license/discovery.py
index 6e0159a69..4cd8c0acf 100644
--- a/grayskull/license/discovery.py
+++ b/grayskull/license/discovery.py
@@ -62,6 +62,20 @@ def get_all_licenses_from_spdx() -> List:
     ]
 
 
+def _replace_dashes(s: str) -> str:
+    """
+    Replace every dash in ``s`` with a space.
+
+    Meant to be passed as the ``processor`` of ``process.extractOne`` so that
+    word-based scorers such as ``token_sort_ratio`` see dash-separated license
+    identifiers (e.g. "BSD-3-Clause") as individual words.
+
+    :param s: string whose dashes should be replaced
+    :return: copy of ``s`` with each "-" replaced by " "
+    """
+    return s.replace("-", " ")
+
+
 def _match_scrambled_exact(candidate, licenses) -> str | None:
     """
     Return license with rearranged word order only.
@@ -88,6 +102,11 @@ def match_license(name: str) -> dict:
     name = re.sub(r"\s+license\s*", "", name.strip(), flags=re.IGNORECASE)
     name = name.strip()
 
+    license_choices = _get_all_license_choice(all_licenses)
+    # a verbatim hit needs no fuzzy matching, so return it straight away
+    if name in license_choices:
+        return _get_license(name, all_licenses)
+
-    exact_match = _match_scrambled_exact(name, _get_all_license_choice(all_licenses))
+    exact_match = _match_scrambled_exact(name, license_choices)
     if exact_match:
         best_matches = [(exact_match, 100, 0)]
@@ -130,12 +149,21 @@ def match_license(name: str) -> dict:
                     lic[0] for lic in original_matches if lic[1] >= spdx_license[1]
                 ]
                 if len(best_matches) > 1:
+                    # we replace dashes by spaces here to match instances like
+                    # "3-Clause BSD" with "BSD-3-Clause" which otherwise would
+                    # not work with word-based scores like token_sort_ratio
                     spdx_license = process.extractOne(
-                        name, best_matches, scorer=token_sort_ratio
+                        name,
+                        best_matches,
+                        scorer=token_sort_ratio,
+                        processor=_replace_dashes,
                     )
             if original_matches and original_matches[0][1] < 0.55:
                 spdx_license = process.extractOne(
-                    name, [m[0] for m in original_matches], scorer=token_sort_ratio
+                    name,
+                    [m[0] for m in original_matches],
+                    scorer=token_sort_ratio,
+                    processor=_replace_dashes,
                 )
 
     if spdx_license[1] != 100 and spdx_license[0].startswith("MIT"):
diff --git a/tests/license/test_discovery.py b/tests/license/test_discovery.py
index 6bb8dff76..8752ff5d0 100644
--- a/tests/license/test_discovery.py
+++ b/tests/license/test_discovery.py
@@ -77,6 +77,14 @@ def test_short_license_id(licence_name, short_licence):
     assert get_short_license_id(licence_name) == short_licence
 
 
+@pytest.mark.parametrize(
+    "license_id", [lic["licenseId"] for lic in get_all_licenses_from_spdx()]
+)
+def test_short_license_id_map_to_self(license_id: str):
+    """Every license id from the SPDX list must resolve to itself."""
+    assert get_short_license_id(license_id) == license_id
+
+
 def test_get_other_names_from_opensource():
     assert sorted(get_other_names_from_opensource("MIT")) == sorted(["MIT", "Expat"])
 