diff --git a/requirements.txt b/requirements.txt index 587effc9..eef36234 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,7 @@ extractcode-libarchive==3.5.1.210531 fasteners==0.17.3 fingerprints==1.0.3 ftfy==6.0.3 +ftputil==5.0.4 future==0.18.2 gemfileparser==0.8.0 html5lib==1.1 diff --git a/setup.cfg b/setup.cfg index d2562b0f..2ef2d01a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,6 +54,9 @@ install_requires = attrs packageurl-python requests + debian-inspector + ftputil + extractcode[full] [options.packages.find] diff --git a/src/fetchcode/__init__.py b/src/fetchcode/__init__.py index c573991f..a38aa0d2 100644 --- a/src/fetchcode/__init__.py +++ b/src/fetchcode/__init__.py @@ -24,7 +24,7 @@ class Response: - def __init__(self, location, content_type, size, url): + def __init__(self, location, content_type, size, url, success=True): """ Represent the response from fetching a URL with: - `location`: the absolute location of the files that was fetched @@ -36,6 +36,7 @@ def __init__(self, location, content_type, size, url): self.size = size self.content_type = content_type self.location = location + self.success = success def fetch_http(url, location): @@ -43,7 +44,16 @@ def fetch_http(url, location): Return a `Response` object built from fetching the content at a HTTP/HTTPS based `url` URL string saving the content in a file at `location` """ - r = requests.get(url) + try: + r = requests.get(url) + except requests.exceptions.ConnectionError: + raise Exception(f"Failed to fetch: {url}") + + if r.status_code != 200: + success = False + else: + success = True + with open(location, 'wb') as f: f.write(r.content) @@ -51,7 +61,7 @@ def fetch_http(url, location): size = r.headers.get('content-length') size = int(size) if size else None - resp = Response(location=location, content_type=content_type, size=size, url=url) + resp = Response(location=location, content_type=content_type, size=size, url=url, success=success) return resp diff --git a/src/fetchcode/ls.py 
b/src/fetchcode/ls.py new file mode 100644 index 00000000..69d72366 --- /dev/null +++ b/src/fetchcode/ls.py @@ -0,0 +1,195 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + + +from datetime import datetime +from functools import total_ordering +import logging +import posixpath +import stat + +from ftputil.stat import UnixParser +from ftputil.error import ParserError + + +TRACE = False + +logger = logging.getLogger(__name__) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) + +""" +Parse directory listings such as a find or ls command output. +These are commonly provided as file indexes in package repositories. +""" + +# TODO: use constants for entry types +DIR = 'd' +FILE = 'f' +LINK = 'l' +SPECIAL = 's' + +# FIXME: do we really need link and special file support? + + +@total_ordering +class Entry(object): + """ + Represent a file, directory or link entry in a directory listing. 
+ """ + __slots__ = 'path', 'type', 'size', 'date', 'target' + + def __init__(self, path=None, type=None, size=None, date=None, target=None): # NOQA + self.path = path + self.type = type + self.size = size + self.date = date + self.target = target + if TRACE: + logger.debug('Entry(): ' + repr(self)) + + def __repr__(self): + base = 'Entry(path=%(path)r, type=%(type)r, size=%(size)r, date=%(date)r' + link_target = ')' + if self.type == LINK: + link_target = ', target=%(target)r)' + return (base + link_target) % self.to_dict() + + def __eq__(self, other): + return isinstance(other, Entry) and self.to_dict() == other.to_dict() + + def __lt__(self, other): + return isinstance(other, Entry) and tuple(self.to_dict().items()) < tuple(other.to_dict().items()) + + def __hash__(self): + return hash(tuple(self.to_dict().items())) + + def to_dict(self): + return { + 'path': self.path, + 'type': self.type, + 'size': self.size, + 'date': self.date, + 'target': self.target, + } + + @classmethod + def from_stat(self, stat_result, base_dir='', use_utc_time=True): + """ + Return a new Entry built from a stat-like tuple and a base + directory. 
+ """ + res_type = None + path = stat_result._st_name + path = clean_path(path) + + # ignore date and size unless a file + date = None + size = 0 + + target = None + mode = stat_result.st_mode + + if stat.S_ISREG(mode): + res_type = FILE + if use_utc_time: + utc_date = datetime.utcfromtimestamp(stat_result.st_mtime) + else: + utc_date = datetime.fromtimestamp(stat_result.st_mtime) + date = datetime.isoformat(utc_date)[:10] + size = stat_result.st_size + + elif stat.S_ISDIR(mode): + res_type = DIR + + elif stat.S_ISLNK(mode): + res_type = LINK + target = stat_result._st_target + + else: + # anything else is some special file of sorts + res_type = SPECIAL + + # rejoin path with base-dir if any + if base_dir and base_dir != '.': + base_dir = clean_path(base_dir) + path = posixpath.join(base_dir, path) + + return Entry(path, res_type, size, date, target) + + +def clean_path(path): + """ + Return a path cleaned from leading and trailing slashes and leading ./. + """ + path = path.strip().strip('/') + if path.startswith('./'): + path = path[2:] + return path.strip() + + +def remove_inode(line): + """ + Return the line with leading inode number and size in block (which are + numbers separated by spaces) are removed. + """ + _, _, line = line.strip().partition(' ') + _, _, line = line.strip().partition(' ') + return line.strip() + + +def parse_directory_listing(dir_listing, from_find=False): + """ + Yield Entry from a `dir_listing` directory listing text. + + If`from_find` is True the directory listing is assumed to come from a "find + -ls" command. Otherwise it is assumed to come from an "ls -alR" command. + + For "find -ls" all lines start with an inode number, e.g. a set of digits. + Note: the "find -ls" is similar to the "ls -ils" format (except for paths): + we have an inode and size in block prefixing each listing line. 
+ """ + lines = dir_listing.splitlines() + parser = UnixParser() + + # default in case this would not be a recursive listing: we always need a base dir + base_dir = '' + for ln, line in enumerate(lines, 1): + line = line.strip() + if parser.ignores_line(line): + continue + + if from_find: + line = remove_inode(line) + + file_stat = None + try: + file_stat = parser.parse_line(line) + if TRACE: + logger.debug('parse_directory_listing:file_stat: ' + repr(file_stat)) + dt = datetime.utcfromtimestamp(file_stat.st_mtime) + dt = datetime.isoformat(dt) + logger.debug('parse_directory_listing:file_stat:date: ' + repr(dt)) + + except ParserError as pe: + # this is likely a directory line from an ls -LR listing. Strip + # trailing colon and keep track of the base directory + if not line.endswith(':'): + raise Exception('Unknown directory listing line format: #%(ln)d: %(line)r' % locals()) + base_dir = line.strip(':') + continue + + if file_stat._st_name in ('.', '..'): + continue + + entry = Entry.from_stat(file_stat, base_dir=base_dir, use_utc_time=False) + if entry: + yield entry diff --git a/src/fetchcode/package.py b/src/fetchcode/package.py index e8603301..913d0df9 100644 --- a/src/fetchcode/package.py +++ b/src/fetchcode/package.py @@ -13,15 +13,11 @@ # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR # CONDITIONS OF ANY KIND, either express or implied. See the License for the # specific language governing permissions and limitations under the License. 
- -from attr import attrs, attrib - from packageurl.contrib.route import NoRouteAvailable -from packageurl import PackageURL from packageurl.contrib.route import Router -import requests from fetchcode.packagedcode_models import Package +from fetchcode.utils import * router = Router() @@ -38,35 +34,6 @@ def info(url): return -def get_response(url): - """ - Generate `Package` object for a `url` string - """ - resp = requests.get(url) - if resp.status_code == 200: - return resp.json() - - raise Exception(f"Failed to fetch: {url}") - - -def get_pypi_bugtracker_url(project_urls): - bug_tracking_url = project_urls.get("Tracker") - if not (bug_tracking_url): - bug_tracking_url = project_urls.get("Issue Tracker") - if not (bug_tracking_url): - bug_tracking_url = project_urls.get("Bug Tracker") - return bug_tracking_url - - -def get_pypi_codeview_url(project_urls): - code_view_url = project_urls.get("Source") - if not (code_view_url): - code_view_url = project_urls.get("Code") - if not (code_view_url): - code_view_url = project_urls.get("Source Code") - return code_view_url - - -@router.route("pkg:cargo/.*") def get_cargo_data_from_purl(purl): """ @@ -327,3 +294,43 @@ def get_rubygems_data_from_purl(purl): download_url=download_url, **purl.to_dict(), ) + + +@router.route("pkg:deb/.*") +def get_debian_packages(purl): + purl = PackageURL.from_string(purl) + name = purl.name + version = purl.version + + # If no arch is provided just return PackageInfo for source package if available. 
+ arch = purl.qualifiers.get("arch", "source") + base_path = f"https://ftp.debian.org/debian/pool/main" + + source = False + + name_parts = name.split("_") + version_parts = version.split("_") + if len(name_parts) == 3: + arch = name_parts[2] + version = name_parts[1] + name = name_parts[0] + elif len(version_parts) == 2: + arch = version_parts[1] + version = version_parts[0] + + if arch == "source": + # This can be either .gz or .xz + package_name = f"{name}_{version}.debian.tar" + source = True + else: + # The Debian binary package file names conform to the following convention: + # _-_.deb + package_name = f"{name}_{version}_{arch}.deb" + + debian_processed_data = process_debian_data(package_name, source) + + # FIXME: What to do when there are multiple licenses + yield Package( + **debian_processed_data, + **purl.to_dict() + ) diff --git a/src/fetchcode/utils.py b/src/fetchcode/utils.py new file mode 100644 index 00000000..45db6a08 --- /dev/null +++ b/src/fetchcode/utils.py @@ -0,0 +1,288 @@ +import gzip +import os + +import debian_inspector +import requests +from debian_inspector import copyright as debcopy, debcon +from extractcode.api import extract_archives +from packageurl import PackageURL + +from fetchcode import ls, fetch +from fetchcode.packagedcode_models import Party, DependentPackage + + +def get_response(url): + """ + Generate `Package` object for a `url` string + """ + resp = requests.get(url) + if resp.status_code == 200: + return resp.json() + + raise Exception(f"Failed to fetch: {url}") + + +def get_pypi_bugtracker_url(project_urls): + bug_tracking_url = project_urls.get("Tracker") + if not bug_tracking_url: + bug_tracking_url = project_urls.get("Issue Tracker") + if not bug_tracking_url: + bug_tracking_url = project_urls.get("Bug Tracker") + return bug_tracking_url + + +def get_pypi_codeview_url(project_urls): + code_view_url = project_urls.get("Source") + if not code_view_url: + code_view_url = project_urls.get("Code") + if not code_view_url: 
+ code_view_url = project_urls.get("Source Code") + return code_view_url + + +def extract_gzip_data(file_loc): + with gzip.open(file_loc, "r") as file: + content = file.read().decode("utf-8") + return content + + +def parse_license(location): + """ + Return a list of License paragraphs from Debian copyright file at location. + """ + copyparas = debcopy.DebianCopyright.from_file(location) + return [ + para + for para in copyparas.paragraphs + if isinstance(para, debian_inspector.copyright.CopyrightLicenseParagraph) + ] + + +def get_vcs_repo(description): + """ + Return a tuple of (vcs_tool, vcs_repo) or (None, None) if no vcs_repo is found. + """ + repos = [] + for vcs_tool, vcs_repo in description.items(): + vcs_tool = vcs_tool.lower() + if not vcs_tool.startswith("vcs-") or vcs_tool.startswith("vcs-browser"): + continue + _, _, vcs_tool = vcs_tool.partition("-") + repos.append((vcs_tool, vcs_repo)) + + if len(repos) > 1: + raise TypeError( + "Debian description with more than one Vcs repos: %(repos)r" % locals() + ) + + if repos: + vcs_tool, vcs_repo = repos[0] + else: + vcs_tool = None + vcs_repo = None + + return vcs_tool, vcs_repo + + +# TODO: Fix this function in minecode +def parse_email(text): + """ + From minecode + Return a tuple of (name, email) extracted from a `text` string. + Debian TeX Maintainers + """ + if not text: + return None, None + name, _, email = text.partition("<") + name = name.strip() + email = email.strip() + if not email: + return name, email + email = email.strip(">") + return name, email + + +def get_dependencies(data): + """ + From minecode + Return a list of DependentPackage extracted from a Debian `data` mapping. 
+ """ + scopes = { + "build-depends": dict(is_runtime=False, is_optional=True), + "depends": dict(is_runtime=True, is_optional=False), + "pre-depends": dict(is_runtime=True, is_optional=False), + # 'Provides': dict(is_runtime=True, is_optional=False), + # 'Recommends': dict(is_runtime=True, is_optional=True), + # 'Suggests': dict(is_runtime=True, is_optional=True), + } + dep_pkgs = [] + for scope, flags in scopes.items(): + depends = data.get(scope) + if not depends: + continue + + dependencies = comma_separated(depends) + name_version = [] + for dependency in dependencies: + version_constraint = None + if "(" in dependency and ")" in dependency: + start = dependency.index("(") + end = dependency.index(")") + version_constraint = dependency[start + 1 : end] + name = dependency.split(" ")[0] + name_version.append([name, version_constraint]) + + # break each dep in package names and version constraints + # FIXME:!!! + # FIXED !!! + for name, version_constraint in name_version: + purl = PackageURL(type="deb", namespace="debian", name=name) + dep = DependentPackage( + purl=purl.to_string(), + scope=scope, + requirement=version_constraint, + **flags, + ) + dep_pkgs.append(dep) + + return dep_pkgs + + +def comma_separated(text): + """ + Return a list of strings from a comma-separated text. + """ + if not text: + return [] + return [t.strip() for t in text.split(",") if t and t.strip()] + + +# TODO: Document All changes for god's sake, don't forget it >.< +def process_debian_data(package_name, source): + """ + :parameter `package_name` Takes package name [with file_extension]. + :parameter `source` a bool True if given package is a source package (.debian.tar.gz) or a binary package(.deb). + :returns a dictionary with keys (licenses, vcs_url, homepage_url, description, size, release_date, download_url) + """ + + base_path = f"https://ftp.debian.org/debian" + + # Fetching ls-lR.gz file. 
+ index_file_response = fetch(f"{base_path}/ls-lR.gz") + if not index_file_response.success: + raise Exception(f"Unable to fetch {base_path}/ls-lR.gz") + + index_content = extract_gzip_data(index_file_response.location) + entries = ls.parse_directory_listing(index_content) + + package_entry = None + + if not source: + for entry in entries: + if entry.type == ls.FILE: + if entry.path.startswith("pool/main") and entry.path.endswith( + package_name + ): + package_entry = entry + break + else: + # We need to this because debian source package can end with .gz or .xz + for entry in entries: + if entry.type == ls.FILE: + if ( + entry.path.startswith("pool/main") + and (entry.path.endswith(package_name + ".gz") + or entry.path.endswith(package_name + ".xz")) + ): + package_entry = entry + package_name = package_entry.path.split("/")[-1] + break + + if package_entry is None: + raise Exception(f"Unable to find {package_name} in Debian Pool") + + # Fetching the actual package + package_pool_location = f"{base_path}/{package_entry.path}" + package_response = fetch(package_pool_location) + if not package_response.success: + raise Exception(f"Unable to fetch {package_pool_location}") + + # Extracting the package using extractcode + for _ in extract_archives(package_response.location): + pass + + extracted_package_location = f"{package_response.location}-extract" + + # If the requested package is a source file [.debian.tar.gz] + if source: + copyright_location = f"{extracted_package_location}/debian/copyright" + control_location = f"{extracted_package_location}/debian/control" + + # If the requested package is a binary Package + else: + + # Doing this because file name can be control.tar.gz-extract or control.tar.xz-extract + control_folder = "control.tar.gz-extract" + for folder in os.listdir(extracted_package_location): + if folder.startswith("control") and folder.endswith("extract"): + control_folder = folder + break + copyright_location = 
f"{extracted_package_location}/data.tar.xz-extract/usr/share/doc/{package_name.split('_')[0]}/copyright" + control_location = f"{extracted_package_location}/{control_folder}/control" + + # Getting Licenses + licenses = [] + for CopyrightLicenseParagraph in parse_license(copyright_location): + licenses.append(CopyrightLicenseParagraph.license.name) + + with open(control_location, "r") as control_file: + control_file_data = control_file.read() + + control_file_content = debcon.Debian822.from_string(control_file_data).to_dict() + + # Getting vcs_info + tool, vcs_url = get_vcs_repo(control_file_content) + + # Getting HomepageURL + homepage_url = control_file_content.get("homepage") + + # Getting Description + description = control_file_content.get("description") + + # Getting Size + size = package_entry.size + + # Getting release_date + release_date = package_entry.date + + # Getting Maintainers and Uploaders + parties = [] + maintainer_names = comma_separated(control_file_content.get("maintainer", "")) + if maintainer_names: + for maintainer in maintainer_names: + name, email = parse_email(maintainer) + if name: + party = Party(name=name, role="maintainer", email=email) + parties.append(party) + contributor_names = comma_separated(control_file_content.get("uploaders", "")) + if contributor_names: + for contributor in contributor_names: + name, email = parse_email(contributor) + if name: + party = Party(name=name, role="contributor", email=email) + parties.append(party) + + # Getting Dependencies + dependencies = get_dependencies(control_file_content) + + return dict( + declared_license=" ,".join(licenses), + vcs_url=vcs_url, + homepage_url=homepage_url, + description=description, + size=size, + release_date=release_date, + download_url=package_pool_location, + dependencies=dependencies, + parties=parties, + ) diff --git a/tests/data/debian_test_data/debian-binary-expected-data.json b/tests/data/debian_test_data/debian-binary-expected-data.json new file mode 100644 
index 00000000..2f710d9c --- /dev/null +++ b/tests/data/debian_test_data/debian-binary-expected-data.json @@ -0,0 +1,79 @@ +{ + "0": { + "type": "deb", + "namespace": null, + "name": "libghc-curl-prof", + "version": "1.3.8-11+b3_armel", + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": "Profiling libraries for the libcurl Haskell bindings; profiling libraries\n .\n libcurl is a client-side URL transfer library, supporting FTP, FTPS, HTTP,\n HTTPS, SCP, SFTP, TFTP, TELNET, DICT, LDAP, LDAPS and FILE. libcurl supports\n SSL certificates, HTTP POST, HTTP PUT, FTP uploading, HTTP form based upload,\n proxies, cookies, user+password authentication (Basic, Digest, NTLM, Negotiate,\n Kerberos4), file transfer resume, http proxy tunneling and more!\n .\n This package provides a library for the Haskell programming language, compiled\n for profiling. See http://www.haskell.org/ for more information on Haskell.", + "release_date": "2019-06-22", + "parties": [ + { + "type": null, + "role": "maintainer", + "name": "Debian Haskell Group", + "email": "pkg-haskell-maintainers@lists.alioth.debian.org", + "url": null + } + ], + "keywords": [], + "homepage_url": "http://hackage.haskell.org/package/curl", + "download_url": "https://ftp.debian.org/debian/pool/main/h/haskell-curl/libghc-curl-prof_1.3.8-11+b3_armel.deb", + "api_url": null, + "size": 278616, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": "BSD-3-clause", + "notice_text": null, + "root_path": null, + "dependencies": [ + { + "purl": "pkg:deb/debian/libghc-curl-dev", + "requirement": "= 1.3.8-11+b3", + "scope": "depends", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:deb/debian/libghc-base-prof-4.11.1.0-b951c", + "requirement": null, + "scope": "depends", + "is_runtime": true, + 
"is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:deb/debian/libghc-bytestring-prof-0.10.8.2-40be2", + "requirement": null, + "scope": "depends", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:deb/debian/libghc-containers-prof-0.5.11.0-35743", + "requirement": null, + "scope": "depends", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + } + ], + "contains_source_code": null, + "source_packages": [], + "purl": "pkg:deb/libghc-curl-prof@1.3.8-11%2Bb3_armel", + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null + } +} \ No newline at end of file diff --git a/tests/data/debian_test_data/debian-source-expected-data.json b/tests/data/debian_test_data/debian-source-expected-data.json new file mode 100644 index 00000000..f98ecbe5 --- /dev/null +++ b/tests/data/debian_test_data/debian-source-expected-data.json @@ -0,0 +1 @@ +{"0": {"type": "deb", "namespace": "debian", "name": "leatherman", "version": "1.12.1+dfsg-1.2", "qualifiers": {"arch": "source"}, "subpath": null, "primary_language": null, "description": null, "release_date": "2021-12-04", "parties": [{"type": null, "role": "maintainer", "name": "Puppet Package Maintainers", "email": "pkg-puppet-devel@lists.alioth.debian.org", "url": null}, {"type": null, "role": "contributor", "name": "Apollon Oikonomopoulos", "email": "apoikos@debian.org", "url": null}], "keywords": [], "homepage_url": "https://github.com/puppetlabs/leatherman", "download_url": "https://ftp.debian.org/debian/pool/main/l/leatherman/leatherman_1.12.1+dfsg-1.2.debian.tar.xz", "api_url": null, "size": 7332, "sha1": null, "md5": null, "sha256": null, "sha512": null, "bug_tracking_url": null, "code_view_url": null, "vcs_url": "https://salsa.debian.org/puppet-team/leatherman.git", "copyright": null, "license_expression": null, "declared_license": "BSL-1.0", "notice_text": null, "root_path": null, "dependencies": [{"purl": 
"pkg:deb/debian/cmake", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/debhelper-compat", "requirement": "= 13", "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-dev", "requirement": ">=1.73.0", "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-log-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-nowide-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-locale-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-thread-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-date-time-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-filesystem-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-system-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-chrono-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libboost-regex-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libcurl4-openssl-dev", "requirement": null, "scope": "build-depends", "is_runtime": 
false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/libruby", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/ruby", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/rapidjson-dev", "requirement": null, "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}, {"purl": "pkg:deb/debian/catch", "requirement": ">= 1.10~", "scope": "build-depends", "is_runtime": false, "is_optional": true, "is_resolved": false}], "contains_source_code": null, "source_packages": [], "purl": "pkg:deb/debian/leatherman@1.12.1%2Bdfsg-1.2?arch=source", "repository_homepage_url": null, "repository_download_url": null, "api_data_url": null}} \ No newline at end of file diff --git a/tests/data/debian_test_data/leatherman_1.12.1+dfsg-1.2.debian.tar.xz b/tests/data/debian_test_data/leatherman_1.12.1+dfsg-1.2.debian.tar.xz new file mode 100644 index 00000000..cffafe3b Binary files /dev/null and b/tests/data/debian_test_data/leatherman_1.12.1+dfsg-1.2.debian.tar.xz differ diff --git a/tests/data/debian_test_data/libghc-curl-prof_1.3.8-11+b3_armel_mock_data.deb b/tests/data/debian_test_data/libghc-curl-prof_1.3.8-11+b3_armel_mock_data.deb new file mode 100644 index 00000000..78e31cd4 Binary files /dev/null and b/tests/data/debian_test_data/libghc-curl-prof_1.3.8-11+b3_armel_mock_data.deb differ diff --git a/tests/data/debian_test_data/ls-lR_mock.gz b/tests/data/debian_test_data/ls-lR_mock.gz new file mode 100644 index 00000000..7bba791b Binary files /dev/null and b/tests/data/debian_test_data/ls-lR_mock.gz differ diff --git a/tests/test_package.py b/tests/test_package.py index dd639a88..d4652898 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -15,18 +15,26 @@ # specific language governing permissions and limitations under the License. 
import json -import pytest from unittest import mock +import pytest + from fetchcode.package import info def file_data(file_name): with open(file_name) as file: data = file.read() + # return data return json.loads(data) +def return_file(file_name): + with open(file_name, "rb") as file: + data = file.read() + return data + + def match_data(packages, expected_data): data = [dict(p.to_dict()) for p in packages] expected_data_dict = dict(expected_data) @@ -106,3 +114,29 @@ def test_tuby_package_with_invalid_url(mock_get): purl = "pkg:ruby/file" packages = list(info(purl)) assert "Failed to fetch: https://rubygems.org/api/v1/gems/file.json" == e_info + + +@mock.patch("fetchcode.utils.fetch") +def test_debian_binary_packages(mock_get): + side_effect = [ + return_file("tests/data/debian_test_data/ls-lR_mock.gz"), + return_file("tests/data/debian_test_data/libghc-curl-prof_1.3.8-11+b3_armel_mock_data.deb") + ] + mock_get.side_effect = side_effect + purl = "pkg:deb/libghc-curl-prof@1.3.8-11%2Bb3_armel" + packages = list(info(purl)) + expected_data = file_data("tests/data/debian_test_data/debian-binary-expected-data.json") + match_data(packages, expected_data) + + +@mock.patch("fetchcode.utils.fetch") +def test_debian_source_packages(mock_get): + purl = "pkg:deb/debian/leatherman@1.12.1%2Bdfsg-1.2?arch=source" + side_effect = [ + return_file("tests/data/debian_test_data/ls-lR_mock.gz"), + return_file("tests/data/debian_test_data/leatherman_1.12.1+dfsg-1.2.debian.tar.xz") + ] + mock_get.side_effect = side_effect + packages = list(info(purl)) + expected_data = file_data("tests/data/debian_test_data/debian-source-expected-data.json") + match_data(packages, expected_data)