diff --git a/src/clusterfuzz/_internal/bot/fuzzers/utils.py b/src/clusterfuzz/_internal/bot/fuzzers/utils.py index 62742e06c1..ffe8d4eac1 100644 --- a/src/clusterfuzz/_internal/bot/fuzzers/utils.py +++ b/src/clusterfuzz/_internal/bot/fuzzers/utils.py @@ -13,10 +13,13 @@ # limitations under the License. """Fuzzer utils.""" +import functools import os import re import stat import tempfile +from typing import Callable +from typing import Optional from clusterfuzz._internal.base import utils from clusterfuzz._internal.metrics import logs @@ -30,7 +33,7 @@ EXTRA_BUILD_DIR = '__extra_build' -def is_fuzz_target_local(file_path, file_handle=None): +def is_fuzz_target(file_path, file_opener: Optional[Callable] = None): """Returns whether |file_path| is a fuzz target binary (local path).""" if '@' in file_path: # GFT targets often have periods in the name that get misinterpreted as an @@ -53,7 +56,7 @@ def is_fuzz_target_local(file_path, file_handle=None): # Ignore files with disallowed extensions (to prevent opening e.g. .zips). return False - if not file_handle and not os.path.exists(file_path): + if not file_opener and not os.path.exists(file_path): # Ignore non-existent files for cases when we don't have a file handle. return False @@ -72,24 +75,27 @@ def is_fuzz_target_local(file_path, file_handle=None): logs.warning('Tried to read from non-regular file: %s.' % file_path) return False - # Use already provided file handle or open the file. - local_file_handle = file_handle or open(file_path, 'rb') - - result = False - for pattern in FUZZ_TARGET_SEARCH_BYTES: - # TODO(metzman): Bound this call so we don't read forever if something went - # wrong. - local_file_handle.seek(0) - result = utils.search_bytes_in_file(pattern, local_file_handle) - if result: - break - - if not file_handle: - # If this local file handle is owned by our function, close it now. - # Otherwise, it is caller's responsibility. - local_file_handle.close() - - return result + # Either use the file opener or open the file ourselves. + if not file_opener: + file_opener = functools.partial(open, mode='rb') + try: + with file_opener(file_path) as file_handle: + result = False + for pattern in FUZZ_TARGET_SEARCH_BYTES: + # TODO(metzman): Bound this call so we don't read forever if something + # went wrong. + file_handle.seek(0) + result = utils.search_bytes_in_file(pattern, file_handle) + if result: + break + + file_handle.close() + + return result + except Exception as e: + # In case we could not open the file, we consider it's not a fuzzer. + logs.warning(f'Could not open {file_path}: {e}') + return False def get_fuzz_targets_local(path): @@ -103,7 +109,7 @@ def get_fuzz_targets_local(path): continue file_path = os.path.join(root, filename) - if is_fuzz_target_local(file_path): + if is_fuzz_target(file_path): fuzz_target_paths.append(file_path) return fuzz_target_paths diff --git a/src/clusterfuzz/_internal/build_management/build_archive.py b/src/clusterfuzz/_internal/build_management/build_archive.py index 0e15bcba34..133fc1c2c1 100644 --- a/src/clusterfuzz/_internal/build_management/build_archive.py +++ b/src/clusterfuzz/_internal/build_management/build_archive.py @@ -190,14 +190,10 @@ def list_fuzz_targets(self) -> List[str]: from clusterfuzz._internal.bot.fuzzers import utils as fuzzer_utils for archive_file in self.list_members(): - file_content = self.try_open(archive_file.name) - if fuzzer_utils.is_fuzz_target_local(archive_file.name, file_content): + if fuzzer_utils.is_fuzz_target(archive_file.name, self.open): fuzz_target = fuzzer_utils.normalize_target_name(archive_file.name) self._fuzz_targets[fuzz_target] = archive_file.name - if file_content: - file_content.close() - return list(self._fuzz_targets.keys()) def unpacked_size(self, fuzz_target: Optional[str] = None) -> int: @@ -299,23 +295,19 @@ def get_target_dependencies( return res -# pylint: disable=redefined-builtin -def open(archive_path: str) -> BuildArchive: - """Opens the archive and gets the appropriate build archive based on the - `archive_path`. The resulting object is usable as a normal archive reader, - but provides additional feature related to build handling. +def open_with_reader(reader: archive.ArchiveReader) -> BuildArchive: + """Open the archive and gets the appropriate build archive based on the + provided archive information. Args: - archive_path: the path to the archive. + reader: the archive reader. Raises: - If the file could not be opened or if the archive type cannot be handled. + If the archive reader cannot be handled. Returns: - the build archive. + The build archive. """ - reader = archive.open(archive_path) - # Unfortunately, there is no good heuristic for determining which build # archive implementation to use. # Hopefully, we can search in the archive whether some files are present and @@ -328,3 +320,41 @@ def open(archive_path: str) -> BuildArchive: if reader.file_exists(args_gn_path): return ChromeBuildArchive(reader) return DefaultBuildArchive(reader) + + +def open(archive_path: str) -> BuildArchive: # pylint: disable=redefined-builtin + """Opens the archive and gets the appropriate build archive based on the + `archive_path`. The resulting object is usable as a normal archive reader, + but provides additional feature related to build handling. + + Args: + archive_path: the path to the archive. + + Raises: + If the file could not be opened or if the archive type cannot be handled. + + Returns: + The build archive. + """ + reader = archive.open(archive_path) + return open_with_reader(reader) + + +def open_uri(uri: str) -> BuildArchive: + """Opens a build archive over HTTP. This is only compatible with chromium as + of now. + + Args: + uri: the URI pointing to the zip file. + + Returns: + The build archive. + """ + reader = archive.ZipArchiveReader(archive.HttpZipFile(uri)) + return open_with_reader(reader) + + +def unzip_over_http_compatible(build_url: str) -> bool: + """Whether the build URL is compatible with unzipping over HTTP. + """ + return archive.HttpZipFile.is_uri_compatible(build_url) diff --git a/src/clusterfuzz/_internal/build_management/build_manager.py b/src/clusterfuzz/_internal/build_management/build_manager.py index af969d78a4..f6fd13dc28 100644 --- a/src/clusterfuzz/_internal/build_management/build_manager.py +++ b/src/clusterfuzz/_internal/build_management/build_manager.py @@ -14,11 +14,13 @@ """Build manager.""" from collections import namedtuple +import contextlib import os import re import shutil import subprocess import time +from typing import Optional from clusterfuzz._internal.base import errors from clusterfuzz._internal.base import utils @@ -402,23 +404,19 @@ def _post_setup_success(self, update_revision=True): if instrumented_library_paths: self._patch_rpaths(instrumented_library_paths) - def _unpack_build(self, base_build_dir, build_dir, build_url): - """Unpacks a build from a build url into the build directory.""" - # Track time taken to unpack builds so that it doesn't silently regress. - start_time = time.time() - - logs.info(f'Unpacking build from {build_url} into {build_dir}.') + @contextlib.contextmanager + def _download_and_open_build_archive(self, base_build_dir: str, + build_dir: str, build_url: str): + """Downloads the build archive at `build_url` and opens it. - # Free up memory. - utils.python_gc() - - # Remove the current build. - logs.info(f'Removing build directory {build_dir}.') - if not shell.remove_directory(build_dir, recreate=True): - logs.error(f'Unable to clear build directory {build_dir}.') - _handle_unrecoverable_error_on_windows() - return False + Args: + base_build_dir: the base build directory + build_dir: the current build directory + build_url: the build URL + Yields: + the build archive + """ # Download build archive locally. build_local_archive = os.path.join(build_dir, os.path.basename(build_url)) @@ -431,15 +429,83 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): 'Failed to make space for download. ' 'Cleared all data directories to free up space, exiting.') - logs.info(f'Downloading build from {build_url}.') + logs.info(f'Downloading build from {build_url} to {build_local_archive}.') try: storage.copy_file_from(build_url, build_local_archive) except Exception as e: logs.error(f'Unable to download build from {build_url}: {e}') - return False + raise try: with build_archive.open(build_local_archive) as build: + yield build + finally: + shell.remove_file(build_local_archive) + + def _open_build_archive(self, base_build_dir: str, build_dir: str, + build_url: str, http_build_url: Optional[str], + unpack_everything: Optional[bool]): + """Gets a handle on a build archive for the current build. Depending on the + provided parameters, this function might download the build archive into + the build directory or directly use remote HTTP archive. + + Args: + unpack_everything: wether we should unpack the whole archive or try + selective unpacking. + base_build_dir: the base build directory. + build_dir: the current build directory. + build_url: the build URL. + http_build_url: the HTTP build URL. + + Raises: + if an error occurred while accessing the file over HTTP or while + downloading the file on disk. + + Returns: + the build archive. + """ + # We only want to use remote unzipping if we're not unpacking everything and + # if the HTTP URL is compatible with remote unzipping. + allow_unpack_over_http = environment.get_value( + 'ALLOW_UNPACK_OVER_HTTP', default_value=False) + can_unzip_over_http = ( + allow_unpack_over_http and not unpack_everything and http_build_url and + build_archive.unzip_over_http_compatible(http_build_url)) + + if not can_unzip_over_http: + return self._download_and_open_build_archive(base_build_dir, build_dir, + build_url) + logs.info("Opening an archive over HTTP, skipping archive download.") + assert http_build_url + return build_archive.open_uri(http_build_url) + + def _unpack_build(self, + base_build_dir, + build_dir, + build_url, + http_build_url=None): + """Unpacks a build from a build url into the build directory.""" + # Track time taken to unpack builds so that it doesn't silently regress. + start_time = time.time() + + unpack_everything = environment.get_value( + 'UNPACK_ALL_FUZZ_TARGETS_AND_FILES') + + logs.info(f'Unpacking build from {build_url} into {build_dir}.') + + # Free up memory. + utils.python_gc() + + # Remove the current build. + logs.info(f'Removing build directory {build_dir}.') + if not shell.remove_directory(build_dir, recreate=True): + logs.error(f'Unable to clear build directory {build_dir}.') + _handle_unrecoverable_error_on_windows() + return False + + try: + with self._open_build_archive(base_build_dir, build_dir, build_url, + http_build_url, unpack_everything) as build: unpack_everything = environment.get_value( 'UNPACK_ALL_FUZZ_TARGETS_AND_FILES') @@ -463,8 +529,7 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): 'Cleared all data directories to free up space, exiting.') # Unpack the local build archive. - logs.info( - f'Unpacking build archive {build_local_archive} to {build_dir}.') + logs.info(f'Unpacking build archive {build_url} to {build_dir}.') trusted = not utils.is_oss_fuzz() build.unpack( @@ -473,7 +538,7 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): trusted=trusted) except Exception as e: - logs.error(f'Unable to unpack build archive {build_local_archive}: {e}') + logs.error(f'Unable to unpack build archive {build_url}: {e}') return False if unpack_everything: @@ -484,9 +549,6 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): partial_build_file_path = os.path.join(build_dir, PARTIAL_BUILD_FILE) utils.write_data_to_file('', partial_build_file_path) - # No point in keeping the archive around. - shell.remove_file(build_local_archive) - elapsed_time = time.time() - start_time elapsed_mins = elapsed_time / 60. log_func = logs.warning if elapsed_time > UNPACK_TIME_LIMIT else logs.info @@ -605,10 +667,20 @@ def __init__(self, revision, build_url, build_prefix='', - fuzz_target=None): + fuzz_target=None, + http_build_url=None): + """RegularBuild constructor. See Build constructor for other parameters. + + Args: + http_build_url: the http build URL. E.g. + http://storage.com/foo/bar.zip. Defaults to None. + build_url: the GCS bucket URL where the build is stored. E.g. + gs://foo/bar.zip. + """ super().__init__( base_build_dir, revision, build_prefix, fuzz_target=fuzz_target) self.build_url = build_url + self.http_build_url = http_build_url if build_prefix: self.build_dir_name = build_prefix.lower() @@ -630,7 +702,7 @@ def setup(self): build_update = not self.exists() if build_update: if not self._unpack_build(self.base_build_dir, self.build_dir, - self.build_url): + self.build_url, self.http_build_url): return False logs.info('Retrieved build r%d.' % self.revision) @@ -1116,6 +1188,9 @@ def setup_regular_build(revision, return None + # build_url points to a GCP bucket, and we're only converting it to its HTTP + # endpoint so that we can use remote unzipping. + http_build_url = build_url.replace('gs://', 'https://storage.googleapis.com/') base_build_dir = _base_build_dir(bucket_path) build_class = RegularBuild @@ -1133,7 +1208,8 @@ def setup_regular_build(revision, revision, build_url, build_prefix=build_prefix, - fuzz_target=fuzz_target) + fuzz_target=fuzz_target, + http_build_url=http_build_url) if build.setup(): result = build else: diff --git a/src/clusterfuzz/_internal/system/archive.py b/src/clusterfuzz/_internal/system/archive.py index e3a7ed185d..f0b180be27 100644 --- a/src/clusterfuzz/_internal/system/archive.py +++ b/src/clusterfuzz/_internal/system/archive.py @@ -15,6 +15,7 @@ import abc import dataclasses +import io import os import tarfile from typing import BinaryIO @@ -24,6 +25,8 @@ from typing import Union import zipfile +import requests + from clusterfuzz._internal.metrics import logs FILE_ATTRIBUTE = '0o10' @@ -39,6 +42,11 @@ ARCHIVE_FILE_EXTENSIONS = ( ZIP_FILE_EXTENSIONS + TAR_FILE_EXTENSIONS + LZMA_FILE_EXTENSIONS) +# This is the size of the internal buffer that we're using to cache HTTP +# range bytes. +HTTP_BUFFER_SIZE = 50 * 1024 * 1024 # 50 MB +HTTP_REQUEST_TIMEOUT = 5 # 5 seconds + StrBytesPathLike = Union[str, bytes, os.PathLike] MatchCallback = Callable[[str], bool] @@ -373,9 +381,9 @@ class ArchiveError(Exception): """ArchiveError""" -# pylint: disable=redefined-builtin -def open(archive_path: str, - file_obj: Optional[BinaryIO] = None) -> ArchiveReader: +def open( # pylint: disable=redefined-builtin + archive_path: str, + file_obj: Optional[BinaryIO] = None) -> ArchiveReader: """Opens the archive and gets the appropriate archive reader based on the `archive_path`. If `file_obj` is not none, the binary file-like object will be used to read the archive instead of opening `archive_path`. @@ -407,6 +415,154 @@ class ArchiveType: TAR_LZMA = 3 +@dataclasses.dataclass +class CacheBlock: + """Represents a cache entry for the HttpZipFile. + Members: + start: the start of the byte range in the file. + end: the end of the byte range in the file (inclusive). + content: the sequence of bytes for this range. + """ + start: int + end: int + content: bytes + + +class HttpZipFile(io.IOBase): + """This class is a very simple file-like object representation of a zip file + over HTTP. + It uses the 'Accept-Ranges' feature of HTTP to fetch parts (or all) of the + file. + See https://docs.python.org/3/glossary.html#term-file-like-object for the + definition of a file-like object. + """ + + @staticmethod + def is_uri_compatible(uri: str) -> bool: + try: + res = requests.head(uri, timeout=HTTP_REQUEST_TIMEOUT) + return res.headers.get('Accept-Ranges') is not None + except Exception as e: + logs.warning(f'Request to {uri} failed: {e}') + return False + + def __init__(self, uri: str): + self.uri = uri + self.session = requests.Session() + # This slows down the overall performances, because compressing something + # that's already encoded is unlikely to shrink its size, and we'll just + # waste time decoding the data twice. + self.session.headers.pop('Accept-Encoding') + resp = self.session.head(self.uri) + self.file_size = int(resp.headers.get('Content-Length', default=0)) + self._current_block = CacheBlock(0, 0, b'') + self._pos = 0 + assert resp.headers.get('Accept-Ranges') is not None + + def seekable(self) -> bool: + """Whether this is seekable.""" + return True + + def readable(self) -> bool: + """Whether this stream is readable.""" + return True + + def seek(self, offset: int, from_what: int = 0) -> int: + """Provides a seek implementation. + + Args: + offset: the offset + from_what: from where the offset should be computed. Defaults to 0. + """ + if from_what == 0: + self._pos = offset + elif from_what == 1: + self._pos = self._pos + offset + else: + self._pos = self.file_size + offset + self._pos = max(min(self._pos, self.file_size), 0) + return self._pos + + def tell(self) -> int: + """Provides a tell implementation. Returns the current cursor position. + + Returns: + the current cursor position. + """ + return self._pos + + def _fetch_from_http(self, start: int, end: int) -> bytes: + """Fetches the bytes from the HTTP URI using the ranges. + + Args: + start: the start of the range. + end (int): the end of the range (inclusive). + + Returns: + the read bytes. + """ + resp = self.session.get(self.uri, headers={'Range': f'bytes={start}-{end}'}) + return resp.content + + def _fetch_from_cache(self, start: int, end: int) -> bytes: + """Fetches the bytes from the cache. If the cache does not contain the + bytes, it will read bytes from HTTP and cache the result. + + Args: + start: the start of the range. + end: the end of the range (inclusive). + + Returns: + the read bytes. + """ + if self._current_block.start > start or self._current_block.end < end: + # We're reading at most `HTTP_BUFFER_SIZE` from the HTTP range request. + read_ahead_end = min(self.file_size - 1, start + HTTP_BUFFER_SIZE - 1) + self._current_block = CacheBlock( + start, read_ahead_end, self._fetch_from_http(start, read_ahead_end)) + inner_start = start - self._current_block.start + inner_end = end - self._current_block.start + return self._current_block.content[inner_start:inner_end + 1] + + def read(self, size: Optional[int] = -1) -> bytes: + """Read into this file-object. + + Args: + size: the size of the read. If not specified, reads all. + + Returns: + the read bytes. + """ + if not size or size == -1: + size = self.file_size - self._pos + read_size = min(self.file_size - self._pos, size) + end_range = self._pos + read_size - 1 + # If the request if greater than the buffer size, we're not caching + # the request, because it is likely that we are trying to read a big + # file in the archive, in which case it's unlikely that we read it again + # in a subsequent read. For small reads, however, it is likely that we are, + # for example, unpacking a whole directory. In this case, reading ahead + # helps reducing the number of HTTP requests that are being made. + if read_size > HTTP_BUFFER_SIZE: + content = self._fetch_from_http(self._pos, end_range) + else: + content = self._fetch_from_cache(self._pos, end_range) + self._pos += read_size + return content + + def read1(self, size: Optional[int] = -1) -> bytes: + """Read into the file-object in at most on system call. + This is exactly similar to the read implementation in our case. + + Args: + size: The size to read. Defaults to -1. + + Returns: + the read bytes. + """ + return self.read(size) + + def get_archive_type(archive_path: str) -> ArchiveType: """Get the type of the archive. diff --git a/src/clusterfuzz/_internal/tests/core/bot/fuzzers/utils_test.py b/src/clusterfuzz/_internal/tests/core/bot/fuzzers/utils_test.py index eaf399866b..30b3951e5c 100644 --- a/src/clusterfuzz/_internal/tests/core/bot/fuzzers/utils_test.py +++ b/src/clusterfuzz/_internal/tests/core/bot/fuzzers/utils_test.py @@ -24,7 +24,7 @@ class IsFuzzTargetLocalTest(unittest.TestCase): - """is_fuzz_target_local tests.""" + """is_fuzz_target tests.""" def setUp(self): test_helpers.patch_environ(self) @@ -42,74 +42,84 @@ def _create_file(self, name, contents=b''): def test_not_a_fuzzer_invalid_name(self): path = self._create_file('abc$_fuzzer', contents=b'LLVMFuzzerTestOneInput') - self.assertFalse(utils.is_fuzz_target_local(path)) + self.assertFalse(utils.is_fuzz_target(path)) def test_not_a_fuzzer_blocklisted_name(self): path = self._create_file( 'jazzer_driver', contents=b'LLVMFuzzerTestOneInput') - self.assertFalse(utils.is_fuzz_target_local(path)) + self.assertFalse(utils.is_fuzz_target(path)) def test_not_a_fuzzer_without_extension(self): path = self._create_file('abc', contents=b'anything') - self.assertFalse(utils.is_fuzz_target_local(path)) + self.assertFalse(utils.is_fuzz_target(path)) def test_not_a_fuzzer_with_extension(self): path = self._create_file('abc.dict', contents=b'LLVMFuzzerTestOneInput') - self.assertFalse(utils.is_fuzz_target_local(path)) + self.assertFalse(utils.is_fuzz_target(path)) def test_not_a_fuzzer_with_extension_and_suffix(self): path = self._create_file( 'abc_fuzzer.dict', contents=b'LLVMFuzzerTestOneInput') - self.assertFalse(utils.is_fuzz_target_local(path)) + self.assertFalse(utils.is_fuzz_target(path)) def test_fuzzer_posix(self): path = self._create_file('abc_fuzzer', contents=b'anything') - self.assertTrue(utils.is_fuzz_target_local(path)) + self.assertTrue(utils.is_fuzz_target(path)) def test_fuzzer_win(self): path = self._create_file('abc_fuzzer.exe', contents=b'anything') - self.assertTrue(utils.is_fuzz_target_local(path)) + self.assertTrue(utils.is_fuzz_target(path)) def test_fuzzer_py(self): path = self._create_file('abc_fuzzer.par', contents=b'anything') - self.assertTrue(utils.is_fuzz_target_local(path)) + self.assertTrue(utils.is_fuzz_target(path)) def test_fuzzer_not_exist(self): - self.assertFalse(utils.is_fuzz_target_local('/not_exist_fuzzer')) + self.assertFalse(utils.is_fuzz_target('/not_exist_fuzzer')) def test_fuzzer_without_suffix(self): path = self._create_file( 'abc', contents=b'anything\nLLVMFuzzerTestOneInput') - self.assertTrue(utils.is_fuzz_target_local(path)) + self.assertTrue(utils.is_fuzz_target(path)) def test_fuzzer_with_name_regex_match(self): environment.set_value('FUZZER_NAME_REGEX', '.*_custom$') path = self._create_file('a_custom', contents=b'anything') - self.assertTrue(utils.is_fuzz_target_local(path)) + self.assertTrue(utils.is_fuzz_target(path)) def test_fuzzer_with_file_string_and_without_name_regex_match(self): environment.set_value('FUZZER_NAME_REGEX', '.*_custom$') path = self._create_file( 'nomatch', contents=b'anything\nLLVMFuzzerTestOneInput') - self.assertFalse(utils.is_fuzz_target_local(path)) + self.assertFalse(utils.is_fuzz_target(path)) def test_fuzzer_without_file_string_and_without_name_regex_match(self): environment.set_value('FUZZER_NAME_REGEX', '.*_custom$') path = self._create_file('nomatch', contents=b'anything') - self.assertFalse(utils.is_fuzz_target_local(path)) + self.assertFalse(utils.is_fuzz_target(path)) def test_fuzzer_with_fuzzer_name_and_without_name_regex_match(self): environment.set_value('FUZZER_NAME_REGEX', '.*_custom$') path = self._create_file( 'a_fuzzer', contents=b'anything\nLLVMFuzzerTestOneInput') - self.assertTrue(utils.is_fuzz_target_local(path)) + self.assertTrue(utils.is_fuzz_target(path)) def test_file_handle(self): """Test with a file handle.""" + + class MockFileOpener: + + def __init__(self, fileobj): + self.fileobj = fileobj + + def __call__(self, _): + return self.fileobj + path = self._create_file( 'abc', contents=b'anything\nLLVMFuzzerTestOneInput') - with open(path, 'rb') as f: - self.assertTrue(utils.is_fuzz_target_local('name', f)) + f_opener = MockFileOpener(open(path, 'rb')) + self.assertTrue(utils.is_fuzz_target('name', f_opener)) + self.assertTrue(f_opener.fileobj.closed) class GetSupportingFileTest(unittest.TestCase): diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py index 1f96e5d39f..c56180ea1f 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_archive_test.py @@ -132,15 +132,15 @@ def setUp(self): test_helpers.patch(self, [ 'clusterfuzz._internal.system.archive.ArchiveReader', 'clusterfuzz._internal.system.archive.open', - 'clusterfuzz._internal.bot.fuzzers.utils.is_fuzz_target_local', + 'clusterfuzz._internal.bot.fuzzers.utils.is_fuzz_target', ]) self.mock.open.return_value.list_members.return_value = [] - self.mock.is_fuzz_target_local.side_effect = self._mock_is_fuzz_target_local + self.mock.is_fuzz_target.side_effect = self._mock_is_fuzz_target self.build = build_archive.ChromeBuildArchive(self.mock.open.return_value) self._declared_fuzzers = [] self.maxDiff = None - def _mock_is_fuzz_target_local(self, target, _=None): + def _mock_is_fuzz_target(self, target, _=None): target = os.path.basename(target) return target in self._declared_fuzzers diff --git a/src/clusterfuzz/_internal/tests/core/build_management/build_manager_test.py b/src/clusterfuzz/_internal/tests/core/build_management/build_manager_test.py index cd1dd9c670..c8f273cc24 100644 --- a/src/clusterfuzz/_internal/tests/core/build_management/build_manager_test.py +++ b/src/clusterfuzz/_internal/tests/core/build_management/build_manager_test.py @@ -46,6 +46,7 @@ def _mock_unpack_build( _, build_dir, build_url, + http_build_url=None, ): """Mock _unpack_build.""" if not shell.remove_directory(build_dir, recreate=True): @@ -305,7 +306,8 @@ def test_setup(self): self.mock._unpack_build.assert_called_once_with( mock.ANY, '/builds/path_be4c9ca0267afcd38b7c1a3eebb5998d0908f025', '/builds/path_be4c9ca0267afcd38b7c1a3eebb5998d0908f025/revisions', - 'gs://path/file-release-2.zip') + 'gs://path/file-release-2.zip', + 'https://storage.googleapis.com/path/file-release-2.zip') self._assert_env_vars() self.assertEqual(os.environ['APP_REVISION'], '2') @@ -323,6 +325,20 @@ def test_setup(self): # Non-existent revisions do not result in any builds being set up. self.assertIsNone(build_manager.setup_regular_build(3)) + def test_setup_with_http_url(self): + """Tests setup build with compatible http remote zipping.""" + os.environ['RELEASE_BUILD_BUCKET_PATH'] = ( + 'gs://path/file-release-([0-9]+).zip') + self.mock.get_build_urls_list.return_value = [ + 'gs://path/file-release-10.zip', + 'gs://path/file-release-2.zip', + 'gs://path/file-release-1.zip', + ] + self.mock.time.return_value = 1000.0 + build = build_manager.setup_regular_build(2) + self.assertEqual(build.http_build_url, + 'https://storage.googleapis.com/path/file-release-2.zip') + def test_setup_with_extra(self): """Tests setting up a build with an extra build set.""" os.environ['RELEASE_BUILD_BUCKET_PATH'] = ( @@ -355,12 +371,13 @@ def mock_get_build_urls_list(bucket_path, reverse=True): mock.call( mock.ANY, '/builds/path_be4c9ca0267afcd38b7c1a3eebb5998d0908f025', '/builds/path_be4c9ca0267afcd38b7c1a3eebb5998d0908f025/revisions', - 'gs://path/file-release-2.zip'), + 'gs://path/file-release-2.zip', + 'https://storage.googleapis.com/path/file-release-2.zip'), mock.call( mock.ANY, '/builds/path_be4c9ca0267afcd38b7c1a3eebb5998d0908f025/revisions', '/builds/path_be4c9ca0267afcd38b7c1a3eebb5998d0908f025/revisions/__extra_build', - 'gs://path2/file-release-2.zip') + 'gs://path2/file-release-2.zip', None) ]) self._assert_env_vars() @@ -411,6 +428,8 @@ def setUp(self): 'clusterfuzz._internal.bot.fuzzers.utils.get_fuzz_targets', 'clusterfuzz._internal.build_management.build_archive.BuildArchive', 'clusterfuzz._internal.build_management.build_archive.open', + 'clusterfuzz._internal.build_management.build_archive.open_uri', + 'clusterfuzz._internal.build_management.build_archive.unzip_over_http_compatible', 'clusterfuzz._internal.build_management.build_manager.get_build_urls_list', 'clusterfuzz._internal.build_management.build_manager._make_space', 'clusterfuzz._internal.system.shell.clear_temp_directory', @@ -440,9 +459,14 @@ def setUp(self): self.mock.open.return_value.__enter__.return_value.list_fuzz_targets.return_value = [ 'target1', 'target2', 'target3' ] + self.mock.open_uri.return_value.__enter__.return_value.list_fuzz_targets.return_value = [ + 'target1', 'target2', 'target3' + ] + self.mock.unzip_over_http_compatible.return_value = False self.mock._make_space.return_value = True self.mock.open.return_value.__enter__.return_value.unpack.return_value = True + self.mock.open_uri.return_value.__enter__.return_value.unpack.return_value = True self.mock.time.return_value = 1000.0 os.environ['RELEASE_BUILD_BUCKET_PATH'] = ( @@ -671,6 +695,76 @@ def __eq__(_, fuzz_target): # pylint: disable=no-self-argument trusted=True) ]) + def test_setup_fuzz_over_http(self): + """Tests setup fuzzing with compatible remote unzipping.""" + os.environ['TASK_NAME'] = 'fuzz' + os.environ['RELEASE_BUILD_URL_PATTERN'] = ( + 'https://example.com/path/file-release-([0-9]+).zip') + os.environ['ALLOW_UNPACK_OVER_HTTP'] = "True" + self.mock.unzip_over_http_compatible.return_value = True + self.mock.time.return_value = 1000.0 + build = build_manager.setup_regular_build(2) + self.assertIsInstance(build, build_manager.RegularBuild) + self.assertEqual(_get_timestamp(build.base_build_dir), 1000.0) + + self._assert_env_vars() + self.assertEqual(os.environ['APP_REVISION'], '2') + + self.assertEqual( + 1, + self.mock.open_uri.return_value.__enter__.return_value.unpack.call_count + ) + self.assertEqual(1, self.mock.unzip_over_http_compatible.call_count) + + # Test setting up build again. + self.mock.time.return_value = 1005.0 + build = build_manager.setup_regular_build(2) + + self.assertIsInstance(build, build_manager.RegularBuild) + + self.assertEqual(_get_timestamp(build.base_build_dir), 1005.0) + + # Since it was a partial build, the unpack should be called again. + self.assertEqual( + 2, + self.mock.open_uri.return_value.__enter__.return_value.unpack.call_count + ) + + self.assertCountEqual(['target1', 'target2', 'target3'], build.fuzz_targets) + + def test_setup_fuzz_over_http_unpack_all(self): + """Tests setup fuzzing with compatible remote unzipping.""" + os.environ['UNPACK_ALL_FUZZ_TARGETS_AND_FILES'] = 'True' + os.environ['TASK_NAME'] = 'fuzz' + os.environ['RELEASE_BUILD_URL_PATTERN'] = ( + 'https://example.com/path/file-release-([0-9]+).zip') + self.mock.unzip_over_http_compatible.return_value = True + self.mock.time.return_value = 1000.0 + build = build_manager.setup_regular_build(2) + self.assertIsInstance(build, build_manager.RegularBuild) + self.assertEqual(_get_timestamp(build.base_build_dir), 1000.0) + + self._assert_env_vars() + self.assertEqual(os.environ['APP_REVISION'], '2') + + self.assertEqual( + 1, self.mock.open.return_value.__enter__.return_value.unpack.call_count) + + # Test setting up build again. + os.environ['FUZZ_TARGET'] = '' + self.mock.time.return_value = 1005.0 + build = build_manager.setup_regular_build(2) + + self.assertIsInstance(build, build_manager.RegularBuild) + + self.assertEqual(_get_timestamp(build.base_build_dir), 1005.0) + + # Not a partial build, so unpack shouldn't be called again. + self.assertEqual( + 1, self.mock.open.return_value.__enter__.return_value.unpack.call_count) + + self.assertCountEqual(['target1', 'target2', 'target3'], build.fuzz_targets) + def test_delete(self): """Test deleting this build.""" os.environ['FUZZ_TARGET'] = 'fuzz_target' @@ -1237,7 +1331,7 @@ def tearDown(self): # pylint: disable=unused-argument def mock_unpack_build(self, test_build_dir, actual_self, base_build_dir, - build_dir, url): + build_dir, url, http_build_url): test_data_dir = os.path.join( os.path.dirname(os.path.abspath(__file__)), 'build_manager_data', test_build_dir) diff --git a/src/clusterfuzz/fuzz/__init__.py b/src/clusterfuzz/fuzz/__init__.py index c5316c0358..6279b798a9 100644 --- a/src/clusterfuzz/fuzz/__init__.py +++ b/src/clusterfuzz/fuzz/__init__.py @@ -42,9 +42,9 @@ def get_engine(name): return engine_impl -def is_fuzz_target(file_path, file_handle=None): +def is_fuzz_target(file_path, file_opener=None): """Returns whether |file_path| is a fuzz target.""" - return utils.is_fuzz_target_local(file_path, file_handle) + return utils.is_fuzz_target(file_path, file_opener) def get_fuzz_targets(directory):