From 1ea3e07e7fe19bd353b0a636ee008b6b8949c1b7 Mon Sep 17 00:00:00 2001 From: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> Date: Wed, 27 Nov 2024 21:13:50 -0500 Subject: [PATCH] Remove unnecessary check for UUID collision (#4445) This should speed up preprocess for fuzz task. --- .../_internal/google_cloud_utils/storage.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/clusterfuzz/_internal/google_cloud_utils/storage.py b/src/clusterfuzz/_internal/google_cloud_utils/storage.py index d9d4141ed2..2d38897f16 100644 --- a/src/clusterfuzz/_internal/google_cloud_utils/storage.py +++ b/src/clusterfuzz/_internal/google_cloud_utils/storage.py @@ -1396,24 +1396,16 @@ def get_arbitrary_signed_upload_urls(remote_directory: str, num_uploads: int) -> List[str]: """Returns |num_uploads| number of signed upload URLs to upload files with unique arbitrary names to remote_directory.""" - # We verify there are no collisions for uuid4s in CF because it would be bad - # if there is a collision and in most cases it's cheap (and because we - # probably didn't understand the likelihood of this happening when we started, - # see https://stackoverflow.com/a/24876263). It is not cheap if we had to do - # this 10,000 times. Instead create a prefix filename and check that no file - # has that name. Then the arbitrary names will all use that prefix. + # We don't verify there are no collisions for uuid4s because it's extremely + # unlikely, takes time, and it's basically benign if it happens (it + # won't) since we will just clobber some other corpus uploads from + # the same day. unique_id = uuid.uuid4() base_name = unique_id.hex if not remote_directory.endswith('/'): remote_directory = remote_directory + '/' # The remote_directory ends with slash. base_path = f'{remote_directory}{base_name}' - base_search_path = f'{base_path}*' - if exists(base_search_path): - # Raise the error and let retry go again. There is a vanishingly small - # chance that we get more collisions. This is vulnerable to races, but is - # probably unneeded anyway. - raise ValueError(f'UUID collision found {str(unique_id)}') urls = (f'{base_path}-{idx}' for idx in range(num_uploads)) logs.info('Signing URLs for arbitrary uploads.')