Skip to content

Commit

Permalink
Allow configuring default corpora bucket location. (#4479)
Browse files Browse the repository at this point in the history
Allow instances to specify the GCS bucket location for data bundle
buckets in `project.yaml` as a new key: `data_bundle_bucket_location`.

This will allow creating regional buckets instead of using the default
`US` multi-region, which results in high data transfer costs in Chrome's
instance.
  • Loading branch information
letitz authored Dec 17, 2024
1 parent 65e0e7b commit e4b0136
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 19 deletions.
3 changes: 2 additions & 1 deletion src/clusterfuzz/_internal/datastore/data_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,8 @@ def add_build_metadata(job_type,
def create_data_bundle_bucket_and_iams(data_bundle_name, emails):
"""Creates a data bundle bucket and adds iams for access."""
bucket_name = get_data_bundle_bucket_name(data_bundle_name)
if not storage.create_bucket_if_needed(bucket_name):
location = local_config.ProjectConfig().get('data_bundle_bucket_location')
if not storage.create_bucket_if_needed(bucket_name, location=location):
return False

client = storage.create_discovery_storage_client()
Expand Down
16 changes: 11 additions & 5 deletions src/clusterfuzz/_internal/google_cloud_utils/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
class StorageProvider:
"""Core storage provider interface."""

def create_bucket(self, name, object_lifecycle, cors):
def create_bucket(self, name, object_lifecycle, cors, location):
    """Create a new bucket.

    Abstract interface method; concrete storage providers must override it.

    Args:
      name: Name of the bucket to create.
      object_lifecycle: Lifecycle configuration for objects in the bucket,
        or None for no lifecycle rules.
      cors: CORS configuration for the bucket, or None.
      location: Bucket location (e.g. a GCS region), or None to use the
        provider's default location.

    Raises:
      NotImplementedError: Always, in this base class.
    """
    raise NotImplementedError

Expand Down Expand Up @@ -198,7 +198,7 @@ def _chunk_size(self):

return None

def create_bucket(self, name, object_lifecycle, cors):
def create_bucket(self, name, object_lifecycle, cors, location):
"""Create a new bucket."""
project_id = utils.get_application_id()
request_body = {'name': name}
Expand All @@ -208,6 +208,9 @@ def create_bucket(self, name, object_lifecycle, cors):
if cors:
request_body['cors'] = cors

if location:
request_body['location'] = location

client = create_discovery_storage_client()
try:
client.buckets().insert(project=project_id, body=request_body).execute()
Expand Down Expand Up @@ -543,7 +546,7 @@ def convert_path_for_write(self, remote_path, directory=OBJECTS_DIR):

return fs_path

def create_bucket(self, name, object_lifecycle, cors):
def create_bucket(self, name, object_lifecycle, cors, location):
"""Create a new bucket."""
bucket_path = self._fs_bucket_path(name)
if os.path.exists(bucket_path):
Expand Down Expand Up @@ -905,13 +908,16 @@ def set_bucket_iam_policy(client, bucket_name, iam_policy):
return None


def create_bucket_if_needed(bucket_name, object_lifecycle=None, cors=None):
def create_bucket_if_needed(bucket_name,
object_lifecycle=None,
cors=None,
location=None):
"""Creates a GCS bucket."""
provider = _provider()
if provider.get_bucket(bucket_name):
return True

if not provider.create_bucket(bucket_name, object_lifecycle, cors):
if not provider.create_bucket(bucket_name, object_lifecycle, cors, location):
return False

time.sleep(CREATE_BUCKET_DELAY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def setUp(self):
self.local_gcs_buckets_path = tempfile.mkdtemp()
os.environ['LOCAL_GCS_BUCKETS_PATH'] = self.local_gcs_buckets_path
os.environ['TEST_BLOBS_BUCKET'] = 'blobs-bucket'
storage._provider().create_bucket('blobs-bucket', None, None)
storage._provider().create_bucket('blobs-bucket', None, None, None)
helpers.patch(self, [
'clusterfuzz._internal.bot.fuzzers.engine_common.unpack_seed_corpus_if_needed',
'clusterfuzz._internal.bot.tasks.task_creation.create_tasks',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import parameterized
from pyfakefs import fake_filesystem_unittest

from clusterfuzz._internal.config import local_config
from clusterfuzz._internal.datastore import data_handler
from clusterfuzz._internal.datastore import data_types
from clusterfuzz._internal.google_cloud_utils import blobs
Expand Down Expand Up @@ -73,14 +72,27 @@ class DataHandlerTest(unittest.TestCase):

def setUp(self):
helpers.patch_environ(self)
project_config_get = local_config.ProjectConfig.get
helpers.patch(self, [
'clusterfuzz._internal.base.utils.default_project_name',
'clusterfuzz._internal.config.db_config.get',
('project_config_get',
'clusterfuzz._internal.config.local_config.ProjectConfig.get'),
'clusterfuzz._internal.config.local_config.ProjectConfig',
('get_storage_provider',
'clusterfuzz._internal.google_cloud_utils.storage._provider'),
'clusterfuzz._internal.google_cloud_utils.storage.create_discovery_storage_client',
'clusterfuzz._internal.google_cloud_utils.storage.get_bucket_iam_policy',
])

self.mock.default_project_name.return_value = 'project'

self.storage_provider = mock.Mock()
self.mock.get_storage_provider.return_value = self.storage_provider

self.project_config = {}
self.mock.ProjectConfig.return_value = self.project_config

# Disable artificial delay when creating buckets.
storage.CREATE_BUCKET_DELAY = 0

self.job = data_types.Job(
name='linux_asan_chrome',
environment_string=('SUMMARY_PREFIX = project\n'
Expand Down Expand Up @@ -175,8 +187,6 @@ def setUp(self):

environment.set_value('FUZZ_DATA', '/tmp/inputs/fuzzer-common-data-bundles')
environment.set_value('FUZZERS_DIR', '/tmp/inputs/fuzzers')
self.mock.default_project_name.return_value = 'project'
self.mock.project_config_get.side_effect = project_config_get

def test_find_testcase(self):
"""Ensure that find_testcase behaves as expected."""
Expand Down Expand Up @@ -449,15 +459,34 @@ def test_get_issue_summary_bad_cast_without_crash_function(self):
summary, 'project: Bad-cast to blink::LayoutBlock from '
'blink::LayoutTableSection')

def test_create_data_bundle_bucket_and_iams(self):
    """Test creating a data bundle bucket with no configured location."""
    provider = self.storage_provider
    provider.get_bucket.return_value = None
    provider.create_bucket.return_value = True

    result = data_handler.create_data_bundle_bucket_and_iams('test', [])
    self.assertTrue(result)

    # No `data_bundle_bucket_location` in the project config, so the
    # location argument passed through to the provider must be None.
    expected_bucket = 'test-corpus.test-clusterfuzz.appspot.com'
    provider.create_bucket.assert_called_with(expected_bucket, None, None,
                                              None)

def test_create_data_bundle_bucket_and_iams_with_location(self):
    """Test creating a data bundle bucket with a configured location."""
    provider = self.storage_provider
    provider.get_bucket.return_value = None
    provider.create_bucket.return_value = True

    self.project_config['data_bundle_bucket_location'] = 'NORTH-POLE'

    result = data_handler.create_data_bundle_bucket_and_iams('test', [])
    self.assertTrue(result)

    # The configured location must be forwarded to the provider.
    expected_bucket = 'test-corpus.test-clusterfuzz.appspot.com'
    provider.create_bucket.assert_called_with(expected_bucket, None, None,
                                              'NORTH-POLE')

def test_get_data_bundle_name_default(self):
    """Test getting the default data bundle bucket name."""
    actual = data_handler.get_data_bundle_bucket_name('test')
    self.assertEqual('test-corpus.test-clusterfuzz.appspot.com', actual)

def test_get_data_bundle_name_custom_suffix(self):
    """Test getting the data bundle bucket name with custom suffix."""
    # A configured `bucket_domain_suffix` replaces the default
    # <project>.appspot.com domain in the bucket name.
    self.project_config['bucket_domain_suffix'] = 'custom.suffix.com'
    actual = data_handler.get_data_bundle_bucket_name('test')
    self.assertEqual('test-corpus.custom.suffix.com', actual)

Expand Down Expand Up @@ -485,7 +514,7 @@ def test_filter_stack_trace_upload(self):
exceeds limit and an upload_url is provided."""
blob_name = blobs.generate_new_blob_name()
blobs_bucket = 'blobs_bucket'
storage._provider().create_bucket(blobs_bucket, None, None) # pylint: disable=protected-access
storage._provider().create_bucket(blobs_bucket, None, None, None) # pylint: disable=protected-access

gcs_path = storage.get_cloud_storage_file_path(blobs_bucket, blob_name)
signed_upload_url = storage.get_signed_upload_url(gcs_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def setUp(self):
test_utils.set_up_pyfakefs(self)
os.environ['LOCAL_GCS_BUCKETS_PATH'] = '/local'
os.environ['TEST_BLOBS_BUCKET'] = 'blobs-bucket'
self.provider.create_bucket('blobs-bucket', None, None)
self.provider.create_bucket('blobs-bucket', None, None, None)

def test_get_blob_signed_upload_url_then_delete_blob(self):
"""Tests get_blob_signed_upload_url."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def setUp(self):

def test_create_bucket(self):
    """Test create_bucket."""
    bucket_name = 'test-bucket'
    # No lifecycle, CORS, or location configuration.
    self.provider.create_bucket(bucket_name, None, None, None)
    # The local provider backs each bucket with a directory under /local.
    self.assertTrue(os.path.isdir('/local/' + bucket_name))

def test_get_bucket(self):
Expand Down Expand Up @@ -281,7 +281,7 @@ def test_download_signed_url(self):
def test_upload_signed_url(self):
    """Tests upload_signed_url."""
    contents = b'aa'
    self.provider.create_bucket('test-bucket', None, None, None)
    self.provider.upload_signed_url(contents, 'gs://test-bucket/a')
    # Verify the bytes landed in the local filesystem backing store.
    # Note: the original spuriously did `return self.assertEqual(...)`;
    # assertEqual returns None and test methods should not return values.
    with open('/local/test-bucket/objects/a', 'rb') as fp:
        self.assertEqual(fp.read(), contents)
Expand Down

0 comments on commit e4b0136

Please sign in to comment.