From c02eff7d9e610c86a83324f9f14e72aa39803dfd Mon Sep 17 00:00:00 2001 From: Kevin Meinhardt Date: Wed, 13 Nov 2024 21:08:24 +0100 Subject: [PATCH 1/2] Upload multiple filters Remove unecessary and redundant code Fix ordering of cache/stash + increase validity of tests Upload multiple filters More logs + correct handling of attachment_type Verify cron passes correct args to task TMP: Ignore soft blocks Add waffle switch Fix invalid class reference Update to correct waffle switch Update to fix the test reafactoring add missing tests Apply suggestions from code review Co-authored-by: William Durand Updates from review Ensure blocks of type X are excluded from stash if filter of type X is also being uploaded TMP: squash Better exclusion of stashes from updated filters + more comment resolution Delete correct files from remote settings: - delete any existing attachments matching filters we are reuploading - delete any stashes that are older than the oldest filter Add tests for shape of stash Add test for the craziness that is stash Simple is better than complex Compare to base filter when determining stash Use block type level base filter Apply suggestions from code review Co-authored-by: William Durand --- src/olympia/blocklist/cron.py | 22 +- .../management/commands/export_blocklist.py | 10 +- src/olympia/blocklist/mlbf.py | 116 ++++-- src/olympia/blocklist/tests/test_commands.py | 7 +- src/olympia/blocklist/tests/test_cron.py | 271 ++++++++++---- src/olympia/blocklist/tests/test_mlbf.py | 343 ++++++++++++++++-- 6 files changed, 618 insertions(+), 151 deletions(-) diff --git a/src/olympia/blocklist/cron.py b/src/olympia/blocklist/cron.py index b3b9eab642bb..550d44e6d517 100644 --- a/src/olympia/blocklist/cron.py +++ b/src/olympia/blocklist/cron.py @@ -73,20 +73,25 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): get_last_generation_time() ) + base_filters: dict[BlockType, MLBF | None] = {key: None for key in BlockType} base_filters_to_update: List[BlockType] = [] create_stash = False # Determine which base filters need to be re uploaded # and whether a new stash needs to be created. for block_type in BlockType: - # This prevents us from updating a stash or filter based on new soft blocks. - if block_type == BlockType.SOFT_BLOCKED: + # This prevents us from updating a stash or filter based on new soft blocks + # until we are ready to enable soft blocking. + if block_type == BlockType.SOFT_BLOCKED and not waffle.switch_is_active( + 'enable-soft-blocking' + ): log.info( 'Skipping soft-blocks because enable-soft-blocking switch is inactive' ) continue base_filter = MLBF.load_from_storage(get_base_generation_time(block_type)) + base_filters[block_type] = base_filter # Add this block type to the list of filters to be re-uploaded. if ( @@ -111,13 +116,24 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): 'blocklist.cron.upload_mlbf_to_remote_settings.blocked_count', len(mlbf.data.blocked_items), ) + statsd.incr( + 'blocklist.cron.upload_mlbf_to_remote_settings.soft_blocked_count', + len(mlbf.data.soft_blocked_items), + ) statsd.incr( 'blocklist.cron.upload_mlbf_to_remote_settings.not_blocked_count', len(mlbf.data.not_blocked_items), ) if create_stash: - mlbf.generate_and_write_stash(previous_filter) + # We generate unified stashes, which means they can contain data + # for both soft and hard blocks. We need the base filters of each + # block type to determine what goes in a stash. + mlbf.generate_and_write_stash( + previous_mlbf=previous_filter, + blocked_base_filter=base_filters[BlockType.BLOCKED], + soft_blocked_base_filter=base_filters[BlockType.SOFT_BLOCKED], + ) for block_type in base_filters_to_update: mlbf.generate_and_write_filter(block_type) diff --git a/src/olympia/blocklist/management/commands/export_blocklist.py b/src/olympia/blocklist/management/commands/export_blocklist.py index 1846ac92246f..8c4dfbaf4c1b 100644 --- a/src/olympia/blocklist/management/commands/export_blocklist.py +++ b/src/olympia/blocklist/management/commands/export_blocklist.py @@ -4,6 +4,7 @@ import olympia.core.logger from olympia.blocklist.mlbf import MLBF +from olympia.blocklist.models import BlockType log = olympia.core.logger.getLogger('z.amo.blocklist') @@ -29,6 +30,12 @@ def add_arguments(self, parser): 'the database', default=None, ) + parser.add_argument( + '--block-type', + help='Block type to export', + default=None, + choices=[block_type.name for block_type in BlockType], + ) def load_json(self, json_path): with open(json_path) as json_file: @@ -38,6 +45,7 @@ def load_json(self, json_path): def handle(self, *args, **options): log.debug('Exporting blocklist to file') mlbf = MLBF.generate_from_db(options.get('id')) + block_type = BlockType[options.get('block_type')] if options.get('block_guids_input'): mlbf.blocked_items = list( @@ -52,4 +60,4 @@ def handle(self, *args, **options): ) ) - mlbf.generate_and_write_filter() + mlbf.generate_and_write_filter(block_type) diff --git a/src/olympia/blocklist/mlbf.py b/src/olympia/blocklist/mlbf.py index 99f437bd01ab..c9f23d75ed88 100644 --- a/src/olympia/blocklist/mlbf.py +++ b/src/olympia/blocklist/mlbf.py @@ -33,7 +33,7 @@ def ordered_diff_lists( return extras, deletes, changed_count -def generate_mlbf(stats, blocked, not_blocked): +def generate_mlbf(stats, include, exclude): log.info('Starting to generating bloomfilter') cascade = FilterCascade( @@ -41,20 +41,21 @@ def generate_mlbf(stats, blocked, not_blocked): salt=secrets.token_bytes(16), ) - len_blocked = len(blocked) - len_unblocked = len(not_blocked) + len_include = len(include) + len_exclude = len(exclude) - # We can only set error rates if both blocked and unblocked are non-empty - if len_blocked > 0 and len_unblocked > 0: - error_rates = sorted((len_blocked, len_unblocked)) + # We can only set error rates if both include and exclude are non-empty + if len_include > 0 and len_exclude > 0: + error_rates = sorted((len_include, len_exclude)) cascade.set_crlite_error_rates( include_len=error_rates[0], exclude_len=error_rates[1] ) - stats['mlbf_blocked_count'] = len(blocked) - stats['mlbf_notblocked_count'] = len(not_blocked) + # TODO: https://github.com/mozilla/addons/issues/15204 + stats['mlbf_blocked_count'] = len(include) + stats['mlbf_notblocked_count'] = len(exclude) - cascade.initialize(include=blocked, exclude=not_blocked) + cascade.initialize(include=include, exclude=exclude) stats['mlbf_version'] = cascade.version stats['mlbf_layers'] = cascade.layerCount() @@ -64,7 +65,7 @@ def generate_mlbf(stats, blocked, not_blocked): f'Filter cascade layers: {cascade.layerCount()}, ' f'bit: {cascade.bitCount()}' ) - cascade.verify(include=blocked, exclude=not_blocked) + cascade.verify(include=include, exclude=exclude) return cascade @@ -225,13 +226,34 @@ def delete(self): self.storage.rm_stored_dir(self.storage.base_location) log.info(f'Deleted {self.storage.base_location}') - def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED): + def generate_and_write_filter(self, block_type: BlockType): + """ + Generate and write the bloom filter for a given block type. + Included items will be items in the specified block type list. + Excluded items will be items in all other data types. + + We use the language of include and exclude to distinguish this concept + from blocked and unblocked which are more specific to the block type. + """ stats = {} + include_items = [] + exclude_items = [] + + # Map over the data types in the MLBFDataType enum + for data_type in MLBFDataType: + # if the data type is in the specified block type, + # add it to the include items + if data_type.name == block_type.name: + include_items = self.data[data_type] + # otherwise add items to the exclude items + else: + exclude_items.extend(self.data[data_type]) + bloomfilter = generate_mlbf( stats=stats, - blocked=self.data.blocked_items, - not_blocked=self.data.not_blocked_items, + include=include_items, + exclude=exclude_items, ) # write bloomfilter to old and new file names @@ -250,6 +272,7 @@ def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED): stats['mlbf_filesize'] = os.stat(mlbf_path).st_size log.info(json.dumps(stats)) + return bloomfilter def generate_diffs( self, previous_mlbf: 'MLBF' = None @@ -262,46 +285,59 @@ def generate_diffs( for block_type in BlockType } - def generate_and_write_stash(self, previous_mlbf: 'MLBF' = None): + def generate_and_write_stash( + self, + previous_mlbf: 'MLBF' = None, + blocked_base_filter: 'MLBF' = None, + soft_blocked_base_filter: 'MLBF' = None, + ): """ Generate and write the stash file representing changes between the previous and current bloom filters. See: https://bugzilla.mozilla.org/show_bug.cgi?id=soft-blocking - In order to support Firefox clients that don't support soft blocking, - unblocked is a union of deletions from blocked and deletions from - soft_blocked, filtering out any versions that are in the newly blocked - list. - - Versions that move from hard to soft blocked will be picked up by old - clients as no longer hard blocked by being in the unblocked list. + Since we might be generating both a filter and a stash at the exact same time, + we need to compute a stash that doesn't include the data already in the newly + created filter. - Clients supporting soft blocking will also see soft blocked versions as - unblocked, but they won't unblocked them because the list of - soft-blocked versions takes precedence over the list of unblocked - versions. + Items that are removed from one block type and added to another are + excluded from the unblocked list to prevent double counting. - Versions that move from soft to hard blocked will be picked up by - all clients in the blocked list. Note, even though the version is removed - from the soft blocked list, it is important that we do not include it - in the "unblocked" stash (like for hard blocked items) as this would - result in the version being in both blocked and unblocked stashes. + If a block type needs a new filter, we do not include any items for that + block type in the stash to prevent double counting items. """ + # Map block types to hard coded stash keys for compatibility + # with the expected keys in remote settings. + STASH_KEYS = { + BlockType.BLOCKED: 'blocked', + BlockType.SOFT_BLOCKED: 'softblocked', + } + UNBLOCKED_STASH_KEY = 'unblocked' + + # Base stash includes all of the expected keys from STASH_KEYS + unblocked + stash_json = {key: [] for key in [UNBLOCKED_STASH_KEY, *STASH_KEYS.values()]} + diffs = self.generate_diffs(previous_mlbf) blocked_added, blocked_removed, _ = diffs[BlockType.BLOCKED] - stash_json = { - 'blocked': blocked_added, - 'unblocked': blocked_removed, - } + added_items = set(blocked_added) + + if not self.should_upload_filter(BlockType.BLOCKED, blocked_base_filter): + stash_json[STASH_KEYS[BlockType.BLOCKED]] = blocked_added + stash_json[UNBLOCKED_STASH_KEY] = blocked_removed if waffle.switch_is_active('enable-soft-blocking'): soft_blocked_added, soft_blocked_removed, _ = diffs[BlockType.SOFT_BLOCKED] - stash_json['softblocked'] = soft_blocked_added - stash_json['unblocked'] = [ - unblocked - for unblocked in (blocked_removed + soft_blocked_removed) - if unblocked not in blocked_added - ] + added_items.update(soft_blocked_added) + if not self.should_upload_filter( + BlockType.SOFT_BLOCKED, soft_blocked_base_filter + ): + stash_json[STASH_KEYS[BlockType.SOFT_BLOCKED]] = soft_blocked_added + stash_json[UNBLOCKED_STASH_KEY].extend(soft_blocked_removed) + + # Remove any items that were added to a block type. + stash_json[UNBLOCKED_STASH_KEY] = [ + item for item in stash_json[UNBLOCKED_STASH_KEY] if item not in added_items + ] # write stash stash_path = self.stash_path diff --git a/src/olympia/blocklist/tests/test_commands.py b/src/olympia/blocklist/tests/test_commands.py index 317916f7a7ca..c33d9fc44482 100644 --- a/src/olympia/blocklist/tests/test_commands.py +++ b/src/olympia/blocklist/tests/test_commands.py @@ -37,6 +37,11 @@ def test_command(self): updated_by=user, ) - call_command('export_blocklist', '1') + call_command('export_blocklist', '1', '--block-type', BlockType.BLOCKED.name) mlbf = MLBF.load_from_storage(1) assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) + call_command( + 'export_blocklist', '1', '--block-type', BlockType.SOFT_BLOCKED.name + ) + mlbf = MLBF.load_from_storage(1) + assert mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) diff --git a/src/olympia/blocklist/tests/test_cron.py b/src/olympia/blocklist/tests/test_cron.py index 587ae4df2354..17118f38baf3 100644 --- a/src/olympia/blocklist/tests/test_cron.py +++ b/src/olympia/blocklist/tests/test_cron.py @@ -38,6 +38,7 @@ @freeze_time('2020-01-01 12:34:56') @override_switch('blocklist_mlbf_submit', active=True) +@override_switch('enable-soft-blocking', active=False) class TestUploadToRemoteSettings(TestCase): def setUp(self): self.user = user_factory() @@ -95,6 +96,9 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False): filter_list = [BlockType.BLOCKED.name] + if enable_soft_blocking: + filter_list.append(BlockType.SOFT_BLOCKED.name) + with override_switch('enable-soft-blocking', active=enable_soft_blocking): upload_mlbf_to_remote_settings(force_base=True) @@ -113,9 +117,7 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False): ) self.assertEqual( mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)), - # Until we are ready to enable soft blocking - # there should never be a soft block filter. - False, + enable_soft_blocking, ) assert not mlbf.storage.exists(mlbf.stash_path) @@ -159,10 +161,7 @@ def test_skip_update_unless_no_base_mlbf_for_blocked(self): @override_switch('enable-soft-blocking', active=True) def test_skip_update_unless_no_base_mlbf_for_soft_blocked_with_switch_enabled(self): self._test_skip_update_unless_no_base_mlbf( - # Until we enable soft blocking even if there is no soft block base filter - # and the switch is active, no update expected - BlockType.SOFT_BLOCKED, - filter_list=None, + BlockType.SOFT_BLOCKED, filter_list=[BlockType.SOFT_BLOCKED.name] ) def test_skip_update_unless_no_base_mlbf_for_soft_blocked_with_switch_disabled( @@ -249,10 +248,7 @@ def test_skip_update_unless_new_blocks_for_soft_blocked_with_switch_enabled(self self._test_skip_update_unless_new_blocks( block_type=BlockType.SOFT_BLOCKED, enable_soft_blocking=True, - # Until we enable soft blocking - # even if there is a new soft block - # and switch is active, expect no update - expect_update=False, + expect_update=True, ) def test_send_statsd_counts(self): @@ -270,6 +266,12 @@ def test_send_statsd_counts(self): mock.call('blocklist.cron.upload_mlbf_to_remote_settings.blocked_count', 1) in statsd_calls ) + assert ( + mock.call( + 'blocklist.cron.upload_mlbf_to_remote_settings.soft_blocked_count', 1 + ) + in statsd_calls + ) assert ( mock.call( 'blocklist.cron.upload_mlbf_to_remote_settings.not_blocked_count', 1 @@ -370,10 +372,7 @@ def test_upload_stash_unless_force_base_for_blocked_with_switch_enabled(self): self._test_upload_stash_unless_force_base( block_types=[BlockType.BLOCKED], expect_stash=True, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -399,11 +398,8 @@ def test_upload_stash_unless_force_base_for_soft_blocked_with_switch_enabled(sel """ self._test_upload_stash_unless_force_base( block_types=[BlockType.SOFT_BLOCKED], - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - expect_stash=False, - filter_list=[BlockType.BLOCKED], + expect_stash=True, + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -429,10 +425,7 @@ def test_upload_stash_unless_force_base_for_both_blocked_with_switch_enabled(sel self._test_upload_stash_unless_force_base( block_types=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], expect_stash=True, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -448,10 +441,7 @@ def test_dont_upload_stash_unless_force_base_for_both_blocked_with_switch_enable self._test_upload_stash_unless_force_base( block_types=[], expect_stash=False, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -491,14 +481,11 @@ def test_upload_stash_unless_missing_base_filter(self): with override_switch('enable-soft-blocking', active=True): upload_mlbf_to_remote_settings() - assert not mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) + assert mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) assert ( mock.call( self.current_time, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED.name], + filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], create_stash=False, ) ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list @@ -538,49 +525,175 @@ def test_upload_stash_unless_enough_changes(self): mock.call( self.current_time, filter_list=[block_type.name], - create_stash=False, + create_stash=True, ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list ) new_mlbf = MLBF.load_from_storage(self.current_time) assert new_mlbf.storage.exists(new_mlbf.filter_path(block_type)) - assert not new_mlbf.storage.exists(new_mlbf.stash_path) + assert new_mlbf.storage.exists(new_mlbf.stash_path) + with new_mlbf.storage.open(new_mlbf.stash_path, 'r') as f: + data = json.load(f) + # We expect an empty list for hard blocks because we are + # uploading a new hard filter. + assert len(data['blocked']) == 0 + # We can still see there are 2 hard blocked versions in the new filter. + assert len(new_mlbf.data.blocked_items) == 2 + assert len(data['softblocked']) == 1 @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 1) - def test_upload_stash_even_if_filter_is_updated(self): - """ - If enough changes of one type are made, update the filter, but still upload - a stash if there are changes of other types. - """ + def _test_upload_stash_and_filter( + self, + enable_soft_blocking: bool, + expected_stash: dict | None, + expected_filters: List[BlockType], + ): + with override_switch('enable-soft-blocking', active=enable_soft_blocking): + upload_mlbf_to_remote_settings() + + # Generation time is set to current time so we can load the MLBF. + mlbf = MLBF.load_from_storage(self.current_time) + + if expected_stash is None: + assert not mlbf.storage.exists( + mlbf.stash_path + ), 'Expected no stash but one exists' + else: + assert mlbf.storage.exists( + mlbf.stash_path + ), f'Expected stash {expected_stash} but none exists' + with mlbf.storage.open(mlbf.stash_path, 'r') as f: + data = json.load(f) + for key, expected_count in expected_stash.items(): + assert ( + len(data[key]) == expected_count + ), f'Expected {expected_count} {key} but got {len(data[key])}' + + for expected_filter in expected_filters: + assert mlbf.storage.exists( + mlbf.filter_path(expected_filter) + ), f'Expected filter {expected_filter} but none exists' + + def test_upload_blocked_stash_and_softblock_filter(self): + # Enough blocks for a stash + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + # Enough soft blocks for a filter + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + + # Expected stash does not change + expected_stash = { + 'blocked': 1, + # Expect no soft blocks because there are enough for a filter. + 'softblocked': 0, + # There are no unblocked versions + 'unblocked': 0, + } + + self._test_upload_stash_and_filter( + # Even though there are enough soft blocks, soft blocking is disabled + enable_soft_blocking=False, + expected_stash=expected_stash, + # Expect no filter as soft blocking is disabled + expected_filters=[], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + # Soft blocking is enabled, so we expect the same stash and a new filter + enable_soft_blocking=True, + expected_stash=expected_stash, + # Expect a soft blocked filter + expected_filters=[BlockType.SOFT_BLOCKED], + ) + + def test_upload_soft_blocked_stash_and_blocked_filter(self): + # Enough soft blocks for a stash + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + # Enough blocked versions for a filter + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + + self._test_upload_stash_and_filter( + enable_soft_blocking=False, + # Expect no stash because there are enough blocked versions for a filter + # and soft blocking is disabled + expected_stash=None, + # Expect a blocked filter + expected_filters=[BlockType.BLOCKED], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + enable_soft_blocking=True, + # Expect a stash and a blocked filter + expected_stash={ + # Expect no blocked versions because there are enough for a filter + 'blocked': 0, + # Expect a soft blocked version when there is one + # and soft blocking is enabled + 'softblocked': 1, + # There are no unblocked versions + 'unblocked': 0, + }, + # Expect a blocked filter + expected_filters=[BlockType.BLOCKED], + ) + + def test_upload_blocked_and_softblocked_filter(self): + # Enough blocked versions for a filter self._block_version(is_signed=True, block_type=BlockType.BLOCKED) self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + # Enough soft blocks for a filter + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) - upload_mlbf_to_remote_settings() - assert self.mocks[ - 'olympia.blocklist.cron.upload_filter.delay' - ].call_args_list == [ - mock.call( - self.current_time, - filter_list=[BlockType.BLOCKED.name], - create_stash=False, - ) - ] - mlbf = MLBF.load_from_storage(self.current_time) - assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) - assert not mlbf.storage.exists(mlbf.stash_path) - with override_switch('enable-soft-blocking', active=True): - self._block_version(is_signed=True, block_type=BlockType.BLOCKED) - self._block_version(is_signed=True, block_type=BlockType.BLOCKED) - upload_mlbf_to_remote_settings() - self.mocks['olympia.blocklist.cron.upload_filter.delay'].assert_called_with( - self.current_time, - filter_list=[BlockType.BLOCKED.name], - create_stash=False, - ) - mlbf = MLBF.load_from_storage(self.current_time) - assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) - assert not mlbf.storage.exists(mlbf.stash_path) + self._test_upload_stash_and_filter( + enable_soft_blocking=False, + expected_stash=None, + expected_filters=[BlockType.BLOCKED], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + enable_soft_blocking=True, + expected_stash=None, + expected_filters=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], + ) + + def test_upload_blocked_and_softblocked_stash(self): + # Enough blocked versions for a stash + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + # Enough soft blocks for a stash + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + + self._test_upload_stash_and_filter( + enable_soft_blocking=False, + expected_stash={ + # Expect a blocked version + 'blocked': 1, + # Expect no soft blocks because soft blocking is disabled + 'softblocked': 0, + # There are no unblocked versions + 'unblocked': 0, + }, + expected_filters=[], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + enable_soft_blocking=True, + expected_stash={ + # We still have the blocked version + 'blocked': 1, + # Expect a soft blocked version because there is one + # and soft blocking is enabled + 'softblocked': 1, + # There are no unblocked versions + 'unblocked': 0, + }, + expected_filters=[], + ) def test_remove_storage_if_no_update(self): """ @@ -622,17 +735,34 @@ def test_compares_against_base_filter_if_missing_previous_filter(self): # and only the soft blocked version is missing from the base filter assert not self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + # now with softblocking enabled we can account for the soft blocked version + with override_switch('enable-soft-blocking', active=True): + upload_mlbf_to_remote_settings(force_base=False) + assert self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + @override_switch('enable-soft-blocking', active=True) - def test_dont_skip_update_if_all_blocked_or_not_blocked(self): + def _test_dont_skip_update_if_all_blocked_or_not_blocked( + self, block_type: BlockType + ): """ If all versions are either blocked or not blocked, skip the update. """ for _ in range(0, 10): - self._block_version(block_type=BlockType.BLOCKED) + self._block_version(block_type=block_type) upload_mlbf_to_remote_settings() assert self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + def test_dont_skip_update_if_all_blocked_or_not_blocked_for_soft_blocked(self): + self._test_dont_skip_update_if_all_blocked_or_not_blocked( + block_type=BlockType.SOFT_BLOCKED + ) + + def test_dont_skip_update_if_all_blocked_or_not_blocked_for_blocked(self): + self._test_dont_skip_update_if_all_blocked_or_not_blocked( + block_type=BlockType.BLOCKED + ) + def test_invalid_cache_results_in_diff(self): self._block_version(block_type=BlockType.BLOCKED) @@ -680,6 +810,13 @@ def test_pass_correct_arguments_to_upload_filter(self): filter_list=[BlockType.BLOCKED.name], create_stash=False, ) + with override_switch('enable-soft-blocking', active=True): + upload_mlbf_to_remote_settings(force_base=True) + spy_delay.assert_called_with( + self.current_time, + filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], + create_stash=False, + ) class TestTimeMethods(TestCase): diff --git a/src/olympia/blocklist/tests/test_mlbf.py b/src/olympia/blocklist/tests/test_mlbf.py index b76dfd6b36fe..572f08a975fb 100644 --- a/src/olympia/blocklist/tests/test_mlbf.py +++ b/src/olympia/blocklist/tests/test_mlbf.py @@ -1,5 +1,6 @@ import json from functools import cached_property +from unittest import mock from waffle.testutils import override_switch @@ -378,33 +379,59 @@ def test_diff_no_changes(self): BlockType.BLOCKED: ([], [], 0), BlockType.SOFT_BLOCKED: ([], [], 0), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } - def test_diff_block_added(self): + def test_block_added(self): addon, block = self._blocked_addon() base_mlbf = MLBF.generate_from_db('test') new_block = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (new_block_hash,) = MLBF.hash_filter_inputs( + [(new_block.block.guid, new_block.version.version)] + ) + new_soft_block = self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + (new_soft_block_hash,) = MLBF.hash_filter_inputs( + [(new_soft_block.block.guid, new_soft_block.version.version)] + ) next_mlbf = MLBF.generate_from_db('test_two') assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( - MLBF.hash_filter_inputs( - [(new_block.block.guid, new_block.version.version)] - ), + [new_block_hash], [], 1, ), - BlockType.SOFT_BLOCKED: ([], [], 0), + BlockType.SOFT_BLOCKED: ([new_soft_block_hash], [], 1), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [new_block_hash], + 'softblocked': [], + 'unblocked': [], + } + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [new_block_hash], + 'softblocked': [new_soft_block_hash], + 'unblocked': [], + } - def test_diff_block_removed(self): + def test_block_removed(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') block_version.delete() next_mlbf = MLBF.generate_from_db('test_two') @@ -412,46 +439,59 @@ def test_diff_block_removed(self): assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( [], - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], 1, ), BlockType.SOFT_BLOCKED: ([], [], 0), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [block_hash], + } - def test_diff_block_added_and_removed(self): + def test_block_added_and_removed(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') new_block = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (new_block_hash,) = MLBF.hash_filter_inputs( + [(new_block.block.guid, new_block.version.version)] + ) block_version.delete() next_mlbf = MLBF.generate_from_db('test_two') assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( - MLBF.hash_filter_inputs( - [(new_block.block.guid, new_block.version.version)] - ), - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [new_block_hash], + [block_hash], 2, ), BlockType.SOFT_BLOCKED: ([], [], 0), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [new_block_hash], + 'softblocked': [], + 'unblocked': [block_hash], + } - def test_diff_block_hard_to_soft(self): + def test_block_hard_to_soft(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') block_version.update(block_type=BlockType.SOFT_BLOCKED) next_mlbf = MLBF.generate_from_db('test_two') @@ -459,45 +499,163 @@ def test_diff_block_hard_to_soft(self): assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( [], - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], 1, ), BlockType.SOFT_BLOCKED: ( - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], [], 1, ), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [block_hash], + } + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [block_hash], + 'unblocked': [], + } - def test_diff_block_soft_to_hard(self): + def test_block_soft_to_hard(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.SOFT_BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') block_version.update(block_type=BlockType.BLOCKED) next_mlbf = MLBF.generate_from_db('test_two') assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], [], 1, ), BlockType.SOFT_BLOCKED: ( [], - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], 1, ), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [block_hash], + 'softblocked': [], + 'unblocked': [], + } + + @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 2) + def test_hard_to_soft_multiple(self): + addon, block = self._blocked_addon() + block_versions = [ + self._block_version(block, self._version(addon)) for _ in range(2) + ] + block_hashes = MLBF.hash_filter_inputs( + [ + (block_version.block.guid, block_version.version.version) + for block_version in block_versions + ] + ) + base_mlbf = MLBF.generate_from_db('test') + + for block_version in block_versions: + block_version.update(block_type=BlockType.SOFT_BLOCKED) + + next_mlbf = MLBF.generate_from_db('test_two') + + assert not next_mlbf.should_upload_filter(BlockType.SOFT_BLOCKED, base_mlbf) + + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': block_hashes, + } + + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': block_hashes, + 'unblocked': [], + } + + @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 1) + def test_stash_is_empty_if_uploading_new_filter(self): + mlbf = MLBF.generate_from_db('test') + + # No changes yet so no new filter and empty stash + assert not mlbf.should_upload_filter(BlockType.BLOCKED) + assert mlbf.generate_and_write_stash() == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } + + # One of each version produces a stash + addon, block = self._blocked_addon() + hard_block = self._block_version( + block, self._version(addon), block_type=BlockType.BLOCKED + ) + soft_block = self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + hard_stash, soft_stash = MLBF.hash_filter_inputs( + [ + (hard_block.block.guid, hard_block.version.version), + (soft_block.block.guid, soft_block.version.version), + ] + ) + + next_mlbf = MLBF.generate_from_db('test_two') + # No new filter yet + assert not next_mlbf.should_upload_filter(BlockType.BLOCKED, mlbf) + + # Only the hard blocked version is included in the stash + assert next_mlbf.generate_and_write_stash(mlbf) == { + 'blocked': [hard_stash], + 'softblocked': [], + 'unblocked': [], + } + + # With soft blocking enabled, both versions are included in the stash + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(mlbf) == { + 'blocked': [hard_stash], + 'softblocked': [soft_stash], + 'unblocked': [], + } + + # Harden the soft blocked version + soft_block.update(block_type=BlockType.BLOCKED) + final_mlbf = MLBF.generate_from_db('test_three') + + # When comparing to the base filter, the stash is empty + assert final_mlbf.generate_and_write_stash( + previous_mlbf=next_mlbf, + blocked_base_filter=mlbf, + soft_blocked_base_filter=None, + ) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } + + # When comparing to the previous mlbf, + # the stash includes the hard blocked version + assert final_mlbf.generate_and_write_stash( + previous_mlbf=next_mlbf, + blocked_base_filter=next_mlbf, + soft_blocked_base_filter=None, + ) == { + 'blocked': [soft_stash], + 'softblocked': [], + 'unblocked': [], + } def test_diff_invalid_cache(self): addon, block = self._blocked_addon(file_kw={'is_signed': True}) @@ -591,6 +749,7 @@ def test_diff_all_possible_changes(self): # as they have some kind of block applied. assert first_mlbf.generate_and_write_stash() == { 'blocked': [five_hash, six_hash], + 'softblocked': [], 'unblocked': [], } @@ -642,6 +801,8 @@ def test_diff_all_possible_changes(self): assert second_mlbf.generate_and_write_stash(previous_mlbf=first_mlbf) == { 'blocked': [three_hash, two_hash], + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], # 4 is omitted because it's soft blocked and the waffle switch is off 'unblocked': [five_hash, six_hash], } @@ -650,7 +811,9 @@ def test_diff_all_possible_changes(self): assert second_mlbf.generate_and_write_stash(previous_mlbf=first_mlbf) == { 'blocked': [three_hash, two_hash], 'softblocked': [five_hash, one_hash], - 'unblocked': [five_hash, six_hash, four_hash], + # 3 and 5 are omitted because they transitioned + # from one block type to another + 'unblocked': [six_hash, four_hash], } def test_generate_stash_returns_expected_stash(self): @@ -669,6 +832,8 @@ def test_generate_stash_returns_expected_stash(self): with mlbf.storage.open(mlbf.stash_path, 'r') as f: assert json.load(f) == { 'blocked': MLBF.hash_filter_inputs(expected_blocked), + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], 'unblocked': [], } @@ -689,6 +854,8 @@ def test_generate_stash_returns_expected_stash(self): with next_mlbf.storage.open(next_mlbf.stash_path, 'r') as f: assert json.load(f) == { 'blocked': [], + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], 'unblocked': MLBF.hash_filter_inputs(expected_unblocked), } @@ -699,6 +866,104 @@ def test_generate_stash_returns_expected_stash(self): 'unblocked': MLBF.hash_filter_inputs(expected_unblocked), } + @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 2) + def test_generate_empty_stash_when_all_items_in_filter(self): + # Add a hard blocked version and 2 soft blocked versions + addon, block = self._blocked_addon( + file_kw={'is_signed': True}, block_type=BlockType.BLOCKED + ) + hard_block = block.blockversion_set.first() + (hard_block_hash,) = MLBF.hash_filter_inputs( + [(block.guid, hard_block.version.version)] + ) + soft_blocks = [ + self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + for _ in range(2) + ] + + base_mlbf = MLBF.generate_from_db('base') + + # Transition the hard block to soft blocked + # and delete the other soft blocks + hard_block.update(block_type=BlockType.SOFT_BLOCKED) + for soft_block in soft_blocks: + soft_block.delete() + + mlbf = MLBF.generate_from_db('test') + + # We should create a soft blocked filter and a stash + assert mlbf.should_upload_filter(BlockType.SOFT_BLOCKED, base_mlbf) + assert mlbf.should_upload_stash(BlockType.BLOCKED, base_mlbf) + + # If soft blocking is disabled, then softening a block is treated + # as an unblock. + assert mlbf.generate_and_write_stash( + previous_mlbf=base_mlbf, + blocked_base_filter=base_mlbf, + soft_blocked_base_filter=None, + ) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [hard_block_hash], + } + + # Recreate the mlbf and re-run with softblocking enabled + mlbf.delete() + + with override_switch('enable-soft-blocking', active=True): + mlbf.generate_from_db('test') + + assert mlbf.should_upload_filter(BlockType.SOFT_BLOCKED, base_mlbf) + # We have a softened block so we should upload stash, even though + # it will be empty since the block will be handled by the filter + assert mlbf.should_upload_stash(BlockType.BLOCKED, base_mlbf) + + # If soft blocking is enabled, then we will expect a new soft block filter + # and no softblock stash since the blocks will be handled by the filter + # similarly we do not include the blocked version in the unblocked stash + # because it is now soft blocked. + # We actually would like this to result in no stash being created + # Bug: https://github.com/mozilla/addons/issues/15202 + assert mlbf.generate_and_write_stash( + previous_mlbf=base_mlbf, + blocked_base_filter=base_mlbf, + soft_blocked_base_filter=base_mlbf, + ) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } + + def test_generate_filter_returns_expected_data(self): + addon, block = self._blocked_addon() + not_blocked = self._version(addon) + not_blocked_version = not_blocked.version + hard_blocked = self._block_version( + block, self._version(addon), block_type=BlockType.BLOCKED + ) + hard_blocked_version = hard_blocked.version.version + soft_blocked = self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + soft_blocked_version = soft_blocked.version.version + mlbf = MLBF.generate_from_db('test') + + mlbf.generate_and_write_filter(BlockType.BLOCKED).verify( + include=MLBF.hash_filter_inputs([(addon.guid, hard_blocked_version)]), + exclude=MLBF.hash_filter_inputs( + [(addon.guid, soft_blocked_version), (addon.guid, not_blocked_version)] + ), + ) + + mlbf.generate_and_write_filter(BlockType.SOFT_BLOCKED).verify( + include=MLBF.hash_filter_inputs([(addon.guid, soft_blocked_version)]), + exclude=MLBF.hash_filter_inputs( + [(addon.guid, hard_blocked_version), (addon.guid, not_blocked_version)] + ), + ) + def test_changed_count_returns_expected_count(self): addon, block = self._blocked_addon() self._block_version(block, self._version(addon), block_type=BlockType.BLOCKED) @@ -752,16 +1017,19 @@ def test_changed_count_returns_expected_count(self): == 1 ) + def _test_not_raises_if_versions_blocked(self, block_type: BlockType): + mlbf = MLBF.generate_from_db('test') + self._blocked_addon(file_kw={'is_signed': True}, block_type=block_type) + assert mlbf.data[block_type] == [] + mlbf.generate_and_write_filter(block_type) + def test_generate_filter_not_raises_if_all_versions_unblocked(self): """ When we create a bloom filter where all versions fall into the "not filtered" category This can create invalid error rates because the error rate depends on these numbers being non-zero. """ - mlbf = MLBF.generate_from_db('test') - self._blocked_addon(file_kw={'is_signed': True}) - assert mlbf.data.blocked_items == [] - mlbf.generate_and_write_filter() + self._test_not_raises_if_versions_blocked(BlockType.BLOCKED) def test_generate_filter_not_raises_if_all_versions_blocked(self): """ @@ -769,10 +1037,7 @@ def test_generate_filter_not_raises_if_all_versions_blocked(self): the "not filtered" category This can create invalid error rates because the error rate depends on these numbers being non-zero. """ - mlbf = MLBF.generate_from_db('test') - self._blocked_addon(file_kw={'is_signed': False}) - assert mlbf.data.not_blocked_items == [] - mlbf.generate_and_write_filter() + self._test_not_raises_if_versions_blocked(BlockType.SOFT_BLOCKED) def test_duplicate_guid_is_blocked(self): """ From 67be5ff0a220911e88c00c26274ff6b1f93446c4 Mon Sep 17 00:00:00 2001 From: Kevin Meinhardt Date: Wed, 27 Nov 2024 10:33:12 +0100 Subject: [PATCH 2/2] Update src/olympia/blocklist/mlbf.py Co-authored-by: William Durand --- src/olympia/blocklist/mlbf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/olympia/blocklist/mlbf.py b/src/olympia/blocklist/mlbf.py index c9f23d75ed88..b9a0a59e6d54 100644 --- a/src/olympia/blocklist/mlbf.py +++ b/src/olympia/blocklist/mlbf.py @@ -305,6 +305,13 @@ def generate_and_write_stash( If a block type needs a new filter, we do not include any items for that block type in the stash to prevent double counting items. + + We used to generate a list of `unblocked` versions as a union of deletions + from blocked and deletions from soft_blocked, filtering out any versions + that are in the newly blocked list in order to support Firefox clients that + don't support soft blocking. That, unfortunately, caused other issues so + currently we are very conservative, and we do not fully support old clients. + See: https://github.com/mozilla/addons/issues/15208 """ # Map block types to hard coded stash keys for compatibility # with the expected keys in remote settings.