diff --git a/src/olympia/blocklist/cron.py b/src/olympia/blocklist/cron.py index b3b9eab642bb..550d44e6d517 100644 --- a/src/olympia/blocklist/cron.py +++ b/src/olympia/blocklist/cron.py @@ -73,20 +73,25 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): get_last_generation_time() ) + base_filters: dict[BlockType, MLBF | None] = {key: None for key in BlockType} base_filters_to_update: List[BlockType] = [] create_stash = False # Determine which base filters need to be re uploaded # and whether a new stash needs to be created. for block_type in BlockType: - # This prevents us from updating a stash or filter based on new soft blocks. - if block_type == BlockType.SOFT_BLOCKED: + # This prevents us from updating a stash or filter based on new soft blocks + # until we are ready to enable soft blocking. + if block_type == BlockType.SOFT_BLOCKED and not waffle.switch_is_active( + 'enable-soft-blocking' + ): log.info( 'Skipping soft-blocks because enable-soft-blocking switch is inactive' ) continue base_filter = MLBF.load_from_storage(get_base_generation_time(block_type)) + base_filters[block_type] = base_filter # Add this block type to the list of filters to be re-uploaded. if ( @@ -111,13 +116,24 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): 'blocklist.cron.upload_mlbf_to_remote_settings.blocked_count', len(mlbf.data.blocked_items), ) + statsd.incr( + 'blocklist.cron.upload_mlbf_to_remote_settings.soft_blocked_count', + len(mlbf.data.soft_blocked_items), + ) statsd.incr( 'blocklist.cron.upload_mlbf_to_remote_settings.not_blocked_count', len(mlbf.data.not_blocked_items), ) if create_stash: - mlbf.generate_and_write_stash(previous_filter) + # We generate unified stashes, which means they can contain data + # for both soft and hard blocks. We need the base filters of each + # block type to determine what goes in a stash. + mlbf.generate_and_write_stash( + previous_mlbf=previous_filter, + blocked_base_filter=base_filters[BlockType.BLOCKED], + soft_blocked_base_filter=base_filters[BlockType.SOFT_BLOCKED], + ) for block_type in base_filters_to_update: mlbf.generate_and_write_filter(block_type) diff --git a/src/olympia/blocklist/management/commands/export_blocklist.py b/src/olympia/blocklist/management/commands/export_blocklist.py index 1846ac92246f..8c4dfbaf4c1b 100644 --- a/src/olympia/blocklist/management/commands/export_blocklist.py +++ b/src/olympia/blocklist/management/commands/export_blocklist.py @@ -4,6 +4,7 @@ import olympia.core.logger from olympia.blocklist.mlbf import MLBF +from olympia.blocklist.models import BlockType log = olympia.core.logger.getLogger('z.amo.blocklist') @@ -29,6 +30,12 @@ def add_arguments(self, parser): 'the database', default=None, ) + parser.add_argument( + '--block-type', + help='Block type to export', + default=None, + choices=[block_type.name for block_type in BlockType], + ) def load_json(self, json_path): with open(json_path) as json_file: @@ -38,6 +45,7 @@ def load_json(self, json_path): def handle(self, *args, **options): log.debug('Exporting blocklist to file') mlbf = MLBF.generate_from_db(options.get('id')) + block_type = BlockType[options.get('block_type')] if options.get('block_guids_input'): mlbf.blocked_items = list( @@ -52,4 +60,4 @@ def handle(self, *args, **options): ) ) - mlbf.generate_and_write_filter() + mlbf.generate_and_write_filter(block_type) diff --git a/src/olympia/blocklist/mlbf.py b/src/olympia/blocklist/mlbf.py index 99f437bd01ab..b9a0a59e6d54 100644 --- a/src/olympia/blocklist/mlbf.py +++ b/src/olympia/blocklist/mlbf.py @@ -33,7 +33,7 @@ def ordered_diff_lists( return extras, deletes, changed_count -def generate_mlbf(stats, blocked, not_blocked): +def generate_mlbf(stats, include, exclude): log.info('Starting to generating bloomfilter') cascade = FilterCascade( @@ -41,20 +41,21 @@ def generate_mlbf(stats, blocked, not_blocked): salt=secrets.token_bytes(16), ) - len_blocked = len(blocked) - len_unblocked = len(not_blocked) + len_include = len(include) + len_exclude = len(exclude) - # We can only set error rates if both blocked and unblocked are non-empty - if len_blocked > 0 and len_unblocked > 0: - error_rates = sorted((len_blocked, len_unblocked)) + # We can only set error rates if both include and exclude are non-empty + if len_include > 0 and len_exclude > 0: + error_rates = sorted((len_include, len_exclude)) cascade.set_crlite_error_rates( include_len=error_rates[0], exclude_len=error_rates[1] ) - stats['mlbf_blocked_count'] = len(blocked) - stats['mlbf_notblocked_count'] = len(not_blocked) + # TODO: https://github.com/mozilla/addons/issues/15204 + stats['mlbf_blocked_count'] = len(include) + stats['mlbf_notblocked_count'] = len(exclude) - cascade.initialize(include=blocked, exclude=not_blocked) + cascade.initialize(include=include, exclude=exclude) stats['mlbf_version'] = cascade.version stats['mlbf_layers'] = cascade.layerCount() @@ -64,7 +65,7 @@ def generate_mlbf(stats, blocked, not_blocked): f'Filter cascade layers: {cascade.layerCount()}, ' f'bit: {cascade.bitCount()}' ) - cascade.verify(include=blocked, exclude=not_blocked) + cascade.verify(include=include, exclude=exclude) return cascade @@ -225,13 +226,34 @@ def delete(self): self.storage.rm_stored_dir(self.storage.base_location) log.info(f'Deleted {self.storage.base_location}') - def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED): + def generate_and_write_filter(self, block_type: BlockType): + """ + Generate and write the bloom filter for a given block type. + Included items will be items in the specified block type list. + Excluded items will be items in all other data types. + + We use the language of include and exclude to distinguish this concept + from blocked and unblocked which are more specific to the block type. + """ stats = {} + include_items = [] + exclude_items = [] + + # Map over the data types in the MLBFDataType enum + for data_type in MLBFDataType: + # if the data type is in the specified block type, + # add it to the include items + if data_type.name == block_type.name: + include_items = self.data[data_type] + # otherwise add items to the exclude items + else: + exclude_items.extend(self.data[data_type]) + bloomfilter = generate_mlbf( stats=stats, - blocked=self.data.blocked_items, - not_blocked=self.data.not_blocked_items, + include=include_items, + exclude=exclude_items, ) # write bloomfilter to old and new file names @@ -250,6 +272,7 @@ def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED): stats['mlbf_filesize'] = os.stat(mlbf_path).st_size log.info(json.dumps(stats)) + return bloomfilter def generate_diffs( self, previous_mlbf: 'MLBF' = None @@ -262,46 +285,66 @@ def generate_diffs( for block_type in BlockType } - def generate_and_write_stash(self, previous_mlbf: 'MLBF' = None): + def generate_and_write_stash( + self, + previous_mlbf: 'MLBF' = None, + blocked_base_filter: 'MLBF' = None, + soft_blocked_base_filter: 'MLBF' = None, + ): """ Generate and write the stash file representing changes between the previous and current bloom filters. See: https://bugzilla.mozilla.org/show_bug.cgi?id=soft-blocking - In order to support Firefox clients that don't support soft blocking, - unblocked is a union of deletions from blocked and deletions from - soft_blocked, filtering out any versions that are in the newly blocked - list. + Since we might be generating both a filter and a stash at the exact same time, + we need to compute a stash that doesn't include the data already in the newly + created filter. - Versions that move from hard to soft blocked will be picked up by old - clients as no longer hard blocked by being in the unblocked list. + Items that are removed from one block type and added to another are + excluded from the unblocked list to prevent double counting. - Clients supporting soft blocking will also see soft blocked versions as - unblocked, but they won't unblocked them because the list of - soft-blocked versions takes precedence over the list of unblocked - versions. + If a block type needs a new filter, we do not include any items for that + block type in the stash to prevent double counting items. - Versions that move from soft to hard blocked will be picked up by - all clients in the blocked list. Note, even though the version is removed - from the soft blocked list, it is important that we do not include it - in the "unblocked" stash (like for hard blocked items) as this would - result in the version being in both blocked and unblocked stashes. + We used to generate a list of `unblocked` versions as a union of deletions + from blocked and deletions from soft_blocked, filtering out any versions + that are in the newly blocked list in order to support Firefox clients that + don't support soft blocking. That, unfortunately, caused other issues so + currently we are very conservative, and we do not fully support old clients. + See: https://github.com/mozilla/addons/issues/15208 """ + # Map block types to hard coded stash keys for compatibility + # with the expected keys in remote settings. + STASH_KEYS = { + BlockType.BLOCKED: 'blocked', + BlockType.SOFT_BLOCKED: 'softblocked', + } + UNBLOCKED_STASH_KEY = 'unblocked' + + # Base stash includes all of the expected keys from STASH_KEYS + unblocked + stash_json = {key: [] for key in [UNBLOCKED_STASH_KEY, *STASH_KEYS.values()]} + diffs = self.generate_diffs(previous_mlbf) blocked_added, blocked_removed, _ = diffs[BlockType.BLOCKED] - stash_json = { - 'blocked': blocked_added, - 'unblocked': blocked_removed, - } + added_items = set(blocked_added) + + if not self.should_upload_filter(BlockType.BLOCKED, blocked_base_filter): + stash_json[STASH_KEYS[BlockType.BLOCKED]] = blocked_added + stash_json[UNBLOCKED_STASH_KEY] = blocked_removed if waffle.switch_is_active('enable-soft-blocking'): soft_blocked_added, soft_blocked_removed, _ = diffs[BlockType.SOFT_BLOCKED] - stash_json['softblocked'] = soft_blocked_added - stash_json['unblocked'] = [ - unblocked - for unblocked in (blocked_removed + soft_blocked_removed) - if unblocked not in blocked_added - ] + added_items.update(soft_blocked_added) + if not self.should_upload_filter( + BlockType.SOFT_BLOCKED, soft_blocked_base_filter + ): + stash_json[STASH_KEYS[BlockType.SOFT_BLOCKED]] = soft_blocked_added + stash_json[UNBLOCKED_STASH_KEY].extend(soft_blocked_removed) + + # Remove any items that were added to a block type. + stash_json[UNBLOCKED_STASH_KEY] = [ + item for item in stash_json[UNBLOCKED_STASH_KEY] if item not in added_items + ] # write stash stash_path = self.stash_path diff --git a/src/olympia/blocklist/tests/test_commands.py b/src/olympia/blocklist/tests/test_commands.py index 317916f7a7ca..c33d9fc44482 100644 --- a/src/olympia/blocklist/tests/test_commands.py +++ b/src/olympia/blocklist/tests/test_commands.py @@ -37,6 +37,11 @@ def test_command(self): updated_by=user, ) - call_command('export_blocklist', '1') + call_command('export_blocklist', '1', '--block-type', BlockType.BLOCKED.name) mlbf = MLBF.load_from_storage(1) assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) + call_command( + 'export_blocklist', '1', '--block-type', BlockType.SOFT_BLOCKED.name + ) + mlbf = MLBF.load_from_storage(1) + assert mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) diff --git a/src/olympia/blocklist/tests/test_cron.py b/src/olympia/blocklist/tests/test_cron.py index 587ae4df2354..17118f38baf3 100644 --- a/src/olympia/blocklist/tests/test_cron.py +++ b/src/olympia/blocklist/tests/test_cron.py @@ -38,6 +38,7 @@ @freeze_time('2020-01-01 12:34:56') @override_switch('blocklist_mlbf_submit', active=True) +@override_switch('enable-soft-blocking', active=False) class TestUploadToRemoteSettings(TestCase): def setUp(self): self.user = user_factory() @@ -95,6 +96,9 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False): filter_list = [BlockType.BLOCKED.name] + if enable_soft_blocking: + filter_list.append(BlockType.SOFT_BLOCKED.name) + with override_switch('enable-soft-blocking', active=enable_soft_blocking): upload_mlbf_to_remote_settings(force_base=True) @@ -113,9 +117,7 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False): ) self.assertEqual( mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)), - # Until we are ready to enable soft blocking - # there should never be a soft block filter. - False, + enable_soft_blocking, ) assert not mlbf.storage.exists(mlbf.stash_path) @@ -159,10 +161,7 @@ def test_skip_update_unless_no_base_mlbf_for_blocked(self): @override_switch('enable-soft-blocking', active=True) def test_skip_update_unless_no_base_mlbf_for_soft_blocked_with_switch_enabled(self): self._test_skip_update_unless_no_base_mlbf( - # Until we enable soft blocking even if there is no soft block base filter - # and the switch is active, no update expected - BlockType.SOFT_BLOCKED, - filter_list=None, + BlockType.SOFT_BLOCKED, filter_list=[BlockType.SOFT_BLOCKED.name] ) def test_skip_update_unless_no_base_mlbf_for_soft_blocked_with_switch_disabled( @@ -249,10 +248,7 @@ def test_skip_update_unless_new_blocks_for_soft_blocked_with_switch_enabled(self self._test_skip_update_unless_new_blocks( block_type=BlockType.SOFT_BLOCKED, enable_soft_blocking=True, - # Until we enable soft blocking - # even if there is a new soft block - # and switch is active, expect no update - expect_update=False, + expect_update=True, ) def test_send_statsd_counts(self): @@ -270,6 +266,12 @@ def test_send_statsd_counts(self): mock.call('blocklist.cron.upload_mlbf_to_remote_settings.blocked_count', 1) in statsd_calls ) + assert ( + mock.call( + 'blocklist.cron.upload_mlbf_to_remote_settings.soft_blocked_count', 1 + ) + in statsd_calls + ) assert ( mock.call( 'blocklist.cron.upload_mlbf_to_remote_settings.not_blocked_count', 1 @@ -370,10 +372,7 @@ def test_upload_stash_unless_force_base_for_blocked_with_switch_enabled(self): self._test_upload_stash_unless_force_base( block_types=[BlockType.BLOCKED], expect_stash=True, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -399,11 +398,8 @@ def test_upload_stash_unless_force_base_for_soft_blocked_with_switch_enabled(sel """ self._test_upload_stash_unless_force_base( block_types=[BlockType.SOFT_BLOCKED], - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - expect_stash=False, - filter_list=[BlockType.BLOCKED], + expect_stash=True, + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -429,10 +425,7 @@ def test_upload_stash_unless_force_base_for_both_blocked_with_switch_enabled(sel self._test_upload_stash_unless_force_base( block_types=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], expect_stash=True, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -448,10 +441,7 @@ def test_dont_upload_stash_unless_force_base_for_both_blocked_with_switch_enable self._test_upload_stash_unless_force_base( block_types=[], expect_stash=False, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -491,14 +481,11 @@ def test_upload_stash_unless_missing_base_filter(self): with override_switch('enable-soft-blocking', active=True): upload_mlbf_to_remote_settings() - assert not mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) + assert mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) assert ( mock.call( self.current_time, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED.name], + filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], create_stash=False, ) ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list @@ -538,49 +525,175 @@ def test_upload_stash_unless_enough_changes(self): mock.call( self.current_time, filter_list=[block_type.name], - create_stash=False, + create_stash=True, ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list ) new_mlbf = MLBF.load_from_storage(self.current_time) assert new_mlbf.storage.exists(new_mlbf.filter_path(block_type)) - assert not new_mlbf.storage.exists(new_mlbf.stash_path) + assert new_mlbf.storage.exists(new_mlbf.stash_path) + with new_mlbf.storage.open(new_mlbf.stash_path, 'r') as f: + data = json.load(f) + # We expect an empty list for hard blocks because we are + # uploading a new hard filter. + assert len(data['blocked']) == 0 + # We can still see there are 2 hard blocked versions in the new filter. + assert len(new_mlbf.data.blocked_items) == 2 + assert len(data['softblocked']) == 1 @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 1) - def test_upload_stash_even_if_filter_is_updated(self): - """ - If enough changes of one type are made, update the filter, but still upload - a stash if there are changes of other types. - """ + def _test_upload_stash_and_filter( + self, + enable_soft_blocking: bool, + expected_stash: dict | None, + expected_filters: List[BlockType], + ): + with override_switch('enable-soft-blocking', active=enable_soft_blocking): + upload_mlbf_to_remote_settings() + + # Generation time is set to current time so we can load the MLBF. + mlbf = MLBF.load_from_storage(self.current_time) + + if expected_stash is None: + assert not mlbf.storage.exists( + mlbf.stash_path + ), 'Expected no stash but one exists' + else: + assert mlbf.storage.exists( + mlbf.stash_path + ), f'Expected stash {expected_stash} but none exists' + with mlbf.storage.open(mlbf.stash_path, 'r') as f: + data = json.load(f) + for key, expected_count in expected_stash.items(): + assert ( + len(data[key]) == expected_count + ), f'Expected {expected_count} {key} but got {len(data[key])}' + + for expected_filter in expected_filters: + assert mlbf.storage.exists( + mlbf.filter_path(expected_filter) + ), f'Expected filter {expected_filter} but none exists' + + def test_upload_blocked_stash_and_softblock_filter(self): + # Enough blocks for a stash + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + # Enough soft blocks for a filter + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + + # Expected stash does not change + expected_stash = { + 'blocked': 1, + # Expect no soft blocks because there are enough for a filter. + 'softblocked': 0, + # There are no unblocked versions + 'unblocked': 0, + } + + self._test_upload_stash_and_filter( + # Even though there are enough soft blocks, soft blocking is disabled + enable_soft_blocking=False, + expected_stash=expected_stash, + # Expect no filter as soft blocking is disabled + expected_filters=[], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + # Soft blocking is enabled, so we expect the same stash and a new filter + enable_soft_blocking=True, + expected_stash=expected_stash, + # Expect a soft blocked filter + expected_filters=[BlockType.SOFT_BLOCKED], + ) + + def test_upload_soft_blocked_stash_and_blocked_filter(self): + # Enough soft blocks for a stash + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + # Enough blocked versions for a filter + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + + self._test_upload_stash_and_filter( + enable_soft_blocking=False, + # Expect no stash because there are enough blocked versions for a filter + # and soft blocking is disabled + expected_stash=None, + # Expect a blocked filter + expected_filters=[BlockType.BLOCKED], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + enable_soft_blocking=True, + # Expect a stash and a blocked filter + expected_stash={ + # Expect no blocked versions because there are enough for a filter + 'blocked': 0, + # Expect a soft blocked version when there is one + # and soft blocking is enabled + 'softblocked': 1, + # There are no unblocked versions + 'unblocked': 0, + }, + # Expect a blocked filter + expected_filters=[BlockType.BLOCKED], + ) + + def test_upload_blocked_and_softblocked_filter(self): + # Enough blocked versions for a filter self._block_version(is_signed=True, block_type=BlockType.BLOCKED) self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + # Enough soft blocks for a filter + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) - upload_mlbf_to_remote_settings() - assert self.mocks[ - 'olympia.blocklist.cron.upload_filter.delay' - ].call_args_list == [ - mock.call( - self.current_time, - filter_list=[BlockType.BLOCKED.name], - create_stash=False, - ) - ] - mlbf = MLBF.load_from_storage(self.current_time) - assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) - assert not mlbf.storage.exists(mlbf.stash_path) - with override_switch('enable-soft-blocking', active=True): - self._block_version(is_signed=True, block_type=BlockType.BLOCKED) - self._block_version(is_signed=True, block_type=BlockType.BLOCKED) - upload_mlbf_to_remote_settings() - self.mocks['olympia.blocklist.cron.upload_filter.delay'].assert_called_with( - self.current_time, - filter_list=[BlockType.BLOCKED.name], - create_stash=False, - ) - mlbf = MLBF.load_from_storage(self.current_time) - assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) - assert not mlbf.storage.exists(mlbf.stash_path) + self._test_upload_stash_and_filter( + enable_soft_blocking=False, + expected_stash=None, + expected_filters=[BlockType.BLOCKED], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + enable_soft_blocking=True, + expected_stash=None, + expected_filters=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], + ) + + def test_upload_blocked_and_softblocked_stash(self): + # Enough blocked versions for a stash + self._block_version(is_signed=True, block_type=BlockType.BLOCKED) + # Enough soft blocks for a stash + self._block_version(is_signed=True, block_type=BlockType.SOFT_BLOCKED) + + self._test_upload_stash_and_filter( + enable_soft_blocking=False, + expected_stash={ + # Expect a blocked version + 'blocked': 1, + # Expect no soft blocks because soft blocking is disabled + 'softblocked': 0, + # There are no unblocked versions + 'unblocked': 0, + }, + expected_filters=[], + ) + + # Now try again with soft blocking enabled + self._test_upload_stash_and_filter( + enable_soft_blocking=True, + expected_stash={ + # We still have the blocked version + 'blocked': 1, + # Expect a soft blocked version because there is one + # and soft blocking is enabled + 'softblocked': 1, + # There are no unblocked versions + 'unblocked': 0, + }, + expected_filters=[], + ) def test_remove_storage_if_no_update(self): """ @@ -622,17 +735,34 @@ def test_compares_against_base_filter_if_missing_previous_filter(self): # and only the soft blocked version is missing from the base filter assert not self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + # now with softblocking enabled we can account for the soft blocked version + with override_switch('enable-soft-blocking', active=True): + upload_mlbf_to_remote_settings(force_base=False) + assert self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + @override_switch('enable-soft-blocking', active=True) - def test_dont_skip_update_if_all_blocked_or_not_blocked(self): + def _test_dont_skip_update_if_all_blocked_or_not_blocked( + self, block_type: BlockType + ): """ If all versions are either blocked or not blocked, skip the update. """ for _ in range(0, 10): - self._block_version(block_type=BlockType.BLOCKED) + self._block_version(block_type=block_type) upload_mlbf_to_remote_settings() assert self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + def test_dont_skip_update_if_all_blocked_or_not_blocked_for_soft_blocked(self): + self._test_dont_skip_update_if_all_blocked_or_not_blocked( + block_type=BlockType.SOFT_BLOCKED + ) + + def test_dont_skip_update_if_all_blocked_or_not_blocked_for_blocked(self): + self._test_dont_skip_update_if_all_blocked_or_not_blocked( + block_type=BlockType.BLOCKED + ) + def test_invalid_cache_results_in_diff(self): self._block_version(block_type=BlockType.BLOCKED) @@ -680,6 +810,13 @@ def test_pass_correct_arguments_to_upload_filter(self): filter_list=[BlockType.BLOCKED.name], create_stash=False, ) + with override_switch('enable-soft-blocking', active=True): + upload_mlbf_to_remote_settings(force_base=True) + spy_delay.assert_called_with( + self.current_time, + filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], + create_stash=False, + ) class TestTimeMethods(TestCase): diff --git a/src/olympia/blocklist/tests/test_mlbf.py b/src/olympia/blocklist/tests/test_mlbf.py index b76dfd6b36fe..572f08a975fb 100644 --- a/src/olympia/blocklist/tests/test_mlbf.py +++ b/src/olympia/blocklist/tests/test_mlbf.py @@ -1,5 +1,6 @@ import json from functools import cached_property +from unittest import mock from waffle.testutils import override_switch @@ -378,33 +379,59 @@ def test_diff_no_changes(self): BlockType.BLOCKED: ([], [], 0), BlockType.SOFT_BLOCKED: ([], [], 0), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } - def test_diff_block_added(self): + def test_block_added(self): addon, block = self._blocked_addon() base_mlbf = MLBF.generate_from_db('test') new_block = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (new_block_hash,) = MLBF.hash_filter_inputs( + [(new_block.block.guid, new_block.version.version)] + ) + new_soft_block = self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + (new_soft_block_hash,) = MLBF.hash_filter_inputs( + [(new_soft_block.block.guid, new_soft_block.version.version)] + ) next_mlbf = MLBF.generate_from_db('test_two') assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( - MLBF.hash_filter_inputs( - [(new_block.block.guid, new_block.version.version)] - ), + [new_block_hash], [], 1, ), - BlockType.SOFT_BLOCKED: ([], [], 0), + BlockType.SOFT_BLOCKED: ([new_soft_block_hash], [], 1), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [new_block_hash], + 'softblocked': [], + 'unblocked': [], + } + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [new_block_hash], + 'softblocked': [new_soft_block_hash], + 'unblocked': [], + } - def test_diff_block_removed(self): + def test_block_removed(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') block_version.delete() next_mlbf = MLBF.generate_from_db('test_two') @@ -412,46 +439,59 @@ def test_diff_block_removed(self): assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( [], - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], 1, ), BlockType.SOFT_BLOCKED: ([], [], 0), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [block_hash], + } - def test_diff_block_added_and_removed(self): + def test_block_added_and_removed(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') new_block = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (new_block_hash,) = MLBF.hash_filter_inputs( + [(new_block.block.guid, new_block.version.version)] + ) block_version.delete() next_mlbf = MLBF.generate_from_db('test_two') assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( - MLBF.hash_filter_inputs( - [(new_block.block.guid, new_block.version.version)] - ), - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [new_block_hash], + [block_hash], 2, ), BlockType.SOFT_BLOCKED: ([], [], 0), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [new_block_hash], + 'softblocked': [], + 'unblocked': [block_hash], + } - def test_diff_block_hard_to_soft(self): + def test_block_hard_to_soft(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') block_version.update(block_type=BlockType.SOFT_BLOCKED) next_mlbf = MLBF.generate_from_db('test_two') @@ -459,45 +499,163 @@ def test_diff_block_hard_to_soft(self): assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( [], - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], 1, ), BlockType.SOFT_BLOCKED: ( - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], [], 1, ), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [block_hash], + } + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [block_hash], + 'unblocked': [], + } - def test_diff_block_soft_to_hard(self): + def test_block_soft_to_hard(self): addon, block = self._blocked_addon() block_version = self._block_version( block, self._version(addon), block_type=BlockType.SOFT_BLOCKED ) + (block_hash,) = MLBF.hash_filter_inputs( + [(block_version.block.guid, block_version.version.version)] + ) base_mlbf = MLBF.generate_from_db('test') block_version.update(block_type=BlockType.BLOCKED) next_mlbf = MLBF.generate_from_db('test_two') assert next_mlbf.generate_diffs(previous_mlbf=base_mlbf) == { BlockType.BLOCKED: ( - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], [], 1, ), BlockType.SOFT_BLOCKED: ( [], - MLBF.hash_filter_inputs( - [(block_version.block.guid, block_version.version.version)] - ), + [block_hash], 1, ), } + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [block_hash], + 'softblocked': [], + 'unblocked': [], + } + + @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 2) + def test_hard_to_soft_multiple(self): + addon, block = self._blocked_addon() + block_versions = [ + self._block_version(block, self._version(addon)) for _ in range(2) + ] + block_hashes = MLBF.hash_filter_inputs( + [ + (block_version.block.guid, block_version.version.version) + for block_version in block_versions + ] + ) + base_mlbf = MLBF.generate_from_db('test') + + for block_version in block_versions: + block_version.update(block_type=BlockType.SOFT_BLOCKED) + + next_mlbf = MLBF.generate_from_db('test_two') + + assert not next_mlbf.should_upload_filter(BlockType.SOFT_BLOCKED, base_mlbf) + + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': block_hashes, + } + + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(previous_mlbf=base_mlbf) == { + 'blocked': [], + 'softblocked': block_hashes, + 'unblocked': [], + } + + @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 1) + def test_stash_is_empty_if_uploading_new_filter(self): + mlbf = MLBF.generate_from_db('test') + + # No changes yet so no new filter and empty stash + assert not mlbf.should_upload_filter(BlockType.BLOCKED) + assert mlbf.generate_and_write_stash() == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } + + # One of each version produces a stash + addon, block = self._blocked_addon() + hard_block = self._block_version( + block, self._version(addon), block_type=BlockType.BLOCKED + ) + soft_block = self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + hard_stash, soft_stash = MLBF.hash_filter_inputs( + [ + (hard_block.block.guid, hard_block.version.version), + (soft_block.block.guid, soft_block.version.version), + ] + ) + + next_mlbf = MLBF.generate_from_db('test_two') + # No new filter yet + assert not next_mlbf.should_upload_filter(BlockType.BLOCKED, mlbf) + + # Only the hard blocked version is included in the stash + assert next_mlbf.generate_and_write_stash(mlbf) == { + 'blocked': [hard_stash], + 'softblocked': [], + 'unblocked': [], + } + + # With soft blocking enabled, both versions are included in the stash + with override_switch('enable-soft-blocking', active=True): + assert next_mlbf.generate_and_write_stash(mlbf) == { + 'blocked': [hard_stash], + 'softblocked': [soft_stash], + 'unblocked': [], + } + + # Harden the soft blocked version + soft_block.update(block_type=BlockType.BLOCKED) + final_mlbf = MLBF.generate_from_db('test_three') + + # When comparing to the base filter, the stash is empty + assert final_mlbf.generate_and_write_stash( + previous_mlbf=next_mlbf, + blocked_base_filter=mlbf, + soft_blocked_base_filter=None, + ) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } + + # When comparing to the previous mlbf, + # the stash includes the hard blocked version + assert final_mlbf.generate_and_write_stash( + previous_mlbf=next_mlbf, + blocked_base_filter=next_mlbf, + soft_blocked_base_filter=None, + ) == { + 'blocked': [soft_stash], + 'softblocked': [], + 'unblocked': [], + } def test_diff_invalid_cache(self): addon, block = self._blocked_addon(file_kw={'is_signed': True}) @@ -591,6 +749,7 @@ def test_diff_all_possible_changes(self): # as they have some kind of block applied. assert first_mlbf.generate_and_write_stash() == { 'blocked': [five_hash, six_hash], + 'softblocked': [], 'unblocked': [], } @@ -642,6 +801,8 @@ def test_diff_all_possible_changes(self): assert second_mlbf.generate_and_write_stash(previous_mlbf=first_mlbf) == { 'blocked': [three_hash, two_hash], + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], # 4 is omitted because it's soft blocked and the waffle switch is off 'unblocked': [five_hash, six_hash], } @@ -650,7 +811,9 @@ def test_diff_all_possible_changes(self): assert second_mlbf.generate_and_write_stash(previous_mlbf=first_mlbf) == { 'blocked': [three_hash, two_hash], 'softblocked': [five_hash, one_hash], - 'unblocked': [five_hash, six_hash, four_hash], + # 3 and 5 are omitted because they transitioned + # from one block type to another + 'unblocked': [six_hash, four_hash], } def test_generate_stash_returns_expected_stash(self): @@ -669,6 +832,8 @@ def test_generate_stash_returns_expected_stash(self): with mlbf.storage.open(mlbf.stash_path, 'r') as f: assert json.load(f) == { 'blocked': MLBF.hash_filter_inputs(expected_blocked), + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], 'unblocked': [], } @@ -689,6 +854,8 @@ def test_generate_stash_returns_expected_stash(self): with next_mlbf.storage.open(next_mlbf.stash_path, 'r') as f: assert json.load(f) == { 'blocked': [], + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], 'unblocked': MLBF.hash_filter_inputs(expected_unblocked), } @@ -699,6 +866,104 @@ def test_generate_stash_returns_expected_stash(self): 'unblocked': MLBF.hash_filter_inputs(expected_unblocked), } + @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 2) + def test_generate_empty_stash_when_all_items_in_filter(self): + # Add a hard blocked version and 2 soft blocked versions + addon, block = self._blocked_addon( + file_kw={'is_signed': True}, block_type=BlockType.BLOCKED + ) + hard_block = block.blockversion_set.first() + (hard_block_hash,) = MLBF.hash_filter_inputs( + [(block.guid, hard_block.version.version)] + ) + soft_blocks = [ + self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + for _ in range(2) + ] + + base_mlbf = MLBF.generate_from_db('base') + + # Transition the hard block to soft blocked + # and delete the other soft blocks + hard_block.update(block_type=BlockType.SOFT_BLOCKED) + for soft_block in soft_blocks: + soft_block.delete() + + mlbf = MLBF.generate_from_db('test') + + # We should create a soft blocked filter and a stash + assert mlbf.should_upload_filter(BlockType.SOFT_BLOCKED, base_mlbf) + assert mlbf.should_upload_stash(BlockType.BLOCKED, base_mlbf) + + # If soft blocking is disabled, then softening a block is treated + # as an unblock. + assert mlbf.generate_and_write_stash( + previous_mlbf=base_mlbf, + blocked_base_filter=base_mlbf, + soft_blocked_base_filter=None, + ) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [hard_block_hash], + } + + # Recreate the mlbf and re-run with softblocking enabled + mlbf.delete() + + with override_switch('enable-soft-blocking', active=True): + mlbf.generate_from_db('test') + + assert mlbf.should_upload_filter(BlockType.SOFT_BLOCKED, base_mlbf) + # We have a softened block so we should upload stash, even though + # it will be empty since the block will be handled by the filter + assert mlbf.should_upload_stash(BlockType.BLOCKED, base_mlbf) + + # If soft blocking is enabled, then we will expect a new soft block filter + # and no softblock stash since the blocks will be handled by the filter + # similarly we do not include the blocked version in the unblocked stash + # because it is now soft blocked. + # We actually would like this to result in no stash being created + # Bug: https://github.com/mozilla/addons/issues/15202 + assert mlbf.generate_and_write_stash( + previous_mlbf=base_mlbf, + blocked_base_filter=base_mlbf, + soft_blocked_base_filter=base_mlbf, + ) == { + 'blocked': [], + 'softblocked': [], + 'unblocked': [], + } + + def test_generate_filter_returns_expected_data(self): + addon, block = self._blocked_addon() + not_blocked = self._version(addon) + not_blocked_version = not_blocked.version + hard_blocked = self._block_version( + block, self._version(addon), block_type=BlockType.BLOCKED + ) + hard_blocked_version = hard_blocked.version.version + soft_blocked = self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + soft_blocked_version = soft_blocked.version.version + mlbf = MLBF.generate_from_db('test') + + mlbf.generate_and_write_filter(BlockType.BLOCKED).verify( + include=MLBF.hash_filter_inputs([(addon.guid, hard_blocked_version)]), + exclude=MLBF.hash_filter_inputs( + [(addon.guid, soft_blocked_version), (addon.guid, not_blocked_version)] + ), + ) + + mlbf.generate_and_write_filter(BlockType.SOFT_BLOCKED).verify( + include=MLBF.hash_filter_inputs([(addon.guid, soft_blocked_version)]), + exclude=MLBF.hash_filter_inputs( + [(addon.guid, hard_blocked_version), (addon.guid, not_blocked_version)] + ), + ) + def test_changed_count_returns_expected_count(self): addon, block = self._blocked_addon() self._block_version(block, self._version(addon), block_type=BlockType.BLOCKED) @@ -752,16 +1017,19 @@ def test_changed_count_returns_expected_count(self): == 1 ) + def _test_not_raises_if_versions_blocked(self, block_type: BlockType): + mlbf = MLBF.generate_from_db('test') + self._blocked_addon(file_kw={'is_signed': True}, block_type=block_type) + assert mlbf.data[block_type] == [] + mlbf.generate_and_write_filter(block_type) + def test_generate_filter_not_raises_if_all_versions_unblocked(self): """ When we create a bloom filter where all versions fall into the "not filtered" category This can create invalid error rates because the error rate depends on these numbers being non-zero. """ - mlbf = MLBF.generate_from_db('test') - self._blocked_addon(file_kw={'is_signed': True}) - assert mlbf.data.blocked_items == [] - mlbf.generate_and_write_filter() + self._test_not_raises_if_versions_blocked(BlockType.BLOCKED) def test_generate_filter_not_raises_if_all_versions_blocked(self): """ @@ -769,10 +1037,7 @@ def test_generate_filter_not_raises_if_all_versions_blocked(self): the "not filtered" category This can create invalid error rates because the error rate depends on these numbers being non-zero. """ - mlbf = MLBF.generate_from_db('test') - self._blocked_addon(file_kw={'is_signed': False}) - assert mlbf.data.not_blocked_items == [] - mlbf.generate_and_write_filter() + self._test_not_raises_if_versions_blocked(BlockType.SOFT_BLOCKED) def test_duplicate_guid_is_blocked(self): """