diff --git a/src/olympia/blocklist/cron.py b/src/olympia/blocklist/cron.py index 0ad980fab22f..832932ab0b40 100644 --- a/src/olympia/blocklist/cron.py +++ b/src/olympia/blocklist/cron.py @@ -80,7 +80,10 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): # and whether a new stash needs to be created for block_type in BlockType: # This prevents us from updating a stash or filter based on new soft blocks - if block_type == BlockType.SOFT_BLOCKED: + # until we are ready to enable soft blocking. + if block_type == BlockType.SOFT_BLOCKED and not waffle.switch_is_active( + 'enable-soft-blocking' + ): log.info( 'Skipping soft-blocks because enable-soft-blocking switch is inactive' ) @@ -111,18 +114,15 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): 'blocklist.cron.upload_mlbf_to_remote_settings.blocked_count', len(mlbf.data.blocked_items), ) + statsd.incr( + 'blocklist.cron.upload_mlbf_to_remote_settings.soft_blocked_count', + len(mlbf.data.soft_blocked_items), + ) statsd.incr( 'blocklist.cron.upload_mlbf_to_remote_settings.not_blocked_count', len(mlbf.data.not_blocked_items), ) - # Until we are ready to enable soft blocking, it should not be possible - # to create a stash and a filter at the same iteration - if create_stash and len(base_filters_to_update) > 0: - raise Exception( - 'Cannot upload stash and filter without implementing soft blocking' - ) - if create_stash: mlbf.generate_and_write_stash(previous_filter) diff --git a/src/olympia/blocklist/management/commands/export_blocklist.py b/src/olympia/blocklist/management/commands/export_blocklist.py index 1846ac92246f..8c4dfbaf4c1b 100644 --- a/src/olympia/blocklist/management/commands/export_blocklist.py +++ b/src/olympia/blocklist/management/commands/export_blocklist.py @@ -4,6 +4,7 @@ import olympia.core.logger from olympia.blocklist.mlbf import MLBF +from olympia.blocklist.models import BlockType log = olympia.core.logger.getLogger('z.amo.blocklist') @@ -29,6 +30,12 @@ def add_arguments(self, parser): 'the database', default=None, ) + parser.add_argument( + '--block-type', + help='Block type to export', + default=None, + choices=[block_type.name for block_type in BlockType], + ) def load_json(self, json_path): with open(json_path) as json_file: @@ -38,6 +45,7 @@ def load_json(self, json_path): def handle(self, *args, **options): log.debug('Exporting blocklist to file') mlbf = MLBF.generate_from_db(options.get('id')) + block_type = BlockType[options.get('block_type')] if options.get('block_guids_input'): mlbf.blocked_items = list( @@ -52,4 +60,4 @@ def handle(self, *args, **options): ) ) - mlbf.generate_and_write_filter() + mlbf.generate_and_write_filter(block_type) diff --git a/src/olympia/blocklist/mlbf.py b/src/olympia/blocklist/mlbf.py index cfd9980654ce..5fb0ecbfa5c7 100644 --- a/src/olympia/blocklist/mlbf.py +++ b/src/olympia/blocklist/mlbf.py @@ -33,7 +33,7 @@ def ordered_diff_lists( return extras, deletes, changed_count -def generate_mlbf(stats, blocked, not_blocked): +def generate_mlbf(stats, include, exclude): log.info('Starting to generating bloomfilter') cascade = FilterCascade( @@ -41,20 +41,21 @@ def generate_mlbf(stats, blocked, not_blocked): salt=secrets.token_bytes(16), ) - len_blocked = len(blocked) - len_unblocked = len(not_blocked) + len_include = len(include) + len_exclude = len(exclude) - # We can only set error rates if both blocked and unblocked are non-empty - if len_blocked > 0 and len_unblocked > 0: - error_rates = sorted((len_blocked, len_unblocked)) + # We can only set error rates if both include and exclude are non-empty + if len_include > 0 and len_exclude > 0: + error_rates = sorted((len_include, len_exclude)) cascade.set_crlite_error_rates( include_len=error_rates[0], exclude_len=error_rates[1] ) - stats['mlbf_blocked_count'] = len(blocked) - stats['mlbf_notblocked_count'] = len(not_blocked) + # TODO: these stats need to be block_type aware + stats['mlbf_blocked_count'] = len(include) + stats['mlbf_notblocked_count'] = len(exclude) - cascade.initialize(include=blocked, exclude=not_blocked) + cascade.initialize(include=include, exclude=exclude) stats['mlbf_version'] = cascade.version stats['mlbf_layers'] = cascade.layerCount() @@ -64,7 +65,7 @@ def generate_mlbf(stats, blocked, not_blocked): f'Filter cascade layers: {cascade.layerCount()}, ' f'bit: {cascade.bitCount()}' ) - cascade.verify(include=blocked, exclude=not_blocked) + cascade.verify(include=include, exclude=exclude) return cascade @@ -224,13 +225,33 @@ def delete(self): self.storage.rm_stored_dir(self.storage.base_location) log.info(f'Deleted {self.storage.base_location}') - def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED): + def generate_and_write_filter(self, block_type: BlockType): + """ + Generate and write the bloom filter for a given block type. + Included items will be items in the specified block type list. + Excluded items will be items in all other data types. + We use the language of inlcude and exclude to distinguish this concept + from blocked and unblocked which are more specific to the block type. + """ stats = {} + include_items = [] + exclude_items = [] + + # Map over the data types in the MLBFDataType enum + for data_type in MLBFDataType: + # if the data type is in the specified block type, + # add it to the include items + if data_type.name == block_type.name: + include_items = self.data[data_type] + # otherwise add items to the exclude items + else: + exclude_items.extend(self.data[data_type]) + bloomfilter = generate_mlbf( stats=stats, - blocked=self.data.blocked_items, - not_blocked=self.data.not_blocked_items, + include=include_items, + exclude=exclude_items, ) # write bloomfilter @@ -241,6 +262,7 @@ def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED): stats['mlbf_filesize'] = os.stat(mlbf_path).st_size log.info(json.dumps(stats)) + return bloomfilter def generate_diffs( self, previous_mlbf: 'MLBF' = None @@ -278,23 +300,47 @@ def generate_and_write_stash(self, previous_mlbf: 'MLBF' = None): in the "unblocked" stash (like for hard blocked items) as this would result in the version being in both blocked and unblocked stashes. """ - diffs = self.generate_diffs(previous_mlbf) - blocked_added, blocked_removed, _ = diffs[BlockType.BLOCKED] - stash_json = { - 'blocked': blocked_added, - 'unblocked': blocked_removed, + # Map block types to hard coded stash keys for compatibility + # with the expected keys in remote settings. + STASH_KEYS = { + BlockType.BLOCKED: 'blocked', + BlockType.SOFT_BLOCKED: 'softblocked', } + UNBLOCKED_STASH_KEY = 'unblocked' - if waffle.switch_is_active('enable-soft-blocking'): - soft_blocked_added, soft_blocked_removed, _ = diffs[BlockType.SOFT_BLOCKED] - stash_json['softblocked'] = soft_blocked_added - stash_json['unblocked'] = [ - unblocked - for unblocked in (blocked_removed + soft_blocked_removed) - if unblocked not in blocked_added - ] + # Base stash includes all of the expected keys from STASH_KEYS + unblocked + stash_json = {key: [] for key in [UNBLOCKED_STASH_KEY, *STASH_KEYS.values()]} + + diffs = self.generate_diffs(previous_mlbf) + soft_block_enabled = waffle.switch_is_active('enable-soft-blocking') + set_blocked_any_type = set() + # Build the stash json from the diff of each block type + for block_type in BlockType: + skip_no_changes = not self.should_upload_stash(block_type, previous_mlbf) + skip_uploading_filter = self.should_upload_filter(block_type, previous_mlbf) + skip_soft_block = ( + block_type == BlockType.SOFT_BLOCKED and not soft_block_enabled + ) - # write stash + # Skip the stash if any of the conditions are met + if any([skip_no_changes, skip_uploading_filter, skip_soft_block]): + continue + + added, removed, _ = diffs[block_type] + # Items added to the block type are included in that block list + # using the hard coded stash key + stash_json[STASH_KEYS[block_type]] = added + # TODO: add a better test for this. + set_blocked_any_type.update(added) + # Items removed from that block type are included in the unblocked list + for item in removed: + # Items already on the hard block list should be excluded from the + # unblocked list to prevent double counting of a version that has moved + # from soft to hard blocked. + if item not in set_blocked_any_type: + stash_json[UNBLOCKED_STASH_KEY].append(item) + + # write the stash json to the stash file stash_path = self.stash_path with self.storage.open(stash_path, 'w') as json_file: log.info(f'Writing to file {stash_path}') diff --git a/src/olympia/blocklist/tests/test_commands.py b/src/olympia/blocklist/tests/test_commands.py index 317916f7a7ca..c33d9fc44482 100644 --- a/src/olympia/blocklist/tests/test_commands.py +++ b/src/olympia/blocklist/tests/test_commands.py @@ -37,6 +37,11 @@ def test_command(self): updated_by=user, ) - call_command('export_blocklist', '1') + call_command('export_blocklist', '1', '--block-type', BlockType.BLOCKED.name) mlbf = MLBF.load_from_storage(1) assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) + call_command( + 'export_blocklist', '1', '--block-type', BlockType.SOFT_BLOCKED.name + ) + mlbf = MLBF.load_from_storage(1) + assert mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) diff --git a/src/olympia/blocklist/tests/test_cron.py b/src/olympia/blocklist/tests/test_cron.py index aab74c65b1fe..52a166564cfa 100644 --- a/src/olympia/blocklist/tests/test_cron.py +++ b/src/olympia/blocklist/tests/test_cron.py @@ -38,6 +38,7 @@ @freeze_time('2020-01-01 12:34:56') @override_switch('blocklist_mlbf_submit', active=True) +@override_switch('enable-soft-blocking', active=False) class TestUploadToRemoteSettings(TestCase): def setUp(self): self.user = user_factory() @@ -95,6 +96,9 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False): filter_list = [BlockType.BLOCKED.name] + if enable_soft_blocking: + filter_list.append(BlockType.SOFT_BLOCKED.name) + with override_switch('enable-soft-blocking', active=enable_soft_blocking): upload_mlbf_to_remote_settings(force_base=True) @@ -113,9 +117,7 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False): ) self.assertEqual( mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)), - # Until we are ready to enable soft blocking - # there should never be a soft block filter. - False, + enable_soft_blocking, ) assert not mlbf.storage.exists(mlbf.stash_path) @@ -159,10 +161,7 @@ def test_skip_update_unless_no_base_mlbf_for_blocked(self): @override_switch('enable-soft-blocking', active=True) def test_skip_update_unless_no_base_mlbf_for_soft_blocked_with_switch_enabled(self): self._test_skip_update_unless_no_base_mlbf( - # Until we enable soft blocking even if there is no soft block base filter - # and the switch is active, no update expected - BlockType.SOFT_BLOCKED, - filter_list=None, + BlockType.SOFT_BLOCKED, filter_list=[BlockType.SOFT_BLOCKED.name] ) def test_skip_update_unless_no_base_mlbf_for_soft_blocked_with_switch_disabled( @@ -249,10 +248,7 @@ def test_skip_update_unless_new_blocks_for_soft_blocked_with_switch_enabled(self self._test_skip_update_unless_new_blocks( block_type=BlockType.SOFT_BLOCKED, enable_soft_blocking=True, - # Until we enable soft blocking - # even if there is a new soft block - # and switch is active, expect no update - expect_update=False, + expect_update=True, ) def test_send_statsd_counts(self): @@ -270,6 +266,12 @@ def test_send_statsd_counts(self): mock.call('blocklist.cron.upload_mlbf_to_remote_settings.blocked_count', 1) in statsd_calls ) + assert ( + mock.call( + 'blocklist.cron.upload_mlbf_to_remote_settings.soft_blocked_count', 1 + ) + in statsd_calls + ) assert ( mock.call( 'blocklist.cron.upload_mlbf_to_remote_settings.not_blocked_count', 1 @@ -370,10 +372,7 @@ def test_upload_stash_unless_force_base_for_blocked_with_switch_enabled(self): self._test_upload_stash_unless_force_base( block_types=[BlockType.BLOCKED], expect_stash=True, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -399,11 +398,8 @@ def test_upload_stash_unless_force_base_for_soft_blocked_with_switch_enabled(sel """ self._test_upload_stash_unless_force_base( block_types=[BlockType.SOFT_BLOCKED], - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - expect_stash=False, - filter_list=[BlockType.BLOCKED], + expect_stash=True, + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -429,10 +425,7 @@ def test_upload_stash_unless_force_base_for_both_blocked_with_switch_enabled(sel self._test_upload_stash_unless_force_base( block_types=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], expect_stash=True, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -448,10 +441,7 @@ def test_dont_upload_stash_unless_force_base_for_both_blocked_with_switch_enable self._test_upload_stash_unless_force_base( block_types=[], expect_stash=False, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED], + filter_list=[BlockType.BLOCKED, BlockType.SOFT_BLOCKED], enable_soft_blocking=True, ) @@ -491,14 +481,11 @@ def test_upload_stash_unless_missing_base_filter(self): with override_switch('enable-soft-blocking', active=True): upload_mlbf_to_remote_settings() - assert not mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) + assert mlbf.storage.exists(mlbf.filter_path(BlockType.SOFT_BLOCKED)) assert ( mock.call( self.current_time, - # Even if updating a soft block and the switch is active - # don't expect a soft block filter update until we - # implement support for that - filter_list=[BlockType.BLOCKED.name], + filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], create_stash=False, ) ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list @@ -539,13 +526,21 @@ def test_upload_stash_unless_enough_changes(self): mock.call( self.current_time, filter_list=[block_type.name], - create_stash=False, + create_stash=True, ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list ) new_mlbf = MLBF.load_from_storage(self.current_time) assert new_mlbf.storage.exists(new_mlbf.filter_path(block_type)) - assert not new_mlbf.storage.exists(new_mlbf.stash_path) + assert new_mlbf.storage.exists(new_mlbf.stash_path) + with new_mlbf.storage.open(new_mlbf.stash_path, 'r') as f: + data = json.load(f) + # We expect an empty stash for hard blocks because we are + # uploading a new filter. + assert len(data['blocked']) == 0 + # We can still see there are 2 hard blocked versions in the new filter. + assert len(new_mlbf.data.blocked_items) == 2 + assert len(data['softblocked']) == 1 @mock.patch('olympia.blocklist.mlbf.BASE_REPLACE_THRESHOLD', 1) def test_upload_stash_even_if_filter_is_updated(self): @@ -577,12 +572,15 @@ def test_upload_stash_even_if_filter_is_updated(self): self.mocks['olympia.blocklist.cron.upload_filter.delay'].assert_called_with( self.current_time, filter_list=[BlockType.BLOCKED.name], - create_stash=False, + create_stash=True, ) mlbf = MLBF.load_from_storage(self.current_time) - mlbf = MLBF.load_from_storage(self.current_time) assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED)) - assert not mlbf.storage.exists(mlbf.stash_path) + assert mlbf.storage.exists(mlbf.stash_path) + with mlbf.storage.open(mlbf.stash_path, 'r') as f: + data = json.load(f) + assert len(data['blocked']) == 0 + assert len(data['softblocked']) == 1 def test_remove_storage_if_no_update(self): """ @@ -624,17 +622,34 @@ def test_compares_against_base_filter_if_missing_previous_filter(self): # and only the soft blocked version is missing from the base filter assert not self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + # now with softblocking enabled we can account for the soft blocked version + with override_switch('enable-soft-blocking', active=True): + upload_mlbf_to_remote_settings(force_base=False) + assert self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + @override_switch('enable-soft-blocking', active=True) - def test_dont_skip_update_if_all_blocked_or_not_blocked(self): + def _test_dont_skip_update_if_all_blocked_or_not_blocked( + self, block_type: BlockType + ): """ If all versions are either blocked or not blocked, skip the update. """ for _ in range(0, 10): - self._block_version(block_type=BlockType.BLOCKED) + self._block_version(block_type=block_type) upload_mlbf_to_remote_settings() assert self.mocks['olympia.blocklist.cron.upload_filter.delay'].called + def test_dont_skip_update_if_all_blocked_or_not_blocked_for_soft_blocked(self): + self._test_dont_skip_update_if_all_blocked_or_not_blocked( + block_type=BlockType.SOFT_BLOCKED + ) + + def test_dont_skip_update_if_all_blocked_or_not_blocked_for_blocked(self): + self._test_dont_skip_update_if_all_blocked_or_not_blocked( + block_type=BlockType.BLOCKED + ) + def test_invalid_cache_results_in_diff(self): self._block_version(block_type=BlockType.BLOCKED) @@ -682,6 +697,13 @@ def test_pass_correct_arguments_to_upload_filter(self): filter_list=[BlockType.BLOCKED.name], create_stash=False, ) + with override_switch('enable-soft-blocking', active=True): + upload_mlbf_to_remote_settings(force_base=True) + spy_delay.assert_called_with( + self.current_time, + filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], + create_stash=False, + ) class TestTimeMethods(TestCase): diff --git a/src/olympia/blocklist/tests/test_mlbf.py b/src/olympia/blocklist/tests/test_mlbf.py index af2e4adfc1a0..3c5f05110be3 100644 --- a/src/olympia/blocklist/tests/test_mlbf.py +++ b/src/olympia/blocklist/tests/test_mlbf.py @@ -579,6 +579,7 @@ def test_diff_all_possible_changes(self): # as they have some kind of block applied. assert first_mlbf.generate_and_write_stash() == { 'blocked': [five_hash, six_hash], + 'softblocked': [], 'unblocked': [], } @@ -630,6 +631,8 @@ def test_diff_all_possible_changes(self): assert second_mlbf.generate_and_write_stash(previous_mlbf=first_mlbf) == { 'blocked': [three_hash, two_hash], + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], # 4 is omitted because it's soft blocked and the waffle switch is off 'unblocked': [five_hash, six_hash], } @@ -657,6 +660,8 @@ def test_generate_stash_returns_expected_stash(self): with mlbf.storage.open(mlbf.stash_path, 'r') as f: assert json.load(f) == { 'blocked': MLBF.hash_filter_inputs(expected_blocked), + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], 'unblocked': [], } @@ -677,6 +682,8 @@ def test_generate_stash_returns_expected_stash(self): with next_mlbf.storage.open(next_mlbf.stash_path, 'r') as f: assert json.load(f) == { 'blocked': [], + # Soft blocked is empty because the waffle switch is off + 'softblocked': [], 'unblocked': MLBF.hash_filter_inputs(expected_unblocked), } @@ -687,6 +694,34 @@ def test_generate_stash_returns_expected_stash(self): 'unblocked': MLBF.hash_filter_inputs(expected_unblocked), } + def test_generate_filter_returns_expected_data(self): + addon, block = self._blocked_addon() + not_blocked = self._version(addon) + not_blocked_version = not_blocked.version + hard_blocked = self._block_version( + block, self._version(addon), block_type=BlockType.BLOCKED + ) + hard_blocked_version = hard_blocked.version.version + soft_blocked = self._block_version( + block, self._version(addon), block_type=BlockType.SOFT_BLOCKED + ) + soft_blocked_version = soft_blocked.version.version + mlbf = MLBF.generate_from_db('test') + + mlbf.generate_and_write_filter(BlockType.BLOCKED).verify( + include=MLBF.hash_filter_inputs([(addon.guid, hard_blocked_version)]), + exclude=MLBF.hash_filter_inputs( + [(addon.guid, soft_blocked_version), (addon.guid, not_blocked_version)] + ), + ) + + mlbf.generate_and_write_filter(BlockType.SOFT_BLOCKED).verify( + include=MLBF.hash_filter_inputs([(addon.guid, soft_blocked_version)]), + exclude=MLBF.hash_filter_inputs( + [(addon.guid, hard_blocked_version), (addon.guid, not_blocked_version)] + ), + ) + def test_changed_count_returns_expected_count(self): addon, block = self._blocked_addon() self._block_version(block, self._version(addon), block_type=BlockType.BLOCKED) @@ -740,16 +775,19 @@ def test_changed_count_returns_expected_count(self): == 1 ) + def _test_not_raises_if_versions_blocked(self, block_type: BlockType): + mlbf = MLBF.generate_from_db('test') + self._blocked_addon(file_kw={'is_signed': True}, block_type=block_type) + assert mlbf.data[block_type] == [] + mlbf.generate_and_write_filter(block_type) + def test_generate_filter_not_raises_if_all_versions_unblocked(self): """ When we create a bloom filter where all versions fall into the "not filtered" category This can create invalid error rates because the error rate depends on these numbers being non-zero. """ - mlbf = MLBF.generate_from_db('test') - self._blocked_addon(file_kw={'is_signed': True}) - assert mlbf.data.blocked_items == [] - mlbf.generate_and_write_filter() + self._test_not_raises_if_versions_blocked(BlockType.BLOCKED) def test_generate_filter_not_raises_if_all_versions_blocked(self): """ @@ -757,10 +795,7 @@ def test_generate_filter_not_raises_if_all_versions_blocked(self): the "not filtered" category This can create invalid error rates because the error rate depends on these numbers being non-zero. """ - mlbf = MLBF.generate_from_db('test') - self._blocked_addon(file_kw={'is_signed': False}) - assert mlbf.data.not_blocked_items == [] - mlbf.generate_and_write_filter() + self._test_not_raises_if_versions_blocked(BlockType.SOFT_BLOCKED) def test_duplicate_guid_is_blocked(self): """