Skip to content

Commit

Permalink
Generate stash including soft blocked versions
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinMind committed Nov 6, 2024
1 parent baa72e6 commit f4b5386
Show file tree
Hide file tree
Showing 3 changed files with 362 additions and 78 deletions.
9 changes: 6 additions & 3 deletions src/olympia/blocklist/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from olympia.zadmin.models import get_config

from .mlbf import MLBF
from .models import Block, BlocklistSubmission
from .models import Block, BlocklistSubmission, BlockType
from .tasks import cleanup_old_files, process_blocklistsubmission, upload_filter
from .utils import datetime_to_ts

Expand Down Expand Up @@ -89,7 +89,9 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
else base_filter
)

changes_count = mlbf.blocks_changed_since_previous(previous_filter)
changes_count = mlbf.blocks_changed_since_previous(
BlockType.BLOCKED, previous_filter
)
statsd.incr(
'blocklist.cron.upload_mlbf_to_remote_settings.blocked_changed', changes_count
)
Expand Down Expand Up @@ -119,7 +121,8 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
force_base
or base_filter is None
or previous_filter is None
or mlbf.blocks_changed_since_previous(base_filter) > BASE_REPLACE_THRESHOLD
or mlbf.blocks_changed_since_previous(BlockType.BLOCKED, base_filter)
> BASE_REPLACE_THRESHOLD
)

if make_base_filter:
Expand Down
79 changes: 56 additions & 23 deletions src/olympia/blocklist/mlbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import secrets
from enum import Enum
from typing import List, Optional, Set, Tuple
from typing import Dict, List, Optional, Set, Tuple

from django.utils.functional import cached_property

Expand All @@ -19,6 +19,15 @@
log = olympia.core.logger.getLogger('z.amo.blocklist')


def diff_lists(
    previous: List[str], current: List[str]
) -> Tuple[Set[str], Set[str], int]:
    """Compare two lists of items and report what changed between them.

    Returns a ``(extras, deletes, changed_count)`` tuple:

    - ``extras``: items present in ``current`` but not in ``previous``.
    - ``deletes``: items present in ``previous`` but not in ``current``.
    - ``changed_count``: ``len(extras) + len(deletes)`` when ``previous`` is
      non-empty; otherwise the length of ``current`` (duplicates included),
      since there is nothing to compare against.
    """
    before = set(previous)
    after = set(current)
    added = after - before
    removed = before - after
    if len(previous) == 0:
        # No baseline: every entry in the current list counts as a change.
        total = len(current)
    else:
        total = len(added) + len(removed)
    return added, removed, total


def generate_mlbf(stats, blocked, not_blocked):
log.info('Starting to generating bloomfilter')

Expand Down Expand Up @@ -123,8 +132,16 @@ def __init__(self, *args, **kwargs):

@cached_property
def _all_blocks(self):
    """Return every block version whose version has a signed file.

    Rows are ordered by ``BlockVersion`` id and returned as named tuples
    exposing ``block__guid``, ``version__version``, ``version_id`` and
    ``block_type``. The result is cached on the instance by
    ``cached_property``.
    """
    # NOTE(review): the explicit ordering presumably exists to make the
    # generated output deterministic between runs - confirm.
    return (
        BlockVersion.objects.filter(version__file__is_signed=True)
        .order_by('id')
        .values_list(
            'block__guid',
            'version__version',
            'version_id',
            'block_type',
            named=True,
        )
    )

def _format_blocks(self, block_type: BlockType) -> List[str]:
Expand All @@ -148,9 +165,9 @@ def soft_blocked_items(self) -> List[str]:
def not_blocked_items(self) -> List[str]:
all_blocks_ids = [version.version_id for version in self._all_blocks]
not_blocked_items = MLBF.hash_filter_inputs(
Version.unfiltered.exclude(id__in=all_blocks_ids or ()).values_list(
'addon__addonguid__guid', 'version'
)
Version.unfiltered.exclude(id__in=all_blocks_ids or ())
.order_by('id')
.values_list('addon__addonguid__guid', 'version')
)
# even though we exclude all the version ids in the query there's an
# edge case where the version string occurs twice for an addon so we
Expand Down Expand Up @@ -213,33 +230,49 @@ def generate_and_write_filter(self):

def generate_diffs(
    self, previous_mlbf: 'MLBF' = None
) -> Dict[BlockType, Tuple[Set[str], Set[str], int]]:
    """Diff this filter's data against a previous filter, per block type.

    For every member of ``BlockType`` the items in ``self.data`` are compared
    with the items of ``previous_mlbf.data`` for the same block type, using
    ``diff_lists``. When ``previous_mlbf`` is ``None`` the previous list is
    treated as empty.

    Returns a dict mapping each block type to an
    ``(added, removed, changed_count)`` tuple.
    """
    return {
        block_type: diff_lists(
            [] if previous_mlbf is None else previous_mlbf.data[block_type],
            self.data[block_type],
        )
        for block_type in BlockType
    }

def generate_and_write_stash(self, previous_mlbf: 'MLBF' = None):
    """
    Generate and write the stash file representing the changes between
    the previous and the current MLBF filters.

    The stash contains three sorted lists:

    - ``blocked``: items newly added to the hard-blocked list.
    - ``softblocked``: items newly added to the soft-blocked list.
    - ``unblocked``: items removed from the hard-blocked list, plus items
      removed from the soft-blocked list that were not added to the
      hard-blocked list in the same diff.

    In order to support older FX clients that only understand blocked and
    unblocked, the unblocked list combines the removals from both lists.
    Items that have moved from hard to soft blocked are therefore seen as
    unblocked from the hard-blocked list by both newer and older clients;
    newer clients additionally see the item in ``softblocked`` and apply
    the soft-blocked filter.

    Returns the dict that was written to the stash file.
    """
    diffs = self.generate_diffs(previous_mlbf)
    blocked_added, blocked_removed, _ = diffs[BlockType.BLOCKED]
    soft_blocked_added, soft_blocked_removed, _ = diffs[BlockType.SOFT_BLOCKED]
    stash_json = {
        'blocked': sorted(blocked_added),
        'softblocked': sorted(soft_blocked_added),
        # `-` binds tighter than `|`; the parentheses only make explicit the
        # grouping the expression already had. An item that moved from soft
        # to hard blocked is listed under 'blocked', so it must not also be
        # reported as unblocked.
        'unblocked': sorted(
            blocked_removed | (soft_blocked_removed - blocked_added)
        ),
    }
    # write stash
    stash_path = self.stash_path
    with self.storage.open(stash_path, 'w') as json_file:
        log.info(f'Writing to file {stash_path}')
        json.dump(stash_json, json_file)
    return stash_json

def blocks_changed_since_previous(
    self, block_type: BlockType, previous_mlbf: 'MLBF' = None
):
    """Return how many items of ``block_type`` changed since ``previous_mlbf``.

    This is the ``changed_count`` element of the ``generate_diffs`` result
    for the given block type. When ``previous_mlbf`` is ``None`` the diff is
    computed against an empty previous list.
    """
    _added, _removed, changed_count = self.generate_diffs(previous_mlbf)[block_type]
    return changed_count

@classmethod
def load_from_storage(
Expand Down
Loading

0 comments on commit f4b5386

Please sign in to comment.