Skip to content

Commit

Permalink
Add support for soft/hard block in upload_to_mllf_to_remote_settings …
Browse files Browse the repository at this point in the history
…cron (#22885)
  • Loading branch information
KevinMind authored Nov 25, 2024
1 parent 6011add commit 6e44ecb
Show file tree
Hide file tree
Showing 6 changed files with 515 additions and 178 deletions.
107 changes: 47 additions & 60 deletions src/olympia/blocklist/cron.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from datetime import datetime
from typing import List

import waffle
from django_statsd.clients import statsd

import olympia.core.logger
from olympia.constants.blocklist import (
BASE_REPLACE_THRESHOLD,
MLBF_BASE_ID_CONFIG_KEY,
MLBF_TIME_CONFIG_KEY,
)
from olympia.zadmin.models import get_config

from .mlbf import MLBF
from .models import Block, BlocklistSubmission, BlockType
from .tasks import cleanup_old_files, process_blocklistsubmission, upload_filter
from .tasks import process_blocklistsubmission, upload_filter
from .utils import datetime_to_ts


Expand All @@ -28,9 +28,9 @@ def get_last_generation_time():
return get_config(MLBF_TIME_CONFIG_KEY, None, json_value=True)


def get_base_generation_time():
def get_base_generation_time(block_type: BlockType):
return get_config(
MLBF_BASE_ID_CONFIG_KEY(BlockType.BLOCKED, compat=True), None, json_value=True
MLBF_BASE_ID_CONFIG_KEY(block_type, compat=True), None, json_value=True
)


Expand Down Expand Up @@ -66,48 +66,45 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
# An add-on version/file from after this time can't be reliably asserted -
# there may be false positives or false negatives.
# https://github.com/mozilla/addons-server/issues/13695
generation_time = get_generation_time()
# This timestamp represents the last time the MLBF was generated and uploaded.
# It could have been a base filter or a stash.
last_generation_time = get_last_generation_time()
# This timestamp represents the point in time when
# the base filter was generated and uploaded.
base_generation_time = get_base_generation_time()

mlbf = MLBF.generate_from_db(generation_time)

base_filter = (
MLBF.load_from_storage(base_generation_time)
if base_generation_time is not None
else None
mlbf = MLBF.generate_from_db(get_generation_time())
previous_filter = MLBF.load_from_storage(
# This timestamp represents the last time the MLBF was generated and uploaded.
# It could have been a base filter or a stash.
get_last_generation_time()
)

previous_filter = (
# Only load previoous filter if there is a timestamp to use
# and that timestamp is not the same as the base_filter
MLBF.load_from_storage(last_generation_time)
if last_generation_time is not None
and (base_filter is None or base_filter.created_at != last_generation_time)
else base_filter
)

changes_count = mlbf.blocks_changed_since_previous(
BlockType.BLOCKED, previous_filter
)
statsd.incr(
'blocklist.cron.upload_mlbf_to_remote_settings.blocked_changed', changes_count
)
need_update = (
force_base
or base_filter is None
or (
previous_filter is not None
and previous_filter.created_at < get_blocklist_last_modified_time()
)
or changes_count > 0
)
if not need_update:
base_filters_to_update: List[BlockType] = []
create_stash = False

# Determine which base filters need to be re uploaded
# and whether a new stash needs to be created.
for block_type in BlockType:
# This prevents us from updating a stash or filter based on new soft blocks.
if block_type == BlockType.SOFT_BLOCKED:
log.info(
'Skipping soft-blocks because enable-soft-blocking switch is inactive'
)
continue

base_filter = MLBF.load_from_storage(get_base_generation_time(block_type))

# Add this block type to the list of filters to be re-uploaded.
if (
force_base
or base_filter is None
or mlbf.should_upload_filter(block_type, base_filter)
):
base_filters_to_update.append(block_type)
# Only update the stash if we should AND if we aren't already
# re-uploading the filter for this block type.
elif mlbf.should_upload_stash(block_type, previous_filter or base_filter):
create_stash = True

skip_update = len(base_filters_to_update) == 0 and not create_stash
if skip_update:
log.info('No new/modified/deleted Blocks in database; skipping MLBF generation')
# Delete the locally generated MLBF directory and files as they are not needed.
mlbf.delete()
return

statsd.incr(
Expand All @@ -119,28 +116,18 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
len(mlbf.data.not_blocked_items),
)

make_base_filter = (
force_base
or base_filter is None
or previous_filter is None
or mlbf.blocks_changed_since_previous(BlockType.BLOCKED, base_filter)
> BASE_REPLACE_THRESHOLD
)

if make_base_filter:
mlbf.generate_and_write_filter()
else:
if create_stash:
mlbf.generate_and_write_stash(previous_filter)

for block_type in base_filters_to_update:
mlbf.generate_and_write_filter(block_type)

upload_filter.delay(
generation_time,
filter_list=[BlockType.BLOCKED.name] if make_base_filter else [],
create_stash=not make_base_filter,
mlbf.created_at,
filter_list=[key.name for key in base_filters_to_update],
create_stash=create_stash,
)

if base_filter:
cleanup_old_files.delay(base_filter_id=base_filter.created_at)


def process_blocklistsubmissions():
qs = BlocklistSubmission.objects.filter(
Expand Down
47 changes: 43 additions & 4 deletions src/olympia/blocklist/mlbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from olympia.amo.utils import SafeStorage
from olympia.blocklist.models import BlockType, BlockVersion
from olympia.blocklist.utils import datetime_to_ts
from olympia.constants.blocklist import BASE_REPLACE_THRESHOLD
from olympia.versions.models import Version


Expand Down Expand Up @@ -79,7 +80,8 @@ class BaseMLBFLoader:
def __init__(self, storage: SafeStorage):
self.storage = storage

def data_type_key(self, key: MLBFDataType) -> str:
@classmethod
def data_type_key(cls, key: MLBFDataType) -> str:
return key.name.lower()

@cached_property
Expand Down Expand Up @@ -207,13 +209,22 @@ def hash_filter_inputs(cls, input_list):
for (guid, version) in input_list
]

def filter_path(self, _block_type: BlockType = BlockType.BLOCKED):
return self.storage.path('filter')
def filter_path(self, block_type: BlockType, compat: bool = False):
# Override the return value of the BLOCKED filter
# to for backwards compatibility with the old file name
if block_type == BlockType.BLOCKED and compat:
return self.storage.path('filter')
return self.storage.path(f'filter-{BaseMLBFLoader.data_type_key(block_type)}')

@property
def stash_path(self):
return self.storage.path('stash.json')

def delete(self):
if self.storage.exists(self.storage.base_location):
self.storage.rm_stored_dir(self.storage.base_location)
log.info(f'Deleted {self.storage.base_location}')

def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED):
stats = {}

Expand All @@ -223,7 +234,15 @@ def generate_and_write_filter(self, block_type: BlockType = BlockType.BLOCKED):
not_blocked=self.data.not_blocked_items,
)

# write bloomfilter
# write bloomfilter to old and new file names
mlbf_path = self.filter_path(block_type, compat=True)
with self.storage.open(mlbf_path, 'wb') as filter_file:
log.info(f'Writing to file {mlbf_path}')
bloomfilter.tofile(filter_file)
stats['mlbf_filesize'] = os.stat(mlbf_path).st_size

# also write to the new file name. After the switch is complete,
# this file will be used and the old file will be deleted.
mlbf_path = self.filter_path(block_type)
with self.storage.open(mlbf_path, 'wb') as filter_file:
log.info(f'Writing to file {mlbf_path}')
Expand Down Expand Up @@ -297,6 +316,26 @@ def blocks_changed_since_previous(
_, _, changed_count = self.generate_diffs(previous_mlbf)[block_type]
return changed_count

def should_upload_filter(
self, block_type: BlockType = BlockType.BLOCKED, previous_mlbf: 'MLBF' = None
):
return (
self.blocks_changed_since_previous(
block_type=block_type, previous_mlbf=previous_mlbf
)
> BASE_REPLACE_THRESHOLD
)

def should_upload_stash(
self, block_type: BlockType = BlockType.BLOCKED, previous_mlbf: 'MLBF' = None
):
return (
self.blocks_changed_since_previous(
block_type=block_type, previous_mlbf=previous_mlbf
)
> 0
)

@classmethod
def load_from_storage(
cls, created_at: str = datetime_to_ts(), error_on_missing: bool = False
Expand Down
3 changes: 2 additions & 1 deletion src/olympia/blocklist/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
version_factory,
)
from olympia.blocklist.mlbf import MLBF
from olympia.blocklist.models import BlockType


class TestExportBlocklist(TestCase):
Expand Down Expand Up @@ -38,4 +39,4 @@ def test_command(self):

call_command('export_blocklist', '1')
mlbf = MLBF.load_from_storage(1)
assert mlbf.storage.exists(mlbf.filter_path())
assert mlbf.storage.exists(mlbf.filter_path(BlockType.BLOCKED))
Loading

0 comments on commit 6e44ecb

Please sign in to comment.