Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FastAPI Redis Caching #4195

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
1849fd3
feat: get logins working with drill
ThomasLaPiana Sep 29, 2023
1d60191
feat: add more endpoints
ThomasLaPiana Sep 29, 2023
5229381
feat: separate each endpoint into its own task
ThomasLaPiana Sep 29, 2023
220f060
feat: optimize the Drill test
ThomasLaPiana Sep 29, 2023
eb0d7e7
feat: make the privacy-experience endpoint async
ThomasLaPiana Sep 29, 2023
800a944
feat: sprinkle some `async sleep` magic into the privacy-experience e…
ThomasLaPiana Sep 29, 2023
abe119c
feat: remove an unused requirement
ThomasLaPiana Sep 29, 2023
2ae8afb
fix: static checks
ThomasLaPiana Sep 29, 2023
1f1f12b
Add FastAPI Redis Caching
ThomasLaPiana Sep 29, 2023
6b33377
Merge branch 'add-more-drill-endpoints' into ThomasLaPiana-add-fastap…
ThomasLaPiana Sep 30, 2023
ffe5a7e
Merge branch 'main' into ThomasLaPiana-add-fastapi-redis-caching
ThomasLaPiana Oct 11, 2023
1347246
fix: remove outdated redis import
ThomasLaPiana Oct 11, 2023
161e271
feat: add caching to the privacy-experience endpoint
ThomasLaPiana Oct 11, 2023
38f32bc
feat: rough POC of custom caching for the privacy experience endpoint
ThomasLaPiana Oct 12, 2023
4d50fd9
feat: additional cleanup
ThomasLaPiana Oct 12, 2023
fdf1d95
Revert "feat: additional cleanup"
ThomasLaPiana Oct 12, 2023
cdb6e49
feat: cleanup, passing more tests
ThomasLaPiana Oct 12, 2023
be70d0c
feat: add headers to the privacy-experience response object indicatin…
ThomasLaPiana Oct 13, 2023
2b64527
checkin: add headers and cache tests, but not passing
ThomasLaPiana Oct 13, 2023
a8a45a2
fix: more tests and almost all static checks
ThomasLaPiana Oct 13, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ PyMySQL==1.0.2
pymssql==2.2.8
python-jose[cryptography]==3.3.0
pyyaml==6.0.1
redis==3.5.3
redis==4.6.0
rich-click==1.6.1
sendgrid==6.9.7
slowapi==0.1.8
Expand Down
52 changes: 41 additions & 11 deletions src/fides/api/api/v1/endpoints/privacy_experience_endpoints.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import asyncio
import uuid
from functools import lru_cache
from html import escape, unescape
from typing import Dict, List, Optional

from fastapi import Depends, HTTPException
from fastapi import Query as FastAPIQuery
from fastapi import Request, Response
from fastapi_pagination import Page, Params
from fastapi_pagination import paginate as fastapi_paginate
from fastapi_pagination.bases import AbstractPage
from loguru import logger
from sqlalchemy.orm import Query, Session
from starlette.status import (
Expand All @@ -25,7 +23,6 @@
)
from fides.api.models.privacy_notice import PrivacyNotice
from fides.api.models.privacy_request import ProvidedIdentity
from fides.api.schemas.privacy_experience import PrivacyExperienceResponse
from fides.api.util.api_router import APIRouter
from fides.api.util.consent_util import (
PRIVACY_EXPERIENCE_ESCAPE_FIELDS,
Expand All @@ -46,7 +43,12 @@

router = APIRouter(tags=["Privacy Experience"], prefix=urls.V1_URL_PREFIX)

BUST_CACHE_HEADER = "bust-endpoint-cache"
CACHE_HEADER = "X-Endpoint-Cache"
PRIVACY_EXPERIENCE_CACHE: Dict[str, Dict] = {}


@lru_cache(maxsize=20, typed=True)
def get_privacy_experience_or_error(
db: Session, experience_id: str
) -> PrivacyExperience:
Expand All @@ -64,6 +66,7 @@ def get_privacy_experience_or_error(
return privacy_experience


@lru_cache(maxsize=20, typed=True)
def _filter_experiences_by_region_or_country(
db: Session, region: Optional[str], experience_query: Query
) -> Query:
Expand Down Expand Up @@ -119,16 +122,14 @@ def _filter_experiences_by_region_or_country(
return db.query(PrivacyExperience).filter(False)


# TODO: readd the fides limiter
@router.get(
urls.PRIVACY_EXPERIENCE,
status_code=HTTP_200_OK,
response_model=Page[PrivacyExperienceResponse],
)
@fides_limiter.limit(CONFIG.security.public_request_rate_limit)
async def privacy_experience_list(
*,
db: Session = Depends(deps.get_db),
params: Params = Depends(),
show_disabled: Optional[bool] = True,
region: Optional[str] = None,
component: Optional[ComponentType] = None,
Expand All @@ -140,7 +141,7 @@ async def privacy_experience_list(
include_meta: Optional[bool] = False,
request: Request, # required for rate limiting
response: Response, # required for rate limiting
) -> AbstractPage[PrivacyExperience]:
) -> Dict:
"""
Public endpoint that returns a list of PrivacyExperience records for individual regions with
relevant privacy notices or tcf contents embedded in the response.
Expand All @@ -149,7 +150,6 @@ async def privacy_experience_list(
notices as well.

:param db:
:param params:
:param show_disabled: If False, returns only enabled Experiences and Notices
:param region: Return the Experiences for the given region
:param component: Returns Experiences of the given component type
Expand All @@ -163,7 +163,33 @@ async def privacy_experience_list(
:param response:
:return:
"""
logger.info("Finding all Privacy Experiences with pagination params '{}'", params)

# These are the parameters that get used to create the cache.
param_hash_list = [
show_disabled,
region,
component,
content_required,
has_config,
fides_user_device_id,
systems_applicable,
include_gvl,
include_meta,
]
# Create a custom hash that avoids unhashable parameters
cache_hash = "_".join([repr(x) for x in param_hash_list])

if request.headers.get(BUST_CACHE_HEADER):
PRIVACY_EXPERIENCE_CACHE.clear()

if PRIVACY_EXPERIENCE_CACHE.get(cache_hash):
logger.debug("Cache HIT: {}", cache_hash)
response.headers[CACHE_HEADER] = "HIT"
return PRIVACY_EXPERIENCE_CACHE[cache_hash]

logger.debug("Cache MISS: {}", cache_hash)
response.headers[CACHE_HEADER] = "MISS"

fides_user_provided_identity: Optional[ProvidedIdentity] = None
if fides_user_device_id:
try:
Expand Down Expand Up @@ -259,7 +285,11 @@ async def privacy_experience_list(

results.append(privacy_experience)

return fastapi_paginate(results, params=params)
# This is structured to look like a paginated result to minimize impact from
# the caching changes
api_result = {"items": results, "total": len(results)}
PRIVACY_EXPERIENCE_CACHE[cache_hash] = api_result
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just want to comment early here. I see you're caching the output of the entire endpoint: now the performance gains make more sense. This is a neat strategy, I'm just more concerned about accuracy now. I had been assuming we'd instead cache some time-consuming individual pieces that go into the experience like the output of get_tcf_contents.

For example, fides_user_device_id is a unique user identifier. If present, we supplement the experience with previously-saved preferences for that user. So if the user saves new preferences, this experience would still return their outdated preferences.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense! Without knowledge of what is actually going on here I can't make those fine-grained caching decisions.

So we can either handle this on the front-end using the header that resets the cache, or we can use a more fine-grained caching method, for which I'd need some more guidance on what is safe to cache.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Real quick: something like the output of `get_tcf_contents`, which is the bulk of TCF experiences, and `PrivacyExperience.get_related_privacy_notices`, which is for non-TCF experiences, but perhaps with the latter refactored into a separate method that doesn't take `fides_user_device_id` in; that would be added separately.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note here, I'm moving a great deal of privacy preferences / privacy experiences endpoints, supporting methods, TCF experiences, etc. to Fidesplus -
https://ethyca.atlassian.net/browse/PROD-1258

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the heads up! In that case I'll move this work over there. I haven't been able to circle back to this yet, but Adam already figured it out for some other endpoints we have :)

return api_result


def embed_experience_details(
Expand Down
7 changes: 3 additions & 4 deletions src/fides/api/util/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from bson.objectid import ObjectId
from loguru import logger
from redis import Redis
from redis.client import Script # type: ignore
from redis.exceptions import ConnectionError as ConnectionErrorFromRedis

from fides.api import common_exceptions
Expand Down Expand Up @@ -63,6 +62,7 @@ def _custom_decoder(json_dict: Dict[str, Any]) -> Dict[str, Any]:
return json_dict


# pylint: disable=abstract-method
class FidesopsRedis(Redis):
"""
An extension to Redis' python bindings to support auto expiring data input. This class
Expand Down Expand Up @@ -95,10 +95,9 @@ def get_keys_by_prefix(self, prefix: str, chunk_size: int = 1000) -> List[str]:

def delete_keys_by_prefix(self, prefix: str) -> None:
"""Delete all keys starting with a given prefix"""
s: Script = self.register_script(
self.register_script(
f"for _,k in ipairs(redis.call('keys','{prefix}*')) do redis.call('del',k) end"
)
s()
)()

def get_values(self, keys: List[str]) -> Dict[str, Optional[Any]]:
"""Retrieve all values corresponding to the set of input keys and return them as a
Expand Down
Loading
Loading