Skip to content

Commit

Permalink
Using get_unique_data_categories function
Browse files Browse the repository at this point in the history
  • Loading branch information
galvana committed Dec 13, 2024
1 parent 53667bf commit 667432d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 35 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""add unique data category function
Revision ID: 201c1af2a90e
Revises: c90d46f6d3f2
Create Date: 2024-12-13 01:37:43.709477
"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "201c1af2a90e"
down_revision = "c90d46f6d3f2"
branch_labels = None
depends_on = None


def upgrade():
    """Install the ``get_unique_data_categories(text[])`` PL/pgSQL function.

    The SQL function takes an array of dataset keys, looks up the
    ``ctl_datasets`` rows whose ``fides_key`` is in that array, and walks each
    row's ``collections`` JSON at every depth (``$.**``) to gather all
    non-null ``data_categories`` arrays. It returns the distinct category
    strings as ``text[]``; when nothing matches, ``COALESCE`` turns the NULL
    aggregate into an empty array.

    ``CREATE OR REPLACE`` is used, so an existing definition with the same
    signature is overwritten rather than raising.
    """
    # Whitespace inside the statement is not significant to PostgreSQL; the
    # statement text is kept at column 0 so the literal stays unchanged.
    create_function_sql = """
CREATE OR REPLACE FUNCTION get_unique_data_categories(dataset_refs text[])
RETURNS text[] AS $$
BEGIN
RETURN COALESCE((
SELECT array_agg(DISTINCT category)
FROM (
SELECT jsonb_array_elements_text(
jsonb_path_query(collections::jsonb, '$.** ? (@.data_categories != null).data_categories')
) AS category
FROM ctl_datasets
WHERE fides_key = ANY(dataset_refs)
) extracted_categories
), ARRAY[]::text[]);
END;
$$ LANGUAGE plpgsql STABLE;
"""
    op.execute(create_function_sql)


def downgrade():
    """Remove the ``get_unique_data_categories(text[])`` function.

    ``IF EXISTS`` makes the statement a no-op when the function is absent.
    """
    drop_function_sql = "DROP FUNCTION IF EXISTS get_unique_data_categories(text[]);"
    op.execute(drop_function_sql)
39 changes: 4 additions & 35 deletions src/fides/api/models/sql_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
Text,
TypeDecorator,
UniqueConstraint,
any_,
case,
cast,
func,
Expand Down Expand Up @@ -328,36 +327,6 @@ class PolicyCtl(Base, FidesBase):
rules = Column(JSON)


def create_data_categories_property(dataset_references):
    """
    Build a column property holding the distinct data categories of the referenced datasets.

    ``dataset_references`` is compared against ``Dataset.fides_key`` with ``ANY``. For each
    matching dataset, every non-null ``data_categories`` array found at any depth of its
    ``collections`` JSON is expanded into individual text values, and the values are then
    aggregated with ``array_agg(DISTINCT ...)`` into a single array.
    """

    # One row per category string: jsonb_path_query yields each data_categories
    # array, and jsonb_array_elements_text unnests it.
    category_elements = func.jsonb_array_elements_text(
        text(
            "jsonb_path_query(collections::jsonb, '$.** ? (@.data_categories != null).data_categories')"
        )
    ).label("category")

    # CTE limited to the datasets named in dataset_references.
    matching_categories = (
        select([category_elements])
        .select_from(Dataset)
        .where(Dataset.fides_key == any_(dataset_references))
        .cte()
    )

    # Collapse the CTE rows into one de-duplicated array value.
    unique_categories = func.array_agg(func.distinct(matching_categories.c.category))

    return column_property(
        select([unique_categories])
        .select_from(matching_categories)
        .scalar_subquery()
    )


# System
class System(Base, FidesBase):
"""
Expand Down Expand Up @@ -436,8 +405,8 @@ class System(Base, FidesBase):
"Cookies", back_populates="system", lazy="selectin", uselist=True, viewonly=True
)

dataset_data_categories: List[str] = create_data_categories_property(
dataset_references
dataset_data_categories: List[str] = column_property(
func.get_unique_data_categories(dataset_references)
)

@classmethod
Expand Down Expand Up @@ -527,8 +496,8 @@ class PrivacyDeclaration(Base):
"Cookies", back_populates="privacy_declaration", lazy="joined", uselist=True
)

dataset_data_categories: List[str] = create_data_categories_property(
dataset_references
dataset_data_categories: List[str] = column_property(
func.get_unique_data_categories(dataset_references)
)

@classmethod
Expand Down

0 comments on commit 667432d

Please sign in to comment.