Skip to content

Commit

Permalink
Merge pull request #28 from cparmet/more-assert-methods
Browse files Browse the repository at this point in the history
Add more validation methods
  • Loading branch information
cparmet authored Jun 25, 2024
2 parents 4b7ff72 + e19dcde commit ff91abb
Show file tree
Hide file tree
Showing 19 changed files with 2,206 additions and 250 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ dev.ipynb
dev_script.py

dist/*
site/*
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ repos:
- id: debug-statements
- id: detect-private-key
- id: end-of-file-fixer
- id: trailing-whitespace # Disabled because some markdown styles require trailing whitespace
- id: trailing-whitespace
exclude: md
# args: [--markdown-linebreak-ext=md]

- repo: https://github.com/psf/black
rev: 22.12.0
Expand Down
134 changes: 81 additions & 53 deletions README.md

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
143 changes: 90 additions & 53 deletions docs/usage.md

Large diffs are not rendered by default.

578 changes: 560 additions & 18 deletions pandas_checks/DataFrameChecks.py

Large diffs are not rendered by default.

538 changes: 520 additions & 18 deletions pandas_checks/SeriesChecks.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pandas_checks/display.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _render_text(
)
display(
Markdown(
f"<{tag} style='text-align: left'>{lead_in_rendered + ' ' if lead_in_rendered else ''}<span 'color:{text_color};' 'background-color:{text_background_color}'>{_filter_emojis(text)}</span></{tag}>"
f"<{tag} style='text-align: left'>{lead_in_rendered + ' ' if lead_in_rendered else ''}<span style='color:{text_color}; background-color:{text_background_color}'>{_filter_emojis(text)}</span></{tag}>"
)
)

Expand Down Expand Up @@ -301,7 +301,7 @@ def _lead_in(lead_in: Union[str, None], foreground: str, background: str) -> str
The formatted lead-in text.
"""
return (
f"<span style='color:{foreground}; background-color:{background}'>{_filter_emojis(lead_in).strip()}:</span>"
f"<span style='color:{foreground}; background-color:{background}'>{_filter_emojis(lead_in).strip()}</span>:"
if lead_in
else ""
)
Expand Down
2 changes: 0 additions & 2 deletions pandas_checks/run_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from typing import Any, Callable, List, Union

import pandas as pd

from .display import _display_check
from .options import get_mode

Expand Down
66 changes: 65 additions & 1 deletion pandas_checks/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""
Utility functions for the pandas_checks package.
"""
from datetime import datetime, timedelta
from inspect import getsourcelines
from typing import Callable
from typing import Any, Callable, Type, Union

import pandas as pd
from pandas.core.groupby.groupby import DataError

from .display import _display_line


def _lambda_to_string(lambda_func: Callable) -> str:
Expand All @@ -22,3 +26,63 @@ def _lambda_to_string(lambda_func: Callable) -> str:
Try other ways to get just the argument we want.
"""
return "".join(getsourcelines(lambda_func)[0]).lstrip(" .")


def _has_nulls(
    data: Union[pd.DataFrame, pd.Series],
    fail_message: str,
    raise_exception: bool = True,
    exception_to_raise: Type[BaseException] = DataError,
) -> bool:
    """Utility function to check for nulls as part of a larger check.

    Args:
        data: DataFrame or Series to inspect for null values.
        fail_message: Text that prefixes the failure report (exception
            message or displayed line).
        raise_exception: If True, raise `exception_to_raise` when nulls are
            found. If False, display the failure with `_display_line()`
            and return normally.
        exception_to_raise: Exception class raised when nulls are found and
            `raise_exception` is True.

    Returns:
        True if `data` contains at least one null, otherwise False. Always
        a built-in `bool`.

    Raises:
        AttributeError: If `data` is neither a DataFrame nor a Series.
        exception_to_raise: If nulls are present and `raise_exception` is
            True.
    """
    if isinstance(data, pd.DataFrame):
        found = data.isna().any().any()
    elif isinstance(data, pd.Series):
        found = data.isna().any()
    else:
        # AttributeError is kept (rather than TypeError) so existing callers
        # that catch it keep working.
        raise AttributeError(f"Unexpected data type in _has_nulls(): {type(data)}")

    # pandas reductions return numpy.bool_; coerce so the result matches the
    # annotated return type and works with identity checks (`is True`).
    has_nulls = bool(found)
    if not has_nulls:
        return False

    detail = "Nulls present (to disable, pass `assert_not_null=False`)"
    if raise_exception:
        raise exception_to_raise(f"{fail_message}: {detail}")

    _display_line(
        lead_in=fail_message,
        line=detail,
        colors={
            "lead_in_text_color": pd.get_option("pdchecks.fail_message_fg_color"),
            "lead_in_background_color": pd.get_option(
                "pdchecks.fail_message_bg_color"
            ),
        },
    )
    return True


def _series_is_type(s: pd.Series, dtype: Type[Any]) -> bool:
    """Utility function to check if a series has an expected type.

    Includes special handling for strings, since 'object' type in Pandas
    may not mean a string.

    Args:
        s: Series whose dtype is inspected.
        dtype: Expected type. `str` / "str", `datetime` / "datetime" / "date"
            and `timedelta` / "timedelta" are matched with the
            `pd.api.types` helpers. Any other value is compared to
            `s.dtypes` with `==`.

    Returns:
        True if the series matches the expected type, otherwise False.
    """
    if dtype in [str, "str"]:
        return bool(pd.api.types.is_string_dtype(s))
    if dtype in [datetime, "datetime", "date"]:
        # is_datetime64_any_dtype() already accepts timezone-aware datetimes,
        # so the deprecated is_datetime64tz_dtype() fallback is not needed
        # (it only ever ran, and warned, for non-datetime series).
        return bool(pd.api.types.is_datetime64_any_dtype(s))
    if dtype in [timedelta, "timedelta"]:
        return bool(pd.api.types.is_timedelta64_dtype(s))
    return bool(s.dtypes == dtype)


def _is_type(data: Union[pd.DataFrame, pd.Series], dtype: Type[Any]) -> bool:
    """Utility function to check if a dataframe's columns or one series has an expected type.

    Includes special handling for strings, since 'object' type in Pandas
    may not mean a string.

    Args:
        data: A Series, or a DataFrame whose columns are each checked.
        dtype: Expected type, interpreted by `_series_is_type()`.

    Returns:
        For a Series, the result of `_series_is_type()`. For a DataFrame,
        True only if every column matches (True for a DataFrame with no
        columns).
    """
    if isinstance(data, pd.Series):
        return _series_is_type(data, dtype)
    # DataFrame.items() yields every column as a Series, even when column
    # labels are duplicated (where `data[label]` would return a DataFrame).
    # The generator lets all() stop at the first mismatching column.
    return all(_series_is_type(column, dtype) for _, column in data.items())
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pandas-checks"
version = "0.1.8"
version = "0.2.0"
description = "Non-invasive health checks for Pandas method chains"
authors = ["Chad Parmet <[email protected]>"]
readme = "README.md"
Expand Down
98 changes: 97 additions & 1 deletion tests/cases_dataframechecks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""Dataframe methods to test in batch"""
"""DataFrameChecks methods to test in batch,
verifying that they don't change the actual dataframe
in the method chain."""

import pandas_checks as pdc

Expand All @@ -15,6 +17,100 @@ def method_assert_data():
)


def method_assert_datetime():
    """Return a `(df, args)` callable that runs `df.check.assert_datetime`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_datetime(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_float():
    """Return a `(df, args)` callable that runs `df.check.assert_float`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_float(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_int():
    """Return a `(df, args)` callable that runs `df.check.assert_int`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_int(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_less_than():
    """Return a `(df, args)` callable that runs `df.check.assert_less_than`.

    The check is applied to `args["first_num_col"]` with `max=1000` and
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_less_than(
        raise_exception=False,
        subset=args["first_num_col"],
        max=1000,
    )


def method_assert_greater_than():
    """Return a `(df, args)` callable that runs `df.check.assert_greater_than`.

    The check is applied to `args["first_num_col"]` with `min=-1000` and
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_greater_than(
        raise_exception=False,
        subset=args["first_num_col"],
        min=-1000,
    )


def method_assert_negative():
    """Return a `(df, args)` callable that runs `df.check.assert_negative`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_negative(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_not_null():
    """Return a `(df, args)` callable that runs `df.check.assert_not_null`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_not_null(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_null():
    """Return a `(df, args)` callable that runs `df.check.assert_null`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_null(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_positive():
    """Return a `(df, args)` callable that runs `df.check.assert_positive`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_positive(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_str():
    """Return a `(df, args)` callable that runs `df.check.assert_str`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_str(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_timedelta():
    """Return a `(df, args)` callable that runs `df.check.assert_timedelta`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_timedelta(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_assert_type():
    """Return a `(df, args)` callable that runs `df.check.assert_type`.

    The check is applied to `args["first_num_col"]` with `dtype=float` and
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_type(
        raise_exception=False,
        subset=args["first_num_col"],
        dtype=float,
    )


def method_assert_unique():
    """Return a `(df, args)` callable that runs `df.check.assert_unique`.

    The check is applied to `args["first_num_col"]` with
    `raise_exception=False`.
    """
    return lambda df, args: df.check.assert_unique(
        raise_exception=False,
        subset=args["first_num_col"],
    )


def method_columns():
    """Return a `(df, _)` callable that runs `df.check.columns`.

    The check is called with `fn=lambda df: df.dropna()` and
    `check_name="Test"`; the callable's second argument is ignored.
    """
    return lambda df, _: df.check.columns(fn=lambda df: df.dropna(), check_name="Test")

Expand Down
Loading

0 comments on commit ff91abb

Please sign in to comment.