Skip to content

Commit

Permalink
Notebook branch (#173)
Browse files Browse the repository at this point in the history
+ gzip example, cruft removal
  • Loading branch information
simplymathematics authored Mar 20, 2024
1 parent 1c6db5b commit 9cbae22
Show file tree
Hide file tree
Showing 116 changed files with 27,060 additions and 5,223 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ jobs:
- uses: actions/checkout@v2
- uses: psf/black@stable
with:
options: "--check --verbose"
src: "deckard/"
jupyter: true
16 changes: 16 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,19 @@ deckard/deckard.egg-info/*

*log.txt
*.hydra


# envs
env/


# random pdfs
*.pdf
# random pngs
*.png

# screenlog
screenlog.*

# tmp.py
tmp.py
62 changes: 31 additions & 31 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,36 @@
repos:
- repo: https://github.com/asottile/add-trailing-comma
rev: v2.2.3
hooks:
- id: add-trailing-comma
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0 # Use the ref you want to point at
hooks:
- id: check-builtin-literals
- id: check-case-conflict
- id: check-symlinks
- id: check-toml
- id: detect-private-key
- id: end-of-file-fixer
- id: check-yaml
args : ['--unsafe']
- repo: https://github.com/hadialqattan/pycln
rev: v2.1.1 # Possible releases: https://github.com/hadialqattan/pycln/releases
hooks:
- id: pycln
args: [deckard/]
- repo: https://github.com/pycqa/flake8
rev: '5.0.4' # pick a git hash / tag to point to
hooks:
- id: flake8
exclude: __init__.py
args: [--ignore=E501 W503]
- repo: https://github.com/psf/black
rev: 22.8.0
hooks:
- id: black
- repo: https://github.com/asottile/add-trailing-comma
rev: v3.1.0
hooks:
- id: add-trailing-comma
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0 # Use the ref you want to point at
hooks:
- id: check-builtin-literals
- id: check-case-conflict
- id: check-symlinks
- id: check-toml
- id: detect-private-key
- id: end-of-file-fixer
- id: check-yaml
args: [--unsafe]
- repo: https://github.com/hadialqattan/pycln
rev: v2.4.0 # Possible releases: https://github.com/hadialqattan/pycln/releases
hooks:
- id: pycln
args: [deckard/]
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
# It is recommended to specify the latest version of Python
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3
language_version: python3
- repo: https://github.com/pycqa/flake8
rev: 7.0.0 # pick a git hash / tag to point to
hooks:
- id: flake8
exclude: __init__.py
args: [--ignore=E501 W503]
2 changes: 0 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ RUN python3 -m pip install nvidia-pyindex nvidia-cuda-runtime-cu11
RUN git clone https://github.com/simplymathematics/deckard.git
WORKDIR /deckard
RUN python3 -m pip install --editable .
RUN python3 -m pip install pytest torch torchvision tensorflow
RUN git clone https://github.com/Trusted-AI/adversarial-robustness-toolbox.git
RUN cd adversarial-robustness-toolbox && python3 -m pip install .
RUN apt install python-is-python3
RUN pytest test
2 changes: 1 addition & 1 deletion deckard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
},
},
"loggers": {
"deckard": {"handlers": ["default"]},
"deckard": {"handlers": ["default"], "level": "INFO", "propagate": True},
"tests": {"handlers": ["test"], "level": "DEBUG", "propagate": True},
},
}
Expand Down
11 changes: 6 additions & 5 deletions deckard/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
""""Runs a submodule passed as an arg."""

#!/usr/bin/env python3
import argparse
import subprocess
import logging
Expand Down Expand Up @@ -44,9 +43,11 @@ def parse_and_repro(args, default_config="default.yaml", config_dir="conf"):
if len(args) == 0:
assert (
save_params_file(
config_dir=Path(Path(), config_dir)
if not Path(config_dir).is_absolute()
else Path(config_dir),
config_dir=(
Path(Path(), config_dir)
if not Path(config_dir).is_absolute()
else Path(config_dir)
),
config_file=default_config,
)
is None
Expand Down
40 changes: 37 additions & 3 deletions deckard/base/attack/attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from omegaconf import DictConfig, OmegaConf
from hydra.utils import instantiate
from art.utils import to_categorical, compute_success
from sklearn.utils.validation import check_is_fitted
from sklearn.base import BaseEstimator
from sklearn.exceptions import NotFittedError
from random import randint
from ..data import Data
from ..model import Model
Expand Down Expand Up @@ -117,7 +120,13 @@ class EvasionAttack:
kwargs: Union[dict, None] = field(default_factory=dict)

def __init__(
self, name: str, data: Data, model: Model, init: dict, attack_size=-1, **kwargs
self,
name: str,
data: Data,
model: Model,
init: dict,
attack_size=-1,
**kwargs,
):
self.name = name
self.data = data
Expand Down Expand Up @@ -148,6 +157,10 @@ def __call__(
if attack_file is not None and Path(attack_file).exists():
samples = self.data.load(attack_file)
else:
print(f"Type of self.init: {type(self.init)}")
print(f"Type of self.init.model: {type(self.init.model)}")
print(f"Type of model: {type(model)}")

atk = self.init(model=model, attack_size=self.attack_size)

if targeted is True:
Expand Down Expand Up @@ -466,7 +479,13 @@ class InferenceAttack:
kwargs: Union[dict, None] = field(default_factory=dict)

def __init__(
self, name: str, data: Data, model: Model, init: dict, attack_size=-1, **kwargs
self,
name: str,
data: Data,
model: Model,
init: dict,
attack_size=-1,
**kwargs,
):
self.name = name
self.data = data
Expand Down Expand Up @@ -577,7 +596,13 @@ class ExtractionAttack:
kwargs: Union[dict, None] = field(default_factory=dict)

def __init__(
self, name: str, data: Data, model: Model, init: dict, attack_size=-1, **kwargs
self,
name: str,
data: Data,
model: Model,
init: dict,
attack_size=-1,
**kwargs,
):
self.name = name
self.data = data
Expand Down Expand Up @@ -798,12 +823,21 @@ def __call__(
adv_predictions_file=None,
adv_probabilities_file=None,
adv_losses_file=None,
**kwargs,
):
name = self.init.name
kwargs = deepcopy(self.kwargs)
kwargs.update({"init": self.init.kwargs})
data = self.data()
data, model = self.model.initialize(data)
if isinstance(model, BaseEstimator):
try:
check_is_fitted(model), "Model must be fitted before calling attack."
except NotFittedError as e:
logger.warning(
f"Model not fitted. Fitting model before attack. Error: {e}",
)
model, _ = self.model.fit(data=data, model=model)
if "art" not in str(type(model)):
model = self.model.art(model=model, data=data)
if self.method == "evasion":
Expand Down
61 changes: 42 additions & 19 deletions deckard/base/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from dataclasses import dataclass, field
from pathlib import Path
from typing import Union

import numpy as np
from pandas import DataFrame, read_csv, Series

from omegaconf import OmegaConf
from validators import url
from ..utils import my_hash
from .generator import DataGenerator
from .sampler import SklearnDataSampler
Expand All @@ -28,6 +28,7 @@ class Data:
)
target: Union[str, None] = None
name: Union[str, None] = None
drop: list = field(default_factory=list)

def __init__(
self,
Expand All @@ -36,6 +37,8 @@ def __init__(
sample: SklearnDataSampler = None,
sklearn_pipeline: SklearnDataPipeline = None,
target: str = None,
drop: list = [],
**kwargs,
):
"""Initialize the data object. If the data is generated, then generate the data and sample it. If the data is loaded, then load the data and sample it.
Expand All @@ -46,9 +49,6 @@ def __init__(
sklearn_pipeline (SklearnDataPipeline, optional): The sklearn pipeline. Defaults to None.
target (str, optional): The target column. Defaults to None.
"""
logger.info(
f"Instantiating {self.__class__.__name__} with name={name} and generate={generate} and sample={sample} and sklearn_pipeline={sklearn_pipeline} and target={target}",
)
if generate is not None:
self.generate = (
generate
Expand All @@ -66,16 +66,19 @@ def __init__(
else:
self.sample = SklearnDataSampler()
if sklearn_pipeline is not None:
sklearn_pipeline = OmegaConf.to_container(
OmegaConf.create(sklearn_pipeline),
)
self.sklearn_pipeline = (
sklearn_pipeline
if isinstance(sklearn_pipeline, (SklearnDataPipeline, type(None)))
if isinstance(sklearn_pipeline, (SklearnDataPipeline))
else SklearnDataPipeline(**sklearn_pipeline)
)
else:
self.sklearn_pipeline = None
self.drop = drop
self.target = target
self.name = name if name is not None else my_hash(self)
logger.debug(f"Instantiating Data with id: {self.get_name()}")

def get_name(self):
"""Get the name of the data object."""
Expand All @@ -91,7 +94,6 @@ def initialize(self, filename=None):
"""
if filename is not None and Path(filename).exists():
result = self.load(filename)
assert len(result) == 4, f"Data is not generated: {self.name}"
elif self.generate is not None:
result = self.generate()
else:
Expand All @@ -100,14 +102,23 @@ def initialize(self, filename=None):
assert self.target is not None, "Target is not specified"
y = result[self.target]
X = result.drop(self.target, axis=1)
X = np.array(X)
y = np.array(y)
if self.drop != []:
X = X.drop(self.drop, axis=1)
X = X.to_numpy()
y = y.to_numpy()
result = [X, y]
else:
if self.drop != []:
raise ValueError(
f"Drop is not supported for non-DataFrame data. Data is type {type(result)}",
)
if len(result) == 2:
result = self.sample(*result)
assert (
len(result) == 4
), f"Data is not generated: {self.name} {result}. Length: {len(result)},"
if self.sklearn_pipeline is not None:
result = self.sklearn_pipeline(*result)
return result

def load(self, filename) -> DataFrame:
Expand All @@ -125,6 +136,8 @@ def load(self, filename) -> DataFrame:
elif suffix in [".pkl", ".pickle"]:
with open(filename, "rb") as f:
data = pickle.load(f)
elif suffix in [".npz"]:
data = np.load(filename)
else: # pragma: no cover
raise ValueError(f"Unknown file type {suffix}")
return data
Expand All @@ -138,6 +151,10 @@ def save(self, data, filename):
logger.info(f"Saving data to {filename}")
suffix = Path(filename).suffix
Path(filename).parent.mkdir(parents=True, exist_ok=True)
if isinstance(data, dict):
for k, v in data.items():
v = str(v)
data[k] = v
if suffix in [".json"]:
if isinstance(data, (Series, DataFrame)):
data = data.to_dict()
Expand All @@ -155,16 +172,20 @@ def save(self, data, filename):
else: # pragma: no cover
raise ValueError(f"Unknown data type {type(data)} for {filename}.")
with open(filename, "w") as f:
json.dump(data, f)
json.dump(data, f, indent=4, sort_keys=True)
elif suffix in [".csv"]:
assert isinstance(
data,
(Series, DataFrame, dict, np.ndarray),
), f"Data must be a Series, DataFrame, or dict, not {type(data)} to save to {filename}"
DataFrame(data).to_csv(filename, index=False)
if isinstance(data, (np.ndarray)):
data = DataFrame(data)
data.to_csv(filename, index=False)
elif suffix in [".pkl", ".pickle"]:
with open(filename, "wb") as f:
pickle.dump(data, f)
elif suffix in [".npz"]:
np.savez(filename, data)
else: # pragma: no cover
raise ValueError(f"Unknown file type {type(suffix)} for {suffix}")
assert Path(filename).exists()
Expand All @@ -174,19 +195,19 @@ def __call__(
data_file=None,
train_labels_file=None,
test_labels_file=None,
**kwargs,
) -> list:
"""Loads data from file if it exists, otherwise generates data and saves it to file. Returns X_train, X_test, y_train, y_test as a list of arrays, typed according to the framework.
:param filename: str
:return: list
"""
result_dict = {}
if data_file is not None and Path(data_file).exists():
data = self.load(data_file)
assert len(data) == 4, f"Some data is missing: {self.name}"
if Path(self.name).is_file() or url(self.name):
new_data_file = data_file
data_file = self.name
else:
data = self.initialize(filename=data_file)
assert len(data) == 4, f"Some data is missing: {self.name}"
data_file = self.save(data, data_file)
new_data_file = data_file
result_dict = {}
data = self.initialize(data_file)
result_dict["data"] = data
if train_labels_file is not None:
self.save(data[2], train_labels_file)
Expand All @@ -198,4 +219,6 @@ def __call__(
assert Path(
test_labels_file,
).exists(), f"Error saving test labels to {test_labels_file}"
if new_data_file is not None:
self.save(data, new_data_file)
return data
Loading

0 comments on commit 9cbae22

Please sign in to comment.