diff --git a/README.md b/README.md index 83371d8b..5126319f 100644 --- a/README.md +++ b/README.md @@ -40,34 +40,11 @@ Running `dvc repro` in that folder will reproduce the experiment outlined in the └── test : test suite -### - To build the package (optional and is a very rough draft): -``` -###################################### -# Ubuntu 22.04, 20.04 -sudo apt update -sudo apt install python3-venv python3-pip python3-dev python3-setuptools -sudo apt install software-properties-common -y -sudo add-apt-repository ppa:deadsnakes/ppa -sudo apt install python3.9 -y -sudo apt install msttcorefonts -qqpython3-distutils #fonts (optional) -export SETUPTOOLS_USE_DISTUTILS=stdlib -###################################### -python3 -m venv env -source env/bin/activate -git clone --recurse-submodules -j8 https://github.com/simplymathematics/deckard.git -# git submodule update --init --recursive # To just update the submodules -python3 -m pip install deckard/adversarial-robustness-toolbox/ -python3 -m pip install -e deckard/ -python3 -m pip install pyinstaller -python3 -m pip install -u numba pip setuptools -cd deckard && pyinstaller --onefile deckard.py -n deckard -``` -After adding it to your path, you can then run deckard like a package: +After adding it to your path, you can then run it as a module: ``` -deckard examples/sklearn +python -m deckard --config_file mnist.yaml --config_dir examples/power/conf ``` diff --git a/build_instructions.md b/build_instructions.md new file mode 100644 index 00000000..029f0e99 --- /dev/null +++ b/build_instructions.md @@ -0,0 +1,19 @@ +###################################### +# Ubuntu 22.04, 20.04 +sudo apt update +sudo apt install python3-venv python3-pip python3-dev python3-setuptools +sudo apt install software-properties-common -y +sudo add-apt-repository ppa:deadsnakes/ppa +sudo apt install python3.9 -y +sudo apt install msttcorefonts python3-distutils -qq #fonts (optional) +export SETUPTOOLS_USE_DISTUTILS=stdlib 
+###################################### +python3 -m venv env +source env/bin/activate +git clone --recurse-submodules -j8 https://github.com/simplymathematics/deckard.git +# git submodule update --init --recursive # To just update the submodules +python3 -m pip install deckard/adversarial-robustness-toolbox/ +python3 -m pip install -e deckard/ +python3 -m pip install pyinstaller +python3 -m pip install -U numba pip setuptools +cd deckard && pyinstaller --onefile deckard.py -n deckard diff --git a/deckard/__main__.py b/deckard/__main__.py index d8b9e9dc..03153d92 100644 --- a/deckard/__main__.py +++ b/deckard/__main__.py @@ -40,11 +40,13 @@ def run_submodule(submodule, args): return 0 -def parse_and_repro(args, default_config="default.yaml"): +def parse_and_repro(args, default_config="default.yaml", config_dir="conf"): if len(args) == 0: assert ( save_params_file( - config_dir=Path(Path(), "conf"), + config_dir=Path(Path(), config_dir) + if not Path(config_dir).is_absolute() + else Path(config_dir), config_file=default_config, ) is None @@ -84,6 +86,7 @@ def parse_and_repro(args, default_config="default.yaml"): type=str, help="default hydra configuration file that you would like to reproduce with dvc repro.", ) + parser.add_argument("--config_dir", type=str, default="conf") parser.add_argument("other_args", type=str, nargs="*") args = parser.parse_args() submodule = args.submodule @@ -98,6 +101,9 @@ def parse_and_repro(args, default_config="default.yaml"): else: other_args = [] if submodule is None: - assert parse_and_repro(other_args, args.config_file) == 0 + assert ( + parse_and_repro(other_args, args.config_file, config_dir=args.config_dir) + == 0 + ) else: assert run_submodule(submodule, other_args) == 0 diff --git a/deckard/base/experiment/experiment.py b/deckard/base/experiment/experiment.py index a19a8be1..522ae9d3 100644 --- a/deckard/base/experiment/experiment.py +++ b/deckard/base/experiment/experiment.py @@ -29,6 +29,7 @@ class Experiment: name: 
Union[str, None] = field(default_factory=str) stage: Union[str, None] = field(default_factory=str) optimizers: Union[list, None] = field(default_factory=list) + device_id: str = "cpu" kwargs: Union[dict, None] = field(default_factory=dict) def __init__( @@ -37,6 +38,7 @@ def __init__( model: Model, scorers: ScorerDict, files: list, + device_id: str = "cpu", attack: Attack = None, name=None, stage=None, @@ -100,6 +102,7 @@ def __init__( else: # pragma: no cover raise ValueError("attack must be a dict, DictConfig, or Attack object.") assert isinstance(self.attack, (Attack, type(None))) + self.device_id = device_id self.stage = stage self.optimizers = optimizers self.kwargs = kwargs @@ -251,6 +254,7 @@ def __call__(self): old_score_dict = self.data.load(files["score_dict_file"]) old_score_dict.update(**score_dict) score_dict = old_score_dict + score_dict.update({"device_id": self.device_id}) self.data.save(score_dict, files["score_dict_file"]) else: # pragma: no cover raise ValueError("Scorer is None. 
Please specify a scorer.") diff --git a/deckard/base/model/model.py b/deckard/base/model/model.py index 1c520f62..bd69bc4a 100644 --- a/deckard/base/model/model.py +++ b/deckard/base/model/model.py @@ -190,7 +190,7 @@ def __call__(self, data: list, model: object, library=None): else: raise e time_dict = { - "train_time": end, + "train_time": (end - start) / 1e9, "train_time_per_sample": end / (len(data[0]) * 1e9), "train_time_start": start, "train_time_end": end, @@ -573,8 +573,8 @@ def predict(self, data=None, model=None, predictions_file=None): return ( predictions, { - "predict_time": end, - "predict_time_per_sample": end / (len(data[0]) * 1e9), + "predict_time": (end - start) / 1e9, + "predict_time_per_sample": (end - start) / (len(data[0]) * 1e9), "predict_start_time": start, "predict_stop_time": end, "predict_device": device, @@ -623,7 +623,7 @@ def predict_proba(self, data=None, model=None, probabilities_file=None): return ( predictions, { - "predict_proba_time": end, + "predict_proba_time": (end - start) / 1e9, "predict_proba_time_per_sample": end / (len(data[0]) * 1e9), "predict_proba_start_time": start, "predict_proba_stop_time": end, @@ -680,7 +680,7 @@ def predict_log_loss(self, data, model, losses_file=None): return ( predictions, { - "predict_log_proba_time": end, + "predict_log_proba_time": (end - start) / 1e9, "predict_log_proba_time_per_sample": end / (len(data[0]) * 1e9), "predict_log_proba_start_time": start, "predict_log_proba_stop_time": end, diff --git a/examples/power/conf/cifar10.yaml b/examples/power/conf/cifar10.yaml index e30e8f55..51c4f46c 100644 --- a/examples/power/conf/cifar10.yaml +++ b/examples/power/conf/cifar10.yaml @@ -7,6 +7,7 @@ defaults: - scorers: default - override hydra/sweeper : optuna - override hydra/sweeper/sampler : tpe +device_id : ${oc.env:DECKARD_DEVICE,cpu} stage : '???' 
direction : - maximize @@ -18,7 +19,7 @@ direction : optimizers: - accuracy - train_time - - predict_time + - predict_proba_time - adv_accuracy - adv_fit_time - adv_predict_time @@ -45,7 +46,7 @@ hydra: params: ++data.sample.random_state: int(range(0, 1)) ++model.art.initialize.optimizer.lr: range(0.0001, 1) - ++model.trainer.nb_epoch: int(range(1, 100)) + ++model.trainer.nb_epoch: int(range(1, 50)) ++model.trainer.batch_size: int(range(10, 10000)) ++attack.init.eps : range(0.01, 1.0) ++attack.init.eps_step : ${eval:'(.1)*${attack.init.eps}'} diff --git a/examples/power/conf/cifar100.yaml b/examples/power/conf/cifar100.yaml index 141f0d60..dcffb55c 100644 --- a/examples/power/conf/cifar100.yaml +++ b/examples/power/conf/cifar100.yaml @@ -8,6 +8,7 @@ defaults: - override hydra/sweeper : optuna - override hydra/sweeper/sampler : tpe # - override hydra/launcher : rq +device_id : ${oc.env:DECKARD_DEVICE,cpu} stage : '???' direction : - maximize @@ -19,7 +20,7 @@ direction : optimizers: - accuracy - train_time - - predict_time + - predict_proba_time - adv_accuracy - adv_fit_time - adv_predict_time @@ -46,7 +47,7 @@ hydra: params: ++data.sample.random_state: int(range(0, 1)) ++model.art.initialize.optimizer.lr: range(0.0001, 1) - ++model.trainer.nb_epoch: int(range(1, 100)) + ++model.trainer.nb_epoch: int(range(1, 50)) ++model.trainer.batch_size: int(range(10, 10000)) ++attack.init.eps : range(0.01, 1.0) ++attack.init.eps_step : ${eval:'(.1)*${attack.init.eps}'} diff --git a/examples/power/conf/data/torch_cifar.yaml b/examples/power/conf/data/torch_cifar.yaml new file mode 100644 index 00000000..ee60717a --- /dev/null +++ b/examples/power/conf/data/torch_cifar.yaml @@ -0,0 +1,16 @@ +_target_: deckard.base.data.Data +generate: + _target_: deckard.base.data.generator.DataGenerator + name: torch_cifar10 +sample: + _target_: deckard.base.data.sampler.SklearnDataSampler + random_state : 0 + stratify: True + train_size : 100 + test_size : 100 +sklearn_pipeline: + _target_: 
deckard.base.data.sklearn_pipeline.SklearnDataPipeline + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: True + with_std: True diff --git a/examples/power/conf/data/torch_cifar100.yaml b/examples/power/conf/data/torch_cifar100.yaml new file mode 100644 index 00000000..4153eeee --- /dev/null +++ b/examples/power/conf/data/torch_cifar100.yaml @@ -0,0 +1,16 @@ +_target_: deckard.base.data.Data +generate: + _target_: deckard.base.data.generator.DataGenerator + name: torch_cifar100 +sample: + _target_: deckard.base.data.sampler.SklearnDataSampler + random_state : 0 + stratify: True + train_size : 100 + test_size : 100 +sklearn_pipeline: + _target_: deckard.base.data.sklearn_pipeline.SklearnDataPipeline + preprocessor: + name: sklearn.preprocessing.StandardScaler + with_mean: True + with_std: True diff --git a/examples/power/conf/mnist.yaml b/examples/power/conf/mnist.yaml index 610afee3..546b7707 100644 --- a/examples/power/conf/mnist.yaml +++ b/examples/power/conf/mnist.yaml @@ -8,6 +8,7 @@ defaults: - override hydra/sweeper : optuna - override hydra/sweeper/sampler : tpe # - override hydra/launcher : rq +device_id : ${oc.env:DECKARD_DEVICE,cpu} stage : '???' 
direction : - maximize @@ -19,7 +20,7 @@ direction : optimizers: - accuracy - train_time - - predict_time + - predict_proba_time - adv_accuracy - adv_fit_time - adv_predict_time @@ -46,7 +47,7 @@ hydra: params: ++data.sample.random_state: int(range(0, 1)) ++model.art.initialize.optimizer.lr: range(0.0001, 1) - ++model.trainer.nb_epoch: int(range(1, 100)) + ++model.trainer.nb_epoch: int(range(1, 50)) ++model.trainer.batch_size: int(range(10, 10000)) ++attack.init.eps : range(0.01, 1.0) ++attack.init.eps_step : ${eval:'(.1)*${attack.init.eps}'} diff --git a/examples/power/conf/model/art/initialize/default.yaml b/examples/power/conf/model/art/initialize/default.yaml index b694473b..becd0f9a 100644 --- a/examples/power/conf/model/art/initialize/default.yaml +++ b/examples/power/conf/model/art/initialize/default.yaml @@ -5,3 +5,4 @@ optimizer: lr : 0.01 momentum : 0.9 clip_values : [0, 255] +device_type : ${oc.env:DECKARD_DEVICE_TYPE,cpu} diff --git a/examples/power/dvc.lock b/examples/power/dvc.lock index e3a46f41..c8adb982 100644 --- a/examples/power/dvc.lock +++ b/examples/power/dvc.lock @@ -1,15 +1,14 @@ schema: '2.0' stages: train: - cmd: python -m deckard.layers.experiment train --config_file cifar100.yaml + cmd: python -m deckard.layers.experiment train --config_file mnist.yaml params: params.yaml: data: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -22,6 +21,7 @@ stages: name: sklearn.preprocessing.StandardScaler with_mean: true with_std: true + device_id: blah files: _target_: deckard.base.files.FileConfig adv_predictions_file: adv_predictions.json @@ -31,7 +31,7 @@ stages: data_dir: data data_file: data data_type: .pkl - directory: cifar100 + directory: mnist model_dir: models model_file: model model_type: .pt @@ -48,8 +48,7 @@ stages: _target_: 
deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -68,6 +67,7 @@ stages: - 255 criterion: name: torch.nn.CrossEntropyLoss + device_type: cpu optimizer: lr: 0.01 momentum: 0.9 @@ -77,8 +77,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -94,8 +93,7 @@ stages: init: _target_: deckard.base.model.ModelInitializer name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 + num_channels: 1 library: pytorch trainer: batch_size: 1024 @@ -111,17 +109,17 @@ stages: direction: minimize name: sklearn.metrics.log_loss outs: - - path: cifar100/reports/train/default/score_dict.json + - path: mnist/reports/train/default/score_dict.json hash: md5 - md5: 0d7b5dc69390c3934014592f0ca69754 - size: 752 + md5: a23b38083d2ee4f50aed9643828a1d50 + size: 778 attack: - cmd: python -m deckard.layers.experiment attack --config_file cifar100.yaml + cmd: python -m deckard.layers.experiment attack --config_file mnist.yaml deps: - path: params.yaml hash: md5 - md5: 3d21fe2d291f5b25fa57e440ddaa9d5e - size: 7525 + md5: 421a8c108a0f0ed875eaf34c7c403028 + size: 7294 params: params.yaml: attack: @@ -131,8 +129,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -157,8 +154,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler 
random_state: 0 @@ -177,6 +173,7 @@ stages: - 255 criterion: name: torch.nn.CrossEntropyLoss + device_type: cpu optimizer: lr: 0.01 momentum: 0.9 @@ -186,8 +183,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -203,8 +199,7 @@ stages: init: _target_: deckard.base.model.ModelInitializer name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 + num_channels: 1 library: pytorch trainer: batch_size: 1024 @@ -219,8 +214,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -239,6 +233,7 @@ stages: - 255 criterion: name: torch.nn.CrossEntropyLoss + device_type: cpu optimizer: lr: 0.01 momentum: 0.9 @@ -248,8 +243,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -265,8 +259,7 @@ stages: init: _target_: deckard.base.model.ModelInitializer name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 + num_channels: 1 library: pytorch trainer: batch_size: 1024 @@ -275,8 +268,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -289,6 +281,7 @@ stages: name: sklearn.preprocessing.StandardScaler with_mean: true with_std: true + device_id: blah files: _target_: deckard.base.files.FileConfig adv_predictions_file: adv_predictions.json @@ -298,7 +291,7 @@ stages: 
data_dir: data data_file: data data_type: .pkl - directory: cifar100 + directory: mnist model_dir: models model_file: model model_type: .pt @@ -315,8 +308,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -335,6 +327,7 @@ stages: - 255 criterion: name: torch.nn.CrossEntropyLoss + device_type: cpu optimizer: lr: 0.01 momentum: 0.9 @@ -344,8 +337,7 @@ stages: _target_: deckard.base.data.Data generate: _target_: deckard.base.data.generator.DataGenerator - name: torch_cifar100 - path: original_data/ + name: torch_mnist sample: _target_: deckard.base.data.sampler.SklearnDataSampler random_state: 0 @@ -361,8 +353,7 @@ stages: init: _target_: deckard.base.model.ModelInitializer name: torch_example.ResNet18 - num_channels: 3 - num_classes: 100 + num_channels: 1 library: pytorch trainer: batch_size: 1024 @@ -378,34 +369,34 @@ stages: direction: minimize name: sklearn.metrics.log_loss outs: - - path: cifar100/attacks/attack.pkl + - path: mnist/attacks/attack.pkl hash: md5 - md5: 9ba2fe1196bcbf2379946652865ace9b - size: 123046 - - path: cifar100/data/data.pkl + md5: 0883f181fe7b420d94a7a6f6183b80e1 + size: 31517 + - path: mnist/data/data.pkl hash: md5 - md5: 594841554582ca2a77c0a73c7e8842cb - size: 2537889 - - path: cifar100/models/model.optimizer.pt + md5: 165a2c2fc0af6a07838b6c6715d79d53 + size: 635489 + - path: mnist/models/model.optimizer.pt hash: md5 - md5: 56e67611c3cc7e9c416c91cc6a6b2640 - size: 44989358 - - path: cifar100/models/model.pt + md5: 076d8b5b7ffe61960d8cbb9401791c03 + size: 44779566 + - path: mnist/models/model.pt hash: md5 - md5: 7a728af7fb0ca90e720cb5d25fee89af - size: 44995266 - - path: cifar100/reports/attack/default/adv_predictions.json + md5: b754314423c9a78e5283933dac7b9490 + size: 44785474 + - path: 
mnist/reports/attack/default/adv_predictions.json hash: md5 - md5: c3a0272213db04f9dbbb2c509abfe597 - size: 20979 - - path: cifar100/reports/attack/default/predictions.json + md5: c036f1b18ccc3d02510f8360c421d296 + size: 2122 + - path: mnist/reports/attack/default/predictions.json hash: md5 - md5: 3b039c0276e39808bab5b4363e26a062 - size: 214402 - - path: cifar100/reports/attack/default/score_dict.json + md5: 3fba1682d8c0a2c1d693eba9e7249bc5 + size: 21273 + - path: mnist/reports/attack/default/score_dict.json hash: md5 - md5: 44d2a553a36ce676b9a1a445345bd88c - size: 1168 + md5: dd50c47e1702962ffce30881240931bc + size: 1196 attacks@ResNet18: cmd: bash attacks.sh ++attack.attack_size=100 ++model.init.name=torch_example.ResNet18 stage=attack ++hydra.sweeper.storage=sqlite:///mnist/reports/attack/ResNet18.db @@ -439,24 +430,24 @@ stages: outs: - path: ../../deckard.egg-info hash: md5 - md5: 244a42d703838a2a3ffeb80ea7415b3f.dir - size: 13515 + md5: 8c0912e644d7243c260961631a0da14f.dir + size: 6202 nfiles: 5 install_torch: cmd: python -m pip install torch torchvision parse_params: - cmd: python -m deckard.layers.parse --config_file cifar100.yaml + cmd: python -m deckard.layers.parse --config_file mnist.yaml deps: - path: conf/ hash: md5 - md5: 8344c81b7bbf726ade0803d83eac19c6.dir - size: 124687 + md5: 171c70170a4d7cbdf5e3de1a663ca093.dir + size: 124847 nfiles: 21 outs: - path: params.yaml hash: md5 - md5: 3d21fe2d291f5b25fa57e440ddaa9d5e - size: 7525 + md5: 421a8c108a0f0ed875eaf34c7c403028 + size: 7294 install_redis: cmd: bash redis.sh outs: @@ -513,13 +504,13 @@ stages: deps: - path: mnist/reports/attack/default/score_dict.json hash: md5 - md5: a856df313ca8316d37f6ffad821d1fc9 - size: 1168 + md5: 92d2cfd1894810ad15286a2d1424b6a2 + size: 1176 outs: - path: mnist/reports/attack/mnist.db hash: md5 - md5: 16a0df62da4de50ebe7a6c50a0e5ddca - size: 147456 + md5: f55b6676e87b257cf3d074e6c1625bb8 + size: 135168 compile@attack: cmd: python -m deckard.layers.compile --report_folder 
mnist/reports/attack --results_file mnist/reports/attack.csv diff --git a/examples/power/dvc.yaml b/examples/power/dvc.yaml index 42e43b0b..eb637fd6 100644 --- a/examples/power/dvc.yaml +++ b/examples/power/dvc.yaml @@ -28,6 +28,7 @@ stages: - model - scorers - files + - device_id # outs: # - ${files.directory}/${files.data_dir}/${files.data_file}${files.data_type} # - ${files.directory}/${files.model_dir}/${files.model_file}${files.model_type} @@ -46,6 +47,7 @@ stages: - attack - scorers - files + - device_id outs: - ${files.directory}/${files.attack_dir}/${files.attack_file}${files.attack_type} - ${files.directory}/${files.reports}/attack/${files.name}/${files.adv_predictions_file}