Trying to fix both tests and linting (#258)
* trying to fix both tests and linting

* fixing flake8 version

* trying to fix flake8

* fixing linting errors

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fixing linting CI

* fix linting

* fix linting

* trying to make flake8 work
jeandut authored Nov 18, 2022
1 parent 8963217 commit f1e0564
Showing 57 changed files with 210 additions and 394 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/linting.yml
@@ -23,9 +23,14 @@ jobs:

- name: Install dependencies
run: pip install isort black==22.3.0
pip install flake8

- name: Run black
run: black --check .
run: black --line-length=89 --check .


- name: Run FLAKE8
run: flake8 --max-line-length=89 --per-file-ignores="*/__init__.py:F401" ./flamby

- name: Run isort
run: isort .
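
For reference, the same checks can be reproduced locally before pushing. The sketch below is a hypothetical helper script (not part of the repository) and assumes black==22.3.0, flake8 and isort are installed in the current environment:

# lint_local.py (hypothetical): re-run the lint commands from the workflow above.
import subprocess
import sys

CHECKS = [
    ["black", "--line-length=89", "--check", "."],
    ["flake8", "--max-line-length=89",
     "--per-file-ignores=*/__init__.py:F401", "./flamby"],
    # The workflow runs plain "isort ."; add --check-only to avoid rewriting files.
    ["isort", "."],
]

exit_code = 0
for cmd in CHECKS:
    print("running:", " ".join(cmd))
    exit_code = exit_code or subprocess.run(cmd).returncode

sys.exit(exit_code)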
2 changes: 2 additions & 0 deletions .github/workflows/pr_validation.yml
@@ -15,6 +15,8 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions docs/conf.py
@@ -175,7 +175,7 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "FLamby.tex", "FLamby Documentation", "Collaboration", "manual"),
(master_doc, "FLamby.tex", "FLamby Documentation", "Collaboration", "manual")
]


@@ -200,7 +200,7 @@
"FLamby",
"One line description of project.",
"Miscellaneous",
),
)
]


4 changes: 1 addition & 3 deletions flamby/benchmarks/benchmark_utils.py
@@ -568,9 +568,7 @@ def ensemble_perf_from_predictions(
return ensemble_perf


def set_dataset_specific_config(
dataset_name, compute_ensemble_perf=False, use_gpu=True
):
def set_dataset_specific_config(dataset_name, compute_ensemble_perf=False, use_gpu=True):
"""_summary_
Parameters
3 changes: 1 addition & 2 deletions flamby/benchmarks/conf.py
@@ -85,8 +85,7 @@ def get_dataset_args(
for param in params:
try:
p = getattr(
__import__(f"flamby.datasets.{dataset_name}", fromlist=param),
param,
__import__(f"flamby.datasets.{dataset_name}", fromlist=param), param
)
except AttributeError:
p = None
10 changes: 2 additions & 8 deletions flamby/benchmarks/fed_benchmark.py
@@ -464,10 +464,7 @@ def main(args_cli):

parser = argparse.ArgumentParser()
parser.add_argument(
"--GPU",
type=int,
default=0,
help="GPU to run the training on (if available)",
"--GPU", type=int, default=0, help="GPU to run the training on (if available)"
)
parser.add_argument(
"--cpu-only",
@@ -488,10 +485,7 @@ def main(args_cli):
help="Do 0 round and 0 epoch to check if the script is working",
)
parser.add_argument(
"--workers",
type=int,
default=0,
help="Numbers of workers for the dataloader",
"--workers", type=int, default=0, help="Numbers of workers for the dataloader"
)
parser.add_argument(
"--learning_rate",
5 changes: 1 addition & 4 deletions flamby/create_dataset_config.py
@@ -5,10 +5,7 @@
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--path",
type=str,
help="The path where the dataset is located",
required=True,
"--path", type=str, help="The path where the dataset is located", required=True
)
parser.add_argument(
"--dataset-name",
3 changes: 2 additions & 1 deletion flamby/datasets/fed_camelyon16/dataset.py
@@ -86,7 +86,8 @@ def __init__(
self.features_centers = []
self.features_sets = []
self.perms = {}
# We need this ist to be sorted for reproducibility but shuffled to avoid weirdness
# We need this list to be sorted for reproducibility but shuffled to
# avoid weirdness
npys_list = sorted(self.tiles_dir.glob("*.npy"))
random.seed(0)
random.shuffle(npys_list)
(diff for another file; file path not shown)
@@ -5,6 +5,7 @@
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from google_client import create_service
from googleapiclient.errors import HttpError
@@ -65,12 +66,11 @@ def main(path_to_secret, output_folder, port=6006, debug=False):
len(train_df.index) + len(test_df.index)
)
downloaded_images_status_file["Slide"] = None
downloaded_images_status_file.Slide.iloc[: len(train_df.index)] = train_df[
"name"
]
downloaded_images_status_file.Slide.iloc[len(train_df.index) :] = test_df[
"name"
]
total_size = len(train_df.index) + len(test_df.index)
train_idxs = np.arange(0, len(train_df.index))
test_idxs = np.arange(len(train_df.index), total_size)
downloaded_images_status_file.Slide.iloc[train_idxs] = train_df["name"]
downloaded_images_status_file.Slide.iloc[test_idxs] = test_df["name"]
downloaded_images_status_file.to_csv(
downloaded_images_status_file_path, index=False
)
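
As a toy illustration of the indexing change above (hypothetical names, not the actual download script), positional index arrays built with np.arange select the same rows as the original iloc slices, while avoiding the space-before-colon slice formatting that black produces and flake8 reports as E203, which is presumably the reason for the rewrite:

import numpy as np

slide_names = ["slide_a", "slide_b", "slide_c", "slide_d"]
n_train = 3

train_idxs = np.arange(0, n_train)                 # 0, 1, 2
test_idxs = np.arange(n_train, len(slide_names))   # 3

# Same selection as slide_names[:n_train] and slide_names[n_train:].
assert [slide_names[i] for i in train_idxs] == slide_names[:n_train]
assert [slide_names[i] for i in test_idxs] == slide_names[n_train:]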
@@ -92,7 +92,9 @@ def main(path_to_secret, output_folder, port=6006, debug=False):
port=port,
)
regex = "(?<=https://drive.google.com/file/d/)[a-zA-Z0-9]+"
# Resourcekey is now mandatory (credit @Kris in: https://stackoverflow.com/questions/71343002/downloading-files-from-public-google-drive-in-python-scoping-issues)
# Resourcekey is now mandatory (credit @Kris in:
# https://stackoverflow.com/questions/71343002/
# downloading-files-from-public-google-drive-in-python-scoping-issues)
regex_rkey = "(?<=resourcekey=).+"
for current_df in [train_df, test_df]:
for i in tqdm(range(len(current_df.index))):
(diff for another file; file path not shown)
@@ -52,9 +52,7 @@ def __len__(self):

def __getitem__(self, idx):
pil_image = self.slide.read_region(
self.coords[idx].astype("int_"),
self.level,
(self.tile_size, self.tile_size),
self.coords[idx].astype("int_"), self.level, (self.tile_size, self.tile_size)
).convert("RGB")
if self.transform is not None:
pil_image = self.transform(pil_image)
6 changes: 2 additions & 4 deletions flamby/datasets/fed_camelyon16/model.py
@@ -10,15 +10,13 @@ class Baseline(nn.Module):
def __init__(self):
super(Baseline, self).__init__()
# As per the article
self.O = 2048 # Original dimension of the input embeddings
self.Od = 2048 # Original dimension of the input embeddings
self.M = 128 # New dimension of the input embedding

self.L = 128 # Dimension of the new features after query and value projections
self.K = 1000 # Number of elements in each bag

self.feature_extractor_part1 = nn.Sequential(
nn.Linear(self.O, self.M),
)
self.feature_extractor_part1 = nn.Sequential(nn.Linear(self.Od, self.M))
# The Gated Attention using tanh and sigmoid from Eq 9
# from https://arxiv.org/abs/1802.04712

10 changes: 4 additions & 6 deletions flamby/datasets/fed_dummy_dataset.py
@@ -24,8 +24,9 @@ def __len__(self):
return self.size

def __getitem__(self, idx):
return torch.rand(3, 224, 224).to(self.X_dtype), torch.randint(0, 2, (1,)).to(
self.y_dtype
return (
torch.rand(3, 224, 224).to(self.X_dtype),
torch.randint(0, 2, (1,)).to(self.y_dtype),
)


@@ -53,10 +54,7 @@ def forward(self, X):
m = Baseline()
lo = BaselineLoss()
dl = DataLoader(
FedDummyDataset(center=1, train=True),
batch_size=32,
shuffle=True,
num_workers=0,
FedDummyDataset(center=1, train=True), batch_size=32, shuffle=True, num_workers=0
)
it = iter(dl)
X, y = next(it)
14 changes: 3 additions & 11 deletions flamby/datasets/fed_heart_disease/dataset.py
@@ -73,12 +73,7 @@ def __init__(
self.y_dtype = y_dtype
self.debug = debug

self.centers_number = {
"cleveland": 0,
"hungarian": 1,
"switzerland": 2,
"va": 3,
}
self.centers_number = {"cleveland": 0, "hungarian": 1, "switzerland": 2, "va": 3}

self.features = pd.DataFrame()
self.labels = pd.DataFrame()
@@ -165,9 +160,7 @@ def __init__(
}

# We finally broadcast the means and stds over all datasets
self.mean_of_features = torch.zeros(
(len(self.features), 13), dtype=self.X_dtype
)
self.mean_of_features = torch.zeros((len(self.features), 13), dtype=self.X_dtype)
self.std_of_features = torch.ones((len(self.features), 13), dtype=self.X_dtype)
for i in range(self.mean_of_features.shape[0]):
self.mean_of_features[i] = self.centers_stats[self.centers[i]]["mean"]
@@ -177,8 +170,7 @@ def __init__(
to_select = [(self.sets[idx] == "train") for idx, _ in enumerate(self.features)]
features_train = [fp for idx, fp in enumerate(self.features) if to_select[idx]]
features_tensor_train = torch.cat(
[features_train[i][None, :] for i in range(len(features_train))],
axis=0,
[features_train[i][None, :] for i in range(len(features_train))], axis=0
)
self.mean_of_features_pooled_train = features_tensor_train.mean(axis=0)
self.std_of_features_pooled_train = features_tensor_train.std(axis=0)
(diff for another file; file path not shown)
@@ -20,12 +20,9 @@ def main(output_folder, debug=False):

# location of the files in the UCI archive
accept_license(
"https://archive-beta.ics.uci.edu/ml/datasets/heart+disease",
"fed_heart_disease",
)
base_url = (
"https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/"
"https://archive-beta.ics.uci.edu/ml/datasets/heart+disease", "fed_heart_disease"
)
base_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/"
centers = ["cleveland", "hungarian", "switzerland", "va"]
md5_hashes = [
"2d91a8ff69cfd9616aa47b59d6f843db",
@@ -69,9 +66,7 @@
sys.exit()

# get status of download
downloaded_status_file_path = os.path.join(
output_folder, "download_status_file.csv"
)
downloaded_status_file_path = os.path.join(output_folder, "download_status_file.csv")
if not (os.path.exists(downloaded_status_file_path)):
downloaded_status_file = pd.DataFrame()
downloaded_status_file["Status"] = ["Not found"] * 4
1 change: 0 additions & 1 deletion flamby/datasets/fed_heart_disease/metric.py
@@ -1,5 +1,4 @@
import numpy as np
from sklearn.metrics import roc_auc_score


def metric(y_true, y_pred):
24 changes: 4 additions & 20 deletions flamby/datasets/fed_isic2019/benchmark.py
@@ -22,14 +22,7 @@


def train_model(
model,
optimizer,
scheduler,
dataloaders,
dataset_sizes,
device,
lossfunc,
num_epochs,
model, optimizer, scheduler, dataloaders, dataset_sizes, device, lossfunc, num_epochs
):
"""Training function
Parameters
@@ -224,16 +217,10 @@ def main(args):

parser = argparse.ArgumentParser()
parser.add_argument(
"--GPU",
type=int,
default=0,
help="GPU to run the training on (if available)",
"--GPU", type=int, default=0, help="GPU to run the training on (if available)"
)
parser.add_argument(
"--workers",
type=int,
default=4,
help="Numbers of workers for the dataloader",
"--workers", type=int, default=4, help="Numbers of workers for the dataloader"
)
args = parser.parse_args()

@@ -243,10 +230,7 @@

sz = 200
test_aug = albumentations.Compose(
[
albumentations.CenterCrop(sz, sz),
albumentations.Normalize(always_apply=True),
]
[albumentations.CenterCrop(sz, sz), albumentations.Normalize(always_apply=True)]
)
test_dataset = dataset.FedIsic2019(train=False, pooled=True)
test_dataloader = torch.utils.data.DataLoader(
(diff for another file; file path not shown)
@@ -76,23 +76,17 @@
for i, row in ISIC_2019_Training_Metadata.iterrows():
if pd.isnull(row["lesion_id"]):
image = row["image"]
os.system(
"rm " + data_directory + "/ISIC_2019_Training_Input/" + image + ".jpg"
)
os.system("rm " + data_directory + "/ISIC_2019_Training_Input/" + image + ".jpg")
if image != ISIC_2019_Training_GroundTruth["image"][i]:
print("Mismatch between Metadata and Ground Truth")
ISIC_2019_Training_GroundTruth = ISIC_2019_Training_GroundTruth.drop(i)
ISIC_2019_Training_Metadata = ISIC_2019_Training_Metadata.drop(i)

# generating dataset field from lesion_id field in the metadata dataframe
ISIC_2019_Training_Metadata["dataset"] = ISIC_2019_Training_Metadata["lesion_id"].str[
:4
]
ISIC_2019_Training_Metadata["dataset"] = ISIC_2019_Training_Metadata["lesion_id"].str[:4]

# join with HAM10000 metadata in order to expand the HAM datacenters
result = pd.merge(
ISIC_2019_Training_Metadata, HAM10000_metadata, how="left", on="image"
)
result = pd.merge(ISIC_2019_Training_Metadata, HAM10000_metadata, how="left", on="image")
result["dataset"] = result["dataset_x"] + result["dataset_y"].astype(str)
result.drop(["dataset_x", "dataset_y", "lesion_id"], axis=1, inplace=True)

15 changes: 3 additions & 12 deletions flamby/datasets/fed_isic2019/heterogeneity_pic.py
@@ -42,22 +42,13 @@ def forward(self, image):

parser = argparse.ArgumentParser()
parser.add_argument(
"--GPU",
type=int,
default=0,
help="GPU to run the training on (if available)",
"--GPU", type=int, default=0, help="GPU to run the training on (if available)"
)
parser.add_argument(
"--workers",
type=int,
default=0,
help="Numbers of workers for the dataloader",
"--workers", type=int, default=0, help="Numbers of workers for the dataloader"
)
parser.add_argument(
"--seed",
type=int,
default=42,
help="The seed for the UMPA and dataloading",
"--seed", type=int, default=42, help="The seed for the UMPA and dataloading"
)
args = parser.parse_args()
np.random.seed(args.seed)
14 changes: 10 additions & 4 deletions flamby/datasets/fed_ixi/dataset_creation_scripts/download.py
@@ -21,10 +21,16 @@ def dl_ixi_tiny(output_folder, debug=False):
The folder where to download the dataset.
"""
print(
"The IXI dataset is made available under the Creative Commons CC BY-SA 3.0 license.\n\
If you use the IXI data please acknowledge the source of the IXI data, e.g. the following website: https://brain-development.org/ixi-dataset/\n\
IXI Tiny is derived from the same source. Acknowledge the following reference on TorchIO : https://torchio.readthedocs.io/datasets.html#ixitiny\n\
Pérez-García F, Sparks R, Ourselin S. TorchIO: a Python library for efficient loading, preprocessing, augmentation and patch-based sampling of medical images in deep learning. arXiv:2003.04696 [cs, eess, stat]. 2020. https://doi.org/10.48550/arXiv.2003.04696"
"The IXI dataset is made available under the Creative Commons CC BY-SA \
3.0 license.\n\
If you use the IXI data please acknowledge the source of the IXI data, e.g.\
the following website: https://brain-development.org/ixi-dataset/\
IXI Tiny is derived from the same source. Acknowledge the following reference\
on TorchIO : https://torchio.readthedocs.io/datasets.html#ixitiny\
Pérez-García F, Sparks R, Ourselin S. TorchIO: a Python library for \
efficient loading, preprocessing, augmentation and patch-based sampling \
of medical images in deep learning. arXiv:2003.04696 [cs, eess, stat]. \
2020. https://doi.org/10.48550/arXiv.2003.04696"
)
accept_license("https://brain-development.org/ixi-dataset/", "fed_ixi")
os.makedirs(output_folder, exist_ok=True)
(Diffs for the remaining changed files are not shown.)
