
Commit

fixing docstrings (#374)
* fixing docstrings
* cleaning

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Borda and pre-commit-ci[bot] authored Sep 17, 2024
1 parent 8eb516a commit 2f78ec1
Showing 30 changed files with 152 additions and 207 deletions.
7 changes: 0 additions & 7 deletions .pre-commit-config.yaml
@@ -46,13 +46,6 @@ repos:
additional_dependencies: [tomli]
#args: ["--write-changes"] # uncomment if you want to get automatic fixing

- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
hooks:
- id: docformatter
additional_dependencies: [tomli]
args: ["--in-place"]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
hooks:
6 changes: 1 addition & 5 deletions examples/multi_modal/create_labelencoder.py
@@ -3,11 +3,7 @@


def create_labelencoder():
"""
Create a label encoder
Returns:
"""
"""Create a label encoder."""
data = ["Cancelation", "IBAN Change", "Damage Report"]
# Create an instance of LabelEncoder
label_encoder = LabelEncoder()
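The hunk above shows only the start of the function body. Below is a minimal sketch of what the complete helper plausibly looks like, assuming the encoder is fitted on the three class names and persisted with joblib (the file name `label_encoder.joblib` is an assumption; the DataModule in dataloader.py reloads such a file with `joblib.load`):

```python
import joblib
from sklearn.preprocessing import LabelEncoder


def create_labelencoder(path: str = "label_encoder.joblib") -> LabelEncoder:
    """Create a label encoder for the three document classes and persist it."""
    data = ["Cancelation", "IBAN Change", "Damage Report"]
    # Create an instance of LabelEncoder and fit it on the class names
    label_encoder = LabelEncoder()
    label_encoder.fit(data)
    # The DataModule later reloads this file via joblib.load(...)
    joblib.dump(label_encoder, path)
    return label_encoder


if __name__ == "__main__":
    encoder = create_labelencoder()
    print(encoder.transform(["Damage Report"]))  # prints the integer class id
```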
44 changes: 19 additions & 25 deletions examples/multi_modal/dataloader.py
@@ -29,15 +29,12 @@ def __init__(self):
self.hyperparameters = HYPERPARAMETERS

def load_labelencoder(self):
"""
Function to load the label encoder from s3
Returns:
"""
"""Function to load the label encoder from s3."""
return joblib.load(self.hyperparameters["label_encoder_name"])

def load_tokenizer(self):
"""
load the tokenizer files and the pre-training model path from s3 specified in the hyperparameters
"""Loads the tokenizer files and the pre-training model path from s3 specified in the hyperparameters.
Returns: tokenizer
"""
# Load Bert tokenizer
@@ -62,13 +59,10 @@ def __init__(self, input_dir: Union[str, Any], hyperparameters: Union[dict, Any]
self.labelencoder = EC.load_labelencoder()

def tokenize_data(self, tokenizer, texts, max_length: int):
"""
Tokenize the text
Args:
tokenizer:
texts:
max_length:
Returns: input_ids, attention_masks
"""Tokenize the text.
Returns: input_ids, attention_masks.
"""
encoded_text = tokenizer(
texts,
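The tokenizer call above is truncated in this view. A hedged sketch of a typical Hugging Face tokenization step that returns `input_ids` and `attention_masks`; the exact padding and `max_length` settings are assumptions, not taken from the repository:

```python
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
texts = ["Please cancel my contract.", "My IBAN has changed."]

encoded_text = tokenizer(
    texts,
    padding="max_length",   # pad every sample up to max_length
    truncation=True,        # cut longer samples down to max_length
    max_length=128,
    return_tensors="pt",    # return PyTorch tensors
)
input_ids = encoded_text["input_ids"]             # shape: (batch_size, max_length)
attention_masks = encoded_text["attention_mask"]  # 1 for real tokens, 0 for padding
```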
@@ -98,11 +92,10 @@ class MixedDataModule(pl.LightningDataModule):
"""Own DataModule form the pytorch lightning DataModule."""

def __init__(self, hyperparameters: dict):
"""
Init if the Data Module
"""Initialize if the Data Module.
Args:
data_path: dataframe with the data
hyperparameters: Hyperparameters
hyperparameters: Hyperparameters.
"""
super().__init__()
self.hyperparameters = hyperparameters
@@ -130,10 +123,11 @@ def __init__(self, hyperparameters: dict):
)

def train_dataloader(self) -> DataLoader:
"""
Define the training dataloader
"""Define the training dataloader.
Returns:
training dataloader
training dataloader.
"""
dataset_train = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
@@ -150,10 +144,10 @@ def train_dataloader(self) -> DataLoader:
)

def val_dataloader(self) -> DataLoader:
"""
Define the validation dataloader
"""Defines the validation dataloader.
Returns:
validation dataloader
validation dataloader.
"""
dataset_val = DocumentClassificationDataset(
hyperparameters=self.hyperparameters,
@@ -169,8 +163,8 @@ def val_dataloader(self) -> DataLoader:
)

def test_dataloader(self) -> DataLoader:
"""
Define the test dataloader
"""Defines the test dataloader.
Returns:
test dataloader
"""
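For context, a hedged usage sketch of how this LightningDataModule would typically be driven by a Trainer. The `LitClassifier` name and its constructor signature are assumptions (the actual LightningModule lives in loop.py), as is the import path for `HYPERPARAMETERS`:

```python
import lightning as pl  # `pl.pytorch.Callback` in loop.py suggests the `lightning` package is aliased as pl

# assumed import paths; adjust to the actual module layout of examples/multi_modal
from dataloader import HYPERPARAMETERS, MixedDataModule
from loop import LitClassifier  # class name assumed

datamodule = MixedDataModule(hyperparameters=HYPERPARAMETERS)
model = LitClassifier(hyperparameters=HYPERPARAMETERS)  # constructor signature assumed

trainer = pl.Trainer(max_epochs=3, accelerator="auto")
trainer.fit(model, datamodule=datamodule)   # uses train_dataloader() and val_dataloader()
trainer.test(model, datamodule=datamodule)  # uses test_dataloader()
```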
104 changes: 27 additions & 77 deletions examples/multi_modal/loop.py
@@ -77,7 +77,6 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
mode: train, test or val
report_confusion_matrix: sklearn confusion matrix
report: sklear classification report
Returns:
"""
df_cm = pd.DataFrame(report_confusion_matrix)
@@ -87,17 +86,7 @@ def save_reports(self, model_dir, mode, report_confusion_matrix, report):
logger.info("Confusion Matrix and Classification report are saved.")

def save_test_evaluations(self, model_dir, mode, y_pred, y_true, confis, numerical_id_):
"""
Save a pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset
Args:
model_dir:
mode:
y_pred:
y_true:
confis:
numerical_id_:
Returns:
"""
"""Save pandas dataframe with prediction and ground truth and identifier (numerical id) of the test dataset."""
df_test = pd.DataFrame()
df_test["pred"] = y_pred
df_test["confidence"] = confis.max(axis=1)
@@ -151,43 +140,37 @@ def forward(
"""Forward path, calculate the computational graph in the forward direction.
Used for train, test and val.
Args:
y: tensor with text data as tokens
Returns:
computional graph
"""
return self.module(x, y, z)

def training_step(self, batch: Dict[str, torch.Tensor]) -> Dict:
"""
Call the eval share for training
Args:
batch: tensor
"""Call the eval share for training.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
return self._shared_eval_step(batch, "train")

def validation_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""
Call the eval share for validation
Args:
batch:
batch_idx:
"""Call the eval share for validation.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
return self._shared_eval_step(batch, "val")

def test_step(self, batch: Dict[str, torch.Tensor], batch_idx: int) -> Dict:
"""
Call the eval share for test
Args:
batch:
batch_idx:
"""Call the eval share for test.
Returns:
dict with loss, outputs and ground_truth
dict with loss, outputs and ground_truth.
"""
ret = self._shared_eval_step(batch, "test")
self.pred_list.append(ret)
@@ -199,6 +182,7 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:
Args:
batch: tensor
mode: train, test or val
Returns:
dict with loss, outputs and ground_truth
@@ -227,14 +211,8 @@ def _shared_eval_step(self, batch: Dict[str, torch.Tensor], mode: str) -> Dict:

return {"outputs": out, "loss": loss, "ground_truth": ground_truth, "numerical_id": numerical_id}

def _epoch_end(self, mode: str):
"""
Calculate loss and metricies at end of epoch
Args:
mode:
Returns:
None
"""
def _epoch_end(self, mode: str) -> None:
"""Calculate loss and metrics at end of epoch."""
if mode == "val":
output = self.val_metrics.compute()
self.log_dict(output)
Expand All @@ -249,14 +227,7 @@ def _epoch_end(self, mode: str):
self.test_metrics.reset()

def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader_idx: int = 0) -> torch.Tensor:
"""Model prediction without softmax and argmax to predict class label.
Args:
outputs:
Returns:
None
"""
"""Model prediction without softmax and argmax to predict class label."""
self.eval()
with torch.no_grad():
ids = batch["ID"]
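The `compute()`/`reset()` calls in `_epoch_end` above suggest torchmetrics collections with per-stage prefixes (the `val_MulticlassAccuracy` monitor further below points the same way). A hedged sketch of that pattern; the exact metrics used in the repository are not shown in this diff:

```python
import torch
from torchmetrics import MetricCollection
from torchmetrics.classification import MulticlassAccuracy, MulticlassF1Score

num_classes = 3  # assumption: three document classes
base = MetricCollection(
    [MulticlassAccuracy(num_classes=num_classes), MulticlassF1Score(num_classes=num_classes)]
)
val_metrics = base.clone(prefix="val_")  # keys become e.g. "val_MulticlassAccuracy"

# inside validation_step: update with predictions and ground truth
preds = torch.tensor([0, 2, 1])
target = torch.tensor([0, 1, 1])
val_metrics.update(preds, target)

# at epoch end (_epoch_end): compute, log via self.log_dict(...), then reset
output = val_metrics.compute()
print(output)
val_metrics.reset()
```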
@@ -265,51 +236,30 @@ def predict(self, batch: Dict[str, torch.Tensor], batch_idx: int = 0, dataloader
return self.forward(ids, atts, img)

def on_test_epoch_end(self) -> None:
"""
Calculate the metrics at the end of epoch for test step
Args:
outputs:
Returns:
None
"""
"""Calculate the metrics at the end of epoch for test step."""
self._epoch_end("test")

def on_validation_epoch_end(self):
"""
Calculate the metrics at the end of epoch for val step
Args:
outputs:
Returns:
None
"""
def on_validation_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for val step."""
self._epoch_end("val")

def on_train_epoch_end(self):
"""
Calculate the metrics at the end of epoch for train step
Args:
outputs:
Returns:
None
"""
def on_train_epoch_end(self) -> None:
"""Calculate the metrics at the end of epoch for train step."""
self._epoch_end("train")

def configure_optimizers(self) -> Any:
"""
Configure the optimizer
"""Configure the optimizer.
Returns:
optimizer
"""
optimizer = AdamW(self.parameters(), lr=self.learning_rate, weight_decay=self.hyperparameters["weight_decay"])
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)
return [optimizer], [{"scheduler": scheduler, "interval": "epoch"}]

def configure_callbacks(self) -> Union[Sequence[pl.pytorch.Callback], pl.pytorch.Callback]:
"""Configure Early stopping or Model Checkpointing.
Returns:
"""
"""Configure Early stopping or Model Checkpointing."""
early_stop = EarlyStopping(
monitor="val_MulticlassAccuracy", patience=self.hyperparameters["patience"], mode="max"
)
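`configure_callbacks` above is cut off after the `EarlyStopping` call. A hedged sketch of how such callbacks are typically constructed; the `ModelCheckpoint` settings are an assumption and do not appear in this diff:

```python
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

early_stop = EarlyStopping(monitor="val_MulticlassAccuracy", patience=3, mode="max")
checkpoint = ModelCheckpoint(monitor="val_MulticlassAccuracy", mode="max", save_top_k=1)
# e.g. returned as a list from configure_callbacks, or passed to Trainer(callbacks=...)
callbacks = [early_stop, checkpoint]
```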
16 changes: 7 additions & 9 deletions examples/multi_modal/model_arc.py
@@ -41,9 +41,7 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
"""Forward path, calculate the computational graph in the forward direction.
Used for train, test and val.
Args:
input_ids
attention_mask
Returns:
computional graph
@@ -72,9 +70,9 @@ def __init__(self, endpoint_mode: bool, hyperparameters: dict):
self.dropout = nn.Dropout(self.hyperparameters["dropout"])

def get_bert_model(self):
"""
Load the pre trained bert model weights
Returns: model
"""Load the pre-trained bert model weights.
Returns: model.
"""
model = BertModel.from_pretrained("bert-base-cased")
return BertClassifier(model)
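`get_bert_model` wraps the pre-trained encoder in a `BertClassifier`, whose definition is not part of this diff. A purely illustrative sketch of what such a wrapper might look like; the head (pooled output, dropout, linear layer) and `num_classes` are assumptions:

```python
import torch
from torch import nn
from transformers import BertModel


class BertClassifier(nn.Module):
    """Illustrative wrapper: BERT encoder followed by a small classification head."""

    def __init__(self, bert: BertModel, num_classes: int = 3, dropout: float = 0.1):
        super().__init__()
        self.bert = bert
        self.dropout = nn.Dropout(dropout)
        self.head = nn.Linear(bert.config.hidden_size, num_classes)

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.head(self.dropout(out.pooler_output))


model = BertClassifier(BertModel.from_pretrained("bert-base-cased"))
```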
@@ -89,9 +87,9 @@ def forward(
validation.
Args:
x (torch.Tensor): Tensor with id token
y (torch.Tensor): Tensor with attention tokens.
z (torch.Tensor): Tensor with image.
x: Tensor with id token
y: Tensor with attention tokens.
z: Tensor with image.
Returns:
torch.Tensor: The output tensor representing the computational graph.
