
Commit dec40b0
Refactor from print to tqdm write
felixdittrich92 committed Jan 22, 2025
1 parent 387d864 commit dec40b0
Showing 10 changed files with 176 additions and 118 deletions.
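
Context for the change: calling print() while a tqdm progress bar is rendering interleaves the message with the bar and leaves broken bar fragments in the terminal; tqdm.write() instead clears the bar, prints the line above it, and redraws the bar. A minimal standalone sketch of the difference (illustration only, not part of this commit):

    import time
    from tqdm import tqdm

    for i in tqdm(range(50), dynamic_ncols=True):
        time.sleep(0.05)
        if i == 25:
            # print("halfway done") here would corrupt the rendered bar;
            # tqdm.write prints the message cleanly above the bar.
            tqdm.write("halfway done")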
references/classification/train_pytorch_character.py (31 changes: 18 additions & 13 deletions)

@@ -117,7 +117,7 @@ def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, amp=False):
     model.train()
     # Iterate over the batches of the dataset
     epoch_train_loss, batch_cnt = 0.0, 0.0
-    pbar = tqdm(train_loader, position=1)
+    pbar = tqdm(train_loader, dynamic_ncols=True)
     for images, targets in pbar:
         if torch.cuda.is_available():
             images = images.cuda()
@@ -157,7 +157,8 @@ def evaluate(model, val_loader, batch_transforms, amp=False):
     model.eval()
     # Validation loop
     val_loss, correct, samples, batch_cnt = 0, 0, 0, 0
-    for images, targets in tqdm(val_loader):
+    pbar = tqdm(val_loader, dynamic_ncols=True)
+    for images, targets in pbar:
         images = batch_transforms(images)

         if torch.cuda.is_available():
@@ -174,6 +175,8 @@ def evaluate(model, val_loader, batch_transforms, amp=False):
         # Compute metric
         correct += (out.argmax(dim=1) == targets).sum().item()

+        pbar.set_description(f"Validation loss: {loss.item():.6}")
+
         val_loss += loss.item()
         batch_cnt += 1
         samples += images.shape[0]
@@ -184,7 +187,8 @@ def evaluate(model, val_loader, batch_transforms, amp=False):


def main(args):
-    print(args)
+    pbar = tqdm(disable=True)
+    pbar.write(str(args))

     if args.push_to_hub:
         login_to_hub()
@@ -219,7 +223,7 @@ def main(args):
         sampler=SequentialSampler(val_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
+    pbar.write(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

     batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

@@ -228,7 +232,7 @@ def main(args):

     # Resume weights
     if isinstance(args.resume, str):
-        print(f"Resuming {args.resume}")
+        pbar.write(f"Resuming {args.resume}")
         checkpoint = torch.load(args.resume, map_location="cpu")
         model.load_state_dict(checkpoint)

@@ -248,9 +252,9 @@ def main(args):
         model = model.cuda()

     if args.test_only:
-        print("Running evaluation")
+        pbar.write("Running evaluation")
         val_loss, acc = evaluate(model, val_loader, batch_transforms)
-        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
+        pbar.write(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
         return

     st = time.time()
@@ -283,7 +287,7 @@ def main(args):
         sampler=RandomSampler(train_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
+    pbar.write(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

     if args.show_samples:
         x, target = next(iter(train_loader))
@@ -364,14 +368,15 @@ def main(args):
     early_stopper = EarlyStopper(patience=args.early_stop_epochs, min_delta=args.early_stop_delta)
     for epoch in range(args.epochs):
         train_loss, actual_lr = fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, amp=args.amp)
+        pbar.write(f"Epoch {epoch + 1}/{args.epochs} - Training loss: {train_loss:.6} | LR: {actual_lr:.6}")

         # Validation loop at the end of each epoch
         val_loss, acc = evaluate(model, val_loader, batch_transforms)
         if val_loss < min_loss:
-            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
+            pbar.write(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
             torch.save(model.state_dict(), Path(args.output_dir) / f"{exp_name}.pt")
             min_loss = val_loss
-        print(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
+        pbar.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")

         # W&B
         if args.wb:
@@ -393,7 +398,7 @@ def main(args):
             logger.report_scalar(title="Accuracy", series="acc", value=acc, iteration=epoch)

         if args.early_stop and early_stopper.early_stop(val_loss):
-            print("Training halted early due to reaching patience limit.")
+            pbar.write("Training halted early due to reaching patience limit.")
             break

     if args.wb:
@@ -403,11 +408,11 @@ def main(args):
         push_to_hf_hub(model, exp_name, task="classification", run_config=args)

     if args.export_onnx:
-        print("Exporting model to ONNX...")
+        pbar.write("Exporting model to ONNX...")
         dummy_batch = next(iter(val_loader))
         dummy_input = dummy_batch[0].cuda() if torch.cuda.is_available() else dummy_batch[0]
         model_path = export_model_to_onnx(model, exp_name, dummy_input)
-        print(f"Exported model saved in {model_path}")
+        pbar.write(f"Exported model saved in {model_path}")


def parse_args():
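
Two patterns in this file are worth noting. First, position=1 is replaced by dynamic_ncols=True, so each bar resizes with the terminal width instead of being pinned to a fixed screen row. Second, main() creates pbar = tqdm(disable=True), a bar that never renders and serves purely as a logging handle, so every message in the script goes through the same write() path. A rough sketch of that second pattern (illustration only, not commit code):

    from tqdm import tqdm

    # A disabled bar draws nothing; write() then behaves like print()
    # but stays safe if other tqdm bars are active elsewhere.
    pbar = tqdm(disable=True)
    pbar.write("Validation set loaded in 1.234s (1000 samples in 16 batches)")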
references/classification/train_pytorch_orientation.py (31 changes: 18 additions & 13 deletions)

@@ -128,7 +128,7 @@ def fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, amp=False):
     model.train()
     # Iterate over the batches of the dataset
     epoch_train_loss, batch_cnt = 0.0, 0.0
-    pbar = tqdm(train_loader, position=1)
+    pbar = tqdm(train_loader, dynamic_ncols=True)
     for images, targets in pbar:
         if torch.cuda.is_available():
             images = images.cuda()
@@ -168,7 +168,8 @@ def evaluate(model, val_loader, batch_transforms, amp=False):
     model.eval()
     # Validation loop
     val_loss, correct, samples, batch_cnt = 0.0, 0.0, 0.0, 0.0
-    for images, targets in tqdm(val_loader):
+    pbar = tqdm(val_loader, dynamic_ncols=True)
+    for images, targets in pbar:
         images = batch_transforms(images)

         if torch.cuda.is_available():
@@ -185,6 +186,8 @@ def evaluate(model, val_loader, batch_transforms, amp=False):
         # Compute metric
         correct += (out.argmax(dim=1) == targets).sum().item()

+        pbar.set_description(f"Validation loss: {loss.item():.6}")
+
         val_loss += loss.item()
         batch_cnt += 1
         samples += images.shape[0]
@@ -195,7 +198,8 @@ def evaluate(model, val_loader, batch_transforms, amp=False):


def main(args):
-    print(args)
+    pbar = tqdm(disable=True)
+    pbar.write(str(args))

     if args.push_to_hub:
         login_to_hub()
@@ -227,7 +231,7 @@ def main(args):
         sampler=SequentialSampler(val_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")
+    pbar.write(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {len(val_loader)} batches)")

     batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

@@ -236,7 +240,7 @@ def main(args):

     # Resume weights
     if isinstance(args.resume, str):
-        print(f"Resuming {args.resume}")
+        pbar.write(f"Resuming {args.resume}")
         checkpoint = torch.load(args.resume, map_location="cpu")
         model.load_state_dict(checkpoint)

@@ -256,9 +260,9 @@ def main(args):
         model = model.cuda()

     if args.test_only:
-        print("Running evaluation")
+        pbar.write("Running evaluation")
         val_loss, acc = evaluate(model, val_loader, batch_transforms)
-        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
+        pbar.write(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
         return

     st = time.time()
@@ -289,7 +293,7 @@ def main(args):
         sampler=RandomSampler(train_set),
         pin_memory=torch.cuda.is_available(),
     )
-    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")
+    pbar.write(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {len(train_loader)} batches)")

     if args.show_samples:
         x, target = next(iter(train_loader))
@@ -370,14 +374,15 @@ def main(args):
     early_stopper = EarlyStopper(patience=args.early_stop_epochs, min_delta=args.early_stop_delta)
     for epoch in range(args.epochs):
         train_loss, actual_lr = fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, amp=args.amp)
+        pbar.write(f"Epoch {epoch + 1}/{args.epochs} - Training loss: {train_loss:.6} | LR: {actual_lr:.6}")

         # Validation loop at the end of each epoch
         val_loss, acc = evaluate(model, val_loader, batch_transforms)
         if val_loss < min_loss:
-            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
+            pbar.write(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
             torch.save(model.state_dict(), Path(args.output_dir) / f"{exp_name}.pt")
             min_loss = val_loss
-        print(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
+        pbar.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")

         # W&B
         if args.wb:
@@ -399,7 +404,7 @@ def main(args):
             logger.report_scalar(title="Accuracy", series="acc", value=acc, iteration=epoch)

         if args.early_stop and early_stopper.early_stop(val_loss):
-            print("Training halted early due to reaching patience limit.")
+            pbar.write("Training halted early due to reaching patience limit.")
             break

     if args.wb:
@@ -409,11 +414,11 @@ def main(args):
         push_to_hf_hub(model, exp_name, task="classification", run_config=args)

     if args.export_onnx:
-        print("Exporting model to ONNX...")
+        pbar.write("Exporting model to ONNX...")
         dummy_batch = next(iter(val_loader))
         dummy_input = dummy_batch[0].cuda() if torch.cuda.is_available() else dummy_batch[0]
         model_path = export_model_to_onnx(model, exp_name, dummy_input)
-        print(f"Exported model saved in {model_path}")
+        pbar.write(f"Exported model saved in {model_path}")


def parse_args():
references/classification/train_tensorflow_character.py (29 changes: 17 additions & 12 deletions)

@@ -100,7 +100,7 @@ def fit_one_epoch(model, train_loader, batch_transforms, optimizer, amp=False):
     train_iter = iter(train_loader)
     # Iterate over the batches of the dataset
     epoch_train_loss, batch_cnt = 0, 0
-    pbar = tqdm(train_iter, position=1)
+    pbar = tqdm(train_iter, dynamic_ncols=True)
     for images, targets in pbar:
         images = batch_transforms(images)

@@ -127,13 +127,16 @@ def evaluate(model, val_loader, batch_transforms):
     # Validation loop
     val_loss, correct, samples, batch_cnt = 0, 0, 0, 0
     val_iter = iter(val_loader)
-    for images, targets in tqdm(val_iter):
+    pbar = tqdm(val_iter, dynamic_ncols=True)
+    for images, targets in pbar:
         images = batch_transforms(images)
         out = model(images, training=False)
         loss = tf.nn.sparse_softmax_cross_entropy_with_logits(targets, out)
         # Compute metric
         correct += int((out.numpy().argmax(1) == targets.numpy()).sum())

+        pbar.set_description(f"Validation loss: {loss.numpy().mean():.6}")
+
         val_loss += loss.numpy().mean()
         batch_cnt += 1
         samples += images.shape[0]
@@ -151,7 +154,8 @@ def collate_fn(samples):


def main(args):
-    print(args)
+    pbar = tqdm(disable=True)
+    pbar.write(str(args))

     if args.push_to_hub:
         login_to_hub()
@@ -184,7 +188,7 @@ def main(args):
         drop_last=False,
         collate_fn=collate_fn,
     )
-    print(
+    pbar.write(
         f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in {val_loader.num_batches} batches)"
     )

@@ -206,9 +210,9 @@ def main(args):
     ])

     if args.test_only:
-        print("Running evaluation")
+        pbar.write("Running evaluation")
         val_loss, acc = evaluate(model, val_loader, batch_transforms)
-        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
+        pbar.write(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
         return

     st = time.time()
@@ -239,7 +243,7 @@ def main(args):
         drop_last=True,
         collate_fn=collate_fn,
     )
-    print(
+    pbar.write(
         f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in {train_loader.num_batches} batches)"
     )

@@ -334,14 +338,15 @@ def main(args):
     early_stopper = EarlyStopper(patience=args.early_stop_epochs, min_delta=args.early_stop_delta)
     for epoch in range(args.epochs):
         train_loss, actual_lr = fit_one_epoch(model, train_loader, batch_transforms, optimizer, args.amp)
+        pbar.write(f"Epoch {epoch + 1}/{args.epochs} - Training loss: {train_loss:.6} | LR: {actual_lr:.6}")

         # Validation loop at the end of each epoch
         val_loss, acc = evaluate(model, val_loader, batch_transforms)
         if val_loss < min_loss:
-            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
+            pbar.write(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
             model.save_weights(Path(args.output_dir) / f"{exp_name}.weights.h5")
             min_loss = val_loss
-        print(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
+        pbar.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")

         # W&B
         if args.wb:
@@ -363,7 +368,7 @@ def main(args):
             logger.report_scalar(title="Accuracy", series="acc", value=acc, iteration=epoch)

         if args.early_stop and early_stopper.early_stop(val_loss):
-            print("Training halted early due to reaching patience limit.")
+            pbar.write("Training halted early due to reaching patience limit.")
             break

     if args.wb:
@@ -373,15 +378,15 @@ def main(args):
         push_to_hf_hub(model, exp_name, task="classification", run_config=args)

     if args.export_onnx:
-        print("Exporting model to ONNX...")
+        pbar.write("Exporting model to ONNX...")
         if args.arch == "vit_b":
             # fixed batch size for vit
             dummy_input = [tf.TensorSpec([1, args.input_size, args.input_size, 3], tf.float32, name="input")]
         else:
             # dynamic batch size
             dummy_input = [tf.TensorSpec([None, args.input_size, args.input_size, 3], tf.float32, name="input")]
         model_path, _ = export_model_to_onnx(model, exp_name, dummy_input)
-        print(f"Exported model saved in {model_path}")
+        pbar.write(f"Exported model saved in {model_path}")


def parse_args():
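
Both the PyTorch and TensorFlow scripts also gain a live per-batch label on the validation bar via pbar.set_description(...), which prefixes the bar with the latest loss on every iteration. A self-contained sketch of the effect (a dummy loop stands in for the real val_loader; not commit code):

    import random
    from tqdm import tqdm

    # Any iterable works with tqdm; range() stands in for val_loader here.
    pbar = tqdm(range(100), dynamic_ncols=True)
    for _ in pbar:
        loss = random.random()  # placeholder for the real batch loss
        pbar.set_description(f"Validation loss: {loss:.6f}")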
(7 more changed files not shown)
