Skip to content

Commit

Permalink
Log more messages to better catch possible errors
Browse files Browse the repository at this point in the history
  • Loading branch information
danifranco committed May 22, 2024
1 parent 5e418b7 commit a11945f
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 58 deletions.
23 changes: 15 additions & 8 deletions biapy_check_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import requests
from pathlib import Path

def check_configuration(cfg, jobname, check_data_paths=True):
def check_configuration(cfg, jobname, check_data_paths=True, logger=None):
"""
Check if the configuration is good.
"""
Expand Down Expand Up @@ -156,7 +156,8 @@ def check_configuration(cfg, jobname, check_data_paths=True):
"PROBLEM.TYPE not in ['SEMANTIC_SEG', 'INSTANCE_SEG', 'CLASSIFICATION', 'DETECTION', 'DENOISING', 'SUPER_RESOLUTION', 'SELF_SUPERVISED', 'IMAGE_TO_IMAGE']"

if cfg.PROBLEM.NDIM == '3D' and cfg.TEST.FULL_IMG:
print("WARNING: TEST.FULL_IMG == True while using PROBLEM.NDIM == '3D'. As 3D images are usually 'huge'"
if logger is not None:
logger.warning("TEST.FULL_IMG == True while using PROBLEM.NDIM == '3D'. As 3D images are usually 'huge'"
", full image statistics will be disabled to avoid GPU memory overflow")

if cfg.LOSS.TYPE != "CE" and cfg.PROBLEM.TYPE not in ['SEMANTIC_SEG', 'DETECTION']:
Expand All @@ -178,15 +179,17 @@ def check_configuration(cfg, jobname, check_data_paths=True):
url = 'http://www.doi.org/'+cfg.MODEL.BMZ.SOURCE_MODEL_DOI
r = requests.get(url, stream=True, verify=True)
if r.status_code >= 200 and r.status_code < 400:
print(f'BMZ model DOI: {cfg.MODEL.BMZ.SOURCE_MODEL_DOI} found')
if logger is not None:
logger.info(f'BMZ model DOI: {cfg.MODEL.BMZ.SOURCE_MODEL_DOI} found')
else:
raise ValueError(f'BMZ model DOI: {cfg.MODEL.BMZ.SOURCE_MODEL_DOI} not found. Aborting!')

elif cfg.MODEL.SOURCE == "torchvision":
if cfg.MODEL.TORCHVISION_MODEL_NAME == "":
raise ValueError("'MODEL.TORCHVISION_MODEL_NAME' needs to be configured when 'MODEL.SOURCE' is 'torchvision'")
if cfg.TEST.AUGMENTATION:
print("WARNING: 'TEST.AUGMENTATION' is not available using TorchVision models")
if logger is not None:
logger.info("WARNING: 'TEST.AUGMENTATION' is not available using TorchVision models")

if cfg.TEST.AUGMENTATION and cfg.TEST.REDUCE_MEMORY:
raise ValueError("'TEST.AUGMENTATION' and 'TEST.REDUCE_MEMORY' are incompatible as the function used to make the rotation "
Expand Down Expand Up @@ -406,7 +409,8 @@ def check_configuration(cfg, jobname, check_data_paths=True):
raise ValueError('To use preprocessing DATA.TRAIN.IN_MEMORY needs to be True.')
if not cfg.DATA.VAL.IN_MEMORY and cfg.DATA.PREPROCESS.VAL:
if cfg.DATA.VAL.FROM_TRAIN:
print("WARNING: validation preprocessing will be done based on 'DATA.PREPROCESS.TRAIN', as 'DATA.VAL.FROM_TRAIN' is selected")
if logger is not None:
logger.info("WARNING: validation preprocessing will be done based on 'DATA.PREPROCESS.TRAIN', as 'DATA.VAL.FROM_TRAIN' is selected")
else:
raise ValueError('To use preprocessing DATA.VAL.IN_MEMORY needs to be True.')
if not cfg.DATA.TEST.IN_MEMORY and cfg.DATA.PREPROCESS.TEST:
Expand Down Expand Up @@ -525,11 +529,13 @@ def check_configuration(cfg, jobname, check_data_paths=True):
if cfg.DATA.VAL.CROSS_VAL_NFOLD < cfg.DATA.VAL.CROSS_VAL_FOLD:
raise ValueError("'DATA.VAL.CROSS_VAL_NFOLD' can not be less than 'DATA.VAL.CROSS_VAL_FOLD'")
if not cfg.DATA.VAL.IN_MEMORY:
print("WARNING: ignoring 'DATA.VAL.IN_MEMORY' as it is always True when 'DATA.VAL.CROSS_VAL' is enabled")
if logger is not None:
logger.info("WARNING: ignoring 'DATA.VAL.IN_MEMORY' as it is always True when 'DATA.VAL.CROSS_VAL' is enabled")
if cfg.DATA.TEST.USE_VAL_AS_TEST and not cfg.DATA.VAL.CROSS_VAL:
raise ValueError("'DATA.TEST.USE_VAL_AS_TEST' can only be used when 'DATA.VAL.CROSS_VAL' is selected")
if cfg.DATA.TEST.USE_VAL_AS_TEST and not cfg.TRAIN.ENABLE and cfg.DATA.TEST.IN_MEMORY:
print("WARNING: 'DATA.TEST.IN_MEMORY' is disabled when 'DATA.TEST.USE_VAL_AS_TEST' is enabled")
if logger is not None:
logger.warning("'DATA.TEST.IN_MEMORY' is disabled when 'DATA.TEST.USE_VAL_AS_TEST' is enabled")
if len(cfg.DATA.TRAIN.RESOLUTION) != 1 and len(cfg.DATA.TRAIN.RESOLUTION) != dim_count:
raise ValueError("When PROBLEM.NDIM == {} DATA.TRAIN.RESOLUTION tuple must be length {}, given {}."
.format(cfg.PROBLEM.NDIM, dim_count, cfg.DATA.TRAIN.RESOLUTION))
Expand Down Expand Up @@ -588,7 +594,8 @@ def check_configuration(cfg, jobname, check_data_paths=True):
if cfg.DATA.TRAIN.REPLICATE:
if cfg.PROBLEM.TYPE == 'CLASSIFICATION' or \
(cfg.PROBLEM.TYPE == 'SELF_SUPERVISED' and cfg.PROBLEM.SELF_SUPERVISED.PRETEXT_TASK == "masking"):
print("WARNING: 'DATA.TRAIN.REPLICATE' has no effect in the selected workflow")
if logger is not None:
logger.warning("'DATA.TRAIN.REPLICATE' has no effect in the selected workflow")

### Model ###
if cfg.MODEL.SOURCE == "biapy":
Expand Down
32 changes: 17 additions & 15 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,23 @@
os.environ['QT_MAC_WANTS_LAYER'] = '1'

class MainWindow(QMainWindow):
def __init__(self, log_file, log_dir):
def __init__(self, logger, log_dir):
"""
Main window constructor.
Parameters
----------
log_file : str
File to log the output.
logger : logging.Logger
Logger used to write all the information to the log file.
log_dir : str
Logging directory for the current BiaPy GUI execution.
"""
super(MainWindow, self).__init__()
self.ui = Ui_MainWindow()
self.ui.setupUi(self)

self.log_file = log_file
self.logger = logger
self.log_dir = log_dir
self.cfg = Settings()

Expand Down Expand Up @@ -839,7 +839,7 @@ def closeEvent(self, event):
if self.yes_no.answer:
# Kill all run windows
for x in self.cfg.settings['running_workers']:
print("Killing subprocess . . .")
self.logger.info("Killing subprocess . . .")
x.gui.forcing_close = True
x.gui.close()

Expand All @@ -848,7 +848,7 @@ def closeEvent(self, event):
try:
self.thread_spin.quit()
except Exception as e:
print(f"Possible expected error during thread_spin deletion: {e}")
self.logger.error(f"Possible expected error during thread_spin deletion: {e}")
# Finally close the main window
self.close()
else:
Expand All @@ -857,9 +857,9 @@ def closeEvent(self, event):
def check_new_gui_version(self):
# Changed version
sha, vtag = get_git_revision_short_hash(self)
print(f"Local GUI version: {self.cfg.settings['biapy_gui_version']}")
print(f"Remote last version's hash: {sha}")
print(f"Remote last version: {vtag}")
self.logger.info(f"Local GUI version: {self.cfg.settings['biapy_gui_version']}")
self.logger.info(f"Remote last version's hash: {sha}")
self.logger.info(f"Remote last version: {vtag}")
if sha is not None and vtag is not None and vtag != self.cfg.settings['biapy_gui_version']:
self.dialog_exec("There is a new version of BiaPy's graphical user interface available. Please, "
"download it <a href='https://biapyx.github.io'>here</a>", reason="inform_user")
Expand All @@ -878,10 +878,12 @@ def center_window(widget, geometry):
if __name__ == "__main__":
window = None
log_dir = os.path.join(tempfile._get_default_tempdir(), "BiaPy")
log_file = os.path.join(log_dir, "BiaPy_"+next(tempfile._get_candidate_names()))
random_str = next(tempfile._get_candidate_names())
log_file = os.path.join(log_dir, f"BiaPy_{random_str}")
os.makedirs(log_dir, exist_ok=True)
logger = logging.getLogger('BiaPy')
logging.basicConfig(filename=log_file, level=logging.ERROR)
logging.basicConfig(filename=log_file, format='%(asctime)s %(message)s', filemode='w')
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
StyleSheet = """
QComboBox {
selection-background-color: rgb(64,144,253);
Expand All @@ -906,7 +908,7 @@ def eventFilter(self, obj, ev):
filter = WheelEventFilter()
app.installEventFilter(filter)

window = MainWindow(log_file, log_dir)
window = MainWindow(logger, log_dir)
window.show()

# Center the main GUI in the middle of the first screen
Expand All @@ -918,7 +920,7 @@ def eventFilter(self, obj, ev):

def excepthook(exc_type, exc_value, exc_tb):
tb = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))
print("error message:\n", tb)
logger.info("error message:\n", tb)
tb += f"\nYou can also provide the log error here: \n{log_file}\n"
tb += "\nExiting BiaPy as its functionality may be damaged!\n"

Expand Down
36 changes: 18 additions & 18 deletions run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def kill_all_processes(self):
try:
self.parent_worker.finished_signal.emit()
except Exception as e:
print(f"Possible expected error during BiaPy's running thread deletion: {e}")
self.parent_worker.main_gui.logger.error(f"Possible expected error during BiaPy's running thread deletion: {e}")
self.close()

def init_log(self, container_info):
Expand All @@ -93,7 +93,7 @@ def update_gui(self, signal):
elif signal == 1:
self.update_log()
else:
print("Nothing")
self.parent_worker.main_gui.logger.info("Nothing")

def update_log(self):
finished_good = False
Expand All @@ -116,7 +116,7 @@ def update_cont_state(self, svalue):
try:
self.parent_worker.biapy_container.reload()
except Exception as e:
print(f"Possible expected error during container status reload(): {e}")
self.parent_worker.main_gui.logger.error(f"Possible expected error during container status reload(): {e}")

st = self.parent_worker.biapy_container.status
if st == "running":
Expand Down Expand Up @@ -208,13 +208,13 @@ def __init__(self, main_gui, config, container_name, worker_id, output_folder, u
self.break_pulling = False

def stop_worker(self):
print("Stopping the container . . . ")
self.main_gui.logger.info("Stopping the container . . . ")
if self.biapy_container is not None:
self.biapy_container.stop(timeout=1)
try:
self.update_cont_state_signal.emit(1)
except Exception as e:
print(f"Possible expected error during BiaPy's running thread deletion: {e}")
self.main_gui.logger.error(f"Possible expected error during BiaPy's running thread deletion: {e}")
self.gui.run_window.stop_container_bn.setEnabled(False)
self.gui.run_window.test_progress_label.setEnabled(False)
self.gui.run_window.test_progress_bar.setEnabled(False)
Expand All @@ -223,7 +223,7 @@ def stop_worker(self):
self.gui.run_window.train_progress_bar.setEnabled(False)
self.gui.run_window.train_epochs_label.setEnabled(False)
else:
print("Container not running yet")
self.main_gui.logger.info("Container not running yet")
# To kill pulling process if it is running
self.break_pulling = True

Expand Down Expand Up @@ -251,7 +251,7 @@ def run(self):
# Collect the output of the container and update the GUI
self.total_layers = {}
for item in self.docker_client.api.pull(self.main_gui.cfg.settings['biapy_container_name'], stream=True, decode=True):
print(item)
self.main_gui.logger.info(item)
if item["status"] == 'Pulling fs layer':
self.total_layers[item["id"]+"_download"] = 0
self.total_layers[item["id"]+"_extract"] = 0
Expand All @@ -264,7 +264,7 @@ def run(self):
self.total_layers[item["id"]+"_extract"] = 1

if self.break_pulling:
print("Stopping pulling process . . .")
self.main_gui.logger.info("Stopping pulling process . . .")
return
# Update GUI
steps = np.sum([int(float(x)*10) for x in self.total_layers.values()])
Expand Down Expand Up @@ -296,7 +296,7 @@ def run(self):
try:
temp_cfg = yaml.safe_load(stream)
except yaml.YAMLError as exc:
print(exc)
self.main_gui.logger.error(exc)

dist_backend = "gloo" if self.windows_os else "nccl"
command = ["--config", "/BiaPy_files/input.yaml", "--result_dir", "{}".format(self.output_folder_in_container),
Expand Down Expand Up @@ -418,7 +418,7 @@ def run(self):
self.test_files = len(sorted(next(os.walk(self.config['DATA']['TEST']['PATH']))[2]))
self.gui.run_window.test_progress_bar.setMaximum(self.test_files)

print("Creating temporal input YAML file")
self.main_gui.logger.info("Creating temporal input YAML file")
with open(real_cfg_input, 'w') as outfile:
yaml.dump(temp_cfg, outfile, default_flow_style=False)

Expand All @@ -429,11 +429,11 @@ def run(self):

# Run container
# check_command = [ "python3", "-u", "-c", "'import torch; print(torch.cuda.is_available())'"]
print(f"Command: {command}")
print(f"Volumes: {volumes}")
print(f"GPU (IDs): {gpus}")
print(f"CPUs: {cpu_count}")
print(f"GUI version: {self.main_gui.cfg.settings['biapy_gui_version']}")
self.main_gui.logger.info(f"Command: {command}")
self.main_gui.logger.info(f"Volumes: {volumes}")
self.main_gui.logger.info(f"GPU (IDs): {gpus}")
self.main_gui.logger.info(f"CPUs: {cpu_count}")
self.main_gui.logger.info(f"GUI version: {self.main_gui.cfg.settings['biapy_gui_version']}")
nofile_limit = docker.types.Ulimit(name='nofile', soft=10000, hard=10000)
self.biapy_container = self.docker_client.containers.run(
self.container_name,
Expand All @@ -449,7 +449,7 @@ def run(self):
cpu_count=cpu_count,
)
self.process_steps = "running"
print("Container created!")
self.main_gui.logger.info("Container created!")

# Set the window header
self.container_info = \
Expand Down Expand Up @@ -497,7 +497,7 @@ def run(self):
for log in self.biapy_container.logs(stream=True):
l = log.decode("utf-8")
try:
print(l.encode("utf-8") if self.windows_os else l, end="")
self.main_gui.logger.info(l.encode("utf-8") if self.windows_os else l, end="")
except:
pass
try:
Expand Down Expand Up @@ -543,7 +543,7 @@ def run(self):
f.close()
except:
# Print first the traceback (only visible through terminal)
print(traceback.format_exc())
self.main_gui.logger.error(traceback.format_exc())

# Try to log the error in the error file
ferr = open(self.container_stderr_file, "w")
Expand Down
4 changes: 2 additions & 2 deletions ui_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,7 @@ def run_biapy(main_window):
if len(old_biapy_images) > 0:
main_window.yes_no_exec("Seems that there is one or more old BiaPy containers. Do yo want to remove them to save disk space?")
if main_window.yes_no.answer:
print("Removing old containers")
main_window.logger.info("Removing old containers")
for i in range(len(old_biapy_images)):
main_window.docker_client.images.remove(old_biapy_images[i], force=True)

Expand All @@ -1527,7 +1527,7 @@ def run_biapy(main_window):
if dockerhub_image_tag != local_biapy_image_tag:
main_window.yes_no_exec("There is another BiaPy container. Do yo want to remove the current one to save disk space?")
if main_window.yes_no.answer:
print("Removing last valid container")
main_window.logger.info("Removing last valid container")
main_window.docker_client.images.remove(main_window.cfg.settings['biapy_container_name'], force=True)

# First time
Expand Down
Loading

0 comments on commit a11945f

Please sign in to comment.