-
Notifications
You must be signed in to change notification settings - Fork 0
/
bidsification_script.py
158 lines (142 loc) · 7.85 KB
/
bidsification_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import shutil
import sys
import glob
from pathlib import Path
import pandas as pd
# -----------------
# STEP 0: variables
# -----------------
# Top-level study directory; every other path below is derived from it.
root_dir = '/exports/fsw/Bendlab/SamenUniek'

# Raw session folders (relative to root_dir) and the session label used in the
# pseudobids tree. The two lists are paired by position.
raw_sessions = ['MCC_ses01-lab']
bids_sessions = ['ses-w01lab']

# Scan identifiers searched for (as substrings) in the PAR header.
# Order matters: the first entry found in the header line wins, which is why
# 'B0-map_RS' is listed before the plain 'B0-map' entries.
file_type = [
    '3DT1',
    'SNAT1', 'SNAT2', 'SNAT3',
    'PCG1', 'PCG2', 'PCG3',
    'rsfMRI',
    'hires',
    'B0-map_RS',
    'B0-map', 'B0-map', 'B0-map',
    'jones30_A', 'jones30_P',
]

# Suffix appended to renamed files, position-matched with file_type.
# The last two entries ('unknown_type', 'log') have no file_type counterpart;
# they only exist as columns of the conversion log.
new_file_type = [
    'T1mri',
    'bold_SNAT1', 'bold_SNAT2', 'bold_SNAT3',
    'bold_PCG1', 'bold_PCG2', 'bold_PCG3',
    'bold_rsfmr',
    'T2str',
    'B0RS',
    'Bzero1', 'Bzero2', 'Bzero3',
    'DTIap', 'DTIpa',
    'unknown_type',
    'log',
]

# Column layout of the conversion log: two bookkeeping columns followed by one
# column per entry of new_file_type.
cols = ['participant', 'nr_files', *new_file_type]

# Prefix of the per-participant directory names in the pseudobids tree.
prefix = 'sub-mcc'

# Directory holding the per-session conversion log CSV files.
conversion_log_dir = os.path.join(root_dir, 'conversion_logs')

# Create top-level pseudobids directory (only when nothing exists at that path)
pseudobids_dir = os.path.join(root_dir, 'pseudobids')
if not os.path.exists(pseudobids_dir):
    os.mkdir(pseudobids_dir)
# --------------------------------
# STEP 1: Loop through sessions, participants:
# - rename PAR and REC files (in place)
# - copy participant files to new pseudobids directory structure
# - append one row per participant to the session's conversion log (CSV)
# --------------------------------

# A log row starts with 'participant' and 'nr_files'; the per-scan-type
# columns follow, so scan-type index `idx` lives at row position idx + 2.
LOG_ROW_OFFSET = 2
# 0-based index of the PAR header line that is searched for the scan identifier.
PROTOCOL_LINE_INDEX = 13
# Position of the first plain 'B0-map' slot and the number of such slots.
# Successive B0 maps of one participant are assigned to consecutive slots.
FIRST_B0_IDX = file_type.index('B0-map')
MAX_B0_MAPS = file_type.count('B0-map')


def _add_log_message(row, message):
    """Append `message` to the free-text log cell (the last element of `row`).

    Multiple messages for one participant are joined with ' | ' so that they
    all end up in the single 'log' column of the conversion log.
    """
    row[-1] = message if row[-1] is None else f"{row[-1]} | {message}"


def _sort_key(code):
    """Return `code` left-padded with one '0' when shorter than 4 characters.

    Used only for ordering, so that codes sort in the same order as the
    padded strings would.
    """
    return '0' + code if len(code) < 4 else code


# Make sure the log directory exists *before* any file is renamed; otherwise a
# failing to_csv() would leave renamed data without a log entry.
os.makedirs(conversion_log_dir, exist_ok=True)

for i, session in enumerate(raw_sessions):
    raw_data_dir = os.path.join(root_dir, session)
    print(raw_data_dir)

    # Log file: reuse the existing one (so finished participants are skipped)
    # or start an empty table with the expected columns.
    conversion_log_fn = os.path.join(conversion_log_dir, session + '_conversion_log.csv')
    if os.path.isfile(conversion_log_fn):
        df = pd.read_csv(conversion_log_fn)
        # Logs written with the index included come back with 'Unnamed: N'
        # columns; drop them so they do not pile up on every run.
        df = df.loc[:, [c for c in df.columns if not str(c).startswith('Unnamed')]]
    else:
        df = pd.DataFrame(columns=cols)
    already_converted = set(df['participant'].tolist())

    # Every entry of the raw data folder is treated as a participant.
    for p, participant in enumerate(os.listdir(raw_data_dir)):
        # Check in the log if conversion has already been done. If so, skip.
        if participant in already_converted:
            print(f"Participant {participant} already converted to pseudobids. Skipping...")
            continue

        participant_dir = os.path.join(raw_data_dir, participant)
        if not os.path.isdir(participant_dir):
            print('Error: participant directory not found for ' + participant)
            continue

        print(f"{str(p).zfill(3)}: {participant}")
        all_files = [
            fn for fn in os.listdir(participant_dir)
            if os.path.isfile(os.path.join(participant_dir, fn))
        ]
        new_row = [None] * len(cols)
        new_row[0] = participant
        new_row[1] = len(all_files)
        b0_found = 0  # number of plain B0 maps assigned so far

        # The scan code is whatever sits between the 11-character filename
        # prefix and the 4-character extension. PAR and REC share a code, so
        # duplicates are removed. (assumes a fixed-width prefix — TODO confirm)
        codes = sorted({fn[11:-4] for fn in all_files}, key=_sort_key)

        for code in codes:
            if 'FSL' in code:
                # Recorded in the 'unknown_type' column; not renamed.
                new_row[-2] = code
                continue

            fns = glob.glob(os.path.join(participant_dir, '*_' + code + '.PAR'))
            if len(fns) > 1:
                # NOTE(review): the message says "Using first one..." but the
                # file is skipped. Behaviour is kept as-is; confirm which of
                # the two is intended.
                _add_log_message(new_row, f"WARNING: found {len(fns)} files with pattern {code}.PAR for participant {participant}. Using first one...")
                print(new_row[-1])
                continue
            if not fns:
                _add_log_message(new_row, f"ERROR: found NO files with pattern {code}.PAR for participant {participant}. Ignoring this file...")
                print(new_row[-1])
                continue
            name = fns[0]

            # Read only the header line needed for renaming. A file that is
            # too short yields an empty line and is reported as unknown below
            # instead of raising IndexError.
            line = ''
            with open(name, 'r') as f:
                for line_no, text in enumerate(f):
                    if line_no == PROTOCOL_LINE_INDEX:
                        line = text
                        break

            # First entry of file_type that occurs in the header line (old identifier)
            match = next((x for x in file_type if x in line), None)
            if match is None:
                _add_log_message(new_row, f"ERROR: no known file type found in ({code}.PAR) file for participant {participant}. Ignoring this file...")
                print(new_row[-1])
                continue

            # Index into new_file_type that corresponds to the match (new identifier)
            if match == 'B0-map':
                if b0_found >= MAX_B0_MAPS:
                    # Without this guard the next slot would be a DTI column.
                    _add_log_message(new_row, f"ERROR: more than {MAX_B0_MAPS} B0 maps found ({code}.PAR) for participant {participant}. Ignoring this file...")
                    print(new_row[-1])
                    continue
                idx = FIRST_B0_IDX + b0_found
                b0_found += 1
                _add_log_message(new_row, f"NOTE: B0 map found ({code}.PAR) for participant {participant}.")
                print(new_row[-1])
            else:
                idx = file_type.index(match)
            new_row[idx + LOG_ROW_OFFSET] = code

            new_tag = new_file_type[idx]
            stem = name[:-4]

            # A PAR file whose *filename* already carries the tag was renamed
            # by an earlier run; skip it together with its REC file so neither
            # gets a second suffix.
            if new_tag in os.path.basename(name):
                print('WARNING: renamed file ' + name + ' already exists in the folder! This file will therefore be skipped!')
                continue

            # Rename PAR file, unless that would overwrite an existing file
            renamed_par = stem + '_' + new_tag + name[-4:]
            if os.path.isfile(renamed_par):
                print('WARNING: file ' + renamed_par + ' already exists in the folder! This file will therefore be skipped!')
                continue
            os.rename(name, renamed_par)

            # Rename the matching REC file, if present and not already renamed
            name_rec = stem + '.REC'
            if not os.path.isfile(name_rec):
                print('ERROR: corresponding REC file not found for: ' + name)
                continue
            renamed_rec = stem + '_' + new_tag + '.REC'
            if os.path.isfile(renamed_rec):
                print('WARNING: file ' + renamed_rec + ' already exists in the folder! This file will therefore be skipped!')
            else:
                os.rename(name_rec, renamed_rec)

        # Create bids-like directory structure for participant
        sub_dir = os.path.join(pseudobids_dir, prefix + str(participant[4:10]))
        os.makedirs(sub_dir, exist_ok=True)
        # Session-level directory
        session_dir = os.path.join(sub_dir, str(bids_sessions[i]))

        # Copy renamed raw data to pseudobids directory
        if os.path.exists(session_dir):
            # Session directory already there: copy regular files one by one,
            # leaving out anything with 'FSL' in its name.
            for fn in os.listdir(participant_dir):
                src = os.path.join(participant_dir, fn)
                if os.path.isfile(src) and 'FSL' not in fn:
                    shutil.copy2(src, session_dir)
        else:
            # Fresh session directory: copy the whole tree except 'FSL*' entries.
            shutil.copytree(participant_dir, session_dir, ignore=shutil.ignore_patterns('FSL*'))

        # Add participant info to the log and write it out immediately, so an
        # interrupted run keeps the participants that were already finished.
        df_new_row = pd.DataFrame([new_row], columns=cols)
        df = pd.concat([df, df_new_row], ignore_index=True)
        df.to_csv(conversion_log_fn, index=False)
        already_converted.add(participant)