diff --git a/README.md b/README.md index d87b94b..717dbc8 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,69 @@ # Diagnostics project -Script go in the `scripts` directory. +Scripts go in the `scripts` directory. -Library code (such as Python modules or packages) goes in the `packages` directory. +Library code (Python modules) goes in the `findoutlie` directory. -You should put this `packages` directory on your Python PATH. +You should put the code in this `findoutlie` directory on your Python PATH. -This file has instructions on how to get, validate and process the data. +This README file has instructions on how to get, validate and process the data. + +## Before you start +### Create a repository from the template +The first step is to click on the green button at the top-right that says *Use this template*. +More detailed instructions can be found at [GitHub's documentation pages](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-repository-from-a-template). + +Once you have your own repository under your username, please edit this `README.md` file and remove this section to avoid confusion. +Again, [GitHub's documentation](https://docs.github.com/en/repositories/working-with-files/managing-files/editing-files) may help you conclude this step easily. +That will be your first commit to this repository. + +### Locally clone the repository +To work locally on your system, you can clone the repository. +``` +git clone git@github.com:<your_username>/<repository_name> +cd <repository_name>/ +``` ## Get the data - cd data - curl -LO http://nipy.bic.berkeley.edu/psych-214/group00.tar.gz - tar zxvf group00.tar.gz - cd .. +``` +cd data +# Change the number here to your number. +# 34937553 is for group 0 +# 34937565 is for group 1 +# 34937586 is for group 2 +curl -L https://figshare.com/ndownloader/files/34937553 -o group_data.tar +tar xvf group_data.tar +cd .. 
+``` ## Check the data - python3 scripts/validate_data.py data +``` +python3 scripts/validate_data.py data +``` ## Find outliers - python3 scripts/find_outliers.py data +``` +python3 scripts/find_outliers.py data +``` This should print output to the terminal of form: - <filename>, <outlier_index>, ... - <filename>, <outlier_index>, ... +``` +<filename>, <outlier_index>, <outlier_index>, ... +<filename>, <outlier_index>, <outlier_index>, ... +``` Where `<filename>` is the name of the image that has outlier scans, and `<outlier_index>` is an index to the volume in the 4D image that you have -indentified as an outlier. 0 refers to the first volume. For example: +identified as an outlier. 0 refers to the first volume. For example: - group00_sub01_run1.nii 3, 21, 22, 104 - group00_sub02_run2.nii 11, 33 91 - group00_sub04_run2.nii 101, 102, 132 - group00_sub07_run2.nii 0, 1, 2, 166, 167 - group00_sub09_run2.nii 3 +``` +data/sub-01/func/sub-01_task-taskzero_run-01_bold.nii.gz, 3, 21, 22, 104 +data/sub-01/func/sub-01_task-taskzero_run-02_bold.nii.gz, 11, 33, 91 +data/sub-03/func/sub-03_task-taskzero_run-02_bold.nii.gz, 101, 102, 132 +data/sub-08/func/sub-08_task-taskzero_run-01_bold.nii.gz, 0, 1, 2, 166, 167 +data/sub-09/func/sub-09_task-taskzero_run-01_bold.nii.gz, 3 +``` diff --git a/findoutlie/__init__.py b/findoutlie/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/findoutlie/outfind.py b/findoutlie/outfind.py new file mode 100644 index 0000000..93f65f4 --- /dev/null +++ b/findoutlie/outfind.py @@ -0,0 +1,32 @@ +""" Module with routines for finding outliers +""" + +import os.path as op +from glob import glob + + +def detect_outliers(fname): + return [42] + + +def find_outliers(data_directory): + """ Return filenames and outlier indices for images in `data_directory`. + + Parameters + ---------- + data_directory : str + Directory containing images. + + Returns + ------- + outlier_dict : dict + Dictionary with keys being filenames and values being lists of outliers + for filename. 
+ """ + image_fnames = glob(op.join(data_directory, '**', 'sub-*.nii.gz'), + recursive=True) + outlier_dict = {} + for fname in image_fnames: + outliers = detect_outliers(fname) + outlier_dict[fname] = outliers + return outlier_dict diff --git a/scripts/find_outliers.py b/scripts/find_outliers.py index 82f4184..975984b 100644 --- a/scripts/find_outliers.py +++ b/scripts/find_outliers.py @@ -5,36 +5,45 @@ python3 scripts/find_outliers.py data """ +import os.path as op import sys -def find_outliers(data_directory): - """ Print filenames and outlier indices for images in `data_directory`. +from argparse import ArgumentParser, RawDescriptionHelpFormatter - Print filenames and detected outlier indices to the terminal. +# Put the findoutlie directory on the Python path. +PACKAGE_DIR = op.join(op.dirname(__file__), '..') +sys.path.append(PACKAGE_DIR) - Parameters - ---------- - data_directory : str - Directory containing containing images. +from findoutlie import outfind - Returns - ------- - None - """ - # Your code here - raise RuntimeError('No code yet') + +def print_outliers(data_directory): + outlier_dict = outfind.find_outliers(data_directory) + for fname, outliers in outlier_dict.items(): + if len(outliers) == 0: + continue + outlier_strs = [] + for out_ind in outliers: + outlier_strs.append(str(out_ind)) + print(', '.join([fname] + outlier_strs)) + + +def get_parser(): + parser = ArgumentParser(description=__doc__, # Usage from docstring + formatter_class=RawDescriptionHelpFormatter) + parser.add_argument('data_directory', + help='Directory containing data') + return parser def main(): # This function (main) called when this file run as a script. 
# # Get the data directory from the command line arguments - if len(sys.argv) < 2: - raise RuntimeError("Please give data directory on " - "command line") - data_directory = sys.argv[1] - # Call function to validate data in data directory - find_outliers(data_directory) + parser = get_parser() + args = parser.parse_args() + # Call function to find outliers. + print_outliers(args.data_directory) if __name__ == '__main__': diff --git a/scripts/validate_data.py b/scripts/validate_data.py index 1fef5da..5eb4b29 100644 --- a/scripts/validate_data.py +++ b/scripts/validate_data.py @@ -9,6 +9,7 @@ import sys import hashlib + def file_hash(filename): """ Get byte contents of file `filename`, return SHA1 hash