-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from owczr/develop
Add Azure VM scripts
- Loading branch information
Showing
11 changed files
with
177 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
numpy==1.23.5 | ||
pydicom==2.4.4 | ||
#pydicom==2.4.4 | ||
scikit-image==0.20.0 | ||
tensorflow==2.12.0 | ||
tqdm==4.65.0 | ||
|
File renamed without changes.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#!/bin/bash
# This script downloads the LIDC-IDRI dataset from the TCIA website.
#
# Steps: fetch the NBIA Data Retriever package, the image manifest and the
# XML annotations into $dataset_dir, make sure a Java runtime is available,
# (re)install the retriever, then run it in CLI mode against the manifest.

# Stop at the first failing step so that a broken download is never installed
# or reported as successful.
set -euo pipefail

# URL variables
nbia_url="https://cbiit-download.nci.nih.gov/nbia/releases/ForTCIA/NBIADataRetriever_4.4.1/nbia-data-retriever-4.4.1.deb"
images_url="https://wiki.cancerimagingarchive.net/download/attachments/1966254/TCIA_LIDC-IDRI_20200921.tcia?version=1&modificationDate=1600709265077&api=v2"
annotations_url="https://wiki.cancerimagingarchive.net/download/attachments/1966254/LIDC-XML-only.zip?version=1&modificationDate=1530215018015&api=v2"

# Make directory for the dataset
dataset_dir="/mnt/data"
mkdir -p -- "$dataset_dir"
echo -e "-- Using directory $dataset_dir\n"

# Download the NBIA Data Retriever
nbia_file_path="$dataset_dir/nbia-data-retriever.deb"

echo -e "-- Downloading the NBIA Data Retriever...\n"
wget -O "$nbia_file_path" "$nbia_url"
echo -e "-- Downloaded to: $nbia_file_path\n"

# Download the manifest file
images_file_path="$dataset_dir/images.tcia"

echo -e "-- Downloading the manifest file...\n"
wget -O "$images_file_path" "$images_url"
echo -e "-- Downloaded to: $images_file_path\n"

# Download the annotations file
annotations_file_path="$dataset_dir/annotations.zip"

echo -e "-- Downloading the annotations file...\n"
wget -O "$annotations_file_path" "$annotations_url"
echo -e "-- Downloaded to: $annotations_file_path\n"

# Ensure java is installed
echo -e "-- Checking if java is installed...\n"
if ! command -v java &> /dev/null; then
  echo -e "-- Java is not installed. Installing...\n"
  # -y keeps apt-get from stalling the script on a confirmation prompt.
  sudo -S apt-get install -y default-jre
  echo -e "-- Java installed.\n"
else
  echo -e "-- Java is installed.\n"
fi

# Install the NBIA Data Retriever
echo -e "-- Installing the NBIA Data Retriever...\n"
# dpkg -r expects a package name, not the path of a .deb archive, so the name
# is read from the archive's control data.
nbia_package_name="$(dpkg-deb -f "$nbia_file_path" Package)"
# Removing a previous installation is best-effort; the install below is the
# step that must succeed.
sudo -S dpkg -r "$nbia_package_name" || true
sudo -S dpkg -i "$nbia_file_path"
echo -e "-- Installed.\n"

# Download the dataset
output_dir="$dataset_dir/images"
manifest_path="$images_file_path"

echo -e "-- Downloading the dataset...\n"
/opt/nbia-data-retriever/nbia-data-retriever --cli "$manifest_path" -d "$output_dir" -v -f
echo -e "-- Downloaded to: $output_dir\n"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash
#
# Move every top-level entry of the input directory into the output directory.
#
# Usage: <script> [INPUT_DIR] [OUTPUT_DIR]
# Both arguments are optional; when omitted, the original hard-coded
# LIDC-IDRI paths are used.

#######################################
# Move each top-level entry of one directory into another.
# Arguments: $1 - source directory (optional, see default below)
#            $2 - destination directory (optional, see default below)
# Outputs:   diagnostics on stderr
# Returns:   0 if every entry was moved, 1 otherwise
#######################################
move_dataset_entries() {
  local input_dir=${1:-/home/jo-engineers-thesis/dataset/images/images/LIDC-IDRI}
  local output_dir=${2:-/mnt/data/images/images/LIDC-IDRI}
  local entry
  local status=0

  if [[ ! -d "$input_dir" ]]; then
    printf 'Input directory not found: %s\n' "$input_dir" >&2
    return 1
  fi
  mkdir -p -- "$output_dir" || return 1

  # Entries are moved one at a time instead of with a single
  # `mv "$input_dir"/* "$output_dir"/` call (presumably to stay below the
  # argument-length limit for very large directories).
  for entry in "$input_dir"/*; do
    # An empty directory leaves the glob pattern unexpanded; skip it.
    [[ -e "$entry" || -L "$entry" ]] || continue
    mv -- "$entry" "$output_dir"/ || status=1
  done
  return "$status"
}

move_dataset_entries "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import os | ||
|
||
import click | ||
|
||
from src.preprocessing.dataset_processor import DatasetProcessor | ||
|
||
|
||
@click.command()
@click.option("-i", "--input_path", required=True,
              type=click.Path(exists=True, file_okay=False, dir_okay=True),
              help="Path to directory containing patient data with Dicom images")
@click.option("-o", "--output_path", required=True,
              type=click.Path(file_okay=False, dir_okay=True, writable=True),
              help="Path to output directory where processed dicoms will be saved")
@click.option("-t", "--train_size", type=float, default=0.8,
              help="Train size for train/test split")
def run(input_path, output_path, train_size):
    """Process the dataset found at ``input_path`` and save it to ``output_path``.

    Builds a ``DatasetProcessor`` for ``input_path`` and calls its
    ``process_and_save`` method with ``output_path``.

    Args:
        input_path: Existing directory passed to ``DatasetProcessor``.
        output_path: Directory passed to ``process_and_save``.
        train_size: Accepted on the command line but not used by this
            command; kept so existing invocations keep working.

    Raises:
        SystemExit: With status 1 when processing fails, so that calling
            scripts can detect the failure.
    """
    try:
        dp = DatasetProcessor(input_path)
        dp.process_and_save(output_path)
    except Exception as e:
        click.echo(f"An error occurred: {e}", err=True)
        # Report the failure through the exit status, not only on stderr.
        raise SystemExit(1)
    else:
        click.echo(f"Processing completed. Data saved to {output_path}")


if __name__ == "__main__":
    run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash
#
# Copy a random subset of the files below a source directory into a
# destination directory.
#
# Usage: <script> SOURCE_DIR DESTINATION_DIR NUMBER_OF_FILES

#######################################
# Copy NUMBER_OF_FILES randomly chosen regular files from SOURCE_DIR
# (searched recursively) into DESTINATION_DIR. Files are copied flat, so
# files that share a base name overwrite each other.
# Arguments: $1 - source directory
#            $2 - destination directory (created when missing)
#            $3 - number of files to copy (non-negative integer)
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   0 on success, 2 on bad usage, 1 on any other failure
#######################################
create_subset() {
  if (( $# < 3 )); then
    printf 'Usage: %s SOURCE_DIR DESTINATION_DIR NUMBER_OF_FILES\n' "${0##*/}" >&2
    return 2
  fi

  local source_dir=$1
  local destination_dir=$2
  local number_of_files=$3

  if [[ ! "$number_of_files" =~ ^[0-9]+$ ]]; then
    printf 'NUMBER_OF_FILES must be a non-negative integer: %s\n' "$number_of_files" >&2
    return 2
  fi
  if [[ ! -d "$source_dir" ]]; then
    printf 'Source directory not found: %s\n' "$source_dir" >&2
    return 1
  fi
  mkdir -p -- "$destination_dir" || return 1

  echo "Creating a subset of $number_of_files files from $source_dir in $destination_dir"
  # NUL-delimited end to end: file names containing quotes, backslashes or
  # newlines would otherwise be mangled or rejected by xargs.
  find "$source_dir" -type f -print0 \
    | shuf -z -n "$number_of_files" \
    | xargs -0 -r -I {} cp -- {} "$destination_dir" || return 1
  echo "Done!"
}

create_subset "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import os | ||
|
||
import click | ||
|
||
from src.preprocessing.dataset_processor import DatasetProcessor | ||
|
||
|
||
@click.command()
@click.option("-d", "--dataset_path", required=True,
              type=click.Path(exists=True, file_okay=False, dir_okay=True),
              help="Path to directory containing processed dataset")
@click.option("-t", "--train_size", type=float, default=0.8,
              help="Train size for train/test split")
def run(dataset_path, train_size):
    """Run the train/test split on the processed dataset at ``dataset_path``.

    Builds a ``DatasetProcessor`` for ``dataset_path`` and delegates to its
    ``train_test_split`` method.

    Args:
        dataset_path: Existing directory containing the processed dataset.
        train_size: Value forwarded as ``train_size`` to
            ``DatasetProcessor.train_test_split`` (default 0.8).

    Raises:
        SystemExit: With status 1 when splitting fails, so that calling
            scripts can detect the failure.
    """
    try:
        dp = DatasetProcessor(dataset_path)
        click.echo(f"Splitting processed dataset at {dataset_path}\nTrain split: {train_size}")
        dp.train_test_split(dataset_path, train_size=train_size)
    except Exception as e:
        click.echo(f"An error occurred: {e}", err=True)
        # Report the failure through the exit status, not only on stderr.
        raise SystemExit(1)
    else:
        click.echo("Splitting completed.")


if __name__ == "__main__":
    run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash | ||
|
||
# Positional arguments:
#   $1 - local directory that contains the train/ and test/ subdirectories
#   $2 - Azure storage account name
#   $3 - blob container name
#   $4 - blob name prefix under which train/ and test/ are uploaded
#        (optional; when empty, blob names start with "/train" and "/test")
if (( $# < 3 )); then
  printf 'Usage: %s LOCAL_DATA_PATH STORAGE_ACCOUNT_NAME CONTAINER_NAME [BLOB_DIRECTORY_NAME]\n' \
    "${0##*/}" >&2
  exit 2
fi

LOCAL_DATA_PATH=$1
STORAGE_ACCOUNT_NAME=$2
CONTAINER_NAME=$3
BLOB_DIRECTORY_NAME=${4:-}
|
||
#######################################
# Recursively upload every regular file below a local directory as a blob,
# mirroring the directory layout in the blob names. Entries hidden from the
# `*` glob (dotfiles) are not uploaded.
# Globals:   STORAGE_ACCOUNT_NAME (read), CONTAINER_NAME (read)
# Arguments: $1 - local directory to upload
#            $2 - blob name prefix for the files in that directory
# Outputs:   one "Uploading ..." line per file on stdout
# Returns:   0 if every upload succeeded, 1 if any upload failed
#######################################
upload_directory() {
  local dir_path=$1
  local blob_dir_path=$2
  # Declared local so recursive calls do not clobber the caller's variables.
  local filepath blob_name
  local status=0

  for filepath in "$dir_path"/*; do
    blob_name="$blob_dir_path/${filepath##*/}"
    if [[ -d "$filepath" ]]; then
      upload_directory "$filepath" "$blob_name" || status=1
    elif [[ -f "$filepath" ]]; then
      echo "Uploading $filepath to $blob_name"
      az storage blob upload --account-name "$STORAGE_ACCOUNT_NAME" \
        --container-name "$CONTAINER_NAME" --file "$filepath" \
        --name "$blob_name" || status=1
    fi
  done
  return "$status"
}
|
||
echo "Started uploading $LOCAL_DATA_PATH to Azure Storage with arguments:"
echo " --account-name $STORAGE_ACCOUNT_NAME"
echo " --container-name $CONTAINER_NAME"

# Tracks whether any split failed so the exit status reflects the outcome.
upload_status=0

for split in train test; do
  echo "Uploading $split directory..."
  if [[ ! -d "$LOCAL_DATA_PATH/$split" ]]; then
    # Without this check a missing directory would be reported as "Done!".
    echo "Directory not found: $LOCAL_DATA_PATH/$split" >&2
    upload_status=1
    continue
  fi
  if upload_directory "$LOCAL_DATA_PATH/$split" "$BLOB_DIRECTORY_NAME/$split"; then
    echo "Done!"
  else
    echo "Uploading $split directory failed" >&2
    upload_status=1
  fi
done

if (( upload_status != 0 )); then
  echo "Upload finished with errors" >&2
  exit 1
fi

echo "Upload complete!"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash
#
# Upload the train/ and test/ subdirectories of a local dataset directory to
# an Azure Blob Storage container with azcopy.
#
# Usage: <script> LOCAL_DATA_PATH STORAGE_ACCOUNT_NAME CONTAINER_NAME SAS_TOKEN

#######################################
# Copy <LOCAL_DATA_PATH>/train and <LOCAL_DATA_PATH>/test recursively into
# the given container. Both copies are attempted even if the first fails.
# Arguments: $1 - local dataset directory
#            $2 - storage account name
#            $3 - container name
#            $4 - SAS token, with or without a leading "?"
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   0 on success, 2 on bad usage, 1 if any copy failed
#######################################
upload_dataset() {
  if (( $# < 4 )); then
    printf 'Usage: %s LOCAL_DATA_PATH STORAGE_ACCOUNT_NAME CONTAINER_NAME SAS_TOKEN\n' \
      "${0##*/}" >&2
    return 2
  fi

  local local_data_path=$1
  local storage_account_name=$2
  local container_name=$3
  # Strip one leading "?" so a token pasted with it does not produce "??".
  local sas_token=${4#\?}
  local remote_storage_path="https://${storage_account_name}.blob.core.windows.net/${container_name}"
  local split
  local status=0

  # Only the token-free URL is logged; the SAS token is a credential.
  echo "Uploading $local_data_path to $remote_storage_path"
  for split in train test; do
    echo "Uploading $split directory..."
    if ! azcopy copy "$local_data_path/$split" "$remote_storage_path?$sas_token" --recursive=true; then
      echo "Uploading $split directory failed" >&2
      status=1
    fi
  done

  if (( status != 0 )); then
    echo "Upload finished with errors" >&2
    return 1
  fi
  echo "Upload complete!"
}

upload_dataset "$@"
|