From 4a27f200947baf350de42a34506f9fd9cc374c2e Mon Sep 17 00:00:00 2001
From: skycoco
Date: Tue, 19 Sep 2023 15:30:08 +1200
Subject: [PATCH 1/3] update docs and create delete_thumbnail()

---
 sparc_me/core/api_tools.py | 103 +++++++++++++++++++++----------------
 sparc_me/core/dataset.py   |  89 ++++++++++++++++++++++++++------
 2 files changed, 132 insertions(+), 60 deletions(-)

diff --git a/sparc_me/core/api_tools.py b/sparc_me/core/api_tools.py
index a3abac5..4e3b2af 100644
--- a/sparc_me/core/api_tools.py
+++ b/sparc_me/core/api_tools.py
@@ -14,10 +14,13 @@ def __init__(self):
         pass
 
     def get_dataset_versions_pensieve(self, datasetId):
-        '''
-        get one dataset all versions
+        """
+        Get all versions of a single dataset.
+
+        :param datasetId: the dataset id from SPARC
+        :type datasetId: str | int
         :return: versions
-        '''
+        """
 
         if not isinstance(datasetId, str):
             datasetId = str(datasetId)
@@ -33,15 +36,16 @@ def get_dataset_versions_pensieve(self, datasetId):
         return versions
 
    def get_all_datasets_all_versions(self):
-        '''
-        Get all datasets with all versions
-        It may cost a few minutes to get the whole data,
-        Because some dataset have a lot of versions, e.g, 20,
-        And every time when the version number getter than 1,
-        it will request server for getting new data, so it waste a lot of time.
+        """
+
+        Get all datasets with all of their versions.
+        Fetching everything may take a few minutes, because some
+        datasets have many versions (e.g. 20), and every version
+        beyond the first requires an additional request to the
+        server, which is what makes this slow.
 
         :return: datasets
-        '''
+        """
         datasets = []
 
         latest_datasets = self.get_all_datasets_latest_version_pensieve()
@@ -56,10 +60,11 @@ def get_all_datasets_all_versions(self):
         return datasets
 
     def get_all_datasets_latest_version_pensieve(self):
-        '''
-        Get all datasets with latest version
+        """
+        Get all datasets at their latest version.
+
         :return: datasets | []
-        '''
+        """
 
         url = "https://api.pennsieve.io/discover/datasets?limit=2147483647&offset=0&orderBy=relevance&orderDirection=desc"
 
@@ -77,10 +82,13 @@ def get_all_datasets_latest_version_pensieve(self):
         return []
 
     def get_dataset_latest_version_pensieve(self, datasetId):
-        '''
-        :parameter: datasetId : String
-        :return:
-        '''
+        """
+        Get the latest version of a dataset.
+        :param datasetId: the dataset id from SPARC
+        :type datasetId: str | int
+        :return: the dataset's latest version, in json format
+        """
+
        if isinstance(datasetId, int):
            datasetId = str(datasetId)
        elif isinstance(datasetId, str):
@@ -97,10 +105,15 @@ def get_dataset_latest_version_pensieve(self, datasetId):
        return json.loads(response.text)
 
     def get_metadata_pensieve(self, datasetId, versionId):
-        '''
-        Get a metadata from the specific version
-        :return: metadata json format
-        '''
+        """
+        Get a dataset's metadata for a specific version.
+
+        :param datasetId: the dataset id from SPARC
+        :type datasetId: str | int
+        :param versionId: the version id of the dataset
+        :type versionId: str | int
+        :return: metadata in json format
+        """
 
         if not isinstance(datasetId, str):
             datasetId = str(datasetId)
@@ -129,10 +142,14 @@ def get_metadata_pensieve(self, datasetId, versionId):
             versionId = ""
         return versionId
 
-    def download_file(self, datasetId, filepath):
-        '''
-        Download bytes files from Pennsieve
-        '''
+    def _download_file(self, datasetId, filepath):
+        """
+        Download a file from Pennsieve as bytes.
+
+        :param datasetId: the dataset id from SPARC
+        :param filepath: path of the file within the dataset
+        :return: the HTTP response, or its failure reason
+        """
         versionId = self.get_dataset_latest_version_number(datasetId)
 
         url = "https://api.pennsieve.io/zipit/discover"
@@ -150,13 +167,14 @@ def download_file(self, datasetId, filepath):
         return response.reason
 
     def get_xlsx_csv_file_pennsieve(self, datasetId, filepath, savepath):
-        '''
-        store excel file locally
-        :param datasetId:
-        :param filepath:
-        :param savepath:
-        :return:
-        '''
+        """
+
+        Store an xlsx/csv file locally.
+        :param datasetId: dataset id from SPARC
+        :param filepath: path of the file within the dataset
+        :param savepath: directory in which to save the file
+        """
+
         pathList = filepath.split('.')
         extension = pathList[1]
         fileStrList = filepath.split('/')
@@ -169,8 +187,8 @@ def get_xlsx_csv_file_pennsieve(self, datasetId, filepath, savepath):
 
         save_dir = Path(savepath)
         if not save_dir.is_dir():
             save_dir.mkdir(parents=True, exist_ok=False)
-            response = self.download_file(datasetId, filepath)
+            response = self._download_file(datasetId, filepath)
 
         if extension == "xlsx":
             with io.BytesIO(response.content) as fh:
@@ -186,7 +204,7 @@ def get_xlsx_csv_file_pennsieve(self, datasetId, filepath, savepath):
                 df.to_csv(savepath + filename, sep=',', header=False, index=False)
 
     def get_UBERONs_From_Dataset(self, datasetId, filepath):
-        response = self.download_file(datasetId, filepath)
+        response = self._download_file(datasetId, filepath)
         with io.BytesIO(response.content) as fh:
             df = pd.read_csv(fh)
             df = df.dropna(axis=0, how='any')
@@ -196,7 +214,7 @@ def get_UBERONs_From_Dataset(self, datasetId, filepath):
     TODO: download whole dataset
     '''
 
-    def mkdir(self, paths):
+    def _mkdir(self, paths):
         for path in paths:
             savepath = "dataset/"
             fileStrList = path.split('/')
@@ -223,7 +241,7 @@ def get_all_files_path(self, dataset_id, version_id):
                 paths.append(files[idx]["path"])
         return paths
 
-    def craw(self, datasetId, versionId, url_queue: queue.Queue, html_queue: queue.Queue):
+    def _craw(self, datasetId, versionId, url_queue: queue.Queue, html_queue: queue.Queue):
         '''
         Download bytes files from Pennsieve
         '''
@@ -254,8 +272,7 @@ def craw(self, datasetId, versionId, url_queue: queue.Queue, html_queue: queue.Q
             except Exception as e:
The error is {e}") - - def parse(self, html_queue: queue.Queue): + def _parse(self, html_queue: queue.Queue): while True: res = html_queue.get() if res is None: @@ -282,7 +299,7 @@ def download_dataset(self, dataset_id, version_id=None): print("Invalid version id, Now will download the first version of the dataset for you!") paths = self.get_all_files_path(dataset_id, version_id) - self.mkdir(paths) + self._mkdir(paths) url_queue = queue.Queue() html_queue = queue.Queue() threads = [] @@ -295,11 +312,11 @@ def download_dataset(self, dataset_id, version_id=None): url_queue.put(None) for idx in range(3): - t1 = threading.Thread(target=self.craw, args=(dataset_id, version_id, url_queue, html_queue)) + t1 = threading.Thread(target=self._craw, args=(dataset_id, version_id, url_queue, html_queue)) threads.append(t1) t1.start() for idx in range(2): - t2 = threading.Thread(target=self.parse, args=(html_queue,)) + t2 = threading.Thread(target=self._parse, args=(html_queue,)) t2.start() for t in threads: @@ -318,7 +335,7 @@ def get_dataset_protocolsio_link(self, datasetId): def get_protocolsio_text(self, datasetId, dir): save_dir = Path(dir) if not save_dir.is_dir(): - save_dir.mkdir(parents=True, exist_ok=False) + save_dir._mkdir(parents=True, exist_ok=False) protocol_url = self.get_dataset_protocolsio_link(datasetId) if protocol_url: diff --git a/sparc_me/core/dataset.py b/sparc_me/core/dataset.py index c6eb75f..c4c7303 100644 --- a/sparc_me/core/dataset.py +++ b/sparc_me/core/dataset.py @@ -82,7 +82,7 @@ def _get_template_dir(self, version): return template_dir - def set_template_version(self, version): + def _set_template_version(self, version): """ Choose a template version @@ -146,6 +146,11 @@ def _load(self, dir_path): return dataset def create_empty_dataset(self, version='2.0.0'): + """ + Create an empty dataset from template via dataset version + :param version: the dataset version + :type version: '2.0.0' | '1.2.3' + """ self.load_from_template(version=version) def load_from_template(self, version): @@ -157,7 +162,7 @@ def load_from_template(self, version): :return: loaded dataset :rtype: dict """ - self.set_version(version) + self._set_version(version) # self._dataset_path = self._get_template_dir(self._version) template_dataset_path = self._get_template_dir(self._version) self._dataset = self._load(str(template_dataset_path)) @@ -179,7 +184,7 @@ def _convert_version_format(self, version): return version - def set_version(self, version): + def _set_version(self, version): """ Set dataset version version @@ -191,7 +196,7 @@ def set_version(self, version): self._version = version self._set_version_specific_variables(version) - def load_template(self, version): + def _load_template(self, version): """ Load template @@ -202,15 +207,16 @@ def load_template(self, version): """ version = self._convert_version_format(version) - self.set_template_version(version) + self._set_template_version(version) self._template_dir = self._get_template_dir(self._template_version) self._template = self._load(str(self._template_dir)) return self._template - def save_template(self, save_dir, version=None): + def _save_template(self, save_dir, version=None): """ Save the template directory locally + TODO: will delete later :param save_dir: path to the output folder :type save_dir: string @@ -241,7 +247,7 @@ def load_dataset(self, dataset_path=None, from_template=False, version=None): :rtype: dict """ if version: - self.set_version(version) + self._set_version(version) if not self._dataset_path: self._dataset_path 
            self._dataset_path = Path(dataset_path)
@@ -281,7 +287,7 @@ def save(self, save_dir="", remove_empty=False, keep_style=False):
                 data = self._filter(data, filename)
 
                 if isinstance(data, pd.DataFrame):
-                    self.set_version(self._version)
+                    self._set_version(self._version)
                     template_dir = self._get_template_dir(self._version)
 
                     if keep_style:
@@ -363,7 +369,7 @@ def list_metadata_files(self, version, print_list=True):
         """
         metadata_files = list()
 
-        self.load_template(version=version)
+        self._load_template(version=version)
 
         for key, value in self._template.items():
             if isinstance(value, dict):
@@ -421,7 +427,7 @@ def list_elements(self, metadata_file, axis=0, version=None):
             return fields
 
         if not self._template:
-            self.load_template(version=None)
+            self._load_template(version=None)
 
         data = self._template.get(metadata_file)
         metadata = data.get("metadata")
@@ -452,7 +458,15 @@ def _generate_metadata(self):
 
     def get_metadata(self, metadata_file):
         """
-        :param metadata_file: one of string of [code_description, code_parameters, dataset_description,manifest,performances,resources,samples,subjects,submission]
+        Get a Metadata object based on the metadata file name.
+        The returned object can then be used to edit that file's values.
+
+        :param metadata_file: one of [code_description,
+                              code_parameters,
+                              dataset_description,
+                              manifest, performances,
+                              resources, samples,
+                              subjects, submission]
         :type metadata_file: string
         :return: give a metadata editor for a specific metadata
         """
@@ -463,9 +477,10 @@ def get_metadata(self, metadata_file):
         metadata_file = validate_metadata_file(metadata_file, self._version)
         return self._metadata[metadata_file]
 
-    def set_field(self, metadata_file, row_index, header, value):
+    def _set_field(self, metadata_file, row_index, header, value):
         """
         Set single field by row idx/name and column name (the header)
+        TODO: will delete later
 
         :param metadata_file: metadata metadata_file
         :type metadata_file: string
@@ -500,7 +515,7 @@ def _set_field(self, metadata_file, row_index, header, value):
 
         return self._dataset
 
-    def set_field_using_row_name(self, metadata_file, row_name, header, value):
+    def _set_field_using_row_name(self, metadata_file, row_name, header, value):
         """
         Set single cell. The row is identified by the given unique name and column is identified
         by the header.
@@ -538,9 +553,9 @@ def _set_field_using_row_name(self, metadata_file, row_name, header, value):
             raise ValueError(msg)
         else:
             excel_row_index = matching_indices[0] + 2
-            return self.set_field(metadata_file=metadata_file, row_index=excel_row_index, header=header, value=value)
+            return self._set_field(metadata_file=metadata_file, row_index=excel_row_index, header=header, value=value)
 
-    def append(self, metadata_file, row, check_exist=False, unique_column=None):
+    def _append(self, metadata_file, row, check_exist=False, unique_column=None):
         """
         Append a row to a metadata file
 
@@ -629,8 +644,9 @@ def update_by_json(self, metadata_file, json_file):
 
         return metadata
 
-    def generate_file_from_template(self, save_path, metadata_file, data=pd.DataFrame(), keep_style=False):
+    def _generate_file_from_template(self, save_path, metadata_file, data=pd.DataFrame(), keep_style=False):
         """Generate file from a template and populate with data if givn
+        TODO: will delete later
 
         :param save_path: destination to save the generated file
         :type save_path: string
@@ -653,6 +669,17 @@
 
     def add_subjects(self, subjects):
+        """
+        Add a list of Subjects to the dataset.
+        This function adds the subjects and their samples to the metadata,
+        and moves the sample files from their source paths into the
+        dataset's primary subject/sample folders.
+        It automatically updates the manifest and dataset_description metadata files.
+
+        :param subjects: list of Subject objects
+        :type subjects: list
+        """
+
         self.save()
         if not isinstance(subjects, list):
             msg = "Please provide a list of subjects"
@@ -666,6 +693,7 @@ def add_subjects(self, subjects):
     def get_subject(self, subject_sds_id) -> Subject:
         """
         Get a subject by subject sds id
+
         :param subject_sds_id: subject sds id
         :type subject_sds_id: str
         :return: Subject
@@ -712,7 +740,14 @@ def add_derivative_data(self, source_path, subject, sample, copy=True, overwrite
         self._add_sample_data(source_path, self._dataset_path, subject, sample, data_type="derivative", copy=copy,
                               overwrite=overwrite)
 
-    def add_element(self, metadata_file, element):
+    def _add_element(self, metadata_file, element):
+        """
+        TODO: this method may need to be deleted later
+
+        :param metadata_file: the metadata file name
+        :param element: the element to add
+        :return:
+        """
         metadata = self._dataset.get(metadata_file).get("metadata")
         if metadata_file in self._column_based:
             row_pd = pd.DataFrame([{"Metadata element": element}])
@@ -743,6 +778,7 @@ def add_thumbnail(self, source_path, copy=True, overwrite=True):
         self._modify_manifest(fname=filename, manifest_folder_path=str(self._dataset_path),
                               destination_path=str(destination_path.parent), description=description)
 
+
     def _add_sample_data(self, source_path, dataset_path, subject, sample, data_type="primary", copy=True,
                          overwrite=True):
         """Copy or move data from source folder to destination folder
@@ -963,7 +999,26 @@ def delete_sample(self, destination_path, data_type="primary"):
                 samples_metadata.remove_row(sam_folder.name)
                 samples_metadata.save()
 
+    def delete_thumbnail(self, destination_path):
+        """
+        Delete a thumbnail from the dataset.
+        The manifest metadata is updated automatically.
+
+        :param destination_path: the thumbnail path in the dataset that you want to delete
+        :type destination_path: str
+        """
+        self.delete_data(destination_path)
+
     def delete_data(self, destination_path):
+        """
+        Delete a file, given its path within the dataset.
+        It will automatically update the manifest metadata.
+        TODO: need to connect delete sample and subject, and update subject and sample metadata
+
+        :param destination_path: the file path that you want to delete
+        :type destination_path: str
+        :return:
+        """
         if not Path(destination_path).exists():
             msg = f"The file {str(destination_path)} is not existing"
             raise FileNotFoundError(msg)

From dce386c7aa2b4f2d530654a8509e62cbe6490853 Mon Sep 17 00:00:00 2001
From: skycoco
Date: Tue, 19 Sep 2023 16:38:30 +1200
Subject: [PATCH 2/3] fixed all issues in #106

---
 examples/example_for_create_dataset.py | 60 ++++++++++++++++++++------
 setup.py                               |  2 +-
 sparc_me/core/metadata.py              | 60 ++++++++++++++++++++------
 sparc_me/core/schema.py                | 21 +++++++--
 4 files changed, 113 insertions(+), 30 deletions(-)

diff --git a/examples/example_for_create_dataset.py b/examples/example_for_create_dataset.py
index bfb2748..123560b 100644
--- a/examples/example_for_create_dataset.py
+++ b/examples/example_for_create_dataset.py
@@ -103,8 +103,10 @@ def add_values_dataset_description(dataset_description):
     # code_parameters = dataset.get_metadata(metadata_file="code_parameters")
     # code_description = dataset.get_metadata(metadata_file="code_description")
 
-    des_schema = schema.get_schema("dataset_description")
-    des_schema.get('subtitle')
+    print("******************************************")
+    des_schema = schema.get_schema("dataset_description", name_only=False)
+    print(des_schema)
+
     # NOTE: Step3.1(optional), remove entire values in dataset_description
     dataset_description.clear_values()
 
@@ -191,19 +193,49 @@ def add_values_dataset_description(dataset_description):
     # add_values_for_subject_metadata(subject_metadata)
 
     # New function for add subjects and samples
+    # subjects = []
+    # for subject_user_id in [1, 2]:
+    #     samples = []
+    #     for sample_user_id in [1, 2]:
+    #         sample = sm.Sample()
+    #         sample.add_path(
+    #             "./test_data/bids_data/sub-0{0}/sequence{1}/".format(
+    #                 subject_user_id, sample_user_id))
+    #         samples.append(sample)
+    #
+    #     subject = sm.Subject()
+    #     subject.add_samples(samples)
+    #     subjects.append(subject)
+
     subjects = []
-    for subject_user_id in [1, 2]:
-        samples = []
-        for sample_user_id in [1, 2]:
-            sample = sm.Sample()
-            sample.add_path(
-                "./test_data/bids_data/sub-0{0}/sequence{1}/".format(
-                    subject_user_id, sample_user_id))
-            samples.append(sample)
-
-        subject = sm.Subject()
-        subject.add_samples(samples)
-        subjects.append(subject)
+    samples = []
+
+    sample1 = sm.Sample()
+    sample1.add_path("./test_data/bids_data/sub-01/sequence1/")
+    sample1.add_path("./test_data/sample2/raw/dummy_sam2.txt")
+    samples.append(sample1)
+
+    sample2 = sm.Sample()
+    sample2.add_path("./test_data/bids_data/sub-01/sequence2/")
+    samples.append(sample2)
+
+    subject1 = sm.Subject()
+    subject1.add_samples(samples)
+    subjects.append(subject1)
+
+    samples = []
+
+    sample1 = sm.Sample()
+    sample1.add_path("./test_data/bids_data/sub-02/sequence1/")
+    samples.append(sample1)
+
+    sample2 = sm.Sample()
+    sample2.add_path("./test_data/bids_data/sub-02/sequence2/")
+    samples.append(sample2)
+
+    subject2 = sm.Subject()
+    subject2.add_samples(samples)
+    subjects.append(subject2)
 
     dataset.add_subjects(subjects)
 
diff --git a/setup.py b/setup.py
index 3605e38..dbd971e 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
 
 setup(
     name="sparc_me",
-    version="2.2.3",
+    version="2.2.8",
     description='A python tool to explore, enhance, and expand SPARC datasets and their descriptions in accordance with FAIR principles.',
     author="Thiranja Prasad Babarenda Gamage, Chinchien Lin, Savindi Wijenayaka, Michael Hoffman, Linkun Gao, Haribalan Kumar",
     email="psam012@aucklanduni.ac.nz, clin864@aucklanduni.ac.nz",
diff --git a/sparc_me/core/metadata.py b/sparc_me/core/metadata.py
index e150eb2..9e9acd0 100644
--- a/sparc_me/core/metadata.py
+++ b/sparc_me/core/metadata.py
@@ -4,6 +4,7 @@
 import shutil
 from sparc_me.core.utils import find_col_element
 from datetime import datetime, timezone
+from typing import List
 
 
 class Metadata:
@@ -406,11 +407,12 @@ class Sample:
     _metadata: Metadata = None
     _manifest_metadata: Metadata = None
 
+
     def __init__(self):
         self.sample_id = ""
         self.subject_id = ""
         self.sample_dir = Path()
-        self.source_sam_dir = Path()
+        self.source_sample_paths: List[Path] = []
         self.index = -1
 
     def set_subject_id(self, sub_id):
@@ -469,16 +471,42 @@ def add_path(self, source_path):
         Add sample source path to sample object
 
         :param source_path: sample folder source path
-        :type source_path: str
 
        """
-        self.source_sam_dir = Path(source_path)
+        :type source_path: str | list
+
+        """
+        if isinstance(source_path, list):
+            for file_path in source_path:
+                self.source_sample_paths.append(Path(file_path))
+        else:
+            self.source_sample_paths.append(Path(source_path))
+
+
+    def set_path(self, source_path):
+        """
+        Set the sample source path(s) on the sample object,
+        overriding any previously added paths.
+
+        :param source_path: sample folder source path
+        :type source_path: str | list
+
+        """
+        if isinstance(source_path, list):
+            self.source_sample_paths = []
+            for file_path in source_path:
+                self.source_sample_paths.append(Path(file_path))
+        else:
+            self.source_sample_paths = [Path(source_path)]
+
 
     def set_values(self, metadata={}):
         """
         :param metadata: key : value dict (element:value)
         :type metadata: dict
         """
+        if not isinstance(metadata, dict):
+            msg = f"Expected a dict of element:value pairs, but got {type(metadata)}"
+            raise TypeError(msg)
+
         for element, value in metadata.items():
             if element == 'sample id' or element == 'subject id':
                 continue
@@ -511,15 +539,19 @@ def move(self):
         if not self.sample_dir.exists():
             self.sample_dir.mkdir(parents=True, exist_ok=True)
 
-        source_sample_files = self.source_sam_dir.rglob("*")
-        for file in source_sample_files:
-            if file.is_file():
-                relative_path = file.relative_to(self.source_sam_dir)
-                target_file = self.sample_dir / relative_path
-                target_file.parent.mkdir(parents=True, exist_ok=True)
-                shutil.copy(str(file), str(target_file))
-                self._update_manifest(sample_path=str(target_file))
-
+        for source_sam in self.source_sample_paths:
+            if source_sam.is_dir():
+                source_sample_files = source_sam.rglob("*")
+                for file in source_sample_files:
+                    if file.is_file():
+                        relative_path = file.relative_to(source_sam)
+                        target_file = self.sample_dir / relative_path
+                        target_file.parent.mkdir(parents=True, exist_ok=True)
+                        shutil.copy(str(file), str(target_file))
+                        self._update_manifest(sample_path=str(target_file))
+            elif source_sam.is_file():
+                shutil.copy(str(source_sam), str(self.sample_dir))
+                self._update_manifest(sample_path=str(self.sample_dir / source_sam.name))
     def _update_manifest(self, sample_path):
         """
         Update manifest metadata, after remove samples
@@ -661,6 +693,10 @@ def set_values(self, metadata={}):
         :param metadata: key : value dict (element:value)
         :type metadata: dict
         """
+        if not isinstance(metadata, dict):
+            msg = f"Expected a dict of element:value pairs, but got {type(metadata)}"
+            raise TypeError(msg)
+
         for element, value in metadata.items():
             if element == 'subject id':
                 continue
diff --git a/sparc_me/core/schema.py b/sparc_me/core/schema.py
index ff7b9e9..bad187a 100644
--- a/sparc_me/core/schema.py
+++ b/sparc_me/core/schema.py
@@ -52,7 +52,6 @@ def validate_dataset(self, dataset):
                 data = schema.load_data(metadata.metadata_file_path)
                 self.validate(data, metadata_file=metadata_file, version=metadata.version)
 
-
     def validate(self, data, metadata_file, version):
         """
         Validate data instance
@@ -149,9 +148,14 @@ def get_default_schema(version, metadata_file):
 
         return schema
 
-    def get_schema(self, metadata_file, version="2.0.0", print_schema=True):
+    def get_schema(self, metadata_file, version="2.0.0", print_schema=True, required_only=True, name_only=True):
         """
         get a schema via metadata_file/metadate file name
+
         :param metadata_file: the metadata file name
         :type metadata_file: str
         :param version: "2.0.0"|"1.2.3"
+        :param required_only: if True, only report the schema's required elements
+        :type required_only: bool
+        :param name_only: if True, print just the names of the required elements
+        :type name_only: bool
@@ -171,7 +171,22 @@ def get_schema(self, metadata_file, version="2.0.0", print_schema=True):
         with open(schema_path, 'r') as file:
             schema_json: Dict = json.load(file)
             if print_schema:
-                print(json.dumps(schema_json.get('properties'), indent=4))
+                if required_only:
+                    if name_only:
+                        print(f"The required elements for {metadata_file}:")
+                        print(json.dumps(schema_json.get('required'), indent=4))
+                    else:
+                        required_items = []
+                        for key, value in schema_json.get('properties').items():
+                            if "required" in value and value["required"] == "Y":
+                                required_items.append({key: value})
+
+                        print(f"The required elements for {metadata_file}:")
+                        print(json.dumps(required_items, indent=4))
+                        return required_items
+                else:
+                    print(json.dumps(schema_json.get('properties'), indent=4))
+
         return CaseInsensitiveDict(schema_json.get('properties'))
 
     def set_schema(self, schema):

From 74808249fb811df3410801ccfa06260413ac051f Mon Sep 17 00:00:00 2001
From: skycoco
Date: Tue, 19 Sep 2023 16:43:11 +1200
Subject: [PATCH 3/3] rename delete thumbnail to remove thumbnail

---
 sparc_me/core/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sparc_me/core/dataset.py b/sparc_me/core/dataset.py
index c4c7303..6c31726 100644
--- a/sparc_me/core/dataset.py
+++ b/sparc_me/core/dataset.py
@@ -999,7 +999,7 @@ def delete_sample(self, destination_path, data_type="primary"):
             samples_metadata.remove_row(sam_folder.name)
             samples_metadata.save()
 
-    def delete_thumbnail(self, destination_path):
+    def remove_thumbnail(self, destination_path):
         """
         Delete a thumbnail from the dataset.
         The manifest metadata is updated automatically.
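
---

For anyone trying the series end to end, here is a minimal usage sketch of the
API after all three patches. It is only a sketch: it assumes sparc_me exports
Dataset and Schema at the package level (only Subject and Sample are visible in
the example diff), and every path below is a placeholder.

    import sparc_me as sm

    # Build an empty dataset from the 2.0.0 template
    # (create_empty_dataset() wraps load_from_template()).
    dataset = sm.Dataset()  # assumed export
    dataset.create_empty_dataset(version="2.0.0")

    # Sample.add_path() now accepts a single path or a list of paths;
    # set_path() overrides anything added before.
    sample = sm.Sample()
    sample.add_path(["./test_data/bids_data/sub-01/sequence1/",
                     "./test_data/sample2/raw/dummy_sam2.txt"])

    subject = sm.Subject()
    subject.add_samples([sample])

    # Copies the sample files into the primary subject/sample folders and
    # updates the manifest and dataset_description metadata files.
    dataset.add_subjects([subject])

    # get_schema() now defaults to required_only=True, name_only=True,
    # which prints just the names of the required schema elements.
    schema = sm.Schema()  # assumed export
    schema.get_schema("dataset_description")

    # After PATCH 3/3 the thumbnail helper is remove_thumbnail(); the
    # argument is the thumbnail's path inside the dataset (placeholder here).
    dataset.remove_thumbnail("./dataset/docs/thumbnail_0.jpg")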