Skip to content

Commit

Permalink
Add annotation between scaffold information files and scaffold metadata files.
Browse files Browse the repository at this point in the history
  • Loading branch information
hsorby committed May 15, 2024
1 parent a1a3581 commit 843a1ff
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 18 deletions.
21 changes: 13 additions & 8 deletions src/sparc/curation/tools/definitions.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,46 @@
# VERSION = sparc.curation.tools.__version__

# MIME type strings, each defined exactly once.
CONTEXT_INFO_MIME = 'application/x.vnd.abi.context-information+json'
PLOT_CSV_MIME = 'text/x.vnd.abi.plot+csv'
PLOT_TSV_MIME = 'text/x.vnd.abi.plot+Tab-separated-values'
SCAFFOLD_DIR_MIME = 'inode/vnd.abi.scaffold+directory'
SCAFFOLD_INFO_MIME = 'application/x.vnd.abi.organ-scaffold-info+json'
SCAFFOLD_META_MIME = 'application/x.vnd.abi.scaffold.meta+json'
SCAFFOLD_VIEW_MIME = 'application/x.vnd.abi.scaffold.view+json'
SCAFFOLD_THUMBNAIL_MIME = 'image/x.vnd.abi.thumbnail+jpeg'
STL_MODEL_MIME = 'model/stl'
VTK_MODEL_MIME = 'model/vtk'

# NOTE(review): presumably superseded scaffold MIME types kept for
# recognising older annotations -- confirm against the callers.
OLD_SCAFFOLD_MIMES = [SCAFFOLD_DIR_MIME, 'inode/vnd.abi.scaffold+file', 'inode/vnd.abi.scaffold+thumbnail']

# Binary size unit suffixes, smallest unit first.
SIZE_NAME = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")

MANIFEST_FILENAME = 'manifest.xlsx'

# Manifest column headings, each defined exactly once.
ADDITIONAL_TYPES_COLUMN = 'additional types'
ANATOMICAL_ENTITY_COLUMN = 'isAboutAnatomicalEntity'
DERIVED_FROM_COLUMN = 'isDerivedFrom'
FILE_LOCATION_COLUMN = 'file_location'
FILENAME_COLUMN = 'filename'
MANIFEST_DIR_COLUMN = 'manifest_dir'
SHEET_NAME_COLUMN = 'sheet_name'
SOURCE_OF_COLUMN = 'isSourceOf'
SUPPLEMENTAL_JSON_COLUMN = 'Supplemental JSON Metadata'

# Maps a MIME type to the short file-type label used for it.
MIMETYPE_TO_FILETYPE_MAP = {
    SCAFFOLD_META_MIME: 'Metadata',
    SCAFFOLD_VIEW_MIME: 'View',
    SCAFFOLD_THUMBNAIL_MIME: 'Thumbnail',
    STL_MODEL_MIME: 'STL Model',
    VTK_MODEL_MIME: 'VTK Model',
    SCAFFOLD_DIR_MIME: 'Directory',
    SCAFFOLD_INFO_MIME: 'ScaffoldInformation',
}

MIMETYPE_TO_PARENT_FILETYPE_MAP = {
SCAFFOLD_VIEW_MIME: 'Metadata',
CONTEXT_INFO_MIME: 'Metadata',
SCAFFOLD_META_MIME: 'ScaffoldInformation',
SCAFFOLD_THUMBNAIL_MIME: 'View',
STL_MODEL_MIME: 'View',
VTK_MODEL_MIME: 'View',
Expand All @@ -45,4 +49,5 @@
# Maps a MIME type to the list of file-type labels listed as its children.
# Most labels match the values of MIMETYPE_TO_FILETYPE_MAP
# (e.g. 'View', 'Metadata', 'Thumbnail').
# NOTE(review): 'ContextInfo' has no matching label in the visible part of
# MIMETYPE_TO_FILETYPE_MAP -- confirm this is intentional.
MIMETYPE_TO_CHILDREN_FILETYPE_MAP = {
SCAFFOLD_VIEW_MIME: ['Thumbnail', 'STL Model', 'VTK Model'],
SCAFFOLD_META_MIME: ['View', 'ContextInfo'],
SCAFFOLD_INFO_MIME: ['Metadata'],
}
1 change: 1 addition & 0 deletions src/sparc/curation/tools/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ class BadManifestError(Exception):
"""
pass


class DatasetNotDefinedError(AnnotationError):
"""
Class for errors where the dataset for annotations has not
Expand Down
46 changes: 36 additions & 10 deletions src/sparc/curation/tools/helpers/error_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sparc.curation.tools.definitions import FILE_LOCATION_COLUMN, FILENAME_COLUMN, ADDITIONAL_TYPES_COLUMN, \
SCAFFOLD_META_MIME, SCAFFOLD_VIEW_MIME, \
SCAFFOLD_THUMBNAIL_MIME, DERIVED_FROM_COLUMN, SOURCE_OF_COLUMN, MANIFEST_DIR_COLUMN, \
OLD_SCAFFOLD_MIMES, MIMETYPE_TO_PARENT_FILETYPE_MAP, MIMETYPE_TO_FILETYPE_MAP, STL_MODEL_MIME, VTK_MODEL_MIME
OLD_SCAFFOLD_MIMES, MIMETYPE_TO_PARENT_FILETYPE_MAP, MIMETYPE_TO_FILETYPE_MAP, STL_MODEL_MIME, VTK_MODEL_MIME, SCAFFOLD_INFO_MIME
from sparc.curation.tools.helpers.file_helper import OnDiskFiles
from sparc.curation.tools.helpers.manifest_helper import ManifestDataFrame

Expand Down Expand Up @@ -200,6 +200,8 @@ def get_incorrect_derived_from(self):
self._on_disk_alt_forms_files[mime_type], self.on_disk_view_files, self._manifest_alt_forms_files[mime_type], mime_type)
errors.extend(alt_forms_derived_from_errors)

errors.extend(self._process_metadata_organ_scaffold(derived_from=True))

return errors

def _process_incorrect_source_of(self, on_disk_files, mimetype, on_disk_child_files):
Expand Down Expand Up @@ -246,6 +248,27 @@ def _process_incorrect_source_of(self, on_disk_files, mimetype, on_disk_child_fi

return errors

def _get_single_value(self, column_heading, value, out_column_heading):
    """
    Look up a manifest value that must be unambiguous.

    Queries the manifest for entries where *column_heading* matches *value*
    and reads the corresponding *out_column_heading* values.

    Args:
        column_heading (str): The column to match against.
        value: The value to look for in *column_heading*.
        out_column_heading (str): The column whose content is returned.

    Returns:
        The single matching entry, or None when the query yields zero or
        more than one result.
    """
    matches = self.manifest.get_matching_entry(column_heading, value, out_column_heading)
    return matches[0] if len(matches) == 1 else None

def _process_metadata_organ_scaffold(self, derived_from=False):
    """
    Check the annotation link between the scaffold information file and the
    scaffold metadata file.

    The check is only made when the manifest holds exactly one file
    location for each of SCAFFOLD_INFO_MIME and SCAFFOLD_META_MIME.
    Only a missing value (one whose string form is "nan") is reported;
    the content of a populated cell is not validated here.

    Args:
        derived_from (bool): When true, check the metadata file's
            'derived from' entry; otherwise check the scaffold information
            file's 'source of' entry.

    Returns:
        list: Empty, or holding a single IncorrectDerivedFromError or
        IncorrectSourceOfError.
    """
    info_location = self._get_single_value(ADDITIONAL_TYPES_COLUMN, SCAFFOLD_INFO_MIME, FILE_LOCATION_COLUMN)
    meta_location = self._get_single_value(ADDITIONAL_TYPES_COLUMN, SCAFFOLD_META_MIME, FILE_LOCATION_COLUMN)
    if not (info_location and meta_location):
        # Without one unambiguous file of each kind there is nothing to link.
        return []

    info_source_of = self._get_single_value(FILE_LOCATION_COLUMN, info_location, SOURCE_OF_COLUMN)
    meta_derived_from = self._get_single_value(FILE_LOCATION_COLUMN, meta_location, DERIVED_FROM_COLUMN)

    if derived_from:
        if str(meta_derived_from) == "nan":
            return [IncorrectDerivedFromError(meta_location, SCAFFOLD_META_MIME, [info_location])]
    elif str(info_source_of) == "nan":
        return [IncorrectSourceOfError(info_location, SCAFFOLD_INFO_MIME, [meta_location])]

    return []

def get_incorrect_source_of(self):
"""
Get errors for incorrect source of relationships in the manifest dataframe.
Expand All @@ -267,6 +290,9 @@ def get_incorrect_source_of(self):
self.on_disk_view_files, SCAFFOLD_VIEW_MIME, self._on_disk_alt_forms_files[mime_type])
errors.extend(alt_forms_derived_from_errors)

# Look for link between metadata file and application/x.vnd.abi.organ-scaffold-info+json
errors.extend(self._process_metadata_organ_scaffold())

return errors

def get_incorrect_complementary(self):
Expand Down Expand Up @@ -330,18 +356,15 @@ def update_derived_from(self, file_location, mime, target):
target_filenames = []

if mime == SCAFFOLD_VIEW_MIME:
# If the MIME type is SCAFFOLD_VIEW_MIME, find the matching target filenames
for t in target:
target_manifest = self.manifest.get_matching_entry(FILE_LOCATION_COLUMN, t, MANIFEST_DIR_COLUMN)
if source_manifest == target_manifest:
target_filenames.extend(
self.manifest.get_matching_entry(FILE_LOCATION_COLUMN, t, FILENAME_COLUMN))

elif mime == SCAFFOLD_THUMBNAIL_MIME:
# If the MIME type is SCAFFOLD_THUMBNAIL_MIME, find the best matching target filename
target_filenames = self._find_best_match(file_location, source_manifest, target)
elif mime in [STL_MODEL_MIME, VTK_MODEL_MIME]:
elif mime in [SCAFFOLD_THUMBNAIL_MIME, STL_MODEL_MIME, VTK_MODEL_MIME]:
target_filenames = self._find_best_match(file_location, source_manifest, target)
elif mime in [SCAFFOLD_META_MIME]:
target_filenames = target

# Update the 'Derived From' column content with the target filenames
self.manifest.update_column_content(file_location, DERIVED_FROM_COLUMN, "\n".join(target_filenames))
Expand All @@ -354,6 +377,7 @@ def update_source_of(self, file_location, mime, target, replace):
file_location (str): The file location to update.
mime (str): The MIME type of the file.
target (list): List of target file locations.
replace (bool): True if the existing contents are to be replaced.
"""
# Get the source manifest entry for the given file location
Expand All @@ -373,13 +397,15 @@ def update_source_of(self, file_location, mime, target, replace):
if t_mime and t_mime[0] == SCAFFOLD_THUMBNAIL_MIME:
filtered_targets.append(t)
else:
matched_entries = self.manifest.get_matching_entry(FILE_LOCATION_COLUMN, t, FILENAME_COLUMN)
if replace:
target_filenames = self.manifest.get_matching_entry(FILE_LOCATION_COLUMN, t, FILENAME_COLUMN)
target_filenames = matched_entries
else:
target_filenames.extend(
self.manifest.get_matching_entry(FILE_LOCATION_COLUMN, t, FILENAME_COLUMN))
target_filenames.extend(matched_entries)

target_filenames.extend(self._find_best_match(file_location, source_manifest, filtered_targets))
elif mime in [SCAFFOLD_INFO_MIME]:
target_filenames = target

# Update the 'Source Of' column content with the target filenames
self.manifest.update_column_content(file_location, SOURCE_OF_COLUMN, "\n".join(target_filenames))
Expand Down

0 comments on commit 843a1ff

Please sign in to comment.