Skip to content

Commit

Permalink
Fixing the isa exporter (#1645)
Browse files Browse the repository at this point in the history
* Revising the ISA JSON exporter
  • Loading branch information
kdp-cloud authored Nov 10, 2023
1 parent 558f94b commit f6bdffd
Showing 1 changed file with 81 additions and 35 deletions.
116 changes: 81 additions & 35 deletions lib/isa_exporter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def convert_investigation
people = []
@investigation.related_people.each { |p| people << convert_person(p) }
isa_investigation[:people] = people

studies = []
@investigation.studies.each { |s| studies << convert_study(s) }
isa_investigation[:studies] = studies

@OBJECT_MAP = @OBJECT_MAP.merge(isa_investigation)

isa_investigation
Expand Down Expand Up @@ -81,12 +81,12 @@ def convert_study(study)

study.assays.each do |a|
# There should be only one attribute with isa_tag == protocol
with_tag_protocol = a.sample_type.sample_attributes.detect { |sa| sa.isa_tag&.isa_protocol? }
protocol_attribute = a.sample_type.sample_attributes.detect { |sa| sa.isa_tag&.isa_protocol? }
with_tag_parameter_value = a.sample_type.sample_attributes.select { |sa| sa.isa_tag&.isa_parameter_value? }
raise "Protocol ISA tag not found in #{t(:assay)} #{a.id}" if with_tag_protocol.blank?
raise "Protocol ISA tag not found in #{t(:assay)} #{a.id}" if protocol_attribute.blank?

# raise "The #{t(:study)} with the title '#{study.title}' does not have an SOP" if a.sops.blank?
protocols << convert_protocol(a.sops, a.id, with_tag_protocol, with_tag_parameter_value)
protocols << convert_protocol(a.sops, a.id, protocol_attribute, with_tag_parameter_value)
end
isa_study[:protocols] = protocols

Expand Down Expand Up @@ -238,12 +238,12 @@ def convert_materials_samples(sample_type)
with_tag_sample = sample_type.sample_attributes.detect { |sa| sa.isa_tag&.isa_sample? }
with_tag_sample_characteristic =
sample_type.sample_attributes.select { |sa| sa.isa_tag&.isa_sample_characteristic? }
with_type_seek_sample_multi = sample_type.sample_attributes.detect(&:seek_sample_multi?)
seek_sample_multi_attribute = sample_type.sample_attributes.detect(&:seek_sample_multi?)
sample_type.samples.map do |s|
{
'@id': "#sample/#{s.id}",
name: s.get_attribute_value(with_tag_sample),
derivesFrom: extract_sample_ids(s.get_attribute_value(with_type_seek_sample_multi), 'source'),
derivesFrom: extract_sample_ids(s.get_attribute_value(seek_sample_multi_attribute), 'source'),
characteristics: convert_characteristics(s, with_tag_sample_characteristic),
factorValues: [
{
Expand Down Expand Up @@ -308,29 +308,42 @@ def convert_process_sequence(sample_type, sop_ids, id)
# This method is meant to be used for both Studies and Assays
return [] unless sample_type.samples.any?

with_tag_isa_parameter_value = get_values(sample_type)
with_tag_protocol = detect_protocol(sample_type)
with_type_seek_sample_multi = detect_sample_multi(sample_type)
isa_parameter_value_attributes = select_parameter_values(sample_type)
protocol_attribute = detect_protocol(sample_type)
type = 'source'
type = get_derived_from_type(sample_type) if sample_type.assays.any?

# Convention : The sample_types of studies don't have assay
sample_type.samples.map do |s|
{
'@id': normalize_id("#process/#{with_tag_protocol.title}/#{s.id}"),
# This make a process sequence per sample. Same inputs that generate one output should be bundled.
# One input used for multiple outputs should be bundled as well.
# In SEEK parameter_values are sample specific but outputs, linked to the same input, with different parameter_values
# should be in a different process in the processSequence
samples_grouped_by_input_and_parameter_value = group_samples_by_input_and_parameter_value(sample_type)
result = []
samples_grouped_by_input_and_parameter_value.map do |input_ids, samples_group|
output_ids = samples_group.pluck(:id).join('_')
process = {
'@id': normalize_id("#process/#{protocol_attribute.title}/#{output_ids}"),
name: '',
executesProtocol: {
'@id': "#protocol/#{sop_ids}_#{id}"
},
parameterValues: convert_parameter_values(s, with_tag_isa_parameter_value),
parameterValues: convert_parameter_values(samples_group, isa_parameter_value_attributes),
performer: '',
date: '',
previousProcess: previous_process(s),
nextProcess: next_process(s),
inputs: extract_sample_ids(s.get_attribute_value(with_type_seek_sample_multi), type),
outputs: process_sequence_output(s)
inputs: input_ids.first.map { |input| { '@id': "##{type}/#{input[:id]}" } },
outputs: process_sequence_output(samples_group)
}
# Study processes don't have a previousProcess and nextProcess
unless type == 'source'
process.merge!({
previousProcess: previous_process(samples_group),
nextProcess: next_process(samples_group)
})
end
result.push(process)
end
result
end

def convert_data_files(sample_types)
Expand Down Expand Up @@ -360,8 +373,8 @@ def convert_other_materials(sample_types)
with_tag_isa_other_material = st.sample_attributes.detect { |sa| sa.isa_tag&.isa_other_material? }
return [] unless with_tag_isa_other_material

with_type_seek_sample_multi = st.sample_attributes.detect(&:seek_sample_multi?)
raise 'Defective ISA other_materials!' unless with_type_seek_sample_multi
seek_sample_multi_attribute = st.sample_attributes.detect(&:seek_sample_multi?)
raise 'Defective ISA other_materials!' unless seek_sample_multi_attribute

with_tag_isa_other_material_characteristics =
st.sample_attributes.select { |sa| sa.isa_tag&.isa_other_material_characteristic? }
Expand All @@ -379,7 +392,7 @@ def convert_other_materials(sample_types)
type: with_tag_isa_other_material.title,
characteristics: convert_characteristics(s, with_tag_isa_other_material_characteristics),
# It can sometimes be other_material or sample!!!! SHOULD BE DYNAMIC
derivesFrom: extract_sample_ids(s.get_attribute_value(with_type_seek_sample_multi), type)
derivesFrom: extract_sample_ids(s.get_attribute_value(seek_sample_multi_attribute), type)
}
end
.flatten
Expand All @@ -406,7 +419,7 @@ def detect_protocol(sample_type)
sample_type.sample_attributes.detect { |sa| sa.isa_tag&.isa_protocol? }
end

def get_values(sample_type)
def select_parameter_values(sample_type)
sample_type.sample_attributes.select { |sa| sa.isa_tag&.isa_parameter_value? }
end

Expand All @@ -426,8 +439,9 @@ def detect_sample_multi(sample_type)
sample_type.sample_attributes.detect(&:seek_sample_multi?)
end

def next_process(sample)
def next_process(samples_hash)
# return {} for studies
sample = Sample.find(samples_hash.first[:id])
return {} if sample.sample_type.assays.blank?

sample_type = sample.linking_samples.first&.sample_type
Expand All @@ -438,7 +452,9 @@ def next_process(sample)
return {}
end

def previous_process(sample)
def previous_process(samples_hash)
sample = Sample.find(samples_hash.first[:id])
# hvfjkbdfk
sample_type = sample.linked_samples.first&.sample_type
if (sample_type)
protocol = detect_protocol(sample_type)
Expand All @@ -450,22 +466,52 @@ def previous_process(sample)
return {}
end

def process_sequence_output(sample)
def group_id(key)
key[0] = key[0].map { |input| { id: input['id'] } }
key
end

def group_samples_by_input_and_parameter_value(sample_type)
samples = sample_type.samples
samples_metadata = samples.map do |sample|
metadata = JSON.parse(sample.json_metadata)

{
'id': sample.id,
'title': sample.title
}.merge(metadata).transform_keys!(&:to_sym)
end

input_attribute = detect_sample_multi(sample_type)&.title&.to_sym
parameter_value_attributes = select_parameter_values(sample_type).map(&:title).map(&:to_sym)
group_attributes = parameter_value_attributes.unshift(input_attribute)

grouped_samples = samples_metadata.group_by { |smd| group_attributes.map { |attr| smd[attr] } }
grouped_samples.transform_keys { |key| group_id(key) }
end

def process_sequence_output(samples_hash)
prefix = 'sample'
if sample.sample_type.isa_template.level == 'assay'
if detect_other_material(sample.sample_type)
prefix = 'other_material'
elsif detect_data_file(sample.sample_type)
prefix = 'data_file'
else
raise 'Defective ISA process!'
samples_hash.map do |sample_hash|
sample = Sample.find(sample_hash[:id])
if sample.sample_type.isa_template.level == 'assay'
if detect_other_material(sample.sample_type)
prefix = 'other_material'
elsif detect_data_file(sample.sample_type)
prefix = 'data_file'
else
raise 'Defective ISA process!'
end
end
{ '@id': "##{prefix}/#{sample.id}" }
end
[{ '@id': "##{prefix}/#{sample.id}" }]
end

def convert_parameter_values(sample, with_tag_isa_parameter_value)
with_tag_isa_parameter_value.map do |p|
def convert_parameter_values(sample_group_hash, isa_parameter_value_attributes)
# Every sample in the group should has the same parameterValue.
# So retrieving the first one in the group should be fine.
sample = Sample.find(sample_group_hash.first[:id])
isa_parameter_value_attributes.map do |p|
value = sample.get_attribute_value(p) || ''
ontology = get_ontology_details(p, value, true)
{
Expand Down

0 comments on commit f6bdffd

Please sign in to comment.