From 1a89cc6ba600769cc79f6f6040319e026df5e4b6 Mon Sep 17 00:00:00 2001 From: Michael Baudis Date: Thu, 21 Nov 2024 08:42:07 +0900 Subject: [PATCH] extending the Phenopackets schema --- .../bycon-database-schemas/pgxIndividual.json | 4 +- .../bycon-model/common/Interpretation.json | 56 +++++ .../json/bycon-model/common/pedigree.json | 2 +- .../json/bycon-model/common/vitalStatus.json | 2 +- .../phenopackets/defaultSchema.json | 5 +- .../bycon-database-schemas/pgxIndividual.yaml | 4 +- .../bycon-model/common/Interpretation.yaml | 44 ++++ .../src/bycon-model/common/pedigree.yaml | 2 +- .../src/bycon-model/common/vitalStatus.yaml | 2 +- .../phenopackets/defaultSchema.yaml | 7 +- bycon/schemas/paths/analyses.txt | 10 - bycon/schemas/paths/biosamples.txt | 96 --------- bycon/schemas/paths/cohorts.txt | 94 --------- bycon/schemas/paths/datasets.txt | 13 -- bycon/schemas/paths/genomicVariations.txt | 192 ------------------ bycon/schemas/paths/individuals.txt | 140 ------------- bycon/schemas/paths/runs.txt | 13 -- docs/generated/beacon-responses.md | 2 +- housekeepers/frequencymapsCreator.py | 141 ------------- updev.sh | 1 + 20 files changed, 118 insertions(+), 712 deletions(-) create mode 100644 bycon/schemas/models/json/bycon-model/common/Interpretation.json create mode 100644 bycon/schemas/models/src/bycon-model/common/Interpretation.yaml delete mode 100644 bycon/schemas/paths/analyses.txt delete mode 100644 bycon/schemas/paths/biosamples.txt delete mode 100644 bycon/schemas/paths/cohorts.txt delete mode 100644 bycon/schemas/paths/datasets.txt delete mode 100644 bycon/schemas/paths/genomicVariations.txt delete mode 100644 bycon/schemas/paths/individuals.txt delete mode 100644 bycon/schemas/paths/runs.txt delete mode 100755 housekeepers/frequencymapsCreator.py diff --git a/bycon/schemas/models/json/bycon-database-schemas/pgxIndividual.json b/bycon/schemas/models/json/bycon-database-schemas/pgxIndividual.json index cb91ef1b6..61486eae9 100644 --- a/bycon/schemas/models/json/bycon-database-schemas/pgxIndividual.json +++ b/bycon/schemas/models/json/bycon-database-schemas/pgxIndividual.json @@ -50,8 +50,8 @@ } ] }, - "vitalStatus": { - "$ref": "../common/vitalStatus.json" + "VitalStatus": { + "$ref": "../common/VitalStatus.json" }, "geoLocation": { "$ref": "../common/GeoLocation.json" diff --git a/bycon/schemas/models/json/bycon-model/common/Interpretation.json b/bycon/schemas/models/json/bycon-model/common/Interpretation.json new file mode 100644 index 000000000..26d10c397 --- /dev/null +++ b/bycon/schemas/models/json/bycon-model/common/Interpretation.json @@ -0,0 +1,56 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Interpretation", + "description": "This object intends to represent the interpretation of a genomic analysis, such as the report from a diagnostic laboratory. Source: Phenopackets v2 ", + "type": "object", + "properties": { + "id": { + "description": "Interpretation identifier.", + "type": "string", + "examples": [ + "CONSORTIUM:0000123456" + ] + }, + "progressStatus": { + "type": "string", + "enum": [ + "UNKNOWN_PROGRESS", + "IN_PROGRESS", + "COMPLETED", + "SOLVED", + "UNSOLVED" + ] + }, + "diagnosis": { + "$ref": "#/$defs/Diagnosis" + }, + "summary": { + "type": "string", + "description": "Additional data about this interpretation, e.g. free text summary." + } + }, + "$defs": { + "Diagnosis": { + "properties": { + "disease": { + "$ref": "ontologyTerm.json" + }, + "genomicInterpretations": { + "type": "array", + "items": { + "description": "TODO: Define class \"GenomicInterpretation\".", + "type": "object" + } + } + }, + "required": [ + "disease" + ] + } + }, + "required": [ + "id", + "progressStatus" + ], + "additionalProperties": true +} \ No newline at end of file diff --git a/bycon/schemas/models/json/bycon-model/common/pedigree.json b/bycon/schemas/models/json/bycon-model/common/pedigree.json index 6a432e921..a480d5fd5 100644 --- a/bycon/schemas/models/json/bycon-model/common/pedigree.json +++ b/bycon/schemas/models/json/bycon-model/common/pedigree.json @@ -10,7 +10,7 @@ "example": "Pedigree1001" }, "disease": { - "$ref": "../common/disease.json" + "$ref": "disease.json" }, "numSubjects": { "description": "Total number of subjects in pedigree.", diff --git a/bycon/schemas/models/json/bycon-model/common/vitalStatus.json b/bycon/schemas/models/json/bycon-model/common/vitalStatus.json index 684d9fafa..f0aa9c32d 100644 --- a/bycon/schemas/models/json/bycon-model/common/vitalStatus.json +++ b/bycon/schemas/models/json/bycon-model/common/vitalStatus.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://progenetix.org/services/schemas/vitalStatus/v2021-12-02", + "$id": "https://progenetix.org/services/schemas/VitalStatus/v2021-12-02", "title": "VitalStatus", "description": "This element can be used to report whether the individual is living or dead at the timepoint when the phenopacket was created (or if the status is unknown). Source: Phenopackets v2, with modifications", "type": "object", diff --git a/bycon/schemas/models/json/bycon-model/phenopackets/defaultSchema.json b/bycon/schemas/models/json/bycon-model/phenopackets/defaultSchema.json index 1754b578f..bb2be79ae 100644 --- a/bycon/schemas/models/json/bycon-model/phenopackets/defaultSchema.json +++ b/bycon/schemas/models/json/bycon-model/phenopackets/defaultSchema.json @@ -50,13 +50,14 @@ "interpretations": { "type": "array", "items": { - "$ref": "Interpretation.json" + "$ref": "../common/Interpretation.json" } }, "medicalActions": { "type": "array", "items": { - "$ref": "MedicalAction.json" + "description": "TODO: Define class \"MedicalAction\". ", + "type": "object" } } } diff --git a/bycon/schemas/models/src/bycon-database-schemas/pgxIndividual.yaml b/bycon/schemas/models/src/bycon-database-schemas/pgxIndividual.yaml index 83f6a3c72..5e83ad96e 100644 --- a/bycon/schemas/models/src/bycon-database-schemas/pgxIndividual.yaml +++ b/bycon/schemas/models/src/bycon-database-schemas/pgxIndividual.yaml @@ -40,8 +40,8 @@ properties: notes: HapMap individual - id: 022f39e9-57ee-4b2b-8b3a-8929e3d69a37 notes: TCGA case_id - vitalStatus: - $ref: ../common/vitalStatus.yaml + VitalStatus: + $ref: ../common/VitalStatus.yaml geoLocation: $ref: ../common/GeoLocation.yaml ancestries: diff --git a/bycon/schemas/models/src/bycon-model/common/Interpretation.yaml b/bycon/schemas/models/src/bycon-model/common/Interpretation.yaml new file mode 100644 index 000000000..03dc426d3 --- /dev/null +++ b/bycon/schemas/models/src/bycon-model/common/Interpretation.yaml @@ -0,0 +1,44 @@ +$schema: https://json-schema.org/draft/2020-12/schema +title: Interpretation +description: >- + This object intends to represent the interpretation of a genomic analysis, such + as the report from a diagnostic laboratory. + Source: Phenopackets v2 +type: object +properties: + id: + description: Interpretation identifier. + type: string + examples: + - CONSORTIUM:0000123456 + progressStatus: + type: string + enum: + - UNKNOWN_PROGRESS + - IN_PROGRESS + - COMPLETED + - SOLVED + - UNSOLVED + diagnosis: + $ref: '#/$defs/Diagnosis' + summary: + type: string + description: >- + Additional data about this interpretation, e.g. free text summary. +$defs: + Diagnosis: + properties: + disease: + $ref: ontologyTerm.yaml + genomicInterpretations: + type: array + items: + description: >- + TODO: Define class "GenomicInterpretation". + type: object + required: + - disease +required: + - id + - progressStatus +additionalProperties: true diff --git a/bycon/schemas/models/src/bycon-model/common/pedigree.yaml b/bycon/schemas/models/src/bycon-model/common/pedigree.yaml index f223d1b9a..e4b93910d 100644 --- a/bycon/schemas/models/src/bycon-model/common/pedigree.yaml +++ b/bycon/schemas/models/src/bycon-model/common/pedigree.yaml @@ -8,7 +8,7 @@ properties: type: string example: Pedigree1001 disease: - $ref: ../common/disease.yaml + $ref: disease.yaml numSubjects: description: Total number of subjects in pedigree. type: integer diff --git a/bycon/schemas/models/src/bycon-model/common/vitalStatus.yaml b/bycon/schemas/models/src/bycon-model/common/vitalStatus.yaml index 5a98d6b4c..dd6971019 100644 --- a/bycon/schemas/models/src/bycon-model/common/vitalStatus.yaml +++ b/bycon/schemas/models/src/bycon-model/common/vitalStatus.yaml @@ -1,5 +1,5 @@ $schema: https://json-schema.org/draft/2020-12/schema -$id: https://progenetix.org/services/schemas/vitalStatus/v2021-12-02 +$id: https://progenetix.org/services/schemas/VitalStatus/v2021-12-02 title: VitalStatus description: >- This element can be used to report whether the individual is living or dead diff --git a/bycon/schemas/models/src/bycon-model/phenopackets/defaultSchema.yaml b/bycon/schemas/models/src/bycon-model/phenopackets/defaultSchema.yaml index ad9206b10..7792513c9 100644 --- a/bycon/schemas/models/src/bycon-model/phenopackets/defaultSchema.yaml +++ b/bycon/schemas/models/src/bycon-model/phenopackets/defaultSchema.yaml @@ -42,10 +42,13 @@ properties: interpretations: type: array items: - $ref: Interpretation.yaml + $ref: ../common/Interpretation.yaml medicalActions: type: array items: - $ref: MedicalAction.yaml + description: >- + TODO: Define class "MedicalAction". + + type: object diff --git a/bycon/schemas/paths/analyses.txt b/bycon/schemas/paths/analyses.txt deleted file mode 100644 index 1b211c61a..000000000 --- a/bycon/schemas/paths/analyses.txt +++ /dev/null @@ -1,10 +0,0 @@ -aligner -analysisDate -biosampleId -id -individualId -info -pipelineName -pipelineRef -runId -variantCaller diff --git a/bycon/schemas/paths/biosamples.txt b/bycon/schemas/paths/biosamples.txt deleted file mode 100644 index e086838a7..000000000 --- a/bycon/schemas/paths/biosamples.txt +++ /dev/null @@ -1,96 +0,0 @@ -beacon_model_path: biosamples.biosampleStatus.id -beacon_model_path: biosamples.biosampleStatus.label -beacon_model_path: biosamples.collectionDate -beacon_model_path: biosamples.collectionMoment -beacon_model_path: biosamples.diagnosticMarkers.id -beacon_model_path: biosamples.diagnosticMarkers.label -beacon_model_path: biosamples.histologicalDiagnosis.id -beacon_model_path: biosamples.histologicalDiagnosis.label -beacon_model_path: biosamples.id -beacon_model_path: biosamples.individualId -beacon_model_path: biosamples.info -beacon_model_path: biosamples.measurements.assayCode.id -beacon_model_path: biosamples.measurements.assayCode.label -beacon_model_path: biosamples.measurements.date -beacon_model_path: biosamples.measurements.measurementValue.id -beacon_model_path: biosamples.measurements.measurementValue.label -beacon_model_path: biosamples.measurements.measurementValue.referenceRange.high -beacon_model_path: biosamples.measurements.measurementValue.referenceRange.low -beacon_model_path: biosamples.measurements.measurementValue.referenceRange.unit.id -beacon_model_path: biosamples.measurements.measurementValue.referenceRange.unit.label -beacon_model_path: biosamples.measurements.measurementValue.typedQuantities.quantity.referenceRange.high -beacon_model_path: biosamples.measurements.measurementValue.typedQuantities.quantity.referenceRange.low -beacon_model_path: biosamples.measurements.measurementValue.typedQuantities.quantity.referenceRange.unit -beacon_model_path: biosamples.measurements.measurementValue.typedQuantities.quantity.unit.id -beacon_model_path: biosamples.measurements.measurementValue.typedQuantities.quantity.unit.label -beacon_model_path: biosamples.measurements.measurementValue.typedQuantities.quantity.value -beacon_model_path: biosamples.measurements.measurementValue.unit.id -beacon_model_path: biosamples.measurements.measurementValue.unit.label -beacon_model_path: biosamples.measurements.measurementValue.value -beacon_model_path: biosamples.measurements.notes -beacon_model_path: biosamples.measurements.observationMoment -beacon_model_path: biosamples.measurements.observationMoment.days -beacon_model_path: biosamples.measurements.observationMoment.end -beacon_model_path: biosamples.measurements.observationMoment.end.iso8601duration -beacon_model_path: biosamples.measurements.observationMoment.id -beacon_model_path: biosamples.measurements.observationMoment.iso8601duration -beacon_model_path: biosamples.measurements.observationMoment.label -beacon_model_path: biosamples.measurements.observationMoment.start -beacon_model_path: biosamples.measurements.observationMoment.start.iso8601duration -beacon_model_path: biosamples.measurements.observationMoment.weeks -beacon_model_path: biosamples.measurements.procedure.ageAtProcedure -beacon_model_path: biosamples.measurements.procedure.bodySite.id -beacon_model_path: biosamples.measurements.procedure.bodySite.label -beacon_model_path: biosamples.measurements.procedure.dateOfProcedure -beacon_model_path: biosamples.measurements.procedure.procedureCode.id -beacon_model_path: biosamples.measurements.procedure.procedureCode.label -beacon_model_path: biosamples.notes -beacon_model_path: biosamples.pathologicalStage.id -beacon_model_path: biosamples.pathologicalStage.label -beacon_model_path: biosamples.pathologicalTnmFinding.id -beacon_model_path: biosamples.pathologicalTnmFinding.label -beacon_model_path: biosamples.phenotypicFeatures.evidence.evidenceCode.id -beacon_model_path: biosamples.phenotypicFeatures.evidence.evidenceCode.label -beacon_model_path: biosamples.phenotypicFeatures.evidence.reference.id -beacon_model_path: biosamples.phenotypicFeatures.evidence.reference.notes -beacon_model_path: biosamples.phenotypicFeatures.evidence.reference.reference -beacon_model_path: biosamples.phenotypicFeatures.excluded -beacon_model_path: biosamples.phenotypicFeatures.featureType.id -beacon_model_path: biosamples.phenotypicFeatures.featureType.label -beacon_model_path: biosamples.phenotypicFeatures.modifiers.id -beacon_model_path: biosamples.phenotypicFeatures.modifiers.label -beacon_model_path: biosamples.phenotypicFeatures.notes -beacon_model_path: biosamples.phenotypicFeatures.onset -beacon_model_path: biosamples.phenotypicFeatures.onset.days -beacon_model_path: biosamples.phenotypicFeatures.onset.end -beacon_model_path: biosamples.phenotypicFeatures.onset.end.iso8601duration -beacon_model_path: biosamples.phenotypicFeatures.onset.id -beacon_model_path: biosamples.phenotypicFeatures.onset.iso8601duration -beacon_model_path: biosamples.phenotypicFeatures.onset.label -beacon_model_path: biosamples.phenotypicFeatures.onset.start -beacon_model_path: biosamples.phenotypicFeatures.onset.start.iso8601duration -beacon_model_path: biosamples.phenotypicFeatures.onset.weeks -beacon_model_path: biosamples.phenotypicFeatures.resolution -beacon_model_path: biosamples.phenotypicFeatures.resolution.days -beacon_model_path: biosamples.phenotypicFeatures.resolution.end -beacon_model_path: biosamples.phenotypicFeatures.resolution.end.iso8601duration -beacon_model_path: biosamples.phenotypicFeatures.resolution.id -beacon_model_path: biosamples.phenotypicFeatures.resolution.iso8601duration -beacon_model_path: biosamples.phenotypicFeatures.resolution.label -beacon_model_path: biosamples.phenotypicFeatures.resolution.start -beacon_model_path: biosamples.phenotypicFeatures.resolution.start.iso8601duration -beacon_model_path: biosamples.phenotypicFeatures.resolution.weeks -beacon_model_path: biosamples.phenotypicFeatures.severity.id -beacon_model_path: biosamples.phenotypicFeatures.severity.label -beacon_model_path: biosamples.sampleOriginDetail.id -beacon_model_path: biosamples.sampleOriginDetail.label -beacon_model_path: biosamples.sampleOriginType.id -beacon_model_path: biosamples.sampleOriginType.label -beacon_model_path: biosamples.sampleProcessing.id -beacon_model_path: biosamples.sampleProcessing.label -beacon_model_path: biosamples.sampleStorage.id -beacon_model_path: biosamples.sampleStorage.label -beacon_model_path: biosamples.tumorGrade.id -beacon_model_path: biosamples.tumorGrade.label -beacon_model_path: biosamples.tumorProgression.id -beacon_model_path: biosamples.tumorProgression.label diff --git a/bycon/schemas/paths/cohorts.txt b/bycon/schemas/paths/cohorts.txt deleted file mode 100644 index 086d6909c..000000000 --- a/bycon/schemas/paths/cohorts.txt +++ /dev/null @@ -1,94 +0,0 @@ -cohortDataTypes|id -cohortDataTypes|label -cohortDesign|id -cohortDesign|label -cohortSize -cohortType -collectionEvents|eventAgeRange|availability -collectionEvents|eventAgeRange|availabilityCount -collectionEvents|eventAgeRange|distribution -collectionEvents|eventDataTypes|availability -collectionEvents|eventDataTypes|availabilityCount -collectionEvents|eventDataTypes|distribution -collectionEvents|eventDiseases|availability -collectionEvents|eventDiseases|availabilityCount -collectionEvents|eventDiseases|distribution -collectionEvents|eventEthnicities|availability -collectionEvents|eventEthnicities|availabilityCount -collectionEvents|eventEthnicities|distribution -collectionEvents|eventGenders|availability -collectionEvents|eventGenders|availabilityCount -collectionEvents|eventGenders|distribution -collectionEvents|eventLocations|availability -collectionEvents|eventLocations|availabilityCount -collectionEvents|eventLocations|distribution -collectionEvents|eventPhenotypes|availability -collectionEvents|eventPhenotypes|availabilityCount -collectionEvents|eventPhenotypes|distribution -collectionEvents|eventTimeline|availability -collectionEvents|eventTimeline|availabilityCount -collectionEvents|eventTimeline|distribution -exclusionCriteria|ageRange -exclusionCriteria|ageRange|end|iso8601duration -exclusionCriteria|ageRange|start|iso8601duration -exclusionCriteria|diseaseConditions|ageOfOnset -exclusionCriteria|diseaseConditions|diseaseCode|id -exclusionCriteria|diseaseConditions|diseaseCode|label -exclusionCriteria|diseaseConditions|familyHistory -exclusionCriteria|diseaseConditions|notes -exclusionCriteria|diseaseConditions|severity|id -exclusionCriteria|diseaseConditions|severity|label -exclusionCriteria|diseaseConditions|stage|id -exclusionCriteria|diseaseConditions|stage|label -exclusionCriteria|ethnicities|id -exclusionCriteria|ethnicities|label -exclusionCriteria|genders|id -exclusionCriteria|genders|label -exclusionCriteria|locations|id -exclusionCriteria|locations|label -exclusionCriteria|phenotypicConditions|evidence|evidenceCode -exclusionCriteria|phenotypicConditions|evidence|reference -exclusionCriteria|phenotypicConditions|excluded -exclusionCriteria|phenotypicConditions|featureType|id -exclusionCriteria|phenotypicConditions|featureType|label -exclusionCriteria|phenotypicConditions|modifiers -exclusionCriteria|phenotypicConditions|notes -exclusionCriteria|phenotypicConditions|onset -exclusionCriteria|phenotypicConditions|resolution -exclusionCriteria|phenotypicConditions|severity|id -exclusionCriteria|phenotypicConditions|severity|label -exclusionCriteria|type|availability -exclusionCriteria|type|availabilityCount -id -inclusionCriteria|ageRange -inclusionCriteria|ageRange|end|iso8601duration -inclusionCriteria|ageRange|start|iso8601duration -inclusionCriteria|diseaseConditions|ageOfOnset -inclusionCriteria|diseaseConditions|diseaseCode|id -inclusionCriteria|diseaseConditions|diseaseCode|label -inclusionCriteria|diseaseConditions|familyHistory -inclusionCriteria|diseaseConditions|notes -inclusionCriteria|diseaseConditions|severity|id -inclusionCriteria|diseaseConditions|severity|label -inclusionCriteria|diseaseConditions|stage|id -inclusionCriteria|diseaseConditions|stage|label -inclusionCriteria|ethnicities|id -inclusionCriteria|ethnicities|label -inclusionCriteria|genders|id -inclusionCriteria|genders|label -inclusionCriteria|locations|id -inclusionCriteria|locations|label -inclusionCriteria|phenotypicConditions|evidence|evidenceCode -inclusionCriteria|phenotypicConditions|evidence|reference -inclusionCriteria|phenotypicConditions|excluded -inclusionCriteria|phenotypicConditions|featureType|id -inclusionCriteria|phenotypicConditions|featureType|label -inclusionCriteria|phenotypicConditions|modifiers -inclusionCriteria|phenotypicConditions|notes -inclusionCriteria|phenotypicConditions|onset -inclusionCriteria|phenotypicConditions|resolution -inclusionCriteria|phenotypicConditions|severity|id -inclusionCriteria|phenotypicConditions|severity|label -inclusionCriteria|type|availability -inclusionCriteria|type|availabilityCount -name diff --git a/bycon/schemas/paths/datasets.txt b/bycon/schemas/paths/datasets.txt deleted file mode 100644 index 5153ce5d4..000000000 --- a/bycon/schemas/paths/datasets.txt +++ /dev/null @@ -1,13 +0,0 @@ -createDateTime -dataUseConditions|duoDataUse|description -dataUseConditions|duoDataUse|id -dataUseConditions|duoDataUse|label -dataUseConditions|duoDataUse|modifiers -dataUseConditions|duoDataUse|version -description -externalUrl -id -info -name -updateDateTime -version diff --git a/bycon/schemas/paths/genomicVariations.txt b/bycon/schemas/paths/genomicVariations.txt deleted file mode 100644 index 2fcc0b2ba..000000000 --- a/bycon/schemas/paths/genomicVariations.txt +++ /dev/null @@ -1,192 +0,0 @@ -caseLevelData|alleleOrigin|id -caseLevelData|alleleOrigin|label -caseLevelData|analysisId -caseLevelData|biosampleId -caseLevelData|clinicalInterpretations|annotatedWith|toolName -caseLevelData|clinicalInterpretations|annotatedWith|version -caseLevelData|clinicalInterpretations|category|id -caseLevelData|clinicalInterpretations|category|label -caseLevelData|clinicalInterpretations|clinicalRelevance -caseLevelData|clinicalInterpretations|conditionId -caseLevelData|clinicalInterpretations|effect|id -caseLevelData|clinicalInterpretations|effect|label -caseLevelData|clinicalInterpretations|evidenceType|id -caseLevelData|clinicalInterpretations|evidenceType|label -caseLevelData|id -caseLevelData|individualId -caseLevelData|phenotypicEffects|annotatedWith|toolName -caseLevelData|phenotypicEffects|annotatedWith|version -caseLevelData|phenotypicEffects|category|id -caseLevelData|phenotypicEffects|category|label -caseLevelData|phenotypicEffects|clinicalRelevance -caseLevelData|phenotypicEffects|conditionId -caseLevelData|phenotypicEffects|effect|id -caseLevelData|phenotypicEffects|effect|label -caseLevelData|phenotypicEffects|evidenceType|id -caseLevelData|phenotypicEffects|evidenceType|label -caseLevelData|runId -caseLevelData|zygosity|id -caseLevelData|zygosity|label -frequencyInPopulations|frequencies|alleleFrequency -frequencyInPopulations|frequencies|population -frequencyInPopulations|source -frequencyInPopulations|sourceReference -frequencyInPopulations|version -identifiers|clinvarVariantId -identifiers|genomicHGVSId -identifiers|proteinHGVSIds -identifiers|transcriptHGVSIds -identifiers|variantAlternativeIds|id -identifiers|variantAlternativeIds|notes -identifiers|variantAlternativeIds|reference -identifiers|variantAlternativeIds|type -molecularAttributes|aminoacidChanges -molecularAttributes|geneIds -molecularAttributes|genomicFeatures|featureClass|id -molecularAttributes|genomicFeatures|featureClass|label -molecularAttributes|genomicFeatures|featureID|id -molecularAttributes|genomicFeatures|featureID|label -molecularAttributes|molecularEffects|id -molecularAttributes|molecularEffects|label -variantInternalId -variantLevelData|clinicalInterpretations|annotatedWith|toolName -variantLevelData|clinicalInterpretations|annotatedWith|toolReferences -variantLevelData|clinicalInterpretations|annotatedWith|version -variantLevelData|clinicalInterpretations|category|id -variantLevelData|clinicalInterpretations|category|label -variantLevelData|clinicalInterpretations|clinicalRelevance -variantLevelData|clinicalInterpretations|conditionId -variantLevelData|clinicalInterpretations|effect|id -variantLevelData|clinicalInterpretations|effect|label -variantLevelData|clinicalInterpretations|evidenceType|id -variantLevelData|clinicalInterpretations|evidenceType|label -variantLevelData|phenotypicEffects|annotatedWith|toolName -variantLevelData|phenotypicEffects|annotatedWith|toolReferences -variantLevelData|phenotypicEffects|annotatedWith|version -variantLevelData|phenotypicEffects|category|id -variantLevelData|phenotypicEffects|category|label -variantLevelData|phenotypicEffects|clinicalRelevance -variantLevelData|phenotypicEffects|conditionId -variantLevelData|phenotypicEffects|effect|id -variantLevelData|phenotypicEffects|effect|label -variantLevelData|phenotypicEffects|evidenceType|id -variantLevelData|phenotypicEffects|evidenceType|label -variation|_id -variation|alternateBases -variation|copies -variation|copy_change -variation|count|_id -variation|count|chr -variation|count|gene_id -variation|count|interval -variation|count|sequence_id -variation|count|species_id -variation|count|type -variation|location|_id -variation|location|chr -variation|location|interval -variation|location|interval|end -variation|location|interval|end|comparator -variation|location|interval|end|max -variation|location|interval|end|min -variation|location|interval|end|type -variation|location|interval|end|value -variation|location|interval|start -variation|location|interval|start|comparator -variation|location|interval|start|max -variation|location|interval|start|min -variation|location|interval|start|type -variation|location|interval|start|value -variation|location|interval|type -variation|location|sequence_id -variation|location|species_id -variation|location|type -variation|members -variation|members|count|comparator -variation|members|count|max -variation|members|count|min -variation|members|count|type -variation|members|count|value -variation|members|type -variation|members|variation -variation|referenceBases -variation|state|components|location|_id -variation|state|components|location|chr -variation|state|components|location|interval -variation|state|components|location|interval|end -variation|state|components|location|interval|end|comparator -variation|state|components|location|interval|end|max -variation|state|components|location|interval|end|min -variation|state|components|location|interval|end|type -variation|state|components|location|interval|end|value -variation|state|components|location|interval|start -variation|state|components|location|interval|start|comparator -variation|state|components|location|interval|start|max -variation|state|components|location|interval|start|min -variation|state|components|location|interval|start|type -variation|state|components|location|interval|start|value -variation|state|components|location|interval|type -variation|state|components|location|sequence_id -variation|state|components|location|species_id -variation|state|components|location|type -variation|state|components|reverse_complement -variation|state|components|sequence -variation|state|components|type -variation|state|count|comparator -variation|state|count|max -variation|state|count|min -variation|state|count|type -variation|state|count|value -variation|state|location|_id -variation|state|location|chr -variation|state|location|interval -variation|state|location|interval|end -variation|state|location|interval|end|comparator -variation|state|location|interval|end|max -variation|state|location|interval|end|min -variation|state|location|interval|end|type -variation|state|location|interval|end|value -variation|state|location|interval|start -variation|state|location|interval|start|comparator -variation|state|location|interval|start|max -variation|state|location|interval|start|min -variation|state|location|interval|start|type -variation|state|location|interval|start|value -variation|state|location|interval|type -variation|state|location|sequence_id -variation|state|location|species_id -variation|state|location|type -variation|state|reverse_complement -variation|state|seq_expr|location|_id -variation|state|seq_expr|location|chr -variation|state|seq_expr|location|interval -variation|state|seq_expr|location|interval|end -variation|state|seq_expr|location|interval|end|comparator -variation|state|seq_expr|location|interval|end|max -variation|state|seq_expr|location|interval|end|min -variation|state|seq_expr|location|interval|end|type -variation|state|seq_expr|location|interval|end|value -variation|state|seq_expr|location|interval|start -variation|state|seq_expr|location|interval|start|comparator -variation|state|seq_expr|location|interval|start|max -variation|state|seq_expr|location|interval|start|min -variation|state|seq_expr|location|interval|start|type -variation|state|seq_expr|location|interval|start|value -variation|state|seq_expr|location|interval|type -variation|state|seq_expr|location|sequence_id -variation|state|seq_expr|location|species_id -variation|state|seq_expr|location|type -variation|state|seq_expr|reverse_complement -variation|state|seq_expr|sequence -variation|state|seq_expr|type -variation|state|sequence -variation|state|type -variation|subject|_id -variation|subject|chr -variation|subject|gene_id -variation|subject|interval -variation|subject|sequence_id -variation|subject|species_id -variation|subject|type -variation|type -variation|variantType diff --git a/bycon/schemas/paths/individuals.txt b/bycon/schemas/paths/individuals.txt deleted file mode 100644 index 647acfda3..000000000 --- a/bycon/schemas/paths/individuals.txt +++ /dev/null @@ -1,140 +0,0 @@ -diseases|ageOfOnset -diseases|ageOfOnset|days -diseases|ageOfOnset|end -diseases|ageOfOnset|end|iso8601duration -diseases|ageOfOnset|id -diseases|ageOfOnset|iso8601duration -diseases|ageOfOnset|label -diseases|ageOfOnset|start -diseases|ageOfOnset|start|iso8601duration -diseases|ageOfOnset|weeks -diseases|diseaseCode|id -diseases|diseaseCode|label -diseases|familyHistory -diseases|notes -diseases|severity|id -diseases|severity|label -diseases|stage|id -diseases|stage|label -ethnicity|id -ethnicity|label -exposures|ageAtExposure|iso8601duration -exposures|date -exposures|duration -exposures|exposureCode|id -exposures|exposureCode|label -exposures|unit|id -exposures|unit|label -geographicOrigin|id -geographicOrigin|label -id -info -interventionsOrProcedures|ageAtProcedure -interventionsOrProcedures|ageAtProcedure|days -interventionsOrProcedures|ageAtProcedure|end -interventionsOrProcedures|ageAtProcedure|end|iso8601duration -interventionsOrProcedures|ageAtProcedure|id -interventionsOrProcedures|ageAtProcedure|iso8601duration -interventionsOrProcedures|ageAtProcedure|label -interventionsOrProcedures|ageAtProcedure|start -interventionsOrProcedures|ageAtProcedure|start|iso8601duration -interventionsOrProcedures|ageAtProcedure|weeks -interventionsOrProcedures|bodySite|id -interventionsOrProcedures|bodySite|label -interventionsOrProcedures|dateOfProcedure -interventionsOrProcedures|procedureCode|id -interventionsOrProcedures|procedureCode|label -karyotypicSex -measures|assayCode|id -measures|assayCode|label -measures|date -measures|measurementValue|id -measures|measurementValue|label -measures|measurementValue|referenceRange|high -measures|measurementValue|referenceRange|low -measures|measurementValue|referenceRange|unit|id -measures|measurementValue|referenceRange|unit|label -measures|measurementValue|typedQuantities|quantity|referenceRange|high -measures|measurementValue|typedQuantities|quantity|referenceRange|low -measures|measurementValue|typedQuantities|quantity|referenceRange|unit -measures|measurementValue|typedQuantities|quantity|unit|id -measures|measurementValue|typedQuantities|quantity|unit|label -measures|measurementValue|typedQuantities|quantity|value -measures|measurementValue|unit|id -measures|measurementValue|unit|label -measures|measurementValue|value -measures|notes -measures|observationMoment -measures|observationMoment|days -measures|observationMoment|end -measures|observationMoment|end|iso8601duration -measures|observationMoment|id -measures|observationMoment|iso8601duration -measures|observationMoment|label -measures|observationMoment|start -measures|observationMoment|start|iso8601duration -measures|observationMoment|weeks -measures|procedure|ageAtProcedure -measures|procedure|bodySite|id -measures|procedure|bodySite|label -measures|procedure|dateOfProcedure -measures|procedure|procedureCode|id -measures|procedure|procedureCode|label -pedigrees|disease|ageOfOnset -pedigrees|disease|diseaseCode|id -pedigrees|disease|diseaseCode|label -pedigrees|disease|familyHistory -pedigrees|disease|notes -pedigrees|disease|severity|id -pedigrees|disease|severity|label -pedigrees|disease|stage|id -pedigrees|disease|stage|label -pedigrees|id -pedigrees|members|affected -pedigrees|members|memberId -pedigrees|members|role|id -pedigrees|members|role|label -phenotypicFeatures|evidence|evidenceCode|id -phenotypicFeatures|evidence|evidenceCode|label -phenotypicFeatures|evidence|reference|id -phenotypicFeatures|evidence|reference|notes -phenotypicFeatures|evidence|reference|reference -phenotypicFeatures|excluded -phenotypicFeatures|featureType|id -phenotypicFeatures|featureType|label -phenotypicFeatures|modifiers|id -phenotypicFeatures|modifiers|label -phenotypicFeatures|notes -phenotypicFeatures|onset -phenotypicFeatures|onset|days -phenotypicFeatures|onset|end -phenotypicFeatures|onset|end|iso8601duration -phenotypicFeatures|onset|id -phenotypicFeatures|onset|iso8601duration -phenotypicFeatures|onset|label -phenotypicFeatures|onset|start -phenotypicFeatures|onset|start|iso8601duration -phenotypicFeatures|onset|weeks -phenotypicFeatures|resolution -phenotypicFeatures|resolution|days -phenotypicFeatures|resolution|end -phenotypicFeatures|resolution|end|iso8601duration -phenotypicFeatures|resolution|id -phenotypicFeatures|resolution|iso8601duration -phenotypicFeatures|resolution|label -phenotypicFeatures|resolution|start -phenotypicFeatures|resolution|start|iso8601duration -phenotypicFeatures|resolution|weeks -phenotypicFeatures|severity|id -phenotypicFeatures|severity|label -sex|id -sex|label -treatments|ageAtOnset|iso8601duration -treatments|cumulativeDose|referenceRange|id -treatments|cumulativeDose|referenceRange|label -treatments|doseIntervals|id -treatments|doseIntervals|label -treatments|routeOfAdministration|id -treatments|routeOfAdministration|label -treatments|treatmentCode|id -treatments|treatmentCode|label diff --git a/bycon/schemas/paths/runs.txt b/bycon/schemas/paths/runs.txt deleted file mode 100644 index 89b44b755..000000000 --- a/bycon/schemas/paths/runs.txt +++ /dev/null @@ -1,13 +0,0 @@ -biosampleId -id -individualId -info -libraryLayout -librarySelection -librarySource|id -librarySource|label -libraryStrategy -platform -platformModel|id -platformModel|label -runDate diff --git a/docs/generated/beacon-responses.md b/docs/generated/beacon-responses.md index 8002a4345..85c5b41c5 100644 --- a/docs/generated/beacon-responses.md +++ b/docs/generated/beacon-responses.md @@ -224,7 +224,7 @@ The type of response used for the endpoint depends on the requested and granted ### phenopacket @ `/phenopackets` -The Phenopacket class is a bare-bones JSON-schema rewrite of the Phenopackets v2 standard. +The Phenopacket class is a bare-bones JSON-schema rewrite of the Phenopackets v2 standard ("PXF"), for the representation of attributes supported in the `bycon` framework. At this time the Phenopackets schema is not part of the Beacon v2 default data model. However, many sub-schemas in Beacon v2 have been informed by the PXF model and development process, allowing a straightforward cross-mapping of the data structures. The type of response used for the endpoint depends on the requested and granted `responseGranularity`. In the `bycon` framework Phenopackets are generated at export time by aggregating the relevant information from the matched `individual`, `biosample`s, `analysis`(/es) and `genomicVariation`s. diff --git a/housekeepers/frequencymapsCreator.py b/housekeepers/frequencymapsCreator.py deleted file mode 100755 index ce0f4b17a..000000000 --- a/housekeepers/frequencymapsCreator.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 - -import datetime -import time -from pymongo import MongoClient -from progress.bar import Bar - -from bycon import * -from byconServiceLibs import assertSingleDatasetOrExit, ask_limit_reset, ByconBundler, GenomeBins, set_collation_types - -################################################################################ - -def main(): - initialize_bycon_service() - GB = GenomeBins() - ask_limit_reset() - ds_id = assertSingleDatasetOrExit() - set_collation_types() - print(f'=> Using data values from {ds_id} for {GB.get_genome_bin_count()} intervals...') - - data_client = MongoClient(host=DB_MONGOHOST) - data_db = data_client[ ds_id ] - coll_coll = data_db[ "collations" ] - fm_coll = data_db[ "frequencymaps" ] - ind_coll = data_db["individuals"] - bios_coll = data_db[ "biosamples" ] - cs_coll = data_db["analyses"] - - coll_ids = _filter_coll_ids(coll_coll) - coll_no = len(coll_ids) - - if not BYC["TEST_MODE"]: - bar = Bar(f'{coll_no} {ds_id} fMaps', max = coll_no, suffix='%(percent)d%%'+f' of {coll_no}' ) - - coll_i = 0 - for c_id in coll_ids: - coll = coll_coll.find_one({"id": c_id}) - c_o_id = coll.get("_id") - if not coll: - print(f"\n¡¡¡ some error - collation {c_id} not found !!!") - if not BYC["TEST_MODE"]: - bar.next() - continue - coll_i += 1 - - start_time = time.time() - - # prdbug(coll) - - BYC.update({"BYC_FILTERS":[{"id":c_id}, {"id": "EDAM:operation_3961"}]}) - BYC.update({"PAGINATED_STATUS": False}) - BYC.update({"FMAPS_SCOPE": coll.get("scope", "biosamples")}) - - prdbug(f'=> processing {c_id} with limit {BYC_PARS.get("limit")}') - RSS = ByconResultSets().datasetsResults() - pdb = ByconBundler().resultsets_frequencies_bundles(RSS) - if_bundles = pdb.get("interval_frequencies_bundles") - GB = GenomeBins() - - if not BYC["TEST_MODE"]: - bar.next() - - if len(if_bundles) < 1: - prdbug(f'No interval_frequencies for {c_id}') - continue - - analyses_count = RSS[ds_id]["analyses.id"]["target_count"] - cnv_cs_count = if_bundles[0].get("sample_count", 0) - - coll_coll.update_one( - {"_id": c_o_id}, - {"$set": {"cnv_analyses": analyses_count}} - ) - if cnv_cs_count < 1: - continue - - update_obj = { - "id": c_id, - "label": coll["label"], - "dataset_id": coll["dataset_id"], - "scope": coll["scope"], - "db_key": coll["db_key"], - "collation_type": coll["collation_type"], - "child_terms": coll["child_terms"], - "updated": datetime.datetime.now().isoformat(), - "frequencymap": { - "interval_count": GB.get_genome_bin_count(), - "binning": BYC_PARS.get("genome_binning", ""), - "intervals": if_bundles[0].get("interval_frequencies", []), - "frequencymap_samples": cnv_cs_count, - "cnv_analyses": analyses_count - } - } - - if cnv_cs_count > 2000: - proc_time = time.time() - start_time - print(f'\n==> Processed {c_id}: {cnv_cs_count} of {analyses_count} in {"%.2f" % proc_time}s: {"%.4f" % (proc_time/cnv_cs_count)}s per analysis') - - if not BYC["TEST_MODE"]: - fm_coll.delete_many( { "id": c_id } ) - fm_coll.insert_one( update_obj ) - - if not BYC["TEST_MODE"]: - bar.finish() - - -################################################################################ - -def _filter_coll_ids(coll_coll): - # collation types have been limited potentially before - f_d_s = BYC.get("filter_definitions", {}) - c_t_s = list(f_d_s.keys()) - query = { "collation_type":{"$in": c_t_s } } - if len(BYC["BYC_FILTERS"]) > 0: - f_l = [] - for c_t in BYC["BYC_FILTERS"]: - f_l.append( c_t["id"]) - if len(f_l) > 1: - query = { "$and": [ - { "collation_type":{"$in": c_t_s }}, - { "id": {"$in": f_l }} - ] - } - elif len(f_l) == 1: - query = { "$and": [ - {"collation_type":{"$in": c_t_s }}, - {"id": f_l[0]} - ] - } - - coll_ids = coll_coll.distinct("id", query) - - return coll_ids - - -################################################################################ -################################################################################ -################################################################################ - -if __name__ == '__main__': - main() diff --git a/updev.sh b/updev.sh index 70ec6365a..554fd09cd 100755 --- a/updev.sh +++ b/updev.sh @@ -14,3 +14,4 @@ pip3 install $BY --break-system-packages rm -rf ./build rm -rf ./dist rm -rf ./bycon.egg-info +# rsync -avh --exclude=logs ./ ~/Github/bycon/