-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Start adding tests and possibly some fixes for #481
Conversion to GFF3Feature includes source and score and sets ID and Parent attributes. However, export to GFF from UI fails.
- Loading branch information
Showing
10 changed files
with
226 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/* eslint-disable @typescript-eslint/no-floating-promises */ | ||
|
||
import { describe, it } from 'node:test' | ||
import { assert } from 'chai' | ||
import { readAnnotationFeatureSnapshot } from './gff3ToAnnotationFeature.test' | ||
import { annotationFeatureToGFF3 } from './annotationFeatureToGFF3' | ||
|
||
describe('annotationFeatureToGFF3', () => {
  it('Convert one gene', () => {
    // Convert the stored gene snapshot and inspect the first GFF3 line of the
    // result.
    const snapshot = readAnnotationFeatureSnapshot('test_data/gene.json')
    const [geneLine] = annotationFeatureToGFF3(snapshot)

    // Fixed GFF3 columns of the gene line.
    assert.deepEqual(geneLine.type, 'gene')
    assert.deepEqual(geneLine.start, 1000)
    assert.deepEqual(geneLine.end, 9000)
    assert.deepEqual(geneLine.strand, '+')
    assert.deepEqual(geneLine.score, 123)
    assert.deepEqual(geneLine.source, 'test_data')

    // Attribute column: reserved keys are expected under their GFF3 names,
    // custom keys are expected unchanged.
    const geneAttributes = geneLine.attributes
    assert.deepEqual(geneAttributes?.Name, ['EDEN'])
    assert.deepEqual(geneAttributes?.testid, ['t003'])
    assert.deepEqual(geneAttributes?.ID, ['gene10001'])

    // First line of the first child feature: the mRNA, which must point back
    // at the gene through its Parent attribute.
    const [[mrnaLine]] = geneLine.child_features
    assert.deepEqual(mrnaLine.type, 'mRNA')
    assert.deepEqual(mrnaLine.attributes?.Parent, ['gene10001'])

    // Sanity check the annotationFeature does have a score, etc.
    // assert.deepEqual(annotationFeature.attributes?.gff_score, ['123'])
    // assert.deepEqual(annotationFeature.attributes?.gff_source, ['test_data'])
  })
})
112 changes: 112 additions & 0 deletions
112
packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/* eslint-disable @typescript-eslint/no-unsafe-assignment */ | ||
|
||
import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst' | ||
import { GFF3Feature } from '@gmod/gff' | ||
|
||
/**
 * Attribute keys that are stored with a `gff_` prefix on an annotation feature,
 * paired with the attribute name they are written under in the GFF3 output.
 * The order of this list is the order in which the renamed keys are added to
 * the output attributes.
 */
const GFF_ATTRIBUTE_RENAMES: readonly (readonly [string, string])[] = [
  ['gff_id', 'ID'],
  ['gff_name', 'Name'],
  ['gff_alias', 'Alias'],
  ['gff_target', 'Target'],
  ['gff_gap', 'Gap'],
  ['gff_derives_from', 'Derives_from'],
  ['gff_note', 'Note'],
  ['gff_dbxref', 'Dbxref'],
  ['gff_is_circular', 'Is_circular'],
]

/**
 * Attribute keys whose values are all merged, in this order, into the single
 * `Ontology_term` attribute of the GFF3 output.
 */
const ONTOLOGY_TERM_KEYS: readonly string[] = [
  'gff_ontology_term',
  'Gene Ontology',
  'Sequence Ontology',
]

/**
 * Convert an annotation feature snapshot, including all of its children, into
 * a GFF3 feature.
 *
 * - `start` is `feature.min + 1` and `end` is `feature.max` (presumably
 *   converting 0-based half-open coordinates to the 1-based closed coordinates
 *   of GFF3; confirm against the snapshot model).
 * - `gff_source` and `gff_score` are moved out of the attributes into the
 *   `source` and `score` columns.
 * - `gff_`-prefixed reserved attributes are renamed to their GFF3 names and the
 *   ontology attributes are merged into `Ontology_term`.
 * - Every child is converted recursively and receives this feature's first
 *   `ID` value as its `Parent` attribute.
 *
 * A feature without an `attributes` object is treated as having no attributes.
 *
 * @param feature - Snapshot of the feature to convert.
 * @param parentId - ID of the parent feature; when given, it is written as the
 * `Parent` attribute.
 * @param refSeqNames - Optional lookup from `feature.refSeq` to the name to
 * emit as `seq_id`. When given and the lookup has no entry, `seq_id` is `null`;
 * when omitted, `feature.refSeq` itself is used.
 * @returns One GFF3 line per location of the feature (currently always one).
 * @throws Error if `gff_score` is present with a number of values other than
 * one.
 */
export function annotationFeatureToGFF3(
  feature: AnnotationFeatureSnapshot,
  parentId?: string,
  refSeqNames?: Record<string, string | undefined>,
): GFF3Feature {
  // Work on a deep copy so the snapshot is never mutated. The guard matters:
  // JSON.stringify(undefined) is undefined, and JSON.parse(undefined) throws.
  const attributes: Record<string, string[] | undefined> = feature.attributes
    ? JSON.parse(JSON.stringify(feature.attributes))
    : {}

  const source = feature.attributes?.gff_source?.[0] ?? null
  delete attributes.gff_source

  if (parentId) {
    attributes.Parent = [parentId]
  }

  for (const [storedKey, gffKey] of GFF_ATTRIBUTE_RENAMES) {
    const values = attributes[storedKey]
    if (values) {
      attributes[gffKey] = values
      delete attributes[storedKey]
    }
  }

  const ontologyTerms: string[] = []
  for (const key of ONTOLOGY_TERM_KEYS) {
    const terms = attributes[key]
    if (terms) {
      ontologyTerms.push(...terms)
      delete attributes[key]
    }
  }
  if (ontologyTerms.length > 0) {
    attributes.Ontology_term = ontologyTerms
  }

  // The score column holds a single number, so anything other than exactly one
  // stored value is rejected.
  const gffScore = feature.attributes?.gff_score
  let score: number | null = null
  if (gffScore) {
    if (gffScore.length !== 1) {
      throw new Error('Unexpected score')
    }
    score = Number(gffScore[0])
  }
  delete attributes.gff_score

  // Discontinuous locations are not handled yet, so there is a single location
  // spanning the whole feature.
  const locations = [{ start: feature.min, end: feature.max }]

  return locations.map((location) => ({
    start: location.start + 1,
    end: location.end,
    seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq,
    source,
    type: feature.type,
    score,
    // A missing (or zero) strand is emitted as null; 1 is '+', anything else '-'.
    strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null,
    // Phase is not derived from the feature yet.
    phase: null,
    attributes: Object.keys(attributes).length > 0 ? attributes : null,
    derived_features: [],
    child_features: feature.children
      ? Object.values(feature.children).map((child) =>
          annotationFeatureToGFF3(child, attributes.ID?.[0], refSeqNames),
        )
      : [],
  }))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
export * from './annotationFeatureToGFF3' | ||
export * from './gffReservedKeys' | ||
export * from './gff3ToAnnotationFeature' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
{ | ||
"_id": "66d70e4ccc30b55b65e5f619", | ||
"refSeq": "chr1", | ||
"type": "gene", | ||
"min": 999, | ||
"max": 9000, | ||
"strand": 1, | ||
"attributes": { | ||
"gff_id": ["gene10001"], | ||
"gff_name": ["EDEN"], | ||
"gff_score": ["123"], | ||
"gff_source": ["test_data"], | ||
"testid": ["t003"] | ||
}, | ||
"children": { | ||
"66d70e4ccc30b55b65e5f618": { | ||
"_id": "66d70e4ccc30b55b65e5f618", | ||
"refSeq": "chr1", | ||
"type": "mRNA", | ||
"min": 1049, | ||
"max": 9000, | ||
"strand": 1, | ||
"children": { | ||
"66d70e4ccc30b55b65e5f615": { | ||
"_id": "66d70e4ccc30b55b65e5f615", | ||
"refSeq": "chr1", | ||
"type": "exon", | ||
"min": 1049, | ||
"max": 1500, | ||
"strand": 1, | ||
"attributes": { | ||
"gff_id": ["exon10001"], | ||
"testid": ["t007"] | ||
} | ||
}, | ||
"66d70e4ccc30b55b65e5f616": { | ||
"_id": "66d70e4ccc30b55b65e5f616", | ||
"refSeq": "chr1", | ||
"type": "exon", | ||
"min": 4999, | ||
"max": 5500, | ||
"strand": 1, | ||
"attributes": { | ||
"gff_id": ["exon10004"], | ||
"testid": ["t010"] | ||
} | ||
}, | ||
"66d70e4ccc30b55b65e5f617": { | ||
"_id": "66d70e4ccc30b55b65e5f617", | ||
"refSeq": "chr1", | ||
"type": "CDS", | ||
"min": 1200, | ||
"max": 5000, | ||
"strand": 1, | ||
"attributes": { | ||
"gff_id": ["cds10001"], | ||
"gff_name": ["edenprotein.1"], | ||
"testid": ["t012", "t013", "t014", "t015"] | ||
} | ||
} | ||
}, | ||
"attributes": { | ||
"gff_id": ["mRNA10001"], | ||
"gff_name": ["EDEN.1"], | ||
"testid": ["t004", "t001", "t004"] | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.