Skip to content

Commit

Permalink
Start adding tests and possibly some fixes for #481
Browse files Browse the repository at this point in the history
Conversion to GFF3Feature includes source and score and sets ID and
Parent attributes. However, export to GFF from UI fails.
  • Loading branch information
dariober committed Nov 26, 2024
1 parent 1433542 commit f00faeb
Show file tree
Hide file tree
Showing 10 changed files with 226 additions and 110 deletions.
2 changes: 1 addition & 1 deletion packages/apollo-cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ $ npm install -g @apollo-annotation/cli
$ apollo COMMAND
running command...
$ apollo (--version)
@apollo-annotation/cli/0.1.21 linux-x64 node-v20.17.0
@apollo-annotation/cli/0.1.21 linux-x64 node-v20.13.0
$ apollo --help [COMMAND]
USAGE
$ apollo COMMAND
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import {
RefSeqDocument,
} from '@apollo-annotation/schemas'
import {
makeGFF3Feature,
annotationFeatureToGFF3,
splitStringIntoChunks,
} from '@apollo-annotation/shared'
import gff from '@gmod/gff'
Expand Down Expand Up @@ -179,7 +179,7 @@ export class ExportService {
const refSeqNames = Object.fromEntries(
refSeqs.map((refSeq) => [refSeq._id, refSeq.name]),
)
const gff3Feature = makeGFF3Feature(
const gff3Feature = annotationFeatureToGFF3(
flattened as unknown as AnnotationFeatureSnapshot,
undefined,
refSeqNames,
Expand Down
34 changes: 34 additions & 0 deletions packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* eslint-disable @typescript-eslint/no-floating-promises */

import { describe, it } from 'node:test'
import { assert } from 'chai'
import { readAnnotationFeatureSnapshot } from './gff3ToAnnotationFeature.test'
import { annotationFeatureToGFF3 } from './annotationFeatureToGFF3'

// Exercises annotationFeatureToGFF3 against the gene fixture: checks the
// columns and attributes of the top-level gene line and the Parent link on
// its first child.
describe('annotationFeatureToGFF3', () => {
  it('Convert one gene', () => {
    const snapshot = readAnnotationFeatureSnapshot('test_data/gene.json')
    const converted = annotationFeatureToGFF3(snapshot)
    // Only the first line of the converted feature is inspected.
    const [geneLine] = converted

    // Fixed GFF3 columns.
    assert.deepEqual(geneLine.type, 'gene')
    assert.deepEqual(geneLine.start, 1000)
    assert.deepEqual(geneLine.end, 9000)
    assert.deepEqual(geneLine.strand, '+')
    assert.deepEqual(geneLine.score, 123)
    assert.deepEqual(geneLine.source, 'test_data')

    // Attribute column.
    const geneAttributes = geneLine.attributes
    assert.deepEqual(geneAttributes?.Name, ['EDEN'])
    assert.deepEqual(geneAttributes?.testid, ['t003'])
    assert.deepEqual(geneAttributes?.ID, ['gene10001'])

    // First line of the first child feature.
    const [[mrnaLine]] = geneLine.child_features
    assert.deepEqual(mrnaLine.type, 'mRNA')
    assert.deepEqual(mrnaLine.attributes?.Parent, ['gene10001'])

    // Sanity check the annotationFeature does have a score, etc.
    // assert.deepEqual(annotationFeature.attributes?.gff_score, ['123'])
    // assert.deepEqual(annotationFeature.attributes?.gff_source, ['test_data'])
  })
})
112 changes: 112 additions & 0 deletions packages/apollo-shared/src/GFF3/annotationFeatureToGFF3.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/* eslint-disable @typescript-eslint/no-unsafe-assignment */

import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst'
import { GFF3Feature } from '@gmod/gff'

/**
 * Apollo-internal attribute keys paired with the GFF3 attribute name each one
 * is exported as. The order is significant: renamed keys are appended to the
 * exported attributes in this order.
 */
const GFF3_ATTRIBUTE_RENAMES: readonly (readonly [string, string])[] = [
  ['gff_id', 'ID'],
  ['gff_name', 'Name'],
  ['gff_alias', 'Alias'],
  ['gff_target', 'Target'],
  ['gff_gap', 'Gap'],
  ['gff_derives_from', 'Derives_from'],
  ['gff_note', 'Note'],
  ['gff_dbxref', 'Dbxref'],
  ['gff_is_circular', 'Is_circular'],
]

/** Attribute keys whose values are merged into the single `Ontology_term` attribute. */
const GFF3_ONTOLOGY_TERM_KEYS: readonly string[] = [
  'gff_ontology_term',
  'Gene Ontology',
  'Sequence Ontology',
]

/**
 * Convert an annotation feature snapshot (and, recursively, its children) into
 * a GFF3 feature.
 *
 * - `min`/`max` become `start`/`end`, with `start` shifted by +1.
 * - The `gff_source` and `gff_score` attributes are moved into the `source`
 *   and `score` columns and removed from the attribute column.
 * - `gff_*` attributes are renamed to their GFF3 names (`gff_id` -> `ID`,
 *   `gff_name` -> `Name`, ...), and ontology-term attributes are merged into
 *   `Ontology_term`.
 * - The input snapshot is never mutated; attributes are deep-copied first.
 *
 * @param feature - Snapshot to convert. `attributes` and `children` may be
 *   absent.
 * @param parentId - If non-empty, written as the feature's `Parent` attribute.
 * @param refSeqNames - Optional map from `feature.refSeq` to the name used as
 *   `seq_id`. When the map is given but has no entry, `seq_id` is `null`; when
 *   the map is omitted, `feature.refSeq` is used as is.
 * @returns The GFF3 feature (one line per location; currently always one).
 * @throws Error if `gff_score` is present but does not hold exactly one value.
 */
export function annotationFeatureToGFF3(
  feature: AnnotationFeatureSnapshot,
  parentId?: string,
  refSeqNames?: Record<string, string | undefined>,
): GFF3Feature {
  // Discontinuous locations are not exported yet: one line per feature.
  const locations = [{ start: feature.min, end: feature.max }]

  // Deep copy so the snapshot is left untouched. The `?? {}` matters:
  // JSON.stringify(undefined) yields undefined, which JSON.parse rejects with
  // a SyntaxError, so a feature without attributes used to crash the export.
  const attributes: Record<string, string[] | undefined> = JSON.parse(
    JSON.stringify(feature.attributes ?? {}),
  )

  const source = feature.attributes?.gff_source?.[0] ?? null
  delete attributes.gff_source

  const scoreValues = feature.attributes?.gff_score
  let score: number | null = null
  if (scoreValues) {
    if (scoreValues.length !== 1) {
      throw new Error('Unexpected score')
    }
    score = Number(scoreValues[0])
  }
  delete attributes.gff_score

  if (parentId) {
    attributes.Parent = [parentId]
  }

  for (const [apolloKey, gff3Key] of GFF3_ATTRIBUTE_RENAMES) {
    const values = attributes[apolloKey]
    if (values) {
      attributes[gff3Key] = values
      delete attributes[apolloKey]
    }
  }

  const ontologyTerms: string[] = []
  for (const key of GFF3_ONTOLOGY_TERM_KEYS) {
    const terms = attributes[key]
    if (terms) {
      ontologyTerms.push(...terms)
      delete attributes[key]
    }
  }
  if (ontologyTerms.length > 0) {
    attributes.Ontology_term = ontologyTerms
  }

  return locations.map((location) => ({
    start: location.start + 1,
    end: location.end,
    seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq,
    source,
    type: feature.type,
    score,
    strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null,
    // Phase is not exported yet.
    phase: null,
    attributes: Object.keys(attributes).length > 0 ? attributes : null,
    derived_features: [],
    // Children receive a Parent attribute only when this feature has an ID
    // (i.e. the snapshot carried `gff_id`); otherwise parentId is undefined.
    child_features: feature.children
      ? Object.values(feature.children).map((child) =>
          annotationFeatureToGFF3(child, attributes.ID?.[0], refSeqNames),
        )
      : [],
  }))
}
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ function readFeatureFile(fn: string): GFF3Feature[] {
return inGff
}

function readAnnotationFeatureSnapshot(fn: string): AnnotationFeatureSnapshot {
/**
 * Read the file at `fn` and parse its contents as JSON.
 *
 * The parsed value is returned typed as an AnnotationFeatureSnapshot without
 * any runtime validation of its shape.
 *
 * @param fn - Path of the JSON file to read.
 * @returns The parsed JSON value.
 */
export function readAnnotationFeatureSnapshot(
  fn: string,
): AnnotationFeatureSnapshot {
  const fileContents = readFileSync(fn).toString()
  const parsed: unknown = JSON.parse(fileContents)
  return parsed as AnnotationFeatureSnapshot
}
Expand Down
1 change: 1 addition & 0 deletions packages/apollo-shared/src/GFF3/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export * from './annotationFeatureToGFF3'
export * from './gffReservedKeys'
export * from './gff3ToAnnotationFeature'
102 changes: 0 additions & 102 deletions packages/apollo-shared/src/util.ts
Original file line number Diff line number Diff line change
@@ -1,105 +1,3 @@
/* eslint-disable @typescript-eslint/no-unsafe-assignment */

import { AnnotationFeatureSnapshot } from '@apollo-annotation/mst'
import { GFF3Feature } from '@gmod/gff'

/**
 * Legacy conversion of an annotation feature snapshot (and, recursively, its
 * children) into a GFF3 feature.
 *
 * - `min`/`max` become `start`/`end`, with `start` shifted by +1.
 * - The `source` attribute is moved into the `source` column; `score` is
 *   always `null`.
 * - An `_id` attribute is exported as `ID`; `gff_*` attributes are renamed to
 *   their GFF3 names and ontology-term attributes are merged into
 *   `Ontology_term`.
 * - The input snapshot is never mutated; attributes are deep-copied first.
 *
 * @param feature - Snapshot to convert. `attributes` and `children` may be
 *   absent.
 * @param parentId - If non-empty, written as the feature's `Parent` attribute.
 * @param refSeqNames - Optional map from `feature.refSeq` to the name used as
 *   `seq_id`. When the map is given but has no entry, `seq_id` is `null`; when
 *   the map is omitted, `feature.refSeq` is used as is.
 * @returns The GFF3 feature (one line per location; currently always one).
 */
export function makeGFF3Feature(
  feature: AnnotationFeatureSnapshot,
  parentId?: string,
  refSeqNames?: Record<string, string | undefined>,
): GFF3Feature {
  // Source key -> exported GFF3 key, in the order the keys are appended.
  const renames: readonly (readonly [string, string])[] = [
    ['_id', 'ID'],
    ['gff_name', 'Name'],
    ['gff_alias', 'Alias'],
    ['gff_target', 'Target'],
    ['gff_gap', 'Gap'],
    ['gff_derives_from', 'Derives_from'],
    ['gff_note', 'Note'],
    ['gff_dbxref', 'Dbxref'],
    ['gff_is_circular', 'Is_circular'],
  ]
  // Keys whose values are merged into a single Ontology_term attribute.
  const ontologyKeys: readonly string[] = [
    'gff_ontology_term',
    'Gene Ontology',
    'Sequence Ontology',
  ]

  // Discontinuous locations are not exported: one line per feature.
  const locations = [{ start: feature.min, end: feature.max }]

  // Deep copy so the snapshot is left untouched. The `?? {}` matters:
  // JSON.stringify(undefined) yields undefined, which JSON.parse rejects with
  // a SyntaxError, so a feature without attributes used to crash here.
  const attributes: Record<string, string[] | undefined> = JSON.parse(
    JSON.stringify(feature.attributes ?? {}),
  )

  const source = feature.attributes?.source?.[0] ?? null
  delete attributes.source

  if (parentId) {
    attributes.Parent = [parentId]
  }

  for (const [fromKey, toKey] of renames) {
    const values = attributes[fromKey]
    if (values) {
      attributes[toKey] = values
      delete attributes[fromKey]
    }
  }

  const ontologyTerms: string[] = []
  for (const key of ontologyKeys) {
    const terms = attributes[key]
    if (terms) {
      ontologyTerms.push(...terms)
      delete attributes[key]
    }
  }
  if (ontologyTerms.length > 0) {
    attributes.Ontology_term = ontologyTerms
  }

  return locations.map((location) => ({
    start: location.start + 1,
    end: location.end,
    seq_id: refSeqNames ? refSeqNames[feature.refSeq] ?? null : feature.refSeq,
    source,
    type: feature.type,
    // Score and phase are not exported by this legacy converter.
    score: null,
    strand: feature.strand ? (feature.strand === 1 ? '+' : '-') : null,
    phase: null,
    attributes: Object.keys(attributes).length > 0 ? attributes : null,
    derived_features: [],
    // Children receive a Parent attribute only when this feature ended up
    // with an ID attribute; otherwise parentId is undefined.
    child_features: feature.children
      ? Object.values(feature.children).map((child) =>
          makeGFF3Feature(child, attributes.ID?.[0], refSeqNames),
        )
      : [],
  }))
}

export function splitStringIntoChunks(
input: string,
chunkSize: number,
Expand Down
69 changes: 69 additions & 0 deletions packages/apollo-shared/test_data/gene.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"_id": "66d70e4ccc30b55b65e5f619",
"refSeq": "chr1",
"type": "gene",
"min": 999,
"max": 9000,
"strand": 1,
"attributes": {
"gff_id": ["gene10001"],
"gff_name": ["EDEN"],
"gff_score": ["123"],
"gff_source": ["test_data"],
"testid": ["t003"]
},
"children": {
"66d70e4ccc30b55b65e5f618": {
"_id": "66d70e4ccc30b55b65e5f618",
"refSeq": "chr1",
"type": "mRNA",
"min": 1049,
"max": 9000,
"strand": 1,
"children": {
"66d70e4ccc30b55b65e5f615": {
"_id": "66d70e4ccc30b55b65e5f615",
"refSeq": "chr1",
"type": "exon",
"min": 1049,
"max": 1500,
"strand": 1,
"attributes": {
"gff_id": ["exon10001"],
"testid": ["t007"]
}
},
"66d70e4ccc30b55b65e5f616": {
"_id": "66d70e4ccc30b55b65e5f616",
"refSeq": "chr1",
"type": "exon",
"min": 4999,
"max": 5500,
"strand": 1,
"attributes": {
"gff_id": ["exon10004"],
"testid": ["t010"]
}
},
"66d70e4ccc30b55b65e5f617": {
"_id": "66d70e4ccc30b55b65e5f617",
"refSeq": "chr1",
"type": "CDS",
"min": 1200,
"max": 5000,
"strand": 1,
"attributes": {
"gff_id": ["cds10001"],
"gff_name": ["edenprotein.1"],
"testid": ["t012", "t013", "t014", "t015"]
}
}
},
"attributes": {
"gff_id": ["mRNA10001"],
"gff_name": ["EDEN.1"],
"testid": ["t004", "t001", "t004"]
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ import {
} from '@apollo-annotation/mst'
import {
ValidationResultSet,
makeGFF3Feature,
splitStringIntoChunks,
} from '@apollo-annotation/shared'
import { annotationFeatureToGFF3 } from '@apollo-annotation/shared/src/GFF3/annotationFeatureToGFF3'
import gff, { GFF3Item } from '@gmod/gff'
import { getConf } from '@jbrowse/core/configuration'
import { Region, getSession } from '@jbrowse/core/util'
Expand Down Expand Up @@ -147,7 +147,7 @@ export class DesktopFileDriver extends BackendDriver {
for (const [, refSeq] of clientAssembly.refSeqs) {
const { features } = refSeq
for (const [, feature] of features) {
gff3Items.push(makeGFF3Feature(getSnapshot(feature)))
gff3Items.push(annotationFeatureToGFF3(getSnapshot(feature)))
}
}
for (const [, refSeq] of clientAssembly.refSeqs) {
Expand Down
Loading

0 comments on commit f00faeb

Please sign in to comment.