Skip to content

Commit

Permalink
Merge branch 'main' into linkouts
Browse files Browse the repository at this point in the history
  • Loading branch information
adf-ncgr committed Mar 15, 2024
2 parents 391b53c + dd87f63 commit b3223af
Show file tree
Hide file tree
Showing 91 changed files with 1,879 additions and 231 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
identifier: Arachis.pan3.BYQ9

provenance: "The files in this directory are a product of the staff of the LegumeInfo, PeanutBase, and SoyBase project teams. The method is described here: https://github.com/legumeinfo/pandagma"

source: "https://data.legumeinfo.org"

synopsis: "Pangene set for Arachis species, based on A. hypogaea, A. duranensis, A. ipaensis, and A. stenosperma. This pangene set includes four annotation sets from A. hypogaea and one each from A. duranensis, A. ipaensis, and A. stenosperma."

scientific_name: Arachis

taxid: 3826

annotations_main:
- arahy.BaileyII.gnm1.ann1
- arahy.Tifrunner.gnm1.ann2
- arahy.Tifrunner.gnm2.ann1
- arahy.Tifrunner.gnm2.ann2

annotations_extra:
- aradu.V14167.gnm1.ann1
- araip.K30076.gnm1.ann1
- arast.V10309.gnm1.ann1

description: "Pan-gene set for Arachis species, spanning 4 species and 7 annotation sets, calculated using the pandagma pipeline, version 2.0 (February, 2024)"

original_file_creation_date: "2024-02-14"

local_file_creation_date: "2024-02-14"

dataset_release_date: "2024-02-14"

contributors: The International Peanut Genome Initiative and authors of all constituent genomes and annotations

data_curators: Steven Cannon, Andrew Farmer

public_access_level: public

license: Open

keywords: Arachis, peanut, pan-gene, pangene, orthogroup

Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@ strains:
origin: Argentina, via USDA National Plant Germplasm Repository
description: "Arachis duranensis strain V14167 originates in a region east of the Andes, in the vicinity of northern Argentina and southern Bolivia. This accession is thought to be one of the diploid ancestors of cultivated peanut. Its genome has been sequenced as part of the Peanut Genomics Initiative, to help in the accurate assembly of the more important cultivated peanut, A. hypogaea. This accession is described in Bertioli et al., 2016 (doi:10.1038/ng.3517)"
resources:
- name: Peanutbase Genome Browser (GBrowse)
URL: "https://legacy.peanutbase.org/gb2/gbrowse/Aradu1.0"
description: "GBrowse for V14167 assembly 1.0"
- name: Peanutbase Genome Browser (JBrowse)
URL: "https://legacy.peanutbase.org/genomes/jbrowse?data=Aradu1.0"
description: "JBrowse for V14167 assembly 1.0"
- name: "Genome assembly at NCBI"
URL: "http://www.ncbi.nlm.nih.gov/assembly/GCA_000817695.1/"
description: "Genome assembly at NCBI, V14167"
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,6 @@ strains:
origin: Southeastern United States
description: "Cultivar Tifrunner is a runner-type peanut bred in the United States and widely planted as a variety and widely used in breeding programs. The genome of this cultivar is described in Bertioli et al., 2019 (doi:10.1038/s41588-019-0405-z)."
resources:
- name: Peanutbase Genome Browser (GBrowse)
URL: "https://legacy.peanutbase.org/gbrowse_peanut1.0"
description: "GBrowse for Tifrunner assembly 1.0"
- name: Peanutbase Genome Browser (JBrowse)
URL: "https://legacy.peanutbase.org/genomes/jbrowse?data=arahy.Tifrunner.gnm1"
description: "JBrowse for Tifrunner assembly 1.0"
- name: PeanutBase Genome Browser (GBrowse)
URL: "https://legacy.peanutbase.org/gb2/gbrowse/arahy.Tifrunner.gnm2/"
description: "GBrowse for Tifrunner assembly 2.0"
- name: NCBI Tifrunner assembly 2.0
URL: "https://www.ncbi.nlm.nih.gov/nuccore/PIVG00000000"
description: "Tifrunner gnm2.ann2 (assembly 2.0 annotation 2.0) at NCBI"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@ strains:
origin: Southern Bolivia
description: "Arachis ipaensis accession K30076 is the only accession collected from what is thought to be the origin of tetraploid peanut, Villa Montes Bolivia. It was likely carried there by early agriculturalists from a larger population several hundred kilometers to the north (Bertioli et al., 2016: doi:10.1038/ng.3517)"
resources:
- name: Peanutbase Genome Browser (GBrowse)
URL: "https://legacy.peanutbase.org/gb2/gbrowse/Araip1.0"
description: "GBrowse for K30076 assembly 1.0"
- name: Peanutbase Genome Browser (JBrowse)
URL: "https://legacy.peanutbase.org/genomes/jbrowse?data=Araip1.0"
description: "JBrowse for K30076 assembly 1.0"
- name: "Genome assembly at NCBI"
URL: "http://www.ncbi.nlm.nih.gov/assembly/GCA_000816755.1/"
description: "Genome assembly at NCBI, K30076"
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"parameters": {
"out_path": "/busco_wd",
"cpu": "16",
"force": "False",
"restart": "False",
"quiet": "False",
"download_path": "/erdos/adf/busco_downloads",
"datasets_version": "odb10",
"offline": "False",
"download_base_url": "https://busco-data.ezlab.org/v5/data/",
"auto-lineage": "False",
"auto-lineage-prok": "False",
"auto-lineage-euk": "False",
"update-data": "False",
"use_augustus": "False",
"batch_mode": "False",
"tar": "False",
"in": "/busco_wd/run_busco.bash.JhWvm4/arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa",
"out": "BUSCO_proteins_fabales_odb10",
"mode": "proteins",
"lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10",
"main_out": "/busco_wd/BUSCO_proteins_fabales_odb10",
"lineage_results_dir": "run_fabales_odb10",
"domain": "eukaryota"
},
"lineage_dataset": {
"name": "fabales_odb10",
"creation_date": "2020-08-05",
"number_of_buscos": "5366",
"number_of_species": "10"
},
"versions": {
"hmmsearch": 3.1,
"busco": "5.4.3"
},
"results": {
"one_line_summary": "C:97.2%[S:92.0%,D:5.2%],F:0.2%,M:2.6%,n:5366",
"Complete": 97.2,
"Single copy": 92.0,
"Multi copy": 5.2,
"Fragmented": 0.2,
"Missing": 2.6,
"n_markers": 5366,
"domain": "eukaryota"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
3f1a18896d5177db5fe45542ea95e258 ./MANIFEST.V10309.gnm1.ann1.CZRZ.correspondence.yml
614df38a7cb60dc67f46a110be552eed ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.gzi
d8e6e51a1d21345c0cd09fae2f83216b ./arast.V10309.gnm1.ann1.CZRZ.gene_models_noncoding.gff3.gz
e1d17910856619103237a051c5ef1c63 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz.tbi
ca2619b180190a64447f8210e2d5eec6 ./arast.V10309.gnm1.ann1.CZRZ.legfed_v1_0.M65K.gfa.tsv.gz
019172ff554aa12f41de5ae259c02e38 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.bed.gz
ec8680eb9ca6dfdb54f6add1cc7e1041 ./CHANGES.V10309.gnm1.ann1.CZRZ.txt
18ab8a5e9e76c6cbb9627238879c77d6 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.gzi
e2a22e27ca2f90fd35972be47dd887e0 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.gzi
7f128ca5e449152a6726d0d26a365b66 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_exons.gff3.gz
e2d0f1b8ac44771183d2192e529dad78 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz
5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.fai
b831cf6e0d31dd98b5445c62d49c10fe ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.gzi
0bcd21094be2e521736a5f66d3727203 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz
7bc661e46f60e9308d5cd2663776e081 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.fai
04ef98566b55ad667b058b1bfb3dc8df ./arast.V10309.gnm1.ann1.CZRZ.featid_map.tsv.gz
daed0de5d10057896f6b4f1ac5196e8b ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz
d0edbcc9dbd65df0dfc4a6f0f7b91d45 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.gzi
14031d715fea66f1e566653240e66c96 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz
1b9fe9691705b63380ff110c8aedff36 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.fai
9eb1a8a66ce49dffbf62d20395271da5 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.full_table.tsv.gz
8c144993f8edb2552eecf7d3b9a63acb ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.txt
5987914f4a2958d9d07f39a607152e03 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json
6d706c9b8df610e5b9b93de84d864128 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.gzi
302486920ff3179e56516d1f8a68e9b5 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz
99e732d008bec0ab1ed5d278babecea0 ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz
9ec146a3ecd614dcb09c58d4acf5548f ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz
e24016df9c5d0d1b78a7d55f77115113 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.fai
2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.fai
b42d54c9910048ddb841ca2ff57eac91 ./MANIFEST.V10309.gnm1.ann1.CZRZ.descriptions.yml
d037c86f41256501f7f40d94cb6a2d5e ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.fai
cef1c92ab0413ef9aacbadad1b2da421 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz
cc9b81836241eb801e8856db6f09bb7b ./arast.V10309.gnm1.ann1.CZRZ.gene_models_noncoding.gff3.gz.tbi
e7e13718aea32fbff026133e7bb1bf33 ./README.V10309.gnm1.ann1.CZRZ.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
identifier: V10309.gnm1.ann1.CZRZ

provenance: "The files in this directory originated from GenBank, for RefSeq genome sequence GCF_014773155.1, submitted by the International Peanut Genome Initiative in 2018. The GenBank source is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and Peanutbase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and Peanutbase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately."

source: "https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_014773155.1"

synopsis: "GenBank RefSeq annotation for Genome assembly 1 for Arachis stenosperma, genotype V10309"

scientific_name: Arachis stenosperma

taxid: 217475

genotype:
- V10309

chromosome_prefix: Chr

supercontig_prefix: Scaffold

description: "This annotation was produced by GenBank on the RefSeq assembly V10309 in 2023"

genbank_accession: GCF_014773155.1

original_file_creation_date: "2023-10-01"

local_file_creation_date: "2024-01-17"

dataset_release_date: "2024-01-22"

publication_doi: 10.1038/s41588-019-0405-z

publication_title: "The genome sequence of segmental allotetraploid peanut Arachis hypogaea"

contributors: The International Peanut Genome Initiative; lead assembly group Jeremy Schmutz, Jerry Jenkins, Jane Grimwood; project leads David Bertioli, Soraya Bertioli, Brian Scheffler, Scott Jackson, Peggy Ozias-Akins

citation: "Bertioli, D.J., Jenkins, J., Clevenger, J. et al. The genome sequence of segmental allotetraploid peanut Arachis hypogaea. Nat Genet 51, 877-884 (2019). https://doi.org/10.1038/s41588-019-0405-z"

data_curators: Steven Cannon, Andrew Farmer

public_access_level: public

license: open

keywords: wild peanut, Arachis stenosperma

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"use_augustus": "False",
"batch_mode": "False",
"tar": "False",
"in": "/busco_wd/run_busco.bash.HhFEIb/arast.V10309.gnm1.PFL2.genome_main.fna",
"in": "/busco_wd/run_busco.bash.TUc4DD/arast.V10309.gnm1.PFL2.genome_main.fna",
"out": "BUSCO_genome_fabales_odb10",
"mode": "euk_genome_met",
"lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
c908fbab9e3ba8bd12ea4c15d81cf15f ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.txt
7f781e9bdc60c3f91ad6b50a4d77e8a8 ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json
fab12c1ce8e88fe52d35b2a19f553b9f ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.full_table.tsv.gz
d41d8cd98f00b204e9800998ecf8427e ./BUSCO/*.busco.fabales_odb10.full_table.tsv.gz
faec240bbac90aa193a29ac1ea445fa5 ./MANIFEST.PFL2.correspondence.yml
039116c908fc69fccfd9fb7e87dbf914 ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.fai
ec29d4bc0070da9437427107daad2ce8 ./CHANGES.V10309.gnm1.PFL2.txt
73035054706eb276dcfe093afeeb7f88 ./MANIFEST.PFL2.descriptions.yml
79e6bb587c84bf683645b6dc852af8b3 ./arast.V10309.gnm1.PFL2.agp_placed.txt.gz
de9db7d3cde8f1bc4165d001bcdaae9d ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.gzi
b1b1d846422d32f2bf7e06c7cd4d4663 ./usage_policy.txt
6b55f7b9c5339a9d63dc7ae383775653 ./arast.V10309.gnm1.PFL2.agp_unplaced.txt.gz
7c4ecbd0d8cd4cb34c2148b999059d99 ./arast.V10309.gnm1.PFL2.genome_main.fna.gz
21de1a074eae6c2e77f6ed6d312e4fdf ./README.V10309.gnm1.PFL2.yml
b437e9a2e1a98b7ffcc6e5c19ed2a467 ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.fai
23600f3d3e963fca4db8171463a5511b ./arast.V10309.gnm1.PFL2.genome_main.fna.gz
23395511f3bea713fb4e99feffe7c7a1 ./arast.V10309.gnm1.PFL2.seqid_map.tsv.gz
06a6c1322f3b6d318ec252ca06998774 ./arast.V10309.gnm1.PFL2.initial_seqid_map.tsv.gz
c9b30ab32ad5a4d5b0d36264accf19c9 ./README.V10309.gnm1.PFL2.yml
a0519ad7023d621523e1a4dbbfe69870 ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json
02058aeb14f69544672211741da0402f ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.txt
c7cffe7305a3c19b8b5787357ffb90d4 ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.full_table.tsv.gz
083bb2fc52e62d669290a8b8e291be4b ./MANIFEST.V10309.gnm1.PFL2.descriptions.yml
d13a0d5c902dff37ab7fbaeb9682e9fc ./CHANGES.V10309.gnm1.PFL2.txt
3dde83ffee922953772e2933f032e2e1 ./MANIFEST.V10309.gnm1.PFL2.correspondence.yml
b294aff4939ad016dfeefec23f28154f ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.gzi
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
---
identifier: V10309.gnm1.PFL2

provenance: "The files in this directory originated from http://peanutbase.org. There is also a corresponding genome assembly at GenBank. Files in this directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and PeanutBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers. If you use the data in the present directory, please 1) please cite the data appropriately - generally referring to the original publications for this data; and if you make use of any significant modifications in the files, then please also cite the respective database project(s) related to this directory."
provenance: "The files in this directory originated from GenBank, for RefSeq genome sequence GCF_014773155.1, submitted by the International Peanut Genome Initiative in 2018. The GenBank source is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and Peanutbase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and Peanutbase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately."

source: http://peanutbase.org
source: "https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_014773155.1"

synopsis: Genome assembly 1 for Arachis stenosperma, accession V10309
synopsis: "Genome assembly 1 for Arachis stenosperma, genotype V10309"

scientific_name: Arachis stenosperma

taxid: 217475

bioproject: PRJNA610652

scientific_name_abbrev: arast

genotype:
- V10309

Expand All @@ -24,22 +20,27 @@ supercontig_prefix: Scaffold

description: "Genome assembly 1 for Arachis stenosperma, accession V10309, with sequence generated using PacBio Sequel and Illumina HiSeq. Arachis stenosperma Krapov. & W.C. Greg. is a wild peanut relative native to central Brazil; in the past it was cultivated by native peoples of South America and was carried to the Atlantic coast, where populations persist to the present day. It is a source of strong pest and disease resistance and has been used by peanut breeders and geneticists in interspecific hybrids. A. stenosperma is a diploid species in the A-genome group of Arachis, which has similarity to the A genome of tetraploid cultivated peanut (A. hypogaea). This accession was sequenced with PacBio long reads, with contributions by USDA-ARS and researchers at Mars Inc. and the University of Georgia."

genbank_accession: GCA_014773155.1
genbank_accession: GCF_014773155.1

original_file_creation_date: "2023-10-01"

original_file_creation_date: "2020-10-02"
local_file_creation_date: "2024-01-17"

local_file_creation_date: "2020-05-20"
dataset_release_date: "2024-01-22"

publication_doi: 10.1016/B978-1-63067-038-2.00008-3
publication_doi: 10.1038/s41588-019-0405-z

dataset_release_date: "2022-12-01"
publication_title: "The genome sequence of segmental allotetraploid peanut Arachis hypogaea"

contributors: Clevenger,J., Cannon,S.B., Youngblood, C., Vaughn,J.N., Abernathy, B, Bertioli,D.J., Scheffler, B.
contributors: The International Peanut Genome Initiative; lead assembly group Jeremy Schmutz, Jerry Jenkins, Jane Grimwood; project leads David Bertioli, Soraya Bertioli, Brian Scheffler, Scott Jackson, Peggy Ozias-Akins

data_curators: Steven Cannon
citation: "Bertioli, D.J., Jenkins, J., Clevenger, J. et al. The genome sequence of segmental allotetraploid peanut Arachis hypogaea. Nat Genet 51, 877-884 (2019). https://doi.org/10.1038/s41588-019-0405-z"

public_access_level: public, with usage restriction
data_curators: Steven Cannon, Andrew Farmer

public_access_level: public

license: open

keywords: Arachis stenosperma
keywords: wild peanut, Arachis stenosperma

Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ resources:
URL: "https://gcv.legumeinfo.org/gene;lis=cajca.C.cajan_19480?q=C.cajan_19480&sources=lis&algorithm=repeat&match=10&mismatch=-1&gap=-1&score=30&threshold=25&bmatched=20&bintermediate=10&bmask=10&linkage=average&cthreshold=20&neighbors=10&matched=4&intermediate=5&bregexp=&border=chromosome&regexp=&order=distance"
description: "Browser for dynamically discovering and viewing genomic synteny across selected species."
- name: Germplasm GIS
URL: "https://legacy.legumeinfo.org/germplasm/map#?zoom=6&maxRecs=200&taxonQuery=Cajanus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false"
URL: "https://germplasm-map.legumeinfo.org/#?zoom=6&maxRecs=200&taxonQuery=Cajanus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false"
description: "Geographic information system viewer, showing collection locations for Cajanus data held by the U.S. National Plant Germplasm System."
Loading

0 comments on commit b3223af

Please sign in to comment.