From df1ab4f37852f25089adce7943c8b1db8d25f247 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Sun, 14 Jan 2024 08:46:14 -0600 Subject: [PATCH 01/27] Add assemblies and annotations for two accessions of soybean and one of M. truncatula --- ...YY3.busco.fabales_odb10.short_summary.json | 47 ++++++++++++++ .../CHECKSUM.Lee.gnm3.ann1.ZYY3.md5 | 21 +++++++ .../README.Lee.gnm3.ann1.ZYY3.yml | 46 ++++++++++++++ ...7HW.busco.fabales_odb10.short_summary.json | 47 ++++++++++++++ .../CHECKSUM.Wm82.gnm5.ann1.J7HW.md5 | 21 +++++++ .../README.Wm82.gnm5.ann1.J7HW.yml | 46 ++++++++++++++ ...G1C.busco.fabales_odb10.short_summary.json | 62 +++++++++++++++++++ .../Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml | 46 ++++++++++++++ ...RKG.busco.fabales_odb10.short_summary.json | 62 +++++++++++++++++++ .../Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml | 46 ++++++++++++++ .../README.HM078.gnm1.ann1.3RNP.yml | 41 ++++++++++++ .../CHECKSUM.HM078.gnm1.Q3TM.md5 | 4 ++ .../README.HM078.gnm1.Q3TM.yml | 43 +++++++++++++ 13 files changed, 532 insertions(+) create mode 100644 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/BUSCO/glyma.Lee.gnm3.ann1.ZYY3.busco.fabales_odb10.short_summary.json create mode 100644 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/CHECKSUM.Lee.gnm3.ann1.ZYY3.md5 create mode 100644 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml create mode 100644 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/BUSCO/glyma.Wm82.gnm5.ann1.J7HW.busco.fabales_odb10.short_summary.json create mode 100644 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/CHECKSUM.Wm82.gnm5.ann1.J7HW.md5 create mode 100644 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml create mode 100644 Glycine/max/genomes/Lee.gnm3.VG1C/BUSCO/glyma.Lee.gnm3.VG1C.busco.fabales_odb10.short_summary.json create mode 100644 Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml create mode 100644 Glycine/max/genomes/Wm82.gnm5.NRKG/BUSCO/glyma.Wm82.gnm5.NRKG.busco.fabales_odb10.short_summary.json create 
mode 100644 Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml create mode 100644 Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/README.HM078.gnm1.ann1.3RNP.yml create mode 100644 Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 create mode 100644 Medicago/truncatula/genomes/HM078.gnm1.Q3TM/README.HM078.gnm1.Q3TM.yml diff --git a/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/BUSCO/glyma.Lee.gnm3.ann1.ZYY3.busco.fabales_odb10.short_summary.json b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/BUSCO/glyma.Lee.gnm3.ann1.ZYY3.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..575e9690 --- /dev/null +++ b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/BUSCO/glyma.Lee.gnm3.ann1.ZYY3.busco.fabales_odb10.short_summary.json @@ -0,0 +1,47 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.r5aXtU/glyma.Lee.gnm3.ann1.ZYY3.protein.faa", + "out": "BUSCO_proteins_fabales_odb10", + "mode": "proteins", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_proteins_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:96.6%[S:41.7%,D:54.9%],F:0.5%,M:2.9%,n:5366", + "Complete": 96.6, + "Single copy": 41.7, + "Multi copy": 54.9, + "Fragmented": 0.5, 
+ "Missing": 2.9, + "n_markers": 5366, + "domain": "eukaryota" + } +} \ No newline at end of file diff --git a/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/CHECKSUM.Lee.gnm3.ann1.ZYY3.md5 b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/CHECKSUM.Lee.gnm3.ann1.ZYY3.md5 new file mode 100644 index 00000000..11bc1c86 --- /dev/null +++ b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/CHECKSUM.Lee.gnm3.ann1.ZYY3.md5 @@ -0,0 +1,21 @@ +35a2aeeb39fadc8f69f576b7572d14c9 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/CHANGES.Lee.gnm3.ann1.ZYY3.txt +8f39da602bc1f6f8656806048b229d51 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.cds.fna.gz +d64b9aebb294a47c8418b5d51f10bc30 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.cds.fna.gz.fai +be60aac3ab164fe9de9fb030b20c4bd5 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.cds.fna.gz.gzi +a457c0a6207a867cc431249d14d22995 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.featid_map.tsv.gz +94ab994663c5c6a01ee738d4c774757a Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.gene_models_main.bed.gz +cd0cd44aa0256cdf6e2d4e51e153eb96 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.gene_models_main.bed.gz.tbi +a932a61cc62b0b0133de6376d15a2fe6 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.gene_models_main.gff3.gz +6bfc0aa4587d55a8154b736b274ced4a Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.gene_models_main.gff3.gz.tbi +dcebf90f4fcfc956f15f998de6bf40a3 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.iprscan.gff3.gz +311af8e6854ca75079d4a8ec580d9e36 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.iprscan.gff3.gz.tbi +35fdd32bc07438effb21650ce140e5dd Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.legfed_v1_0.M65K.gfa.tsv.gz +8e0c732143f283d26f5e8dd0eacee4b9 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.mrna.fna.gz 
+2b79a734b8c3e7d1406a35c9a07c2dc6 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.mrna.fna.gz.fai +7137af8e442ed1dc9f3673bbea783dca Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.mrna.fna.gz.gzi +89e35db9a7ca3443558b7fffe5dde2cd Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.protein.faa.gz +3c4e71efb952f31eaabd63b2b45b20be Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.protein.faa.gz.fai +225a7fdccf5b3f1f4dd7b352ade5f979 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/glyma.Lee.gnm3.ann1.ZYY3.protein.faa.gz.gzi +472de9144ba6f3812ec44029baee8999 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/MANIFEST.Lee.gnm3.ann1.ZYY3.correspondence.yml +dc0996846a41cd92a0fff070a2e34435 Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/MANIFEST.Lee.gnm3.ann1.ZYY3.descriptions.yml +649a6030393505f6518da5aec869e28e Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml diff --git a/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml new file mode 100644 index 00000000..84b4b00a --- /dev/null +++ b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml @@ -0,0 +1,46 @@ +--- +identifier: Lee.gnm3.ann1.ZYY3 + +provenance: "The files in this directory originated from https://doi.org/10.6084/m9.figshare.22688281.v3. That repository is considered the primary and authoritative for this data; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. 
If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://doi.org/10.6084/m9.figshare.22688281.v3" + +synopsis: "Annotation 1 for Glycine max accession Lee, genome assembly 3" + +scientific_name: Glycine max + +taxid: 3847 + +genotype: + - Lee + +chromosome_prefix: Chr + +supercontig_prefix: NONE + +description: "Gene annotation resources. See full description at Garg, Khan et al., 2023." + +dataset_doi: 10.6084/m9.figshare.22688281.v3 + +original_file_creation_date: "2023-07-22" + +local_file_creation_date: "2023-10-11" + +dataset_release_date: "2023-11-01" + +publication_doi: 10.1002/tpg2.20382 + +publication_title: "Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research" + +contributors: Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT + +citation: "Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT. Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research. Plant Genome. 2023 Sep 25:e20382. doi: 10.1002/tpg2.20382. Epub ahead of print. PMID: 37749941." 
+ +data_curators: Steven Cannon + +public_access_level: public + +license: Open, with usage agreement + +keywords: soybean, Lee + diff --git a/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/BUSCO/glyma.Wm82.gnm5.ann1.J7HW.busco.fabales_odb10.short_summary.json b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/BUSCO/glyma.Wm82.gnm5.ann1.J7HW.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..92c25240 --- /dev/null +++ b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/BUSCO/glyma.Wm82.gnm5.ann1.J7HW.busco.fabales_odb10.short_summary.json @@ -0,0 +1,47 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.VfRTNY/glyma.Wm82.gnm5.ann1.J7HW.protein.faa", + "out": "BUSCO_proteins_fabales_odb10", + "mode": "proteins", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_proteins_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:96.7%[S:41.4%,D:55.3%],F:0.6%,M:2.7%,n:5366", + "Complete": 96.7, + "Single copy": 41.4, + "Multi copy": 55.3, + "Fragmented": 0.6, + "Missing": 2.7, + "n_markers": 5366, + "domain": "eukaryota" + } +} \ No newline at end of file diff --git a/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/CHECKSUM.Wm82.gnm5.ann1.J7HW.md5 
b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/CHECKSUM.Wm82.gnm5.ann1.J7HW.md5 new file mode 100644 index 00000000..b765ace8 --- /dev/null +++ b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/CHECKSUM.Wm82.gnm5.ann1.J7HW.md5 @@ -0,0 +1,21 @@ +35a2aeeb39fadc8f69f576b7572d14c9 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/CHANGES.Wm82.gnm5.ann1.J7HW.txt +41315b08d4e17a02872ec532af8553da Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.cds.fna.gz +866505e9e117bfda3e5a75690ff51021 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.cds.fna.gz.fai +4e305621731cf6f133a172a7bbe5d9ed Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.cds.fna.gz.gzi +eef48f2ae3eee4c7da405855e46b46b4 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.featid_map.tsv.gz +f42a28ded0121394beae1c7d7290fc8d Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.gene_models_main.bed.gz +33e0fcd083b9d5bb5c0646c3d6887546 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.gene_models_main.bed.gz.tbi +0fe6a974403696be532b02678fc97fce Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.gene_models_main.gff3.gz +60860b757da7df92d5e8ce348a3d2022 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.gene_models_main.gff3.gz.tbi +8206add9bfa12eba7e3bff145b808822 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.iprscan.gff3.gz +a5c04d79d0d02074848688e6a9afd7f2 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.iprscan.gff3.gz.tbi +c6b1565f53eeef6d10c7fc82958ed8ea Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.legfed_v1_0.M65K.gfa.tsv.gz +8138d55fd0e5bdcf92695c0879aad50c Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.mrna.fna.gz +907b8d41833302223863833de58beaa9 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.mrna.fna.gz.fai +7d0bb4e6611f8a58ae1a8f3b796a713c 
Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.mrna.fna.gz.gzi +c87fc883643f7c3740db8645e0727574 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.protein.faa.gz +7f1b479ec2cbb79d8837f7dda85818b2 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.protein.faa.gz.fai +404d0eb61d0a2e7792028dd893cc7d33 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/glyma.Wm82.gnm5.ann1.J7HW.protein.faa.gz.gzi +40a6065db3d7debe6eef91ea90ad2ce2 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/MANIFEST.Wm82.gnm5.ann1.J7HW.correspondence.yml +e66cf7307ff21d96de8008ba9414ddb3 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/MANIFEST.Wm82.gnm5.ann1.J7HW.descriptions.yml +5558eccb3a7a2d342a8b1a7c58b8fd61 Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml diff --git a/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml new file mode 100644 index 00000000..2bd83f33 --- /dev/null +++ b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml @@ -0,0 +1,46 @@ +--- +identifier: Wm82.gnm5.ann1.J7HW + +provenance: "The files in this directory originated from https://doi.org/10.6084/m9.figshare.22688281.v3. That repository is considered the primary and authoritative for this data; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. 
If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://doi.org/10.6084/m9.figshare.22688281.v3" + +synopsis: "Annotation 1 for Glycine max accession Williams 82 (Wm82), genome assembly 5" + +scientific_name: Glycine max + +taxid: 3847 + +genotype: + - Wm82 + +chromosome_prefix: Chr + +supercontig_prefix: NONE + +description: "Gene annotation resources. See full description at Garg, Khan et al., 2023." + +dataset_doi: 10.6084/m9.figshare.22688281.v3 + +original_file_creation_date: "2023-07-22" + +local_file_creation_date: "2023-10-11" + +dataset_release_date: "2023-11-01" + +publication_doi: 10.1002/tpg2.20382 + +publication_title: "Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research" + +contributors: Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT + +citation: "Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT. Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research. Plant Genome. 2023 Sep 25:e20382. doi: 10.1002/tpg2.20382. Epub ahead of print. PMID: 37749941." 
+ +data_curators: Steven Cannon + +public_access_level: public + +license: Open, with usage agreement + +keywords: soybean, Wm82, Williams 82 + diff --git a/Glycine/max/genomes/Lee.gnm3.VG1C/BUSCO/glyma.Lee.gnm3.VG1C.busco.fabales_odb10.short_summary.json b/Glycine/max/genomes/Lee.gnm3.VG1C/BUSCO/glyma.Lee.gnm3.VG1C.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..779efec9 --- /dev/null +++ b/Glycine/max/genomes/Lee.gnm3.VG1C/BUSCO/glyma.Lee.gnm3.VG1C.busco.fabales_odb10.short_summary.json @@ -0,0 +1,62 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.6c7Msp/glyma.Lee.gnm3.VG1C.genome_main.fna", + "out": "BUSCO_genome_fabales_odb10", + "mode": "euk_genome_met", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_genome_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota", + "max_intron": "90000", + "max_seq_len": "120000", + "metaeuk_parameters": "", + "metaeuk_rerun_parameters": "", + "contig_break": "10", + "scaffold_composition": "False", + "gene_predictor": "metaeuk" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "bbtools": "38.96", + "metaeuk": "5.34c21f2", + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:99.5%[S:36.5%,D:63.0%],F:0.1%,M:0.4%,n:5366", + "Complete": 99.5, + "Single copy": 36.5, + "Multi copy": 63.0, + "Fragmented": 0.1, + 
"Missing": 0.4, + "n_markers": 5366, + "domain": "eukaryota", + "Number of scaffolds": "22", + "Number of contigs": "48", + "Total length": "1016382015", + "Percent gaps": "0.003%", + "Scaffold N50": "51595525", + "Contigs N50": "32157198" + } +} \ No newline at end of file diff --git a/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml b/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml new file mode 100644 index 00000000..16b352ac --- /dev/null +++ b/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml @@ -0,0 +1,46 @@ +--- +identifier: Lee.gnm3.VG1C + +provenance: "The files in this directory originated from https://doi.org/10.6084/m9.figshare.22688281.v3. That repository is considered the primary and authoritative for this data; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://doi.org/10.6084/m9.figshare.22688281.v3" + +synopsis: "Glycine max accession Lee, genome assembly 3" + +scientific_name: Glycine max + +taxid: 3847 + +genotype: + - Lee + +chromosome_prefix: Chr + +supercontig_prefix: NONE + +description: "Sequenced using PacBio RSII and HiFi reads, with Bionano optical maps to aid in scaffolding. See full description at Garg, Khan et al., 2023." 
+ +dataset_doi: 10.6084/m9.figshare.22688281.v3 + +original_file_creation_date: "2023-07-22" + +local_file_creation_date: "2023-10-11" + +dataset_release_date: "2023-11-01" + +publication_doi: 10.1002/tpg2.20382 + +publication_title: "Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research" + +contributors: Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT + +citation: "Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT. Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research. Plant Genome. 2023 Sep 25:e20382. doi: 10.1002/tpg2.20382. Epub ahead of print. PMID: 37749941." + +data_curators: Steven Cannon + +public_access_level: public + +license: Open, with usage agreement + +keywords: soybean, Lee + diff --git a/Glycine/max/genomes/Wm82.gnm5.NRKG/BUSCO/glyma.Wm82.gnm5.NRKG.busco.fabales_odb10.short_summary.json b/Glycine/max/genomes/Wm82.gnm5.NRKG/BUSCO/glyma.Wm82.gnm5.NRKG.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..43708da2 --- /dev/null +++ b/Glycine/max/genomes/Wm82.gnm5.NRKG/BUSCO/glyma.Wm82.gnm5.NRKG.busco.fabales_odb10.short_summary.json @@ -0,0 +1,62 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.YeMcrA/glyma.Wm82.gnm5.NRKG.genome_main.fna", + "out": "BUSCO_genome_fabales_odb10", + "mode": "euk_genome_met", + "lineage_dataset": 
"/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_genome_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota", + "max_intron": "90000", + "max_seq_len": "120000", + "metaeuk_parameters": "", + "metaeuk_rerun_parameters": "", + "contig_break": "10", + "scaffold_composition": "False", + "gene_predictor": "metaeuk" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "bbtools": "38.96", + "metaeuk": "5.34c21f2", + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:99.5%[S:36.3%,D:63.2%],F:0.1%,M:0.4%,n:5366", + "Complete": 99.5, + "Single copy": 36.3, + "Multi copy": 63.2, + "Fragmented": 0.1, + "Missing": 0.4, + "n_markers": 5366, + "domain": "eukaryota", + "Number of scaffolds": "22", + "Number of contigs": "39", + "Total length": "1013741020", + "Percent gaps": "0.001%", + "Scaffold N50": "51410279", + "Contigs N50": "46254787" + } +} \ No newline at end of file diff --git a/Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml b/Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml new file mode 100644 index 00000000..1d3846a9 --- /dev/null +++ b/Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml @@ -0,0 +1,46 @@ +--- +identifier: Wm82.gnm5.NRKG + +provenance: "The files in this directory originated from https://doi.org/10.6084/m9.figshare.22688281.v3. That repository is considered the primary and authoritative for this data; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) 
and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://doi.org/10.6084/m9.figshare.22688281.v3" + +synopsis: "Glycine max accession Williams 82 (Wm82), genome assembly 5" + +scientific_name: Glycine max + +taxid: 3847 + +genotype: + - Wm82 + +chromosome_prefix: Chr + +supercontig_prefix: NONE + +description: "Sequenced using PacBio RSII and HiFi reads, with Bionano optical maps to aid in scaffolding. See full description at Garg, Khan et al., 2023." + +dataset_doi: 10.6084/m9.figshare.22688281.v3 + +original_file_creation_date: "2023-07-22" + +local_file_creation_date: "2023-10-11" + +dataset_release_date: "2023-11-01" + +publication_doi: 10.1002/tpg2.20382 + +publication_title: "Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research" + +contributors: Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT + +citation: "Garg V, Khan AW, Fengler K, Llaca V, Yuan Y, Vuong TD, Harris C, Chan TF, Lam HM, Varshney RK, Nguyen HT. Near-gapless genome assemblies of Williams 82 and Lee cultivars for accelerating global soybean research. Plant Genome. 2023 Sep 25:e20382. doi: 10.1002/tpg2.20382. Epub ahead of print. PMID: 37749941." 
+ +data_curators: Steven Cannon + +public_access_level: public + +license: Open, with usage agreement + +keywords: soybean, Wm82, Williams 82 + diff --git a/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/README.HM078.gnm1.ann1.3RNP.yml b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/README.HM078.gnm1.ann1.3RNP.yml new file mode 100644 index 00000000..684c6e05 --- /dev/null +++ b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/README.HM078.gnm1.ann1.3RNP.yml @@ -0,0 +1,41 @@ +--- +identifier: HM078.gnm1.ann1.3RNP + +provenance: "contributed directly by authors" + +source: + +synopsis: Genome annotation for Medicago truncatula (HM078). + +scientific_name: Medicago truncatula + +taxid: 3880 + +bioproject: + +scientific_name_abbrev: medtr + +genotype: + - HM078 + +description: "Genome annotation for Medicago truncatula (HM078)." + +original_file_creation_date: "2024-01-02" + +local_file_creation_date: "2024-01-11" + +publication_doi: + +dataset_release_date: "2024-01-11" + +publication_title: + +contributors: "Jacob Botkin, Shaun Curtin" + +data_curators: Andrew Farmer + +public_access_level: public + +license: Open + +citation: diff --git a/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 new file mode 100644 index 00000000..81c52198 --- /dev/null +++ b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 @@ -0,0 +1,4 @@ +4e4ea00a039158b5bdc1f274bbfb40a2 ./medtr.HM078.gnm1.Q3TM.genome_main.fna.gz.gzi +699e12b09676e7a0f7d143395f7ec1f3 ./medtr.HM078.gnm1.Q3TM.genome_main.fna.gz.fai +3b594b5531a47ee8830d53698c44a179 ./README.HM078.gnm1.Q3TM.yml +ab66fa55170527a4a49cca56e4080b49 ./medtr.HM078.gnm1.Q3TM.genome_main.fna.gz diff --git a/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/README.HM078.gnm1.Q3TM.yml b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/README.HM078.gnm1.Q3TM.yml new file mode 100644 index 00000000..8466a880 --- /dev/null +++ 
b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/README.HM078.gnm1.Q3TM.yml @@ -0,0 +1,43 @@ +--- +identifier: HM078.gnm1.Q3TM + +provenance: "contributed directly by authors" + +source: + +synopsis: Genome assembly for Medicago truncatula (HM078) + +scientific_name: Medicago truncatula + +taxid: 3880 + +bioproject: + +scientific_name_abbrev: medtr + +genotype: + - HM078 + +supercontig_prefix: ctg + +description: "Genome assembly for Medicago truncatula (HM078)." + +original_file_creation_date: "2024-01-02" + +local_file_creation_date: "2024-01-11" + +publication_doi: + +dataset_release_date: "2024-01-11" + +publication_title: + +contributors: "Jacob Botkin, Shaun Curtin" + +data_curators: Andrew Farmer + +public_access_level: public + +license: Open + +citation: From 577a5fcadebfaa420f407c033734f6cd8da63465 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Wed, 17 Jan 2024 18:25:08 -0600 Subject: [PATCH 02/27] Change licenses from Open with usage agreement to Open for selected genomes and annotations --- .../README.Hwangkeum.gnm1.ann1.1G4F.yml | 2 +- .../JD17.gnm1.ann1.CLFP/README.JD17.gnm1.ann1.CLFP.yml | 2 +- .../Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml | 2 +- .../Wm82.gnm4.ann1.T8TQ/README.Wm82.gnm4.ann1.T8TQ.yml | 2 +- .../Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml | 2 +- .../README.Wm82_ISU01.gnm2.ann1.FGFB.yml | 2 +- .../FiskebyIII.gnm1.F177/README.FiskebyIII.gnm1.F177.yml | 2 +- .../genomes/Hwangkeum.gnm1.4S83/README.Hwangkeum.gnm1.4S83.yml | 2 +- Glycine/max/genomes/JD17.gnm1.TW7L/README.JD17.gnm1.TW7L.yml | 2 +- Glycine/max/genomes/Lee.gnm1.BXNC/README.Lee.gnm1.BXNC.yml | 2 +- Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml | 2 +- Glycine/max/genomes/Wm82.gnm4.4PTR/README.Wm82.gnm4.4PTR.yml | 2 +- Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml | 2 +- .../Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git 
a/Glycine/max/annotations/Hwangkeum.gnm1.ann1.1G4F/README.Hwangkeum.gnm1.ann1.1G4F.yml b/Glycine/max/annotations/Hwangkeum.gnm1.ann1.1G4F/README.Hwangkeum.gnm1.ann1.1G4F.yml index fdb7c790..a0f502bd 100644 --- a/Glycine/max/annotations/Hwangkeum.gnm1.ann1.1G4F/README.Hwangkeum.gnm1.ann1.1G4F.yml +++ b/Glycine/max/annotations/Hwangkeum.gnm1.ann1.1G4F/README.Hwangkeum.gnm1.ann1.1G4F.yml @@ -36,7 +36,7 @@ data_curators: Wei Huang public_access_level: public -license: Open, with usage agreement +license: Open keywords: genome annotation, Hwangkeum diff --git a/Glycine/max/annotations/JD17.gnm1.ann1.CLFP/README.JD17.gnm1.ann1.CLFP.yml b/Glycine/max/annotations/JD17.gnm1.ann1.CLFP/README.JD17.gnm1.ann1.CLFP.yml index 2ede55d8..f32e7479 100644 --- a/Glycine/max/annotations/JD17.gnm1.ann1.CLFP/README.JD17.gnm1.ann1.CLFP.yml +++ b/Glycine/max/annotations/JD17.gnm1.ann1.CLFP/README.JD17.gnm1.ann1.CLFP.yml @@ -42,6 +42,6 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, JD17, Jidou 17 diff --git a/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml index 84b4b00a..fcefee4e 100644 --- a/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml +++ b/Glycine/max/annotations/Lee.gnm3.ann1.ZYY3/README.Lee.gnm3.ann1.ZYY3.yml @@ -40,7 +40,7 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Lee diff --git a/Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/README.Wm82.gnm4.ann1.T8TQ.yml b/Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/README.Wm82.gnm4.ann1.T8TQ.yml index d6339b7b..326d0a2f 100644 --- a/Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/README.Wm82.gnm4.ann1.T8TQ.yml +++ b/Glycine/max/annotations/Wm82.gnm4.ann1.T8TQ/README.Wm82.gnm4.ann1.T8TQ.yml @@ -34,6 +34,6 @@ data_curators: Steven Cannon public_access_level: 
public -license: open, with usage agreement +license: Open keywords: soybean diff --git a/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml index 2bd83f33..ed62d201 100644 --- a/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml +++ b/Glycine/max/annotations/Wm82.gnm5.ann1.J7HW/README.Wm82.gnm5.ann1.J7HW.yml @@ -40,7 +40,7 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Wm82, Williams 82 diff --git a/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml b/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml index 0b12aa62..d3a81b9b 100644 --- a/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml +++ b/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml @@ -36,6 +36,6 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Wm82, Williams 82, Wm82_ISU01, Wm82-ISU-01 diff --git a/Glycine/max/genomes/FiskebyIII.gnm1.F177/README.FiskebyIII.gnm1.F177.yml b/Glycine/max/genomes/FiskebyIII.gnm1.F177/README.FiskebyIII.gnm1.F177.yml index 64df739e..10a77401 100644 --- a/Glycine/max/genomes/FiskebyIII.gnm1.F177/README.FiskebyIII.gnm1.F177.yml +++ b/Glycine/max/genomes/FiskebyIII.gnm1.F177/README.FiskebyIII.gnm1.F177.yml @@ -34,6 +34,6 @@ data_curators: Wei Huang public_access_level: public -license: Open, with usage agreement +license: Open keywords: Glycine max, Fiskeby III diff --git a/Glycine/max/genomes/Hwangkeum.gnm1.4S83/README.Hwangkeum.gnm1.4S83.yml b/Glycine/max/genomes/Hwangkeum.gnm1.4S83/README.Hwangkeum.gnm1.4S83.yml index 3287d8cf..71a3c84a 100644 --- a/Glycine/max/genomes/Hwangkeum.gnm1.4S83/README.Hwangkeum.gnm1.4S83.yml +++ 
b/Glycine/max/genomes/Hwangkeum.gnm1.4S83/README.Hwangkeum.gnm1.4S83.yml @@ -40,7 +40,7 @@ data_curators: Wei Huang public_access_level: public -license: Open, with usage agreement +license: Open keywords: Glycine max, genome, Hwangkeum diff --git a/Glycine/max/genomes/JD17.gnm1.TW7L/README.JD17.gnm1.TW7L.yml b/Glycine/max/genomes/JD17.gnm1.TW7L/README.JD17.gnm1.TW7L.yml index abb42ee6..2a7cdfee 100644 --- a/Glycine/max/genomes/JD17.gnm1.TW7L/README.JD17.gnm1.TW7L.yml +++ b/Glycine/max/genomes/JD17.gnm1.TW7L/README.JD17.gnm1.TW7L.yml @@ -42,6 +42,6 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, JD17, Jidou 17 diff --git a/Glycine/max/genomes/Lee.gnm1.BXNC/README.Lee.gnm1.BXNC.yml b/Glycine/max/genomes/Lee.gnm1.BXNC/README.Lee.gnm1.BXNC.yml index ebc49e62..3cf98423 100644 --- a/Glycine/max/genomes/Lee.gnm1.BXNC/README.Lee.gnm1.BXNC.yml +++ b/Glycine/max/genomes/Lee.gnm1.BXNC/README.Lee.gnm1.BXNC.yml @@ -40,6 +40,6 @@ data_curators: Jacqueline Campbell, Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Lee diff --git a/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml b/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml index 16b352ac..062b332b 100644 --- a/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml +++ b/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml @@ -40,7 +40,7 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Lee diff --git a/Glycine/max/genomes/Wm82.gnm4.4PTR/README.Wm82.gnm4.4PTR.yml b/Glycine/max/genomes/Wm82.gnm4.4PTR/README.Wm82.gnm4.4PTR.yml index 27005f95..3f6399f9 100644 --- a/Glycine/max/genomes/Wm82.gnm4.4PTR/README.Wm82.gnm4.4PTR.yml +++ b/Glycine/max/genomes/Wm82.gnm4.4PTR/README.Wm82.gnm4.4PTR.yml @@ -40,6 +40,6 @@ data_curators: Jacqueline Campbell, Steven Cannon 
public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Wm82, Williams 82 diff --git a/Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml b/Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml index 1d3846a9..5241537d 100644 --- a/Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml +++ b/Glycine/max/genomes/Wm82.gnm5.NRKG/README.Wm82.gnm5.NRKG.yml @@ -40,7 +40,7 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Wm82, Williams 82 diff --git a/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml b/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml index c5d2647d..bac74818 100644 --- a/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml +++ b/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml @@ -38,6 +38,6 @@ data_curators: Steven Cannon public_access_level: public -license: Open, with usage agreement +license: Open keywords: soybean, Wm82, Williams 82, Wm82_ISU01, Wm82-ISU-01 From 73a665c5467068019b4fa0cb2dda550505f56c75 Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Mon, 22 Jan 2024 11:14:22 -0600 Subject: [PATCH 03/27] updates for new Cercis chinensis genome --- .../description_Cercis.yml | 2 + .../description_Cercis_chinensis.yml | 14 +++++ ...8L4.busco.fabales_odb10.short_summary.json | 47 ++++++++++++++ .../CHECKSUM.GXIB.gnm1.ann1.L8L4.md5 | 24 +++++++ .../README.GXIB.gnm1.ann1.L8L4.yml | 42 +++++++++++++ ...DZH.busco.fabales_odb10.short_summary.json | 62 +++++++++++++++++++ .../CHECKSUM.GXIB.gnm1.5DZH.md5 | 12 ++++ .../GXIB.gnm1.5DZH/README.GXIB.gnm1.5DZH.yml | 42 +++++++++++++ 8 files changed, 245 insertions(+) create mode 100644 Cercis/chinensis/about_this_collection/description_Cercis_chinensis.yml create mode 100644 
Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/BUSCO/cerch.GXIB.gnm1.ann1.L8L4.busco.fabales_odb10.short_summary.json create mode 100644 Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/CHECKSUM.GXIB.gnm1.ann1.L8L4.md5 create mode 100644 Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/README.GXIB.gnm1.ann1.L8L4.yml create mode 100644 Cercis/chinensis/genomes/GXIB.gnm1.5DZH/BUSCO/cerch.GXIB.gnm1.5DZH.busco.fabales_odb10.short_summary.json create mode 100644 Cercis/chinensis/genomes/GXIB.gnm1.5DZH/CHECKSUM.GXIB.gnm1.5DZH.md5 create mode 100644 Cercis/chinensis/genomes/GXIB.gnm1.5DZH/README.GXIB.gnm1.5DZH.yml diff --git a/Cercis/GENUS/about_this_collection/description_Cercis.yml b/Cercis/GENUS/about_this_collection/description_Cercis.yml index 335549ef..6e75829b 100644 --- a/Cercis/GENUS/about_this_collection/description_Cercis.yml +++ b/Cercis/GENUS/about_this_collection/description_Cercis.yml @@ -6,3 +6,5 @@ description: "Cercis is a genus of about 10 species in the subfamily Cercidoidea species: - canadensis + - chinensis + - gigantea diff --git a/Cercis/chinensis/about_this_collection/description_Cercis_chinensis.yml b/Cercis/chinensis/about_this_collection/description_Cercis_chinensis.yml new file mode 100644 index 00000000..6afd3202 --- /dev/null +++ b/Cercis/chinensis/about_this_collection/description_Cercis_chinensis.yml @@ -0,0 +1,14 @@ +--- +taxid: 161750 +genus: Cercis +species: chinensis +abbrev: cerch +commonName: Chinese redbud +description: "Cercis chinensis is a flowering tree endemic to China, with medicinal and horticultural uses." 
+ +strains: + - identifier: GXIB + accession: GXIB + name: GXIB + origin: "A plant at the campus of Guangxi Institute of Botany, Chinese Academy of Sciences" + description: "A plant at the campus of Guangxi Institute of Botany, Chinese Academy of Sciences" diff --git a/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/BUSCO/cerch.GXIB.gnm1.ann1.L8L4.busco.fabales_odb10.short_summary.json b/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/BUSCO/cerch.GXIB.gnm1.ann1.L8L4.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..2acf2e78 --- /dev/null +++ b/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/BUSCO/cerch.GXIB.gnm1.ann1.L8L4.busco.fabales_odb10.short_summary.json @@ -0,0 +1,47 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.Bsb7z4/cerch.GXIB.gnm1.ann1.L8L4.protein.faa", + "out": "BUSCO_proteins_fabales_odb10", + "mode": "proteins", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_proteins_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:90.3%[S:86.9%,D:3.4%],F:1.1%,M:8.6%,n:5366", + "Complete": 90.3, + "Single copy": 86.9, + "Multi copy": 3.4, + "Fragmented": 1.1, + "Missing": 8.6, + "n_markers": 5366, + "domain": "eukaryota" + } +} \ No newline 
at end of file diff --git a/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/CHECKSUM.GXIB.gnm1.ann1.L8L4.md5 b/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/CHECKSUM.GXIB.gnm1.ann1.L8L4.md5 new file mode 100644 index 00000000..2678de97 --- /dev/null +++ b/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/CHECKSUM.GXIB.gnm1.ann1.L8L4.md5 @@ -0,0 +1,24 @@ +8623d7c06bd89a57930b6da8f2551b73 ./cerch.GXIB.gnm1.ann1.L8L4.cds.fna.gz +bdaf80cd97b89c14bc464df92cb0da74 ./README.GXIB.gnm1.ann1.L8L4.yml +3c0378dc215a6742c20f90738a536073 ./cerch.GXIB.gnm1.ann1.L8L4.gene_models_main.gff3.gz.tbi +3fc53bc15ed2049fa82f626889ac23cb ./cerch.GXIB.gnm1.ann1.L8L4.mrna.fna.gz +15f24234d784e244423f205341b13df7 ./cerch.GXIB.gnm1.ann1.L8L4.iprscan.gff3.gz +34e5e51fc1b7a4307fa9bb0e392547bf ./CHANGES.GXIB.gnm1.ann1.L8L4.txt +0eb98cc542498a13428733ec3472d8fd ./cerch.GXIB.gnm1.ann1.L8L4.cds.fna.gz.fai +a00737bcffc56a728af5b680b63d4e47 ./BUSCO/cerch.GXIB.gnm1.ann1.L8L4.busco.fabales_odb10.full_table.tsv.gz +122f36b069ebb009a267d196a53272f4 ./BUSCO/cerch.GXIB.gnm1.ann1.L8L4.busco.fabales_odb10.short_summary.txt +d4bbefc8d565911d2f94accc17e370d6 ./BUSCO/cerch.GXIB.gnm1.ann1.L8L4.busco.fabales_odb10.short_summary.json +19b3bd4c6ecccac677b7a47291004230 ./cerch.GXIB.gnm1.ann1.L8L4.protein.faa.gz.gzi +07c4e2596b38110e8103a203f08ccd8e ./cerch.GXIB.gnm1.ann1.L8L4.gene_models_main.gff3.gz +6ff864e90bec4614df6018f58099b911 ./cerch.GXIB.gnm1.ann1.L8L4.mrna.fna.gz.fai +400cb7ff637035580c5d79330e67ad8c ./initial_seqid_map.tsv.gz +2c2a0d93d3008fc86e9c269f8b3429af ./MANIFEST.GXIB.gnm1.ann1.L8L4.descriptions.yml +e531870b5a703d5fdfc525cdfab5ba20 ./cerch.GXIB.gnm1.ann1.L8L4.mrna.fna.gz.gzi +1a4d8a28e82af01aa60736bedf38f124 ./MANIFEST.GXIB.gnm1.ann1.L8L4.correspondence.yml +872d6f32abca043db1aa3dce94ee6d29 ./cerch.GXIB.gnm1.ann1.L8L4.legfed_v1_0.M65K.gfa.tsv.gz +c92a531cf55e8433ef04cdcc4bc638dd ./cerch.GXIB.gnm1.ann1.L8L4.iprscan.gff3.gz.tbi +5071ef19dc3fd20a7111a2245286b792 
./cerch.GXIB.gnm1.ann1.L8L4.protein.faa.gz.fai +41b53ce70f33697e1b3804d7037f02a5 ./cerch.GXIB.gnm1.ann1.L8L4.cds.fna.gz.gzi +bc41e89b4498fa8dd02786010fdbcef3 ./cerch.GXIB.gnm1.ann1.L8L4.protein.faa.gz +f14447ecf6ddafb17ed6907ab3811d6b ./cerch.GXIB.gnm1.ann1.L8L4.gene_models_main.bed.gz +b11ef51c4b4c7be096d3f6b15724c32a ./cerch.GXIB.gnm1.ann1.L8L4.featid_map.tsv.gz diff --git a/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/README.GXIB.gnm1.ann1.L8L4.yml b/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/README.GXIB.gnm1.ann1.L8L4.yml new file mode 100644 index 00000000..a1eba71a --- /dev/null +++ b/Cercis/chinensis/annotations/GXIB.gnm1.ann1.L8L4/README.GXIB.gnm1.ann1.L8L4.yml @@ -0,0 +1,42 @@ +--- +identifier: GXIB.gnm1.ann1.L8L4 + +provenance: "The files in this directory originated from the Genome Warehouse of the China National Center for Bioinformation, https://ngdc.cncb.ac.cn/gwh. The Genome Warehouse repository is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://ngdc.cncb.ac.cn/gwh/Assembly/24066/show" + +synopsis: "Gene annotations for Cercis chinensis, from a plant at the campus of Guangxi Institute of Botany, Chinese Academy of Sciences." 
+ +scientific_name: Cercis chinensis + +taxid: 161750 + +genotype: + - GXIB + +description: "Gene annotations for Cercis chinensis were based on deep transcriptome sequencing of five plant tissues, and transcriptome-based, homolog-based, and ab initio methods." + +genbank_accession: GWHBHAK00000000 + +original_file_creation_date: "2021-11-20" + +local_file_creation_date: "2024-01-08" + +dataset_release_date: "2024-02-01" + +publication_doi: 10.1016/j.xplc.2022.100422 + +publication_title: "The nearly complete assembly of the Cercis chinensis genome and Fabaceae phylogenomic studies provide insights into new gene evolution" + +contributors: Li J, Shen J, Wang R, Chen Y, Zhang T, Wang H, Guo C, Qi J. + +citation: "Li J, Shen J, Wang R, Chen Y, Zhang T, Wang H, Guo C, Qi J. The nearly complete assembly of the Cercis chinensis genome and Fabaceae phylogenomic studies provide insights into new gene evolution. Plant Commun. 2023 Jan 9;4(1):100422. doi: 10.1016/j.xplc.2022.100422. Epub 2022 Aug 11. PMID: 35957520; PMCID: PMC9860166." 
+ +data_curators: Steven Cannon, Andrew Farmer + +public_access_level: public + +license: open + +keywords: Chinese redbud tree + diff --git a/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/BUSCO/cerch.GXIB.gnm1.5DZH.busco.fabales_odb10.short_summary.json b/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/BUSCO/cerch.GXIB.gnm1.5DZH.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..f9ad999e --- /dev/null +++ b/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/BUSCO/cerch.GXIB.gnm1.5DZH.busco.fabales_odb10.short_summary.json @@ -0,0 +1,62 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.kQYowt/cerch.GXIB.gnm1.5DZH.genome_main.fna", + "out": "BUSCO_genome_fabales_odb10", + "mode": "euk_genome_met", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_genome_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota", + "max_intron": "90000", + "max_seq_len": "120000", + "metaeuk_parameters": "", + "metaeuk_rerun_parameters": "", + "contig_break": "10", + "scaffold_composition": "False", + "gene_predictor": "metaeuk" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "bbtools": "38.96", + "metaeuk": "5.34c21f2", + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:94.6%[S:91.3%,D:3.3%],F:0.5%,M:4.9%,n:5366", + "Complete": 94.6, + "Single copy": 91.3, + "Multi copy": 3.3, + "Fragmented": 
0.5, + "Missing": 4.9, + "n_markers": 5366, + "domain": "eukaryota", + "Number of scaffolds": "67", + "Number of contigs": "81", + "Total length": "352840832", + "Percent gaps": "0.000%", + "Scaffold N50": "47428482", + "Contigs N50": "29660241" + } +} \ No newline at end of file diff --git a/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/CHECKSUM.GXIB.gnm1.5DZH.md5 b/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/CHECKSUM.GXIB.gnm1.5DZH.md5 new file mode 100644 index 00000000..4b5c3eb4 --- /dev/null +++ b/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/CHECKSUM.GXIB.gnm1.5DZH.md5 @@ -0,0 +1,12 @@ +7e95a66608fffc4ec23e3609c98f50cb ./cerch.GXIB.gnm1.5DZH.genome_main.fna.gz +400cb7ff637035580c5d79330e67ad8c ./GWHBHAK00000000.initial_seqid_map.tsv.gz +79bba70b9b165ef60871cf923e171fd3 ./MANIFEST.GXIB.gnm1.5DZH.correspondence.yml +25a2387d9aa291155bd00ac84b4a7bc2 ./README.GXIB.gnm1.5DZH.yml +eee2850d3f7c54dd08829a3a99984267 ./CHANGES.GXIB.gnm1.5DZH.txt +81c1cfa56e35efee992b0499e4114e2e ./cerch.GXIB.gnm1.5DZH.seqid_map.tsv.gz +7feb8a191a8881964e3952b60886ec92 ./cerch.GXIB.gnm1.5DZH.genome_main.fna.gz.gzi +34d40d991ac549e4383dee9abf1de131 ./cerch.GXIB.gnm1.5DZH.genome_main.fna.gz.fai +a4c0c59a2b28f140f1153a3b4e50136b ./BUSCO/cerch.GXIB.gnm1.5DZH.busco.fabales_odb10.full_table.tsv.gz +3397f6f2ac5c35ac4620395a0b907712 ./BUSCO/cerch.GXIB.gnm1.5DZH.busco.fabales_odb10.short_summary.json +3a1254f41bab42b6a875c8c60d8ea79f ./BUSCO/cerch.GXIB.gnm1.5DZH.busco.fabales_odb10.short_summary.txt +3cb0adbb09245fe6ecfd4012265efe45 ./MANIFEST.GXIB.gnm1.5DZH.descriptions.yml diff --git a/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/README.GXIB.gnm1.5DZH.yml b/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/README.GXIB.gnm1.5DZH.yml new file mode 100644 index 00000000..f07a6e8b --- /dev/null +++ b/Cercis/chinensis/genomes/GXIB.gnm1.5DZH/README.GXIB.gnm1.5DZH.yml @@ -0,0 +1,42 @@ +--- +identifier: GXIB.gnm1.5DZH + +provenance: "The files in this directory originated from the Genome Warehouse of the China National Center for 
Bioinformation, https://ngdc.cncb.ac.cn/gwh. The Genome Warehouse repository is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://ngdc.cncb.ac.cn/gwh/Assembly/24066/show" + +synopsis: "Genome assembly for Cercis chinensis, from a plant at the campus of Guangxi Institute of Botany, Chinese Academy of Sciences." + +scientific_name: Cercis chinensis + +taxid: 161750 + +genotype: + - GXIB + +description: "Genome assembly for Cercis chinensis, from a plant at the campus of Guangxi Institute of Botany (GXIB), from Li, Shen et al., 2023. The genome assembly was derived from PacBio HiFi reads with 130.9x coverage." + +genbank_accession: GWHBHAK00000000 + +original_file_creation_date: "2021-11-20" + +local_file_creation_date: "2024-01-08" + +dataset_release_date: "2024-02-01" + +publication_doi: 10.1016/j.xplc.2022.100422 + +publication_title: "The nearly complete assembly of the Cercis chinensis genome and Fabaceae phylogenomic studies provide insights into new gene evolution" + +contributors: Li J, Shen J, Wang R, Chen Y, Zhang T, Wang H, Guo C, Qi J. + +citation: "Li J, Shen J, Wang R, Chen Y, Zhang T, Wang H, Guo C, Qi J. 
The nearly complete assembly of the Cercis chinensis genome and Fabaceae phylogenomic studies provide insights into new gene evolution. Plant Commun. 2023 Jan 9;4(1):100422. doi: 10.1016/j.xplc.2022.100422. Epub 2022 Aug 11. PMID: 35957520; PMCID: PMC9860166." + +data_curators: Steven Cannon, Andrew Farmer + +public_access_level: public + +license: open + +keywords: Chinese redbud tree + From 233071f39e77fcff9fa4f567a8a8805fe0c2ca9c Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Mon, 22 Jan 2024 11:15:41 -0600 Subject: [PATCH 04/27] updates for Arachis stenosperma annotations from NCBI --- ...ZRZ.busco.fabales_odb10.short_summary.json | 47 +++++++++++++++++++ .../CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 | 34 ++++++++++++++ .../README.V10309.gnm1.ann1.CZRZ.yml | 46 ++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json create mode 100644 Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 create mode 100644 Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/README.V10309.gnm1.ann1.CZRZ.yml diff --git a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..3182ab72 --- /dev/null +++ b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json @@ -0,0 +1,47 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + 
"auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.JhWvm4/arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa", + "out": "BUSCO_proteins_fabales_odb10", + "mode": "proteins", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_proteins_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:97.2%[S:92.0%,D:5.2%],F:0.2%,M:2.6%,n:5366", + "Complete": 97.2, + "Single copy": 92.0, + "Multi copy": 5.2, + "Fragmented": 0.2, + "Missing": 2.6, + "n_markers": 5366, + "domain": "eukaryota" + } +} \ No newline at end of file diff --git a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 new file mode 100644 index 00000000..618bcbc5 --- /dev/null +++ b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 @@ -0,0 +1,34 @@ +3f1a18896d5177db5fe45542ea95e258 ./MANIFEST.V10309.gnm1.ann1.CZRZ.correspondence.yml +8d7e589149431bf1b3b8d577ab89a7a8 ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.gzi +585d514247e90339c2852780379633c4 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz.tbi +ca2619b180190a64447f8210e2d5eec6 ./arast.V10309.gnm1.ann1.CZRZ.legfed_v1_0.M65K.gfa.tsv.gz +a2243209dcacbd58933da9d8ef821ab5 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.bed.gz +d13a0d5c902dff37ab7fbaeb9682e9fc ./CHANGES.V10309.gnm1.ann1.CZRZ.txt +ecd023051a659495cebaf005c5f5a5f8 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.gzi +ecd023051a659495cebaf005c5f5a5f8 
./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.gzi +4d3e658890cedb270609a160e0f3e684 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_exons.gff3.gz +7cacd41f7aa04ec27ed09daa2c967e28 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz +5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.fai +c69c7edf12ad79f176d0365902e36d4e ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.gzi +1ff23c576960f7354f35728ff00ac8fa ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz +7bc661e46f60e9308d5cd2663776e081 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.fai +04ef98566b55ad667b058b1bfb3dc8df ./arast.V10309.gnm1.ann1.CZRZ.featid_map.tsv.gz +f888bec467738642550af258e3e61113 ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz +eb04962bd814ee2aaaedd42e774f17ee ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.gzi +7cacd41f7aa04ec27ed09daa2c967e28 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz +5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.fai +9eb1a8a66ce49dffbf62d20395271da5 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.full_table.tsv.gz +8c144993f8edb2552eecf7d3b9a63acb ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.txt +5987914f4a2958d9d07f39a607152e03 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json +c69c7edf12ad79f176d0365902e36d4e ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.gzi +92fa388cf112420111f5218a0da23306 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz +aceafbc0f83018f2b9e19be55a30665f ./.arast.V10309.gnm1.ann1.CZRZ.featid_map.tsv.gz.swp +e2f21d5ed24891d848c9f98e244cadbe ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz +5b255f7d20defdcde92ea16827d98600 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz +2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.fai +2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.fai +b42d54c9910048ddb841ca2ff57eac91 ./MANIFEST.V10309.gnm1.ann1.CZRZ.descriptions.yml 
+d037c86f41256501f7f40d94cb6a2d5e ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.fai +92fa388cf112420111f5218a0da23306 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz +d1293c7e339185b773c0d2809d78e88a ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz.tbi +e7e13718aea32fbff026133e7bb1bf33 ./README.V10309.gnm1.ann1.CZRZ.yml diff --git a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/README.V10309.gnm1.ann1.CZRZ.yml b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/README.V10309.gnm1.ann1.CZRZ.yml new file mode 100644 index 00000000..ac7683ef --- /dev/null +++ b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/README.V10309.gnm1.ann1.CZRZ.yml @@ -0,0 +1,46 @@ +--- +identifier: V10309.gnm1.ann1.CZRZ + +provenance: "The files in this directory originated from GenBank, for RefSeq genome sequence GCF_014773155.1, submitted by the International Peanut Genome Initiative in 2018. The GenBank source is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and Peanutbase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and Peanutbase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." 
+ +source: "https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_014773155.1" + +synopsis: "GenBank RefSeq annotation for Genome assembly 1 for Arachis stenosperma, genotype V10309" + +scientific_name: Arachis stenosperma + +taxid: 217475 + +genotype: + - V10309 + +chromosome_prefix: Chr + +supercontig_prefix: Scaffold + +description: "This annotation was produced by GenBank on the RefSeq assembly V10309 in 2023" + +genbank_accession: GCF_014773155.1 + +original_file_creation_date: "2023-10-01" + +local_file_creation_date: "2024-01-17" + +dataset_release_date: "2024-01-22" + +publication_doi: 10.1038/s41588-019-0405-z + +publication_title: "The genome sequence of segmental allotetraploid peanut Arachis hypogaea" + +contributors: The International Peanut Genome Initiative; lead assembly group Jeremy Schmutz, Jerry Jenkins, Jane Grimwood; project leads David Bertioli; Soraya Bertioli; Brian Scheffler; Scott Jackson; Peggy Ozias-Akins + +citation: "Bertioli, D.J., Jenkins, J., Clevenger, J. et al. The genome sequence of segmental allotetraploid peanut Arachis hypogaea. Nat Genet 51, 877-884 (2019). 
https://doi.org/10.1038/s41588-019-0405-z" + +data_curators: Steven Cannon, Andrew Farmer + +public_access_level: public + +license: open + +keywords: wild peanut, Arachis stenosperma + From 410502bffb6d14c7e39d53001cbbe6db98a585fd Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Mon, 22 Jan 2024 11:27:09 -0600 Subject: [PATCH 05/27] updates for glyma.Wm82_NJAU.gnm1/Wm82_NJAU.gnm1.ann1 --- .../description_Glycine_max.yml | 6 ++ ...M71.busco.fabales_odb10.short_summary.json | 47 ++++++++++++++ .../CHECKSUM.Wm82_NJAU.gnm1.ann1.KM71.md5 | 33 ++++++++++ .../README.Wm82_NJAU.gnm1.ann1.KM71.yml | 42 +++++++++++++ ...4GV.busco.fabales_odb10.short_summary.json | 62 +++++++++++++++++++ .../CHECKSUM.Wm82_NJAU.gnm1.N4GV.md5 | 11 ++++ .../README.Wm82_NJAU.gnm1.N4GV.yml | 42 +++++++++++++ 7 files changed, 243 insertions(+) create mode 100644 Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/BUSCO/glyma.Wm82_NJAU.gnm1.ann1.KM71.busco.fabales_odb10.short_summary.json create mode 100644 Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/CHECKSUM.Wm82_NJAU.gnm1.ann1.KM71.md5 create mode 100644 Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/README.Wm82_NJAU.gnm1.ann1.KM71.yml create mode 100644 Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/BUSCO/glyma.Wm82_NJAU.gnm1.N4GV.busco.fabales_odb10.short_summary.json create mode 100644 Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/CHECKSUM.Wm82_NJAU.gnm1.N4GV.md5 create mode 100644 Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/README.Wm82_NJAU.gnm1.N4GV.yml diff --git a/Glycine/max/about_this_collection/description_Glycine_max.yml b/Glycine/max/about_this_collection/description_Glycine_max.yml index b7336b11..b1500f18 100644 --- a/Glycine/max/about_this_collection/description_Glycine_max.yml +++ b/Glycine/max/about_this_collection/description_Glycine_max.yml @@ -244,3 +244,9 @@ strains: name: Zi Hua No.4 origin: China, Hei Long Jiang description: Landrace Zi Hua No.4 (SoyL02 from China, Hei Long Jiang, from Liu et al. 
2020 + - identifier: Wm82_NJAU + accession: Wm82_NJAU + accession_group: + name: "Williams 82 Nanjing Agricultural University" + origin: "Nanjing Agricultural University" + description: "Williams 82 sequenced for T2T assembly of Wang, Zhang et al. 2023" diff --git a/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/BUSCO/glyma.Wm82_NJAU.gnm1.ann1.KM71.busco.fabales_odb10.short_summary.json b/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/BUSCO/glyma.Wm82_NJAU.gnm1.ann1.KM71.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..7bb7b88b --- /dev/null +++ b/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/BUSCO/glyma.Wm82_NJAU.gnm1.ann1.KM71.busco.fabales_odb10.short_summary.json @@ -0,0 +1,47 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/glyma.Wm82_NJAU.gnm1.ann1.KM71.protein_primary.faa", + "out": "BUSCO_proteins_fabales_odb10", + "mode": "proteins", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_proteins_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:99.4%[S:38.8%,D:60.6%],F:0.1%,M:0.5%,n:5366", + "Complete": 99.4, + "Single copy": 38.8, + "Multi copy": 60.6, + "Fragmented": 0.1, + "Missing": 0.5, + "n_markers": 5366, + "domain": "eukaryota" + } +} \ No 
newline at end of file diff --git a/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/CHECKSUM.Wm82_NJAU.gnm1.ann1.KM71.md5 b/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/CHECKSUM.Wm82_NJAU.gnm1.ann1.KM71.md5 new file mode 100644 index 00000000..d343f72b --- /dev/null +++ b/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/CHECKSUM.Wm82_NJAU.gnm1.ann1.KM71.md5 @@ -0,0 +1,33 @@ +7106b72854e81581d569901480b7e239 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.legfed_v1_0.M65K.gfa.tsv.gz +a17a5f73b9c2c0790144a523108f606d ./glyma.Wm82_NJAU.gnm1.ann1.KM71.cds.fna.gz.gzi +edaa20f1ff030ac91559f80c1ccf1f3a ./glyma.Wm82_NJAU.gnm1.ann1.KM71.gene_models_main.gff3.gz.tbi +a116b4a8888579cfd424fd40f6432ebf ./glyma.Wm82_NJAU.gnm1.ann1.KM71.protein.faa.gz +5117fb55aee5a26bd5f786dbde88460b ./glyma.Wm82_NJAU.gnm1.ann1.KM71.mrna.fna.gz.gzi +7757d9d41b7a0c6c2de93bd0fbb63b78 ./MANIFEST.Wm82_NJAU.gnm1.ann1.KM71.descriptions.yml +1d5f0519c5905d69058514f2a5eeafd9 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.gene_models_main.gff3.gz +d067d77a9df22d533a4ffd4547b59139 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.protein_primary.faa.gz.fai +fb6ec30c44707c0b9aeb1b9601cc85d9 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.mrna_primary.fna.gz +a8af46631916c5564dbe3b0be485d00c ./glyma.Wm82_NJAU.gnm1.ann1.KM71.protein.faa.gz.gzi +9ea6df75222c49e188213ac93a5747a8 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.mrna_primary.fna.gz.fai +b5409f7cf837ac4d51b64a4db384b83c ./glyma.Wm82_NJAU.gnm1.ann1.KM71.cds_primary.fna.gz.fai +bb21b2eb2816262265b42103e5d430e2 ./MANIFEST.Wm82_NJAU.gnm1.ann1.KM71.correspondence.yml +006e9368c239fd552112254344f91400 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.iprscan.gff3.gz +de79915781fb06259cfcfb112f484f33 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.gene_models_exons.gff3.gz +693c989cad62f9d0c0fa5272e7bd81a5 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.gene_models_main.bed.gz +d603e3328c4fd26d8843f1b07c9ddbec ./glyma.Wm82_NJAU.gnm1.ann1.KM71.protein.faa.gz.fai +5353215185f73f918fd6dd45f18f744a ./glyma.Wm82_NJAU.gnm1.ann1.KM71.protein_primary.faa.gz 
+f967bf7d5677b752948d2064a91a7c1a ./glyma.Wm82_NJAU.gnm1.ann1.KM71.mrna_primary.fna.gz.gzi +0221af1d729b7dd6dae922518cac7535 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.cds_primary.fna.gz.gzi +b9800527dc8a4a751001330ecf045c71 ./README.Wm82_NJAU.gnm1.ann1.KM71.yml +b58ccf9f55f2f39e3b54ef548f04e2f4 ./CHANGES.Wm82_NJAU.gnm1.ann1.KM71.txt +262f79e05dcfe77ac242e7a8eb5ab6f8 ./BUSCO/glyma.Wm82_NJAU.gnm1.ann1.KM71.busco.fabales_odb10.short_summary.json +414b8286ebf7a7903a596b404421733d ./BUSCO/glyma.Wm82_NJAU.gnm1.ann1.KM71.busco.fabales_odb10.full_table.tsv.gz +d7091d9bf34ad9dcf0c46e81c8c85e38 ./BUSCO/glyma.Wm82_NJAU.gnm1.ann1.KM71.busco.fabales_odb10.short_summary.txt +70f37696eb2e1bdcc25181441094bbd3 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.protein_primary.faa.gz.gzi +74965707b522ac6bada753f1965a61bf ./glyma.Wm82_NJAU.gnm1.ann1.KM71.mrna.fna.gz.fai +92afe1203d7de472614922215d862a7c ./glyma.Wm82_NJAU.gnm1.ann1.KM71.cds_primary.fna.gz +e5e087de657a4a22c2574dd61ee2d3c2 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.mrna.fna.gz +3f9ac6da624bd3d59ee10b3aa1f9d0e7 ./glyma.Wm82_NJAU.gnm1.ann1.KM71.cds.fna.gz.fai +ccf6b99cd008dae821d5346976b26cdc ./glyma.Wm82_NJAU.gnm1.ann1.KM71.featid_map.tsv.gz +3a67c73e81b2d7da356e5a9db0081e0b ./glyma.Wm82_NJAU.gnm1.ann1.KM71.cds.fna.gz +e31be1312dd21de1ce85a345f6923b7e ./glyma.Wm82_NJAU.gnm1.ann1.KM71.iprscan.gff3.gz.tbi diff --git a/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/README.Wm82_NJAU.gnm1.ann1.KM71.yml b/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/README.Wm82_NJAU.gnm1.ann1.KM71.yml new file mode 100644 index 00000000..7b62f22f --- /dev/null +++ b/Glycine/max/annotations/Wm82_NJAU.gnm1.ann1.KM71/README.Wm82_NJAU.gnm1.ann1.KM71.yml @@ -0,0 +1,42 @@ +--- +identifier: Wm82_NJAU.gnm1.ann1.KM71 + +provenance: "The files in this directory originated from https://ngdc.cncb.ac.cn/gwh/Assembly/37536/show. 
The Genome Warehouse repository is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://ngdc.cncb.ac.cn/gwh/Assembly/37536/show" + +synopsis: "Annotation 1 for Glycine max accession Williams 82 from Nanjing Agricultural University (Wm82-NJAU), genome assembly v1" + +scientific_name: Glycine max + +taxid: 3847 + +genotype: + - Wm82_NJAU + +chromosome_prefix: Gm + +description: "Gene annotation resources. See full description in supplemental methods at Wang, Longfei et al., 2023" + +original_file_creation_date: "2023-08-19" + +local_file_creation_date: "2023-01-17" + +dataset_release_date: "2023-02-01" + +publication_doi: 10.1016/j.molp.2023.08.012 + +publication_title: "A telomere-to-telomere gap-free assembly of soybean genome" + +contributors: Longfei Wang, Mengzhu Zhang, Mengna Li, Xinyu Jiang, Wu Jiao, Qingxin Song + +citation: "Wang L, Zhang M, Li M, Jiang X, Jiao W, Song Q. A telomere-to-telomere gap-free assembly of soybean genome. Mol Plant. 2023 Nov 6;16(11):1711-1714. doi: 10.1016/j.molp.2023.08.012. Epub 2023 Aug 26. PMID: 37634078." 
+ +data_curators: Jacqueline Campbell, Steven Cannon + +public_access_level: public + +license: open + +keywords: soybean, Wm82, Williams 82, Wm82_NJAU, Wm82-NJAU + diff --git a/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/BUSCO/glyma.Wm82_NJAU.gnm1.N4GV.busco.fabales_odb10.short_summary.json b/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/BUSCO/glyma.Wm82_NJAU.gnm1.N4GV.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..ba26e387 --- /dev/null +++ b/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/BUSCO/glyma.Wm82_NJAU.gnm1.N4GV.busco.fabales_odb10.short_summary.json @@ -0,0 +1,62 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.YAC27Y/glyma.Wm82_NJAU.gnm1.N4GV.genome_main.fna", + "out": "BUSCO_genome_fabales_odb10", + "mode": "euk_genome_met", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_genome_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota", + "max_intron": "90000", + "max_seq_len": "120000", + "metaeuk_parameters": "", + "metaeuk_rerun_parameters": "", + "contig_break": "10", + "scaffold_composition": "False", + "gene_predictor": "metaeuk" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "bbtools": "38.96", + "metaeuk": "5.34c21f2", + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:99.5%[S:36.3%,D:63.2%],F:0.1%,M:0.4%,n:5366", + "Complete": 99.5, + 
"Single copy": 36.3, + "Multi copy": 63.2, + "Fragmented": 0.1, + "Missing": 0.4, + "n_markers": 5366, + "domain": "eukaryota", + "Number of scaffolds": "20", + "Number of contigs": "20", + "Total length": "1011793938", + "Percent gaps": "0.000%", + "Scaffold N50": "51165319", + "Contigs N50": "51165319" + } +} \ No newline at end of file diff --git a/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/CHECKSUM.Wm82_NJAU.gnm1.N4GV.md5 b/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/CHECKSUM.Wm82_NJAU.gnm1.N4GV.md5 new file mode 100644 index 00000000..07cd9104 --- /dev/null +++ b/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/CHECKSUM.Wm82_NJAU.gnm1.N4GV.md5 @@ -0,0 +1,11 @@ +db557e93b1042dab5534a0dca4039e10 ./glyma.Wm82_NJAU.gnm1.N4GV.seqid_map.tsv.gz +76058c6e8f18ceec4271a93ab616de24 ./MANIFEST.Wm82_NJAU.gnm1.N4GV.correspondence.yml +12f4119f9a75fba095ee0a3264a33314 ./glyma.Wm82_NJAU.gnm1.N4GV.genome_main.fna.gz +8617985b4ae9081c7be50e112410d7bc ./glyma.Wm82_NJAU.gnm1.N4GV.genome_main.fna.gz.gzi +d83be3a3db44750cc823f4149f434c15 ./README.Wm82_NJAU.gnm1.N4GV.yml +5d1d85f477a2fd2c31dfa1121794995d ./MANIFEST.Wm82_NJAU.gnm1.N4GV.descriptions.yml +b58ccf9f55f2f39e3b54ef548f04e2f4 ./CHANGES.Wm82_NJAU.gnm1.N4GV.txt +bf994daf7eb8f5c4dfd98a96d8f7aca2 ./BUSCO/glyma.Wm82_NJAU.gnm1.N4GV.busco.fabales_odb10.short_summary.json +4b33c3022d84b2585126e3937d3e1753 ./BUSCO/glyma.Wm82_NJAU.gnm1.N4GV.busco.fabales_odb10.short_summary.txt +96391ff32318549cd7d8a90eea64d0b5 ./BUSCO/glyma.Wm82_NJAU.gnm1.N4GV.busco.fabales_odb10.full_table.tsv.gz +ecea9fb08643fd30da20c54de2d72d75 ./glyma.Wm82_NJAU.gnm1.N4GV.genome_main.fna.gz.fai diff --git a/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/README.Wm82_NJAU.gnm1.N4GV.yml b/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/README.Wm82_NJAU.gnm1.N4GV.yml new file mode 100644 index 00000000..4b3a764b --- /dev/null +++ b/Glycine/max/genomes/Wm82_NJAU.gnm1.N4GV/README.Wm82_NJAU.gnm1.N4GV.yml @@ -0,0 +1,42 @@ +--- +identifier: Wm82_NJAU.gnm1.N4GV + +provenance: "The files in this 
directory originated from https://ngdc.cncb.ac.cn/gwh/Assembly/37536/show. The Genome Warehouse repository is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and SoyBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and SoyBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." + +source: "https://ngdc.cncb.ac.cn/gwh/Assembly/37536/show" + +synopsis: "Glycine max accession Williams 82 from Nanjing Agricultural University (Wm82-NJAU), genome assembly v1" + +scientific_name: Glycine max + +taxid: 3847 + +genotype: + - Wm82_NJAU + +chromosome_prefix: Gm + +description: "Sequenced using PacBio HiFi, Nanopore ONT ultra-long, and short read Illumina sequencing; assembled with hifiasm v0.16.1; minimap2 v2.24; Racon v1.5.0; RagTag v2.1.0" + +original_file_creation_date: "2023-08-19" + +local_file_creation_date: "2023-01-17" + +dataset_release_date: "2023-02-01" + +publication_doi: 10.1016/j.molp.2023.08.012 + +publication_title: "A telomere-to-telomere gap-free assembly of soybean genome" + +contributors: Longfei Wang, Mengzhu Zhang, Mengna Li, Xinyu Jiang, Wu Jiao, Qingxin Song + +citation: "Wang L, Zhang M, Li M, Jiang X, Jiao W, Song Q. A telomere-to-telomere gap-free assembly of soybean genome. Mol Plant. 2023 Nov 6;16(11):1711-1714. doi: 10.1016/j.molp.2023.08.012. Epub 2023 Aug 26. PMID: 37634078." 
+ +data_curators: Jacqueline Campbell, Steven Cannon + +public_access_level: public + +license: open + +keywords: soybean, Wm82, Williams 82, Wm82_NJAU, Wm82-NJAU + From 98c2bce8f7832ffc15caa9b8d5d6b69de6f4d1ca Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Mon, 22 Jan 2024 13:58:32 -0600 Subject: [PATCH 06/27] metadata for genome/annotations medtr.HM078.gnm1/medtr.HM078.gnm1.ann1 --- .../description_Medicago_truncatula.yml | 5 ++ ...RNP.busco.fabales_odb10.short_summary.json | 47 ++++++++++++++ .../CHECKSUM.HM078.gnm1.ann1.3RNP.md5 | 18 ++++++ ...3TM.busco.fabales_odb10.short_summary.json | 62 +++++++++++++++++++ .../CHECKSUM.HM078.gnm1.Q3TM.md5 | 3 + 5 files changed, 135 insertions(+) create mode 100644 Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/BUSCO/medtr.HM078.gnm1.ann1.3RNP.busco.fabales_odb10.short_summary.json create mode 100644 Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 create mode 100644 Medicago/truncatula/genomes/HM078.gnm1.Q3TM/BUSCO/medtr.HM078.gnm1.Q3TM.busco.fabales_odb10.short_summary.json diff --git a/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml b/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml index b801fc8a..216e4474 100644 --- a/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml +++ b/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml @@ -33,3 +33,8 @@ strains: name: R108 (HM340) origin: Mediterranean region description: "Medicago truncatula accession R108 has been used in various genetic and genomic studies, e.g. Gaige et al., 2011 (DOI:10.1080/07060661.2012.662176), Li et al., 2014 (DOI:10.1016/j.jplph.2013.12.018), and Cosson et al., 2006 (DOI:10.1385/1-59745-130-4:115). A genome assembly is available: https://www.ncbi.nlm.nih.gov/assembly/GCA_002024945.1/." 
+ - identifier: HM078 + accession: SA27063 + name: SA27063 + origin: Greece + description: "HM078 was originally collected by researchers at INRA-Montpellier (doi:10.1186/1471-2229-6-28), and later characterized as one of a handful of accessions showing resistance to spring black stem and leaf spot disease (doi.org/10.1094/PHYTO-96-1330)." diff --git a/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/BUSCO/medtr.HM078.gnm1.ann1.3RNP.busco.fabales_odb10.short_summary.json b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/BUSCO/medtr.HM078.gnm1.ann1.3RNP.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..0c52f332 --- /dev/null +++ b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/BUSCO/medtr.HM078.gnm1.ann1.3RNP.busco.fabales_odb10.short_summary.json @@ -0,0 +1,47 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.msnrtH/medtr.HM078.gnm1.ann1.3RNP.protein.faa", + "out": "BUSCO_proteins_fabales_odb10", + "mode": "proteins", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_proteins_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:98.4%[S:95.6%,D:2.8%],F:0.4%,M:1.2%,n:5366", + "Complete": 98.4, + "Single copy": 95.6, + "Multi copy": 2.8, 
+ "Fragmented": 0.4, + "Missing": 1.2, + "n_markers": 5366, + "domain": "eukaryota" + } +} \ No newline at end of file diff --git a/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 new file mode 100644 index 00000000..4165675d --- /dev/null +++ b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 @@ -0,0 +1,18 @@ +4595919ff6129a308ccaf6c9b06e85f0 ./medtr.HM078.gnm1.ann1.3RNP.mrna.fna.gz.fai +adf1b17620b4f5699c1502a6588339a2 ./medtr.HM078.gnm1.ann1.3RNP.gene_models_main.gff3.gz +49e4992064141a84e361dbadd785ba58 ./medtr.HM078.gnm1.ann1.3RNP.protein.faa.gz +cd80fa46e412cb66472db90883ace6d8 ./medtr.HM078.gnm1.ann1.3RNP.cds.fna.gz.gzi +f0288fdaf6e82de3e957886ffeb1a080 ./medtr.HM078.gnm1.ann1.3RNP.legfed_v1_0.M65K.gfa.tsv.gz +b818a5a86bc7c04021d4103f86e5a276 ./medtr.HM078.gnm1.ann1.3RNP.iprscan.gff3.gz +82f5ebc2af0f5a48950660c142848d2d ./medtr.HM078.gnm1.ann1.3RNP.protein.faa.gz.fai +239fb56cf3070c2d3f39c6c80630e37b ./medtr.HM078.gnm1.ann1.3RNP.gene_models_main.gff3.gz.tbi +ef85f14304019e0c6214b659e1079dd7 ./medtr.HM078.gnm1.ann1.3RNP.iprscan.gff3.gz.tbi +9846679ac06bde3c820e74c4678a055b ./medtr.HM078.gnm1.ann1.3RNP.protein.faa.gz.gzi +f9e1d8191fffd849156ebadf7127c2fc ./BUSCO/medtr.HM078.gnm1.ann1.3RNP.busco.fabales_odb10.full_table.tsv.gz +7d5b18a7563b593a9c035defcd239b95 ./BUSCO/medtr.HM078.gnm1.ann1.3RNP.busco.fabales_odb10.short_summary.txt +bad63b9874ecb2b43174883e70d6c38d ./BUSCO/medtr.HM078.gnm1.ann1.3RNP.busco.fabales_odb10.short_summary.json +e2aaf47271d0b0a4d8e384a0c65fac3e ./medtr.HM078.gnm1.ann1.3RNP.cds.fna.gz +8b74ff97109461906b016182db4c9c15 ./medtr.HM078.gnm1.ann1.3RNP.cds.fna.gz.fai +70efd5d7a0f653fb9c0194c3561d26af ./README.HM078.gnm1.ann1.3RNP.yml +3bebfca5e545d522d54459e5994313bd ./medtr.HM078.gnm1.ann1.3RNP.mrna.fna.gz +d662df519e7d9e16abb6e3adf19e9110 
./medtr.HM078.gnm1.ann1.3RNP.mrna.fna.gz.gzi diff --git a/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/BUSCO/medtr.HM078.gnm1.Q3TM.busco.fabales_odb10.short_summary.json b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/BUSCO/medtr.HM078.gnm1.Q3TM.busco.fabales_odb10.short_summary.json new file mode 100644 index 00000000..85cbe1bf --- /dev/null +++ b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/BUSCO/medtr.HM078.gnm1.Q3TM.busco.fabales_odb10.short_summary.json @@ -0,0 +1,62 @@ +{ + "parameters": { + "out_path": "/busco_wd", + "cpu": "16", + "force": "False", + "restart": "False", + "quiet": "False", + "download_path": "/erdos/adf/busco_downloads", + "datasets_version": "odb10", + "offline": "False", + "download_base_url": "https://busco-data.ezlab.org/v5/data/", + "auto-lineage": "False", + "auto-lineage-prok": "False", + "auto-lineage-euk": "False", + "update-data": "False", + "use_augustus": "False", + "batch_mode": "False", + "tar": "False", + "in": "/busco_wd/run_busco.bash.GaoQFy/medtr.HM078.gnm1.Q3TM.genome_main.fna", + "out": "BUSCO_genome_fabales_odb10", + "mode": "euk_genome_met", + "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", + "main_out": "/busco_wd/BUSCO_genome_fabales_odb10", + "lineage_results_dir": "run_fabales_odb10", + "domain": "eukaryota", + "max_intron": "90000", + "max_seq_len": "120000", + "metaeuk_parameters": "", + "metaeuk_rerun_parameters": "", + "contig_break": "10", + "scaffold_composition": "False", + "gene_predictor": "metaeuk" + }, + "lineage_dataset": { + "name": "fabales_odb10", + "creation_date": "2020-08-05", + "number_of_buscos": "5366", + "number_of_species": "10" + }, + "versions": { + "hmmsearch": 3.1, + "bbtools": "38.96", + "metaeuk": "5.34c21f2", + "busco": "5.4.3" + }, + "results": { + "one_line_summary": "C:99.0%[S:96.3%,D:2.7%],F:0.3%,M:0.7%,n:5366", + "Complete": 99.0, + "Single copy": 96.3, + "Multi copy": 2.7, + "Fragmented": 0.3, + "Missing": 0.7, + "n_markers": 5366, + "domain": "eukaryota", + 
"Number of scaffolds": "239", + "Number of contigs": "254", + "Total length": "493760656", + "Percent gaps": "0.000%", + "Scaffold N50": "62416031", + "Contigs N50": "31411382" + } +} \ No newline at end of file diff --git a/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 index 81c52198..04ee2b6d 100644 --- a/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 +++ b/Medicago/truncatula/genomes/HM078.gnm1.Q3TM/CHECKSUM.HM078.gnm1.Q3TM.md5 @@ -1,4 +1,7 @@ 4e4ea00a039158b5bdc1f274bbfb40a2 ./medtr.HM078.gnm1.Q3TM.genome_main.fna.gz.gzi 699e12b09676e7a0f7d143395f7ec1f3 ./medtr.HM078.gnm1.Q3TM.genome_main.fna.gz.fai +85e114f09cd90f6be7fe476e4cddba71 ./BUSCO/medtr.HM078.gnm1.Q3TM.busco.fabales_odb10.short_summary.json +b6a748e9750c298bc658a137d8f1d6c1 ./BUSCO/medtr.HM078.gnm1.Q3TM.busco.fabales_odb10.short_summary.txt +23821f47677c5c4012629ab4653ecbcf ./BUSCO/medtr.HM078.gnm1.Q3TM.busco.fabales_odb10.full_table.tsv.gz 3b594b5531a47ee8830d53698c44a179 ./README.HM078.gnm1.Q3TM.yml ab66fa55170527a4a49cca56e4080b49 ./medtr.HM078.gnm1.Q3TM.genome_main.fna.gz From 937671ed277b27e7c57d241bd91b6d06c5a87934 Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Tue, 23 Jan 2024 11:04:59 -0600 Subject: [PATCH 07/27] updates for modifications to chromosome names: Chr%d -> chr%02d --- .../CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 | 9 +++-- ...FL2.busco.fabales_odb10.short_summary.json | 2 +- .../CHECKSUM.V10309.gnm1.PFL2.md5 | 26 +++++++-------- .../README.V10309.gnm1.PFL2.yml | 33 ++++++++++--------- 4 files changed, 34 insertions(+), 36 deletions(-) diff --git a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 index 618bcbc5..c160ba41 100644 --- a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 +++ 
b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 @@ -1,12 +1,12 @@ 3f1a18896d5177db5fe45542ea95e258 ./MANIFEST.V10309.gnm1.ann1.CZRZ.correspondence.yml 8d7e589149431bf1b3b8d577ab89a7a8 ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.gzi -585d514247e90339c2852780379633c4 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz.tbi +6e8f6c330ae2b3c0c262564bb8a4c7a7 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz.tbi ca2619b180190a64447f8210e2d5eec6 ./arast.V10309.gnm1.ann1.CZRZ.legfed_v1_0.M65K.gfa.tsv.gz -a2243209dcacbd58933da9d8ef821ab5 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.bed.gz +019172ff554aa12f41de5ae259c02e38 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.bed.gz d13a0d5c902dff37ab7fbaeb9682e9fc ./CHANGES.V10309.gnm1.ann1.CZRZ.txt ecd023051a659495cebaf005c5f5a5f8 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.gzi ecd023051a659495cebaf005c5f5a5f8 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.gzi -4d3e658890cedb270609a160e0f3e684 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_exons.gff3.gz +7f128ca5e449152a6726d0d26a365b66 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_exons.gff3.gz 7cacd41f7aa04ec27ed09daa2c967e28 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz 5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.fai c69c7edf12ad79f176d0365902e36d4e ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.gzi @@ -22,9 +22,8 @@ eb04962bd814ee2aaaedd42e774f17ee ./arast.V10309.gnm1.ann1.CZRZ.protein_primary. 
5987914f4a2958d9d07f39a607152e03 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json c69c7edf12ad79f176d0365902e36d4e ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.gzi 92fa388cf112420111f5218a0da23306 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz -aceafbc0f83018f2b9e19be55a30665f ./.arast.V10309.gnm1.ann1.CZRZ.featid_map.tsv.gz.swp e2f21d5ed24891d848c9f98e244cadbe ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz -5b255f7d20defdcde92ea16827d98600 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz +87ef4d01b7a08ab75e7715e2603a6a7e ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz 2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.fai 2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.fai b42d54c9910048ddb841ca2ff57eac91 ./MANIFEST.V10309.gnm1.ann1.CZRZ.descriptions.yml diff --git a/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json b/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json index d338b609..cf51c038 100644 --- a/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json +++ b/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json @@ -16,7 +16,7 @@ "use_augustus": "False", "batch_mode": "False", "tar": "False", - "in": "/busco_wd/run_busco.bash.HhFEIb/arast.V10309.gnm1.PFL2.genome_main.fna", + "in": "/busco_wd/run_busco.bash.TUc4DD/arast.V10309.gnm1.PFL2.genome_main.fna", "out": "BUSCO_genome_fabales_odb10", "mode": "euk_genome_met", "lineage_dataset": "/erdos/adf/busco_downloads/lineages/fabales_odb10", diff --git a/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/CHECKSUM.V10309.gnm1.PFL2.md5 b/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/CHECKSUM.V10309.gnm1.PFL2.md5 index d8226bd4..8e94a8ad 100644 --- 
a/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/CHECKSUM.V10309.gnm1.PFL2.md5 +++ b/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/CHECKSUM.V10309.gnm1.PFL2.md5 @@ -1,14 +1,12 @@ -c908fbab9e3ba8bd12ea4c15d81cf15f ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.txt -7f781e9bdc60c3f91ad6b50a4d77e8a8 ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json -fab12c1ce8e88fe52d35b2a19f553b9f ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.full_table.tsv.gz -d41d8cd98f00b204e9800998ecf8427e ./BUSCO/*.busco.fabales_odb10.full_table.tsv.gz -faec240bbac90aa193a29ac1ea445fa5 ./MANIFEST.PFL2.correspondence.yml -039116c908fc69fccfd9fb7e87dbf914 ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.fai -ec29d4bc0070da9437427107daad2ce8 ./CHANGES.V10309.gnm1.PFL2.txt -73035054706eb276dcfe093afeeb7f88 ./MANIFEST.PFL2.descriptions.yml -79e6bb587c84bf683645b6dc852af8b3 ./arast.V10309.gnm1.PFL2.agp_placed.txt.gz -de9db7d3cde8f1bc4165d001bcdaae9d ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.gzi -b1b1d846422d32f2bf7e06c7cd4d4663 ./usage_policy.txt -6b55f7b9c5339a9d63dc7ae383775653 ./arast.V10309.gnm1.PFL2.agp_unplaced.txt.gz -7c4ecbd0d8cd4cb34c2148b999059d99 ./arast.V10309.gnm1.PFL2.genome_main.fna.gz -21de1a074eae6c2e77f6ed6d312e4fdf ./README.V10309.gnm1.PFL2.yml +b437e9a2e1a98b7ffcc6e5c19ed2a467 ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.fai +23600f3d3e963fca4db8171463a5511b ./arast.V10309.gnm1.PFL2.genome_main.fna.gz +23395511f3bea713fb4e99feffe7c7a1 ./arast.V10309.gnm1.PFL2.seqid_map.tsv.gz +06a6c1322f3b6d318ec252ca06998774 ./arast.V10309.gnm1.PFL2.initial_seqid_map.tsv.gz +c9b30ab32ad5a4d5b0d36264accf19c9 ./README.V10309.gnm1.PFL2.yml +a0519ad7023d621523e1a4dbbfe69870 ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.json +02058aeb14f69544672211741da0402f ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.short_summary.txt +c7cffe7305a3c19b8b5787357ffb90d4 ./BUSCO/arast.V10309.gnm1.PFL2.busco.fabales_odb10.full_table.tsv.gz 
+083bb2fc52e62d669290a8b8e291be4b ./MANIFEST.V10309.gnm1.PFL2.descriptions.yml +d13a0d5c902dff37ab7fbaeb9682e9fc ./CHANGES.V10309.gnm1.PFL2.txt +3dde83ffee922953772e2933f032e2e1 ./MANIFEST.V10309.gnm1.PFL2.correspondence.yml +b294aff4939ad016dfeefec23f28154f ./arast.V10309.gnm1.PFL2.genome_main.fna.gz.gzi diff --git a/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/README.V10309.gnm1.PFL2.yml b/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/README.V10309.gnm1.PFL2.yml index baf69de0..109cbf0d 100644 --- a/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/README.V10309.gnm1.PFL2.yml +++ b/Arachis/stenosperma/genomes/V10309.gnm1.PFL2/README.V10309.gnm1.PFL2.yml @@ -1,20 +1,16 @@ --- identifier: V10309.gnm1.PFL2 -provenance: "The files in this directory originated from http://peanutbase.org. There is also a corresponding genome assembly at GenBank. Files in this directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and PeanutBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers. If you use the data in the present directory, please 1) please cite the data appropriately - generally referring to the original publications for this data; and if you make use of any significant modifications in the files, then please also cite the respective database project(s) related to this directory." +provenance: "The files in this directory originated from GenBank, for RefSeq genome sequence GCF_014773155.1, submitted by the International Peanut Genome Initiative in 2018. The GenBank source is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. 
The files here are held as part of the LegumeInfo and Peanutbase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and Peanutbase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately." -source: http://peanutbase.org +source: "https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_014773155.1" -synopsis: Genome assembly 1 for Arachis stenosperma, accession V10309 +synopsis: "Genome assembly 1 for Arachis stenosperma, genotype V10309" scientific_name: Arachis stenosperma taxid: 217475 -bioproject: PRJNA610652 - -scientific_name_abbrev: arast - genotype: - V10309 @@ -24,22 +20,27 @@ supercontig_prefix: Scaffold description: "Genome assembly 1 for Arachis stenosperma, accession V10309, with sequenced generated using PacBio Sequel; Illumina HiSeq. Arachis stenosperma Krapov. & W.C. Greg. is a wild peanut relative native to central Brazil, in the past it was cultivated by native peoples of South America, and was carried to the Atlantic coast, where populations persist to the present day. It is a source of strong pest and disease resistance and has been used by peanut breeders and geneticists in interspecific hybrids. A. stenosperma is diploid species in the A-genome group of Arachis, which has similarity to the A genome of tetraploid cultivated peanut (A. hypogaea). This accession was sequenced with PacBio long reads, with contributions by USDA-ARS and researchers at Mars Inc. 
and the University of Georgia" -genbank_accession: GCA_014773155.1 +genbank_accession: GCF_014773155.1 + +original_file_creation_date: "2023-10-01" -original_file_creation_date: "2020-10-02" +local_file_creation_date: "2024-01-17" -local_file_creation_date: "2020-05-20" +dataset_release_date: "2024-01-22" -publication_doi: 10.1016/B978-1-63067-038-2.00008-3 +publication_doi: 10.1038/s41588-019-0405-z -dataset_release_date: "2022-12-01" +publication_title: "The genome sequence of segmental allotetraploid peanut Arachis hypogaea" -contributors: Clevenger,J., Cannon,S.B., Youngblood, C., Vaughn,J.N., Abernathy, B, Bertioli,D.J., Scheffler, B. +contributors: The International Peanut Genome Initiative; lead assembly group Jeremy Schmutz, Jerry Jenkins, Jane Grimwood; project leads David Bertioli; Soraya Bertioli; Brian Scheffler; Scott Jackson; Peggy Ozias-Akins -data_curators: Steven Cannon +citation: "Bertioli, D.J., Jenkins, J., Clevenger, J. et al. The genome sequence of segmental allotetraploid peanut Arachis hypogaea. Nat Genet 51, 877-884 (2019). 
https://doi.org/10.1038/s41588-019-0405-z" -public_access_level: public, with usage restriction +data_curators: Steven Cannon, Andrew Farmer + +public_access_level: public license: open -keywords: Arachis stenosperma +keywords: wild peanut, Arachis stenosperma + From f66a6f4579405b543872ac8d1c8289492d5df558 Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Wed, 24 Jan 2024 15:14:54 -0600 Subject: [PATCH 08/27] re-remove gigantea temporarily --- Cercis/GENUS/about_this_collection/description_Cercis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cercis/GENUS/about_this_collection/description_Cercis.yml b/Cercis/GENUS/about_this_collection/description_Cercis.yml index 6e75829b..d7dd8a75 100644 --- a/Cercis/GENUS/about_this_collection/description_Cercis.yml +++ b/Cercis/GENUS/about_this_collection/description_Cercis.yml @@ -7,4 +7,3 @@ description: "Cercis is a genus of about 10 species in the subfamily Cercidoidea species: - canadensis - chinensis - - gigantea From 8772389d6ba5ebd4de2ebfa5681b5e8340d552b1 Mon Sep 17 00:00:00 2001 From: adf-ncgr Date: Thu, 25 Jan 2024 11:56:31 -0700 Subject: [PATCH 09/27] update references to legacy site with appropriate equivalents or delete those which will be autocontent-generated --- .../description_Arachis_duranensis.yml | 6 ------ .../description_Arachis_hypogaea.yml | 9 --------- .../description_Arachis_ipaensis.yml | 6 ------ .../about_this_collection/description_Cajanus.yml | 2 +- .../description_Cajanus_cajan.yml | 6 ------ .../about_this_collection/description_Cicer.yml | 2 +- .../description_Cicer_arietinum.yml | 12 ------------ .../about_this_collection/description_Lotus.yml | 2 +- .../description_Lotus_japonicus.yml | 6 ------ .../about_this_collection/description_Medicago.yml | 2 +- .../description_Medicago_sativa.yml | 6 ------ .../description_Medicago_truncatula.yml | 6 ------ .../about_this_collection/description_Phaseolus.yml | 2 +- .../description_Phaseolus_acutifolius.yml | 6 ------ 
.../description_Phaseolus_lunatus.yml | 6 ------ .../description_Phaseolus_vulgaris.yml | 12 ------------ .../description_Pisum_sativum.yml | 6 ------ .../description_Trifolium_pratense.yml | 6 ------ .../about_this_collection/description_Vigna.yml | 2 +- .../description_Vigna_angularis.yml | 6 ------ .../description_Vigna_radiata.yml | 6 ------ .../description_Vigna_unguiculata.yml | 6 ------ 22 files changed, 6 insertions(+), 117 deletions(-) diff --git a/Arachis/duranensis/about_this_collection/description_Arachis_duranensis.yml b/Arachis/duranensis/about_this_collection/description_Arachis_duranensis.yml index 815918ec..3f230a98 100644 --- a/Arachis/duranensis/about_this_collection/description_Arachis_duranensis.yml +++ b/Arachis/duranensis/about_this_collection/description_Arachis_duranensis.yml @@ -16,12 +16,6 @@ strains: origin: Argentina, via USDA National Plant Germplasm Repository description: "Arachis duranensis strain V14167 originates in a region east of the Andes, in the vicinity of northern Argentina and southern Bolivia. This accession is thought to be one of the diploid ancestors of cultivated peanut. Its genome has been sequenced as part of the Peanut Genomics Initiative, to help in the accurate assembly of the more important cultivated peanut, A. hypogaea. 
This accession is described in Bertioli et al., 2016 (doi:10.1038/ng.3517)" resources: - - name: Peanutbase Genome Browser (GBrowse) - URL: "https://legacy.peanutbase.org/gb2/gbrowse/Aradu1.0" - description: "GBrowse for V14167 assembly 1.0" - - name: Peanutbase Genome Browser (JBrowse) - URL: "https://legacy.peanutbase.org/genomes/jbrowse?data=Aradu1.0" - description: "JBrowse for V14167 assembly 1.0" - name: "Genome assembly at NCBI" URL: "http://www.ncbi.nlm.nih.gov/assembly/GCA_000817695.1/" description: "Genome assembly at NCBI, V14167" diff --git a/Arachis/hypogaea/about_this_collection/description_Arachis_hypogaea.yml b/Arachis/hypogaea/about_this_collection/description_Arachis_hypogaea.yml index 015f5ace..bdf1f410 100644 --- a/Arachis/hypogaea/about_this_collection/description_Arachis_hypogaea.yml +++ b/Arachis/hypogaea/about_this_collection/description_Arachis_hypogaea.yml @@ -36,15 +36,6 @@ strains: origin: Southeastern United States description: "Cultivar Tifrunner is a runner-type peanut bred in the United States and widely planted as a variety and widely used in breeding programs. The genome of this cultivar is described in Bertioli et al., 2019 (doi:10.1038/s41588-019-0405-z)." 
resources: - - name: Peanutbase Genome Browser (GBrowse) - URL: "https://legacy.peanutbase.org/gbrowse_peanut1.0" - description: "GBrowse for Tifrunner assembly 1.0" - - name: Peanutbase Genome Browser (JBrowse) - URL: "https://legacy.peanutbase.org/genomes/jbrowse?data=arahy.Tifrunner.gnm1" - description: "JBrowse for Tifrunner assembly 1.0" - - name: PeanutBase Genome Browser (GBrowse) - URL: "https://legacy.peanutbase.org/gb2/gbrowse/arahy.Tifrunner.gnm2/" - description: "GBrowse for Tifrunner assembly 2.0" - name: NCBI Tifrunner assembly 2.0 URL: "https://www.ncbi.nlm.nih.gov/nuccore/PIVG00000000" description: "Tifrunner gnm2.ann2 (assembly 2.0 annotation 2.0) at NCBI" diff --git a/Arachis/ipaensis/about_this_collection/description_Arachis_ipaensis.yml b/Arachis/ipaensis/about_this_collection/description_Arachis_ipaensis.yml index c6e0512c..68d9d979 100644 --- a/Arachis/ipaensis/about_this_collection/description_Arachis_ipaensis.yml +++ b/Arachis/ipaensis/about_this_collection/description_Arachis_ipaensis.yml @@ -16,12 +16,6 @@ strains: origin: Southern Bolivia description: "Arachis ipaensis accession K30076 is the only accession collected from what is thought to be the origin of tetraploid peanut, Villa Montes Bolivia. 
It was likely carried there by early agriculturalists from a larger population several hundred kilometers to the north (Bertioli et al., 2016: doi:10.1038/ng.3517)" resources: - - name: Peanutbase Genome Browser (GBrowse) - URL: "https://legacy.peanutbase.org/gb2/gbrowse/Araip1.0" - description: "GBrowse for K30076 assembly 1.0" - - name: Peanutbase Genome Browser (JBrowse) - URL: "https://legacy.peanutbase.org/genomes/jbrowse?data=Araip1.0" - description: "JBrowse for K30076 assembly 1.0" - name: "Genome assembly at NCBI" URL: "http://www.ncbi.nlm.nih.gov/assembly/GCA_000816755.1/" description: "Genome assembly at NCBI, K30076" diff --git a/Cajanus/GENUS/about_this_collection/description_Cajanus.yml b/Cajanus/GENUS/about_this_collection/description_Cajanus.yml index 24c4cb18..4956c0de 100644 --- a/Cajanus/GENUS/about_this_collection/description_Cajanus.yml +++ b/Cajanus/GENUS/about_this_collection/description_Cajanus.yml @@ -15,5 +15,5 @@ resources: URL: "https://gcv.legumeinfo.org/gene;lis=cajca.C.cajan_19480?q=C.cajan_19480&sources=lis&algorithm=repeat&match=10&mismatch=-1&gap=-1&score=30&threshold=25&bmatched=20&bintermediate=10&bmask=10&linkage=average&cthreshold=20&neighbors=10&matched=4&intermediate=5&bregexp=&border=chromosome&regexp=&order=distance" description: "Browser for dynamically discovering and viewing genomic synteny across selected species."
- name: Germplasm GIS - URL: "https://legacy.legumeinfo.org/germplasm/map#?zoom=6&maxRecs=200&taxonQuery=Cajanus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" + URL: "https://germplasm-map.legumeinfo.org/#?zoom=6&maxRecs=200&taxonQuery=Cajanus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" description: "Geographic information system viewer, showing collection locations for Cajanus data held by the U.S. National Plant Germplasm System." diff --git a/Cajanus/cajan/about_this_collection/description_Cajanus_cajan.yml b/Cajanus/cajan/about_this_collection/description_Cajanus_cajan.yml index f2ab45ed..6bfac3f8 100644 --- a/Cajanus/cajan/about_this_collection/description_Cajanus_cajan.yml +++ b/Cajanus/cajan/about_this_collection/description_Cajanus_cajan.yml @@ -6,12 +6,6 @@ abbrev: cajca commonName: pigeonpea description: "Pigeonpea (Cajanus cajan) is a member of the millettioid (warm-season legume) clade within the sub-family Papilionoideae. Pigeonpea is the main protein source for more than a billion people in the developing world. It is grown mainly in the semi-arid tropical regions (Asia, Africa and Central/South America)." 
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/Cc1.0" - description: "GBrowse for Cajanus cajan assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=Cc1.0" - description: "JBrowse for Cajanus cajan assembly v1.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the Cajanus cajan assembly v1.0" diff --git a/Cicer/GENUS/about_this_collection/description_Cicer.yml b/Cicer/GENUS/about_this_collection/description_Cicer.yml index 731f696d..b010aa31 100644 --- a/Cicer/GENUS/about_this_collection/description_Cicer.yml +++ b/Cicer/GENUS/about_this_collection/description_Cicer.yml @@ -20,5 +20,5 @@ resources: URL: "https://gcv.legumeinfo.org/gene;lis=cicar.ICC4958.Ca_00714?q=cicar.ICC4958.Ca_00714&sources=lis&algorithm=repeat&match=10&mismatch=-1&gap=-1&score=30&threshold=25&bmatched=20&bintermediate=10&bmask=10&linkage=average&cthreshold=20&neighbors=10&matched=4&intermediate=5&bregexp=&border=chromosome&regexp=&order=distance" description: "Browser for dynamically discovering and viewing genomic synteny across selected species."
- name: Germplasm GIS - URL: "https://legacy.legumeinfo.org/germplasm/map#?zoom=6&maxRecs=200&taxonQuery=Cicer&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" + URL: "https://germplasm-map.legumeinfo.org/#?zoom=6&maxRecs=200&taxonQuery=Cicer&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" description: "Geographic information system viewer, showing collection locations for Cicer data held by the U.S. National Plant Germplasm System." diff --git a/Cicer/arietinum/about_this_collection/description_Cicer_arietinum.yml b/Cicer/arietinum/about_this_collection/description_Cicer_arietinum.yml index 10338517..12df1f04 100644 --- a/Cicer/arietinum/about_this_collection/description_Cicer_arietinum.yml +++ b/Cicer/arietinum/about_this_collection/description_Cicer_arietinum.yml @@ -13,12 +13,6 @@ strains: origin: Canada description: "Chickpea variety CDC Frontier is a kabuli type chickpea (light-seeded), selected as a variety in Canada. 
The genome has been sequenced: Varshney et al., 2013 (DOI:10.1038/nbt.2491)" resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/cicar.CDCFrontier.v1.0" - description: "GBrowse for CDC Frontier assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=cicar.CDCFrontier.v1.0" - description: "JBrowse for CDC Frontier assembly v1.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the CDC Frontier v1.0 assembly" @@ -28,12 +22,6 @@ strains: origin: India description: "Chickpea variety ICC 4958 is a desi type chickpea (dark-seeded). The genome has been sequenced: Parween, S et al., 2015 (DOI:10.1038/srep12806)" resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/cicar.ICC4958.v2.0" - description: "GBrowse for ICC 4958 assembly v2.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=cicar.ICC4958.v2.0" - description: "JBrowse for assembly ICC 4958 v2.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the ICC 4958 assembly v2.0" diff --git a/Lotus/GENUS/about_this_collection/description_Lotus.yml b/Lotus/GENUS/about_this_collection/description_Lotus.yml index e850bd74..922d46f2 100644 --- a/Lotus/GENUS/about_this_collection/description_Lotus.yml +++ b/Lotus/GENUS/about_this_collection/description_Lotus.yml @@ -12,5 +12,5 @@ resources: URL: "https://gcv.legumeinfo.org/gene;lis=lotja.Lj1g3v0579550?q=Lj1g3v0579550&sources=lis&algorithm=repeat&match=10&mismatch=-1&gap=-1&score=30&threshold=25&bmatched=20&bintermediate=10&bmask=10&linkage=average&cthreshold=20&neighbors=10&matched=4&intermediate=5&bregexp=&border=chromosome&regexp=&order=distance" description: "Browser for dynamically discovering and viewing genomic synteny across selected
species." - name: Germplasm GIS - URL: "https://legacy.legumeinfo.org/germplasm/map#?zoom=8&maxRecs=200&taxonQuery=Lotus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=36.22&lng=-111.81&mapHeight=164.75&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.33522435930639&ne_lng=-93.97705078125001&sw_lat=34.397844946449865&sw_lng=-124.95849609375001&accessionIdsInclusive=false" + URL: "https://germplasm-map.legumeinfo.org/#?zoom=8&maxRecs=200&taxonQuery=Lotus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=36.22&lng=-111.81&mapHeight=164.75&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.33522435930639&ne_lng=-93.97705078125001&sw_lat=34.397844946449865&sw_lng=-124.95849609375001&accessionIdsInclusive=false" description: "Geographic information system viewer, showing collection locations for Lotus data held by the U.S. National Plant Germplasm System." diff --git a/Lotus/japonicus/about_this_collection/description_Lotus_japonicus.yml b/Lotus/japonicus/about_this_collection/description_Lotus_japonicus.yml index bf06f42d..c81236f7 100644 --- a/Lotus/japonicus/about_this_collection/description_Lotus_japonicus.yml +++ b/Lotus/japonicus/about_this_collection/description_Lotus_japonicus.yml @@ -6,12 +6,6 @@ abbrev: lotja commonName: birdsfoot trefoil description: "Lotus japonicus, a forage crop (much like clover), is widely used as a model in legume biology. It is physically small in stature, with a small genome, and a short generation time, and it is relatively easily transformable (for gene functional studies). Genome version available on LIS is version Lj2.5 and Lj3.0, publication associated with this genome version is available here: Sato et al., 2008 (doi:10.1093/dnares/dsn008)."
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/Lj3.0" - description: "GBrowse for Lutus Japonicus assembly v3.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse?data=Lj3.0" - description: "JBrowse for Lotus Japonicus assembly v3.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the Lotus Japonicus assembly v3.0" diff --git a/Medicago/GENUS/about_this_collection/description_Medicago.yml b/Medicago/GENUS/about_this_collection/description_Medicago.yml index b88c2a4b..42b207b8 100644 --- a/Medicago/GENUS/about_this_collection/description_Medicago.yml +++ b/Medicago/GENUS/about_this_collection/description_Medicago.yml @@ -21,5 +21,5 @@ resources: URL: "https://gcv.legumeinfo.org/gene;lis=medtr.A17_HM341.gnm4.ann2.Medtr1g009230" description: "Browser for dynamically discovering and viewing genomic synteny across selected species." - name: Germplasm GIS - URL: "https://legacy.legumeinfo.org/germplasm/map#?zoom=6&maxRecs=200&taxonQuery=Medicago&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" + URL: "https://germplasm-map.legumeinfo.org/#?zoom=6&maxRecs=200&taxonQuery=Medicago&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" description: "Geographic information system viewer, showing collection locations for Medicago data held by the U.S. National Plant Germplasm System." 
diff --git a/Medicago/sativa/about_this_collection/description_Medicago_sativa.yml b/Medicago/sativa/about_this_collection/description_Medicago_sativa.yml index e3d6eba7..27e53ff9 100644 --- a/Medicago/sativa/about_this_collection/description_Medicago_sativa.yml +++ b/Medicago/sativa/about_this_collection/description_Medicago_sativa.yml @@ -22,12 +22,6 @@ strains: origin: China description: "Xinjiang Daye is a salt-sensitive cultivar of alfalfa." resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/gb2/gbrowse/medsa.XinJiangDaYe.gnm1" - description: "GBrowse for XinJiangDaYe assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=medsa.XinJiangDaYe.gnm1" - description: "JBrowse for XinJiangDaYe assembly v1.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the XinJiangDaYe assembly v1.0" diff --git a/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml b/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml index 216e4474..bc6380c2 100644 --- a/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml +++ b/Medicago/truncatula/about_this_collection/description_Medicago_truncatula.yml @@ -7,12 +7,6 @@ commonName: barrel medic description: "Medicago truncatula, a close relative of alfalfa (M. sativa), is widely used as a model in legume biology. It is physically small in stature, with a small genome, and has a short generation-to-generation time. Sevweral high-quality genome assemblies have been generated, including Tang et al., 2014 (DOI:10.1186/1471-2164-15-312) and Pecrix et al., 2018 (DOI:10.1038/s41477-018-0286-7)." 
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/Mt4.0" - description: "GBrowse for Medicago truncatula assembly v4.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/Mt4.0" - description: "JBrowse for Medicago truncatula assembly v4.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the Medicago truncatula assembly v4.0" diff --git a/Phaseolus/GENUS/about_this_collection/description_Phaseolus.yml b/Phaseolus/GENUS/about_this_collection/description_Phaseolus.yml index 7ce25fa2..0cac81bc 100644 --- a/Phaseolus/GENUS/about_this_collection/description_Phaseolus.yml +++ b/Phaseolus/GENUS/about_this_collection/description_Phaseolus.yml @@ -23,5 +23,5 @@ resources: URL: "https://gcvit.phaseolus.legumeinfo.org" description: "Genome-wide views of genetic variants (SNPs) between bean accessions" - name: Germplasm GIS - URL: "https://legacy.legumeinfo.org/germplasm/map#?zoom=6&maxRecs=200&taxonQuery=Phaseolus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" + URL: "https://germplasm-map.legumeinfo.org/#?zoom=6&maxRecs=200&taxonQuery=Phaseolus&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188.25&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" description: "Geographic information system viewer, showing collection locations for Phaseolus data held by the U.S. National Plant Germplasm System." 
diff --git a/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml b/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml index 3858a9f4..f3481728 100644 --- a/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml +++ b/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml @@ -6,12 +6,6 @@ abbrev: phaac commonName: tepary bean description: "The tepary bean (Phaseolus acutifolius), native to the Southwestern United States and northern Mexico, has been cultivated by indigenous groups in that region since pre-Columbian times. It is generally more drought-tolerant than common bean" resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/phaac.Frijol_Bayo.gnm1" - description: "GBrowse for Frijol_Bayo assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=phaac.Frijol_Bayo.gnm1" - description: "JBrowse for Frijol_Bayo assembly v1.0" strains: - identifier: Frijol_Bayo diff --git a/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml b/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml index ff725fbe..e5c31370 100644 --- a/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml +++ b/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml @@ -13,12 +13,6 @@ strains: origin: Colombia description: "G27455 was selected from MI population, which is the most widespread among domesticated gene pools. Domesticated accessions have low heterozygosity, which reduces the complexity of genome assembly. G27455 is cultivated in the north of Colombia, where temperatures are very high all year, suggesting it may be a good source of candidate genes for resistance to heat and drought stress." 
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/phalu.G27455.gnm1" - description: "GBrowse for G27544 assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=phalu.G27455.gnm1" - description: "JBrowse for G27544 assembly v1.0" - identifier: B2C accession: PI 549515 name: B2C diff --git a/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml b/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml index 15bbee9d..57577ab2 100644 --- a/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml +++ b/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml @@ -13,12 +13,6 @@ strains: origin: Peru description: "Andean landrace G19833 was selected for genome sequencing partly due to its resistance to numerous diseases, including bean rust (Uromyces appendiculatus), as well as anthracnose, angular leaf spot, Aschochyta blight, and the bean golden mosaic and bean common mosaic viruses (Hurtado-Gonzales et al., 2017; https://digitalcommons.unl.edu/usdaarsfacpub/1664/). This accession has also been used to generate populations for mapping traits such as phosphorous acquisition and agronomic performance." 
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/phavu.G19833.gnm2" - description: "GBrowse for G19833 assembly v2.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=phavu.G19833.gnm2" - description: "JBrowse for G19833 assembly v2.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the Phaseolus vulgaris genome v2.0" @@ -28,12 +22,6 @@ strains: origin: US National Plant Germplasm System description: "Bean variety UI111 is a Mesomarican line that has been used in numerous breeding projects and trait-mapping studies.It is a pinto-type bean, with indeterminate climbing habit." resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/phavu.UI111.gnm1/" - description: "GBrowse for UI111 assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/phavu.UI111.gnm1/" - description: "JBrowse for UI111 assembly v1.0" - identifier: LaborOvalle accession: Labor Ovalle name: Labor Ovalle diff --git a/Pisum/sativum/about_this_collection/description_Pisum_sativum.yml b/Pisum/sativum/about_this_collection/description_Pisum_sativum.yml index 008d001c..6aa32259 100644 --- a/Pisum/sativum/about_this_collection/description_Pisum_sativum.yml +++ b/Pisum/sativum/about_this_collection/description_Pisum_sativum.yml @@ -13,12 +13,6 @@ strains: origin: France description: "The variety Cameor was released by the French breeding company Seminor in 1973." 
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/pissa.Cameor.gnm1" - description: "GBrowse for Cameor assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=pissa.Cameor.gnm1" - description: "JBrowse for Cameor assembly v1.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against Cameor assembly v1.0" diff --git a/Trifolium/pratense/about_this_collection/description_Trifolium_pratense.yml b/Trifolium/pratense/about_this_collection/description_Trifolium_pratense.yml index aab294bb..07987cd8 100644 --- a/Trifolium/pratense/about_this_collection/description_Trifolium_pratense.yml +++ b/Trifolium/pratense/about_this_collection/description_Trifolium_pratense.yml @@ -7,12 +7,6 @@ commonName: red clover description: "Red clover (Trifolium pratense) is a perennial legume species native to Northwest Africa, Europe and Western Asia. Today, red clover is found in many regions around the world including the Americas and Australia. Unlike other legume species, red clover is an outcrossing species due to a strong gametophytic incompatibility system. Red clover is a widely grown fodder legume crop and is valued for its role in increasing soil fertility by nitrogen fixation." 
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/Tp2.0" - description: "GBrowse Trifolium pratense assembly v2.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=Tp2.0" - description: "JBrowse for Trifolium pratense assembly v2.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against Trifolium pratense assembly v2.0" diff --git a/Vigna/GENUS/about_this_collection/description_Vigna.yml b/Vigna/GENUS/about_this_collection/description_Vigna.yml index a1e6282a..b6e80171 100644 --- a/Vigna/GENUS/about_this_collection/description_Vigna.yml +++ b/Vigna/GENUS/about_this_collection/description_Vigna.yml @@ -20,5 +20,5 @@ resources: URL: "https://gcv.legumeinfo.org/gene;lis=vigun.IT97K-499-35.gnm1.ann2.Vigun05g001800" description: "Browser for dynamically discovering and viewing genomic synteny across selected species." - name: Germplasm GIS - URL: "https://legacy.legumeinfo.org/germplasm/map#?zoom=6&maxRecs=200&taxonQuery=Vigna&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" + URL: "https://germplasm-map.legumeinfo.org/#?zoom=6&maxRecs=200&taxonQuery=Vigna&traitScale=global&geocodedOnly=false&traitExcludeUnchar=false&limitToMapExtent=false&lat=35.87&lng=-109.47&mapHeight=188&baseMap=ESRI%20-%20NatGeo%20(default,%20reference%20map)&ne_lat=37.52715361723378&ne_lng=-93.97705078125001&sw_lat=34.17999758688084&sw_lng=-124.95849609375001&accessionIdsInclusive=false" description: "Geographic information system viewer, showing collection locations for Vigna data held by the U.S. National Plant Germplasm System." 
diff --git a/Vigna/angularis/about_this_collection/description_Vigna_angularis.yml b/Vigna/angularis/about_this_collection/description_Vigna_angularis.yml index 3e95e1c5..40265b04 100644 --- a/Vigna/angularis/about_this_collection/description_Vigna_angularis.yml +++ b/Vigna/angularis/about_this_collection/description_Vigna_angularis.yml @@ -6,12 +6,6 @@ abbrev: vigan commonName: adzuki bean description: "Adzuki bean (Vigna angularis) is a diploid legume crop (2n = 2x = 22). Adzuki beans are commonly sweetened before eating by boiling the beans with sugar, resulting in red bean paste. The adzuki bean is a good dietary source for a variety of minerals including; magnesium, potassium and zinc. Genome assemblies have been generated for several accessions." resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/Va3.0" - description: "GBrowse for Vigna angularis assembly v3.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=Va3.0" - description: "JBrowse for Vigna angularis assembly v3.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the Vigna angularis assembly v3.0" diff --git a/Vigna/radiata/about_this_collection/description_Vigna_radiata.yml b/Vigna/radiata/about_this_collection/description_Vigna_radiata.yml index 6dfcc4af..afb590a0 100644 --- a/Vigna/radiata/about_this_collection/description_Vigna_radiata.yml +++ b/Vigna/radiata/about_this_collection/description_Vigna_radiata.yml @@ -7,12 +7,6 @@ commonName: mungbean description: "Mungbean (Vigna radiata), also known as moong bean, green gram, is a fast-growing warm-season legume and has a diploid chromosome number of 2n=22. Mungbean is mainly cultivated today in China, India and Southeast Asia but can be found in dry regions within Southern Europe and United States. Mungbeans are a good source of dietary protein, folate and iron. 
This legume species was moved from the genus Phaseolus to Vigna and is correctly cites as Vigna radiata." resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/Vr1.0" - description: "GBrowse for Vigna radiata assembly v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=Vr1.0" - description: "JBrowse for Vigna radiata assembly v1.0" - name: LIS SequenceServer URL: "https://sequenceserver.legumeinfo.org/" description: "SequenceServer BLAST against the Vigna radiata assembly v1.0" diff --git a/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml b/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml index 38aa8fd1..14f07bbd 100644 --- a/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml +++ b/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml @@ -17,12 +17,6 @@ strains: origin: IITA, Nigeria description: "Cowpea variety is an elite breeding line developed at the International Institute of Tropical Agriculture (IITA, Nigeria). This accession was used for generation of the reference genome assembly described in Lonardi et al., 2019 (DOI:10.1111/tpj.14349)." 
resources: - - name: LIS Genome Browser (GBrowse) - URL: "https://legacy.legumeinfo.org/genomes/gbrowse/vigun.IT97K-499-35.gnm1" - description: "GBrowse for IT97K-499-35 v1.0" - - name: LIS Genome Browser (JBrowse) - URL: "https://legacy.legumeinfo.org/genomes/jbrowse/?data=vigun.IT97K-499-35.gnm1" - description: "JBrowse for IT97K-499-35 assembly v1.0" - identifier: CB5-2 name: CB5-2 accession: CB5-2 From b1e2dee3612cc88cdd1decfd3cf8fbeeceb2e13b Mon Sep 17 00:00:00 2001 From: adf-ncgr Date: Thu, 25 Jan 2024 13:22:49 -0700 Subject: [PATCH 10/27] fix empty resources causing yaml validation failure --- .../about_this_collection/description_Vigna_unguiculata.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml b/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml index 14f07bbd..1517d1ff 100644 --- a/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml +++ b/Vigna/unguiculata/about_this_collection/description_Vigna_unguiculata.yml @@ -16,7 +16,6 @@ strains: name: IT97K-499-35 origin: IITA, Nigeria description: "Cowpea variety is an elite breeding line developed at the International Institute of Tropical Agriculture (IITA, Nigeria). This accession was used for generation of the reference genome assembly described in Lonardi et al., 2019 (DOI:10.1111/tpj.14349)." 
- resources: - identifier: CB5-2 name: CB5-2 accession: CB5-2 From b970b638774b1a7e260336d50d9686b23d8746b9 Mon Sep 17 00:00:00 2001 From: adf-ncgr Date: Thu, 25 Jan 2024 13:24:51 -0700 Subject: [PATCH 11/27] more yaml validation failures due to empty resources lists following deletion of "legacy" --- .../about_this_collection/description_Phaseolus_acutifolius.yml | 1 - .../about_this_collection/description_Phaseolus_lunatus.yml | 1 - .../about_this_collection/description_Phaseolus_vulgaris.yml | 1 - 3 files changed, 3 deletions(-) diff --git a/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml b/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml index f3481728..b63ca7f1 100644 --- a/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml +++ b/Phaseolus/acutifolius/about_this_collection/description_Phaseolus_acutifolius.yml @@ -5,7 +5,6 @@ species: acutifolius abbrev: phaac commonName: tepary bean description: "The tepary bean (Phaseolus acutifolius), native to the Southwestern United States and northern Mexico, has been cultivated by indigenous groups in that region since pre-Columbian times. It is generally more drought-tolerant than common bean" -resources: strains: - identifier: Frijol_Bayo diff --git a/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml b/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml index e5c31370..2dac6c08 100644 --- a/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml +++ b/Phaseolus/lunatus/about_this_collection/description_Phaseolus_lunatus.yml @@ -12,7 +12,6 @@ strains: name: G27455 origin: Colombia description: "G27455 was selected from MI population, which is the most widespread among domesticated gene pools. Domesticated accessions have low heterozygosity, which reduces the complexity of genome assembly. 
G27455 is cultivated in the north of Colombia, where temperatures are very high all year, suggesting it may be a good source of candidate genes for resistance to heat and drought stress." - resources: - identifier: B2C accession: PI 549515 name: B2C diff --git a/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml b/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml index 57577ab2..4afe101b 100644 --- a/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml +++ b/Phaseolus/vulgaris/about_this_collection/description_Phaseolus_vulgaris.yml @@ -21,7 +21,6 @@ strains: name: UI111 origin: US National Plant Germplasm System description: "Bean variety UI111 is a Mesomarican line that has been used in numerous breeding projects and trait-mapping studies.It is a pinto-type bean, with indeterminate climbing habit." - resources: - identifier: LaborOvalle accession: Labor Ovalle name: Labor Ovalle From 306ef4f283d6d954b46d266ebc8028e3d44ceff1 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Sat, 27 Jan 2024 12:24:12 -0600 Subject: [PATCH 12/27] New repeat library collection, for repeat-masking across the legumes. --- .../README.TE_lib_2024.rpt.YFQ7.yml | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml diff --git a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml new file mode 100644 index 00000000..fd1b28c6 --- /dev/null +++ b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml @@ -0,0 +1,47 @@ +--- +identifier: TE_lib_2024.rpt.YFQ7 + +provenance: "This repository is considered the primary source for these files. The files here are held as part of the PeanutBase, LegumeInfo, and SoyBase projects." + +synopsis: Genomic repetitive elements for legume species. 
Derived from 14 diverse genera (11 legumes and 3 non-legumes). + +scientific_name: Fabaceae + +taxid: 3803 + +scientific_name_abbrev: legume + +genotype: + - Arachis species + - Arabidopsis thaliana + - Bauhinia tomentosa + - Cercis canadensis + - Cercis chinensis + - Chamaecrista fasciculata + - Glycine max + - Medicago truncatula + - Phaseolus vulgaris + - Prunus persica + - Quillaja saponaria + - Senna tora + - Sindora glauca + - Vitis vinifera + +description: "Genomic repetitive elements for legume species. Derived from RepeatModeler run on 14 genomes -- 11 legumes and three non-legume species: arasp arath bauto cerca cerch chafa glyma medtr phavu prupe quisa sento singl vitvi. Additional curated repeats in Arachis contributed by Dongying Gao, David Bertioli, Soraya Bertioli and in Glycine contributed by Jianxin Ma and Jianchang Du, as part of the Williams 82 reference genome project in 2010." + +original_file_creation_date: "2024-01-23" + +local_file_creation_date: "2024-01-23" + +dataset_release_date: "2024-01-27" + +contributors: "Hyunoh Lee, Steven Cannon, Dongying Gao, David Bertioli, Jianxin Ma, Jianchang Du" + +data_curators: Steven Cannon, Hyunoh Lee + +public_access_level: public + +license: Open + +keywords: transposable elements + From 03245499ce561f599d141a11a4448e8b51cb5346 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Sat, 27 Jan 2024 18:24:36 -0600 Subject: [PATCH 13/27] Add bed file to collection HM078.gnm1.ann1.3RNP --- .../HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 index 4165675d..dead58ef 100644 --- a/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 +++ b/Medicago/truncatula/annotations/HM078.gnm1.ann1.3RNP/CHECKSUM.HM078.gnm1.ann1.3RNP.md5 @@ -3,6 +3,8 @@
adf1b17620b4f5699c1502a6588339a2 ./medtr.HM078.gnm1.ann1.3RNP.gene_models_main. 49e4992064141a84e361dbadd785ba58 ./medtr.HM078.gnm1.ann1.3RNP.protein.faa.gz cd80fa46e412cb66472db90883ace6d8 ./medtr.HM078.gnm1.ann1.3RNP.cds.fna.gz.gzi f0288fdaf6e82de3e957886ffeb1a080 ./medtr.HM078.gnm1.ann1.3RNP.legfed_v1_0.M65K.gfa.tsv.gz +be4c463008222c4542fd0e28a4888dd6 ./medtr.HM078.gnm1.ann1.3RNP.gene_models_main.bed.gz.tbi +844923d40861a91c05804f396c0a701b ./medtr.HM078.gnm1.ann1.3RNP.gene_models_main.bed.gz b818a5a86bc7c04021d4103f86e5a276 ./medtr.HM078.gnm1.ann1.3RNP.iprscan.gff3.gz 82f5ebc2af0f5a48950660c142848d2d ./medtr.HM078.gnm1.ann1.3RNP.protein.faa.gz.fai 239fb56cf3070c2d3f39c6c80630e37b ./medtr.HM078.gnm1.ann1.3RNP.gene_models_main.gff3.gz.tbi From 6c06d5ed7f08dd3e8eb68f9a221610450f686cd9 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Sat, 27 Jan 2024 18:27:14 -0600 Subject: [PATCH 14/27] Change directory and file names for LEGUMES/Fabaceae/genefamilies and LEGUMES/Fabaceae/supplements for consistency with Data Store naming conventions (strain.type.KEY4 for directories, gensp.strain.type.KEY4.filetype.ext for files) --- .../CHECKSUM.legume.genefam.fam1.M65K.md5 | 15 --------------- .../CHECKSUM.mixed.fam1.M65K.md5 | 15 +++++++++++++++ .../README.mixed.fam1.M65K.yml} | 0 .../CHECKSUM.legume.genefam.esm.RDQM.md5 | 18 ------------------ .../mixed.esm.RDQM/CHECKSUM.mixed.esm.RDQM.md5 | 18 ++++++++++++++++++ .../README.mixed.esm.RDQM.yml} | 0 6 files changed, 33 insertions(+), 33 deletions(-) delete mode 100644 LEGUMES/Fabaceae/genefamilies/legume.genefam.fam1.M65K/CHECKSUM.legume.genefam.fam1.M65K.md5 create mode 100644 LEGUMES/Fabaceae/genefamilies/mixed.fam1.M65K/CHECKSUM.mixed.fam1.M65K.md5 rename LEGUMES/Fabaceae/genefamilies/{legume.genefam.fam1.M65K/README.legume.genefam.fam1.M65K.yml => mixed.fam1.M65K/README.mixed.fam1.M65K.yml} (100%) delete mode 100644 LEGUMES/Fabaceae/supplements/legume.genefam.esm.RDQM/CHECKSUM.legume.genefam.esm.RDQM.md5 create mode 
100644 LEGUMES/Fabaceae/supplements/mixed.esm.RDQM/CHECKSUM.mixed.esm.RDQM.md5 rename LEGUMES/Fabaceae/supplements/{legume.genefam.esm.RDQM/README.legume.genefam.esm.RDQM.yml => mixed.esm.RDQM/README.mixed.esm.RDQM.yml} (100%) diff --git a/LEGUMES/Fabaceae/genefamilies/legume.genefam.fam1.M65K/CHECKSUM.legume.genefam.fam1.M65K.md5 b/LEGUMES/Fabaceae/genefamilies/legume.genefam.fam1.M65K/CHECKSUM.legume.genefam.fam1.M65K.md5 deleted file mode 100644 index eda85ae7..00000000 --- a/LEGUMES/Fabaceae/genefamilies/legume.genefam.fam1.M65K/CHECKSUM.legume.genefam.fam1.M65K.md5 +++ /dev/null @@ -1,15 +0,0 @@ -f6accb8df52ee6ea5fb37ecacfb40b33 ./legume.genefam.fam1.M65K.trees_ML_rooted.tar.gz -3ca90e8113cc374cbd608090872e2577 ./legume.genefam.fam1.M65K.hmmalign_trim.tar.gz -e81ada7896a13f7950de0ad7b8c55d1f ./legume.genefam.fam1.M65K.info_fam_composition_sum.tsv.gz -428436e533043dd818e5831e4c4fce30 ./legume.genefam.fam1.M65K.hmmalign.tar.gz -0ed4739d10b4e7ff784aff513bcd6ead ./legume.genefam.fam1.M65K.median_abs_dev.tsv.gz -10f32fcb4a5d40da90483fa925652877 ./MANIFEST.legume.genefam.fam1.M65K.descriptions.yml -5447b43ca8ec262cb9799d1090ae1312 ./CHANGES.legume.genefam.fam1.M65K.txt -98da488c10642916e883971cd269d021 ./legume.genefam.fam1.M65K.hmmemit.tar.gz -4ea3d73a675e78d5f1cedf91a46c4b01 ./legume.genefam.fam1.M65K.proteomes.tar.gz -cbf8bb785a0f6879b5b5e973fb0c72a1 ./MANIFEST.legume.genefam.fam1.M65K.correspondence.yml -40997a2329d2ac4dd5104cfac40fbc9b ./legume.genefam.fam1.M65K.hmm.tar.gz -d0ef153bf9b0572e249f1aab7230f035 ./README.legume.genefam.fam1.M65K.yml -70b17c178000a3075fde8a52dc88e9ec ./legume.genefam.fam1.M65K.info_annot_ahrd.tsv.gz -d61c64afd7f3489a9771bc7bf91c5eee ./legume.genefam.fam1.M65K.info_fam_composition_list.tsv.gz -f7c74df3b35c4e338621d072887cd44e ./legume.genefam.fam1.M65K.family_fasta.tar.gz diff --git a/LEGUMES/Fabaceae/genefamilies/mixed.fam1.M65K/CHECKSUM.mixed.fam1.M65K.md5 b/LEGUMES/Fabaceae/genefamilies/mixed.fam1.M65K/CHECKSUM.mixed.fam1.M65K.md5 
new file mode 100644 index 00000000..405c807f --- /dev/null +++ b/LEGUMES/Fabaceae/genefamilies/mixed.fam1.M65K/CHECKSUM.mixed.fam1.M65K.md5 @@ -0,0 +1,15 @@ +40997a2329d2ac4dd5104cfac40fbc9b ./legume.mixed.fam1.M65K.hmm.tar.gz +e81ada7896a13f7950de0ad7b8c55d1f ./legume.mixed.fam1.M65K.info_fam_composition_sum.tsv.gz +0ed4739d10b4e7ff784aff513bcd6ead ./legume.mixed.fam1.M65K.median_abs_dev.tsv.gz +428436e533043dd818e5831e4c4fce30 ./legume.mixed.fam1.M65K.hmmalign.tar.gz +f6accb8df52ee6ea5fb37ecacfb40b33 ./legume.mixed.fam1.M65K.trees_ML_rooted.tar.gz +3ca90e8113cc374cbd608090872e2577 ./legume.mixed.fam1.M65K.hmmalign_trim.tar.gz +f7c74df3b35c4e338621d072887cd44e ./legume.mixed.fam1.M65K.family_fasta.tar.gz +70b17c178000a3075fde8a52dc88e9ec ./legume.mixed.fam1.M65K.info_annot_ahrd.tsv.gz +d0ef153bf9b0572e249f1aab7230f035 ./README.mixed.fam1.M65K.yml +adf580cb24cb21e9c8239b48da4b8b6a ./CHANGES.mixed.fam1.M65K.txt +8786e12f3bfafcb4780a0e87ecb03c1a ./MANIFEST.mixed.fam1.M65K.descriptions.yml +64e7dcfb5c5558cf18c42292a4105b05 ./MANIFEST.mixed.fam1.M65K.correspondence.yml +98da488c10642916e883971cd269d021 ./legume.mixed.fam1.M65K.hmmemit.tar.gz +d61c64afd7f3489a9771bc7bf91c5eee ./legume.mixed.fam1.M65K.info_fam_composition_list.tsv.gz +4ea3d73a675e78d5f1cedf91a46c4b01 ./legume.mixed.fam1.M65K.proteomes.tar.gz diff --git a/LEGUMES/Fabaceae/genefamilies/legume.genefam.fam1.M65K/README.legume.genefam.fam1.M65K.yml b/LEGUMES/Fabaceae/genefamilies/mixed.fam1.M65K/README.mixed.fam1.M65K.yml similarity index 100% rename from LEGUMES/Fabaceae/genefamilies/legume.genefam.fam1.M65K/README.legume.genefam.fam1.M65K.yml rename to LEGUMES/Fabaceae/genefamilies/mixed.fam1.M65K/README.mixed.fam1.M65K.yml diff --git a/LEGUMES/Fabaceae/supplements/legume.genefam.esm.RDQM/CHECKSUM.legume.genefam.esm.RDQM.md5 b/LEGUMES/Fabaceae/supplements/legume.genefam.esm.RDQM/CHECKSUM.legume.genefam.esm.RDQM.md5 deleted file mode 100644 index 66612a9d..00000000 --- 
a/LEGUMES/Fabaceae/supplements/legume.genefam.esm.RDQM/CHECKSUM.legume.genefam.esm.RDQM.md5 +++ /dev/null @@ -1,18 +0,0 @@ -124ea0038dd863eae408e7f25b2a5148 ./legume.genefam.esm.RDQM.ST05_legume_genome_sizes.xlsx -087c36db426687770c6f098182ced009 ./MANIFEST.legume.genefam.esm.RDQM.descriptions.yml -b37fb000dd91f067004b49f7ea3a34cd ./legume.genefam.esm.RDQM.SD08_quota_dotplots.pdf -eb342a147eaad1645896936d14e8e546 ./legume.genefam.esm.RDQM.SD02_65_hmmalign_trim2.tar.gz -c48a04aee79772e9a0efbcc61eb123e7 ./legume.genefam.esm.RDQM.SD01_65_hmmalign.tar.gz -bf8efd2183c30457c1012ea53ff0110f ./legume.genefam.esm.RDQM.SD07_LPWG_chrom_counts_by_genus3.tree.txt -90049a7d2a42093afdbdf06393acf43c ./MANIFEST.legume.genefam.esm.RDQM.correspondence.yml -43ed1c2abd5438edbfd8e58ae05b5ba7 ./legume.genefam.esm.RDQM.ST03_legcyc_genes.xlsx -634354d452383c138dc0be2685ce6cca ./legume.genefam.esm.RDQM.SD06_LPWG_chrom_counts_by_genus3_all_colored.pdf -42cf0f72763ea1a7aa8b39287d41daf2 ./legume.genefam.esm.RDQM.SD05_tree_legcyc_and_outgrps7boot_color.nh.txt -47be74848a3c10b865f45b10ba1c428e ./legume.genefam.esm.RDQM.SD03_70_trees_combined.tar.gz -876b54f8bffdfca9f6ff5a6af8baf42a ./legume.genefam.esm.RDQM.SD09_quota_tables.tar.gz -555e40c2c071f45ac3f49d172f79442e ./README.legume.genefam.esm.RDQM.yml -c0b5d48beaf17a53f375a64e2ece4d03 ./legume.genefam.esm.RDQM.ST04_legume_matK_matrix_and_counts_v05.xlsx -69eeb825b0bf35339d1bcbfc91b447d0 ./CHANGES.legume.genefam.esm.RDQM.txt -4ab791f1bb15730caf13534bfa162be3 ./legume.genefam.esm.RDQM.ST01_basal_legume_histograms.xlsx -03e41ecb26a4998a0e87a5fe13c6e3ce ./legume.genefam.esm.RDQM.ST02_species_counts_in_gene_families.xlsx -ec304fe54d4310365ddbac21ef1e7904 ./legume.genefam.esm.RDQM.SD04_71_trees_reduced_trees.tar.gz diff --git a/LEGUMES/Fabaceae/supplements/mixed.esm.RDQM/CHECKSUM.mixed.esm.RDQM.md5 b/LEGUMES/Fabaceae/supplements/mixed.esm.RDQM/CHECKSUM.mixed.esm.RDQM.md5 new file mode 100644 index 00000000..1be2469e --- /dev/null +++ 
b/LEGUMES/Fabaceae/supplements/mixed.esm.RDQM/CHECKSUM.mixed.esm.RDQM.md5 @@ -0,0 +1,18 @@ +42cf0f72763ea1a7aa8b39287d41daf2 ./legume.mixed.esm.RDQM.SD05_tree_legcyc_and_outgrps7boot_color.nh.txt +bf8efd2183c30457c1012ea53ff0110f ./legume.mixed.esm.RDQM.SD07_LPWG_chrom_counts_by_genus3.tree.txt +c48a04aee79772e9a0efbcc61eb123e7 ./legume.mixed.esm.RDQM.SD01_65_hmmalign.tar.gz +634354d452383c138dc0be2685ce6cca ./legume.mixed.esm.RDQM.SD06_LPWG_chrom_counts_by_genus3_all_colored.pdf +b37fb000dd91f067004b49f7ea3a34cd ./legume.mixed.esm.RDQM.SD08_quota_dotplots.pdf +47be74848a3c10b865f45b10ba1c428e ./legume.mixed.esm.RDQM.SD03_70_trees_combined.tar.gz +6eced5ba383b0a167605cf4eef64c382 ./CHANGES.mixed.esm.RDQM.txt +555e40c2c071f45ac3f49d172f79442e ./README.mixed.esm.RDQM.yml +ec304fe54d4310365ddbac21ef1e7904 ./legume.mixed.esm.RDQM.SD04_71_trees_reduced_trees.tar.gz +c0b5d48beaf17a53f375a64e2ece4d03 ./legume.mixed.esm.RDQM.ST04_legume_matK_matrix_and_counts_v05.xlsx +e77219fdf18ca7d6b577edc600fa4359 ./MANIFEST.mixed.esm.RDQM.correspondence.yml +eb342a147eaad1645896936d14e8e546 ./legume.mixed.esm.RDQM.SD02_65_hmmalign_trim2.tar.gz +4ab791f1bb15730caf13534bfa162be3 ./legume.mixed.esm.RDQM.ST01_basal_legume_histograms.xlsx +088c63ef2a1f507130d23ba65c01d23a ./MANIFEST.mixed.esm.RDQM.descriptions.yml +03e41ecb26a4998a0e87a5fe13c6e3ce ./legume.mixed.esm.RDQM.ST02_species_counts_in_gene_families.xlsx +124ea0038dd863eae408e7f25b2a5148 ./legume.mixed.esm.RDQM.ST05_legume_genome_sizes.xlsx +876b54f8bffdfca9f6ff5a6af8baf42a ./legume.mixed.esm.RDQM.SD09_quota_tables.tar.gz +43ed1c2abd5438edbfd8e58ae05b5ba7 ./legume.mixed.esm.RDQM.ST03_legcyc_genes.xlsx diff --git a/LEGUMES/Fabaceae/supplements/legume.genefam.esm.RDQM/README.legume.genefam.esm.RDQM.yml b/LEGUMES/Fabaceae/supplements/mixed.esm.RDQM/README.mixed.esm.RDQM.yml similarity index 100% rename from LEGUMES/Fabaceae/supplements/legume.genefam.esm.RDQM/README.legume.genefam.esm.RDQM.yml rename to 
LEGUMES/Fabaceae/supplements/mixed.esm.RDQM/README.mixed.esm.RDQM.yml From 69d2aaac9e98370306ec031c898a6b172db7162e Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Mon, 29 Jan 2024 23:45:06 -0600 Subject: [PATCH 15/27] updated iprscan files which were erroneously empty previously --- .../V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 index c160ba41..21814624 100644 --- a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 +++ b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 @@ -13,7 +13,7 @@ c69c7edf12ad79f176d0365902e36d4e ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna. 1ff23c576960f7354f35728ff00ac8fa ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz 7bc661e46f60e9308d5cd2663776e081 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.fai 04ef98566b55ad667b058b1bfb3dc8df ./arast.V10309.gnm1.ann1.CZRZ.featid_map.tsv.gz -f888bec467738642550af258e3e61113 ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz +daed0de5d10057896f6b4f1ac5196e8b ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz eb04962bd814ee2aaaedd42e774f17ee ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.gzi 7cacd41f7aa04ec27ed09daa2c967e28 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz 5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.fai @@ -29,5 +29,5 @@ e2f21d5ed24891d848c9f98e244cadbe ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz b42d54c9910048ddb841ca2ff57eac91 ./MANIFEST.V10309.gnm1.ann1.CZRZ.descriptions.yml d037c86f41256501f7f40d94cb6a2d5e ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.fai 92fa388cf112420111f5218a0da23306 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz -d1293c7e339185b773c0d2809d78e88a 
./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz.tbi +c12383db77e38c85973edfba355fc5a4 ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz.tbi e7e13718aea32fbff026133e7bb1bf33 ./README.V10309.gnm1.ann1.CZRZ.yml From 73b33a58a7e58473c49a54381899e0478cf6d730 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Wed, 31 Jan 2024 13:11:56 -0600 Subject: [PATCH 16/27] Replace legume repeat collection, dropping TE_lib_2024.rpt.YFQ7 and adding TE_lib_2024.rpt.6WVT (the previous collection, accessible for four days, had some genic sequences) --- .../CHECKSUM.TE_lib_2024.rpt.6WVT.md5 | 7 +++++++ .../README.TE_lib_2024.rpt.6WVT.yml} | 9 +++++---- 2 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 rename LEGUMES/Fabaceae/repeats/{TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml => TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml} (88%) diff --git a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 new file mode 100644 index 00000000..a7d9b546 --- /dev/null +++ b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 @@ -0,0 +1,7 @@ +889e479ab4dfb5cf2e7be955bac40064 ./legume.TE_lib_2024.rpt.6WVT.fna.gz.fai +2cb398a3ff7bc90feb24e05e4089a970 ./legume.TE_lib_2024.rpt.6WVT.fna.gz +51c20848e76c19125c0a8a3c0be15e9d ./CHANGES.TE_lib_2024.rpt.6WVT.txt +121d649b6b6bff4abeddcbaaf4e85951 ./MANIFEST.TE_lib_2024.rpt.6WVT.correspondence.yml +9a0622dbada7845b683e758d9c58d441 ./README.TE_lib_2024.rpt.6WVT.yml +c11cd2f94829cc33b2161cfc1158efab ./MANIFEST.TE_lib_2024.rpt.6WVT.descriptions.yml +684958f4e4d062c428556ea3a7f3dea8 ./legume.TE_lib_2024.rpt.6WVT.fna.gz.gzi diff --git a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml similarity index 88% rename from 
LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml rename to LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml index fd1b28c6..487903b1 100644 --- a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.YFQ7/README.TE_lib_2024.rpt.YFQ7.yml +++ b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml @@ -1,5 +1,6 @@ --- -identifier: TE_lib_2024.rpt.YFQ7 +identifier: TE_lib_2024.rpt.6WVT +identifier: TE_lib_2024.rpt.6WVT provenance: "This repository is considered the primary source for these files. The files here are held as part of the PeanutBase, LegumeInfo, and SoyBase projects." @@ -29,11 +30,11 @@ genotype: description: "Genomic repetitive elements for legume species. Derived from RepeatModeler run on 14 genomes -- 11 legumes and three non-legume species: arasp arath bauto cerca cerch chafa glyma medtr phavu prupe quisa sento singl vitvi. Additional curated repeats in Arachis contributed by Dongying Gao, David Bertioli, Soraya Bertioli and in Glycine contributed by Jianxin Ma and Jianchang Du, as part of the Williams 82 reference genome project in 2010." 
-original_file_creation_date: "2024-01-23" +original_file_creation_date: "2024-01-31" -local_file_creation_date: "2024-01-23" +local_file_creation_date: "2024-01-31" -dataset_release_date: "2024-01-27" +dataset_release_date: "2024-01-31" contributors: "Hyunoh Lee, Steven Cannon, Dongying Gao, David Bertioli, Jianxin Ma, Jianchang Du" From c1ae034e2a3111e5e28cb47a55f797223a1f25ab Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Wed, 31 Jan 2024 13:51:53 -0600 Subject: [PATCH 17/27] Fix README yml --- .../TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 | 2 +- .../TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 index a7d9b546..a5c00a23 100644 --- a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 +++ b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 @@ -2,6 +2,6 @@ 2cb398a3ff7bc90feb24e05e4089a970 ./legume.TE_lib_2024.rpt.6WVT.fna.gz 51c20848e76c19125c0a8a3c0be15e9d ./CHANGES.TE_lib_2024.rpt.6WVT.txt 121d649b6b6bff4abeddcbaaf4e85951 ./MANIFEST.TE_lib_2024.rpt.6WVT.correspondence.yml -9a0622dbada7845b683e758d9c58d441 ./README.TE_lib_2024.rpt.6WVT.yml +9cadc9a785b1fb368a8d4ac4a956d656 ./README.TE_lib_2024.rpt.6WVT.yml c11cd2f94829cc33b2161cfc1158efab ./MANIFEST.TE_lib_2024.rpt.6WVT.descriptions.yml 684958f4e4d062c428556ea3a7f3dea8 ./legume.TE_lib_2024.rpt.6WVT.fna.gz.gzi diff --git a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml index 487903b1..f450bca4 100644 --- a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml +++ b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/README.TE_lib_2024.rpt.6WVT.yml @@ -1,6 +1,5 @@ --- identifier: 
TE_lib_2024.rpt.6WVT -identifier: TE_lib_2024.rpt.6WVT provenance: "This repository is considered the primary source for these files. The files here are held as part of the PeanutBase, LegumeInfo, and SoyBase projects." From bc74e0af10ad9f9aeb2a0f1b692169aacdb9ed96 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Wed, 31 Jan 2024 14:55:58 -0600 Subject: [PATCH 18/27] More tweaks to new repeat collection --- .../TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 index a5c00a23..86417370 100644 --- a/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 +++ b/LEGUMES/Fabaceae/repeats/TE_lib_2024.rpt.6WVT/CHECKSUM.TE_lib_2024.rpt.6WVT.md5 @@ -1,7 +1,5 @@ -889e479ab4dfb5cf2e7be955bac40064 ./legume.TE_lib_2024.rpt.6WVT.fna.gz.fai -2cb398a3ff7bc90feb24e05e4089a970 ./legume.TE_lib_2024.rpt.6WVT.fna.gz +29479639f657adca86de0a24b25a691d ./legume.TE_lib_2024.rpt.6WVT.fna.gz 51c20848e76c19125c0a8a3c0be15e9d ./CHANGES.TE_lib_2024.rpt.6WVT.txt 121d649b6b6bff4abeddcbaaf4e85951 ./MANIFEST.TE_lib_2024.rpt.6WVT.correspondence.yml 9cadc9a785b1fb368a8d4ac4a956d656 ./README.TE_lib_2024.rpt.6WVT.yml c11cd2f94829cc33b2161cfc1158efab ./MANIFEST.TE_lib_2024.rpt.6WVT.descriptions.yml -684958f4e4d062c428556ea3a7f3dea8 ./legume.TE_lib_2024.rpt.6WVT.fna.gz.gzi From 8550e8e0053466a3fd4f06fa7723aca3fbf8a3c9 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Wed, 31 Jan 2024 19:44:21 -0600 Subject: [PATCH 19/27] Re-call _primary.f?a files, which seem not to have been primary in the initial run. 
--- .../CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 index 21814624..8bd7b8aa 100644 --- a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 +++ b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 @@ -1,33 +1,19 @@ 3f1a18896d5177db5fe45542ea95e258 ./MANIFEST.V10309.gnm1.ann1.CZRZ.correspondence.yml -8d7e589149431bf1b3b8d577ab89a7a8 ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.gzi -6e8f6c330ae2b3c0c262564bb8a4c7a7 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz.tbi ca2619b180190a64447f8210e2d5eec6 ./arast.V10309.gnm1.ann1.CZRZ.legfed_v1_0.M65K.gfa.tsv.gz 019172ff554aa12f41de5ae259c02e38 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.bed.gz -d13a0d5c902dff37ab7fbaeb9682e9fc ./CHANGES.V10309.gnm1.ann1.CZRZ.txt -ecd023051a659495cebaf005c5f5a5f8 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.gzi -ecd023051a659495cebaf005c5f5a5f8 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.gzi +0866ecc344be7fb3ac8fda2904d56b6a ./CHANGES.V10309.gnm1.ann1.CZRZ.txt 7f128ca5e449152a6726d0d26a365b66 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_exons.gff3.gz -7cacd41f7aa04ec27ed09daa2c967e28 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz -5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.fai -c69c7edf12ad79f176d0365902e36d4e ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.gzi -1ff23c576960f7354f35728ff00ac8fa ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz -7bc661e46f60e9308d5cd2663776e081 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.fai +e2d0f1b8ac44771183d2192e529dad78 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz +0bcd21094be2e521736a5f66d3727203 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz 
04ef98566b55ad667b058b1bfb3dc8df ./arast.V10309.gnm1.ann1.CZRZ.featid_map.tsv.gz daed0de5d10057896f6b4f1ac5196e8b ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz -eb04962bd814ee2aaaedd42e774f17ee ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.gzi -7cacd41f7aa04ec27ed09daa2c967e28 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz -5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.fai +14031d715fea66f1e566653240e66c96 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz 9eb1a8a66ce49dffbf62d20395271da5 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.full_table.tsv.gz 8c144993f8edb2552eecf7d3b9a63acb ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.txt 5987914f4a2958d9d07f39a607152e03 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json -c69c7edf12ad79f176d0365902e36d4e ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.gzi -92fa388cf112420111f5218a0da23306 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz -e2f21d5ed24891d848c9f98e244cadbe ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz +302486920ff3179e56516d1f8a68e9b5 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz +99e732d008bec0ab1ed5d278babecea0 ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz 87ef4d01b7a08ab75e7715e2603a6a7e ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz -2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.fai -2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.fai b42d54c9910048ddb841ca2ff57eac91 ./MANIFEST.V10309.gnm1.ann1.CZRZ.descriptions.yml -d037c86f41256501f7f40d94cb6a2d5e ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.fai -92fa388cf112420111f5218a0da23306 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz -c12383db77e38c85973edfba355fc5a4 ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz.tbi +cef1c92ab0413ef9aacbadad1b2da421 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz e7e13718aea32fbff026133e7bb1bf33 ./README.V10309.gnm1.ann1.CZRZ.yml From 2169ae67d45c21c0761a5db9017d6c1c6b0a7b73 Mon Sep 17 
00:00:00 2001 From: Andrew Farmer Date: Tue, 6 Feb 2024 17:05:13 -0600 Subject: [PATCH 20/27] metadata files for alfalfa marker data from Breeding Insight Platform --- ...nJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.md5 | 3 ++ ...nJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.yml | 35 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/CHECKSUM.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.md5 create mode 100644 Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/README.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.yml diff --git a/Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/CHECKSUM.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.md5 b/Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/CHECKSUM.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.md5 new file mode 100644 index 00000000..1c0d8cd0 --- /dev/null +++ b/Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/CHECKSUM.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.md5 @@ -0,0 +1,3 @@ +7c357e037540fdc48eb389f6d2d137c6 ./medsa.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.gff3.gz +fd0a59b0a8590f6a5bc217cd30d7b468 ./medsa.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.gff3.gz.tbi +7b75c25240f69ea0ff872feb895d9488 ./README.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.yml diff --git a/Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/README.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.yml b/Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/README.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.yml new file mode 100644 index 00000000..2e308c91 --- /dev/null +++ b/Medicago/sativa/markers/XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01/README.XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01.yml @@ -0,0 +1,35 @@ +--- +identifier: XinJiangDaYe.gnm1.mrk.3KAlfaDArTagBICU01 + +genotyping_platform: 3KAlfaDArTagBICU01 + +synopsis: "3KAlfaDArTagBICU01 is a DArTag panel of 3K loci distributed across the alfalfa genome for use in molecular 
breeding and genomic insight" + +related_to: XinJiangDaYe.gnm1.12MR + +scientific_name: Medicago sativa + +taxid: 3879 + +scientific_name_abbrev: medsa + +genotype: + - XinJiangDaYe + +description: "This alfalfa 3K DArTag panel was developed from a diversity panel of 40 individual alfalfa clonal genotypes, focusing on elite breeding and stress-resistant genotypes used in North America. This panel consisted of 17 elite parents with various fall dormancy levels, six samples of diploid-cultivated alfalfa, 13 genotypes with abiotic stress resistance, one genotype with Aphanomyces root rot disease resistance, and three other genotypes... a high-confidence set of 10K SNPs were obtained by requiring them: (1) not located within 5bp distance to an indel, (2) QUAL > 30, (3) minimum and maximum read depths of 20 and 1,900, respectively, (4) for each sample, at least one read supporting reference allele and two reads supporting the alternative allele, (5) no missing genotype per SNP position, (6) with a minor allele frequency greater than 0.25, (7) not located in transposable elements and (8) not within 1Kb of chromosome termini. The 10K SNPs were assessed by DArT, and from those that passed QC, a 3K SNP set targeting even genomic distribution was selected to form a 3K DArTag marker panel. Of the 3,000 loci selected for the panel, 85% (2,542) reside in genic regions and only 15% (458) reside in non-genic regions." + +publication_doi: 10.46265/genresj.EMOR6509 + +publication_title: "A public mid-density genotyping platform for alfalfa (Medicago sativa L.)" + +contributors: "Dongyan Zhao, Katherine Mejia-Guerra, Marcelo Mollinari, Deborah A Samac, Brian M Irish, Kasia Heller-Uszynska, Craig T Beil, Moira J Sheehan" + +citation: "Dongyan Zhao, Katherine Mejia-Guerra, Marcelo Mollinari, Deborah A Samac, Brian M Irish, Kasia Heller-Uszynska, Craig T Beil, Moira J Sheehan. A public mid-density genotyping platform for alfalfa (Medicago sativa L.).
Genetic Resources 2023; 4 (8), 55–63. doi:10.46265/genresj.EMOR6509" + +data_curators: Andrew Farmer + +public_access_level: public + +license: Open + +keywords: alfalfa, genetic markers, genotyping From 0aeb9e35c016d26022c683fbdb5186966e47f9d6 Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Thu, 8 Feb 2024 15:34:31 -0600 Subject: [PATCH 21/27] reinstated csi index for the gff file, needed for JBrowse2 to be able to serve it properly --- .../CHECKSUM.Cameor.gnm1.ann1.7SZR.md5 | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/CHECKSUM.Cameor.gnm1.ann1.7SZR.md5 b/Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/CHECKSUM.Cameor.gnm1.ann1.7SZR.md5 index f3dab76b..a09cc98f 100644 --- a/Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/CHECKSUM.Cameor.gnm1.ann1.7SZR.md5 +++ b/Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/CHECKSUM.Cameor.gnm1.ann1.7SZR.md5 @@ -1,25 +1,31 @@ -6e0f6dda7ea2cd9f22e683a2461c107f Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/CHANGES.Cameor.gnm1.ann1.7SZR.txt -221117109239a30599c5b12c3af737ad Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/MANIFEST.Cameor.gnm1.ann1.7SZR.correspondence.yml -57fb53fdba9fb42140103266036a6d12 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/MANIFEST.Cameor.gnm1.ann1.7SZR.descriptions.yml -cac5b777d7eec32e45174a64c152b5da Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds_primary.fna.gz -cea35fdbe658227b4f4f16ef2d2a347a Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds_primary.fna.gz.fai -1c5de471798411810cc6f2e131b44eee Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds_primary.fna.gz.gzi -5196394aca3fb8a57a4680057b5259d2 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz -2b5e2755bce849b66db9637f34b94a98 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz.fai 
-eab9a97cd9cdd9cb2e41b37c2e2062e9 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz.gzi -3d9258e16975ebb8d39c9a437e9f522e Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.gene_models_main.bed.gz -f02318395c1323469e8763b8e283a801 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.gene_models_main.gff3.gz -5db95c71dca46416a4a49828c3f6721d Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.legfed_v1_0.M65K.gfa.tsv.gz -87f911a653d538de312da1b8ad1935bc Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna_primary.fna.gz -38d3278994eccaf853f7d013e0390a66 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna_primary.fna.gz.fai -a9fc9dc3678d6099db3a393fbac2dcdd Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna_primary.fna.gz.gzi -6c291f3445b1dc87b3ead173fd159126 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz -1ad9737b1548c01dd2493429ef3b5c5b Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz.fai -21d399ede0fc9e86e9e6db1e42fe3503 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz.gzi -d58fe7e10f18a3c0003f3744bc979c29 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein_primary.faa.gz -69c1961731255325b1fd483a151a69ad Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein_primary.faa.gz.fai -d4b56535e0f2610fe67f6057ac593000 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein_primary.faa.gz.gzi -1f717545d8558245d0ff206fb313aafa Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz -fda119447f209ed74bf80da9b21106e7 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz.fai -085a8aac59784821c5083b415c6d67a7 
Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz.gzi -884a7efe955c8b300131c3fabd1e9230 Pisum/sativum/annotations/Cameor.gnm1.ann1.7SZR/README.Cameor.gnm1.ann1.7SZR.yml +f02318395c1323469e8763b8e283a801 ./pissa.Cameor.gnm1.ann1.7SZR.gene_models_main.gff3.gz +87f911a653d538de312da1b8ad1935bc ./pissa.Cameor.gnm1.ann1.7SZR.mrna_primary.fna.gz +d4b56535e0f2610fe67f6057ac593000 ./pissa.Cameor.gnm1.ann1.7SZR.protein_primary.faa.gz.gzi +1ad9737b1548c01dd2493429ef3b5c5b ./pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz.fai +4a46d183d50655cce632cc94509c4a93 ./pissa.Cameor.gnm1.ann1.7SZR.gene_models_main.gff3.gz.csi +884a7efe955c8b300131c3fabd1e9230 ./README.Cameor.gnm1.ann1.7SZR.yml +1f717545d8558245d0ff206fb313aafa ./pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz +085a8aac59784821c5083b415c6d67a7 ./pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz.gzi +eab9a97cd9cdd9cb2e41b37c2e2062e9 ./pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz.gzi +cea35fdbe658227b4f4f16ef2d2a347a ./pissa.Cameor.gnm1.ann1.7SZR.cds_primary.fna.gz.fai +5db95c71dca46416a4a49828c3f6721d ./pissa.Cameor.gnm1.ann1.7SZR.legfed_v1_0.M65K.gfa.tsv.gz +cdc440a11f823552a090f0fa0544af25 ./busco/pissa.Cameor.gnm1.ann1.7SZR.protein_primaryTranscript.faa.gz_busco/short_summary.specific.embryophyta_odb10.pissa.Cameor.gnm1.ann1.7SZR.protein_primaryTranscript.faa.gz_busco.txt +46a12e84a0296dfcea227429b66e8659 ./busco/pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz_busco/short_summary.specific.embryophyta_odb10.pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz_busco.txt +5196394aca3fb8a57a4680057b5259d2 ./pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz +38d3278994eccaf853f7d013e0390a66 ./pissa.Cameor.gnm1.ann1.7SZR.mrna_primary.fna.gz.fai +c50533d75612a66e25644641a317588f ./BUSCO/pissa.Cameor.gnm1.ann1.7SZR.busco.fabales_odb10.full_table.tsv.gz +632ea2f7beed459a6ef72fdf71053da0 ./BUSCO/pissa.Cameor.gnm1.ann1.7SZR.busco.fabales_odb10.short_summary.txt +a079874923382723d58eddd56868a6d8 
./BUSCO/pissa.Cameor.gnm1.ann1.7SZR.busco.fabales_odb10.short_summary.json +cac5b777d7eec32e45174a64c152b5da ./pissa.Cameor.gnm1.ann1.7SZR.cds_primary.fna.gz +3d9258e16975ebb8d39c9a437e9f522e ./pissa.Cameor.gnm1.ann1.7SZR.gene_models_main.bed.gz +6e0f6dda7ea2cd9f22e683a2461c107f ./CHANGES.Cameor.gnm1.ann1.7SZR.txt +a9fc9dc3678d6099db3a393fbac2dcdd ./pissa.Cameor.gnm1.ann1.7SZR.mrna_primary.fna.gz.gzi +57fb53fdba9fb42140103266036a6d12 ./MANIFEST.Cameor.gnm1.ann1.7SZR.descriptions.yml +221117109239a30599c5b12c3af737ad ./MANIFEST.Cameor.gnm1.ann1.7SZR.correspondence.yml +6c291f3445b1dc87b3ead173fd159126 ./pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz +fda119447f209ed74bf80da9b21106e7 ./pissa.Cameor.gnm1.ann1.7SZR.protein.faa.gz.fai +d58fe7e10f18a3c0003f3744bc979c29 ./pissa.Cameor.gnm1.ann1.7SZR.protein_primary.faa.gz +2b5e2755bce849b66db9637f34b94a98 ./pissa.Cameor.gnm1.ann1.7SZR.cds.fna.gz.fai +1c5de471798411810cc6f2e131b44eee ./pissa.Cameor.gnm1.ann1.7SZR.cds_primary.fna.gz.gzi +21d399ede0fc9e86e9e6db1e42fe3503 ./pissa.Cameor.gnm1.ann1.7SZR.mrna.fna.gz.gzi +69c1961731255325b1fd483a151a69ad ./pissa.Cameor.gnm1.ann1.7SZR.protein_primary.faa.gz.fai From 052a9c2db1a6d0f7b66783dc0903e410b2614cf3 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Wed, 14 Feb 2024 10:09:41 -0600 Subject: [PATCH 22/27] Add plant introduction number PI 704477 for G. 
max Wm82_ISU01 (genome, annotation, and description_Glycine_max.yml) --- Glycine/max/about_this_collection/description_Glycine_max.yml | 2 +- .../README.Wm82_ISU01.gnm2.ann1.FGFB.yml | 2 +- .../Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Glycine/max/about_this_collection/description_Glycine_max.yml b/Glycine/max/about_this_collection/description_Glycine_max.yml index b1500f18..73465a62 100644 --- a/Glycine/max/about_this_collection/description_Glycine_max.yml +++ b/Glycine/max/about_this_collection/description_Glycine_max.yml @@ -23,7 +23,7 @@ resources: description: "Tool to facilitate searches of GRIN Descriptor Data" strains: - identifier: Wm82_ISU01 - accession: Wm82-ISU-01 + accession: PI 704477 name: Williams 82-ISU-01 accession_group: Reference - Williams 82 origin: Northern United States diff --git a/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml b/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml index d3a81b9b..b52a281a 100644 --- a/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml +++ b/Glycine/max/annotations/Wm82_ISU01.gnm2.ann1.FGFB/README.Wm82_ISU01.gnm2.ann1.FGFB.yml @@ -16,7 +16,7 @@ scientific_name_abbrev: glyma genotype: - Wm82_ISU01 -description: "Gene annotation resources. See full description at Phytozome/JGI repository (address above)." +description: "Gene annotation resources. This release (Glycine max var. Williams 82-ISU-01) is an inbred line developed by Robert Stupar (University of Minnesota) by inbreeding original W82 seed, PI 704477. The resulting genome is a haploid version of W82. The original W82 is from an agronomic line and contains variation between the two haplotypes. This genome can be used as an updated version of W82 -- the soybean reference genome, however, it is not an identical genotype to previous reference. 
See full description at Phytozome/JGI repository (address above)." original_file_creation_date: "2022-07-28" diff --git a/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml b/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml index bac74818..ca947155 100644 --- a/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml +++ b/Glycine/max/genomes/Wm82_ISU01.gnm2.JFPQ/README.Wm82_ISU01.gnm2.JFPQ.yml @@ -18,7 +18,7 @@ genotype: chromosome_prefix: Gm -description: "This release (Glycine max var. Williams 82-ISU-01) is an inbred line developed by Robert Stupar (University of Minnesota) by inbreeding original W82 seed. The resulting genome is a haploid version of W82. The original W82 is from an agronomic line and contains variation between the two haplotypes. This genome can be used as an updated version of W82 -- the soybean reference genome, however, it is not an identical genotype to previous reference. Where possible, we have maintained continuity with the previous reference and the annotation is based on all available W82 transcript evidence. It is the most complete and highest quality soybean genome completed to date. Genome assembly consists of 47.07x of single haplotype CCS PACBIO coverage (17,049 bp average read size), assembled using HiFiAsm and HIC and the resulting sequence was polished using RACON. See full description at Phytozome/JGI repository (address above)." +description: "This release (Glycine max var. Williams 82-ISU-01) is an inbred line developed by Robert Stupar (University of Minnesota) by inbreeding original W82 seed, PI 704477. The resulting genome is a haploid version of W82. The original W82 is from an agronomic line and contains variation between the two haplotypes. This genome can be used as an updated version of W82 -- the soybean reference genome, however, it is not an identical genotype to previous reference. 
The genome assembly consists of 47.07x of single haplotype CCS PACBIO coverage (17,049 bp average read size), assembled using HiFiAsm and HIC and the resulting sequence was polished using RACON. See full description at Phytozome/JGI repository (address above)." original_file_creation_date: "2022-07-28" From 8098bdb48daed86423e76dcb03bc850e0afb4261 Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Fri, 16 Feb 2024 16:47:35 -0600 Subject: [PATCH 23/27] updated following partition into protein coding and non-coding genes per issue https://github.com/legumeinfo/datastore-issues/issues/189 --- .../CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 index 8bd7b8aa..2093e0b2 100644 --- a/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 +++ b/Arachis/stenosperma/annotations/V10309.gnm1.ann1.CZRZ/CHECKSUM.V10309.gnm1.ann1.CZRZ.md5 @@ -1,19 +1,34 @@ 3f1a18896d5177db5fe45542ea95e258 ./MANIFEST.V10309.gnm1.ann1.CZRZ.correspondence.yml +614df38a7cb60dc67f46a110be552eed ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.gzi +d8e6e51a1d21345c0cd09fae2f83216b ./arast.V10309.gnm1.ann1.CZRZ.gene_models_noncoding.gff3.gz +e1d17910856619103237a051c5ef1c63 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz.tbi ca2619b180190a64447f8210e2d5eec6 ./arast.V10309.gnm1.ann1.CZRZ.legfed_v1_0.M65K.gfa.tsv.gz 019172ff554aa12f41de5ae259c02e38 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.bed.gz -0866ecc344be7fb3ac8fda2904d56b6a ./CHANGES.V10309.gnm1.ann1.CZRZ.txt +ec8680eb9ca6dfdb54f6add1cc7e1041 ./CHANGES.V10309.gnm1.ann1.CZRZ.txt +18ab8a5e9e76c6cbb9627238879c77d6 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.gzi +e2a22e27ca2f90fd35972be47dd887e0 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.gzi 
7f128ca5e449152a6726d0d26a365b66 ./arast.V10309.gnm1.ann1.CZRZ.gene_models_exons.gff3.gz e2d0f1b8ac44771183d2192e529dad78 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz +5d1928c4f5bf4245aae4d266531ba816 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.fai +b831cf6e0d31dd98b5445c62d49c10fe ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.gzi 0bcd21094be2e521736a5f66d3727203 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz +7bc661e46f60e9308d5cd2663776e081 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.fai 04ef98566b55ad667b058b1bfb3dc8df ./arast.V10309.gnm1.ann1.CZRZ.featid_map.tsv.gz daed0de5d10057896f6b4f1ac5196e8b ./arast.V10309.gnm1.ann1.CZRZ.iprscan.gff3.gz +d0edbcc9dbd65df0dfc4a6f0f7b91d45 ./arast.V10309.gnm1.ann1.CZRZ.protein_primary.faa.gz.gzi 14031d715fea66f1e566653240e66c96 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz +1b9fe9691705b63380ff110c8aedff36 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz.fai 9eb1a8a66ce49dffbf62d20395271da5 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.full_table.tsv.gz 8c144993f8edb2552eecf7d3b9a63acb ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.txt 5987914f4a2958d9d07f39a607152e03 ./BUSCO/arast.V10309.gnm1.ann1.CZRZ.busco.fabales_odb10.short_summary.json +6d706c9b8df610e5b9b93de84d864128 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz.gzi 302486920ff3179e56516d1f8a68e9b5 ./arast.V10309.gnm1.ann1.CZRZ.cds_primary.fna.gz 99e732d008bec0ab1ed5d278babecea0 ./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz -87ef4d01b7a08ab75e7715e2603a6a7e ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz +9ec146a3ecd614dcb09c58d4acf5548f ./arast.V10309.gnm1.ann1.CZRZ.gene_models_main.gff3.gz +e24016df9c5d0d1b78a7d55f77115113 ./arast.V10309.gnm1.ann1.CZRZ.mrna_primary.fna.gz.fai +2ac0bd000e3fd656d6ef6e1107337a52 ./arast.V10309.gnm1.ann1.CZRZ.mrna.fna.gz.fai b42d54c9910048ddb841ca2ff57eac91 ./MANIFEST.V10309.gnm1.ann1.CZRZ.descriptions.yml +d037c86f41256501f7f40d94cb6a2d5e 
./arast.V10309.gnm1.ann1.CZRZ.protein.faa.gz.fai cef1c92ab0413ef9aacbadad1b2da421 ./arast.V10309.gnm1.ann1.CZRZ.cds.fna.gz +cc9b81836241eb801e8856db6f09bb7b ./arast.V10309.gnm1.ann1.CZRZ.gene_models_noncoding.gff3.gz.tbi e7e13718aea32fbff026133e7bb1bf33 ./README.V10309.gnm1.ann1.CZRZ.yml From c6700edfa6397c0be78e92515b1093266922f650 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Sat, 17 Feb 2024 13:05:14 -0600 Subject: [PATCH 24/27] Add new pangene collections for Arachis, Cicer, Glycine, Medicago, Phaseolus, Vigna --- .../README.Arachis.pan3.BYQ9.yml | 42 +++++++++ .../README.Cicer.pan3.JVTK.yml | 41 +++++++++ .../README.Glycine.pan5.MKRS.yml | 91 +++++++++++++++++++ .../README.Medicago.pan3.9X6B.yml | 58 ++++++++++++ .../README.Phaseolus.pan3.LXKV.yml | 44 +++++++++ .../README.Vigna.pan3.V294.yml | 47 ++++++++++ 6 files changed, 323 insertions(+) create mode 100644 Arachis/GENUS/pangenes/Arachis.pan3.BYQ9/README.Arachis.pan3.BYQ9.yml create mode 100644 Cicer/GENUS/pangenes/Cicer.pan3.JVTK/README.Cicer.pan3.JVTK.yml create mode 100644 Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml create mode 100644 Medicago/GENUS/pangenes/Medicago.pan3.9X6B/README.Medicago.pan3.9X6B.yml create mode 100644 Phaseolus/GENUS/pangenes/Phaseolus.pan3.LXKV/README.Phaseolus.pan3.LXKV.yml create mode 100644 Vigna/GENUS/pangenes/Vigna.pan3.V294/README.Vigna.pan3.V294.yml diff --git a/Arachis/GENUS/pangenes/Arachis.pan3.BYQ9/README.Arachis.pan3.BYQ9.yml b/Arachis/GENUS/pangenes/Arachis.pan3.BYQ9/README.Arachis.pan3.BYQ9.yml new file mode 100644 index 00000000..157c3e43 --- /dev/null +++ b/Arachis/GENUS/pangenes/Arachis.pan3.BYQ9/README.Arachis.pan3.BYQ9.yml @@ -0,0 +1,42 @@ +--- +identifier: Arachis.pan3.BYQ9 + +provenance: "The files in this directory are a product of the staff of the LegumeInfo, PeanutBase, and SoyBase project teams. 
The method is described here: https://github.com/legumeinfo/pandagma" + +source: "https://data.legumeinfo.org" + +synopsis: "Pangene set for Arachis species, based on A. hypogaea, A. duranensis, A. ipaensis, and A. stenosperma. This pangene set includes four annotation sets from A. hypogaea and one each from A. duranensis, A. ipaensis, and A. stenosperma." + +scientific_name: Arachis + +taxid: 3817 + +annotations_main: + - arahy.BaileyII.gnm1.ann1 + - arahy.Tifrunner.gnm1.ann2 + - arahy.Tifrunner.gnm2.ann1 + - arahy.Tifrunner.gnm2.ann2 + +annotations_extra: + - aradu.V14167.gnm1.ann1 + - araip.K30076.gnm1.ann1 + - arast.V10309.gnm1.ann1 + +description: "Pan-gene set for Arachis species, spanning 4 species and 7 annotation sets, calculated using the pandagma pipeline, version 2.0 (February, 2024)" + +original_file_creation_date: "2024-02-14" + +local_file_creation_date: "2024-02-14" + +dataset_release_date: "2024-02-14" + +contributors: The International Peanut Genome Initiative and authors of all constituent genomes and annotations + +data_curators: Steven Cannon, Andrew Farmer + +public_access_level: public + +license: Open + +keywords: Arachis, peanut, pan-gene, pangene, orthogroup + diff --git a/Cicer/GENUS/pangenes/Cicer.pan3.JVTK/README.Cicer.pan3.JVTK.yml b/Cicer/GENUS/pangenes/Cicer.pan3.JVTK/README.Cicer.pan3.JVTK.yml new file mode 100644 index 00000000..fc060484 --- /dev/null +++ b/Cicer/GENUS/pangenes/Cicer.pan3.JVTK/README.Cicer.pan3.JVTK.yml @@ -0,0 +1,41 @@ +--- +identifier: Cicer.pan3.JVTK + +provenance: "The files in this directory are a product of the staff of the SoyBase and LegumeInfo project teams. The method is described here: https://github.com/legumeinfo/pandagma" + +source: "https://data.legumeinfo.org" + +synopsis: "Pangene set for Cicer species, based on C. arietinum, C. echinospermum, and C. reticulatum. This pangene set includes 6 annotation sets."
+ +scientific_name: Cicer + +taxid: 3826 + +annotations_main: + - cicar.CDCFrontier.gnm3.ann1 + - cicar.ICC4958.gnm2.ann1 + - cicec.S2Drd065.gnm1.ann1 + - cicre.Besev079.gnm1.ann1 + +annotations_extra: + - cicar.CDCFrontier.gnm1.ann1 + - cicar.CDCFrontier.gnm2.ann1 + +description: "Pan-gene set for Cicer species, spanning 3 species and 6 annotation sets, calculated using the pandagma pipeline, version 2.0 (February, 2024)" + +original_file_creation_date: "2024-02-14" + +local_file_creation_date: "2024-02-14" + +dataset_release_date: "2024-02-14" + +contributors: Steven Cannon + +data_curators: Steven Cannon + +public_access_level: public + +license: Open + +keywords: Cicer, chickpea, pan-gene, pangene, orthogroup + diff --git a/Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml b/Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml new file mode 100644 index 00000000..2eaddbda --- /dev/null +++ b/Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml @@ -0,0 +1,91 @@ +--- +identifier: Glycine.pan5.MKRS + +provenance: "The files in this directory are a product of the staff of the SoyBase and LegumeInfo project teams. The method is described here: https://github.com/legumeinfo/pandagma" + +source: "https://data.legumeinfo.org" + +synopsis: "Pangene set for Glycine species, based on G. max and G. soja but also including 6 perennial Australian species by homology. This pangene set includes 56 Glycine annotation sets." 
+ +scientific_name: Glycine + +taxid: 3846 + +annotations_main: + - glyma.FiskebyIII.gnm1.ann1 + - glyma.Hefeng25_IGA1002.gnm1.ann1 + - glyma.Huaxia3_IGA1007.gnm1.ann1 + - glyma.JD17.gnm1.ann1 + - glyma.Jinyuan_IGA1006.gnm1.ann1 + - glyma.Lee.gnm1.ann1 + - glyma.Lee.gnm2.ann1 + - glyma.Lee.gnm3.ann1 + - glyma.Wenfeng7_IGA1001.gnm1.ann1 + - glyma.Wm82_IGA1008.gnm1.ann1 + - glyma.Wm82_ISU01.gnm2.ann1 + - glyma.Wm82_NJAU.gnm1.ann1 + - glyma.Wm82.gnm2.ann1 + - glyma.Wm82.gnm4.ann1 + - glyma.Wm82.gnm5.ann1 + - glyma.Zh13_IGA1005.gnm1.ann1 + - glyma.Zh13.gnm1.ann1 + - glyma.Zh35_IGA1004.gnm1.ann1 + - glyso.F_IGA1003.gnm1.ann1 + - glyso.PI483463.gnm1.ann1 + - glyso.W05.gnm1.ann1 + +annotations_extra: + - glyma.58-161.gnm1.ann1 + - glyma.Amsoy.gnm1.ann1 + - glyma.DongNongNo_50.gnm1.ann1 + - glyma.FengDiHuang.gnm1.ann1 + - glyma.HanDouNo_5.gnm1.ann1 + - glyma.HeiHeNo_43.gnm1.ann1 + - glyma.Hwangkeum.gnm1.ann1 + - glyma.JiDouNo_17.gnm1.ann1 + - glyma.JinDouNo_23.gnm1.ann1 + - glyma.JuXuanNo_23.gnm1.ann1 + - glyma.KeShanNo_1.gnm1.ann1 + - glyma.PI_398296.gnm1.ann1 + - glyma.PI_548362.gnm1.ann1 + - glyma.QiHuangNo_34.gnm1.ann1 + - glyma.ShiShengChangYe.gnm1.ann1 + - glyma.TieFengNo_18.gnm1.ann1 + - glyma.TieJiaSiLiHuang.gnm1.ann1 + - glyma.TongShanTianEDan.gnm1.ann1 + - glyma.WanDouNo_28.gnm1.ann1 + - glyma.Wm82.gnm1.ann1 + - glyma.XuDouNo_1.gnm1.ann1 + - glyma.YuDouNo_22.gnm1.ann1 + - glyma.Zh13.gnm2.ann1 + - glyma.ZhangChunManCangJin.gnm1.ann1 + - glyma.Zhutwinning2.gnm1.ann1 + - glyma.ZiHuaNo_4.gnm1.ann1 + - glyso.PI_549046.gnm1.ann1 + - glyso.PI_562565.gnm1.ann1 + - glyso.PI_578357.gnm1.ann1 + - glycy.G1267.gnm1.ann1 + - glyd3.G1403.gnm1.ann1 + - glydo.G1134.gnm1.ann1 + - glyfa.G1718.gnm1.ann1 + - glyst.G1974.gnm1.ann1 + - glysy.G1300.gnm1.ann1 + +description: "Pan-gene set for Glycine species, spanning 8 species and 56 annotation sets, calculated using the pandagma pipeline, version 2.0 (February, 2024)" + +original_file_creation_date: "2024-02-14" + 
+local_file_creation_date: "2024-02-14" + +dataset_release_date: "2024-02-14" + +contributors: Steven Cannon + +data_curators: Steven Cannon + +public_access_level: public + +license: Open + +keywords: soybean, Glycine, pan-gene, pangene, orthogroup + diff --git a/Medicago/GENUS/pangenes/Medicago.pan3.9X6B/README.Medicago.pan3.9X6B.yml b/Medicago/GENUS/pangenes/Medicago.pan3.9X6B/README.Medicago.pan3.9X6B.yml new file mode 100644 index 00000000..2e36e0e6 --- /dev/null +++ b/Medicago/GENUS/pangenes/Medicago.pan3.9X6B/README.Medicago.pan3.9X6B.yml @@ -0,0 +1,58 @@ +--- +identifier: Medicago.pan3.9X6B + +provenance: "The files in this directory are a product of the staff of the SoyBase and LegumeInfo project teams. The method is described here: https://github.com/legumeinfo/pandagma" + +source: "https://data.legumeinfo.org" + +synopsis: "Pangene set for Medicago species, based primarily on M. truncatula but also including M. sativa. This pangene set includes 23 annotation sets." + +scientific_name: Medicago + +taxid: 3877 + +annotations_main: + - medsa.XinJiangDaYe_hap1.gnm1.ann1 + - medsa.XinJiangDaYe_hap2.gnm1.ann1 + - medsa.XinJiangDaYe_hap3.gnm1.ann1 + - medsa.XinJiangDaYe_hap4.gnm1.ann1 + - medtr.A17_HM341.gnm4.ann2 + - medtr.A17.gnm5.ann1_6 + - medtr.HM078.gnm1.ann1 + - medtr.R108.gnmHiC_1.ann1 + +annotations_extra: + - medtr.HM004.gnm1.ann1 + - medtr.HM010.gnm1.ann1 + - medtr.HM022.gnm1.ann1 + - medtr.HM023.gnm1.ann1 + - medtr.HM034.gnm1.ann1 + - medtr.HM050.gnm1.ann1 + - medtr.HM056.gnm1.ann1 + - medtr.HM058.gnm1.ann1 + - medtr.HM060.gnm1.ann1 + - medtr.HM095.gnm1.ann1 + - medtr.HM125.gnm1.ann1 + - medtr.HM129.gnm1.ann1 + - medtr.HM185.gnm1.ann1 + - medtr.HM324.gnm1.ann1 + - medtr.R108_HM340.gnm1.ann1 + +description: "Pan-gene set for Medicago species, spanning 2 species and 23 annotation sets, calculated using the pandagma pipeline, version 2.0 (February, 2024)" + +original_file_creation_date: "2024-02-14" + +local_file_creation_date: "2024-02-14" +
+dataset_release_date: "2024-02-14" + +contributors: Steven Cannon + +data_curators: Steven Cannon + +public_access_level: public + +license: Open + +keywords: Medicago, barrel medic, alfalfa, lucerne, pan-gene, pangene, orthogroup + diff --git a/Phaseolus/GENUS/pangenes/Phaseolus.pan3.LXKV/README.Phaseolus.pan3.LXKV.yml b/Phaseolus/GENUS/pangenes/Phaseolus.pan3.LXKV/README.Phaseolus.pan3.LXKV.yml new file mode 100644 index 00000000..b2ae4f32 --- /dev/null +++ b/Phaseolus/GENUS/pangenes/Phaseolus.pan3.LXKV/README.Phaseolus.pan3.LXKV.yml @@ -0,0 +1,44 @@ +--- +identifier: Phaseolus.pan3.LXKV + +provenance: "The files in this directory are a product of the staff of the SoyBase and LegumeInfo project teams. The method is described here: https://github.com/legumeinfo/pandagma" + +source: "https://data.legumeinfo.org" + +synopsis: "Pangene set for Phaseolus species, based primarily on P. vulgaris but also including P. acutifolius, P. coccineus, and P. lunatus. This pangene set includes 9 annotation sets." 
+ +scientific_name: Phaseolus + +taxid: 3883 + +annotations_main: + - phaac.Frijol_Bayo.gnm1.ann1 + - phaac.W6_15578.gnm2.ann1 + - phaco.PHA8298.gnm1.ann1 + - phalu.G27455.gnm1.ann1 + - phavu.5-593.gnm1.ann1 + - phavu.G19833.gnm2.ann1 + - phavu.LaborOvalle.gnm1.ann1 + - phavu.UI111.gnm1.ann1 + +annotations_extra: + - phavu.G19833.gnm1.ann1 + +description: "Pan-gene set for Phaseolus species, spanning 4 species and 9 annotation sets, calculated using the pandagma pipeline, version 2.0 (February, 2024)" + +original_file_creation_date: "2024-02-14" + +local_file_creation_date: "2024-02-14" + +dataset_release_date: "2024-02-14" + +contributors: Steven Cannon + +data_curators: Steven Cannon + +public_access_level: public + +license: Open + +keywords: Phaseolus, common bean, lima bean, pan-gene, pangene, orthogroup + diff --git a/Vigna/GENUS/pangenes/Vigna.pan3.V294/README.Vigna.pan3.V294.yml b/Vigna/GENUS/pangenes/Vigna.pan3.V294/README.Vigna.pan3.V294.yml new file mode 100644 index 00000000..320903e3 --- /dev/null +++ b/Vigna/GENUS/pangenes/Vigna.pan3.V294/README.Vigna.pan3.V294.yml @@ -0,0 +1,47 @@ +--- +identifier: Vigna.pan3.V294 + +provenance: "The files in this directory are a product of the staff of the SoyBase and LegumeInfo project teams. The method is described here: https://github.com/legumeinfo/pandagma" + +source: "https://data.legumeinfo.org" + +synopsis: "Pangene set for Vigna species, based primarily on V. unguiculata but also including V. angularis and V. radiata. This pangene set includes 12 annotation sets." 
+ +scientific_name: Vigna + +taxid: 3913 + +annotations_main: + - vigun.CB5-2.gnm1.ann1 + - vigun.IT97K-499-35.gnm1.ann2 + - vigun.Sanzi.gnm1.ann1 + - vigun.Suvita2.gnm1.ann1 + - vigun.TZ30.gnm1.ann2 + - vigun.UCR779.gnm1.ann1 + - vigun.ZN016.gnm1.ann2 + +annotations_extra: + - vigun.IT97K-499-35.gnm1.ann1 + - vigan.Gyeongwon.gnm3.ann1 + - vigan.Shumari.gnm1.ann1 + - vigra.VC1973A.gnm6.ann1 + - vigra.VC1973A.gnm7.ann1 + +description: "Pan-gene set for Vigna species, spanning 3 species and 12 annotation sets, calculated using the pandagma pipeline, version 2.0 (February, 2024)" + +original_file_creation_date: "2024-02-14" + +local_file_creation_date: "2024-02-14" + +dataset_release_date: "2024-02-14" + +contributors: Steven Cannon + +data_curators: Steven Cannon + +public_access_level: public + +license: Open + +keywords: Vigna, cowpea, mung bean, pan-gene, pangene, orthogroup + From 69d980fee0a35be885a36892aadb8b8da0bb6fd4 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Wed, 21 Feb 2024 13:21:58 -0600 Subject: [PATCH 25/27] Add collection of soybean transposable elements/repeats corresponding with Du, Grant et al. 
(2010) / SoyTEdb --- .../Wm82.rpt.C1JW/CHECKSUM.Wm82.rpt.C1JW.md5 | 7 ++++ .../Wm82.rpt.C1JW/README.Wm82.rpt.C1JW.yml | 37 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 Glycine/max/repeats/Wm82.rpt.C1JW/CHECKSUM.Wm82.rpt.C1JW.md5 create mode 100644 Glycine/max/repeats/Wm82.rpt.C1JW/README.Wm82.rpt.C1JW.yml diff --git a/Glycine/max/repeats/Wm82.rpt.C1JW/CHECKSUM.Wm82.rpt.C1JW.md5 b/Glycine/max/repeats/Wm82.rpt.C1JW/CHECKSUM.Wm82.rpt.C1JW.md5 new file mode 100644 index 00000000..ed815939 --- /dev/null +++ b/Glycine/max/repeats/Wm82.rpt.C1JW/CHECKSUM.Wm82.rpt.C1JW.md5 @@ -0,0 +1,7 @@ +3c71b0eb756eb111dfbba8e51b65f132 ./MANIFEST.Wm82.rpt.C1JW.correspondence.yml +c7e10ef425f34dd481eafd53cd0c7d12 ./README.Wm82.rpt.C1JW.yml +b20a835052e952a20c16e46e975e68fc ./glyma.Wm82.rpt.C1JW.SOY_TEdb.fna.gz +0410ce887e4a7d6951fe8a2a92ed531c ./glyma.Wm82.rpt.C1JW.SOY_TE_LIB.fna.gz +996d01c76b31ebc795ef5c631c441817 ./glyma.Wm82.rpt.C1JW.SOY_TE_LIB_id60.fna.gz +abba7aa2ce3f65a57e8ad03e4f552614 ./CHANGES.Wm82.rpt.C1JW.txt +8297a4079b3267723c40b84e2798ce2c ./MANIFEST.Wm82.rpt.C1JW.descriptions.yml diff --git a/Glycine/max/repeats/Wm82.rpt.C1JW/README.Wm82.rpt.C1JW.yml b/Glycine/max/repeats/Wm82.rpt.C1JW/README.Wm82.rpt.C1JW.yml new file mode 100644 index 00000000..cc1fe578 --- /dev/null +++ b/Glycine/max/repeats/Wm82.rpt.C1JW/README.Wm82.rpt.C1JW.yml @@ -0,0 +1,37 @@ +--- +identifier: Wm82.rpt.C1JW + +provenance: "This repository is considered the primary source for these files. The files here are held as part of the SoyBase and LegumeInfo." + +source: "https://soybase.org/data" + +synopsis: Genomic repetitive elements for Glycine max, identified in Williams 82 (Wm82) assembly 1, published as part of SoyTEdb + +scientific_name: Glycine max + +taxid: 3847 + +scientific_name_abbrev: glyma + +genotype: + - Williams 82 + +description: "This collection contains repetitive elements for Glycine max identified by Du, Grant, Tian et al. (2010)." 
+ +original_file_creation_date: "2010-02-17" + +local_file_creation_date: "2024-02-21" + +dataset_release_date: "2024-02-21" + +publication_doi: 10.1186/1471-2164-11-113 + +contributors: "Du J, Grant D, Tian Z, Nelson RT, Zhu L, Shoemaker RC, Ma J." + +data_curators: Rex Nelson, David Grant, Steven Cannon + +public_access_level: public + +license: Open + +keywords: transposable elements, SoyTEdb, soybean, repetitive elements From 025beba23502b5dc3bfed7d71cbe95982ec3a8c6 Mon Sep 17 00:00:00 2001 From: Steven Cannon Date: Mon, 26 Feb 2024 10:34:26 -0600 Subject: [PATCH 26/27] Update README.Glycine.pan5.MKRS.yml with addition of glyma.Wm82.gnm6.ann1 (previously glyma.Wm82_ISU01.gnm2.ann1) --- .../Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml b/Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml index 2eaddbda..5cad8d65 100644 --- a/Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml +++ b/Glycine/GENUS/pangenes/Glycine.pan5.MKRS/README.Glycine.pan5.MKRS.yml @@ -21,12 +21,12 @@ annotations_main: - glyma.Lee.gnm2.ann1 - glyma.Lee.gnm3.ann1 - glyma.Wenfeng7_IGA1001.gnm1.ann1 - - glyma.Wm82_IGA1008.gnm1.ann1 - - glyma.Wm82_ISU01.gnm2.ann1 - - glyma.Wm82_NJAU.gnm1.ann1 - glyma.Wm82.gnm2.ann1 - glyma.Wm82.gnm4.ann1 - glyma.Wm82.gnm5.ann1 + - glyma.Wm82.gnm6.ann1 + - glyma.Wm82_IGA1008.gnm1.ann1 + - glyma.Wm82_NJAU.gnm1.ann1 - glyma.Zh13_IGA1005.gnm1.ann1 - glyma.Zh13.gnm1.ann1 - glyma.Zh35_IGA1004.gnm1.ann1 @@ -55,6 +55,7 @@ annotations_extra: - glyma.TongShanTianEDan.gnm1.ann1 - glyma.WanDouNo_28.gnm1.ann1 - glyma.Wm82.gnm1.ann1 + - glyma.Wm82_ISU01.gnm2.ann1 - glyma.XuDouNo_1.gnm1.ann1 - glyma.YuDouNo_22.gnm1.ann1 - glyma.Zh13.gnm2.ann1 @@ -73,11 +74,11 @@ annotations_extra: description: "Pan-gene set for Glycine species, spanning 8 species and 56 annotation sets, calculated using the 
pandagma pipeline, version 2.0 (February, 2024)" -original_file_creation_date: "2024-02-14" +original_file_creation_date: "2024-02-26" -local_file_creation_date: "2024-02-14" +local_file_creation_date: "2024-02-26" -dataset_release_date: "2024-02-14" +dataset_release_date: "2024-02-26" contributors: Steven Cannon From dd87f637f8269770bf0d7379c71f11b679273100 Mon Sep 17 00:00:00 2001 From: Andrew Farmer Date: Wed, 28 Feb 2024 10:47:29 -0600 Subject: [PATCH 27/27] scientific_name_abbrev was missing from the README, causing a soybase build to fail --- .../genomes/Lee.gnm3.VG1C/CHECKSUM.Lee.gnm3.VG1C.md5 | 11 +++++++++++ .../genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml | 2 ++ 2 files changed, 13 insertions(+) create mode 100644 Glycine/max/genomes/Lee.gnm3.VG1C/CHECKSUM.Lee.gnm3.VG1C.md5 diff --git a/Glycine/max/genomes/Lee.gnm3.VG1C/CHECKSUM.Lee.gnm3.VG1C.md5 b/Glycine/max/genomes/Lee.gnm3.VG1C/CHECKSUM.Lee.gnm3.VG1C.md5 new file mode 100644 index 00000000..694b1cfb --- /dev/null +++ b/Glycine/max/genomes/Lee.gnm3.VG1C/CHECKSUM.Lee.gnm3.VG1C.md5 @@ -0,0 +1,11 @@ +7d56dd5756863ab8d880158701d0eea2 ./glyma.Lee.gnm3.VG1C.seqid_map.tsv.gz +87758d3368d45c4225180d14a22f9a49 ./glyma.Lee.gnm3.VG1C.genome_main.fna.gz.gzi +b542e5f84bd6e071019f48b2cd7bd8dc ./MANIFEST.Lee.gnm3.VG1C.correspondence.yml +d17f5ec55f930c21b8c2ebc157c98065 ./README.Lee.gnm3.VG1C.yml +b3e80b95056b7c6843e4a129c27dac1d ./MANIFEST.Lee.gnm3.VG1C.descriptions.yml +35a2aeeb39fadc8f69f576b7572d14c9 ./CHANGES.Lee.gnm3.VG1C.txt +06f6915b64780dfd3f018c8bc197119c ./glyma.Lee.gnm3.VG1C.genome_main.fna.gz.fai +1dc474e895f7ca2ed07cb94d3af51066 ./glyma.Lee.gnm3.VG1C.genome_main.fna.gz +fcfd3f1f74c9b6c7b6c6205d1f622112 ./BUSCO/glyma.Lee.gnm3.VG1C.busco.fabales_odb10.short_summary.txt +d37e3a981ee84bfbcafc6b07212d2414 ./BUSCO/glyma.Lee.gnm3.VG1C.busco.fabales_odb10.full_table.tsv.gz +cdc47598fd8c16be2fbcbc53fdc46378 ./BUSCO/glyma.Lee.gnm3.VG1C.busco.fabales_odb10.short_summary.json diff --git 
a/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml b/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml index 062b332b..b51ca2a7 100644 --- a/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml +++ b/Glycine/max/genomes/Lee.gnm3.VG1C/README.Lee.gnm3.VG1C.yml @@ -11,6 +11,8 @@ scientific_name: Glycine max taxid: 3847 +scientific_name_abbrev: glyma + genotype: - Lee