Skip to content

Commit

Permalink
Merge pull request #1 from iaradsouza1/feature-counts
Browse files Browse the repository at this point in the history
feat: added featureCounts module from subread. Tested with the default test.config.
  • Loading branch information
iaradsouza1 authored Jun 8, 2023
2 parents f1c2e1a + f2bf007 commit 9c77d75
Show file tree
Hide file tree
Showing 9 changed files with 201 additions and 7 deletions.
27 changes: 27 additions & 0 deletions bin/get_counts.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env Rscript

library(dplyr)
library(purrr)

# Merge per-sample featureCounts tables into a single count table.
#
# Usage: get_counts.r <feature_counts_files>
#
# Outputs (written to the current working directory):
#   count_table.txt - merged table with the original column names
#   count_table.rds - matrix with Geneid values as row names and one
#                     column per input file
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("Usage: get_counts.r <feature_counts_files>", call. = FALSE)
}

# File names may arrive as separate arguments or as one space-separated
# string; splitting every argument on spaces handles both cases.
files <- unlist(strsplit(args, split = " "))

# Keep columns 1 and 7 of every file, then inner-join all tables on the
# shared "Geneid" column so only ids present in every file remain.
count_table <- map(files, function(path) {
  read.table(path, header = TRUE) %>%
    select(1, 7)
}) %>%
  reduce(inner_join, by = "Geneid")

write.table(
  count_table,
  file = "count_table.txt",
  row.names = FALSE,
  quote = FALSE
)

# Shorten each column name to the text before its first underscore.
# vapply() guarantees a character vector regardless of input shape.
full_names <- colnames(count_table)
short_names <- vapply(
  strsplit(full_names, split = "_"),
  "[[",
  character(1),
  1
)

# Names such as "WT_REP1" and "WT_REP2" would both collapse to "WT" and
# leave the matrix with ambiguous duplicate columns. Keep the full name
# for every column whose shortened form is not unique.
clashing <- short_names %in% short_names[duplicated(short_names)]
if (any(clashing)) {
  warning(
    "Shortened column names are not unique; keeping full names for: ",
    paste(full_names[clashing], collapse = ", "),
    call. = FALSE
  )
  short_names[clashing] <- full_names[clashing]
}
colnames(count_table) <- short_names

# Move the gene identifiers into the row names so the matrix holds only
# the count columns.
rownames(count_table) <- count_table$Geneid
count_table$Geneid <- NULL

count_table <- as.matrix(count_table)

saveRDS(count_table, file = "count_table.rds")
12 changes: 10 additions & 2 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*/

params {

config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

Expand All @@ -20,7 +21,14 @@ params {
max_time = '6.h'

// Input data
input = "https://raw.githubusercontent.com/iaradsouza1/test-dataset/main/samplesheet.csv"
// input = "https://raw.githubusercontent.com/iaradsouza1/test-dataset/main/samplesheet.csv"
input = "file:///home/iaradsouza/samplesheet.csv"

fasta_filter = "https://github.com/iaradsouza1/test-dataset/raw/main/genome/sub_csabeus_26.fasta.gz"
fasta_align = "https://github.com/iaradsouza1/test-dataset/raw/main/genome/sub_tcruzi_1_4.fasta.gz"

// fasta_align = "https://github.com/iaradsouza1/test-dataset/raw/main/genome/sub_tcruzi_1_4.fasta.gz"
fasta_align = "https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/genome.fasta"
gtf_align = "https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/genes.gtf"
attribute = 'gene_id'

}
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
"branch": "master",
"git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7",
"installed_by": ["modules"]
},
"subread/featurecounts": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
}
}
}
Expand Down
27 changes: 27 additions & 0 deletions modules/local/gather_counts.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Merge the per-sample featureCounts tables into one count table by running
// the bundled get_counts.r script on all staged input files.
//
// Input:
//   feature_counts - the featureCounts result file(s) to merge
// Output:
//   txt      - count_table.txt written by get_counts.r
//   rds      - count_table.rds written by get_counts.r
//   versions - versions.yml recording the R version used by this task
process GATHER_COUNTS {
    label "process_medium"

    container "biocontainers/r-tidyverse:1.2.1"

    input:
    path feature_counts

    output:
    path "count_table.txt", emit: txt
    path "count_table.rds", emit: rds
    // versions.yml is written by the script below; declaring it here lets
    // callers collect it through GATHER_COUNTS.out.versions.
    path "versions.yml"   , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script: // This script is bundled with the pipeline, in iaradsouza1/tab-projeto-final/bin
    """
    get_counts.r \\
        $feature_counts

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
    END_VERSIONS
    """

}
49 changes: 49 additions & 0 deletions modules/nf-core/subread/featurecounts/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions modules/nf-core/subread/featurecounts/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ params {
// Input options
input = null
fasta_filter = null
fasta_align = null

fasta_align = null
gtf_align = null
attribute = null

// References
// genome = null
Expand Down
11 changes: 11 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@
"type": "string",
"description": "Fasta file from the target genome",
"format": "file-path"
},
"gtf_align": {
"type": "string",
"description": "GTF file for the target organism.",
"format": "file-path"
},
"attribute": {
"type": "string",
"default": "gene_id",
"description": "Attribute to count reads",
"enum": ["gene_id", "gene", "transcript", "exon"]
}
},
"required": ["fasta_filter", "fasta_align"]
Expand Down
20 changes: 17 additions & 3 deletions workflows/finalproject.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
// Validate input parameters
WorkflowFinalproject.initialise(params, log)

// TODO nf-core: Add all file path parameters for the pipeline to the list below
// Check input path parameters to see if they exist
def checkPathParamList = [ params.input, params.multiqc_config, params.fasta_filter, params.fasta_align ]
def checkPathParamList = [ params.input, params.multiqc_config, params.fasta_filter, params.fasta_align, params.gtf_align ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// Check mandatory parameters
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }

if (params.attribute == null ) { exit 1, 'You must specify a feature type (e.g., gene_id or exon) to count reads.' }
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
Expand Down Expand Up @@ -55,6 +54,8 @@ include { BOWTIE2_BUILD as BOWTIE2_BUILD_HOST } from '../modules/nf-core/bowtie2
include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_HOST } from '../modules/nf-core/bowtie2/align/main'
include { BOWTIE2_BUILD as BOWTIE2_BUILD_ORG } from '../modules/nf-core/bowtie2/build/main'
include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_ORG } from '../modules/nf-core/bowtie2/align/main'
include { SUBREAD_FEATURECOUNTS } from '../modules/nf-core/subread/featurecounts/main'
include { GATHER_COUNTS } from '../modules/local/gather_counts'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -116,10 +117,23 @@ workflow FINALPROJECT {
)
ch_versions = ch_versions.mix(BOWTIE2_ALIGN_ORG.out.versions)

//
// MODULE: Run featureCounts to obtain the table of gene counts
//
SUBREAD_FEATURECOUNTS (
BOWTIE2_ALIGN_ORG.out.aligned.map{ [ it[0], it[1], params.gtf_align ] }, params.attribute
)
ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions)

GATHER_COUNTS(
SUBREAD_FEATURECOUNTS.out.counts.collect{it[1]}
)

// Dump software versions
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)

//
// MODULE: MultiQC
//
Expand Down

0 comments on commit 9c77d75

Please sign in to comment.