Skip to content

Commit

Permalink
2nd Parity update to 1.1.1 treeval
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Jul 4, 2024
1 parent 9b11919 commit 5e22cd9
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 25 deletions.
Binary file removed assets/nf-core-curationpretext_logo_light.png
Binary file not shown.
91 changes: 72 additions & 19 deletions bin/generate_cram_csv.sh
Original file line number Diff line number Diff line change
@@ -1,29 +1,82 @@
#!/bin/bash
cram_path=$1
chunkn=0
for cram in ${cram_path}/*.cram; do
rgline=$(samtools view -H $cram|grep "RG"|sed 's/\t/\\t/g'|sed "s/'//g")

crampath=$(readlink -f ${cram})
# generate_cram_csv.sh
# -------------------
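# Generate a CSV file describing the CRAM files found in a folder.
# Usage: generate_cram_csv.sh <cram_path> <output_csv>
# Each row written to <output_csv> describes one chunk of a CRAM file:
#   cram path, crai path, from, to, basename, chunk number, @RG header line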
# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°>
# Author = yy5
# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°>

ncontainers=$(zcat ${crampath}.crai|wc -l)
base=$(basename $cram .cram)
# Function to process chunking of a CRAM file
# Function to process chunking of a CRAM file.
#
# Appends one CSV row per chunk to the output CSV. A chunk is a range
# [from, to] over the entries of the CRAM index; "to" advances in steps of
# 10000 and the last chunk ends at the total number of index entries.
#
# Arguments:
#   $1 - path to the CRAM file (its index is expected at "$1.crai")
#   $2 - chunk number to assign to the first chunk written
#   $3 - CSV file to append rows to
# Outputs:
#   Appends rows "cram,crai,from,to,base,chunkn,rgline" to $3.
#   Prints the next unused chunk number on stdout (callers capture it).
chunk_cram() {
    local cram=$1
    local chunkn=$2
    local outcsv=$3
    # Declared separately from assignment so 'local' does not mask failures,
    # and so realcram/realcrai no longer leak into the global scope.
    local realcram realcrai rgline ncontainers base
    local from=0
    local to=10000

    realcram=$(readlink -f "${cram}")
    realcrai=$(readlink -f "${cram}.crai")

    # Only genuine @RG header records: the pattern is anchored so that @PG
    # lines whose command line merely mentions "@RG" are not picked up.
    # Tabs become a literal "\t" and single quotes are stripped.
    rgline=$(samtools view -H "${realcram}" | grep '^@RG' | sed 's/\t/\\t/g' | sed "s/'//g")

    # Number of lines in the decompressed index.
    ncontainers=$(zcat "${realcrai}" | wc -l)
    base=$(basename "${realcram}" .cram)

    # Full-size chunks.
    while (( to < ncontainers )); do
        printf '%s\n' "${realcram},${realcrai},${from},${to},${base},${chunkn},${rgline}" >> "$outcsv"
        from=$((to + 1))
        to=$((to + 10000))
        chunkn=$((chunkn + 1))
    done

    # Final (possibly partial) chunk up to the last index entry.
    if (( from <= ncontainers )); then
        printf '%s\n' "${realcram},${realcrai},${from},${ncontainers},${base},${chunkn},${rgline}" >> "$outcsv"
        chunkn=$((chunkn + 1))
    fi

    echo "$chunkn"
}

# Function to process a CRAM file
# Function to process a CRAM file.
#
# Reads the read-group IDs from the CRAM header. When the file holds more
# than one read group, each group is written to its own CRAM in the current
# directory ("<basename>_output_<rg>.cram"), indexed, and chunked
# separately; otherwise the file is chunked as-is.
#
# Arguments:
#   $1 - path to the CRAM file
#   $2 - chunk number to start from
#   $3 - CSV file that chunk_cram appends rows to
# Outputs:
#   Prints the next unused chunk number on stdout (callers capture it).
process_cram_file() {
    local cram=$1
    local chunkn=$2
    local outcsv=$3
    local -a read_groups=()
    local rg output_cram

    # Collect ID: values from genuine @RG records only. Header fields are
    # tab-separated and the record type is the first field, so a @PG line
    # whose command line mentions "@RG" is not mistaken for a read group.
    mapfile -t read_groups < <(
        samtools view -H "$cram" |
            awk -F'\t' '$1 == "@RG" { for (i = 2; i <= NF; i++) if ($i ~ /^ID:/) print substr($i, 4) }'
    )

    if (( ${#read_groups[@]} > 1 )); then
        # Multiple read groups: process each separately
        for rg in "${read_groups[@]}"; do
            output_cram="$(basename "${cram%.cram}")_output_${rg}.cram"
            samtools view -h -r "$rg" -o "$output_cram" "$cram"
            samtools index "$output_cram"
            chunkn=$(chunk_cram "$output_cram" "$chunkn" "$outcsv")
        done
    else
        # Single read group or no read groups
        chunkn=$(chunk_cram "$cram" "$chunkn" "$outcsv")
    fi

    echo "$chunkn"
}

# /\_/\ /\_/\
# ( o.o ) main ( o.o )
# > ^ < > ^ <

# Check that both the CRAM directory and the output CSV path are provided.
# The CSV path is mandatory: every row is appended to it by the functions
# above, so an empty value would make each redirection fail.
if [ -z "$1" ] || [ -z "$2" ]; then
    echo "Usage: $0 <cram_path> <output_csv>" >&2
    exit 1
fi

cram_path=$1
chunkn=0
outcsv=$2

# Loop through each CRAM file in the specified directory. Each entry is
# resolved with readlink -f first, so the real file rather than a symlink
# to it is what gets processed.
for cram in "${cram_path}"/*.cram; do
    # When nothing matches, the glob stays as the literal pattern; skip it.
    [ -e "$cram" ] || continue
    realcram=$(readlink -f "$cram")
    # chunkn is threaded through so chunk numbers keep increasing across files.
    chunkn=$(process_cram_file "$realcram" "$chunkn" "$outcsv")
done
Binary file removed docs/images/mqc_fastqc_adapter.png
Binary file not shown.
Binary file removed docs/images/mqc_fastqc_counts.png
Binary file not shown.
Binary file removed docs/images/mqc_fastqc_quality.png
Binary file not shown.
Binary file removed docs/images/nf-core-curationpretext_logo_dark.png
Binary file not shown.
Binary file removed docs/images/nf-core-curationpretext_logo_light.png
Binary file not shown.
9 changes: 3 additions & 6 deletions modules/local/generate_cram_csv.nf
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
process GENERATE_CRAM_CSV {
tag "${meta.id}"
label 'process_low'
label 'process_tiny'

conda "bioconda::samtools=1.17"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
'biocontainers/samtools:1.17--h00cdaf9_0' }"
container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1'

input:
tuple val(meta), path(crampath)
Expand All @@ -17,7 +14,7 @@ process GENERATE_CRAM_CSV {
script:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
generate_cram_csv.sh $crampath >> ${prefix}_cram.csv
generate_cram_csv.sh $crampath ${prefix}_cram.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
2 changes: 2 additions & 0 deletions subworkflows/local/generate_maps.nf
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ workflow GENERATE_MAPS {
)
ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions )

GENERATE_CRAM_CSV.out.csv.view()

//
// LOGIC: make branches for different hic aligner.
//
Expand Down

0 comments on commit 5e22cd9

Please sign in to comment.