Skip to content

Commit

Permalink
Remove RNA-SeQC metrics from RNATables QC fields
Browse files (browse the repository at this point in the history)
  • Loading branch information
marcellevstek committed Apr 22, 2024
1 parent 6398e17 commit d7f4c40
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 39 deletions.
5 changes: 4 additions & 1 deletion docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ All notable changes to this project are documented in this file.
Unreleased
==========

Changed
-------
- Remove ``RNA-SeQC`` metrics from ``RNATables`` QC fields

Added
-----
- Add ``restart`` method to the ``Data`` resource


Fixed
-----
- Fix fetching ``RNATables`` for collections with missing MultiQC objects by
Expand Down
39 changes: 1 addition & 38 deletions src/resdk/tables/rna.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,18 +163,6 @@
"type": "Int64",
"agg_func": "mean",
},
{
"name": "RNA-SeQC_mqc-generalstats-rna_seqc-Expression_Profiling_Efficiency",
"slug": "profiling_efficiency",
"type": "float64",
"agg_func": "mean",
},
{
"name": "RNA-SeQC_mqc-generalstats-rna_seqc-Genes_Detected",
"slug": "genes_detected",
"type": "Int64",
"agg_func": "mean",
},
{
"slug": "strandedness_code",
"type": "string",
Expand All @@ -185,21 +173,6 @@
},
]

# Column specification for the MultiQC RNA-SeQC coverage statistics table
# ("multiqc_rna-seqc_coverage_stats.txt"), consumed by
# ``multiqc_coverage_parser``. Keys of every entry:
#   name     - presumably the column header as written in the MultiQC file
#              (confirm against ``general_multiqc_parser``)
#   slug     - column name used in the resulting QC DataFrame
#   type     - dtype the QC DataFrame column is cast to
#   agg_func - presumably the aggregation applied to repeated values
#              (confirm against ``general_multiqc_parser``)
MQC_COVERAGE_COLUMNS = [
    {
        "name": "Genes used in 3' bias",
        "slug": "num_genes_three_prime_bias",
        "type": "Int64",
        "agg_func": "mean",
    },
    {
        "name": "Mean 3' bias",
        "slug": "mean_three_prime_bias",
        "type": "float64",
        "agg_func": "mean",
    },
]


def general_multiqc_parser(file_object, name, column_names):
"""General parser for MultiQC files."""
Expand Down Expand Up @@ -239,11 +212,6 @@ def multiqc_general_stats_parser(file_object, name):
return general_multiqc_parser(file_object, name, MQC_GENERAL_COLUMNS)


def multiqc_coverage_parser(file_object, name):
    """Parse "multiqc_rna-seqc_coverage_stats.txt" file.

    Thin wrapper: forwards ``file_object`` and ``name`` unchanged to
    ``general_multiqc_parser`` with ``MQC_COVERAGE_COLUMNS`` as the column
    specification, and returns that parser's result as-is.
    """
    return general_multiqc_parser(
        file_object=file_object,
        name=name,
        column_names=MQC_COVERAGE_COLUMNS,
    )


def multiqc_strand_parser(file_object, name):
"""Parse "multiqc_library_strandedness.txt" file."""
df = pd.read_csv(file_object, sep="\t", index_col=0)
Expand Down Expand Up @@ -506,7 +474,6 @@ def _download_qc(self) -> pd.DataFrame:
"uri_general": f"{mqc.id}/multiqc_data/multiqc_general_stats.txt",
"uri_strand": f"{mqc.id}/multiqc_data/multiqc_library_strandedness.txt",
"uri_build": f"{mqc.id}/multiqc_data/multiqc_sample_info.txt",
"uri_coverage": f"{mqc.id}/multiqc_data/multiqc_rna-seqc_coverage_stats.txt",
}

df = pd.DataFrame(index=[sample.id for sample in self._samples])
Expand All @@ -515,7 +482,6 @@ def _download_qc(self) -> pd.DataFrame:
"uri_general": multiqc_general_stats_parser,
"uri_strand": multiqc_strand_parser,
"uri_build": multiqc_build_parser,
"uri_coverage": multiqc_coverage_parser,
}
for type_, parser in parsers.items():
uris = [item[type_] for item in mqc_db.values()]
Expand All @@ -528,10 +494,7 @@ def _download_qc(self) -> pd.DataFrame:
STRANDEDNESS_COLUMN = [{"slug": "strandedness_code", "type": "category"}]
column_types = {
c["slug"]: c["type"]
for c in MQC_GENERAL_COLUMNS
+ MQC_COVERAGE_COLUMNS
+ BUILD_COLUMN
+ STRANDEDNESS_COLUMN
for c in MQC_GENERAL_COLUMNS + BUILD_COLUMN + STRANDEDNESS_COLUMN
if c["slug"] in df.columns
}
df = df[column_types.keys()].astype(column_types)
Expand Down

0 comments on commit d7f4c40

Please sign in to comment.