Skip to content

Commit

Permalink
Merge branch 'demo-rfc80-poc' into demo-rfc80-poc-generic-assay-patient-filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
alisman authored Nov 18, 2024
2 parents 1f54c3d + a56a698 commit e700e31
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ public void getMutationCounts() {
Map<String, Integer> actualMutationCounts = studyViewMapper.getMutationCounts(StudyViewFilterHelper.build(studyViewFilter, null, null), genomicDataFilterMutation);
Map<String, Integer> expectedMutationCounts = new HashMap<>();
expectedMutationCounts.put("mutatedCount", 2);
expectedMutationCounts.put("notMutatedCount", 2);
expectedMutationCounts.put("notProfiledCount", 11);
expectedMutationCounts.put("notMutatedCount", 8);
expectedMutationCounts.put("notProfiledCount", 5);
assertThat(actualMutationCounts)
.usingRecursiveComparison()
.ignoringCollectionOrder()
Expand All @@ -103,7 +103,7 @@ public void getMutationCountsByType() {
List<GenomicDataCountItem> actualMutationCountsByType = studyViewMapper.getMutationCountsByType(StudyViewFilterHelper.build(studyViewFilter, null, null), List.of(genomicDataFilterMutation));
List<GenomicDataCountItem> expectedMutationCountsByType = List.of(
new GenomicDataCountItem("AKT1", "mutations", List.of(
new GenomicDataCount("nonsense mutation", "nonsense_mutation", 1, 1),
new GenomicDataCount("nonsense mutation", "nonsense_mutation", 2, 1),
new GenomicDataCount("missense mutation", "missense_mutation", 1, 1)
)));
assertThat(actualMutationCountsByType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void getMolecularProfileCounts() {

var size = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(10, size);
assertEquals(11, size);

}

Expand All @@ -66,7 +66,7 @@ public void getMolecularProfileCountsMultipleStudies() {

var size = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(10, size);
assertEquals(11, size);

}

Expand All @@ -84,7 +84,7 @@ public void getMolecularProfileCountsMultipleProfilesUnion() {

var sizeMutations = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(10, sizeMutations);
assertEquals(11, sizeMutations);

var sizeMrna = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mrna"))
.findFirst().get().getCount().intValue();
Expand All @@ -107,7 +107,7 @@ public void getMolecularProfileCountsMultipleProfilesIntersect() {

var sizeMutations = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(9, sizeMutations);
assertEquals(10, sizeMutations);



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class StudyViewMapperTest extends AbstractTestcontainers {

private static final String STUDY_TCGA_PUB = "study_tcga_pub";
private static final String STUDY_ACC_TCGA = "acc_tcga";
private static final String STUDY_GENIE_PUB = "study_genie_pub";

@Autowired
private StudyViewMapper studyViewMapper;
Expand Down Expand Up @@ -130,14 +131,110 @@ public void getTotalProfiledCountsByGene() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));

var totalProfiledCountsMap = studyViewMapper.getTotalProfiledCounts(StudyViewFilterHelper.build(studyViewFilter, null, null),
// Testing profiled counts on samples with gene panel data and WES for one study
var totalProfiledCountsForMutationsMap = studyViewMapper.getTotalProfiledCounts(StudyViewFilterHelper.build(studyViewFilter, null, null),
"MUTATION_EXTENDED", List.of());

assertEquals(3, totalProfiledCountsMap.size());

var akt2TotalProfiledCounts = totalProfiledCountsMap.stream().filter(c -> c.getHugoGeneSymbol().equals("AKT2")).findFirst();
assertTrue(akt2TotalProfiledCounts.isPresent());
assertEquals(4, akt2TotalProfiledCounts.get().getNumberOfProfiledCases().intValue());
var totalProfiledCountsForCnaMap = studyViewMapper.getTotalProfiledCounts(StudyViewFilterHelper.build(studyViewFilter, null, null),
"COPY_NUMBER_ALTERATION", List.of());
var sampleProfiledCountsForMutationsWithoutPanelDataMap = studyViewMapper.getSampleProfileCountWithoutPanelData(StudyViewFilterHelper.build(studyViewFilter, null, null),
"MUTATION_EXTENDED");
var sampleProfiledCountsForCnaWithoutPanelDataMap = studyViewMapper.getSampleProfileCountWithoutPanelData(StudyViewFilterHelper.build(studyViewFilter, null, null),
"COPY_NUMBER_ALTERATION");

// Assert the count of genes with profiled cases for mutations
assertEquals(5, totalProfiledCountsForMutationsMap.size());
// Assert the count of genes with profiled cases for CNA
assertEquals(5, totalProfiledCountsForCnaMap.size());
// Assert the profiled counts for mutations without panel data (WES)
assertEquals(6, sampleProfiledCountsForMutationsWithoutPanelDataMap);
// Assert the profiled counts for CNA without panel data (WES)
assertEquals(11, sampleProfiledCountsForCnaWithoutPanelDataMap);

// Assert the profiled counts for AKT2 mutations
// AKT2 is on testpanel2 in STUDY_TCGA_PUB
var akt2TotalProfiledCountsForMutations = totalProfiledCountsForMutationsMap.stream().filter(c -> c.getHugoGeneSymbol().equals("AKT2")).findFirst();
assertTrue(akt2TotalProfiledCountsForMutations.isPresent());
assertEquals(4, akt2TotalProfiledCountsForMutations.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for BRCA1 mutations
// BRCA1 is on testpanel1 in STUDY_TCGA_PUB
var brca1TotalProfiledCountsForMutations = totalProfiledCountsForMutationsMap.stream().filter(c -> c.getHugoGeneSymbol().equals("BRCA1")).findFirst();
assertTrue(brca1TotalProfiledCountsForMutations.isPresent());
assertEquals(1, brca1TotalProfiledCountsForMutations.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for AKT1 mutations
// AKT1 is on both testpanel1 and testpanel2 in STUDY_TCGA_PUB
var akt1TotalProfiledCountsForMutations = totalProfiledCountsForMutationsMap.stream().filter(c -> c.getHugoGeneSymbol().equals("AKT1")).findFirst();
assertTrue(akt1TotalProfiledCountsForMutations.isPresent());
assertEquals(5, akt1TotalProfiledCountsForMutations.get().getNumberOfProfiledCases().intValue());

// Assert the profiled counts for AKT2 CNA
// AKT2 is on testpanel2 in STUDY_TCGA_PUB
var akt2TotalProfiledCountsForCna = totalProfiledCountsForCnaMap.stream().filter(c -> c.getHugoGeneSymbol().equals("AKT2")).findFirst();
assertTrue(akt2TotalProfiledCountsForCna.isPresent());
assertEquals(6, akt2TotalProfiledCountsForCna.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for BRCA1 CNA
// BRCA1 is on testpanel1 in STUDY_TCGA_PUB
var brca1TotalProfiledCountsForCna = totalProfiledCountsForCnaMap.stream().filter(c -> c.getHugoGeneSymbol().equals("BRCA1")).findFirst();
assertTrue(brca1TotalProfiledCountsForCna.isPresent());
assertEquals(2, brca1TotalProfiledCountsForCna.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for AKT1 CNA
// AKT1 is on both testpanel1 and testpanel2 in STUDY_TCGA_PUB
var akt1TotalProfiledCountsForCna = totalProfiledCountsForCnaMap.stream().filter(c -> c.getHugoGeneSymbol().equals("AKT1")).findFirst();
assertTrue(akt1TotalProfiledCountsForCna.isPresent());
assertEquals(8, akt1TotalProfiledCountsForCna.get().getNumberOfProfiledCases().intValue());

// Testing profiled counts on combined studies
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB, STUDY_GENIE_PUB));

// Testing profiled counts on samples with gene panel data and WES for a combined study
var totalProfiledCountsForMutationsMap1 = studyViewMapper.getTotalProfiledCounts(StudyViewFilterHelper.build(studyViewFilter, null, null),
"MUTATION_EXTENDED", List.of());
var totalProfiledCountsForCnaMap1 = studyViewMapper.getTotalProfiledCounts(StudyViewFilterHelper.build(studyViewFilter, null, null),
"COPY_NUMBER_ALTERATION", List.of());
var sampleProfiledCountsForMutationsWithoutPanelDataMap1 = studyViewMapper.getSampleProfileCountWithoutPanelData(StudyViewFilterHelper.build(studyViewFilter, null, null),
"MUTATION_EXTENDED");
var sampleProfiledCountsForCnaWithoutPanelDataMap1 = studyViewMapper.getSampleProfileCountWithoutPanelData(StudyViewFilterHelper.build(studyViewFilter, null, null),
"COPY_NUMBER_ALTERATION");

// Assert the count of genes with profiled cases for mutations in a combined study
assertEquals(8, totalProfiledCountsForMutationsMap1.size());
// Assert the count of genes with profiled cases for CNA in a combined study
assertEquals(8, totalProfiledCountsForCnaMap1.size());
// Assert the profiled counts for mutations without panel data (WES) in a combined study
assertEquals(8, sampleProfiledCountsForMutationsWithoutPanelDataMap1);
// Assert the profiled counts for CNA without panel data (WES) in a combined study
assertEquals(12, sampleProfiledCountsForCnaWithoutPanelDataMap1);

// Assert the profiled counts for BRCA1 mutations
// BRCA1 is on testpanel1 in STUDY_TCGA_PUB
var brca1TotalProfiledCountsForMutations1 = totalProfiledCountsForMutationsMap1.stream().filter(c -> c.getHugoGeneSymbol().equals("BRCA1")).findFirst();
assertTrue(brca1TotalProfiledCountsForMutations1.isPresent());
assertEquals(1, brca1TotalProfiledCountsForMutations1.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for BRCA2 mutations
// BRCA2 is on testpanel3 and testpanel4 in STUDY_GENIE_PUB
var brca2TotalProfiledCountsForMutations1 = totalProfiledCountsForMutationsMap1.stream().filter(c -> c.getHugoGeneSymbol().equals("BRCA2")).findFirst();
assertTrue(brca2TotalProfiledCountsForMutations1.isPresent());
assertEquals(2, brca2TotalProfiledCountsForMutations1.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for AKT2 mutations
// AKT2 is on testpanel2 in STUDY_TCGA_PUB and testpanel4 in STUDY_GENIE_PUB
var akt2TotalProfiledCountsForMutations1 = totalProfiledCountsForMutationsMap1.stream().filter(c -> c.getHugoGeneSymbol().equals("AKT2")).findFirst();
assertTrue(akt2TotalProfiledCountsForMutations1.isPresent());
assertEquals(4, akt2TotalProfiledCountsForMutations1.get().getNumberOfProfiledCases().intValue());

// Assert the profiled counts for BRCA1 CNA
// BRCA1 is on testpanel1 in STUDY_TCGA_PUB
var brca1TotalProfiledCountsForCna1 = totalProfiledCountsForCnaMap1.stream().filter(c -> c.getHugoGeneSymbol().equals("BRCA1")).findFirst();
assertTrue(brca1TotalProfiledCountsForCna1.isPresent());
assertEquals(2, brca1TotalProfiledCountsForCna1.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for BRCA2 CNA
// BRCA2 is on testpanel3 and testpanel4 in STUDY_GENIE_PUB
var brca2TotalProfiledCountsForCna1 = totalProfiledCountsForCnaMap1.stream().filter(c -> c.getHugoGeneSymbol().equals("BRCA2")).findFirst();
assertTrue(brca2TotalProfiledCountsForCna1.isPresent());
assertEquals(3, brca2TotalProfiledCountsForCna1.get().getNumberOfProfiledCases().intValue());
// Assert the profiled counts for AKT2 CNA
// AKT2 is on testpanel2 in STUDY_TCGA_PUB and testpanel4 in STUDY_GENIE_PUB
var akt2TotalProfiledCountsForCna1 = totalProfiledCountsForCnaMap1.stream().filter(c -> c.getHugoGeneSymbol().equals("AKT2")).findFirst();
assertTrue(akt2TotalProfiledCountsForCna1.isPresent());
assertEquals(7, akt2TotalProfiledCountsForCna1.get().getNumberOfProfiledCases().intValue());
}

@Test
Expand Down
4 changes: 2 additions & 2 deletions src/test/resources/clickhouse_cgds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ CREATE TABLE genetic_profile_samples
CREATE TABLE gene_panel
(
internal_id Int32,
stable_id String,
stable_id Nullable(String),
name Nullable(String),
description Nullable(String)
) ENGINE = MergeTree() ORDER BY internal_id;
Expand All @@ -251,7 +251,7 @@ CREATE TABLE gene_panel_list
-- --------------------------------------------------------
CREATE TABLE sample_profile
(
panel_id Int32,
panel_id Nullable(Int32),
sample_id Int32,
genetic_profile_id Int32
) ENGINE = MergeTree() ORDER BY sample_id;
Expand Down
20 changes: 20 additions & 0 deletions src/test/resources/clickhouse_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ insert into genetic_profile (genetic_profile_id,stable_id,cancer_study_id,geneti
insert into genetic_profile (genetic_profile_id,stable_id,cancer_study_id,genetic_alteration_type,datatype,name,description,show_profile_in_analysis_tab,generic_assay_type) values (12,'study_tcga_pub_mutational_signature',1,'generic_assay','limit-value','mutational_signature values','mutational_signature values',1,'mutational_signature');
insert into genetic_profile (genetic_profile_id,stable_id,cancer_study_id,genetic_alteration_type,datatype,name,description,show_profile_in_analysis_tab) values (14,'study_tcga_pub_cna',1,'COPY_NUMBER_ALTERATION','discrete','Copy-number alterations','Copy number alterations (amplifications and deletions) from targeted sequencing.',1);
insert into genetic_profile (genetic_profile_id,stable_id,cancer_study_id,genetic_alteration_type,datatype,name,description,show_profile_in_analysis_tab) values (15,'study_tcga_pub_rppa',1,'PROTEIN_LEVEL','log2-value','Protein expression (RPPA)','Protein expression measured by reverse-phase protein array',0);
insert into genetic_profile (genetic_profile_id,stable_id,cancer_study_id,genetic_alteration_type,datatype,name,description,show_profile_in_analysis_tab) values (16,'genie_public_mutations',3,'MUTATION_EXTENDED','maf','Somatic mutations','Somatic mutations from targeted sequencing.',1);
insert into genetic_profile (genetic_profile_id,stable_id,cancer_study_id,genetic_alteration_type,datatype,name,description,show_profile_in_analysis_tab) values (17,'genie_public_cna',3,'COPY_NUMBER_ALTERATION','discrete','Copy-number alterations','Copy number alterations (amplifications and deletions) from targeted sequencing.',1);

insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (2,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,');
insert into genetic_profile_samples (genetic_profile_id,ordered_sample_list) values (3,'2,3,6,8,9,10,12,13,');
Expand Down Expand Up @@ -207,18 +209,28 @@ insert into mutation (mutation_event_id,genetic_profile_id,sample_id,entrez_gene

insert into gene_panel (internal_id,stable_id,description) values (1,'testpanel1','a test panel consisting of a few genes');
insert into gene_panel (internal_id,stable_id,description) values (2,'testpanel2','another test panel consisting of a few genes');
insert into gene_panel (internal_id,stable_id,description) values (3,'testpanel3','third test panel consisting of a few genes');
insert into gene_panel (internal_id,stable_id,description) values (4,'testpanel4','fourth test panel consisting of a few genes');

insert into gene_panel_list (internal_id,gene_id) values (1,207);
insert into gene_panel_list (internal_id,gene_id) values (1,369);
insert into gene_panel_list (internal_id,gene_id) values (1,672);
insert into gene_panel_list (internal_id,gene_id) values (2,207);
insert into gene_panel_list (internal_id,gene_id) values (2,208);
insert into gene_panel_list (internal_id,gene_id) values (2,4893);
insert into gene_panel_list (internal_id,gene_id) values (3,472);
insert into gene_panel_list (internal_id,gene_id) values (3,673);
insert into gene_panel_list (internal_id,gene_id) values (3,675);
insert into gene_panel_list (internal_id,gene_id) values (4,208);
insert into gene_panel_list (internal_id,gene_id) values (4,673);
insert into gene_panel_list (internal_id,gene_id) values (4,675);

insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (1,2,1);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (1,3,1);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (1,4,null);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (1,6,2);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (1,6,1);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (2,2,1);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (2,2,2);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (2,3,1);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (2,4,2);
Expand Down Expand Up @@ -263,6 +275,14 @@ insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (13,4,
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (13,6,null);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (14,2,null);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (14,4,null);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (301,16,3);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (301,17,3);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (302,16,3);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (303,17,3);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (304,17,4);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (305,16,null);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (306,16,null);
insert into sample_profile (sample_id,genetic_profile_id,panel_id) values (306,17,null);

insert into sample_list (list_id,stable_id,category,cancer_study_id,name,description) values (1,'study_tcga_pub_all','other',1,'all tumors','all tumor samples');
insert into sample_list (list_id,stable_id,category,cancer_study_id,name,description) values (2,'study_tcga_pub_acgh','other',1,'tumors acgh','all tumors with acgh data');
Expand Down
Loading

0 comments on commit e700e31

Please sign in to comment.