Skip to content

Commit

Permalink
2.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
mbaudis committed Dec 19, 2024
1 parent b363a17 commit 94ff9ff
Show file tree
Hide file tree
Showing 18 changed files with 322 additions and 283 deletions.
18 changes: 16 additions & 2 deletions bycon/byconServiceLibs/interval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import numpy as np
from copy import deepcopy
from os import path, pardir
from pymongo import MongoClient

from bycon import Cytobands, cytobands_label_from_positions, prdbug, BYC, BYC_PARS, ENV
from bycon import Cytobands, cytobands_label_from_positions, DB_MONGOHOST, prdbug, BYC, BYC_PARS, ENV

################################################################################

Expand Down Expand Up @@ -155,6 +156,19 @@ def intervalFrequencyMaps(self, analyses=[]):
return self.interval_frequencies, self.analyses_count


#--------------------------------------------------------------------------#
#--------------------------------------------------------------------------#

def intervalAidFrequencyMaps(self, ds_id, analysis_ids=["___none___"]):
    """Compute interval frequencies for analyses selected by their ids.

    Queries the `analyses` collection of the database named `ds_id` for all
    documents whose `id` is in `analysis_ids`, stores the resulting cursor
    in `self.analyses` and lets `__interval_counts_from_analyses()` do the
    counting.

    Args:
        ds_id: name of the MongoDB database (dataset) to read from.
        analysis_ids: list of analysis `id` values to match. The default
            list is only read, never mutated; presumably its single
            placeholder value matches no record - TODO confirm.

    Returns:
        A tuple `(self.interval_frequencies, self.analyses_count)`.
    """
    data_client = MongoClient(host=DB_MONGOHOST)
    try:
        ana_coll = data_client[ds_id]["analyses"]
        # The cursor is handed over lazily (not materialised as a list);
        # `__interval_counts_from_analyses()` closes it when it is done.
        self.analyses = ana_coll.find({"id": {"$in": analysis_ids}})
        self.__interval_counts_from_analyses()
    finally:
        # Release the connection even if the counting step raises;
        # previously the client was never closed.
        data_client.close()
    return self.interval_frequencies, self.analyses_count


#--------------------------------------------------------------------------#
#--------------------------------------------------------------------------#
# TODO: Not used anywhere?
Expand Down Expand Up @@ -470,4 +484,4 @@ def __interval_counts_from_analyses(self):
})

if type(self.analyses).__name__ == "Cursor":
analyses.close()
self.analyses.close()
2 changes: 2 additions & 0 deletions bycon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@

errors = []
warnings = []
notes = []

if "DATABASE_NAMES" in environ:
db_names = environ["DATABASE_NAMES"].split()
Expand All @@ -63,6 +64,7 @@
"TEST_MODE": False,
"ERRORS": errors,
"WARNINGS": warnings,
"NOTES": notes,
"USER": "anonymous",

"BYC_DATASET_IDS": [],
Expand Down
5 changes: 0 additions & 5 deletions bycon/config/argument_definitions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -536,11 +536,6 @@ $defs:
- --collationTypes
description: selected collation types, e.g. "EFO"

selected_beacons:
type: array
items:
type: string

# ------------------------------------------------------------------------------
# genomic parameters
# ------------------------------------------------------------------------------
Expand Down
82 changes: 82 additions & 0 deletions bycon/lib/beacon_response_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from datetime import datetime
from deepmerge import always_merger
from os import environ
from random import sample as random_samples

from config import *

Expand All @@ -16,6 +17,87 @@

################################################################################

class MultiQueryResponses():
    """Collect matched entity ids over one or several Beacon queries.

    Each entry of `self.multiqueries` maps a query name to a dictionary of
    query parameters. For every entry the parameters are injected into the
    `BYC_PARS` / `BYC` globals, `ByconResultSets()` is run, the globals are
    reset and a random sample of the matched ids for `self.ds_id` is added
    to `self.entity_ids`.

    By default a single pseudo-query without extra parameters
    (`useStandardArgs`) is used; if the `mode` parameter contains
    `testqueries` the queries are read from `BYC["test_queries"]`.
    """

    def __init__(self, dataset_id):
        multiqueries = {"useStandardArgs": {}}
        if "testqueries" in BYC_PARS.get("mode", "").lower():
            multiqueries = BYC.get("test_queries")
        # Ids accumulate over all queries and over repeated getter calls on
        # the same instance.
        self.entity_ids = set()
        self.target_path_id = "biosamples"
        self.multiqueries = multiqueries
        self.ds_id = dataset_id

    # -------------------------------------------------------------------------#
    # ----------------------------- public ------------------------------------#
    # -------------------------------------------------------------------------#

    def get_analysis_ids(self):
        """Run all queries and return the collected analysis ids as a list."""
        self.target_path_id = "analyses"
        self.__run_multi_queries()
        return self.entity_ids

    # -------------------------------------------------------------------------#

    def get_biosample_ids(self):
        """Run all queries and return the collected biosample ids as a list."""
        self.target_path_id = "biosamples"
        self.__run_multi_queries()
        return self.entity_ids

    # -------------------------------------------------------------------------#

    def get_individual_ids(self):
        """Run all queries and return the collected individual ids as a list."""
        self.target_path_id = "individuals"
        self.__run_multi_queries()
        return self.entity_ids

    # -------------------------------------------------------------------------#
    # ----------------------------- private -----------------------------------#
    # -------------------------------------------------------------------------#

    def __run_multi_queries(self):
        """Execute every configured query and merge sampled ids into `self.entity_ids`.

        `self.entity_ids` is always left as a list, also when no query
        produced data for `self.ds_id`.
        """
        ho_id = f'{self.target_path_id}.id'
        collected = set(self.entity_ids)

        # `BYC.get("test_queries")` may yield `None`; treat that as "no queries".
        for qek, qev in (self.multiqueries or {}).items():
            qev = qev or {}
            try:
                self.__inject_query_parameters(qev)
                prdbug(f'... getting data for {qek}')
                BRS = ByconResultSets()
                ds_results = BRS.datasetsResults()
            finally:
                # Always clean the globals, so a failing query cannot leak
                # its parameters into whatever runs next.
                self.__reset_query_parameters(qev)

            if not (ds := ds_results.get(self.ds_id)):
                # NOTE(review): the return value was never used; the call is
                # kept in case it has side effects - confirm and remove.
                BRS.get_record_queries()
                BYC["ERRORS"].append(f'ERROR - no {qek} data for {self.ds_id}')
                continue

            # A missing target entry is handled like an empty one.
            target = ds.get(ho_id, {})
            f_i_ids = target.get("target_values", [])
            # NOTE: the limit is read after the per-query parameters were
            # removed again, so a `limit` defined inside the query itself
            # does not apply to the sampling here.
            sample_size = min(BYC_PARS.get("limit", 200), len(f_i_ids))
            collected.update(random_samples(f_i_ids, sample_size))
            BYC["NOTES"].append(f'{qek} with {target.get("target_count", 0)} {self.target_path_id} hits')

        self.entity_ids = list(collected)

    # -------------------------------------------------------------------------#

    def __inject_query_parameters(self, qev):
        """Write the parameters of one query into the `BYC_PARS` / `BYC` globals."""
        for p, v in qev.items():
            if p == "filters":
                f_l = [{"id": f} for f in v]
                if f_l:
                    BYC.update({"BYC_FILTERS": f_l})
            else:
                BYC_PARS.update({p: v})

    # -------------------------------------------------------------------------#

    def __reset_query_parameters(self, qev):
        """Remove the parameters of one query from the globals again.

        Filters are tricky since they have a default `[]` value and have been
        pre-parsed into `BYC_FILTERS` at the stage of `ByconResultSets()`
        (_i.e._ embedded `ByconQuery()`).
        """
        for p in qev:
            if p == "filters":
                BYC_PARS.update({"filters": []})
            else:
                # Tolerate keys which already disappeared during the run.
                BYC_PARS.pop(p, None)
        BYC.update({"BYC_FILTERS": []})



################################################################################

class BeaconResponseMeta:
def __init__(self, data_response=None):
Expand Down
1 change: 0 additions & 1 deletion bycon/lib/query_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from config import *
from bycon_helpers import mongo_and_or_query_from_list, prdbug, prjsonnice, test_truthy


################################################################################

class ByconDatasetResults():
Expand Down
3 changes: 3 additions & 0 deletions bycon/lib/query_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from cytoband_parsing import Cytobands
from genome_utils import ChroNames, GeneInfo, VariantTypes


################################################################################
################################################################################
################################################################################

class ByconQuery():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
},
"aCollectionOf": {
"description": "If the entry type is a collection of other entry types, (e.g. a Dataset is a collection of Records), then this attribute must list the entry types that could be included. One collection type could be defined as included more than one entry type (e.g. a Dataset could include Individuals or Genomic Variants), in such cases the entries are alternative, meaning that a given instance of this entry type could be of only one of the types (e.g. a given Dataset contains Individuals, while another Dataset could contain Genomic Variants, but not both at once).",
"includedConcepts": {
"type": "array",
"type": "array",
"items": {
"$ref": "../common/basicElement.json"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ properties:
this entry type could be of only one of the types (e.g. a given Dataset contains
Individuals, while another Dataset could contain Genomic Variants, but not both
at once).
includedConcepts:
type: array
type: array
items:
$ref: ../common/basicElement.yaml
filteringTerms:
description: Reference to the file with the list of filtering terms that could
Expand Down
13 changes: 13 additions & 0 deletions docs/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@ While changes are documented for individual point versions we actually do not
push releases out for all of them; they serve more as internal development
milestones.

### 2024-12-19 (v2.1.1)

* Housekeepers updates:
- rewrote `collationsFrequencymapsCreator.py` to avoid slow processing coming
from use of standard query and bundle generation per collation
- introduced `queriesTester.py` which can use generic parameters or a set of
queries from `local/test_queries.yaml`
* this is based on the new `MultiQueryResponses` class which can loop through
multiple queries by injecting query parameters into `BYC_PARS`; it then
calls `ByconResultSets()` and creates a set of matched ids from all calls
- modified `recordsSampler.py` to allow creation of example/excerpt databases
from query parameters or the `local/test_queries.yaml` definitions

### 2024-12-09 (v2.1.0)

* integration of the React based `BeaconPlus` front end into the project
Expand Down
5 changes: 0 additions & 5 deletions docs/generated/argument_definitions.md
Original file line number Diff line number Diff line change
Expand Up @@ -461,11 +461,6 @@ delivery keys to force only some parameters in custom exporters
**description:**
selected collation types, e.g. "EFO"

#### `selected_beacons`
**type:** array
**items:**
- `type`: `string`

#### `genome_binning`
**type:** string
**default:** `1Mb`
Expand Down
2 changes: 1 addition & 1 deletion docs/housekeepers/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ given entity.

### Use

* `bin/collationsFrequencymapsCreator.py -d progenetix`
* `bin/collationsFrequencymapsCreator.py -d progenetix --limit 0`
* `bin/collationsFrequencymapsCreator.py -d examplez --collationTypes "icdot"`

## Deleting Records
Expand Down
Loading

0 comments on commit 94ff9ff

Please sign in to comment.