Skip to content

Commit

Permalink
Refactor the module class
Browse files Browse the repository at this point in the history
  • Loading branch information
jpjarnoux committed Aug 9, 2023
1 parent 0182e30 commit 31fd993
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 21 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.153
1.2.154
4 changes: 2 additions & 2 deletions ppanggolin/formats/readBinaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,9 +427,9 @@ def read_modules(pangenome: Pangenome, h5f: tables.File, disable_bar: bool = Fal
table = h5f.root.modules
modules = {} # id2mod
for row in tqdm(read_chunks(table, chunk=20000), total=table.nrows, unit="module", disable=disable_bar):
curr_module = modules.get(row['module'])
curr_module = modules.get(int(row['module']))
if curr_module is None:
curr_module = Module(row['module'])
curr_module = Module(int(row['module']))
modules[row["module"]] = curr_module
curr_module.add_family(pangenome.get_gene_family(row['geneFam'].decode()))
for module in modules.values():
Expand Down
4 changes: 2 additions & 2 deletions ppanggolin/formats/writeBinaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ def write_modules(pangenome: Pangenome, h5f: tables.File, force: bool = False, d
h5f.remove_node("/", "modules")

mod_table = h5f.create_table('/', 'modules', mod_desc(get_mod_desc(pangenome)),
expectedrows=sum([len(mod.families) for mod in pangenome.modules]))
expectedrows=sum([len(mod) for mod in pangenome.modules]))
mod_row = mod_table.row

for mod in tqdm(pangenome.modules, total=pangenome.number_of_modules(), unit="modules", disable=disable_bar):
Expand Down Expand Up @@ -760,7 +760,7 @@ def getmin(arg: iter) -> float:
info_group._v_attrs.numberOfSpots = pangenome.number_of_spots()
if pangenome.status["modules"] in ["Computed", "Loaded"]:
info_group._v_attrs.numberOfModules = pangenome.number_of_modules()
info_group._v_attrs.numberOfFamiliesInModules = sum([len(mod.families) for mod in pangenome.modules])
info_group._v_attrs.numberOfFamiliesInModules = sum([len(mod) for mod in pangenome.modules])

info_group._v_attrs.parameters = pangenome.parameters # saving the pangenome parameters

Expand Down
6 changes: 3 additions & 3 deletions ppanggolin/formats/writeFlat.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,8 +758,8 @@ def write_module_summary(output: Path, compress: bool = False):
for gene in family.genes:
org_dict[gene.organism].add(gene)
fout.write(
f"module_{mod.ID}\t{len(mod.families)}\t{len(org_dict)}\t{partition_counter.most_common(1)[0][0]}\t"
f"{round((sum([len(genes) for genes in org_dict.values()]) / len(org_dict)) / len(mod.families), 3)}\n")
f"module_{mod.ID}\t{len(mod)}\t{len(org_dict)}\t{partition_counter.most_common(1)[0][0]}\t"
f"{round((sum([len(genes) for genes in org_dict.values()]) / len(org_dict)) / len(mod), 3)}\n")
fout.close()

logging.getLogger("PPanGGOLiN").info(f"Done writing module summary: '{output.as_posix() + '/modules_summary.tsv'}'")
Expand Down Expand Up @@ -797,7 +797,7 @@ def write_org_modules(output: Path, compress: bool = False):
for fam in mod.families:
mod_orgs |= set(fam.organisms)
for org in mod_orgs:
completion = round((org.number_of_families() + len(mod.families)) / len(mod.families), 2)
completion = round((org.number_of_families() + len(mod)) / len(mod), 2)
fout.write(f"module_{mod.ID}\t{org.name}\t{completion}\n")
fout.close()
logging.getLogger("PPanGGOLiN").info(
Expand Down
2 changes: 1 addition & 1 deletion ppanggolin/mod/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def predict_modules(pangenome: Pangenome, dup_margin: float = 0.05, size: int =

fams = set()
for mod in modules:
fams |= mod.families
fams |= set(mod.families)
pangenome.add_module(mod)

logging.getLogger("PPanGGOLiN").info(f"There are {len(fams)} families among {len(modules)} modules")
Expand Down
40 changes: 28 additions & 12 deletions ppanggolin/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,31 +417,47 @@ def __init__(self, module_id: int, families: set = None):
'associated_families' are gene families that you believe are associated to the module in some way,
but do not define it.
"""
if not isinstance(module_id, int):
raise TypeError(f"Module identifier must be an integer. Given type is {type(module_id)}")
super().__init__()
self.ID = module_id
self._families = set()
if families is not None:
if not all(isinstance(fam, GeneFamily) for fam in families):
raise Exception("You provided elements that were not GeneFamily object. "
"Modules are only made of GeneFamily")
self._families |= set(families)
self._families_getter = {}
[self.add_family(family) for family in families] if families is not None else None
self.bitarray = None

def __setitem__(self, name, family):
    """Register a gene family in the module under the given name.

    :param name: key under which the family is stored
    :param family: gene family to add to the module

    :raises TypeError: if ``family`` is not a GeneFamily
    :raises KeyError: if a different gene family is already stored under ``name``
    """
    if not isinstance(family, GeneFamily):
        raise TypeError(f"A gene family is expected to be added to module. Given type was {type(family)}")
    if name in self._families_getter:
        # Storing the same family again is accepted; only a different
        # family under an already used name is treated as a conflict.
        if self[name] != family:
            raise KeyError("A different gene family with the same name already exist in the module")
    self._families_getter[name] = family
    # Let the family know which module it now belongs to.
    family.add_module(self)

def __getitem__(self, name) -> GeneFamily:
    """Return the gene family registered in the module under ``name``.

    :param name: name of the gene family to look up

    :return: the gene family stored under that name

    :raises KeyError: if no gene family with this name is in the module
    """
    try:
        return self._families_getter[name]
    except KeyError:
        # The message below replaces the raw lookup error, so the implicit
        # exception context is dropped to keep the traceback to one error.
        raise KeyError(f"There isn't gene family with the name {name} in the module") from None

def __delitem__(self, name):
    """Remove the gene family registered under ``name`` from the module.

    Only the module's own name-to-family mapping is modified here.

    :param name: name of the gene family to remove

    :raises KeyError: if no gene family with this name is in the module
    """
    try:
        del self._families_getter[name]
    except KeyError:
        # Same wording as the lookup error raised by __getitem__, so a missing
        # name is reported identically whether it is read or deleted.
        raise KeyError(f"There isn't gene family with the name {name} in the module") from None

def __len__(self) -> int:
    """Return the number of gene families stored in the module.

    :return: number of entries in the name-to-family mapping
    """
    return len(self._families_getter)

@property
def families(self) -> Set[GeneFamily]:
# TODO made as generator
return self._families
def families(self) -> Generator[GeneFamily, None, None]:
for family in self._families_getter.values():
yield family

def add_family(self, family: GeneFamily):
"""
Add a family to the module
:param family: the family that will be added to the module
"""
if not isinstance(family, GeneFamily):
raise Exception("You did not provide a GenFamily object. Modules are only made of GeneFamily")
family.add_module(self)
self._families.add(family)
self._families_getter[family.name] = family

def mk_bitarray(self, index: Dict[Organism, int], partition: str = 'all'):
"""Produces a bitarray representing the presence / absence of families in the organism using the provided index
Expand Down

0 comments on commit 31fd993

Please sign in to comment.