Skip to content

Commit

Permalink
Speed up spot reading
Browse files Browse the repository at this point in the history
  • Loading branch information
jpjarnoux committed Nov 19, 2024
1 parent 1fd1092 commit 0074078
Showing 1 changed file with 15 additions and 12 deletions.
27 changes: 15 additions & 12 deletions ppanggolin/formats/readBinaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,23 +773,27 @@ def read_rgp(pangenome: Pangenome, h5f: tables.File, disable_bar: bool = False):

 def read_spots(pangenome: Pangenome, h5f: tables.File, disable_bar: bool = False):
     """
-    Read hotspot in pangenome hdf5 file to add in pangenome object
+    Read hotspots in the pangenome HDF5 file and add them to the pangenome object.

-    :param pangenome: Pangenome object without spot
-    :param h5f: Pangenome HDF5 file with spot computed
-    :param disable_bar: Disable the progress bar
+    Args:
+        pangenome (Pangenome): Pangenome object.
+        h5f (tables.File): Pangenome HDF5 file with spots computed.
+        disable_bar (bool): Whether to disable the progress bar.
     """
     table = h5f.root.spots
     spots = {}
+    curr_spot_id = None
     for row in tqdm(read_chunks(table, chunk=20000), total=table.nrows, unit="spot", disable=disable_bar):
-        curr_spot = spots.get(int(row["spot"]))
-        if curr_spot is None:
-            curr_spot = Spot(int(row["spot"]))
-            spots[row["spot"]] = curr_spot
+        if curr_spot_id != int(row["spot"]):
+            curr_spot_id = int(row["spot"])
+            curr_spot = spots.get(curr_spot_id)
+            if curr_spot is None:
+                curr_spot = Spot(int(row["spot"]))
+                spots[row["spot"]] = curr_spot
         region = pangenome.get_region(row["RGP"].decode())
         curr_spot.add(region)
-        curr_spot.spot_2_families()
     for spot in spots.values():
+        spot.spot_2_families()
         pangenome.add_spot(spot)
     pangenome.status["spots"] = "Loaded"

Expand Down Expand Up @@ -1174,9 +1178,8 @@ def read_pangenome(pangenome, annotation: bool = False, gene_families: bool = Fa
             logging.getLogger("PPanGGOLiN").info("Reading the spots...")
             read_spots(pangenome, h5f, disable_bar=disable_bar)
         else:
-            raise Exception(f"The pangenome in file '{filename}' does not have spots information, "
-                            f"or has been improperly filled")
-
+            raise AttributeError(f"The pangenome in file '{pangenome.file}' does not have spots information, "
+                                 f"or has been improperly filled")
     if modules:
         if h5f.root.status._v_attrs.modules:
             logging.getLogger("PPanGGOLiN").info("Reading the modules...")
Expand Down

0 comments on commit 0074078

Please sign in to comment.