From cce9aa99ed1d0b38c1e0893f351eaf3c07b2aeca Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Fri, 12 Jul 2024 03:04:52 +0000 Subject: [PATCH 1/7] initial commit --- benchmarks/environment.yml | 15 +++ benchmarks/reader_bms.py | 119 ++++++++++-------- zarrtraj/ZARR.py | 51 ++++---- zarrtraj/data/README.md | 17 --- .../write_aligned_compressed_disk_h5md.py | 31 +++++ .../write_aligned_compressed_disk_zarrmd.py | 21 ++++ .../write_aligned_compressed_s3_zarrmd.py | 25 ++++ .../write_aligned_uncompressed_disk_h5md.py | 31 +++++ .../write_aligned_uncompressed_disk_zarrmd.py | 21 ++++ .../write_aligned_uncompressed_s3_zarrmd.py | 25 ++++ zarrtraj/data/write_long_s3_traj.py | 49 -------- zarrtraj/data/write_short_disk_traj.py | 35 ------ 12 files changed, 267 insertions(+), 173 deletions(-) create mode 100755 benchmarks/environment.yml delete mode 100755 zarrtraj/data/README.md create mode 100644 zarrtraj/data/write_aligned_compressed_disk_h5md.py create mode 100644 zarrtraj/data/write_aligned_compressed_disk_zarrmd.py create mode 100644 zarrtraj/data/write_aligned_compressed_s3_zarrmd.py create mode 100644 zarrtraj/data/write_aligned_uncompressed_disk_h5md.py create mode 100644 zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py create mode 100644 zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py delete mode 100644 zarrtraj/data/write_long_s3_traj.py delete mode 100644 zarrtraj/data/write_short_disk_traj.py diff --git a/benchmarks/environment.yml b/benchmarks/environment.yml new file mode 100755 index 0000000..9fd3cf5 --- /dev/null +++ b/benchmarks/environment.yml @@ -0,0 +1,15 @@ +name: asv-zarrtraj +channels: + - defaults + - conda-forge +dependencies: + - MDAnalysis>=2.7.0 + - zarr>=2.11.0 + - dask + - distributed + + ### AWS dependencies ### + - s3fs=2024.3.0 + + + diff --git a/benchmarks/reader_bms.py b/benchmarks/reader_bms.py index 84f1cb1..6f199bc 100644 --- a/benchmarks/reader_bms.py +++ b/benchmarks/reader_bms.py @@ -3,73 +3,94 @@ # from asv_runner.benchmarks.mark import skip_for_params from zarr.storage import DirectoryStore, LRUStoreCache import MDAnalysis.analysis.rms as rms +from MDAnalysis.coordinates.H5MD import H5MDReader import os +""" +Note: while h5md files are chunked at (1, n_atoms, 3), zarr files +are chunked with as many frames as can fit in 12MB + +""" + BENCHMARK_DATA_DIR = os.getenv("BENCHMARK_DATA_DIR") +s3_files = [ + "s3://zarrtraj-test-data/yiip_aligned_compressed.zarrmd", + "s3://zarrtraj-test-data/yiip_aligned_uncompressed.zarrmd", + "s3://zarrtraj-test-data/yiip_aligned_compressed.h5md", + "s3://zarrtraj-test-data/yiip_aligned_uncompressed.h5md", +] +local_files = [ + f"{BENCHMARK_DATA_DIR}/yiip_aligned_compressed.zarrmd", + f"{BENCHMARK_DATA_DIR}/yiip_aligned_uncompressed.zarrmd", + f"{BENCHMARK_DATA_DIR}/yiip_aligned_compressed.h5md", + f"{BENCHMARK_DATA_DIR}/yiip_aligned_uncompressed.h5md", +] + +h5md_files = [ + f"{BENCHMARK_DATA_DIR}/yiip_aligned_compressed.h5md", + f"{BENCHMARK_DATA_DIR}/yiip_aligned_uncompressed.h5md", +] os.environ["S3_REGION_NAME"] = "us-west-1" os.environ["AWS_PROFILE"] = "sample_profile" -class TrajReaderDiskBenchmarks(object): - """Benchmarks for zarrtraj file striding.""" +class ZARRH5MDDiskStrideTime(object): + """Benchmarks for zarrmd and h5md file striding using local files.""" - params = ( - [0, 1, 9], - ["all", 3], - [1, 10, 50], - ) - param_names = [ - "compressor_level", - "filter_precision", - "chunk_frames", - ] + params = local_files + param_names = ["filename"] + + def setup(self, filename): + self.reader_object = ZARRH5MDReader(filename) - def setup( - self, - compressor_level, - filter_precision, - chunk_frames, - ): - self.traj_file = f"{BENCHMARK_DATA_DIR}/short_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj" - self.reader_object = ZarrTrajReader(self.traj_file) - - def time_strides( - self, - compressor_level, - filter_precision, - chunk_frames, - ): + def time_strides(self, filename): """Benchmark striding over full trajectory""" for ts in self.reader_object: pass -class TrajReaderAWSBenchmarks(object): - timeout = 86400 - params = ( - [0, 1, 9], - ["all", 3], - [10, 100], - ) +class ZARRH5MDS3StrideTime(object): + """Benchmarks for zarrmd and h5md file striding using local files.""" - param_names = [ - "compressor_level", - "filter_precision", - "chunk_frames", - ] + params = s3_files + param_names = ["filename"] - def setup(self, compressor_level, filter_precision, chunk_frames): - self.traj_file = f"s3://zarrtraj-test-data/long_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj" - self.reader_object = ZarrTrajReader( - self.traj_file, - ) - # self.universe = mda.Universe( - # f"{BENCHMARK_DATA_DIR}/YiiP_system.pdb", self.traj_file - # ) - - def time_strides(self, compressor_level, filter_precision, chunk_frames): + def setup(self, filename): + self.reader_object = ZARRH5MDReader(filename) + + def time_strides(self, filename): + """Benchmark striding over full trajectory""" + for ts in self.reader_object: + pass + + +class H5MDReadersDiskStrideTime(object): + """Benchmarks for zarrmd and h5md file striding using local files.""" + + params = (h5md_files, [ZARRH5MDReader, H5MDReader]) + param_names = ["filename", "reader"] + + def setup(self, filename, reader): + self.reader_object = reader(filename) + + def time_strides(self, filename, reader): + """Benchmark striding over full trajectory""" + for ts in self.reader_object: + pass + + +class H5MDFmtDiskRSMFTime(object): + """Benchmarks for zarrtraj file striding.""" + + params = (local_files, "" + param_names = ["filename"] + + def setup(self, filename): + self.reader_object = ZARRH5MDReader(filename) + + def time_strides(self, filename): """Benchmark striding over full trajectory""" for ts in self.reader_object: pass diff --git a/zarrtraj/ZARR.py b/zarrtraj/ZARR.py index 96aa55c..9a11675 100644 --- a/zarrtraj/ZARR.py +++ b/zarrtraj/ZARR.py @@ -821,6 +821,7 @@ def __init__( # to get buffer indices, do i.e. _val_idx % _val_frames_per_chunk self._val_idx = 0 self._t_idx = 0 + self._n_frames = n_frames val_filter = None time_filter = None @@ -886,7 +887,7 @@ def write( ): # flush buffer and extend zarr dset if reached end of chunk # this will never be called if n_frames is less than the chunk size - if self._val_idx % self._val_frames_per_chunk == 0: + if self._val_idx != 0 and self._val_idx % self._val_frames_per_chunk == 0: self._val[self._val_idx - self._val_frames_per_chunk :] = ( self._val_buf[:] ) @@ -913,18 +914,18 @@ def flush(self): and shink the zarr datasets to the correct size. """ self._val[ - self._val_idx - self._val_frames_per_chunk : self._val_idx + self._val_idx - (self._val_idx % (self._val_frames_per_chunk + 1)) : self._val_idx ] = self._val_buf[ - : (self._val_idx - 1 % self._val_frames_per_chunk) + 1 + : (self._val_idx % (self._val_frames_per_chunk + 1)) ] self._val.resize(self._val_idx, *self._val_chunks[1:]) - self._t[self._t_idx - self._t_frames_per_chunk : self._t_idx] = ( - self._t_buf[: (self._t_idx - 1 % self._t_frames_per_chunk) + 1] + self._t[self._t_idx - (self._t_idx % (self._t_frames_per_chunk + 1)) : self._t_idx] = ( + self._t_buf[: (self._t_idx % (self._t_frames_per_chunk + 1))] ) self._t.resize(self._t_idx) - self._s[self._t_idx - self._t_frames_per_chunk : self._t_idx] = ( - self._s_buf[: (self._t_idx - 1 % self._t_frames_per_chunk) + 1] + self._s[self._t_idx - (self._t_idx % (self._t_frames_per_chunk + 1)) : self._t_idx] = ( + self._s_buf[: (self._t_idx % (self._t_frames_per_chunk + 1))] ) self._s.resize(self._t_idx) @@ -1268,27 +1269,16 @@ def _allocate_buffers(self, ts): """Allocates buffers for timestep data that wasn't already allocated""" t_unit = self._unit_translation_dict["time"][self.units["time"]] - length_unit = ( - self._unit_translation_dict["length"][self.units["length"]] - if self.units["length"] is not None - else None - ) - vel_unit = ( - self._unit_translation_dict["velocity"][self.units["velocity"]] - if self.units["velocity"] is not None - else None - ) - force_unit = ( - self._unit_translation_dict["force"][self.units["force"]] - if self.units["force"] is not None - else None - ) - if ( ts.dimensions is not None and np.all(ts.dimensions > 0) and "box/edges" not in self._elements ): + length_unit = ( + self._unit_translation_dict["length"][self.units["length"]] + if self.units["length"] is not None + else None + ) self._traj["box"].attrs["boundary"] = 3 * ["periodic"] self._traj["box"].require_group("edges") self._elements["box/edges"] = H5MDElementBuffer( @@ -1307,6 +1297,11 @@ def _allocate_buffers(self, ts): and ts.has_positions and "position" not in self._elements ): + length_unit = ( + self._unit_translation_dict["length"][self.units["length"]] + if self.units["length"] is not None + else None + ) self._traj.require_group("position") self._elements["position"] = H5MDElementBuffer( ts.positions.shape, @@ -1324,6 +1319,11 @@ def _allocate_buffers(self, ts): and ts.has_velocities and "velocity" not in self._elements ): + vel_unit = ( + self._unit_translation_dict["velocity"][self.units["velocity"]] + if self.units["velocity"] is not None + else None + ) self._traj.require_group("velocity") self._elements["velocity"] = H5MDElementBuffer( ts.velocities.shape, @@ -1341,6 +1341,11 @@ def _allocate_buffers(self, ts): and ts.has_forces and "force" not in self._elements ): + force_unit = ( + self._unit_translation_dict["force"][self.units["force"]] + if self.units["force"] is not None + else None + ) self._traj.require_group("force") self._elements["force"] = H5MDElementBuffer( ts.forces.shape, diff --git a/zarrtraj/data/README.md b/zarrtraj/data/README.md deleted file mode 100755 index 550587e..0000000 --- a/zarrtraj/data/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# Sample Package Data - -This directory contains sample additional data you may want to include with your package. -This is a place where non-code related additional information (such as data files, molecular structures, etc.) can -go that you want to ship alongside your code. - -Please note that it is not recommended to place large files in your git directory. If your project requires files larger -than a few megabytes in size it is recommended to host these files elsewhere. This is especially true for binary files -as the `git` structure is unable to correctly take updates to these files and will store a complete copy of every version -in your `git` history which can quickly add up. As a note most `git` hosting services like GitHub have a 1 GB per repository -cap. - -## Including package data - -Modify your package's `setup.py` file and the `setup()` command. Include the -[`package_data`](http://setuptools.readthedocs.io/en/latest/setuptools.html#basic-use) keyword and point it at the -correct files. \ No newline at end of file diff --git a/zarrtraj/data/write_aligned_compressed_disk_h5md.py b/zarrtraj/data/write_aligned_compressed_disk_h5md.py new file mode 100644 index 0000000..cc07455 --- /dev/null +++ b/zarrtraj/data/write_aligned_compressed_disk_h5md.py @@ -0,0 +1,31 @@ +import zarrtraj +import zarr +from numcodecs import Blosc, Quantize +import MDAnalysis as mda +from MDAnalysis.analysis import rms, align +import numcodecs + +# This requires MDAnalysis >= 2.8.0 + +u = mda.Universe( + "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc", +) + +average = align.AverageStructure( + u, u, select="protein and name CA", ref_frame=0 +).run() +ref = average.results.universe + +aligner = align.AlignTraj( + u, + ref, + select="protein and name CA", + filename="zarrtraj/data/yiip_aligned_compressed.h5md", + writer_kwargs=dict( + n_frames=u.trajectory.n_frames, + compression="gzip", + compression_opts=9, + chunks=(9, u.trajectory.n_atoms, 3), + ), +).run() diff --git a/zarrtraj/data/write_aligned_compressed_disk_zarrmd.py b/zarrtraj/data/write_aligned_compressed_disk_zarrmd.py new file mode 100644 index 0000000..778bef5 --- /dev/null +++ b/zarrtraj/data/write_aligned_compressed_disk_zarrmd.py @@ -0,0 +1,21 @@ +import zarrtraj +import zarr +from numcodecs import Blosc, Quantize +import MDAnalysis as mda +from MDAnalysis.analysis import rms, align +import numcodecs + +# This requires MDAnalysis >= 2.8.0 + +u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") + +average = align.AverageStructure(u, u, select='protein and name CA', + ref_frame=0).run() +ref = average.results.universe + +aligner = align.AlignTraj(u, ref, + select='protein and name CA', + filename='yiip_aligned_compressed.zarrmd', + writer_kwargs= dict( + n_frames=u.trajectory.n_frames, + precision=3, compressor=numcodecs.Blosc(cname="zstd", clevel=9))).run() diff --git a/zarrtraj/data/write_aligned_compressed_s3_zarrmd.py b/zarrtraj/data/write_aligned_compressed_s3_zarrmd.py new file mode 100644 index 0000000..9e4a0be --- /dev/null +++ b/zarrtraj/data/write_aligned_compressed_s3_zarrmd.py @@ -0,0 +1,25 @@ +import zarrtraj +import zarr +from numcodecs import Blosc, Quantize +import MDAnalysis as mda +from MDAnalysis.analysis import rms, align +import numcodecs +import os + +os.environ["AWS_PROFILE"] = "sample_profile" +os.environ["AWS_REGION"] = "us-west-1" + +# This requires MDAnalysis >= 2.8.0 + +u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") + +average = align.AverageStructure(u, u, select='protein and name CA', + ref_frame=0).run() +ref = average.results.universe + +aligner = align.AlignTraj(u, ref, + select='protein and name CA', + filename='s3://zarrtraj-test-data/yiip_aligned_compressed.zarrmd', + writer_kwargs= dict( + n_frames=u.trajectory.n_frames, + precision=3, compressor=numcodecs.Blosc(cname="zstd", clevel=9))).run() diff --git a/zarrtraj/data/write_aligned_uncompressed_disk_h5md.py b/zarrtraj/data/write_aligned_uncompressed_disk_h5md.py new file mode 100644 index 0000000..14be95e --- /dev/null +++ b/zarrtraj/data/write_aligned_uncompressed_disk_h5md.py @@ -0,0 +1,31 @@ +import zarrtraj +import zarr +from numcodecs import Blosc, Quantize +import MDAnalysis as mda +from MDAnalysis.analysis import rms, align +import numcodecs + +# This requires MDAnalysis >= 2.8.0 + +u = mda.Universe( + "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc", +) + +average = align.AverageStructure( + u, u, select="protein and name CA", ref_frame=0 +).run() +ref = average.results.universe + +aligner = align.AlignTraj( + u, + ref, + select="protein and name CA", + filename="zarrtraj/data/yiip_aligned_uncompressed.h5md", + writer_kwargs=dict( + n_frames=u.trajectory.n_frames, + compression="gzip", + compression_opts=0, + chunks=(9, u.trajectory.n_atoms, 3), + ), +).run() diff --git a/zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py b/zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py new file mode 100644 index 0000000..fb90e2d --- /dev/null +++ b/zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py @@ -0,0 +1,21 @@ +import zarrtraj +import zarr +from numcodecs import Blosc, Quantize +import MDAnalysis as mda +from MDAnalysis.analysis import rms, align +import numcodecs + +# This requires MDAnalysis >= 2.8.0 + +u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") + +average = align.AverageStructure(u, u, select='protein and name CA', + ref_frame=0).run() +ref = average.results.universe + +aligner = align.AlignTraj(u, ref, + select='protein and name CA', + filename='yiip_aligned_compressed.zarrmd', + writer_kwargs= dict( + n_frames=u.trajectory.n_frames, + compressor=numcodecs.Blosc(cname="zstd", clevel=0))).run() diff --git a/zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py b/zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py new file mode 100644 index 0000000..2057395 --- /dev/null +++ b/zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py @@ -0,0 +1,25 @@ +import zarrtraj +import zarr +from numcodecs import Blosc, Quantize +import MDAnalysis as mda +from MDAnalysis.analysis import rms, align +import numcodecs +import os + +os.environ["AWS_PROFILE"] = "sample_profile" +os.environ["AWS_REGION"] = "us-west-1" + +# This requires MDAnalysis >= 2.8.0 + +u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") + +average = align.AverageStructure(u, u, select='protein and name CA', + ref_frame=0).run() +ref = average.results.universe + +aligner = align.AlignTraj(u, ref, + select='protein and name CA', + filename='s3://zarrtraj-test-data/yiip_aligned_uncompressed.zarrmd', + writer_kwargs= dict( + n_frames=u.trajectory.n_frames, + compressor=numcodecs.Blosc(cname="zstd", clevel=0))).run() diff --git a/zarrtraj/data/write_long_s3_traj.py b/zarrtraj/data/write_long_s3_traj.py deleted file mode 100644 index f3e51ec..0000000 --- a/zarrtraj/data/write_long_s3_traj.py +++ /dev/null @@ -1,49 +0,0 @@ -import zarrtraj -import zarr -from numcodecs import Blosc, Quantize -import MDAnalysis as mda -import MDAnalysisData - -yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_long( - data_home="notebook_data_tmp" -) -# 901 frames of 111815 atoms -# each frame is 1341780 bytes (1.34178 mB) -# -uLong = mda.Universe(yiip.topology, yiip.trajectory) - -storage_options = { - "anon": False, - "s3": { - "profile": "sample_profile", - "client_kwargs": {"region_name": "us-west-1"}, - }, -} - -compressor_lvl = [0, 1, 9] -filters_list = ["all", 3] -# approx 1.3 mb, 13 mB, 130mB -chunks_frames = [1, 10, 100] - -for c in compressor_lvl: - for f in filters_list: - for ch in chunks_frames: - if f == 3: - filters = [Quantize(digits=3, dtype="f4")] - else: - filters = None - z = f"s3://zarrtraj-test-data/long_{c}_{f}_{ch}.zarrtraj" - - with mda.Writer( - z, - n_atoms=uLong.trajectory.n_atoms, - n_frames=uLong.trajectory.n_frames, - force_buffered=True, - compressor=Blosc(cname="zstd", clevel=c), - filters=filters, - chunks=(ch, uLong.trajectory.n_atoms, 3), - storage_options=storage_options, - max_memory=2**28, - ) as w: - for ts in uLong.trajectory: - w.write(uLong) diff --git a/zarrtraj/data/write_short_disk_traj.py b/zarrtraj/data/write_short_disk_traj.py deleted file mode 100644 index 499e39f..0000000 --- a/zarrtraj/data/write_short_disk_traj.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Used to generate benchmarking data for asv""" - -import zarrtraj -import zarr -from numcodecs import Blosc, Quantize -import MDAnalysis as mda -from MDAnalysis.tests.datafiles import PSF, DCD - -uShort = mda.Universe(PSF, DCD) - -compressor_lvl = [0, 1, 9] -filters_list = ["all", 3] -chunks_frames = [1, 10, 50] -# total traj is 1.3 MB -# 98 frames, 3341 atoms - -for c in compressor_lvl: - for f in filters_list: - for ch in chunks_frames: - if f == 3: - filters = [Quantize(digits=3, dtype="f4")] - else: - filters = None - z = zarr.open_group(f"short_{c}_{f}_{ch}.zarrtraj") - with mda.Writer( - z, - n_atoms=uShort.trajectory.n_atoms, - n_frames=uShort.trajectory.n_frames, - force_buffered=True, - compressor=Blosc(cname="zstd", clevel=c), - filters=filters, - chunks=(ch, uShort.trajectory.n_atoms, 3), - ) as w: - for ts in uShort.trajectory: - w.write(uShort) From b507badb2c1dd29cc7714ec850f3e6213738f13c Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Sat, 13 Jul 2024 04:46:44 +0000 Subject: [PATCH 2/7] benchmark run prep --- .gitignore | 5 +- benchmarks/reader_bms.py | 124 +++++++++++------- gh_actions_debug/run_moto.py | 36 ----- .../write_aligned_compressed_disk_h5md.py | 33 ++--- .../write_aligned_compressed_disk_zarrmd.py | 29 ++-- .../write_aligned_compressed_s3_zarrmd.py | 26 ++-- .../write_aligned_uncompressed_disk_h5md.py | 33 ++--- .../write_aligned_uncompressed_disk_zarrmd.py | 29 ++-- .../write_aligned_uncompressed_s3_zarrmd.py | 26 ++-- 9 files changed, 156 insertions(+), 185 deletions(-) delete mode 100644 gh_actions_debug/run_moto.py diff --git a/.gitignore b/.gitignore index 49b07b6..e349835 100755 --- a/.gitignore +++ b/.gitignore @@ -113,4 +113,7 @@ poetry.lock # benchmark stuff benchmarks/.asv -benchmarks/testfiles/ \ No newline at end of file +benchmarks/testfiles/ + +# debugging +tmp \ No newline at end of file diff --git a/benchmarks/reader_bms.py b/benchmarks/reader_bms.py index 6f199bc..a82d746 100644 --- a/benchmarks/reader_bms.py +++ b/benchmarks/reader_bms.py @@ -1,19 +1,21 @@ from zarrtraj import * +import MDAnalysis as mda -# from asv_runner.benchmarks.mark import skip_for_params from zarr.storage import DirectoryStore, LRUStoreCache import MDAnalysis.analysis.rms as rms from MDAnalysis.coordinates.H5MD import H5MDReader +import zarr +import h5py +import dask as da import os -""" -Note: while h5md files are chunked at (1, n_atoms, 3), zarr files -are chunked with as many frames as can fit in 12MB - -""" BENCHMARK_DATA_DIR = os.getenv("BENCHMARK_DATA_DIR") +os.environ["S3_REGION_NAME"] = "us-west-1" +os.environ["AWS_PROFILE"] = "sample_profile" + + s3_files = [ "s3://zarrtraj-test-data/yiip_aligned_compressed.zarrmd", "s3://zarrtraj-test-data/yiip_aligned_uncompressed.zarrmd", @@ -32,8 +34,14 @@ f"{BENCHMARK_DATA_DIR}/yiip_aligned_uncompressed.h5md", ] -os.environ["S3_REGION_NAME"] = "us-west-1" -os.environ["AWS_PROFILE"] = "sample_profile" + +def dask_rmsf(positions): + mean_positions = positions.mean(axis=0) + subtracted_positions = positions - mean_positions + squared_deviations = subtracted_positions**2 + avg_squared_deviations = squared_deviations.mean(axis=0) + sqrt_avg_squared_deviations = da.sqrt(avg_squared_deviations) + return da.sqrt((sqrt_avg_squared_deviations**2).sum(axis=1)) class ZARRH5MDDiskStrideTime(object): @@ -84,44 +92,68 @@ def time_strides(self, filename, reader): class H5MDFmtDiskRSMFTime(object): """Benchmarks for zarrtraj file striding.""" - params = (local_files, "" - param_names = ["filename"] + params = (local_files, ["dask", "mda"]) + param_names = ["filename", "method"] + + def setup(self, filename, method): + if method == "dask": + if filename.endswith(".h5md"): + self.positions = da.from_array( + h5py.File(filename)["/particles/trajectory/position/value"] + ) + + elif filename.endswith("zarrmd"): + self.positions = da.from_array( + zarr.open_group(filename)[ + "/particles/trajectory/position/value" + ] + ) + + elif method == "mda": + self.universe = mda.Universe( + f"{BENCHMARK_DATA_DIR}/yiip_equilibrium/YiiP_system.pdb", + filename, + ) + + def time_rmsf(self, filename, method): + """Benchmark striding over full trajectory""" + if method == "mda": + rms.RMSF(self.universe.atoms).run() + elif method == "dask": + rmsf = dask_rmsf(self.positions) + rmsf.compute() - def setup(self, filename): - self.reader_object = ZARRH5MDReader(filename) - def time_strides(self, filename): - """Benchmark striding over full trajectory""" - for ts in self.reader_object: - pass +class H5MDFmtAWSRSMFTime(object): + """Benchmarks for zarrtraj file striding.""" - # def time_RMSD(self, compressor_level, filter_precision, chunk_frames): - # """Benchmark RMSF calculation""" - # R = rms.RMSD( - # self.universe, - # self.universe, - # select="backbone", - # ref_frame=0, - # ).run() - - -class RawZarrReadBenchmarks(object): - timeout = 86400 - params = ( - [0, 1, 9], - ["all", 3], - [1, 10, 100], - ) - - param_names = [ - "compressor_level", - "filter_precision", - "chunk_frames", - ] - - def setup(self, compressor_level, filter_precision, chunk_frames): - self.traj_file = f"s3://zarrtraj-test-data/long_{compressor_level}_{filter_precision}_{chunk_frames}.zarrtraj" - store = zarr.storage.FSStore(url=self.traj_file, mode="r") - # For consistency with zarrtraj defaults, use 256MB LRUCache store - cache = zarr.storage.LRUStoreCache(store, max_size=2**28) - self.zarr_group = zarr.open_group(store=cache, mode="r") + params = (s3_files, ["dask", "mda"]) + param_names = ["filename", "method"] + + def setup(self, filename, method): + if method == "dask": + if filename.endswith(".h5md"): + self.positions = da.from_array( + h5py.File(filename)["/particles/trajectory/position/value"] + ) + + elif filename.endswith(".zarrmd"): + self.positions = da.from_array( + zarr.open_group(filename)[ + "/particles/trajectory/position/value" + ] + ) + + elif method == "mda": + self.universe = mda.Universe( + f"{BENCHMARK_DATA_DIR}/yiip_equilibrium/YiiP_system.pdb", + filename, + ) + + def time_rmsf(self, filename, method): + """Benchmark striding over full trajectory""" + if method == "mda": + rms.RMSF(self.universe.atoms) + elif method == "dask": + rmsf = dask_rmsf(self.positions) + rmsf.compute() diff --git a/gh_actions_debug/run_moto.py b/gh_actions_debug/run_moto.py deleted file mode 100644 index f735bdb..0000000 --- a/gh_actions_debug/run_moto.py +++ /dev/null @@ -1,36 +0,0 @@ -from moto.server import ThreadedMotoServer -import os -from zarrtraj.tests.datafiles import COORDINATES_SYNTHETIC_ZARRMD -import zarr -import s3fs -import boto3 -import time - -os.environ["AWS_ACCESS_KEY_ID"] = "testing" -os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" -os.environ["AWS_SECURITY_TOKEN"] = "testing" -os.environ["AWS_SESSION_TOKEN"] = "testing" - -# For convenience, set dict options as env vars -# boto options -os.environ["AWS_DEFAULT_REGION"] = "us-west-1" -os.environ["AWS_ENDPOINT_URL"] = "http://localhost:5000" -# s3fs options -os.environ["S3_REGION_NAME"] = "us-west-1" -os.environ["S3_ENDPOINT_URL"] = "http://localhost:5000" - -server = ThreadedMotoServer() -server.start() -s3_resource = boto3.resource("s3") -s3_resource.create_bucket( - Bucket="zarrtraj-test-data", - CreateBucketConfiguration={"LocationConstraint": "us-west-1"}, -) -# upload file -source = zarr.open_group(COORDINATES_SYNTHETIC_ZARRMD, mode="r") -obj_name = os.path.basename(COORDINATES_SYNTHETIC_ZARRMD) -s3_fs = s3fs.S3FileSystem() -cloud_store = s3fs.S3Map(root=f"s3://zarrtraj-test-data/{obj_name}", s3=s3_fs) - -zarr.convenience.copy_store(source.store, cloud_store, if_exists="raise") -time.sleep(5000) diff --git a/zarrtraj/data/write_aligned_compressed_disk_h5md.py b/zarrtraj/data/write_aligned_compressed_disk_h5md.py index cc07455..3d7d245 100644 --- a/zarrtraj/data/write_aligned_compressed_disk_h5md.py +++ b/zarrtraj/data/write_aligned_compressed_disk_h5md.py @@ -1,31 +1,20 @@ import zarrtraj -import zarr -from numcodecs import Blosc, Quantize import MDAnalysis as mda -from MDAnalysis.analysis import rms, align -import numcodecs # This requires MDAnalysis >= 2.8.0 u = mda.Universe( "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", - "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc", ) -average = align.AverageStructure( - u, u, select="protein and name CA", ref_frame=0 -).run() -ref = average.results.universe - -aligner = align.AlignTraj( - u, - ref, - select="protein and name CA", - filename="zarrtraj/data/yiip_aligned_compressed.h5md", - writer_kwargs=dict( - n_frames=u.trajectory.n_frames, - compression="gzip", - compression_opts=9, - chunks=(9, u.trajectory.n_atoms, 3), - ), -).run() +with mda.Writer( + "zarrtraj/data/yiip_aligned_compressed.h5md", + n_atoms=u.trajectory.n_atoms, + n_frames=u.trajectory.n_frames, + compression="gzip", + compression_opts=9, + chunks=(9, u.trajectory.n_atoms, 3), +) as W: + for ts in u.trajectory: + W.write(u.atoms) diff --git a/zarrtraj/data/write_aligned_compressed_disk_zarrmd.py b/zarrtraj/data/write_aligned_compressed_disk_zarrmd.py index 778bef5..c8bf98f 100644 --- a/zarrtraj/data/write_aligned_compressed_disk_zarrmd.py +++ b/zarrtraj/data/write_aligned_compressed_disk_zarrmd.py @@ -1,21 +1,18 @@ import zarrtraj -import zarr -from numcodecs import Blosc, Quantize import MDAnalysis as mda -from MDAnalysis.analysis import rms, align import numcodecs -# This requires MDAnalysis >= 2.8.0 +u = mda.Universe( + "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc", +) -u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") - -average = align.AverageStructure(u, u, select='protein and name CA', - ref_frame=0).run() -ref = average.results.universe - -aligner = align.AlignTraj(u, ref, - select='protein and name CA', - filename='yiip_aligned_compressed.zarrmd', - writer_kwargs= dict( - n_frames=u.trajectory.n_frames, - precision=3, compressor=numcodecs.Blosc(cname="zstd", clevel=9))).run() +with mda.Writer( + "zarrtraj/data/yiip_aligned_compressed.zarrmd", + n_atoms=u.trajectory.n_atoms, + n_frames=u.trajectory.n_frames, + precision=3, + compressor=numcodecs.Blosc(cname="zstd", clevel=9), +) as W: + for ts in u.trajectory: + W.write(u.atoms) diff --git a/zarrtraj/data/write_aligned_compressed_s3_zarrmd.py b/zarrtraj/data/write_aligned_compressed_s3_zarrmd.py index 9e4a0be..3f6148e 100644 --- a/zarrtraj/data/write_aligned_compressed_s3_zarrmd.py +++ b/zarrtraj/data/write_aligned_compressed_s3_zarrmd.py @@ -9,17 +9,17 @@ os.environ["AWS_PROFILE"] = "sample_profile" os.environ["AWS_REGION"] = "us-west-1" -# This requires MDAnalysis >= 2.8.0 +u = mda.Universe( + "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc", +) -u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") - -average = align.AverageStructure(u, u, select='protein and name CA', - ref_frame=0).run() -ref = average.results.universe - -aligner = align.AlignTraj(u, ref, - select='protein and name CA', - filename='s3://zarrtraj-test-data/yiip_aligned_compressed.zarrmd', - writer_kwargs= dict( - n_frames=u.trajectory.n_frames, - precision=3, compressor=numcodecs.Blosc(cname="zstd", clevel=9))).run() +with mda.Writer( + "s3://zarrtraj-test-data/yiip_aligned_compressed.zarrmd", + n_atoms=u.trajectory.n_atoms, + n_frames=u.trajectory.n_frames, + precision=3, + compressor=numcodecs.Blosc(cname="zstd", clevel=9), +) as W: + for ts in u.trajectory: + W.write(u.atoms) diff --git a/zarrtraj/data/write_aligned_uncompressed_disk_h5md.py b/zarrtraj/data/write_aligned_uncompressed_disk_h5md.py index 14be95e..f2fdfc8 100644 --- a/zarrtraj/data/write_aligned_uncompressed_disk_h5md.py +++ b/zarrtraj/data/write_aligned_uncompressed_disk_h5md.py @@ -1,31 +1,20 @@ import zarrtraj -import zarr -from numcodecs import Blosc, Quantize import MDAnalysis as mda -from MDAnalysis.analysis import rms, align -import numcodecs # This requires MDAnalysis >= 2.8.0 u = mda.Universe( "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", - "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc", ) -average = align.AverageStructure( - u, u, select="protein and name CA", ref_frame=0 -).run() -ref = average.results.universe - -aligner = align.AlignTraj( - u, - ref, - select="protein and name CA", - filename="zarrtraj/data/yiip_aligned_uncompressed.h5md", - writer_kwargs=dict( - n_frames=u.trajectory.n_frames, - compression="gzip", - compression_opts=0, - chunks=(9, u.trajectory.n_atoms, 3), - ), -).run() +with mda.Writer( + "zarrtraj/data/yiip_aligned_uncompressed.h5md", + n_atoms=u.trajectory.n_atoms, + n_frames=u.trajectory.n_frames, + compression="gzip", + compression_opts=0, + chunks=(9, u.trajectory.n_atoms, 3), +) as W: + for ts in u.trajectory: + W.write(u.atoms) diff --git a/zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py b/zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py index fb90e2d..aa8b8bf 100644 --- a/zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py +++ b/zarrtraj/data/write_aligned_uncompressed_disk_zarrmd.py @@ -1,21 +1,18 @@ import zarrtraj -import zarr -from numcodecs import Blosc, Quantize import MDAnalysis as mda -from MDAnalysis.analysis import rms, align import numcodecs -# This requires MDAnalysis >= 2.8.0 +u = mda.Universe( + "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc", +) -u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") - -average = align.AverageStructure(u, u, select='protein and name CA', - ref_frame=0).run() -ref = average.results.universe - -aligner = align.AlignTraj(u, ref, - select='protein and name CA', - filename='yiip_aligned_compressed.zarrmd', - writer_kwargs= dict( - n_frames=u.trajectory.n_frames, - compressor=numcodecs.Blosc(cname="zstd", clevel=0))).run() +with mda.Writer( + "zarrtraj/data/yiip_aligned_uncompressed.zarrmd", + n_atoms=u.trajectory.n_atoms, + n_frames=u.trajectory.n_frames, + precision=3, + compressor=numcodecs.Blosc(cname="zstd", clevel=0), +) as W: + for ts in u.trajectory: + W.write(u.atoms) diff --git a/zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py b/zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py index 2057395..2dffc27 100644 --- a/zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py +++ b/zarrtraj/data/write_aligned_uncompressed_s3_zarrmd.py @@ -9,17 +9,17 @@ os.environ["AWS_PROFILE"] = "sample_profile" os.environ["AWS_REGION"] = "us-west-1" -# This requires MDAnalysis >= 2.8.0 +u = mda.Universe( + "zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", + "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center_aligned.xtc", +) -u = mda.Universe("zarrtraj/data/yiip_equilibrium/YiiP_system.pdb", "zarrtraj/data/yiip_equilibrium/YiiP_system_90ns_center.xtc") - -average = align.AverageStructure(u, u, select='protein and name CA', - ref_frame=0).run() -ref = average.results.universe - -aligner = align.AlignTraj(u, ref, - select='protein and name CA', - filename='s3://zarrtraj-test-data/yiip_aligned_uncompressed.zarrmd', - writer_kwargs= dict( - n_frames=u.trajectory.n_frames, - compressor=numcodecs.Blosc(cname="zstd", clevel=0))).run() +with mda.Writer( + "s3://zarrtraj-test-data/yiip_aligned_uncompressed.zarrmd", + n_atoms=u.trajectory.n_atoms, + n_frames=u.trajectory.n_frames, + precision=3, + compressor=numcodecs.Blosc(cname="zstd", clevel=0), +) as W: + for ts in u.trajectory: + W.write(u.atoms) From bbb8dfba82592ead28213b45441a019eca81e486 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Sat, 13 Jul 2024 04:47:03 +0000 Subject: [PATCH 3/7] remove unused --- gh_actions_debug/moto_s3_contents.py | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 gh_actions_debug/moto_s3_contents.py diff --git a/gh_actions_debug/moto_s3_contents.py b/gh_actions_debug/moto_s3_contents.py deleted file mode 100644 index 874fa41..0000000 --- a/gh_actions_debug/moto_s3_contents.py +++ /dev/null @@ -1,24 +0,0 @@ -import boto3 -import zarr -import os - -os.environ["AWS_ACCESS_KEY_ID"] = "testing" -os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" -os.environ["AWS_SECURITY_TOKEN"] = "testing" -os.environ["AWS_SESSION_TOKEN"] = "testing" - -# For convenience, set dict options as env vars -# boto options -os.environ["AWS_DEFAULT_REGION"] = "us-west-1" -os.environ["AWS_ENDPOINT_URL"] = "http://localhost:5000" -# s3fs options -os.environ["S3_REGION_NAME"] = "us-west-1" -os.environ["S3_ENDPOINT_URL"] = "http://localhost:5000" -filename = "s3://zarrtraj-test-data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd" - -mapping = zarr.storage.FSStore(filename, mode="r") -cache = zarr.storage.LRUStoreCache(mapping, max_size=(100 * 1024**2)) -file = zarr.open_group(store=cache, mode="r") - -print(file.tree()) -print(len(list(file["particles"].group_keys()))) From c758b605a5c7a1052ff73250d637b1c825839b84 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Sat, 13 Jul 2024 05:41:01 +0000 Subject: [PATCH 4/7] making s3fs core dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e2e0949..e5853ec 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "dask>=2023.11.0", "kerchunk>=0.2.6", "h5py>=3.11.0", + "s3fs==2024.3.0", ] keywords = [ "molecular simulations", @@ -38,7 +39,6 @@ test = [ "pytest-xdist>=3.5.0", "pytest-cov>=4.1.0", "MDAnalysisTests>=2.7.0", - "s3fs==2024.3.0", ] doc = [ "sphinx", From fe85db03754af459cc3c9d2e01429fd3c63bce3d Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Sat, 13 Jul 2024 16:21:54 +0000 Subject: [PATCH 5/7] bug fixes --- benchmarks/environment.yaml | 14 ------ benchmarks/environment.yml | 4 +- benchmarks/reader_bms.py | 97 ++++++++++++++++++++++++++----------- 3 files changed, 70 insertions(+), 45 deletions(-) delete mode 100755 benchmarks/environment.yaml diff --git a/benchmarks/environment.yaml b/benchmarks/environment.yaml deleted file mode 100755 index 80e7516..0000000 --- a/benchmarks/environment.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: asv-zarrtraj -channels: - - defaults - - conda-forge -dependencies: - - MDAnalysis>=2.7.0 - - zarr>=2.11.0 - - dask - - ### AWS dependencies ### - - s3fs=2024.3.0 - - - diff --git a/benchmarks/environment.yml b/benchmarks/environment.yml index 9fd3cf5..f9b08a0 100755 --- a/benchmarks/environment.yml +++ b/benchmarks/environment.yml @@ -7,9 +7,9 @@ dependencies: - zarr>=2.11.0 - dask - distributed + - h5py>=3.11.0 + - ### AWS dependencies ### - - s3fs=2024.3.0 diff --git a/benchmarks/reader_bms.py b/benchmarks/reader_bms.py index a82d746..012ef02 100644 --- a/benchmarks/reader_bms.py +++ b/benchmarks/reader_bms.py @@ -6,10 +6,32 @@ from MDAnalysis.coordinates.H5MD import H5MDReader import zarr import h5py -import dask as da +import dask.array as da import os +from dask.distributed import Client, LocalCluster + + +""" +1. Activate the devtools/asv_env.yaml environment + +2. Make sure to set the BENCHMARK_DATA_DIR to wherever local yiip files are stored + +3. To run, use: + +Development: + + asv run -q -v -e benchmarks-dev > bm.log & + +Full run: + + asv run -v -e benchmarks-dev > bm.log & + +4. To publish, use + + +""" BENCHMARK_DATA_DIR = os.getenv("BENCHMARK_DATA_DIR") os.environ["S3_REGION_NAME"] = "us-west-1" @@ -34,6 +56,11 @@ f"{BENCHMARK_DATA_DIR}/yiip_aligned_uncompressed.h5md", ] +s3_zarrmd_files = [ + "s3://zarrtraj-test-data/yiip_aligned_compressed.zarrmd", + "s3://zarrtraj-test-data/yiip_aligned_uncompressed.zarrmd", +] + def dask_rmsf(positions): mean_positions = positions.mean(axis=0) @@ -49,6 +76,7 @@ class ZARRH5MDDiskStrideTime(object): params = local_files param_names = ["filename"] + timeout = 2400.0 def setup(self, filename): self.reader_object = ZARRH5MDReader(filename) @@ -58,12 +86,16 @@ def time_strides(self, filename): for ts in self.reader_object: pass + def teardown(self, filename): + del self.reader_object + class ZARRH5MDS3StrideTime(object): """Benchmarks for zarrmd and h5md file striding using local files.""" params = s3_files param_names = ["filename"] + timeout = 2400.0 def setup(self, filename): self.reader_object = ZARRH5MDReader(filename) @@ -73,12 +105,16 @@ def time_strides(self, filename): for ts in self.reader_object: pass + def teardown(self, filename): + del self.reader_object + class H5MDReadersDiskStrideTime(object): """Benchmarks for zarrmd and h5md file striding using local files.""" params = (h5md_files, [ZARRH5MDReader, H5MDReader]) param_names = ["filename", "reader"] + timeout = 2400.0 def setup(self, filename, reader): self.reader_object = reader(filename) @@ -88,26 +124,23 @@ def time_strides(self, filename, reader): for ts in self.reader_object: pass + def teardown(self, filename, reader): + del self.reader_object + -class H5MDFmtDiskRSMFTime(object): - """Benchmarks for zarrtraj file striding.""" +class H5MDFmtDiskRMSFTime(object): params = (local_files, ["dask", "mda"]) param_names = ["filename", "method"] + timeout = 2400.0 def setup(self, filename, method): if method == "dask": - if filename.endswith(".h5md"): - self.positions = da.from_array( - h5py.File(filename)["/particles/trajectory/position/value"] - ) - - elif filename.endswith("zarrmd"): - self.positions = da.from_array( - zarr.open_group(filename)[ - "/particles/trajectory/position/value" - ] - ) + self.positions = da.from_array( + zarr.open_group(filename)[ + "/particles/trajectory/position/value" + ] + ) elif method == "mda": self.universe = mda.Universe( @@ -123,26 +156,26 @@ def time_rmsf(self, filename, method): rmsf = dask_rmsf(self.positions) rmsf.compute() + def teardown(self, filename, method): + if hasattr(self, "positions"): + del self.positions + if hasattr(self, "universe"): + del self.universe + -class H5MDFmtAWSRSMFTime(object): - """Benchmarks for zarrtraj file striding.""" +class H5MDFmtAWSRMSFTime(object): - params = (s3_files, ["dask", "mda"]) + params = (s3_zarrmd_files, ["dask", "mda"]) param_names = ["filename", "method"] + timeout = 2400.0 def setup(self, filename, method): if method == "dask": - if filename.endswith(".h5md"): - self.positions = da.from_array( - h5py.File(filename)["/particles/trajectory/position/value"] - ) - - elif filename.endswith(".zarrmd"): - self.positions = da.from_array( - zarr.open_group(filename)[ - "/particles/trajectory/position/value" - ] - ) + self.positions = da.from_array( + zarr.open_group(filename)[ + "/particles/trajectory/position/value" + ] + ) elif method == "mda": self.universe = mda.Universe( @@ -153,7 +186,13 @@ def setup(self, filename, method): def time_rmsf(self, filename, method): """Benchmark striding over full trajectory""" if method == "mda": - rms.RMSF(self.universe.atoms) + rms.RMSF(self.universe.atoms).run() elif method == "dask": rmsf = dask_rmsf(self.positions) rmsf.compute() + + def teardown(self, filename, method): + if hasattr(self, "positions"): + del self.positions + if hasattr(self, "universe"): + del self.universe From df90890c9d7ba9fcad7d61df0185211691351ed7 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Sun, 14 Jul 2024 12:55:45 -0700 Subject: [PATCH 6/7] fix dtype of test data --- zarrtraj/ZARR.py | 70 +++++++++------- .../COORDINATES_MISSING_H5MD_GROUP_H5MD.h5md | Bin 21512 -> 21520 bytes .../particles/trajectory/force/value/.zarray | 2 +- .../particles/trajectory/force/value/0.0.0 | Bin 235 -> 145 bytes .../trajectory/position/value/.zarray | 4 +- .../particles/trajectory/position/value/0.0.0 | Bin 115 -> 112 bytes .../trajectory/velocity/value/.zarray | 2 +- .../particles/trajectory/velocity/value/0.0.0 | Bin 186 -> 157 bytes .../COORDINATES_MISSING_TIME_DSET_H5MD.h5md | Bin 21512 -> 21520 bytes .../particles/trajectory/force/value/.zarray | 2 +- .../particles/trajectory/force/value/0.0.0 | Bin 235 -> 145 bytes .../trajectory/position/value/.zarray | 4 +- .../particles/trajectory/position/value/0.0.0 | Bin 115 -> 112 bytes .../trajectory/velocity/value/.zarray | 2 +- .../particles/trajectory/velocity/value/0.0.0 | Bin 186 -> 157 bytes zarrtraj/data/COORDINATES_SYNTHETIC_H5MD.h5md | Bin 22576 -> 22576 bytes .../particles/trajectory/force/value/.zarray | 2 +- .../particles/trajectory/force/value/0.0.0 | Bin 235 -> 145 bytes .../trajectory/position/value/.zarray | 4 +- .../particles/trajectory/position/value/0.0.0 | Bin 115 -> 112 bytes .../trajectory/velocity/value/.zarray | 2 +- .../particles/trajectory/velocity/value/0.0.0 | Bin 186 -> 157 bytes .../data/COORDINATES_VARIED_STEPS_H5MD.h5md | Bin 35360 -> 33320 bytes .../particles/trajectory/force/value/.zarray | 2 +- .../particles/trajectory/force/value/0.0.0 | Bin 220 -> 130 bytes .../trajectory/position/value/.zarray | 4 +- .../particles/trajectory/position/value/0.0.0 | Bin 115 -> 112 bytes .../trajectory/velocity/value/.zarray | 2 +- .../particles/trajectory/velocity/value/0.0.0 | Bin 143 -> 133 bytes zarrtraj/data/create_COORDINATES_SYNTHETIC.py | 15 +--- zarrtraj/tests/test_zarrtraj.py | 7 +- zarrtraj/tests/utils.py | 77 ------------------ 32 files changed, 58 insertions(+), 143 deletions(-) delete mode 100755 zarrtraj/tests/utils.py diff --git a/zarrtraj/ZARR.py b/zarrtraj/ZARR.py index 44ae3fa..ab0a2a1 100644 --- a/zarrtraj/ZARR.py +++ b/zarrtraj/ZARR.py @@ -823,6 +823,7 @@ def __init__( # to get buffer indices, do i.e. _val_idx % _val_frames_per_chunk self._val_idx = 0 self._t_idx = 0 + self._n_frames = n_frames val_filter = None time_filter = None @@ -889,7 +890,10 @@ def write( ): # flush buffer and extend zarr dset if reached end of chunk # this will never be called if n_frames is less than the chunk size - if self._val_idx % self._val_frames_per_chunk == 0: + if ( + self._val_idx != 0 + and self._val_idx % self._val_frames_per_chunk == 0 + ): self._val[self._val_idx - self._val_frames_per_chunk :] = ( self._val_buf[:] ) @@ -916,19 +920,20 @@ def flush(self): and shink the zarr datasets to the correct size. """ self._val[ - self._val_idx - self._val_frames_per_chunk : self._val_idx - ] = self._val_buf[ - : (self._val_idx - 1 % self._val_frames_per_chunk) + 1 - ] + self._val_idx + - (self._val_idx % (self._val_frames_per_chunk + 1)) : self._val_idx + ] = self._val_buf[: (self._val_idx % (self._val_frames_per_chunk + 1))] self._val.resize(self._val_idx, *self._val_chunks[1:]) - self._t[self._t_idx - self._t_frames_per_chunk : self._t_idx] = ( - self._t_buf[: (self._t_idx - 1 % self._t_frames_per_chunk) + 1] - ) + self._t[ + self._t_idx + - (self._t_idx % (self._t_frames_per_chunk + 1)) : self._t_idx + ] = self._t_buf[: (self._t_idx % (self._t_frames_per_chunk + 1))] self._t.resize(self._t_idx) - self._s[self._t_idx - self._t_frames_per_chunk : self._t_idx] = ( - self._s_buf[: (self._t_idx - 1 % self._t_frames_per_chunk) + 1] - ) + self._s[ + self._t_idx + - (self._t_idx % (self._t_frames_per_chunk + 1)) : self._t_idx + ] = self._s_buf[: (self._t_idx % (self._t_frames_per_chunk + 1))] self._s.resize(self._t_idx) @@ -988,7 +993,7 @@ class ZARRMDWriter(base.WriterBase): ValueError when ``n_atoms`` is 0 ValueError - when ``n_frames`` is not provided + when ``n_frames`` is not provided or negative ValueError when ``precision`` is less than 0 ValueError @@ -1110,10 +1115,9 @@ def __init__( raise ValueError("H5MDWriter: no atoms in output trajectory") self.n_atoms = n_atoms - if not n_frames: + if n_frames is None or n_frames < 0: raise ValueError( - "H5MDWriter: no frames in output trajectory. " - "Please provide a nonzero value for 'n_frames' kwarg" + "H5MDWriter: Please provide a non-negative value for 'n_frames' kwarg" ) self.n_frames = n_frames self.storage_options = storage_options @@ -1271,27 +1275,16 @@ def _allocate_buffers(self, ts): """Allocates buffers for timestep data that wasn't already allocated""" t_unit = self._unit_translation_dict["time"][self.units["time"]] - length_unit = ( - self._unit_translation_dict["length"][self.units["length"]] - if self.units["length"] is not None - else None - ) - vel_unit = ( - self._unit_translation_dict["velocity"][self.units["velocity"]] - if self.units["velocity"] is not None - else None - ) - force_unit = ( - self._unit_translation_dict["force"][self.units["force"]] - if self.units["force"] is not None - else None - ) - if ( ts.dimensions is not None and np.all(ts.dimensions > 0) and "box/edges" not in self._elements ): + length_unit = ( + self._unit_translation_dict["length"][self.units["length"]] + if self.units["length"] is not None + else None + ) self._traj["box"].attrs["boundary"] = 3 * ["periodic"] self._traj["box"].require_group("edges") self._elements["box/edges"] = H5MDElementBuffer( @@ -1310,6 +1303,11 @@ def _allocate_buffers(self, ts): and ts.has_positions and "position" not in self._elements ): + length_unit = ( + self._unit_translation_dict["length"][self.units["length"]] + if self.units["length"] is not None + else None + ) self._traj.require_group("position") self._elements["position"] = H5MDElementBuffer( ts.positions.shape, @@ -1327,6 +1325,11 @@ def _allocate_buffers(self, ts): and ts.has_velocities and "velocity" not in self._elements ): + vel_unit = ( + self._unit_translation_dict["velocity"][self.units["velocity"]] + if self.units["velocity"] is not None + else None + ) self._traj.require_group("velocity") self._elements["velocity"] = H5MDElementBuffer( ts.velocities.shape, @@ -1344,6 +1347,11 @@ def _allocate_buffers(self, ts): and ts.has_forces and "force" not in self._elements ): + force_unit = ( + self._unit_translation_dict["force"][self.units["force"]] + if self.units["force"] is not None + else None + ) self._traj.require_group("force") self._elements["force"] = H5MDElementBuffer( ts.forces.shape, diff --git a/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_H5MD.h5md b/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_H5MD.h5md index f9ed76b1564483f2c3ac309bcfb9dbfafcbce690..5bedfad260bc7c3daacaa2c489054a7f1b6c77f1 100644 GIT binary patch delta 1080 zcmX|55#ThSnxN!z6cqG?(R2J0Z$yZd9+QV2HfB+?;*90;!BQb-n|Lm`n{ zn&J=;ii4Dv1IgeJI#swroT?z0f^_Pd-RHeTk37wv?|a_w3;DbC$!YQ`IGMv)KVM6q z<`&bZ9~T=(1KxA)NnCatM{nHOjfP9zy;$o|i{1U@GwK7kGd z`qaIPwfi3Py#3`D9*)~A2B}t$`xI--_d`5jhc*19}KUz#Um-uq! z$x)xW&tji>K8t;3`Ya~f7-e(GeEI3qxm+SjlE5&+9cnS*Gr|MH6PCd?tmLoP+SZsZ z^ud&j$v_s0V4(t4NFHlU9puZECrAA{G@%U#(1jlKL635VvxPdep<6Pwq41E!kcA>x zs6Z8xXA~k}3^jRj)UQJm+He3}=s_R!Xqn+`p$={6LLbc1B#W0A2t}|^f$CCNyrgb za4yfD#8)kj4T8L7uBB#_BU4KPk|j!#Km$2ItaIL`-s1Bfy@;B^W)j(sM;hWhh4-cK_yIr{yKKF|0m-%67-U&aukD{IB?09(mWL>>fb0)l3-gJ7= zVfjYN`4N75`u58I5#h?}tJK#*`0K%~)U(z1htpwp@v<`+I+Yox&)=I-@$p?J)hmRX SYhRL8e+vIrMX%RCIDY}zRawFS delta 1684 zcmZ9KQAkr^6vy{XGq)^O4#|*OXtu?m$s~l3dx^*lX9xx3%_pa1XszTM1-x;>&Y zVO2dIF`6w8)s5h2$LjSyL!SLc?%=KOsoVp+4;%LfxgKGC)L2WgV;ubn!@o4t+wOk~ z-=q=gzWd^5_!<1qjbZ984dpR6WiXSAcou>CHAujtI#|GyI!M4%=uZRu65tttXW^d% zcpl(|f5F5ZrW}h;KA{0}1JgQCHZ-RLq`wI85_|=Kmyz=x;1$fRmVqbq0dkge)pJ-_ z)PVwiuLA`98Q`z*6#-rY_y@r2nA<1=Piu~wpLT=gs*5^Eu6hFlTLAwCxCHPv{C@!U zm@=nKyXQ#I+ChJE6=^4_n&cz-NxMijq}`-FBrY$hlEi-YvX4F750Lhef}}dqe$oL_ z1Br|0c!vG#WgmGg#}J~ICQ_IbCB;asq&5;45AqO)hDhvV5BE*dSyGaeCiRdmlX^*9 zJQpId-z2fGhs6Dxyu}Rtct{!}YVSpRaZ^B%28MuT4`y79FJYI`{dL1a>MjasF2!f;V#W2?ba2)+s z%$+drjmNSBQL{qjvxVay0`=j9$0;QbPo_(LjAa+h&MNz3H0Hn9EK6yx{kZ*fcAz6v zspjpOGoALGM4Nr8>#FxvBKxkb&P%Hc=dby6gq=!U@qUZg%O{&u(q2s*RROytdC1P6 ct5$EbEyxVZYc&h~3^y*azCe}^VNgVy`&*AHDkb13F| z;2HNraxp-W|9r}ezbgBb_AlOX=HXXXeul-$rHjve$rXH0Cod_HNQ6h0b!`ZfvQ7yaH!(i!_g;WoDmnS zqzm2?R-}w1MM@6oD(WaJzHfXo-0#t&ak&+(g(eZ{4sAslSqTCakTQWw2Wn^<*`8kZ zuj3neo7|1=(}ywou}ntkID48r=Py&lNt#XaX;IGJs&|bf%Zpj1=O2rDxmqJDs`+BM a-h6%k{Gx>u+b;Id#*PEe!~R(V-R?i7CU5=# diff --git a/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray b/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray index e81d5bd..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray +++ b/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray @@ -11,8 +11,8 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "QT yVZnw27aqtlNGsYqFfs@^GBSuUa5FG4F(@!NurVxibaa9Mh@db7FGGPd1ONc98Wr9E literal 115 zcmZQ#G-T0XWMBZ{Vg?2V5g?8R;{T2ej7-cdtZeKYoLt;IynGByENmQHJbVH|B4QF! zG7KynJOUyTG72ghItC^d3>*Ry3K|9$4jus!2^j?p0tyBW0SN^S6BcYZaNz;aJO&0< Jh9d!B1^}iI5QP8$ diff --git a/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray b/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray index bfb4f0a..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray +++ b/zarrtraj/data/COORDINATES_MISSING_H5MD_GROUP_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray @@ -11,7 +11,7 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "4EB&xOhlbLDSBPAeTW1hXz@s>+4q$jT50FoYq)nfQLYW0JgL-X i&FWnqjWNw1jF~XA1!wYWr%*Cy$!l!wDMMp_>zg0g*(&w` literal 186 zcmW-Vu?m7v07dW95{*Jbk%R`r5Q>K6MXU7}et~;`()!-mU(_E6IyE^J2!YUGSb`9k zcHo@h2*i^E0qDE{Fj`No4Zu0&pdnAdX%ebf-Wa2-JuC*Jv5F@6?9b6Py=Ra7S-i@3 zg(%JPqO9sKh}e^m=4B00Qij@CH%#QUsA#%rP3@U8cTp?4WjpR-wf5*{3%{ce9WoBS Uzw1yz1|R|^7y&9E;rRbuek27rEC2ui diff --git a/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_H5MD.h5md b/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_H5MD.h5md index 0bdfcf888b33d803fea09719af32b739d0fffd21..ee160dd9113b4c4fcf0f6acf53a61793c6b25ae4 100644 GIT binary patch delta 1063 zcmX|56gt!NO;q;@HVXiSS>qz(?fyFa#23c)6wL^?!}1Hn~Xgk%vq6cV|m zDIEeragfq-O9qF~N#P20s!m=(I(5zN^Ztn*dB~sdd*1I0`Lp%e>+&jlg`@ef)XJX~ zm-1(ymO95H-i!W8TJt-{Z~ghrj!)gcYz(NS{=w=w^^rfAoet(VZw7`(Ic4O_l_y7i z>cM5>Nx;0|VD)8y6AWfQknK#eMP*tpAX%a$2{Z@~sHKF@sgDRxhYSuUiwPH=|&z~(6ohV5H!w3(krG(E3j|fj&2HUt^y4C7iV}_7{ zsTxy&IjDezIyAvO)|fWPmn%<>`fcb!9}Zy%r;vdjoa7M8Fto^5m%BhA#Bs5QcCH8R*e6!`VU``Y?nH%<>%;FEbD-V4)7p<+ypT92XX* zTKW&@DtCK5cP*sC-QG;NxVY$k=_rc0hBoJI>b)?E_UR?=YLwGe%GdSDlcT;!v$5y>1yAZ# AasU7T delta 1671 zcmZ9KQAkr^6vuZrGq)^O8p%nv(&bjA*KWYd5+}-c|&;R#*-)?$ByEUPu z!&+S?qPLnZs%yc??)9rTba@Wzxx+WVCvy+*KCa&z<$8qmNqr;9jtu(Ky00)c)Zu#q z-;5sVzw`WO_$mC)^l|DI#`2h()tSjfJd?n~3MAl36)fNx6(rzU^ydJ60q{J)3-B)j zyae#_zhL5yQ;x|epH=|5fjJc@8(LHW(q93%0ACT{_sCfVcnx#wRp4oLfShSx^&%El zRG@%YRe*p$1N;@f62Kb({{VOsbLA@VoZ>k7DJNL2x}<{Ss>>MI2KYC?6@Yi({{yhg zkU7n;295^JUGyi{l6I5oNM4eUw1-qr+DqC;;&PK}NbF}X``E*MKj{D|NNOM*Bpo6( zk+^t{XV}kP_L0Z54Iz4IA%#g%QjF9_YA12=AP;e9h{QhjaNi)EA@z|`qyf@J(h!M@ z=Rzd*8zlA(khp)Hx0t3M4@py`JZXuvMxyHpoAk8H`OM`xugC3rtv&{EtzrF>e!;FU z>LYlyz_o!tdlTzr__uWFZ)5J4^HG>C2Pn^?0?2zw1#Ihq%!=vT1#S-e^|7 Q>%5)pKmK2G?p#Xy3r^xsKL7v# diff --git a/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray b/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray index bfb4f0a..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray +++ b/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray @@ -11,7 +11,7 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "xVZYc&h~3^y*azCe}^VNgVy`&*AHDkb13F| z;2HNraxp-W|9r}ezbgBb_AlOX=HXXXeul-$rHjve$rXH0Cod_HNQ6h0b!`ZfvQ7yaH!(i!_g;WoDmnS zqzm2?R-}w1MM@6oD(WaJzHfXo-0#t&ak&+(g(eZ{4sAslSqTCakTQWw2Wn^<*`8kZ zuj3neo7|1=(}ywou}ntkID48r=Py&lNt#XaX;IGJs&|bf%Zpj1=O2rDxmqJDs`+BM a-h6%k{Gx>u+b;Id#*PEe!~R(V-R?i7CU5=# diff --git a/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray b/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray index e81d5bd..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray +++ b/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray @@ -11,8 +11,8 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "QT yVZnw27aqtlNGsYqFfs@^GBSuUa5FG4F(@!NurVxibaa9Mh@db7FGGPd1ONc98Wr9E literal 115 zcmZQ#G-T0XWMBZ{Vg?2V5g?8R;{T2ej7-cdtZeKYoLt;IynGByENmQHJbVH|B4QF! zG7KynJOUyTG72ghItC^d3>*Ry3K|9$4jus!2^j?p0tyBW0SN^S6BcYZaNz;aJO&0< Jh9d!B1^}iI5QP8$ diff --git a/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray b/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray index bfb4f0a..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray +++ b/zarrtraj/data/COORDINATES_MISSING_TIME_DSET_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray @@ -11,7 +11,7 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "4EB&xOhlbLDSBPAeTW1hXz@s>+4q$jT50FoYq)nfQLYW0JgL-X i&FWnqjWNw1jF~XA1!wYWr%*Cy$!l!wDMMp_>zg0g*(&w` literal 186 zcmW-Vu?m7v07dW95{*Jbk%R`r5Q>K6MXU7}et~;`()!-mU(_E6IyE^J2!YUGSb`9k zcHo@h2*i^E0qDE{Fj`No4Zu0&pdnAdX%ebf-Wa2-JuC*Jv5F@6?9b6Py=Ra7S-i@3 zg(%JPqO9sKh}e^m=4B00Qij@CH%#QUsA#%rP3@U8cTp?4WjpR-wf5*{3%{ce9WoBS Uzw1yz1|R|^7y&9E;rRbuek27rEC2ui diff --git a/zarrtraj/data/COORDINATES_SYNTHETIC_H5MD.h5md b/zarrtraj/data/COORDINATES_SYNTHETIC_H5MD.h5md index 746f56b94d0e519060b90a1921ce04ac3fbcf61c..858bf1fe9998b4c2a09bd12b63b11a30c93a04f7 100644 GIT binary patch delta 1017 zcmXYuF=!KE7>55l8-$`L4r;NKLO`g9h9-n4y}SRcv_eBkL8L>6xZEHeI_R7rxD`^k z>54-@=-^NSw`2^(Nv_bXi$mrFQ z*(7;t8EkLD1>vR(NT%i-^{L|~cwNr>)HB|X$mY{*(=E_Ux$@+w&n(Jyfb@VUYR}rvC=!4@L$%|1-+9m z!O~7IpdMUp4XG8u@%mrtlVG?|3YT{7=~|6)%E*^1PmcQ3!>g@^ZX`Tj{}|$g!-a2T zd(&)LnT`ucmMBRA4Z=hBEm%x_Lbwz$*nKB0Q%1gA7CT3M>Jf|G_hWH{6S0_VZ<;N; zjAqWBKPx*?k_3hk9=dPAV!{)`CCgx|>qy%{hA;7oK;W YKUsU^`7`x=ck}YL|G1vN|M18A4|)Vrb^rhX delta 1635 zcmZ9KK}Zx~6vt=ARdchjT$T)pHBA(@*jhK%pk@w|LLmty9z5(xI(XRgdYBG%3o6D- z9wN|77g-r0g$NCygiWjjSrE|@V{x$+tyqxgy?OunKp*>K_WR!d{r}%LyVsiY>)^NX$I7@_xFmaAJPc#x-JjXMv zXD#bUV|#)KEn0{+B1*)F4kAf#@gNVeX@p=MYq)O_J;Xg?fJhM!i8R5*a}k2|7Qwm{ z!Tm>+uRW(PuZb)%M=TQ?gkNJXxn@myE}C(#zG0m=MYnKM-n~A1Kef3XpX{8Onk7)x7k}@ICbXEZNsB(N|b}FguqESaRYj>*$U1 z>IQy*Zw#F>^5>;r;~D32weYJ$@EPdq(GPL1O@xmH7Z6;=jVoYKqYULh^*DGK{RS8} zLKne)6M~!3Z^2wE>1@p_|7QLT`~X=uC_~wARtC~ostcw zoVY}17zs^wZ)P5w@*Fj1E`$7-K2}VjKWzqA$I~~1Z=stt+XtV0 z_}%sn`uFApbyvsdFgNf2B_!9*jH)0Bo>azi!C7S_!Sm=ZAowGK7ZIF;ehIx<3 zI5Xk-*qfPY6(Db5K^e+{mXv|?R}j1kT^_+}@L5Oj2Ie-81y8FBkiSi@dI=jV%20yW zm4O6rA^0bB1q2rnyo=xx=Jt*SFQ^>fexVZYc&h~3^y*azCe}^VNgVy`&*AHDkb13F| z;2HNraxp-W|9r}ezbgBb_AlOX=HXXXeul-$rHjve$rXH0Cod_HNQ6h0b!`ZfvQ7yaH!(i!_g;WoDmnS zqzm2?R-}w1MM@6oD(WaJzHfXo-0#t&ak&+(g(eZ{4sAslSqTCakTQWw2Wn^<*`8kZ zuj3neo7|1=(}ywou}ntkID48r=Py&lNt#XaX;IGJs&|bf%Zpj1=O2rDxmqJDs`+BM a-h6%k{Gx>u+b;Id#*PEe!~R(V-R?i7CU5=# diff --git a/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray b/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray index e81d5bd..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray +++ b/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray @@ -11,8 +11,8 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "QT yVZnw27aqtlNGsYqFfs@^GBSuUa5FG4F(@!NurVxibaa9Mh@db7FGGPd1ONc98Wr9E literal 115 zcmZQ#G-T0XWMBZ{Vg?2V5g?8R;{T2ej7-cdtZeKYoLt;IynGByENmQHJbVH|B4QF! zG7KynJOUyTG72ghItC^d3>*Ry3K|9$4jus!2^j?p0tyBW0SN^S6BcYZaNz;aJO&0< Jh9d!B1^}iI5QP8$ diff --git a/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray b/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray index bfb4f0a..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray +++ b/zarrtraj/data/COORDINATES_SYNTHETIC_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray @@ -11,7 +11,7 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "4EB&xOhlbLDSBPAeTW1hXz@s>+4q$jT50FoYq)nfQLYW0JgL-X i&FWnqjWNw1jF~XA1!wYWr%*Cy$!l!wDMMp_>zg0g*(&w` literal 186 zcmW-Vu?m7v07dW95{*Jbk%R`r5Q>K6MXU7}et~;`()!-mU(_E6IyE^J2!YUGSb`9k zcHo@h2*i^E0qDE{Fj`No4Zu0&pdnAdX%ebf-Wa2-JuC*Jv5F@6?9b6Py=Ra7S-i@3 zg(%JPqO9sKh}e^m=4B00Qij@CH%#QUsA#%rP3@U8cTp?4WjpR-wf5*{3%{ce9WoBS Uzw1yz1|R|^7y&9E;rRbuek27rEC2ui diff --git a/zarrtraj/data/COORDINATES_VARIED_STEPS_H5MD.h5md b/zarrtraj/data/COORDINATES_VARIED_STEPS_H5MD.h5md index 115a988a3a350e60bc722c9bf311a876cbd833ee..47f883dcf87578bcdda96fb39fba09ddde61bc24 100644 GIT binary patch delta 1806 zcmb_aUuaWT7(eG^jx;MQPqiIl;vCH%hGwmdg6YkTmd#Y&8n-muG30s zuI>*V1e$pE(DW%H)I5kZoQFk_k&Qic5QoCR>}AB4(v>`vL817x-*<2NXY6%{{BpnZ zeZSxD`|i0X|Gg-DEi!)Lzf}yjw5MFp?uK2@4tCG|JlT!7`*Ed$c%wTo^s0uuRzKCH zJ+8#G2k#7P;Ao}&-)MoMIgMeFjx@w4F7b#%b>yKfAy{CbQur{qc`fA#9zK4v4vuaZ?K*sQloL6-C1+{OEY7Aq{DH5tn!i zx!2n)0tz4-Aes=TQOIGxh?ScSkIc`6@ z><3%@(hj!fbzighp((j`sF&;Vt;6^Dvijrj53gX8J?d*p4A=}){&duHV7tqRoqhjg ze-CF4=C!xXEqU0-z4A_4sNb$`jWZv!_a*gI zf9MeTfJ*o5NViEpZb4?F^gJv9K!yv&|U?K+jVAy3E7OWNbN+7HI2|NVOY#{R@iw)mS|Kj#*q zGw9vqZt!R2_=i3*e;!jP^P>qa+(FFxKnBmm<==@vd0agm#t578uaV25kk~Dqmp4E0 ziGs0pQ2wB*f7-%D$aYw}l@KQ=yf3{DuSi@}HwIdSa9)&aBV(d;0amW9DGHb9Jj#7e zQTlxAJQ6|GJoz#g*LUnPJ9bwlY`fM8%WipWw_WLK!;UY@>{XAd#XGnt1l7K2_?373 zsucd>+I1sebF_=XlzMRhlg6jy;J0I)@0_V5etE{`Cuc1F`VHdW jo*k5fT1eFWGOfjAb@n!GUEy~7+}D{5$9m-IcU}Jgj4UvN delta 2416 zcmcIkVMtq76n^)rZ4zCVwPIC7pRAY|Lba_cLM`uYh;yzqru#9qb=F2GwD-dV1$6mbMc~QgXs5V~s@o&NLFi@C$?SsBMJC zQ?@V~&)7m}JV*JPWIRvC3uJtY_={wmB;(tU8-wpP&Xx4{p0*D#9yYIfkv8twf@u7}7C_^NWc(-bR>^pcjQ=KMMaP?B zI^N*FHyB3Ze{}f&HW=3BATR(2qyf?a8GsFdjQ|J032*^20a<{j0Gj|ZWSy*$e94tO z$&qr42b2RU0F?k4vQE}Wz6y{$ z$&q;-Pz$IBGyqxvAwWAohOAQow z^@{8=<9mB@B?n40-2YiSYQ|lAz`mQ6u|$)ZqWxv!|E^*FA=N#yCm8&~@G+RNjVuFe z($F|%3!(8N;;)kMnk7gR%5>C8)6qCx$GQzV>RAU~03GQ+c;7Za<5gSG60!s^?%#3*-SQn=kCbK%_*a-S+KPJ# zF}S^GXW|sDQfKgKy$Z>5xVAJLVO)EA#IM_HSf$ue_63U@3uRws!lHkV$Jx+`jML)! zo}yyg6*^UPSFVVWdJk7F zNY`laZLau?f&I5TEtnU(bKqr-OMY3%^u$U8uTIJbq*FWMw%Nr5BZI?*KaXaYb>;aXG$y|KF)?k>k$u+ zylHWVjo6Xr8SnOsrJ4cJck~>Ow!yKkiXVNsBI2txB0dKFmop;ChN}G|@wh8`}kf{>t zL=|_ZsS@uX6&^iH?eTb*CF6w}km-7FjO!tkX$Xm`j%n9pgP5OI!iRf|rstry+8$AN z<`_?W5R2_*X7laHU%x;h`QyFFZ|ODp_dYcF(cos0-(4%dswsEn`tZg>Onlz`gSk~z Mf?G)@uJ&~O15bM|g#Z8m diff --git a/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray b/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray index b2ccdfa..7d893e4 100644 --- a/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray +++ b/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/force/value/.zarray @@ -11,7 +11,7 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "D6sT9lF{gSQDc>uC34x2(P0KcaIo-|XV@b#3OGGMr1Hav%i>76PGA zLJta3E>sH^DTp@r!n-`3;I6j`0RI{QYd7uxK$_}wqMJ0F-0G}}aQ(>`XT$}o=z))g z7p0^ksnwJo_25Hf1b_F z>*B3=Un0wAW>GB5YPGI67P%?PRc*H)yJr7&fGO+kZh!dxIsN`2z`gGRSLmVJK@{O~ I)WUFh2Aou8^#A|> diff --git a/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray b/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray index e81d5bd..bcc8cbf 100644 --- a/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray +++ b/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/position/value/.zarray @@ -11,8 +11,8 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "QT yVZnw27aqtlNGsYqFfs@^GBSuUa5FG4F(@!NurVxibaa9Mh@db7FGGPd1ONc98Wr9E literal 115 zcmZQ#G-T0XWMBZ{Vg?2V5g?8R;{T2ej7-cdtZeKYoLt;IynGByENmQHJbVH|B4QF! zG7KynJOUyTG72ghItC^d3>*Ry3K|9$4jus!2^j?p0tyBW0SN^S6BcYZaNz;aJO&0< Jh9d!B1^}iI5QP8$ diff --git a/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray b/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray index 2887f49..76e9c7c 100644 --- a/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray +++ b/zarrtraj/data/COORDINATES_VARIED_STEPS_ZARRMD.zarrmd/particles/trajectory/velocity/value/.zarray @@ -11,7 +11,7 @@ "id": "blosc", "shuffle": 1 }, - "dtype": "ml2&6uBo=lq#7GtV&0G(MBY zaAqby1OHzhA0S`S*e9)_XXdia3_fQheHvzNKJ$!$M^a6W;WLx1ot>RM95C3~aWM!u OFfx2#U;qg_zySb;0w>J? literal 143 zcmZQ#G~~!&WMBZ{eg*~x5g;xH;{OZ`vt}93VlYmdb(Ue4@!2$f2A=yfXBy80ip)I2 zFw^)9Kf}SV$~#Y9zWLzsvzKo^eEj_78^g()k6(WH{O#u-21aHUHinxoKmTN4X5-`$ e5EK>>W3UGSRt8}P4u%ix_Vx}S02TzZzytv7`!Wdt diff --git a/zarrtraj/data/create_COORDINATES_SYNTHETIC.py b/zarrtraj/data/create_COORDINATES_SYNTHETIC.py index 0be5672..d4508de 100644 --- a/zarrtraj/data/create_COORDINATES_SYNTHETIC.py +++ b/zarrtraj/data/create_COORDINATES_SYNTHETIC.py @@ -10,17 +10,6 @@ def create_COORDINATES_SYNTHETIC(uni, root): n_atoms = uni.atoms.n_atoms - # uni.trajectory.ts.dt = 1 - - # uni.trajectory.ts.dimensions = orig_box - # uni.trajectory.units = { - # "time": "ps", - # "length": "Angstrom", - # "velocity": "Angstrom/ps", - # "force": "kJ/(mol*Angstrom)", - # } - # print(uni.trajectory) - # print(uni.trajectory.ts.__class__) h5md = root.create_group("h5md") h5md.attrs["version"] = [1, 1] author = h5md.create_group("author") @@ -58,10 +47,10 @@ def create_COORDINATES_SYNTHETIC(uni, root): dim["time"].attrs["unit"] = "ps" pos = pgroup.create_group("position") - exp = np.logspace(0, 4, base=2, num=5, dtype=np.int32) + exp = np.logspace(0, 4, base=2, num=5, dtype=np.float32) pos_data = ( np.tile( - np.arange(3 * n_atoms, dtype=np.int32).reshape(n_atoms, 3), + np.arange(3 * n_atoms, dtype=np.float32).reshape(n_atoms, 3), (5, 1, 1), ) * exp[:, None, None] diff --git a/zarrtraj/tests/test_zarrtraj.py b/zarrtraj/tests/test_zarrtraj.py index 068b98e..d287e40 100644 --- a/zarrtraj/tests/test_zarrtraj.py +++ b/zarrtraj/tests/test_zarrtraj.py @@ -51,14 +51,9 @@ def upload_h5md_testfile(file_name): def upload_zarrmd_testfile(file_name): - source = zarr.open_group(file_name, mode="r") obj_name = os.path.basename(file_name) s3_fs = s3fs.S3FileSystem() - cloud_store = s3fs.S3Map( - root=f"s3://zarrtraj-test-data/{obj_name}", s3=s3_fs - ) - - zarr.convenience.copy_store(source.store, cloud_store, if_exists="raise") + s3_fs.put(file_name, "zarrtraj-test-data/" + obj_name, recursive=True) return True diff --git a/zarrtraj/tests/utils.py b/zarrtraj/tests/utils.py deleted file mode 100755 index a738f4d..0000000 --- a/zarrtraj/tests/utils.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Buffer-related helper functions for testing.""" - -import numpy as np -from MDAnalysis.analysis import distances -import socket - - -# Helper Functions -def get_memory_usage(writer): - mem = ( - writer._time_buffer.nbytes - + writer._step_buffer.nbytes - + writer._dimensions_buffer.nbytes - + writer._pos_buffer.nbytes - + writer._force_buffer.nbytes - + writer._vel_buffer.nbytes - ) - for key in writer._obsv_buffer: - mem += writer._obsv_buffer[key].nbytes - return mem - - -def get_frame_size(universe): - has = [] - data_blacklist = ["step", "time", "dt"] - ts = universe.trajectory[0] - mem_per_frame = 0 - try: - has.append(ts.data["step"]) - except KeyError: - has.append(ts.frame) - has.append(ts.time) - if ts.dimensions is not None: - has.append(ts.triclinic_dimensions) - if ts.has_positions: - has.append(ts.positions) - if ts.has_velocities: - has.append(ts.velocities) - if ts.has_forces: - has.append(ts.forces) - for key in ts.data: - if key not in data_blacklist: - has.append(ts.data[key]) - for dataset in has: - mem_per_frame += dataset.size * dataset.itemsize - return mem_per_frame - - -def get_n_closest_water_molecules(prot_ag, wat_ag, n): - # returns a numpy array of the indices of the n closest water molecules - # to a protein across all frames of a trajectory - if n > wat_ag.n_atoms: - raise ValueError("n must be less than the number of water molecules") - - result = np.empty((prot_ag.universe.trajectory.n_frames, n), dtype=int) - - i = 0 - for ts in prot_ag.universe.trajectory: - dist = distances.distance_array( - prot_ag.positions, wat_ag.positions, box=prot_ag.dimensions - ) - - minvals = np.empty(wat_ag.n_atoms) - for j in range(wat_ag.n_atoms): - minvals[j] = np.min(dist[:, j]) - - result[i] = np.argsort(minvals)[:n] - i += 1 - - return result - - -def find_free_port(): - """Find a free port on the host machine.""" - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("", 0)) - return s.getsockname()[1] From 5e60aba6dd2e542bed42eb1f0506854451b13d76 Mon Sep 17 00:00:00 2001 From: Lawson Woods Date: Tue, 16 Jul 2024 04:55:08 +0000 Subject: [PATCH 7/7] benchmarks fixes --- benchmarks/reader_bms.py | 11 ++++++++--- zarrtraj/ZARR.py | 7 +++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/benchmarks/reader_bms.py b/benchmarks/reader_bms.py index 012ef02..e818694 100644 --- a/benchmarks/reader_bms.py +++ b/benchmarks/reader_bms.py @@ -22,11 +22,11 @@ Development: - asv run -q -v -e benchmarks-dev > bm.log & + asv run -q -v -e > bm.log & Full run: - asv run -v -e benchmarks-dev > bm.log & + asv run -v -e > bm.log & 4. To publish, use @@ -61,6 +61,11 @@ "s3://zarrtraj-test-data/yiip_aligned_uncompressed.zarrmd", ] +local_zarrmd_files = [ + f"{BENCHMARK_DATA_DIR}/yiip_aligned_compressed.zarrmd", + f"{BENCHMARK_DATA_DIR}/yiip_aligned_uncompressed.zarrmd", +] + def dask_rmsf(positions): mean_positions = positions.mean(axis=0) @@ -130,7 +135,7 @@ def teardown(self, filename, reader): class H5MDFmtDiskRMSFTime(object): - params = (local_files, ["dask", "mda"]) + params = (local_zarrmd_files, ["dask", "mda"]) param_names = ["filename", "method"] timeout = 2400.0 diff --git a/zarrtraj/ZARR.py b/zarrtraj/ZARR.py index 9a11675..5107154 100644 --- a/zarrtraj/ZARR.py +++ b/zarrtraj/ZARR.py @@ -986,7 +986,7 @@ class ZARRMDWriter(base.WriterBase): ValueError when ``n_atoms`` is 0 ValueError - when ``n_frames`` is not provided + when ``n_frames`` is not provided or negative ValueError when ``precision`` is less than 0 ValueError @@ -1108,10 +1108,9 @@ def __init__( raise ValueError("H5MDWriter: no atoms in output trajectory") self.n_atoms = n_atoms - if not n_frames: + if n_frames is None or n_frames < 0: raise ValueError( - "H5MDWriter: no frames in output trajectory. " - "Please provide a nonzero value for 'n_frames' kwarg" + "H5MDWriter: Please provide a non-negative value for 'n_frames' kwarg" ) self.n_frames = n_frames self.storage_options = storage_options