Skip to content

Commit

Permalink
Cloud integration
Browse files Browse the repository at this point in the history
  • Loading branch information
ljwoods2 committed Jan 29, 2024
1 parent 61ff580 commit 153e778
Show file tree
Hide file tree
Showing 7 changed files with 221 additions and 399 deletions.
2 changes: 2 additions & 0 deletions devtools/conda-envs/test_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ dependencies:

# MDAKit requires
- zarr
- fsspec # Only needed for web services connections
- s3fs # Only needed for AWS integration

# Pip-only installs
- pip:
Expand Down
548 changes: 167 additions & 381 deletions notebooks/format.ipynb

Large diffs are not rendered by default.

35 changes: 25 additions & 10 deletions notebooks/initial_benchmark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -17,15 +17,15 @@
" pos = np.arange(3 * n_atoms)\n",
" np.random.shuffle(pos)\n",
" pos = pos.reshape(n_atoms, 3)\n",
" orig_box = np.array([81.1, 82.2, 83.3, 75, 80, 85], dtype=np.float32)\n",
" orig_box = np.array([81.1, 82.2, 83.3], dtype=np.float32)\n",
"\n",
" positions = np.empty((frames, n_atoms, 3), dtype=np.float32)\n",
" velocities = np.empty((frames, n_atoms, 3), dtype=np.float32)\n",
" forces = np.empty((frames, n_atoms, 3), dtype=np.float32)\n",
" time = np.empty((frames), dtype=np.float32)\n",
" frame = np.empty((frames), dtype=np.int32)\n",
"\n",
" dimensions = np.empty((frames, 6))\n",
" dimensions = np.empty((frames, 3))\n",
"\n",
" for i in range(frames):\n",
" positions[i] = 2** i * pos\n",
Expand All @@ -37,12 +37,12 @@
" dimensions[i][:3] = orig_box[:3] + i\n",
" dimensions[i][3:] = orig_box[3:] + i * 0.1\n",
"\n",
" return [frames, dimensions, positions, velocities, forces, time]\n"
" return [frame, dimensions, positions, velocities, forces, time]\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -55,7 +55,7 @@
"\n",
"def create_zarr_traj(n_atoms, frames, compressor):\n",
" # create zarr group layout\n",
" root = zarr.open(f'zarrfiles/zarr_{n_atoms}_{frames}.zarr', mode='a')\n",
" root = zarr.open(f'zarrfiles/zarr_{n_atoms}_{frames}.zarrtraj', mode='a')\n",
" particles = root.create_group('particles')\n",
" group1 = particles.create_group('group1')\n",
" box = group1.create_group('box')\n",
Expand All @@ -82,7 +82,7 @@
" force.create_dataset('time', data=traj[-1], dtype=np.float32)\n",
"\n",
" # Return filename to make it easy to open file\n",
" return f'zarrfiles/zarr_{n_atoms}_{frames}.zarr'\n",
" return f'zarrfiles/zarr_{n_atoms}_{frames}.zarrtraj'\n",
"\n",
"\n",
"def create_hdf5_traj(n_atoms, frames, compression, compression_opts):\n",
Expand Down Expand Up @@ -442,10 +442,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"'zarrfiles/zarr_3341_100.zarrtraj'"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"import numcodecs\n",
"create_zarr_traj(3341, 100, compressor=numcodecs.Blosc(cname='zstd', clevel=9))"
]
}
],
"metadata": {
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ test = [
"pytest-xdist>=2.5",
"pytest-cov>=3.0",
"MDAnalysisTests>=2.0.0",
"fsspec",
"s3fs",
]
doc = [
"sphinx",
Expand All @@ -45,7 +47,8 @@ doc = [
# documentation = "https://zarrtraj.readthedocs.io"

[tool.setuptools]
py-modules = []
packages = ["zarrtraj"]
py-modules = ["__init__"]

[tool.pytest.ini_options]
minversion = "6.0"
Expand Down
13 changes: 10 additions & 3 deletions zarrtraj/ZARRTRAJ.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class ZarrTrajReader(base.ReaderBase):

@store_init_arguments
def __init__(self, filename,
storage_options=None,
**kwargs):

if not HAS_ZARR:
Expand All @@ -38,6 +39,11 @@ def __init__(self, filename,
# NOTE: Not yet implemented
# self.convert_units = convert_units

# NOTE: Add error checking
# These options are passed through to zarr.open_group() so that
# fsspec-backed stores (e.g. remote/cloud URLs) can be opened
self.storage_options = storage_options

self.open_trajectory()

# _has dictionary used for checking whether zarrtraj file has
Expand Down Expand Up @@ -88,20 +94,21 @@ def _format_hint(thing):
except Exception:
# If an error occurs, it's likely not a Zarr file
return False

def open_trajectory(self):
"""opens the trajectory file using zarr library"""
self._frame = -1
if isinstance(self.filename, zarr.Group):
self._file = self.filename
else:
self._file = zarr.open_group(self.filename,
mode='r')
mode='r',
storage_options=self.storage_options)
# pulls first key out of 'particles'
# allows for arbitrary name of group1 in 'particles'
self._particle_group = self._file['particles'][
list(self._file['particles'])[0]]

@staticmethod
def parse_n_atoms(filename):
# NOTE: This may fail if the filename passed is already a zarr group
Expand Down
2 changes: 1 addition & 1 deletion zarrtraj/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@

# Add imports here
from importlib.metadata import version
from .ZARRTRAJ import *
from .ZARRTRAJ import ZarrTrajReader

__version__ = version("zarrtraj")
15 changes: 12 additions & 3 deletions zarrtraj/playground.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@

from ZARRTRAJ import *
import MDAnalysis as mda
from MDAnalysisTests.datafiles import PSF
import zarr
import numpy as np
import os
import h5py

print(os.getcwd())
import fsspec
import s3fs
import zarr





z = zarr.open_group('zarrtraj/10e3_zarr_c.zarrtraj', 'r')
print(z)
# Read the AWS credentials from the environment instead of hard-coding them.
# Secrets committed to source control are visible to anyone who can read the
# repository history and must be treated as compromised (rotate the key pair).
import os

u = mda.Universe(
    PSF,
    's3://test-zarrtraj-bucket/zarr_3341_100.zarrtraj',
    # Forwarded by the reader to zarr.open_group() as storage_options.
    storage_options={
        'key': os.environ['AWS_ACCESS_KEY_ID'],
        'secret': os.environ['AWS_SECRET_ACCESS_KEY'],
    },
)
# Walk the whole trajectory and print ts[0] for every frame.
for ts in u.trajectory:
    print(ts[0])

0 comments on commit 153e778

Please sign in to comment.