Skip to content

Commit

Permalink
add jupyter notebook
Browse files: browse the repository at this point in the history
  • Loading branch information
jcschaff committed Nov 5, 2024
1 parent 23f197b commit 0f0f1b4
Show file tree
Hide file tree
Showing 7 changed files with 2,846 additions and 27 deletions.
5 changes: 1 addition & 4 deletions biosim_client/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
from typing import Optional, Any

import numpy as np
Expand Down Expand Up @@ -38,16 +37,14 @@ def to_numpy(self) -> np.ndarray:
def to_pandas(self) -> pd.DataFrame:
dataframe = pd.DataFrame(self.to_numpy().transpose())
if self.column_names is not None:
dataframe.columns = self.column_names
dataframe.columns = self.column_names # type: ignore
return dataframe

@classmethod
def from_api(cls, data: DatasetData, hdf5_dataset: HDF5Dataset) -> 'Dataset':
values = data.to_dict()['values']
shape = hdf5_dataset.shape

# print(f"in from_api(), len(values)={len(values)}, shape={shape}, hdf5_dataset.attributes={ {attr.to_dict()['key']: attr.to_dict()['value'] for attr in hdf5_dataset.attributes} }")

attributes: dict[str, Any] = {}
column_names: Optional[list[str]] = None
for attr in hdf5_dataset.attributes:
Expand Down
1 change: 0 additions & 1 deletion biosim_client/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,3 @@
print()
print(dataframe)
print()

17 changes: 5 additions & 12 deletions biosim_client/sim_data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Optional, get_args

import biosim_client.api_clients.simdata.openapi_client as simdata_client
from biosim_client.api_clients.simdata.openapi_client import HDF5File, DatasetData, HDF5Dataset, HDF5Group, \
Configuration, HDF5Attribute
from biosim_client.api_clients.simdata.openapi_client import HDF5File, DatasetData, HDF5Dataset, Configuration, \
HDF5Attribute
from biosim_client.dataset import Dataset, AttributeValueTypes

attribute_value_types = get_args(AttributeValueTypes)
Expand All @@ -24,16 +24,15 @@ def dataset_names(self) -> list[str]:
return [dataset.name for group in self.hdf5_file.groups for dataset in group.datasets]

def dataset_uris(self) -> list[str]:
return [attr.to_dict()['value'] for group in self.hdf5_file.groups for dataset in group.datasets for attr in dataset.attributes if attr.to_dict()['key'] == "uri"]
return [attr.to_dict()['value'] for group in self.hdf5_file.groups for dataset in group.datasets for attr in
dataset.attributes if attr.to_dict()['key'] == "uri"]

def get_dataset(self, name: str) -> Dataset:
if name in self.datasets:
# print("cache hit, returning cached dataset")
return self.datasets[name]

dataset_uri = None
column_names = None
hdf5_group: Optional[HDF5Group] = None
hdf5_dataset: Optional[HDF5Dataset] = None
hdf5_attribute: HDF5Attribute
for hdf5_group in self.hdf5_file.groups:
Expand All @@ -42,25 +41,19 @@ def get_dataset(self, name: str) -> Dataset:
for hdf5_attribute in hdf5_dataset.attributes:
if hdf5_attribute.to_dict()['key'] == "uri":
dataset_uri = hdf5_attribute.to_dict()["value"]
# print(f"Found dataset '{name}' with URI {dataset_uri}")
if hdf5_attribute.to_dict()['key'] == "sedmlDataSetLabels":
column_names = hdf5_attribute.to_dict()["value"]
# print(f"Found dataset '{name}' with column names {column_names}")
break

if dataset_uri is None:
raise ValueError(f"Dataset '{name}' not found")
if hdf5_dataset is None:
raise ValueError(f"Dataset '{name}' not found")

# print(f"Reading dataset '{name}' with URI '''{dataset_uri}''' and column names '''{column_names}'''")
with (simdata_client.api_client.ApiClient(self.configuration) as api_client):
api_instance = simdata_client.DefaultApi(api_client)
dataset_data: DatasetData = api_instance.read_dataset(run_id=self.run_id, dataset_name=dataset_uri)
# print(f"Read dataset '{name}' with URI '''{dataset_uri}''' and column names {column_names} and data shape {dataset_data.shape} and shape from metadata {hdf5_dataset.shape}")
dataset = Dataset.from_api(data=dataset_data, hdf5_dataset=hdf5_dataset)
self.datasets[name] = dataset
return dataset

def __str__(self) -> str:
return f"SimResults(run_id='{self.run_id}', dataset_names={self.dataset_names()})"
return f"SimResults(run_id='{self.run_id}', dataset_names={self.dataset_names()})"
6 changes: 3 additions & 3 deletions biosim_client/simdataclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def get_health(self) -> str:
api_response: StatusResponse = api_instance.get_health()
return api_response.to_str()

def get_simdata(self, id: str) -> SimData:
def get_simdata(self, run_id: str) -> SimData:
with simdata_client.api_client.ApiClient(self.configuration) as api_client:
api_instance = simdata_client.DefaultApi(api_client)
hdf5_file: HDF5File = api_instance.get_metadata(id)
return SimData(configuration=self.configuration, run_id=id, hdf5_file=hdf5_file)
hdf5_file: HDF5File = api_instance.get_metadata(run_id)
return SimData(configuration=self.configuration, run_id=run_id, hdf5_file=hdf5_file)
613 changes: 613 additions & 0 deletions example.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 0f0f1b4

Please sign in to comment.