Skip to content

Commit

Permalink
Initial pass of ERDDAP datasets.xml parsing
Browse files Browse the repository at this point in the history
- Loads datasets.xml enough to extract datasets
- Differentiates between different types of datasets
- Stub models for all gridded dataset types
- Stub models for some tabular dataset types
- Examples for some gridded and tabular datasets

Closes #4
  • Loading branch information
abkfenris committed Sep 8, 2023
1 parent 693a702 commit ace074c
Show file tree
Hide file tree
Showing 47 changed files with 2,593 additions and 61 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ __pycache__/
build/
dist/
ioos_pkg_skeleton/_version.py
dataset_catalog/_version.py
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ repos:
rev: v0.0.282
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]

- repo: https://github.com/psf/black
rev: 23.7.0
Expand Down
146 changes: 105 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,44 +1,108 @@
## ioos_pkg_skeleton

[![Build Status](https://travis-ci.com/ioos/ioos-python-package-skeleton.svg?branch=master)](https://travis-ci.com/ioos/ioos-python-package-skeleton)

Quick description

### Documentation and code

URLs for the docs and code.

### Installation

For `conda` users you can

```shell
conda install --channel conda-forge ioos_pkg_skeleton
```

or, if you are a `pip` user

```shell
pip install ioos_pkg_skeleton
## dataset_catalog

Load and generate ERDDAP datasets.xml catalogs with
[pydantic-xml](https://pydantic-xml.readthedocs.io/en/latest/index.html),
and use those configs to load Xarray datasets and Pandas dataframes.

Additionally we should be able to use Xarray datasets and Pandas dataframes
to template out a config.

In the future it may support other catalog types like THREDDS...

```py
from pathlib import Path

from dataset_catalog.erddap.dataset import Attribute, AxisVariable, DataVariable
from dataset_catalog.erddap.dataset.grid.from_nc_files import GridFromNcFiles

# Build the model for an EDDGridFromNcFiles dataset: global attributes first,
# then one AxisVariable per axis and one DataVariable per data variable.
dataset = GridFromNcFiles(
    dataset_id="WaveWatch3",
    file_dir=Path("/datastore/models/WW3"),
    file_name_regex="GulfOfMaine\\.nc",
    add_attributes=[
        Attribute(name="cdm_data_type", value="Grid"),
        Attribute(name='Conventions', value='COARDS, CF-1.6'),
        Attribute(name='infoUrl', value='http://www.neracoos.org'),
    ],
    axis_variables=[
        AxisVariable(
            source_name='time',
            destination_name='time',
            add_attributes=[
                Attribute(name='ioos_category', value='Time'),
                Attribute(name='long_name', value='Time'),
                Attribute(name='standard_name', value='time')
            ]
        ),
        AxisVariable(
            source_name='lat',
            destination_name='latitude',
            add_attributes=[
                Attribute(name='ioos_category', value='Location'),
                Attribute(name='long_name', value='Latitude'),
                Attribute(name='standard_name', value='latitude')
            ]
        ),
        AxisVariable(
            source_name='lon',
            destination_name='longitude',
            add_attributes=[
                Attribute(name='ioos_category', value='Location'),
                Attribute(name='long_name', value='Longitude'),
                Attribute(name='standard_name', value='longitude')
            ]
        )
    ],
    data_variables=[
        DataVariable(source_name='hs', destination_name='hs', add_attributes=[
            Attribute(name="ioos_category", value="Surface Waves")
        ])
    ]
)

# Serialize the model to the <dataset> XML shown below.
print(dataset.to_xml(pretty_print=True))
```

### Example

```python
from ioos_pkg_skeleton import ioos_pkg_skeleton


ioos_pkg_skeleton.meaning_of_life_url()
```xml
<dataset type="EDDGridFromNcFiles" datasetID="WaveWatch3" active="true">
<fileDir>/datastore/models/WW3/</fileDir>
<fileNameRegex>GulfOfMaine\.nc</fileNameRegex>
<addAttributes>
<att name="cdm_data_type">Grid</att>
<att name="Conventions">COARDS, CF-1.6</att>
<att name="infoUrl">http://www.neracoos.org</att>
</addAttributes>
<axisVariable>
<sourceName>time</sourceName>
<destinationName>time</destinationName>
<addAttributes>
<att name="ioos_category">Time</att>
<att name="long_name">Time</att>
<att name="standard_name">time</att>
</addAttributes>
</axisVariable>
<axisVariable>
<sourceName>lat</sourceName>
<destinationName>latitude</destinationName>
<addAttributes>
<att name="ioos_category">Location</att>
<att name="long_name">Latitude</att>
<att name="standard_name">latitude</att>
</addAttributes>
</axisVariable>
<axisVariable>
<sourceName>lon</sourceName>
<destinationName>longitude</destinationName>
<addAttributes>
<att name="ioos_category">Location</att>
<att name="long_name">Longitude</att>
<att name="standard_name">longitude</att>
</addAttributes>
</axisVariable>
<dataVariable>
<sourceName>hs</sourceName>
<destinationName>hs</destinationName>
<addAttributes>
<att name="ioos_category">Surface Waves</att>
</addAttributes>
</dataVariable>
</dataset>
```


## Get in touch

Report bugs, suggest features or view the source code on [GitHub](https://github.com/ioos/ioos_pkg_skeleton/issues).


## License and copyright

ioos_pkg_skeleton is licensed under BSD 3-Clause "New" or "Revised" License (BSD-3-Clause).

Development occurs on GitHub at <https://github.com/ioos/ioos_pkg_skeleton>.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
ioos_pkg_skeleton is not a real package, just a set of best practices examples.
"""

from ioos_pkg_skeleton.ioos_pkg_skeleton import (
from dataset_catalog.ioos_pkg_skeleton import (
meaning_of_life,
meaning_of_life_url,
)
Expand Down
4 changes: 4 additions & 0 deletions dataset_catalog/erddap/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .dataset import grid, table
from .erddap_datasets import ErddapDatasets

__all__ = ["ErddapDatasets", "grid", "table"]
4 changes: 4 additions & 0 deletions dataset_catalog/erddap/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .attributes import Attribute
from .variables import AxisVariable, DataVariable

__all__ = ["Attribute", "AxisVariable", "DataVariable"]
17 changes: 17 additions & 0 deletions dataset_catalog/erddap/dataset/attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
ERDDAP dataset additional attributes
"""

from typing import Optional, Union

from pydantic_xml import BaseXmlModel, attr


class Attribute(BaseXmlModel, tag="att", search_mode="unordered"):
    """
    ERDDAP dataset additional attributes

    Models a single ``<att>`` element, e.g.
    ``<att name="long_name">Time</att>``.
    """

    # Bound to the ``name`` XML attribute; required (no default).
    name: str = attr(name="name")
    # Bound to the optional ``type`` XML attribute; None when it is absent.
    type: Optional[str] = attr(name="type", default=None)
    # Declared without ``attr()``; presumably the element's text content
    # (the ``Time`` in the example above) - TODO confirm against pydantic-xml.
    value: Union[str, int, float]
14 changes: 14 additions & 0 deletions dataset_catalog/erddap/dataset/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Base ERDDAP dataset model
"""
from pydantic_xml import BaseXmlModel, attr


class BaseDataset(BaseXmlModel, tag="dataset", search_mode="unordered"):
    """
    Base ERDDAP dataset model

    Models the ``<dataset>`` element; every field declared here is one of
    that element's XML attributes.
    """

    # Dataset type marker; a plain string here, narrowed to a single
    # ``Literal`` value by the concrete dataset models.
    type: str = attr()
    # Stored in XML as ``datasetID`` rather than under the Python field name.
    dataset_id: str = attr(name="datasetID")
    # Defaults to True when the attribute is not supplied.
    active: bool = attr(default=True)
61 changes: 61 additions & 0 deletions dataset_catalog/erddap/dataset/grid/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
ERDDAP gridded datasets
"""

from typing import Union

from .base import BaseGrid
from .from_aggregate_existing_dimension import (
EDDGridAggregateExistingDimension,
GridAggregateExistingDimension,
)
from .from_audio_files import EDDGridFromAudioFiles, GridFromAudioFiles
from .from_copy import EDDGridCopy, GridCopy
from .from_dap import EDDGridFromDap, GridFromDap
from .from_edd_table import EDDGridFromEDDTable, GridFromEDDTable
from .from_erddap import EDDGridFromErddap, GridFromErddap
from .from_etopo import EDDGridFromEtopo, GridFromEtopo
from .from_files import EDDGridFromFiles, GridFromFiles
from .from_lon_pm_180 import EDDGridLonPM180, GridLonPM180
from .from_lon_pm_360 import EDDGridLon0360, GridLon0360
from .from_merge_ir_files import EDDGridFromMergeIRFiles, GridFromMergeIRFiles
from .from_nc_files import EDDGridFromNcFiles, GridFromNcFiles
from .from_nc_files_unpacked import (
EDDGridFromNcFilesUnpacked,
GridFromNcFilesUnpacked,
)
from .from_side_by_side import EDDGridSideBySide, GridSideBySide

# Maps each gridded dataset type marker to the model class that handles it.
# NOTE(review): the keys are the ``Literal[...]`` alias objects imported above,
# not plain strings, so ``grid_types["EDDGridFromNcFiles"]`` will not find an
# entry - confirm how callers look things up before relying on string keys.
# The values are the model classes themselves, not instances.
grid_types: dict[object, type[BaseGrid]] = {
    EDDGridFromAudioFiles: GridFromAudioFiles,
    EDDGridFromDap: GridFromDap,
    EDDGridFromEDDTable: GridFromEDDTable,
    EDDGridFromErddap: GridFromErddap,
    EDDGridFromEtopo: GridFromEtopo,
    EDDGridFromFiles: GridFromFiles,
    EDDGridFromMergeIRFiles: GridFromMergeIRFiles,
    EDDGridFromNcFiles: GridFromNcFiles,
    EDDGridFromNcFilesUnpacked: GridFromNcFilesUnpacked,
    EDDGridLonPM180: GridLonPM180,
    EDDGridLon0360: GridLon0360,
    EDDGridSideBySide: GridSideBySide,
    EDDGridAggregateExistingDimension: GridAggregateExistingDimension,
    EDDGridCopy: GridCopy,
}

# Union of every gridded dataset model registered in ``grid_types`` (same
# classes, same order), usable as one annotation for "any grid dataset".
GridDatasets = Union[
    GridFromAudioFiles,
    GridFromDap,
    GridFromEDDTable,
    GridFromErddap,
    GridFromEtopo,
    GridFromFiles,
    GridFromMergeIRFiles,
    GridFromNcFiles,
    GridFromNcFilesUnpacked,
    GridLonPM180,
    GridLon0360,
    GridSideBySide,
    GridAggregateExistingDimension,
    GridCopy,
]
25 changes: 25 additions & 0 deletions dataset_catalog/erddap/dataset/grid/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Gridded dataset base
"""
from typing import TYPE_CHECKING, Protocol, runtime_checkable

if TYPE_CHECKING:
import xarray as xr

from ..base import BaseDataset


class BaseGrid(BaseDataset):
    """
    Common parent for all gridded dataset models.

    Adds no fields beyond those of ``BaseDataset``; it exists so the grid
    models share one base class.
    """


@runtime_checkable
class GridToXarrayProtocol(Protocol):
    """Gridded datasets that can be converted into Xarray Datasets"""

    def to_dataset(self) -> "xr.Dataset":
        """Return an Xarray Dataset built from this gridded dataset."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
Aggregate child datasets along a dimension
https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridAggregateExistingDimension
"""
from typing import Literal

from pydantic_xml import attr

from .base import BaseGrid

# The only value this model accepts for the dataset ``type`` attribute.
EDDGridAggregateExistingDimension = Literal["EDDGridAggregateExistingDimension"]


class GridAggregateExistingDimension(BaseGrid):
    """
    Aggregate child datasets along a dimension
    https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridAggregateExistingDimension
    """

    # Stub model: only narrows ``type`` to the literal above; no
    # dataset-specific fields are declared here yet.
    type: EDDGridAggregateExistingDimension = attr()
22 changes: 22 additions & 0 deletions dataset_catalog/erddap/dataset/grid/from_audio_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
Aggregate a collection of audio files to treat as a gridded dataset
https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridFromAudioFiles
"""
from typing import Literal

from pydantic_xml import attr

from .from_files import GridFromFiles

# The only value this model accepts for the dataset ``type`` attribute.
EDDGridFromAudioFiles = Literal["EDDGridFromAudioFiles"]


class GridFromAudioFiles(GridFromFiles):
    """
    Aggregate a collection of audio files to treat as a gridded dataset
    https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridFromAudioFiles
    """

    # Only narrows ``type`` to the literal above; every other field is
    # inherited from ``GridFromFiles``.
    type: EDDGridFromAudioFiles = attr()
23 changes: 23 additions & 0 deletions dataset_catalog/erddap/dataset/grid/from_copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Make a copy of a remote ERDDAP grid dataset
https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridCopy
"""

from typing import Literal

from pydantic_xml import attr

from .base import BaseGrid

# The only value this model accepts for the dataset ``type`` attribute.
EDDGridCopy = Literal["EDDGridCopy"]


class GridCopy(BaseGrid):
    """
    Make a copy of a remote ERDDAP grid dataset
    https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridCopy
    """

    # Stub model: only narrows ``type`` to the literal above; no
    # dataset-specific fields are declared here yet.
    type: EDDGridCopy = attr()
22 changes: 22 additions & 0 deletions dataset_catalog/erddap/dataset/grid/from_dap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
Load gridded dataset from remote DAP compatible servers
https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridFromDap
"""
from typing import Literal

from pydantic_xml import attr

from .base import BaseGrid

# The only value this model accepts for the dataset ``type`` attribute.
EDDGridFromDap = Literal["EDDGridFromDap"]


class GridFromDap(BaseGrid):
    """
    Load gridded dataset from remote DAP compatible servers
    https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDGridFromDap
    """

    # Stub model: only narrows ``type`` to the literal above; no
    # dataset-specific fields are declared here yet.
    type: EDDGridFromDap = attr()
Loading

0 comments on commit ace074c

Please sign in to comment.