Commit

Merge branch 'develop' into gtc-3069/asset_index
jterry64 authored Jan 6, 2025
2 parents 9f7d394 + 8fa3487 commit bf2b0e8
Showing 7 changed files with 94 additions and 21 deletions.
69 changes: 61 additions & 8 deletions app/models/pydantic/creation_options.py
@@ -122,24 +122,69 @@ class RasterTileSetAssetCreationOptions(StrictBaseModel):
"when input files are in different projections from each other."
)
)
pixel_meaning: str
pixel_meaning: str = Field(
..., description="Description of what the pixel value in the "
"raster represents. This is used to clarify the meaning of the raster "
"and distinguish multiple raster tile sets based on the same dataset "
"version. The pixel_meaning string should be fairly short, use all "
"lower-case letters, and use underscores instead of spaces."
)
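# Illustrative examples only (not from this commit): pixel_meaning values
# such as "date_conf", "intensity", or "is_forest_loss" would follow the
# short, lower-case, underscore-separated convention described above.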
data_type: DataType
nbits: Optional[int]
calc: Optional[str]
nbits: Optional[int] = Field(
None,
description="Advanced option that lets GDAL compress the data even "
"more based on the number of bits you need."
)
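# Illustrative example only (not from this commit): data whose values all
# fall in the range 0-7 could be stored with nbits=3 rather than a full
# 8-bit band.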
calc: Optional[str] = Field(
None,
description="There are two modes for this field, one for rasterizing vector "
"sources and one for transforming and/or combining one or more "
"sources that are already raster. For rasterizing vector sources, "
"this field should be an SQL expression that yields the desired "
"raster value based on the fields of your vector dataset.\n\nFor raster "
"sources, this should be a raster algebra expression, similar to that "
"provided to gdal_calc (see "
"https://gdal.org/en/stable/programs/gdal_calc.html), "
"that transforms one or more input bands into one or more output "
"bands. For use in this expression, each band in "
"the sources is assigned an alphabetic variable (A-Z, then AA-AZ, "
"etc.) in the order it exists in those sources, with those of the "
"first source first, continuing with those of the second, and so on. "
"So with two input sources of two bands each, they would be assigned "
"to variables A and B (for the first source) and C and D (for the "
"second source). The NumPy module is in scope, accessible as np"
)
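# Illustrative examples only (not from this commit; field and band names
# are hypothetical). For a vector source, calc might be an SQL expression
# over the dataset's attributes, e.g.
#   "CASE WHEN confidence = 'high' THEN 2 ELSE 1 END"
# For raster sources, a gdal_calc-style expression over the band
# variables might be
#   "np.where(A > 0, A, B)"
# which keeps the value of the first band wherever it is non-zero and
# falls back to the second band elsewhere.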
band_count: int = 1
union_bands: bool = False
no_data: Optional[Union[List[NoDataType], NoDataType]]
rasterize_method: Optional[RasterizeMethod]
rasterize_method: Optional[RasterizeMethod] = Field(
RasterizeMethod.value,
description="For raster sources or default assets, 'value' (the "
"default) means use the value from the last or only band processed, "
"and 'count' means count the number of bands with data values."
)
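# Illustrative example only (not from this commit): with three single-band
# sources and rasterize_method "count", a pixel that has data in two of
# the three bands would get the value 2; with "value" it would instead
# take the value from the last band processed.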
resampling: ResamplingMethod = PIXETL_DEFAULT_RESAMPLING
order: Optional[Order]
order: Optional[Order] = Field(
None,
description="For vector default assets, order the features by the "
"calculated raster value. For 'asc', the features are ordered by "
"ascending calculated value so that the largest calculated value is "
"used in the raster when there are overlapping features. For 'desc', "
"the ordering is descending, so that the smallest calculated value "
"is used when there are overlaps."
)
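# Illustrative example only (not from this commit): with a calc of
# "CASE WHEN confidence = 'high' THEN 2 ELSE 1 END" and order "desc",
# a pixel covered by both a high- and a low-confidence feature would keep
# the smaller value (1), since descending order writes larger values first.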
overwrite: bool = False
subset: Optional[str]
grid: Grid
symbology: Optional[Symbology] = None
compute_stats: bool = True
compute_histogram: bool = False
process_locally: bool = True
auxiliary_assets: Optional[List[UUID]] = None
auxiliary_assets: Optional[List[UUID]] = Field(
None,
description="Asset IDs of additional rasters you might want to include "
"in your calc expression."
)
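# Illustrative note (an inference, not stated in this commit): bands from
# auxiliary assets are presumed to continue the A, B, C, ... lettering of
# the calc expression after the bands of the primary sources.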
photometric: Optional[PhotometricType] = None
num_processes: Optional[StrictInt] = None
timeout_sec: Optional[StrictInt] = Field(
@@ -209,7 +254,15 @@ class VectorSourceCreationOptions(StrictBaseModel):
Index(index_type=IndexType.gist.value, column_names=["geom_wm"]),
Index(index_type=IndexType.hash.value, column_names=["gfw_geostore_id"]),
],
description="List of indices to add to table",
description="List of indices to add to the database table representing "
"the vector dataset. Each element of the indices field contains an "
"index_type field (which is a string) and a column_names field (which "
"is a list of field names included in this index). The possibilities "
"for the index_type field are hash, btree, or gist. hash is efficient "
"for standard exact-value lookups, while btree is efficient for range "
"lookups. gist is used for geometry fields and can do "
"intersection-type lookups. See "
"https://www.postgresql.org/docs/current/indexes-types.html"
)
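# Illustrative example only (not from this commit; column names are
# hypothetical): a custom indices list might look like
#   [
#       {"index_type": "gist", "column_names": ["geom"]},
#       {"index_type": "hash", "column_names": ["iso"]},
#       {"index_type": "btree", "column_names": ["alert__date"]},
#   ]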
cluster: Optional[Index] = Field(None, description="Index to use for clustering.")
table_schema: Optional[List[FieldType]] = Field(
@@ -331,7 +384,7 @@ class RasterTileCacheCreationOptions(TileCacheBaseModel):
"default",
description="Name space to use for raster tile cache. "
"This will be part of the URI and will "
"allow to create multiple raster tile caches per version,",
"allow creation of multiple raster tile caches per version,",
)
symbology: Symbology = Field(..., description="Symbology to use for output tiles")
source_asset_id: str = Field(
8 changes: 6 additions & 2 deletions app/models/pydantic/versions.py
@@ -18,8 +18,12 @@ class Version(BaseRecord):
metadata: Union[VersionMetadataOut, BaseModel]
status: VersionStatus = VersionStatus.pending

# Each element of assets is a tuple (asset_type, assert_uri, asset_id)
assets: List[Tuple[str, str, str]] = list()
assets: List[Tuple[str, str, str]] = Field(
list(),
description="List of saved (non-pending and non-failed) assets, with "
" elements in the form: [asset_type, asset_uri, asset_id]. The list "
"of assets is sorted by the creation time of each asset."
)
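# Illustrative example only (not from this commit): a single element might
# look like ("Raster tile set", "s3://<bucket>/<dataset>/<version>/...",
# "<asset UUID>"), with placeholders standing in for real values.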


class VersionCreateIn(StrictBaseModel):
11 changes: 7 additions & 4 deletions app/routes/assets/asset.py
@@ -1,12 +1,13 @@
"""Assets are replicas of the original source files.
"""Assets are usually alternate representations of the base dataset
version, sometimes incorporating extra data from other datasets.
Assets might be served in different formats, attribute values might be
altered, additional attributes added, and feature resolution might have
changed. Assets are either managed or unmanaged. Managed assets are
created by the API and users can rely on data integrity. Unmanaged
assets are only loosely linked to a dataset version and users
cannot rely on full integrity. We can only assume that unmanaged are
based on the same version and do not know the processing history.
cannot rely on full integrity. We can only assume that unmanaged assets
are based on the same version and do not know the processing history.
"""

from typing import List, Optional, Union
@@ -87,7 +88,9 @@ async def get_asset(
*,
asset_id: UUID = Path(...),
) -> AssetResponse:
"""Get a specific asset."""
"""Get a specific asset. This provides information on the asset, including
the asset id, the asset status, the asset URI, and creation & last update
times."""
try:
row: ORMAsset = await assets.get_asset(asset_id)
except RecordNotFoundError as e:
4 changes: 2 additions & 2 deletions app/routes/datasets/asset.py
@@ -66,8 +66,8 @@ async def get_version_assets(
description="The number of assets per page. Default is `10`.",
),
) -> Union[PaginatedAssetsResponse, AssetsResponse]:
"""Get all assets for a given dataset version. The list of assets
is sorted by the creation time of each asset.
"""Get all assets for a given dataset version (including pending/failed assets).
The list of assets is sorted by the creation time of each asset.
Will attempt to paginate if `page[size]` or `page[number]` is
provided. Otherwise, it will attempt to return the entire list of
6 changes: 5 additions & 1 deletion app/routes/datasets/dataset.py
@@ -128,7 +128,11 @@ async def update_dataset(
request: DatasetUpdateIn,
user: User = Depends(get_owner),
) -> DatasetResponse:
"""Update metadata, accessibility or ownership of a dataset."""
"""Update metadata, accessibility or ownership of a dataset.
Individual metadata fields can be modified without affecting other
existing fields.
"""
input_data: Dict = request.dict(exclude_none=True, by_alias=True)

if request.owner_id is not None:
15 changes: 12 additions & 3 deletions app/routes/datasets/versions.py
@@ -81,7 +81,8 @@
async def get_version(
*, dv: Tuple[str, str] = Depends(dataset_version_dependency)
) -> VersionResponse:
"""Get basic metadata for a given version. The list of assets is sorted by
"""Get basic metadata for a given version. The list of assets only includes
saved (non-pending and non-failed) assets and is sorted by
the creation time of each asset."""

dataset, version = dv
@@ -106,8 +107,8 @@ async def add_new_version(
user: User = Depends(get_owner),
response: Response,
):
"""Create a version for a given dataset by uploading the geospatial/tabular
asset.
"""Create a version for a given dataset by uploading the tabular, vector,
or raster asset.
Only the dataset's owner or a user with `ADMIN` user role can do
this operation.
@@ -373,6 +374,14 @@ async def get_stats(dv: Tuple[str, str] = Depends(dataset_version_dependency)):
response_model=Union[FieldsMetadataResponse, RasterBandsMetadataResponse],
)
async def get_fields(dv: Tuple[str, str] = Depends(dataset_version_dependency)):
"""Get the fields of a version. For a version with a vector default asset,
these are the fields (attributes) of the features of the base vector dataset.
For a version with a raster default asset, the fields are all the raster
tile sets that use the same grid as the raster default asset. Also
included are some fields with special meaning such as 'area__ha',
'latitude', and 'longitude'.
"""
dataset, version = dv
orm_asset: ORMAsset = await assets.get_default_asset(dataset, version)

2 changes: 1 addition & 1 deletion batch/pixetl.dockerfile
@@ -1,4 +1,4 @@
FROM globalforestwatch/pixetl:v1.7.7_test_parallel
FROM globalforestwatch/pixetl:v1.7.7

# Copy scripts
COPY ./batch/scripts/ /opt/scripts/
