diff --git a/CHANGES.rst b/CHANGES.rst index de8c8f1..30e1e3b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -20,6 +20,14 @@ Changes ======= +Version 1.0.1 (2022-12-06) +-------------------------- + +- Remove integration with lccs-db `#187 `_. +- Improve performance on items retrieval +- Fix migration 0.8 - 1.0 related bands metadata ``null`` entry `#189 `_. + + Version 1.0.0 (2022-09-22) -------------------------- diff --git a/MANIFEST.in b/MANIFEST.in index 497d1ad..0984841 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -38,3 +38,4 @@ recursive-include docs *.rst recursive-include docs Makefile recursive-include examples *.py recursive-include tests *.py +recursive-include examples *.json diff --git a/USAGE.rst b/USAGE.rst index 86d1f41..fde2168 100644 --- a/USAGE.rst +++ b/USAGE.rst @@ -41,7 +41,6 @@ all definitions for ``BDC-Catalog``:: bdc-db db init bdc-db db create-namespaces bdc-db db create-extension-postgis - lccs-db db create-extension-hstore bdc-db db create-schema @@ -62,9 +61,6 @@ Creating database definition SQLALCHEMY_DATABASE_URI="postgresql://postgres:postgres@localhost:5432/bdcdb" \ bdc-db db create-extension-postgis - SQLALCHEMY_DATABASE_URI="postgresql://postgres:postgres@localhost:5432/bdcdb" \ - lccs-db db create-extension-hstore - **2.** After that, run ``BDC-DB`` command to prepare the Brazil Data Cube data model:: diff --git a/bdc_catalog/alembic/5067fb4381c0_add_roles_system.py b/bdc_catalog/alembic/5067fb4381c0_add_roles_system.py index df598a4..5ddfb35 100644 --- a/bdc_catalog/alembic/5067fb4381c0_add_roles_system.py +++ b/bdc_catalog/alembic/5067fb4381c0_add_roles_system.py @@ -44,10 +44,6 @@ def upgrade(): schema='bdc' ) - op.drop_constraint('collections_classification_system_id_class_systems_fkey', 'collections', schema='bdc', type_='foreignkey') - op.create_foreign_key(op.f('collections_classification_system_id_classification_systems_fkey'), 'collections', 'classification_systems', ['classification_system_id'], ['id'], 
source_schema='bdc', referent_schema='lccs', onupdate='CASCADE', ondelete='CASCADE') - op.drop_column('collections', 'is_public', schema='bdc') - op.create_index(op.f('idx_bdc_quicklook_collection_id'), 'quicklook', ['collection_id'], unique=False, schema='bdc') # ### end Alembic commands ### @@ -56,9 +52,6 @@ def downgrade(): # ### commands auto generated by Alembic - please adjust! ### op.drop_index(op.f('idx_bdc_quicklook_collection_id'), table_name='quicklook', schema='bdc') - op.drop_constraint(op.f('collections_classification_system_id_classification_systems_fkey'), 'collections', schema='bdc', type_='foreignkey') - op.create_foreign_key('collections_classification_system_id_class_systems_fkey', 'collections', 'classification_systems', ['classification_system_id'], ['id'], source_schema='bdc', referent_schema='lccs') - op.drop_table('collections_roles', schema='bdc') op.drop_index(op.f('idx_bdc_roles_name'), table_name='roles', schema='bdc') op.drop_table('roles', schema='bdc') diff --git a/bdc_catalog/alembic/c68b17b1860b_v0_8_0.py b/bdc_catalog/alembic/c68b17b1860b_v0_8_0.py index 7d043b0..877e4fc 100644 --- a/bdc_catalog/alembic/c68b17b1860b_v0_8_0.py +++ b/bdc_catalog/alembic/c68b17b1860b_v0_8_0.py @@ -12,21 +12,14 @@ revision = 'c68b17b1860b' down_revision = '566a05da999d' branch_labels = () -depends_on = 'e8b12ba52665' # LCCS-DB reference id +# Do not remove this migration. +# This part is important to keep migration tree + def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.add_column('collections', sa.Column('classification_system_id', sa.Integer(), nullable=True), schema='bdc') - op.create_foreign_key(op.f('collections_classification_system_id_class_systems_fkey'), 'collections', 'classification_systems', ['classification_system_id'], ['id'], source_schema='bdc', referent_schema='lccs', onupdate='CASCADE', ondelete='CASCADE') - op.create_index(op.f('idx_bdc_collections_classification_system_id'), 'collections', ['classification_system_id'], - unique=False, schema='bdc') - # ### end Alembic commands ### + pass def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_index(op.f('idx_bdc_collections_classification_system_id'), table_name='collections', schema='bdc') - op.drop_constraint(op.f('collections_classification_system_id_classification_systems_fkey'), 'collections', schema='bdc', type_='foreignkey') - op.drop_column('collections', 'classification_system_id', schema='bdc') - # ### end Alembic commands ### + pass diff --git a/bdc_catalog/alembic/d01f09b5dd8b_v1_0_0.py b/bdc_catalog/alembic/d01f09b5dd8b_v1_0_0.py index b5ff742..3a0a2ee 100644 --- a/bdc_catalog/alembic/d01f09b5dd8b_v1_0_0.py +++ b/bdc_catalog/alembic/d01f09b5dd8b_v1_0_0.py @@ -17,7 +17,6 @@ revision = 'd01f09b5dd8b' down_revision = 'c68b17b1860b' branch_labels = () -depends_on = '561ebe6266ad' # LCCS-DB stable 0.8.1 def upgrade(): @@ -56,7 +55,7 @@ def upgrade(): comment='The value to sum in scale mult')) sql = """UPDATE bdc.bands - SET metadata = coalesce(metadata::jsonb,'{}'::jsonb) || ('{\"eo\":{\"resolution_x\":'||resolution_x||',\"resolution_y\":'||resolution_y||',\"center_wavelength\":'||coalesce(center_wavelength, 0)||',\"full_width_half_max\": '||coalesce(full_width_half_max, 0)||'}}')::jsonb""" + SET metadata = CASE WHEN metadata::TEXT = 'null' THEN '{}'::jsonb ELSE coalesce(metadata::jsonb,'{}'::jsonb) || 
('{\"eo\":{\"resolution_x\":'||resolution_x||',\"resolution_y\":'||resolution_y||',\"center_wavelength\":'||coalesce(center_wavelength, 0)||',\"full_width_half_max\": '||coalesce(full_width_half_max, 0)||'}}')::jsonb END""" bands_op.execute(sql) bands_op.drop_column('center_wavelength') bands_op.drop_column('full_width_half_max') diff --git a/bdc_catalog/alembic/f3112636be24_remove_lccs_db.py b/bdc_catalog/alembic/f3112636be24_remove_lccs_db.py new file mode 100644 index 0000000..75ac967 --- /dev/null +++ b/bdc_catalog/alembic/f3112636be24_remove_lccs_db.py @@ -0,0 +1,79 @@ +"""remove lccs-db + +Revision ID: f3112636be24 +Revises: 5067fb4381c0 +Create Date: 2022-11-01 17:05:17.726004 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy.engine.reflection import Inspector + +# revision identifiers, used by Alembic. +revision = 'f3112636be24' +down_revision = '5067fb4381c0' +branch_labels = () +depends_on = None + + +def _has_column(table_name: str, column: str, schema: str = None) -> bool: + bind = op.get_bind() + inspector = Inspector.from_engine(bind=bind) + + found_column = False + for col in inspector.get_columns(table_name=table_name, schema=schema): + if column in col['name']: + found_column = True + break + + return found_column + + +def upgrade(): + with op.batch_alter_table('collections', schema='bdc') as batch_op: + if _has_column(table_name='collections', schema='bdc', column='classification_system_id'): + batch_op.drop_column('classification_system_id') + if not _has_column(table_name='collections', schema='bdc', column='is_public'): + batch_op.add_column(sa.Column('is_public', sa.Boolean(), nullable=False, server_default='true')) + batch_op.create_index(op.f('idx_bdc_collections_is_public'), ['is_public'], unique=False) + + op.create_index('idx_bdc_items_start_date_desc_id', 'items', [sa.text('start_date DESC'), 'id'], unique=False, + schema='bdc') + 
op.create_index('idx_bdc_items_start_date_desc_id_is_available', 'items', + [sa.text('start_date DESC'), 'id', 'is_available'], unique=False, + schema='bdc') + op.drop_table('collections_roles', schema='bdc') + op.drop_table('roles', schema='bdc') + # ### end Alembic commands ### + + +def downgrade(): + op.create_table( + 'roles', + sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), + sa.Column('name', sa.String(length=64), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('created', sa.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.Column('updated', sa.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('roles_pkey')), + sa.UniqueConstraint('name', name=op.f('roles_name_key')), + schema='bdc' + ) + op.create_index(op.f('idx_bdc_roles_name'), 'roles', ['name'], unique=False, schema='bdc') + op.create_table( + 'collections_roles', + sa.Column('collection_id', sa.Integer(), nullable=False), + sa.Column('role_id', sa.Integer(), nullable=False), + sa.Column('created', sa.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.Column('updated', sa.TIMESTAMP(timezone=True), server_default=sa.text('now()'), nullable=False), + sa.ForeignKeyConstraint(['collection_id'], ['bdc.collections.id'], + name=op.f('collections_roles_collection_id_collections_fkey'), onupdate='CASCADE', + ondelete='CASCADE'), + sa.ForeignKeyConstraint(['role_id'], ['bdc.roles.id'], name=op.f('collections_roles_role_id_roles_fkey'), + onupdate='CASCADE', ondelete='CASCADE'), + sa.PrimaryKeyConstraint('collection_id', 'role_id', name=op.f('collections_roles_pkey')), + schema='bdc' + ) + op.drop_index('idx_bdc_items_start_date_desc_id', table_name='items', schema='bdc') + op.drop_index('idx_bdc_items_start_date_desc_id_is_available', table_name='items', schema='bdc') diff --git a/bdc_catalog/cli.py b/bdc_catalog/cli.py index 
4882fb0..38e588a 100644 --- a/bdc_catalog/cli.py +++ b/bdc_catalog/cli.py @@ -72,14 +72,16 @@ def load_data(ifile: str, from_dir: str, verbose: bool): 'Make sure to install with "pip install shapely" or ' '"pip install -e .[geo]".') + if not ifile and not from_dir: + raise click.MissingParameter("Missing --ifile or --from-dir parameter.") + entries = [] + if ifile: entries.append(Path(ifile)) elif from_dir: for entry in Path(from_dir).glob('*.json'): entries.append(entry) - else: - raise click.MissingParameter("Missing --ifile or --from-dir parameter.") for entry in entries: with entry.open() as fd: diff --git a/bdc_catalog/ext.py b/bdc_catalog/ext.py index 74a468a..676e4ef 100644 --- a/bdc_catalog/ext.py +++ b/bdc_catalog/ext.py @@ -83,7 +83,7 @@ def __init__(self, app=None): if app: self.init_app(app) - def init_app(self, app: Flask): + def init_app(self, app: Flask, **kwargs): """Initialize Flask application instance. Args: diff --git a/bdc_catalog/models/__init__.py b/bdc_catalog/models/__init__.py index 9ed0d26..8db22a5 100644 --- a/bdc_catalog/models/__init__.py +++ b/bdc_catalog/models/__init__.py @@ -20,8 +20,7 @@ from .band import Band, BandSRC from .base_sql import db -from .collection import (Collection, CollectionRole, CollectionsProviders, - CollectionSRC) +from .collection import Collection, CollectionsProviders, CollectionSRC from .composite_function import CompositeFunction from .grid_ref_sys import GridRefSys from .item import Item, ItemsProcessors, SpatialRefSys @@ -30,7 +29,6 @@ from .provider import Provider from .quicklook import Quicklook from .resolution_unit import ResolutionUnit -from .role import Role from .tile import Tile from .timeline import Timeline @@ -39,7 +37,6 @@ 'Band', 'BandSRC', 'Collection', - 'CollectionRole', 'CollectionSRC', 'CollectionsProviders', 'CompositeFunction', @@ -51,7 +48,6 @@ 'Provider', 'Quicklook', 'ResolutionUnit', - 'Role', 'SpatialRefSys', 'Tile', 'Timeline', diff --git a/bdc_catalog/models/collection.py 
b/bdc_catalog/models/collection.py index 0b46d8e..5f678e0 100644 --- a/bdc_catalog/models/collection.py +++ b/bdc_catalog/models/collection.py @@ -22,7 +22,6 @@ from bdc_db.sqltypes import JSONB from geoalchemy2 import Geometry -from lccs_db.models import LucClassificationSystem from sqlalchemy import (ARRAY, TIMESTAMP, Boolean, Column, Enum, ForeignKey, Index, Integer, PrimaryKeyConstraint, String, Text, UniqueConstraint) @@ -33,7 +32,6 @@ from ..config import BDC_CATALOG_SCHEMA from .base_sql import BaseModel, db from .provider import Provider -from .role import Role name_collection_type = 'collection_type' options_collection_type = ('cube', 'collection', 'classification', 'mosaic') @@ -57,7 +55,6 @@ class Collection(BaseModel): ForeignKey(f'{BDC_CATALOG_SCHEMA}.composite_functions.id', onupdate='CASCADE', ondelete='CASCADE'), comment='Function schema identifier. Used for data cubes.') grid_ref_sys_id = Column(ForeignKey(f'{BDC_CATALOG_SCHEMA}.grid_ref_sys.id', onupdate='CASCADE', ondelete='CASCADE')) - classification_system_id = Column(ForeignKey(LucClassificationSystem.id, onupdate='CASCADE', ondelete='CASCADE')) collection_type = Column(enum_collection_type, nullable=False) metadata_ = Column('metadata', JSONB('bdc-catalog/collection-metadata.json'), comment='Follow the JSONSchema @jsonschemas/collection-metadata.json') @@ -69,6 +66,7 @@ class Collection(BaseModel): item_assets = Column('item_assets', JSONB('bdc-catalog/collection-item-assets.json'), comment='Contains the STAC Extension Item Assets.') is_available = Column(Boolean(), nullable=False, default=False, server_default='False') + is_public = Column(Boolean(), nullable=False, default=True, server_default='true') category = Column(enum_collection_category, nullable=False) start_date = Column(TIMESTAMP(timezone=True)) end_date = Column(TIMESTAMP(timezone=True)) @@ -82,17 +80,15 @@ class Collection(BaseModel): bands = relationship('Band', back_populates='collection') quicklook = relationship('Quicklook') 
timeline = relationship('Timeline') - # Joined Eager Loading. Default is Left Outer Join to lead object that does not refer to a related row. - classification_system = relationship('LucClassificationSystem', lazy='joined') __table_args__ = ( UniqueConstraint('name', 'version'), Index(None, grid_ref_sys_id), Index(None, name), Index(None, spatial_extent, postgresql_using='gist'), - Index(None, classification_system_id), Index(None, category), Index(None, is_available), + Index(None, is_public), Index(None, start_date, end_date), dict(schema=BDC_CATALOG_SCHEMA), ) @@ -143,6 +139,44 @@ def identifier(self): """ return func.concat(self.name, '-', self.version) + @classmethod + def get_collection_sources(cls, collection: Union['Collection', str, int]) -> List['Collection']: + """Trace data cube collection origin. + + It traces all the collection origin from the given collection using + :class:`bdc_catalog.models.CollectionSRC` + + Raises: + ValueError: When collection is related itself (cyclic relationship). 
+ """ + out = [] + dupes = [] + ref = collection + if not isinstance(collection, Collection): + ref = Collection.get_by_id(collection) + + while ref is not None: + source: CollectionSRC = ( + CollectionSRC.query() + .filter(CollectionSRC.collection_id == ref.id) + .first() + ) + if source is None: + break + + ref: Collection = Collection.query().get(source.collection_src_id) + if ref.id in dupes: + raise ValueError(f'Collection {ref.identifier} has self reference') + + dupes.append(ref.id) + out.append(ref) + return out + + @property + def sources(self) -> List['Collection']: + """Retrieve the list of referred collections marked as origin.""" + return Collection.get_collection_sources(self) + class CollectionSRC(BaseModel): """Model for collection provenance/lineage.""" @@ -223,25 +257,3 @@ def to_dict(self) -> dict: """ return dict(name=self.provider.name, description=self.provider.description, url=self.provider.url, roles=self.roles) - - -class CollectionRole(BaseModel): - """Model to represent the link between Collection and Role.""" - - __tablename__ = 'collections_roles' - - collection_id = db.Column('collection_id', db.Integer(), - db.ForeignKey(Collection.id, onupdate='CASCADE', ondelete='CASCADE'), - nullable=False) - - role_id = db.Column('role_id', db.Integer(), - db.ForeignKey(Role.id, onupdate='CASCADE', ondelete='CASCADE'), - nullable=False) - - collection = relationship('Collection', lazy='joined', foreign_keys=[collection_id]) - role = relationship('Role', lazy='joined') - - __table_args__ = ( - PrimaryKeyConstraint(collection_id, role_id), - dict(schema=BDC_CATALOG_SCHEMA), - ) diff --git a/bdc_catalog/models/grid_ref_sys.py b/bdc_catalog/models/grid_ref_sys.py index ac3b98c..54bb615 100644 --- a/bdc_catalog/models/grid_ref_sys.py +++ b/bdc_catalog/models/grid_ref_sys.py @@ -70,11 +70,12 @@ def create_geometry_table(cls, table_name: str, features: Iterable[Feature], sri srid = 100001 opts = kwargs.copy() - if not opts.get('schema'): - opts['schema'] 
= BDC_CATALOG_SCHEMA + schema = opts.get('schema', BDC_CATALOG_SCHEMA) + table_name_ = table_name.lower() + opts['schema'] = schema grid_table = Table( - table_name.lower(), db.metadata, + table_name_, db.metadata, db.Column('id', db.Integer(), primary_key=True, autoincrement=True), db.Column('tile', db.String), db.Column('geom', geoalchemy2.Geometry(geometry_type='Polygon', srid=srid, spatial_index=False)), @@ -83,14 +84,14 @@ def create_geometry_table(cls, table_name: str, features: Iterable[Feature], sri ) inspector = inspect(db.engine) - if inspector.has_table(table_name, schema=BDC_CATALOG_SCHEMA): - raise RuntimeError(f'Table {table_name} already exists') + if inspector.has_table(table_name_, schema=schema): + raise RuntimeError(f'Table {table_name_} already exists') grid_table.create(bind=db.engine) db.session.execute(grid_table.insert().values(features)) - table_id = cls.get_table_id(table_name, schema=opts.get('schema')) + table_id = cls.get_table_id(table_name_, schema=opts.get('schema')) grs.table_id = table_id diff --git a/bdc_catalog/models/item.py b/bdc_catalog/models/item.py index 45d41b0..ff2d0ff 100644 --- a/bdc_catalog/models/item.py +++ b/bdc_catalog/models/item.py @@ -32,10 +32,10 @@ from sqlalchemy.sql import expression from ..config import BDC_CATALOG_SCHEMA +from ..utils import multihash_checksum_sha256 from .base_sql import BaseModel, db from .collection import Collection from .processor import Processor -from ..utils import multihash_checksum_sha256 try: import rasterio @@ -135,6 +135,8 @@ class Item(BaseModel): Index(None, tile_id), Index(None, start_date), Index('idx_bdc_items_start_date_desc', start_date.desc()), + Index(None, start_date.desc(), id), + Index('idx_bdc_items_start_date_desc_id_is_available', start_date.desc(), id, is_available), Index(None, metadata_), dict(schema=BDC_CATALOG_SCHEMA), ) @@ -302,6 +304,7 @@ def add_processor(self, processor: Processor) -> 'ItemsProcessors': Note: May raise error when processor is already 
attached. + Make sure to ``commit`` or ``rollback`` changes. Args: processor (Processor): Instance of Processor diff --git a/bdc_catalog/models/provider.py b/bdc_catalog/models/provider.py index 638d171..633d15c 100644 --- a/bdc_catalog/models/provider.py +++ b/bdc_catalog/models/provider.py @@ -19,6 +19,7 @@ """Model for table ``bdc.providers``.""" from sqlalchemy import Column, Index, Integer, String, Text +from sqlalchemy.orm import relationship from ..config import BDC_CATALOG_SCHEMA from .base_sql import BaseModel diff --git a/bdc_catalog/models/role.py b/bdc_catalog/models/role.py deleted file mode 100644 index 52f2cb4..0000000 --- a/bdc_catalog/models/role.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# This file is part of BDC-Catalog. -# Copyright (C) 2022 INPE. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# - -"""Model for table ``bdc.role``.""" - -from sqlalchemy import Column, Index, Integer, String, Text -from sqlalchemy.orm import relationship - -from ..config import BDC_CATALOG_SCHEMA -from .base_sql import BaseModel - - -class Role(BaseModel): - """Model for table ``bdc.role``. - - The role model consists in a basic way to specify a required roles to access collections/items. - These values are used by BDC-Catalog NGINX plugin in order to validate user access. 
- """ - - __tablename__ = 'roles' - - id = Column(Integer, primary_key=True, autoincrement=True) - name = Column(String(64), unique=True, nullable=False) - description = Column(Text) - - __table_args__ = ( - Index(None, name), - dict(schema=BDC_CATALOG_SCHEMA), - ) diff --git a/bdc_catalog/utils.py b/bdc_catalog/utils.py index 59cddf8..d6d7599 100644 --- a/bdc_catalog/utils.py +++ b/bdc_catalog/utils.py @@ -81,19 +81,22 @@ def multihash_checksum_sha256(file_path: Union[str, BytesIO]): def geom_to_wkb(geom: Any, srid: int = None) -> geoalchemy2.WKBElement: """Create a WKB geometry from a shapely.geometry.Geometry. - This helper uses the GeoAlchemy2 helper to ensure to create a extended WKB element (EWKB). + This helper uses the GeoAlchemy2 helper to ensure to create an extended WKB element (EWKB). It forces the SQLAlchemy field to load the Geometry into database with EWKB instead WKT to avoid any bit error precision. Note: - Make sure you have installed extra ``geo`` or the library ``Shapely`` before. + Make sure you have installed extra ``geo`` or the library ``Shapely`` before:: + + pip install Shapely Args: geom: A shapely Geometry srid: The Geometry SRID associated. 
""" from geoalchemy2.shape import from_shape - # Use extended=True to available the Geometry as EWKB + + # Use extended=True to transform the geometry as EWKB return from_shape(geom, srid=-1 if srid is None else srid, extended=True) @@ -104,7 +107,8 @@ def create_collection(name: str, version: Any, bands: list, Note: Used from :py:meth:`bdc_catalog.cli.load_data` """ - from bdc_catalog.models import Band, Collection, GridRefSys, MimeType, ResolutionUnit, db + from bdc_catalog.models import (Band, Collection, GridRefSys, MimeType, + ResolutionUnit, db) collection = ( Collection.query() diff --git a/bdc_catalog/version.py b/bdc_catalog/version.py index bc62534..fd7e024 100644 --- a/bdc_catalog/version.py +++ b/bdc_catalog/version.py @@ -19,4 +19,4 @@ """Version information for BDC-Catalog.""" -__version__ = '1.0.0' +__version__ = '1.0.1' diff --git a/docs/model/db-schema.png b/docs/model/db-schema.png index 0b874b5..d062691 100644 Binary files a/docs/model/db-schema.png and b/docs/model/db-schema.png differ diff --git a/docs/model/db-schema.xml b/docs/model/db-schema.xml index 72a6991..1141ac8 100644 --- a/docs/model/db-schema.xml +++ b/docs/model/db-schema.xml @@ -1,2 +1,2 @@  \ No newline at end of file  \ No newline at end of file diff --git a/docs/sphinx/tutorial.rst b/docs/sphinx/tutorial.rst index a6b14ae..223d099 100644 --- a/docs/sphinx/tutorial.rst +++ b/docs/sphinx/tutorial.rst @@ -193,21 +193,53 @@ In order to search for Collections, please, take a look in the next query. To re .. literalinclude:: ../../examples/query_collections.py :language: python - :lines: 24-40 + :lines: 24-25,27,30-41 You can increment the query and restrict to show only ``available`` collections: .. literalinclude:: ../../examples/query_collections.py :language: python - :lines: 24-34,42-48 + :lines: 24-25,27,34-35,43-49 A collection, essentially, has a few unique keys. Its defined by both ``id`` and ``Name-Version``. .. 
literalinclude:: ../../examples/query_collections.py :language: python - :lines: 24-34,50-59 + :lines: 24-25,27,34-35,51-60 + + +For spatial query constraints, consider using the special `SQLAlchemy func `_ to generate SQL expressions at runtime. In this case, we are going to use `ST_Intersects `_: + + +.. literalinclude:: ../../examples/query_collections.py + :language: python + :lines: 24-25,27,34-35,63-70 + + +.. note:: + + Remember that the property :attr:`bdc_catalog.models.Collection.spatial_extent` is updated, through SQL triggers, whenever an item is inserted into the database. + + +Update collection ++++++++++++++++++ + +The process to update any collection is quite simple. With the support of SQLAlchemy, there are a few ways to update a collection, which are: + +.. literalinclude:: ../../examples/query_collections.py + :language: python + :lines: 24-25,27,34-35,51-52,71-82 + + +If you would like to update several collections at once, you have to adapt the query and then update in cascade as follows: + + +.. literalinclude:: ../../examples/query_collections.py + :language: python + :lines: 24-25,27,34-35,84-91 + .. item_: @@ -257,6 +289,15 @@ You can register this item as following: :lines: 26- +.. note:: + + We strongly recommend using :func:`bdc_catalog.utils.geom_to_wkb` to convert geometries into WKB instead of WKT to avoid floating-point precision errors. + +.. note:: + + We also suggest keeping the ``footprint`` as simplified geometries. It optimizes the queries while searching for spatial areas in PostgreSQL/PostGIS. + + Access Items ++++++++++++ @@ -288,6 +329,14 @@ Since the ``BDC-Catalog`` integrates with ``SQLAlchemy ORM``, you can join relat :emphasize-lines: 16,18 +If you would like to add a spatial query condition to filter by a region of interest (ROI), you may use the following syntax: + +.. literalinclude:: ../../examples/query_items.py + :language: python + :lines: 25-37,66-75 + :emphasize-lines: 14,18 + + .. 
note:: Whenever the entry ``Item.query()`` is used, it retrieves `ALL` columns from :class:`bdc_catalog.models.Item`. diff --git a/examples/query_collections.py b/examples/query_collections.py index 3c65dc9..673bfec 100644 --- a/examples/query_collections.py +++ b/examples/query_collections.py @@ -22,10 +22,11 @@ """ from bdc_catalog import BDCCatalog -from bdc_catalog.models import Collection +from bdc_catalog.models import Collection, db +from bdc_catalog.utils import geom_to_wkb from flask import Flask from shapely.geometry import box -from sqlalchemy import func +from sqlalchemy import func, update app = Flask(__name__) app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql://postgres:postgres@localhost:5432/bdc' @@ -62,8 +63,29 @@ roi = box(-54, -12, -53, -11) collections = ( Collection.query() - .filter(func.ST_Intersects(Collection.spatial_extent, roi), + .filter(func.ST_Intersects(Collection.spatial_extent, geom_to_wkb(roi)), Collection.start_date >= '2022-01-01') .all() ) print(f"Collections Filter ({roi.wkt}): {','.join([c.identifier for c in collections])}") + + # Update collection + collection.title = 'Sentinel-2 - Level-1C' + collection.save() + + # Alternative ways to update + ( + db.session.query(Collection) + .filter(Collection.identifier == "S2_L1C-1") + .update({"title": 'Sentinel-2 - Level-1C'}, synchronize_session="fetch") + ) + db.session.commit() + + # Mark all EO collections as available. 
+ statement = ( + update(Collection) + .where(Collection.category == 'eo') + .values(is_available=True) + ) + db.session.execute(statement) + db.session.commit() diff --git a/setup.py b/setup.py index 4048d9d..1a87840 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ 'Flask-Alembic>=2.0.0', 'GeoAlchemy2>=0.8.4', 'py-multihash>=2,<3', - 'lccs-db @ git+https://github.com/brazil-data-cube/lccs-db@v0.8.1', + 'bdc-db @ git+https://github.com/brazil-data-cube/bdc-db@v0.6.3', ] packages = find_packages() @@ -116,7 +116,7 @@ setup_requires=setup_requires, tests_require=tests_require, classifiers=[ - 'Development Status :: 3 - Alpha', + 'Development Status :: 3 - Stable', 'Environment :: Web Environment', 'Intended Audience :: Education', 'Intended Audience :: Science/Research', diff --git a/tests/conftest.py b/tests/conftest.py index 0f4e79c..17c06ed 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,8 +18,11 @@ """Config test fixtures.""" +import json +import os import subprocess +import pkg_resources import pytest from flask import Flask @@ -33,12 +36,33 @@ def app(): yield _app +@pytest.fixture() +def fixture_dir(): + """Retrieve the base path for fixtures.""" + return pkg_resources.resource_filename(__name__, '../examples/fixtures/') + + +@pytest.fixture() +def json_data(fixture_dir): + """Load the fixture json data in test cases.""" + data = {} + for filename in os.listdir(fixture_dir): + entry = os.path.join(fixture_dir, filename) + + if not os.path.isfile(entry): + continue + + with open(entry) as fd: + data[filename] = json.load(fd) + + return data + + def pytest_sessionstart(session): """Load BDC-Catalog and prepare database environment.""" for command in ['init', 'create-namespaces', 'create-extension-postgis']: subprocess.call(f'bdc-catalog db {command}', shell=True) - subprocess.call(f'lccs-db db create-extension-hstore', shell=True) # Create tables subprocess.call(f'bdc-catalog db create-schema', shell=True) diff --git a/tests/test_cli.py 
b/tests/test_cli.py index 2a2fe42..a59b4ee 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -21,9 +21,11 @@ import subprocess import sys +import pytest +from click import MissingParameter from click.testing import CliRunner -from bdc_catalog.cli import cli +from bdc_catalog.cli import cli, load_data def test_basic_cli(): @@ -38,3 +40,14 @@ def test_cli_module(): res = subprocess.call(f'{sys.executable} -m bdc_catalog', shell=True) assert res == 0 + + +def test_cli_load_data(fixture_dir): + """Test the ``load_data`` command: missing parameters and loading from a directory.""" + # Test missing parameter + res = CliRunner().invoke(load_data, args=[]) + assert res.exit_code != 0 + assert "Missing --ifile or --from-dir parameter" in res.output + + res = CliRunner().invoke(load_data, args=["--from-dir", fixture_dir]) + assert res.exit_code == 0 diff --git a/tests/test_models.py b/tests/test_models.py index d4907cf..5c4e4dd 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -23,7 +23,7 @@ from shapely.geometry import Polygon from bdc_catalog import BDCCatalog -from bdc_catalog.models import GridRefSys, MimeType +from bdc_catalog.models import Collection, GridRefSys, MimeType, Provider @pytest.fixture @@ -57,7 +57,7 @@ def test_create_grid(db): with pytest.raises(RuntimeError) as e: _ = GridRefSys.create_geometry_table(**fields, srid=4326, schema='public', extend_existing=True) - assert str(e.value) == f'Table {fields["table_name"]} already exists' + assert str(e.value) == f'Table {fields["table_name"].lower()} already exists' def test_base_query_methods(db): @@ -75,3 +75,21 @@ def test_base_query_methods(db): db_mimes = MimeType.query().all() for mime in db_mimes: assert mime.name in expected_mime_types + + +def test_provider_creation(db): + with db.session.begin_nested(): + provider = Provider() + provider.name = 'ESA' + provider.url = 'https://www.esa.int/' + provider.save(commit=False) + db.session.commit() + + assert provider.id > 0 + + +def test_collection_methods(db): + collection = 
Collection.get_by_id('S2_L1C-1') + assert collection + + providers = collection.providers \ No newline at end of file