Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import Monitoring #3279

Draft
wants to merge 5 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/cypress.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ on:
- master
- hotfixes
- develop
- feat/import
- feat/import-monitorings
pull_request:
branches:
- master
- hotfixes
- develop
- feat/import
- feat/import-monitorings

jobs:
mount_app_and_run_cypress:
Expand Down Expand Up @@ -111,7 +111,7 @@ jobs:
cp ./config/settings.ini.sample ./config/settings.ini
./install/05_install_frontend.sh --ci
env:
GEONATURE_CONFIG_FILE: '${{ github.workspace }}/config/test_config.toml'
GEONATURE_CONFIG_FILE: "${{ github.workspace }}/config/test_config.toml"
- name: Install core modules
run: |
geonature install-gn-module contrib/occtax OCCTAX --build=false
Expand Down
22 changes: 12 additions & 10 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,30 @@ on:
- master
- hotfixes
- develop
- feat/import-monitorings
pull_request:
branches:
- master
- hotfixes
- develop
- feat/import-monitorings

jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
debian-version: ['11', '12']
debian-version: ["11", "12"]
include:
- debian-version: '11'
python-version: '3.9'
postgres-version: '13'
postgis-version: '3.2'
- debian-version: '12'
python-version: '3.11'
postgres-version: '15'
postgis-version: '3.3'
- debian-version: "11"
python-version: "3.9"
postgres-version: "13"
postgis-version: "3.2"
- debian-version: "12"
python-version: "3.11"
postgres-version: "15"
postgis-version: "3.3"

name: Debian ${{ matrix.debian-version }}

Expand Down Expand Up @@ -68,7 +70,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache: "pip"
- name: Install GDAL
run: |
sudo apt update
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.15.0
2.16.0alpha
5 changes: 5 additions & 0 deletions backend/geonature/core/gn_monitoring/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ class TBaseVisits(DB.Model):
foreign_keys=[id_dataset],
)

id_import = DB.Column(DB.Integer, nullable=True)


@serializable
@geoserializable(geoCol="geom", idCol="id_base_site")
Expand Down Expand Up @@ -212,6 +214,8 @@ class TBaseSites(DB.Model):
foreign_keys=[cor_site_module.c.id_base_site, cor_site_module.c.id_module],
)

id_import = DB.Column(DB.Integer, nullable=True)


@serializable
class TObservations(DB.Model):
Expand All @@ -226,3 +230,4 @@ class TObservations(DB.Model):
cd_nom = DB.Column(DB.Integer)
comments = DB.Column(DB.String)
uuid_observation = DB.Column(UUID(as_uuid=True), default=select(func.uuid_generate_v4()))
id_import = DB.Column(DB.Integer, nullable=True)
20 changes: 16 additions & 4 deletions backend/geonature/core/gn_synthese/imports/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def check_transient_data(task, logger, imprt: TImports):
selected_fields = {
field_name: fields[field_name]
for field_name, source_field in imprt.fieldmapping.items()
if source_field in imprt.columns
if source_field.get("column_src", None) in imprt.columns
}
init_rows_validity(imprt)
task.update_state(state="PROGRESS", meta={"progress": 0.05})
Expand Down Expand Up @@ -218,7 +218,15 @@ def update_batch_progress(batch, step):
do_nomenclatures_mapping(
imprt,
entity,
selected_fields,
{
field_name: fields[field_name]
for field_name, mapping in imprt.fieldmapping.items()
if field_name in fields
and (
mapping.get("column_src", None) in imprt.columns
or mapping.get("default_value") is not None
)
},
fill_with_defaults=current_app.config["IMPORT"][
"FILL_MISSING_NOMENCLATURE_WITH_DEFAULT_VALUE"
],
Expand Down Expand Up @@ -339,11 +347,15 @@ def import_data_to_destination(imprt: TImports) -> None:
if field_name not in fields: # not a destination field
continue
field = fields[field_name]
column_src = source_field.get("column_src", None)
if field.multi:
if not set(source_field).isdisjoint(imprt.columns):
if not set(column_src).isdisjoint(imprt.columns):
insert_fields |= {field}
else:
if source_field in imprt.columns:
if (
column_src in imprt.columns
or source_field.get("default_value", None) is not None
):
insert_fields |= {field}

insert_fields -= {fields["unique_dataset_id"]} # Column only used for filling `id_dataset`
Expand Down
6 changes: 3 additions & 3 deletions backend/geonature/core/imports/checks/dataframe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from geonature.utils.env import db

from geonature.core.imports.models import ImportUserError, ImportUserErrorType
from geonature.core.imports.models import ImportUserError, ImportUserErrorType, TImports
from geonature.core.imports.utils import generated_fields


Expand Down Expand Up @@ -101,7 +101,7 @@ def __error_replace(*args, **kwargs):
return _error_replace


def report_error(imprt, entity, df, error):
def report_error(imprt: TImports, entity, df, error):
"""
Reports an error found in the dataframe, updates the validity column and insert
the error in the `t_user_errors` table.
Expand Down Expand Up @@ -147,7 +147,7 @@ def report_error(imprt, entity, df, error):
# f'{error_type.name}' # FIXME comment
ordered_invalid_rows = sorted(invalid_rows["line_no"])
column = generated_fields.get(error["column"], error["column"])
column = imprt.fieldmapping.get(column, column)
column = imprt.fieldmapping.get(column, {}).get("column_src", column)
# If an error for same import, same column and of the same type already exists,
# we concat existing erroneous rows with current rows.
stmt = pg_insert(ImportUserError).values(
Expand Down
16 changes: 9 additions & 7 deletions backend/geonature/core/imports/checks/sql/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ def init_rows_validity(imprt: TImports, dataset_name_field: str = "id_dataset"):
# as rows with multi-entity field only will raise an ORPHAN_ROW error
selected_fields_names = []
for field_name, source_field in imprt.fieldmapping.items():
if type(source_field) == list:
selected_fields_names.extend(set(source_field) & set(imprt.columns))
elif source_field in imprt.columns:
column_src = source_field.get("column_src", None)
if type(column_src) == list:
selected_fields_names.extend(set(column_src) & set(imprt.columns))
elif column_src in imprt.columns:
selected_fields_names.append(field_name)
for entity in entities:
# Select fields associated to this entity *and only to this entity*
Expand All @@ -64,15 +65,16 @@ def init_rows_validity(imprt: TImports, dataset_name_field: str = "id_dataset"):
)


def check_orphan_rows(imprt):
def check_orphan_rows(imprt: TImports):
transient_table = imprt.destination.get_transient_table()
# TODO: handle multi-source fields
# This is actually not a big issue as multi-source fields are unlikely to also be multi-entity fields.
selected_fields_names = []
for field_name, source_field in imprt.fieldmapping.items():
if type(source_field) == list:
selected_fields_names.extend(set(source_field) & set(imprt.columns))
elif source_field in imprt.columns:
column_src = source_field.get("column_src", None)
if type(column_src) == list:
selected_fields_names.extend(set(column_src) & set(imprt.columns))
elif column_src in imprt.columns:
selected_fields_names.append(field_name)
# Select fields associated to multiple entities
AllEntityField = sa.orm.aliased(EntityField)
Expand Down
2 changes: 1 addition & 1 deletion backend/geonature/core/imports/checks/sql/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def report_erroneous_rows(
transient_table = imprt.destination.get_transient_table()
error_type = ImportUserErrorType.query.filter_by(name=error_type).one()
error_column = generated_fields.get(error_column, error_column)
error_column = imprt.fieldmapping.get(error_column, error_column)
error_column = imprt.fieldmapping.get(error_column, {}).get("column_src", error_column)
if error_type.level in level_validity_mapping:
assert entity is not None
cte = (
Expand Down
27 changes: 23 additions & 4 deletions backend/geonature/core/imports/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@ class BibFields(db.Model):
fr_label = db.Column(db.Unicode, nullable=False)
eng_label = db.Column(db.Unicode, nullable=True)
type_field = db.Column(db.Unicode, nullable=True)
type_field_params = db.Column(MutableDict.as_mutable(JSON))
mandatory = db.Column(db.Boolean, nullable=False)
autogenerated = db.Column(db.Boolean, nullable=False)
mnemonique = db.Column(db.Unicode, db.ForeignKey(BibNomenclaturesTypes.mnemonique))
Expand Down Expand Up @@ -608,7 +609,7 @@ def optional_conditions_to_jsonschema(name_field: str, optional_conditions: Iter
"if": {
"not": {
"properties": {
field_opt: {"type": "string"} for field_opt in optional_conditions
field_opt: {"type": "object"} for field_opt in optional_conditions
}
}
},
Expand Down Expand Up @@ -726,9 +727,27 @@ def validate_values(field_mapping_json):
"type": "object",
"properties": {
field.name_field: {
"type": (
"boolean" if field.autogenerated else ("array" if field.multi else "string")
),
"type": "object",
"properties": {
"column_src": {
"type": (
"boolean"
if field.autogenerated
else ("array" if field.multi else "string")
),
},
"default_value": {
"oneOf": [
{"type": "boolean"},
{"type": "number"},
{"type": "string"},
{"type": "array"},
]
},
},
"required": [],
"additionalProperties": False,
"anyOf": [{"required": ["column_src"]}, {"required": ["default_value"]}],
}
for field in fields
},
Expand Down
3 changes: 3 additions & 0 deletions backend/geonature/core/imports/routes/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,11 @@ def get_fields(scope, destination):
fields=[
"id_field",
"name_field",
"type_field",
"type_field_params",
"fr_label",
"eng_label",
"mnemonique",
"mandatory",
"autogenerated",
"multi",
Expand Down
7 changes: 5 additions & 2 deletions backend/geonature/core/imports/routes/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,8 +368,11 @@ def get_import_values(scope, imprt):
# this nomenclated field is not mapped
continue
source = imprt.fieldmapping[field.name_field]
if source not in imprt.columns:
# the file do not contain this field expected by the mapping
if (
source.get("column_src", None) not in imprt.columns
and source.get("default_value", None) is None
):
# the file does not contain this field expected by the mapping, and there is no default value
continue
# TODO: vérifier que l’on a pas trop de valeurs différentes ?
column = field.source_column
Expand Down
Loading
Loading