From b7199913db79478895eb357df99779650aca9c5b Mon Sep 17 00:00:00 2001 From: Pierre-Narcisi Date: Thu, 7 Dec 2023 16:08:34 +0100 Subject: [PATCH] Feat/update dependencies (#498) * Updating SqlAlchemy to 1.4 * Abandoning support for Debian 10 --------- Co-authored-by: Jacobe2169 --- .github/workflows/pytest.yml | 9 +- backend/gn_module_import/admin.py | 2 +- .../gn_module_import/checks/sql/__init__.py | 87 ++++++++----------- backend/gn_module_import/routes/imports.py | 18 ++-- backend/gn_module_import/tasks.py | 8 +- backend/gn_module_import/tests/conftest.py | 1 + .../gn_module_import/tests/test_imports.py | 20 +++-- .../gn_module_import/tests/test_mappings.py | 15 ++-- backend/gn_module_import/utils.py | 2 +- dependencies/GeoNature | 2 +- requirements-dev.in | 9 ++ 11 files changed, 89 insertions(+), 84 deletions(-) create mode 100644 requirements-dev.in diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 54e7c9cc..a4d181b8 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -22,12 +22,8 @@ jobs: strategy: fail-fast: false matrix: - debian-version: [ '10', '11', '12' ] + debian-version: [ '11', '12' ] include: - - debian-version: '10' - python-version: "3.7" - postgres-version: '11' - postgis-version: '2.5' - debian-version: '11' python-version: '3.9' postgres-version: '13' @@ -110,7 +106,8 @@ jobs: GEONATURE_CONFIG_FILE: dependencies/GeoNature/config/test_config.toml - name: Install import module backend run: | - pip install --editable . + pip install -r requirements-dev.in + pip install . - name: Install import module database run: | geonature upgrade-modules-db IMPORT diff --git a/backend/gn_module_import/admin.py b/backend/gn_module_import/admin.py index 4e19465b..3f32763c 100644 --- a/backend/gn_module_import/admin.py +++ b/backend/gn_module_import/admin.py @@ -2,7 +2,7 @@ from itertools import groupby from pprint import pformat -from flask import Markup +from markupsafe import Markup from flask_admin.contrib.sqla import ModelView from flask_admin.form import BaseForm from wtforms.validators import StopValidation diff --git a/backend/gn_module_import/checks/sql/__init__.py b/backend/gn_module_import/checks/sql/__init__.py index ba5c0acc..1a822058 100644 --- a/backend/gn_module_import/checks/sql/__init__.py +++ b/backend/gn_module_import/checks/sql/__init__.py @@ -38,7 +38,7 @@ def do_nomenclatures_mapping(imprt, fields): source_field = getattr(ImportSyntheseData, field.source_field) # This CTE return the list of source value / cd_nomenclature for a given nomenclature type cte = ( - select([column("key").label("value"), column("value").label("cd_nomenclature")]) + select(column("key").label("value"), column("value").label("cd_nomenclature")) .select_from(sa.func.JSON_EACH_TEXT(TImports.contentmapping[field.mnemonique])) .where(TImports.id_import == imprt.id_import) .cte("cte") @@ -54,7 +54,7 @@ def do_nomenclatures_mapping(imprt, fields): .where(TNomenclatures.id_type == BibNomenclaturesTypes.id_type) .values({field.synthese_field: TNomenclatures.id_nomenclature}) ) - db.session.execute(stmt) + db.session.execute(stmt, execution_options=dict({"synchronize_session": 'fetch'})) for field in BibFields.query.filter(BibFields.mnemonique != None).all(): if ( @@ -181,7 +181,7 @@ def check_referential(imprt, field, reference_field, error_type, reference_table # We outerjoin the referential, and select rows where there is a value in synthese field # but no value in referential, which means no value in the referential matched synthese field. cte = ( - select([ImportSyntheseData.line_no]) + select(ImportSyntheseData.line_no) .select_from( join( ImportSyntheseData, @@ -234,22 +234,18 @@ def set_altitudes(imprt, fields): return altitudes = ( select( - [ - column("altitude_min"), - column("altitude_max"), - ] + column("altitude_min"), + column("altitude_max"), ) .select_from(func.ref_geo.fct_get_altitude_intersection(ImportSyntheseData.the_geom_local)) .lateral("altitudes") ) cte = ( select( - [ - ImportSyntheseData.id_import, - ImportSyntheseData.line_no, - altitudes.c.altitude_min, - altitudes.c.altitude_max, - ] + ImportSyntheseData.id_import, + ImportSyntheseData.line_no, + altitudes.c.altitude_min, + altitudes.c.altitude_max, ) .where(ImportSyntheseData.id_import == imprt.id_import) .where(ImportSyntheseData.the_geom_local != None) @@ -270,27 +266,23 @@ def set_altitudes(imprt, fields): .values( { ImportSyntheseData.altitude_min: sa.case( - whens=[ - ( - sa.or_( - ImportSyntheseData.src_altitude_min == None, - ImportSyntheseData.src_altitude_min == "", - ), - cte.c.altitude_min, + ( + sa.or_( + ImportSyntheseData.src_altitude_min == None, + ImportSyntheseData.src_altitude_min == "", ), - ], + cte.c.altitude_min, + ), else_=ImportSyntheseData.altitude_min, ), ImportSyntheseData.altitude_max: sa.case( - whens=[ - ( - sa.or_( - ImportSyntheseData.src_altitude_max == None, - ImportSyntheseData.src_altitude_max == "", - ), - cte.c.altitude_max, + ( + sa.or_( + ImportSyntheseData.src_altitude_max == None, + ImportSyntheseData.src_altitude_max == "", ), - ], + cte.c.altitude_max, + ), else_=ImportSyntheseData.altitude_max, ), } @@ -302,7 +294,7 @@ def set_altitudes(imprt, fields): "altitude_max": BibFields.query.filter_by(name_field="altitude_max").one(), } ) - db.session.execute(stmt) + db.session.execute(stmt.execution_options(synchronize_session="fetch")) def get_duplicates_query(imprt, synthese_field, whereclause=sa.true()): @@ -312,19 +304,17 @@ def get_duplicates_query(imprt, synthese_field, whereclause=sa.true()): ) partitions = ( select( - [ - array_agg(ImportSyntheseData.line_no) - .over( - partition_by=synthese_field, - ) - .label("duplicate_lines") - ] + array_agg(ImportSyntheseData.line_no) + .over( + partition_by=synthese_field, + ) + .label("duplicate_lines") ) .where(whereclause) .alias("partitions") ) duplicates = ( - select([func.unnest(partitions.c.duplicate_lines).label("lines")]) + select(func.unnest(partitions.c.duplicate_lines).label("lines")) .where(func.array_length(partitions.c.duplicate_lines, 1) > 1) .alias("duplicates") ) @@ -557,7 +547,6 @@ def set_geom_from_area_code(imprt, source_column, area_type_filter): ) .join( LAreas.area_type, - aliased=True, ) .filter(area_type_filter) .with_entities( @@ -580,7 +569,7 @@ def set_geom_from_area_code(imprt, source_column, area_type_filter): .where(ImportSyntheseData.id_import == cte.c.id_import) .where(ImportSyntheseData.line_no == cte.c.line_no) ) - db.session.execute(stmt) + db.session.execute(stmt.execution_options(synchronize_session="fetch")) def report_erroneous_rows(imprt, error_type, error_column, whereclause): @@ -602,20 +591,18 @@ def report_erroneous_rows(imprt, error_type, error_column, whereclause): ) else: cte = ( - select([ImportSyntheseData.line_no]) + select(ImportSyntheseData.line_no) .where(ImportSyntheseData.imprt == imprt) .where(whereclause) .cte("cte") ) error = select( - [ - literal(imprt.id_import).label("id_import"), - literal(error_type.pk).label("id_type"), - array_agg( - aggregate_order_by(cte.c.line_no, cte.c.line_no), - ).label("rows"), - literal(error_column).label("error_column"), - ] + literal(imprt.id_import).label("id_import"), + literal(error_type.pk).label("id_type"), + array_agg( + aggregate_order_by(cte.c.line_no, cte.c.line_no), + ).label("rows"), + literal(error_column).label("error_column"), ).alias("error") stmt = insert(ImportUserError).from_select( names=[ @@ -624,7 +611,7 @@ def report_erroneous_rows(imprt, error_type, error_column, whereclause): ImportUserError.rows, ImportUserError.column, ], - select=(select([error]).where(error.c.rows != None)), + select=(select(error).where(error.c.rows != None)), ) db.session.execute(stmt) diff --git a/backend/gn_module_import/routes/imports.py b/backend/gn_module_import/routes/imports.py index 9749dacc..ea569c6e 100644 --- a/backend/gn_module_import/routes/imports.py +++ b/backend/gn_module_import/routes/imports.py @@ -5,7 +5,10 @@ from flask import request, current_app, jsonify, g, stream_with_context, send_file from werkzeug.exceptions import Conflict, BadRequest, Forbidden, Gone -from werkzeug.urls import url_quote +# url_quote was deprecated in werkzeug 3.0 https://stackoverflow.com/a/77222063/5807438 +from urllib.parse import ( + quote as url_quote, +) from sqlalchemy import or_, func, desc from sqlalchemy.inspection import inspect from sqlalchemy.orm import joinedload, Load, load_only, undefer, contains_eager, class_mapper @@ -120,7 +123,7 @@ def get_import_list(scope): .join(TImports.dataset, isouter=True) .join(TImports.authors, isouter=True) .filter_by_scope(scope) - .filter(or_(*filters)) + .filter(or_(*filters) if len(filters) > 0 else True) .order_by(order_by) .paginate(page=page, error_out=False, max_per_page=limit) ) @@ -185,7 +188,7 @@ def upload_file(scope, import_id): dataset_id = int(request.form["datasetId"]) except ValueError: raise BadRequest(description="'datasetId' must be an integer.") - dataset = TDatasets.query.get(dataset_id) + dataset = db.session.get(TDatasets, (dataset_id)) if dataset is None: raise BadRequest(description=f"Dataset '{dataset_id}' does not exist.") if not dataset.has_instance_permission(scope): # FIXME wrong scope @@ -369,13 +372,13 @@ def get_import_values(scope, import_id): # the file do not contain this field expected by the mapping continue # TODO: vérifier que l’on a pas trop de valeurs différentes ? - column = field.source_column + column = getattr(ImportSyntheseData, field.source_column) values = [ - getattr(data, column) + getattr(data, field.source_column) for data in ( ImportSyntheseData.query.filter_by(imprt=imprt) .options(load_only(column)) - .distinct(getattr(ImportSyntheseData, column)) + .distinct(column) .all() ) ] @@ -454,13 +457,14 @@ def preview_valid_data(scope, import_id): BibFields.name_field.in_(imprt.fieldmapping.keys()), ).all() columns = [field.name_field for field in fields] + columns_instance = [getattr(ImportSyntheseData, field.name_field) for field in fields] valid_data = ( ImportSyntheseData.query.filter_by( imprt=imprt, valid=True, ) .options( - load_only(*columns), + load_only(*columns_instance), ) .limit(100) ) diff --git a/backend/gn_module_import/tasks.py b/backend/gn_module_import/tasks.py index b4ef6d40..f6a93240 100644 --- a/backend/gn_module_import/tasks.py +++ b/backend/gn_module_import/tasks.py @@ -46,7 +46,7 @@ @celery_app.task(bind=True) def do_import_checks(self, import_id): logger.info(f"Starting verification of import {import_id}.") - imprt = TImports.query.get(import_id) + imprt = db.session.get(TImports, import_id) if imprt is None or imprt.task_id != self.request.id: logger.warning("Task cancelled, doing nothing.") return @@ -131,7 +131,7 @@ def update_batch_progress(batch, step): progress = 0.4 + ((i + 1) / len(sql_checks)) * 0.6 self.update_state(state="PROGRESS", meta={"progress": progress}) - imprt = TImports.query.with_for_update(of=TImports).get(import_id) + imprt = db.session.get(TImports, import_id, with_for_update={"of": TImports}) if imprt is None or imprt.task_id != self.request.id: logger.warning("Task cancelled, rollback changes.") db.session.rollback() @@ -154,7 +154,7 @@ def update_batch_progress(batch, step): @celery_app.task(bind=True) def do_import_in_synthese(self, import_id): logger.info(f"Starting insertion in synthese of import {import_id}.") - imprt = TImports.query.get(import_id) + imprt = db.session.get(TImports, import_id) if imprt is None or imprt.task_id != self.request.id: logger.warning("Task cancelled, doing nothing.") return @@ -170,7 +170,7 @@ def do_import_in_synthese(self, import_id): ) import_data_to_synthese(imprt) ImportSyntheseData.query.filter_by(imprt=imprt).delete() - imprt = TImports.query.with_for_update(of=TImports).get(import_id) + imprt = db.session.get(TImports, import_id, with_for_update={"of": TImports}) if imprt is None or imprt.task_id != self.request.id: logger.warning("Task cancelled, rollback changes.") db.session.rollback() diff --git a/backend/gn_module_import/tests/conftest.py b/backend/gn_module_import/tests/conftest.py index 0f3b5091..f3606998 100644 --- a/backend/gn_module_import/tests/conftest.py +++ b/backend/gn_module_import/tests/conftest.py @@ -1,2 +1,3 @@ from geonature.tests.fixtures import * from geonature.tests.fixtures import app, _session, users +from pypnusershub.tests.fixtures import teardown_logout_user \ No newline at end of file diff --git a/backend/gn_module_import/tests/test_imports.py b/backend/gn_module_import/tests/test_imports.py index d32f193c..a2bd3db2 100644 --- a/backend/gn_module_import/tests/test_imports.py +++ b/backend/gn_module_import/tests/test_imports.py @@ -16,7 +16,6 @@ from apptax.taxonomie.models import BibListes, CorNomListe, BibNoms from geonature.utils.env import db -from geonature.tests.utils import set_logged_user_cookie, unset_logged_user_cookie from geonature.core.gn_permissions.tools import ( get_scopes_by_action as _get_scopes_by_action, ) @@ -27,6 +26,11 @@ from geonature.tests.fixtures import synthese_data, celery_eager from pypnusershub.db.models import User, Organisme +from pypnusershub.tests.utils import ( + set_logged_user, + set_logged_user_cookie, + unset_logged_user_cookie +) from pypnnomenclature.models import TNomenclatures, BibNomenclaturesTypes from ref_geo.tests.test_ref_geo import has_french_dem from ref_geo.models import LAreas @@ -306,7 +310,8 @@ def test_import_permissions(self, g_permissions): db.session.add(group) user = User(groupe=False) db.session.add(user) - other_user = User(groupe=False, organisme=organisme) + other_user = User(groupe=False) + other_user.organisme = organisme db.session.add(other_user) user.groups.append(group) imprt = TImports() @@ -427,7 +432,7 @@ def test_order_import_foreign(self, users, imports, uploaded_import): imprt["id_import"] for imprt in r_ordered_dataset.get_json()["imports"] ] assert r_ordered_dataset.status_code == 200, r_ordered_dataset.data - assert import_ids_sorte_dataset == import_ids_ordered_dataset + assert set(import_ids_sorte_dataset) == set(import_ids_ordered_dataset) def test_get_import(self, users, imports): def get(import_name): @@ -437,7 +442,7 @@ def get(import_name): assert get("own_import").status_code == Unauthorized.code - set_logged_user_cookie(self.client, users["noright_user"]) + set_logged_user(self.client, users["noright_user"]) assert get("own_import").status_code == Forbidden.code set_logged_user_cookie(self.client, users["user"]) @@ -532,7 +537,7 @@ def test_import_upload(self, users, datasets): ) assert r.status_code == 200, r.data - imprt = TImports.query.get(r.json["id_import"]) + imprt = db.session.get(TImports, r.json["id_import"]) assert imprt.source_file is not None assert imprt.full_file_name == "simple_file.csv" @@ -550,7 +555,6 @@ def test_import_error(self, users, datasets): ) assert r.status_code == 400, r.data assert r.json["description"] == "Impossible to upload empty files" - with open(tests_path / "files" / "starts_with_empty_line.csv", "rb") as f: data = { "file": (f, "starts_with_empty_line.csv"), @@ -615,6 +619,7 @@ def test_import_decode(self, users, new_import): with open(tests_path / "files" / "utf8_file.csv", "rb") as f: imprt.source_file = f.read() + db.session.flush() r = self.client.post(url_for("import.decode_file", import_id=imprt.id_import), data=data) assert r.status_code == BadRequest.code, r.data @@ -622,6 +627,7 @@ def test_import_decode(self, users, new_import): with open(tests_path / "files" / "duplicate_column_names.csv", "rb") as f: imprt.source_file = f.read() + db.session.flush() r = self.client.post(url_for("import.decode_file", import_id=imprt.id_import), data=data) assert r.status_code == BadRequest.code, r.data assert "Duplicates column names" in r.json["description"] @@ -833,7 +839,7 @@ def test_import_valid_file(self, users, datasets): ) assert r.status_code == 200, r.data imprt_json = r.get_json() - imprt = TImports.query.get(imprt_json["id_import"]) + imprt = db.session.get(TImports, imprt_json["id_import"]) assert len(imprt.authors) == 1 assert imprt_json["date_create_import"] assert imprt_json["date_update_import"] diff --git a/backend/gn_module_import/tests/test_mappings.py b/backend/gn_module_import/tests/test_mappings.py index 79dd1355..010a42ab 100644 --- a/backend/gn_module_import/tests/test_mappings.py +++ b/backend/gn_module_import/tests/test_mappings.py @@ -10,7 +10,6 @@ from sqlalchemy.orm import joinedload from geonature.utils.env import db -from geonature.tests.utils import set_logged_user_cookie from geonature.core.gn_permissions.models import ( Permission, ) @@ -29,6 +28,7 @@ Profils as Profil, UserApplicationRight, ) +from pypnusershub.tests.utils import (set_logged_user_cookie) from gn_module_import.models import ( MappingTemplate, @@ -282,7 +282,7 @@ def test_add_field_mapping(self, users, mappings): assert r.json["label"] == mappings["content_public"].label assert r.json["type"] == "FIELD" assert r.json["values"] == fieldmapping - mapping = MappingTemplate.query.get(r.json["id"]) + mapping = db.session.get(MappingTemplate, r.json["id"]) assert mapping.owners == [users["user"]] def test_add_content_mapping(self, users, mappings): @@ -308,7 +308,7 @@ def test_add_content_mapping(self, users, mappings): assert r.json["label"] == "test content mapping" assert r.json["type"] == "CONTENT" assert r.json["values"] == contentmapping - mapping = MappingTemplate.query.get(r.json["id"]) + mapping = db.session.get(MappingTemplate, r.json["id"]) assert mapping.owners == [users["user"]] def test_update_mapping_label(self, users, mappings): @@ -404,6 +404,7 @@ def test_update_content_mapping_values(self, users, mappings): contentvalues_update["NAT_OBJ_GEO"]["ne sais pas"] = "NSP" # add new mapping contentvalues_should = deepcopy(contentvalues_update) del contentvalues_update["NAT_OBJ_GEO"]["St"] # should not be removed! + db.session.flush() r = self.client.post( url_for("import.update_mapping", mappingtype=cm.type.lower(), id_mapping=cm.id), data=contentvalues_update, @@ -429,7 +430,7 @@ def test_delete_mapping(self, users, mappings): ) ) assert r.status_code == Unauthorized.code - assert MappingTemplate.query.get(mapping.id) is not None + assert db.session.get(MappingTemplate, mapping.id) is not None set_logged_user_cookie(self.client, users["self_user"]) r = self.client.delete( @@ -440,14 +441,14 @@ def test_delete_mapping(self, users, mappings): ) ) assert r.status_code == Forbidden.code - assert MappingTemplate.query.get(mapping.id) is not None + assert db.session.get(MappingTemplate, mapping.id) is not None set_logged_user_cookie(self.client, users["user"]) r = self.client.delete( url_for("import.delete_mapping", mappingtype="content", id_mapping=mapping.id) ) assert r.status_code == NotFound.code - assert MappingTemplate.query.get(mapping.id) is not None + assert db.session.get(MappingTemplate, mapping.id) is not None r = self.client.delete( url_for( @@ -457,7 +458,7 @@ def test_delete_mapping(self, users, mappings): ) ) assert r.status_code == 204 - assert MappingTemplate.query.get(mapping.id) is None + assert db.session.get(MappingTemplate, mapping.id) is None def test_synthesis_fields(self, users): assert ( diff --git a/backend/gn_module_import/utils.py b/backend/gn_module_import/utils.py index 0cd76a5a..d714746c 100644 --- a/backend/gn_module_import/utils.py +++ b/backend/gn_module_import/utils.py @@ -114,7 +114,7 @@ def detect_separator(f, encoding): def get_valid_bbox(imprt): - stmt = db.session.query( + stmt = db.select( func.ST_AsGeojson(func.ST_Extent(ImportSyntheseData.the_geom_4326)) ).filter( ImportSyntheseData.imprt == imprt, diff --git a/dependencies/GeoNature b/dependencies/GeoNature index 0a3764b2..4465b137 160000 --- a/dependencies/GeoNature +++ b/dependencies/GeoNature @@ -1 +1 @@ -Subproject commit 0a3764b29d83fb400b5bebb201bb94e353a915be +Subproject commit 4465b13790ca9b5e72a60cff7aeb24d349e58bde diff --git a/requirements-dev.in b/requirements-dev.in new file mode 100644 index 00000000..b01ba5da --- /dev/null +++ b/requirements-dev.in @@ -0,0 +1,9 @@ +-r dependencies/GeoNature/backend/requirements-common.in +pandas<1.4 # panda 1.4 requires sqlalchemy>=1.4 +pyproj<3;python_version<"3.9" # v3 require PROJ 7.2.0 but Debian 10 provide PROJ 5.2.0 +pyproj<3.1;python_version>="3.9" and python_version<"3.10" +pyproj;python_version>="3.10" +chardet +jsonschema +utils-flask-sqlalchemy>=0.3.0,<1.0.0 +geonature>=2.13.2