Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
jacquesfize committed Apr 23, 2024
2 parents 7fb5c58 + d5adec4 commit d6932aa
Show file tree
Hide file tree
Showing 20 changed files with 317 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
TAXHUB_SETTINGS: test_config.py
- name: Install taxref
run: |
flask taxref import-v16
flask taxref import-v17
env:
TAXHUB_SETTINGS: test_config.py
- name: Test with pytest
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.13.4
1.14
1 change: 1 addition & 0 deletions apptax/migrations/data/taxonomie_bdc_statuts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ SET search_path = taxonomie, pg_catalog, public;
--------- BDC statuts
-- ##############################################"""""

DELETE FROM taxonomie.bdc_statut_cor_text_area;
DELETE FROM taxonomie.bdc_statut_taxons;
DELETE FROM taxonomie.bdc_statut_cor_text_values;
DELETE FROM taxonomie.bdc_statut_text;
Expand Down
11 changes: 8 additions & 3 deletions apptax/taxonomie/commands/migrate_taxref/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Update Taxref

Scripts de migration permettant de mettre à jour une version de Taxref à une autre, à partir de la mise à jour vers la version 14 de Taxref.

A noter qu'il n'est pas nécessaire de migrer les versions de taxref une à une. Il est par exemple possible de passer directement de Taxref version 13 à 16.
A noter qu'il n'est pas nécessaire de migrer les versions de taxref une à une. Il est par exemple possible de passer directement de Taxref version 13 à 17.

Avant de commencer :

Expand Down Expand Up @@ -49,6 +49,7 @@ Pour exécuter ce script, il faut lancer la commande suivante :

flask taxref migrate-to-v15 import-taxref-v15 # Si migration vers Taxref v15
flask taxref migrate-to-v16 import-taxref-v16 # Si migration vers Taxref v16
flask taxref migrate-to-v17 import-taxref-v17 # Si migration vers Taxref v17

Analysez les fichiers CSV générés dans le dossier ``tmp``. Réalisez les corrections de données en fonction :

Expand All @@ -65,6 +66,7 @@ Toutes ces opérations peuvent être regroupés dans un fichier SQL exécuté da

flask taxref migrate-to-v15 test-changes-detection # Si migration vers taxref v15
flask taxref migrate-to-v16 test-changes-detection # Si migration vers taxref v16
flask taxref migrate-to-v17 test-changes-detection # Si migration vers taxref v17

::

Expand All @@ -81,6 +83,7 @@ Lancer le script avec la commande :

flask taxref migrate-to-v15 apply-changes # Si migration vers taxref v15
flask taxref migrate-to-v16 apply-changes # Si migration vers taxref v16
flask taxref migrate-to-v17 apply-changes # Si migration vers taxref v17

flask taxref link-bdc-statut-to-areas

Expand Down Expand Up @@ -112,12 +115,14 @@ Après correction des données d'observation (Occtax, Synthèse...), vous pourre

⚠️ Si vous utilisez GeoNature, mettez à jour les règles de sensibilité suite à la mise à jour de Taxref :

::

source geonature/backend/venv/bin/activate
geonature sensitivity refresh-rules-cache


⚠️ Si vous aviez activé uniquement les statuts de protection dans un ou plusieurs départements auparavant, la mise à jour de Taxref les réactive tous. Renouvelez donc l'opération à l'aide de la commande suivante :

flask taxref enable-bdc-statut-text -d <MON_DEP_1> -d <MON_DEP_2> --clean

.. image:: ../../../../data/scripts/update_taxref/images/bdc_statut.png

.. image:: ../../../../data/scripts/update_taxref/images/update-taxref-cas-1.jpg
Expand Down
196 changes: 196 additions & 0 deletions apptax/taxonomie/commands/migrate_taxref/commands_v17.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import importlib
import click
from zipfile import ZipFile
from sqlalchemy import text
from flask.cli import with_appcontext

from utils_flask_sqla.migrations.utils import open_remote_file

from apptax.database import db
from apptax.taxonomie.commands.utils import (
copy_from_csv,
truncate_bdc_statuts,
refresh_taxref_vm,
insert_taxref_numversion,
)
from apptax.taxonomie.commands.taxref_v15_v16 import import_bdc_statuts_v17
from .utils import save_data, analyse_taxref_changes
from . import logger


base_url = "http://geonature.fr/data/inpn/taxonomie/"


@click.group(help="Migrate to TaxRef v17.")
def migrate_to_v17():
    """Click command group holding the TaxRef v17 migration sub-commands."""


@migrate_to_v17.command()
@with_appcontext
def import_taxref_v17():
    """
    Migration procedure of taxref to version 17: test of the cd_nom removals.

    Steps, in order:
      1. execute the ``0.2_taxref_detection_repercussion_disparition_cd_nom.sql``
         script from the ``changes_detection`` data package;
      2. import the raw TaxRef v17 data and commit the session;
      3. analyse the upcoming changes.
    """
    # Prerequisite: deps_test_fk_dependencies_cd_nom
    detection_sql = importlib.resources.read_text(
        "apptax.taxonomie.commands.migrate_taxref.data.changes_detection",
        "0.2_taxref_detection_repercussion_disparition_cd_nom.sql",
    )
    db.session.execute(text(detection_sql))

    # Import the TaxRef v17 data, then persist what has been done so far
    import_data_taxref_v17()
    db.session.commit()

    # Analyse the upcoming changes
    analyse_taxref_changes()


@migrate_to_v17.command()
@click.option("--keep-cdnom", is_flag=True)
@with_appcontext
def test_changes_detection(keep_cdnom):
    """Analyse the repercussions of the taxref change.

    Three steps:
      - detection of the missing cd_nom values;
      - creation of a working copy of bib_noms;
      - analysis of the taxonomic modifications (split, merge, ...) and of
        their repercussions on the TaxHub attributes and media.

    :param keep_cdnom: whether the missing cd_nom values should be kept
        instead of being deleted
    :type keep_cdnom: bool
    """
    # Only the "keep missing cd_nom" choice is forwarded to the analysis
    detection_options = {"keep_missing_cd_nom": keep_cdnom}
    analyse_taxref_changes(**detection_options)


@migrate_to_v17.command()
@click.option("--keep-oldtaxref", is_flag=True)
@click.option("--keep-oldbdc", is_flag=True)
@click.option("--keep-cdnom", is_flag=True)
@click.option("--script_predetection", type=click.Path(exists=True))
@click.option("--script_postdetection", type=click.Path(exists=True))
@with_appcontext
def apply_changes(
    keep_oldtaxref, keep_oldbdc, keep_cdnom, script_predetection, script_postdetection
):
    """Migration procedure of taxref to version 17.

    Applies the changes and imports the data into the taxref and bdc_status
    tables.

    :param keep_oldtaxref: whether the previous version of the taxref
        referential should be kept
    :type keep_oldtaxref: bool
    :param keep_oldbdc: whether the previous version of the bdc_status
        referential should be kept
    :type keep_oldbdc: bool
    :param keep_cdnom: whether the missing cd_nom values should be kept
        instead of being deleted
    :type keep_cdnom: bool
    :param script_predetection: path of an SQL correction file run before the
        detection of changes
    :type script_predetection: Path
    :param script_postdetection: path of an SQL correction file run after the
        detection of changes
    :type script_postdetection: Path
    :raises Exception: re-raises any error hit while updating the taxref
        data, after logging it and rolling the session back
    """

    # Analyse the upcoming changes
    analyse_taxref_changes(
        keep_missing_cd_nom=keep_cdnom,
        script_predetection=script_predetection,
        script_postdetection=script_postdetection,
    )

    # Save taxref and bdc_status data
    # NOTE(review): 16 is hard-coded as the version handed to save_data --
    # presumably the label of the saved data; confirm this is what is wanted
    # when migrating from a version older than 16.
    save_data(16, keep_oldtaxref, keep_oldbdc)

    # Update taxref v17
    logger.info("Migration of taxref ...")
    try:
        query = text(
            importlib.resources.read_text(
                "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v15_v16",
                "3.2_alter_taxref_data.sql",
            )
        )
        db.session.execute(query, {"keep_cd_nom": keep_cdnom})
        db.session.commit()
        logger.info("it's done")
    except Exception as e:
        logger.error(str(e))
        # The taxref update is the core of the migration: do not go on to
        # truncate the BDC statuts and record version 17 if it failed.
        # Leave the session clean and let the command abort.
        db.session.rollback()
        raise

    # Import bdc status data and insert into taxhub tables
    import_and_format_dbc_status()

    # Clean DB
    logger.info("Clean DB")
    query = text(
        importlib.resources.read_text(
            "apptax.taxonomie.commands.migrate_taxref.data", "5_clean_db.sql"
        )
    )
    db.session.execute(query)

    logger.info("Refresh materialized views…")
    refresh_taxref_vm()

    insert_taxref_numversion(17)
    db.session.commit()

    logger.info("Vacuum the database... (cette opération peut être longue)")
    # The connection is requested in AUTOCOMMIT isolation level for the VACUUM
    with db.session.connection(execution_options={"isolation_level": "AUTOCOMMIT"}) as conn:
        conn.execute(text("VACUUM FULL VERBOSE"))


def import_data_taxref_v17():
    """
    Import the raw TaxRef v17 data into the database, before any processing.

    Runs the ``0_taxref_import_data.sql`` preparation script, commits, then
    copies three members of the ``TAXREF_v17_2024.zip`` archive fetched from
    ``base_url`` into their respective tables.
    """

    logger.info("Import TAXREFv17 into tmp table…")

    # Prepare the creation of the temporary table used to import taxref
    setup_sql = importlib.resources.read_text(
        "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v15_v16",
        "0_taxref_import_data.sql",
    )
    db.session.execute(text(setup_sql))
    db.session.commit()

    # One entry per archive member, processed in this order:
    # (member name, log message, keyword arguments for copy_from_csv)
    archive_imports = (
        (
            "TAXREFv17.txt",
            "Insert TAXREFv17 into taxonomie.import_taxref table…",
            {"table_name": "import_taxref", "delimiter": "\t"},
        ),
        (
            "CDNOM_DISPARUS.csv",
            "Insert missing cd_nom into taxonomie.cdnom_disparu table…",
            {"table_name": "cdnom_disparu", "delimiter": ","},
        ),
        (
            "rangs_note.csv",
            "Insert rangs_note tmp table…",
            {"table_name": "import_taxref_rangs", "encoding": "WIN1252", "delimiter": ";"},
        ),
    )

    with open_remote_file(base_url, "TAXREF_v17_2024.zip", open_fct=ZipFile) as archive:
        for member_name, log_message, copy_options in archive_imports:
            with archive.open(member_name) as f:
                logger.info(log_message)
                copy_from_csv(f, **copy_options)


def import_and_format_dbc_status():
    """
    Import the raw bdc_status data into the database, then process it so
    that it is dispatched into the different tables.
    """
    # Start by truncating the existing BDC statuts
    truncate_bdc_statuts()

    # Then run the v17 BDC statuts import, reporting through this module's logger
    import_bdc_statuts_v17(logger)
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ SELECT setval(
true
);

--- ajout Nicolas Imbert
create index IF NOT EXISTS i_tmp_cdnom_disparu_cd_nom on taxonomie.cdnom_disparu (cd_nom);


--- CAS 1 - cd_nom de remplacement à utiliser.
UPDATE taxonomie.tmp_bib_noms_copy n SET deleted = true ,
commentaire_disparition = raison_suppression || COALESCE(' nouveau cd_nom :' || a.cd_nom_remplacement, ''),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ DROP TABLE IF EXISTS tmp_taxref_changes.comp_grap ;

CREATE TABLE tmp_taxref_changes.comp_grap AS
WITH grappe_init AS (
SELECT b.cd_ref , array_agg(cd_nom ORDER BY cd_nom) as array_agg, count(DISTINCT cd_nom)
SELECT distinct b.cd_ref , array_agg(cd_nom ORDER BY cd_nom) as array_agg, count(DISTINCT cd_nom)
FROM taxonomie.tmp_bib_noms_copy b
WHERE NOT deleted = true
WHERE NOT deleted = true and cd_nom is not null
GROUP BY cd_ref
),
grappe_final AS (
SELECT t.cd_ref , array_agg(b.cd_nom ORDER BY b.cd_nom) as array_agg, count(DISTINCT b.cd_nom)
SELECT distinct t.cd_ref , array_agg(b.cd_nom ORDER BY b.cd_nom) as array_agg, count(DISTINCT b.cd_nom)
FROM taxonomie.tmp_bib_noms_copy b
JOIN taxonomie.import_taxref t
ON b.cd_nom = t.cd_nom
WHERE NOT deleted = true
WHERE NOT deleted = true and b.cd_nom is not null
GROUP BY t.cd_ref
),
attribs AS (
Expand All @@ -33,12 +33,23 @@ media AS (
SELECT DISTINCT cd_ref, count(id_media) as media_nb
FROM taxonomie.t_medias
GROUP BY cd_ref
)
SELECT i.cd_ref as i_cd_ref, i.array_agg as i_array_agg, i.count as i_count,
),
init_cdnom as (
select distinct t1.cd_ref, t2.cd_nom, t1.array_agg, t1.count
from grappe_init t1, taxonomie.tmp_bib_noms_copy t2
where t1.cd_ref = t2.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null
order by 1,2),
final_cdnom as (
select distinct t3.cd_ref, t2.cd_nom, t1.array_agg, t1.count
from grappe_final t1, taxonomie.tmp_bib_noms_copy t2, taxonomie.import_taxref t3
where t1.cd_ref = t3.cd_ref and NOT t2.deleted = true and t2.cd_nom is not null
and t2.cd_nom = t3.cd_nom
order by 1,2)
SELECT distinct i.cd_ref as i_cd_ref, i.array_agg as i_array_agg, i.count as i_count,
f.cd_ref as f_cd_ref, f.array_agg as f_array_agg, f.count as f_count,
att_list, att_nb, media_nb
FROM grappe_init i
LEFT OUTER JOIN grappe_final f ON i.array_agg && f.array_agg
FROM init_cdnom i
LEFT OUTER JOIN final_cdnom f ON i.cd_nom = f.cd_nom
LEFT OUTER JOIN attribs a ON i.cd_ref = a.cd_ref
LEFT OUTER JOIN media m ON i.cd_ref = m.cd_ref;

Expand Down
2 changes: 1 addition & 1 deletion apptax/taxonomie/commands/migrate_taxref/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def missing_cd_nom_query(query_name, export_file_name):
data = results.fetchall()
if len(data) > 0:
logger.warning(
f"Some cd_nom referencing in data where missing from taxref v15 -> see file {export_file_name}"
f"Some cd_nom referencing in data where missing from new taxref -> see file {export_file_name}"
)
export_as_csv(file_name=export_file_name, columns=results.keys(), data=data)

Expand Down
6 changes: 6 additions & 0 deletions apptax/taxonomie/commands/taxref.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
from flask.cli import with_appcontext

from apptax.database import db
from apptax.taxonomie.commands.migrate_taxref.commands_v17 import migrate_to_v17
from apptax.taxonomie.models import Taxref, TaxrefBdcStatutText, TMetaTaxref

from .utils import truncate_bdc_statuts
from .taxref_v14 import import_v14, import_bdc_v14
from .taxref_v15_v16 import (
import_bdc_v17,
import_v15,
import_bdc_v15,
import_v17,
link_bdc_statut_to_areas,
enable_bdc_statut_text,
import_v16,
Expand Down Expand Up @@ -87,8 +90,11 @@ def delete_bdc():
taxref.add_command(import_v15)
taxref.add_command(import_bdc_v15)
taxref.add_command(import_v16)
taxref.add_command(import_v17)
taxref.add_command(import_bdc_v16)
taxref.add_command(import_bdc_v17)
taxref.add_command(migrate_to_v15)
taxref.add_command(migrate_to_v16)
taxref.add_command(migrate_to_v17)
taxref.add_command(link_bdc_statut_to_areas)
taxref.add_command(enable_bdc_statut_text)
Loading

0 comments on commit d6932aa

Please sign in to comment.