-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1065 from MTES-MCT/feat-import-housing-data
Import housing mutation data
- Loading branch information
Showing
3 changed files
with
179 additions
and
0 deletions.
There are no files selected for viewing
21 changes: 21 additions & 0 deletions
21
server/src/infra/database/migrations/20241231090357_housings-add-mutation-data.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import type { Knex } from 'knex'; | ||
|
||
export async function up(knex: Knex): Promise<void> { | ||
await knex.schema.alterTable('fast_housing', (table) => { | ||
table.integer('plot_area').nullable(); | ||
table.timestamp('last_mutation_date').nullable(); | ||
table.timestamp('last_transaction_date').nullable(); | ||
table.integer('last_transaction_value').nullable(); | ||
table.string('occupancy_history').nullable(); | ||
}); | ||
} | ||
|
||
export async function down(knex: Knex): Promise<void> { | ||
await knex.schema.alterTable('fast_housing', (table) => { | ||
table.dropColumn('plot_area'); | ||
table.dropColumn('last_mutation_date'); | ||
table.dropColumn('last_transaction_date'); | ||
table.dropColumn('last_transaction_value'); | ||
table.dropColumn('occupancy_history'); | ||
}); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
## Usage | ||
|
||
### Migrate the production database | ||
|
||
```shell | ||
DATABASE_URL=... yarn workspace @zerologementvacant/server migrate | ||
``` | ||
|
||
### Create the CSV file | ||
|
||
```sql | ||
INSTALL httpfs; | ||
LOAD httpfs; | ||
|
||
CREATE OR REPLACE SECRET ( | ||
TYPE S3, | ||
KEY_ID '...', | ||
SECRET '...', | ||
ENDPOINT 'cellar-c2.services.clever-cloud.com', | ||
REGION 'us-east-1' | ||
); | ||
|
||
-- Copy data to a new table | ||
CREATE OR REPLACE TABLE "dev"."main"."housings" AS | ||
SELECT | ||
DISTINCT ON (local_id) local_id, | ||
plot_area, | ||
cadastral_classification, | ||
latitude, | ||
longitude, | ||
CASE | ||
WHEN TRY_CAST(TRY_STRPTIME(last_mutation_date, '%d%m%Y') AS DATE) IS NOT NULL THEN | ||
TRY_STRPTIME(last_mutation_date, '%d%m%Y') | ||
ELSE | ||
DATE_TRUNC('month', TRY_STRPTIME(last_mutation_date, '%d%m%Y')) | ||
+ INTERVAL '1 month' - INTERVAL '1 day' | ||
END AS last_mutation_date, | ||
last_transaction_date::date AS last_transaction_date, | ||
last_transaction_value, | ||
occupancy_history | ||
FROM read_csv( | ||
's3://zlv-production/production/dump_20241218/housing_data.csv', | ||
auto_detect = TRUE, | ||
ignore_errors = TRUE | ||
) | ||
WHERE local_id IS NOT NULL | ||
ORDER BY local_id; | ||
|
||
-- Check that the data has been loaded | ||
SELECT * FROM "dev"."main"."housings" | ||
LIMIT 100; | ||
|
||
-- Export housings to housings-gold.csv | ||
COPY "dev"."main"."housings" TO 'housings-gold.csv' (HEADER, DELIMITER ','); | ||
``` | ||
|
||
### Import the CSV file into the production database | ||
|
||
```sql | ||
INSTALL postgres; | ||
LOAD postgres; | ||
|
||
CREATE OR REPLACE SECRET ( | ||
TYPE POSTGRES, | ||
HOST '...', | ||
PORT ..., | ||
DATABASE '...', | ||
USER '...', | ||
PASSWORD '...' | ||
); | ||
|
||
ATTACH IF NOT EXISTS '' AS postgres (TYPE POSTGRES); | ||
|
||
CREATE OR REPLACE TABLE "postgres"."public"."housings_gold" AS | ||
SELECT * FROM read_csv('housings-gold.csv'); | ||
|
||
DETACH postgres; | ||
``` | ||
|
||
### Add a primary key to the `housings_gold` table | ||
|
||
In another terminal, connect to the production database and run the following: | ||
```sql | ||
ALTER TABLE housings_gold | ||
ADD CONSTRAINT housings_gold_pkey | ||
PRIMARY KEY (local_id); | ||
``` | ||
|
||
### Run the import script | ||
|
||
```shell | ||
cd server | ||
DATABASE_URL=... yarn dlx tsx src/scripts/import-housing-mutation-data/index.ts | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import async from 'async'; | ||
import { Range } from 'immutable'; | ||
import fp from 'lodash/fp'; | ||
|
||
import db from '~/infra/database'; | ||
|
||
const BATCH_SIZE = 1_000; | ||
const LIMIT = 10; | ||
|
||
const HOUSINGS_GOLD = 'housings_gold'; | ||
|
||
async function run(): Promise<void> { | ||
async function update( | ||
department: string, | ||
ids: ReadonlyArray<string> | ||
): Promise<void> { | ||
console.log( | ||
`Updating ${ids.length} housings in department ${department}...` | ||
); | ||
const fastHousing = `fast_housing_${department.toLowerCase()}`; | ||
const chunks = fp.chunk(BATCH_SIZE, ids); | ||
await async.forEachLimit(chunks, LIMIT, async (chunk) => { | ||
await db(fastHousing) | ||
.update({ | ||
plot_area: db.ref(`${HOUSINGS_GOLD}.plot_area`), | ||
cadastral_classification: db.ref( | ||
`${HOUSINGS_GOLD}.cadastral_classification` | ||
), | ||
latitude_dgfip: db.ref(`${HOUSINGS_GOLD}.latitude`), | ||
longitude_dgfip: db.ref(`${HOUSINGS_GOLD}.longitude`), | ||
last_mutation_date: db.ref(`${HOUSINGS_GOLD}.last_mutation_date`), | ||
last_transaction_date: db.ref( | ||
`${HOUSINGS_GOLD}.last_transaction_date` | ||
), | ||
last_transaction_value: db.ref( | ||
`${HOUSINGS_GOLD}.last_transaction_value` | ||
), | ||
occupancy_history: db.ref(`${HOUSINGS_GOLD}.occupancy_history`) | ||
}) | ||
.updateFrom(HOUSINGS_GOLD) | ||
.where(`${fastHousing}.local_id`, db.ref(`${HOUSINGS_GOLD}.local_id`)) | ||
.whereIn(`${HOUSINGS_GOLD}.local_id`, chunk); | ||
}); | ||
} | ||
|
||
const departments = Range(1, 99) | ||
.filter((department) => department !== 20) | ||
.map((department) => department.toString().padStart(2, '0')) | ||
.concat(['2A', '2B']); | ||
|
||
await async.forEachSeries( | ||
departments.toArray(), | ||
async (department: string) => { | ||
const housings = await db(HOUSINGS_GOLD) | ||
.select('local_id') | ||
.whereLike('local_id', `${department}%`) | ||
.orderBy('local_id'); | ||
const ids = housings.map((housing) => housing.local_id); | ||
await update(department, ids); | ||
} | ||
); | ||
} | ||
|
||
run(); |