Skip to content

Commit

Permalink
Merge pull request #1065 from MTES-MCT/feat-import-housing-data
Browse files Browse the repository at this point in the history
Import housing mutation data
  • Loading branch information
Falinor authored Dec 31, 2024
2 parents b86db34 + 4bb2b0c commit 981512a
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import type { Knex } from 'knex';

export async function up(knex: Knex): Promise<void> {
  // Add the nullable columns that back the imported housing mutation data.
  await knex.schema.alterTable('fast_housing', (table) => {
    const additions: ReadonlyArray<
      readonly ['integer' | 'timestamp' | 'string', string]
    > = [
      ['integer', 'plot_area'],
      ['timestamp', 'last_mutation_date'],
      ['timestamp', 'last_transaction_date'],
      ['integer', 'last_transaction_value'],
      ['string', 'occupancy_history']
    ];
    for (const [type, name] of additions) {
      table[type](name).nullable();
    }
  });
}

export async function down(knex: Knex): Promise<void> {
  // Revert the up migration by dropping the mutation-data columns.
  await knex.schema.alterTable('fast_housing', (table) => {
    const removals = [
      'plot_area',
      'last_mutation_date',
      'last_transaction_date',
      'last_transaction_value',
      'occupancy_history'
    ] as const;
    removals.forEach((column) => table.dropColumn(column));
  });
}
94 changes: 94 additions & 0 deletions server/src/scripts/import-housing-mutation-data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
## Usage

### Migrate the production database

```shell
DATABASE_URL=... yarn workspace @zerologementvacant/server migrate
```

### Create the CSV file

```sql
INSTALL httpfs;
LOAD httpfs;

CREATE OR REPLACE SECRET (
TYPE S3,
KEY_ID '...',
SECRET '...',
ENDPOINT 'cellar-c2.services.clever-cloud.com',
REGION 'us-east-1'
);

-- Copy data to a new table
CREATE OR REPLACE TABLE "dev"."main"."housings" AS
SELECT
DISTINCT ON (local_id) local_id,
plot_area,
cadastral_classification,
latitude,
longitude,
-- NOTE(review): the ELSE branch re-parses with the same '%d%m%Y' format that
-- just failed in the WHEN condition, so it always yields NULL. If the fallback
-- is meant to handle month-only values (e.g. '062019'), it should use '%m%Y'.
CASE
WHEN TRY_CAST(TRY_STRPTIME(last_mutation_date, '%d%m%Y') AS DATE) IS NOT NULL THEN
TRY_STRPTIME(last_mutation_date, '%d%m%Y')
ELSE
DATE_TRUNC('month', TRY_STRPTIME(last_mutation_date, '%d%m%Y'))
+ INTERVAL '1 month' - INTERVAL '1 day'
END AS last_mutation_date,
last_transaction_date::date AS last_transaction_date,
last_transaction_value,
occupancy_history
FROM read_csv(
's3://zlv-production/production/dump_20241218/housing_data.csv',
auto_detect = TRUE,
ignore_errors = TRUE
)
WHERE local_id IS NOT NULL
ORDER BY local_id;

-- Check that the data has been loaded
SELECT * FROM "dev"."main"."housings"
LIMIT 100;

-- Export housings to housings-gold.csv
COPY "dev"."main"."housings" TO 'housings-gold.csv' (HEADER, DELIMITER ',');
```

### Import the CSV file into the production database

```sql
INSTALL postgres;
LOAD postgres;

CREATE OR REPLACE SECRET (
TYPE POSTGRES,
HOST '...',
PORT ...,
DATABASE '...',
USER '...',
PASSWORD '...'
);

ATTACH IF NOT EXISTS '' AS postgres (TYPE POSTGRES);

CREATE OR REPLACE TABLE "postgres"."public"."housings_gold" AS
SELECT * FROM read_csv('housings-gold.csv');

DETACH postgres;
```

### Add a primary key to the `housings_gold` table

In another terminal, connect to the production database and run the following:
```sql
ALTER TABLE housings_gold
ADD CONSTRAINT housings_gold_pkey
PRIMARY KEY (local_id);
```

### Run the import script

```shell
cd server
DATABASE_URL=... yarn dlx tsx src/scripts/import-housing-mutation-data/index.ts
```
64 changes: 64 additions & 0 deletions server/src/scripts/import-housing-mutation-data/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import async from 'async';
import { Range } from 'immutable';
import fp from 'lodash/fp';

import db from '~/infra/database';

// Number of housing ids targeted by a single UPDATE statement.
const BATCH_SIZE = 1_000;
// Maximum number of UPDATE statements running concurrently per department.
const LIMIT = 10;

// Staging table holding the imported mutation data (created by the DuckDB
// export — see README.md in this directory).
const HOUSINGS_GOLD = 'housings_gold';

/**
 * Copies the imported housing mutation data from the `housings_gold` staging
 * table into each department's `fast_housing_*` partition, one department at
 * a time.
 */
async function run(): Promise<void> {
  /**
   * Updates the given housings of one department from `housings_gold`,
   * `BATCH_SIZE` ids per statement with at most `LIMIT` statements in flight.
   */
  async function update(
    department: string,
    ids: ReadonlyArray<string>
  ): Promise<void> {
    console.log(
      `Updating ${ids.length} housings in department ${department}...`
    );
    const fastHousing = `fast_housing_${department.toLowerCase()}`;
    const chunks = fp.chunk(BATCH_SIZE, ids);
    await async.forEachLimit(chunks, LIMIT, async (chunk) => {
      await db(fastHousing)
        .update({
          plot_area: db.ref(`${HOUSINGS_GOLD}.plot_area`),
          cadastral_classification: db.ref(
            `${HOUSINGS_GOLD}.cadastral_classification`
          ),
          latitude_dgfip: db.ref(`${HOUSINGS_GOLD}.latitude`),
          longitude_dgfip: db.ref(`${HOUSINGS_GOLD}.longitude`),
          last_mutation_date: db.ref(`${HOUSINGS_GOLD}.last_mutation_date`),
          last_transaction_date: db.ref(
            `${HOUSINGS_GOLD}.last_transaction_date`
          ),
          last_transaction_value: db.ref(
            `${HOUSINGS_GOLD}.last_transaction_value`
          ),
          occupancy_history: db.ref(`${HOUSINGS_GOLD}.occupancy_history`)
        })
        // Postgres `UPDATE ... FROM housings_gold` joined on local_id.
        .updateFrom(HOUSINGS_GOLD)
        .where(`${fastHousing}.local_id`, db.ref(`${HOUSINGS_GOLD}.local_id`))
        .whereIn(`${HOUSINGS_GOLD}.local_id`, chunk);
    });
  }

  // Departments 01-98, replacing 20 with Corsica's 2A and 2B.
  // NOTE(review): Range(1, 99) also yields 96-98 — confirm those
  // fast_housing_* partitions exist, and that overseas departments (971+)
  // are intentionally skipped.
  const departments = Range(1, 99)
    .filter((department) => department !== 20)
    .map((department) => department.toString().padStart(2, '0'))
    .concat(['2A', '2B']);

  // Process departments sequentially to keep the load on Postgres bounded.
  await async.forEachSeries(
    departments.toArray(),
    async (department: string) => {
      const housings = await db(HOUSINGS_GOLD)
        .select('local_id')
        .whereLike('local_id', `${department}%`)
        .orderBy('local_id');
      const ids = housings.map((housing) => housing.local_id);
      await update(department, ids);
    }
  );
}

// Fix: `run();` was a floating promise — a rejection would neither be logged
// with a non-zero exit code nor release the connection pool, leaving the
// process hanging. Handle failure explicitly and tear the pool down.
run()
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  })
  .finally(() => {
    // NOTE(review): assumes `db` is a Knex instance exposing destroy();
    // guarded with ?. in case the database wrapper differs.
    void db.destroy?.();
  });

0 comments on commit 981512a

Please sign in to comment.