From bcf24a547927b046215ee992087d4fb6356fadb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment?= <clement@numericite.eu>
Date: Fri, 19 Jan 2024 16:58:36 +0100
Subject: [PATCH] feat: add home_department to faker script

---
 faker/main.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/faker/main.py b/faker/main.py
index a82c022..5ef0c26 100644
--- a/faker/main.py
+++ b/faker/main.py
@@ -15,6 +15,9 @@
     '93': ['Saint-Denis', 'Montreuil', 'Aubervilliers'],
     '94': ['Créteil', 'Vincennes', 'Ivry-sur-Seine'],
     '95': ['Cergy', 'Argenteuil', 'Sarcelles'],
+    '86': ['Poitiers', 'Jaunay-clan', 'Chatellerault'],
+    '17': ['La Rochelle', 'Aytré', 'Ronce-les-bains'],
+    '14': ['Cabourg', 'Honfleur', 'Trouville'],
 }
 
 category_1 = ['suicide', 'avc', 'cancer', 'tuberculose', 'thrombose']
@@ -97,6 +100,9 @@
 # Add weighted probabilities for departments
 weights = [0.15 if dept == '75' else 0.4 if dept == '77' else 0.2 if dept == '95' else 0.05 for dept in departments]
 
+total_weight = sum(weights)
+weights = [weight / total_weight for weight in weights]
+
 def get_multiple_values(category):
     if random.random() < 0.2:
         return random.choice(category)
@@ -106,7 +112,7 @@ def get_multiple_values(category):
         return "; ".join(values)
 
 with open('sample_data.csv', mode='w', newline='') as csv_file:
-    fieldnames = ['categories_level_1', 'categories_level_2', 'categories_associate', 'age', 'kind', 'sex', 'death_location', 'home_location', 'department', 'coordinates', 'date']
+    fieldnames = ['categories_level_1', 'categories_level_2', 'categories_associate', 'age', 'kind', 'sex', 'death_location', 'home_location', 'home_department', 'department', 'coordinates', 'date']
     writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
 
     writer.writeheader()
@@ -121,6 +127,7 @@ def get_multiple_values(category):
         row['sex'] = random.choice(['homme', 'femme', 'indéterminé'])
         row['death_location'] = random.choice(death_locations)
         row['department'] = np.random.choice(departments, p=weights)  # Select department based on weights
+        row['home_department'] = np.random.choice(departments, p=weights)  # Draw home department independently, with the same weights (NOTE(review): home_location below still uses 'department' — confirm)
         row['home_location'] = random.choice(department_dict[row['department']])
         row['coordinates'] = f"{fake.latitude()}, {fake.longitude()}"
         row['date'] = fake.date_between(start_date=date(2021, 1, 1), end_date=date(2023, 12, 31)).isoformat()