From bcf24a547927b046215ee992087d4fb6356fadb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment?= Date: Fri, 19 Jan 2024 16:58:36 +0100 Subject: [PATCH] feat: faker script add home_department --- faker/main.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/faker/main.py b/faker/main.py index a82c022..5ef0c26 100644 --- a/faker/main.py +++ b/faker/main.py @@ -15,6 +15,9 @@ '93': ['Saint-Denis', 'Montreuil', 'Aubervilliers'], '94': ['Créteil', 'Vincennes', 'Ivry-sur-Seine'], '95': ['Cergy', 'Argenteuil', 'Sarcelles'], + '86': ['Poitiers', 'Jaunay-clan', 'Chatellerault'], + '17': ['La Rochelle', 'Aytré', 'Ronce-les-bains'], + '14': ['Cabourg', 'Honfleur', 'Trouville'], } category_1 = ['suicide', 'avc', 'cancer', 'tuberculose', 'thrombose'] @@ -97,6 +100,9 @@ # Add weighted probabilities for departments weights = [0.15 if dept == '75' else 0.4 if dept == '77' else 0.2 if dept == '95' else 0.05 for dept in departments] +total_weight = sum(weights) +weights = [weight / total_weight for weight in weights] + def get_multiple_values(category): if random.random() < 0.2: return random.choice(category) @@ -106,7 +112,7 @@ def get_multiple_values(category): return "; ".join(values) with open('sample_data.csv', mode='w', newline='') as csv_file: - fieldnames = ['categories_level_1', 'categories_level_2', 'categories_associate', 'age', 'kind', 'sex', 'death_location', 'home_location', 'department', 'coordinates', 'date'] + fieldnames = ['categories_level_1', 'categories_level_2', 'categories_associate', 'age', 'kind', 'sex', 'death_location', 'home_location', 'home_department', 'department', 'coordinates', 'date'] writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() @@ -121,6 +127,7 @@ def get_multiple_values(category): row['sex'] = random.choice(['homme', 'femme', 'indéterminé']) row['death_location'] = random.choice(death_locations) row['department'] = np.random.choice(departments, p=weights) # Select department based on weights + row['home_department'] = np.random.choice(departments, p=weights) # Select department based on weights row['home_location'] = random.choice(department_dict[row['department']]) row['coordinates'] = f"{fake.latitude()}, {fake.longitude()}" row['date'] = fake.date_between(start_date=date(2021, 1, 1), end_date=date(2023, 12, 31)).isoformat()