From 06b6cdb8896f9b2c4ac2929940ee30c246e99c80 Mon Sep 17 00:00:00 2001
From: philipperemy <premy.enseirb@gmail.com>
Date: Tue, 1 Oct 2024 13:51:12 +0900
Subject: [PATCH] Load name data from LZ4-compressed pickles instead of zipped JSON

---
 names_dataset/nd_v3.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/names_dataset/nd_v3.py b/names_dataset/nd_v3.py
index 388c3a1..23f28af 100644
--- a/names_dataset/nd_v3.py
+++ b/names_dataset/nd_v3.py
@@ -1,15 +1,23 @@
 import copy
-import json
 import operator
 import os
-import zipfile
+import pickle
 from collections import defaultdict
 from pathlib import Path
 from typing import Optional
 
+import lz4.frame
 import pycountry
 
 
+def decompress_and_unpickle(compressed_file):
+    """Return the object pickled inside the LZ4-frame file at `compressed_file`."""
+    with open(compressed_file, 'rb') as handle:
+        payload = handle.read()
+    # NOTE: unpickling can execute arbitrary code; only load trusted files.
+    return pickle.loads(lz4.frame.decompress(payload))
+
+
 def _query(search_set, key):
     key = key.strip().title()
     if key in search_set:
@@ -54,16 +62,16 @@ class NameDataset:
     def __init__(self, load_first_names=True, load_last_names=True):
         if not load_first_names and not load_last_names:
             raise ValueError('Select either [load_first_names=True] and/or [load_last_names=True].')
-        first_names_filename = Path(os.path.dirname(__file__)) / 'v3/first_names.zip'
-        last_names_filename = Path(os.path.dirname(__file__)) / 'v3/last_names.zip'
+        first_names_filename = Path(os.path.dirname(__file__)) / 'v3/first_names.lz4'  # LZ4-compressed pickle
+        last_names_filename = Path(os.path.dirname(__file__)) / 'v3/last_names.lz4'  # LZ4-compressed pickle
         self.first_names = self._read_json_from_zip(first_names_filename) if load_first_names else None
         self.last_names = self._read_json_from_zip(last_names_filename) if load_last_names else None
 
     @staticmethod
     def _read_json_from_zip(zip_file):
-        with zipfile.ZipFile(zip_file) as z:
-            with z.open(z.filelist[0]) as f:
-                return json.load(f)
+        # Legacy name kept for existing callers: the file is now an LZ4-compressed
+        # pickle, so only pass trusted files (pickle can execute code on load).
+        return decompress_and_unpickle(zip_file)
 
     def search(self, name: str):
         key = name.strip().title()