revise continue train from initial_iter #164

Open · wants to merge 3 commits into base: main
4 changes: 4 additions & 0 deletions convert_model/convert_model.sh
@@ -0,0 +1,4 @@
python scripts/convert_lit_checkpoint.py \
--out_dir ./out_20240217/tinyllama_1b \
--checkpoint_name iter-2860000-ckpt.pth \
--model_name csg-tiny-1B
3 changes: 3 additions & 0 deletions convert_model/convert_model_hf.sh
@@ -0,0 +1,3 @@
python scripts/convert_hf_checkpoint.py \
--checkpoint_dir /data/models/csg-tiny-1B/csg-tiny-1B-480K \
--model_name tiny_LLaMA_1b
4 changes: 4 additions & 0 deletions convert_model/convert_model_lit.sh
@@ -0,0 +1,4 @@
python scripts/convert_lit_checkpoint.py \
--out_dir /data/train/csg_tiny_train/out_20240217/tinyllama_1b \
--checkpoint_name iter-4780000-ckpt.pth \
--model_name csg-tiny-1B
41 changes: 41 additions & 0 deletions convert_model/pytorch-to-safetensor-converter/README.md
@@ -0,0 +1,41 @@
# Pytorch to Safetensor Converter

---



A simple converter that converts PyTorch ``.bin`` tensor files (usually named "pytorch_model.bin" or "pytorch_model-xxxx-of-xxxx.bin") to safetensors files. Why?

~~because it's cool!~~

Because the safetensors format decreases the loading time of large LLMs and is currently supported in [oobabooga's text-generation-webui](https://github.com/oobabooga/text-generation-webui). It also supports in-place loading, which effectively decreases the memory required to load an LLM.

Note: Most of the code originates from [Convert to Safetensors - a Hugging Face Space by safetensors](https://huggingface.co/spaces/safetensors/convert), and this code cannot handle files that are not named "pytorch_model.bin" or "pytorch_model-xxxx-of-xxxx.bin".

### Limitations:

The program requires **a lot** of memory. Specifically, your free memory should be **at least** twice the size of your largest ".bin" file; otherwise the program will run out of memory and fall back on swap, which would be **slow!**

This program **will not** re-shard (i.e. break down) the model; you'll need to do that yourself using some other tools.
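A quick stdlib-only sanity check along these lines; the 2x rule is from the note above, while the helper name and the throwaway demo files are our own illustration:

```python
import os
import tempfile

def largest_bin_bytes(folder):
    # Size in bytes of the largest .bin shard in `folder` (0 if none).
    sizes = [
        os.path.getsize(os.path.join(folder, f))
        for f in os.listdir(folder)
        if f.endswith(".bin")
    ]
    return max(sizes, default=0)

# Demo with throwaway files; point this at your model folder instead.
with tempfile.TemporaryDirectory() as d:
    with open(os.path.join(d, "pytorch_model-00001-of-00002.bin"), "wb") as f:
        f.write(b"\x00" * 2048)
    with open(os.path.join(d, "pytorch_model-00002-of-00002.bin"), "wb") as f:
        f.write(b"\x00" * 1024)
    largest = largest_bin_bytes(d)

# Per the limitation above, free RAM should be at least 2 * largest
# before running the converter.
```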

### Usage:

After installing Python (3.10.x is suggested), clone the repository, ``cd`` into it, and install the dependencies first:

```
git clone https://github.com/Silver267/pytorch-to-safetensor-converter.git
cd pytorch-to-safetensor-converter
pip install -r requirements.txt
```

Copy **all content** of your model's folder into this repository, then run:

```
python convert_to_safetensor.py
```
Follow the instructions in the program. Remember to use the **full path** for the model directory (something like ``E:\models\xxx-fp16`` that contains all the model files). Wait a while and you're good to go; the program automatically copies all other files to your destination folder. Enjoy!

### Precision stuff
If your original model is fp32, don't forget to change ``"torch_dtype": "float32",`` to ``"torch_dtype": "float16",`` in ``config.json``, since the converter casts the weights to fp16.
#### Note that this operation might (on rare occasions) cause the LLM to output NaN while performing operations, since it reduces the precision to fp16.
If you're worried about that, simply change the line ``loaded = {k: v.contiguous().half() for k, v in loaded.items()}`` in ``convert_to_safetensor.py`` to ``loaded = {k: v.contiguous() for k, v in loaded.items()}`` and you'll get a full-precision model.
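To see why the fp16 cast can bite: float16 tops out at 65504, so any larger weight or activation overflows to infinity, and infinities then tend to turn into NaN downstream. A small sketch using ``numpy`` (already in ``requirements.txt``; the sample values are illustrative):

```python
import numpy as np

# 65504 is the largest finite float16 value; anything bigger overflows.
x = np.array([65504.0, 70000.0], dtype=np.float32)
y = x.astype(np.float16)
# y[0] stays finite, y[1] becomes inf -- and inf - inf in a later
# matmul or softmax would produce NaN.
```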
103 changes: 103 additions & 0 deletions convert_model/pytorch-to-safetensor-converter/convert_to_safetensor.py
@@ -0,0 +1,103 @@
import json
import os
import shutil
import torch
from collections import defaultdict
from safetensors.torch import load_file, save_file
from tqdm import tqdm

def shared_pointers(tensors):
    # Map storage address -> tensor names; return the groups of names
    # that share the same underlying storage.
    ptrs = defaultdict(list)
    for k, v in tensors.items():
        ptrs[v.data_ptr()].append(k)
    return [names for names in ptrs.values() if len(names) > 1]

def check_file_size(sf_filename, pt_filename):
    # Guard against a silently corrupted conversion.
    sf_size = os.stat(sf_filename).st_size
    pt_size = os.stat(pt_filename).st_size
    if (sf_size - pt_size) / pt_size > 0.01:
        raise RuntimeError(f"File size difference exceeds 1% between {sf_filename} and {pt_filename}")

def convert_file(pt_filename, sf_filename, copy_add_data=True):
    source_folder = os.path.dirname(pt_filename)
    dest_folder = os.path.dirname(sf_filename)
    loaded = torch.load(pt_filename, map_location="cpu")
    loaded = loaded.get("state_dict", loaded)
    shared = shared_pointers(loaded)

    # safetensors cannot serialize tensors that share storage,
    # so keep only the first name of each shared group.
    for shared_weights in shared:
        for name in shared_weights[1:]:
            loaded.pop(name)

    # Cast to fp16 (see the README's precision note for keeping fp32).
    loaded = {k: v.contiguous().half() for k, v in loaded.items()}

    os.makedirs(dest_folder, exist_ok=True)
    save_file(loaded, sf_filename, metadata={"format": "pt"})
    check_file_size(sf_filename, pt_filename)
    if copy_add_data:
        copy_additional_files(source_folder, dest_folder)

    # Round-trip check: every tensor must reload bit-for-bit.
    reloaded = load_file(sf_filename)
    for k, v in loaded.items():
        if not torch.equal(v, reloaded[k]):
            raise RuntimeError(f"Mismatch in tensors for key {k}.")

def rename(pt_filename):
    return pt_filename.replace("pytorch_model", "model").replace(".bin", ".safetensors")

def copy_additional_files(source_folder, dest_folder):
    # Copy configs, tokenizer files, etc. alongside the converted weights.
    for file in os.listdir(source_folder):
        file_path = os.path.join(source_folder, file)
        if os.path.isfile(file_path) and not (file.endswith('.bin') or file.endswith('.py')):
            shutil.copy(file_path, dest_folder)

def find_index_file(source_folder):
    for file in os.listdir(source_folder):
        if file.endswith('.bin.index.json'):
            return file
    return None

def convert_files(source_folder, dest_folder, delete_old):
    # Sharded models: convert every shard listed in the index file.
    index_file = find_index_file(source_folder)
    if not index_file:
        raise RuntimeError("Index file not found. Please ensure the correct folder is specified.")

    index_file = os.path.join(source_folder, index_file)
    with open(index_file) as f:
        index_data = json.load(f)

    for pt_filename in tqdm(set(index_data["weight_map"].values())):
        full_pt_filename = os.path.join(source_folder, pt_filename)
        sf_filename = os.path.join(dest_folder, rename(pt_filename))
        convert_file(full_pt_filename, sf_filename, copy_add_data=False)
        if delete_old:
            os.remove(full_pt_filename)

    copy_additional_files(source_folder, dest_folder)

    # Rewrite the index so it points at the renamed .safetensors shards.
    index_path = os.path.join(dest_folder, "model.safetensors.index.json")
    with open(index_path, "w") as f:
        new_map = {k: rename(v) for k, v in index_data["weight_map"].items()}
        json.dump({**index_data, "weight_map": new_map}, f, indent=4)

def main():
    script_dir = os.path.dirname(os.path.realpath(__file__))

    source_folder = input("Source folder for PyTorch files (leave blank for script's directory): ").strip() or script_dir
    dest_folder = input("Destination folder for SafeTensors files (leave blank for default): ").strip()

    if not dest_folder:
        model_name = os.path.basename(os.path.normpath(source_folder))
        dest_folder = os.path.join(source_folder, model_name + "_safetensors")

    delete_old = input("Delete old PyTorch files? (Y/N): ").strip().upper() == 'Y'

    if "pytorch_model.bin" in os.listdir(source_folder):
        convert_file(os.path.join(source_folder, "pytorch_model.bin"), os.path.join(dest_folder, "model.safetensors"), copy_add_data=True)
        if delete_old:
            os.remove(os.path.join(source_folder, "pytorch_model.bin"))
    else:
        convert_files(source_folder, dest_folder, delete_old)

if __name__ == "__main__":
    main()
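The `shared_pointers` deduplication above matters because safetensors refuses to serialize tensors that share storage (a common pattern when, e.g., embedding and output weights are tied). A small sketch of the idea, assuming `torch` is installed; the tensor names here are illustrative, not from any real checkpoint:

```python
import torch
from collections import defaultdict

def shared_pointers(tensors):
    # Same grouping as in the converter: names bucketed by storage address.
    ptrs = defaultdict(list)
    for k, v in tensors.items():
        ptrs[v.data_ptr()].append(k)
    return [names for names in ptrs.values() if len(names) > 1]

w = torch.randn(3, 3)
state = {"embed.weight": w, "lm_head.weight": w, "bias": torch.zeros(3)}
groups = shared_pointers(state)
# "embed.weight" and "lm_head.weight" share storage, so the converter
# would drop one of them before save_file; "bias" is untouched.
```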
4 changes: 4 additions & 0 deletions convert_model/pytorch-to-safetensor-converter/requirements.txt
@@ -0,0 +1,4 @@
safetensors
torch
tqdm
numpy