-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpre_download_models.py
65 lines (51 loc) · 2.08 KB
/
pre_download_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import nltk
import requests
import timm
from transformers import AutoModel
def download_file_from_url(url, save_path):
    """Download a file from a specified URL to a specified save path.

    The parent directory of ``save_path`` is created if it does not exist.
    The body is streamed to disk in 8 KiB chunks. On a non-200 response
    nothing is written and the status code is printed instead.

    Args:
        url: URL to fetch with an HTTP GET request.
        save_path: Destination file path; an existing file is overwritten.
    """
    # os.path.dirname() returns "" for a bare file name, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    target_dir = os.path.dirname(save_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # The context manager releases the streamed connection even if writing
    # fails part-way; the timeout keeps a stalled server from hanging forever.
    with requests.get(url, stream=True, timeout=60) as response:
        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            return
        with open(save_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    print(f"File downloaded and saved to {save_path}.")
def download_transformers_model(model_name):
    """Load a Hugging Face Transformers model so that it is downloaded and cached.

    Args:
        model_name: Model identifier passed to ``AutoModel.from_pretrained``.
    """
    print(f"Downloading and caching {model_name} from Hugging Face Transformers...")
    # Only the download side effect matters here; the loaded model is discarded.
    AutoModel.from_pretrained(model_name)
    print(f"Model {model_name} downloaded and cached successfully.")
def download_timm_model(model_name):
    """Create a TIMM model with pretrained weights so that they are downloaded and cached.

    Args:
        model_name: Model name passed to ``timm.create_model``.
    """
    print(f"Downloading and caching {model_name} from TIMM...")
    # Only the download side effect matters here; the created model is discarded.
    timm.create_model(model_name, pretrained=True)
    print(f"Model {model_name} downloaded and cached successfully.")
def download_nltk_data():
    """Download the NLTK data sets this project needs into ``/root/nltk_data``.

    Each data set is requested individually. Any that NLTK reports as failed
    are listed in a single message; the success message is printed only when
    every download succeeded. Finally the contents of the ``tokenizers``
    sub-directory are printed, if it exists, as a quick sanity check.
    """
    download_dir = "/root/nltk_data"
    nltk_data_sets = [
        "punkt",
        "punkt_tab",
        "wordnet",
        "averaged_perceptron_tagger",
        "averaged_perceptron_tagger_eng",
        "stopwords",
    ]

    print("Downloading NLTK data sets...")
    failed = []
    for data_set in nltk_data_sets:
        # nltk.download() signals failure through its boolean return value
        # rather than by raising, so it has to be checked explicitly.
        if not nltk.download(data_set, download_dir=download_dir):
            failed.append(data_set)

    if failed:
        print(f"Failed to download NLTK data sets: {', '.join(failed)}")
    else:
        print("NLTK data sets downloaded and cached successfully.")

    # The directory is absent when the tokenizer downloads failed; skip the
    # listing in that case instead of raising FileNotFoundError.
    tokenizers_dir = os.path.join(download_dir, "tokenizers")
    if os.path.isdir(tokenizers_dir):
        print(os.listdir(tokenizers_dir))
if __name__ == "__main__":
    # Hugging Face Transformers model
    download_transformers_model("microsoft/table-transformer-structure-recognition")

    # YOLOX model
    # The Hub's "resolve" endpoint serves the raw file. The "blob" endpoint
    # returns the HTML file-viewer page with status 200, which would be saved
    # in place of the ONNX weights.
    model_url = (
        "https://huggingface.co/unstructuredio/yolo_x_layout/resolve/main/yolox_l0.05.onnx"
    )
    save_path = "/root/.cache/huggingface/hub/models--unstructuredio--yolo_x_layout/yolox_l0.05.onnx"
    download_file_from_url(model_url, save_path)

    # TIMM model
    download_timm_model("resnet18.a1_in1k")

    # NLTK data
    download_nltk_data()