# config.py
import os

from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv('../.env')

HF_TOKEN = os.getenv("HF_TOKEN")
RUNPOD_API_KEY = os.getenv("RUNPOD_API_KEY")

if not RUNPOD_API_KEY:
    raise ValueError("RUNPOD_API_KEY is missing. Please set it in your environment variables.")
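
# Given the relative path passed to load_dotenv above, the .env file is
# expected one directory above config.py. A minimal example (values elided):
#
#   HF_TOKEN=...
#   RUNPOD_API_KEY=...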


class ModelCapabilities:
    """String tags describing which inputs a model accepts."""

    TEXT = "text"
    TEXT_AND_IMAGE = "text_and_image"

# Define model configurations
MODEL_CONFIGS = {
    # 0.5B Qwen model
    "SGLang-Qwen/Qwen2.5-0.5B": {
        "name": "SGLang-Qwen/Qwen2.5-0.5B",
        "image_name": "lmsysorg/sglang:latest",
        "docker_args": (
            "python3 -m sglang.launch_server "
            "--model-path Qwen/Qwen2.5-0.5B "
            "--mem-fraction-static 0.95 "
            "--host 0.0.0.0 "
            "--port 8000"
        ),
        "cloud_type": "SECURE",
        "volume_in_gb": 5,
        "ports": "8000/http",
        "container_disk_in_gb": 10,
        "volume_mount_path": "/root/.cache/huggingface",
        "env": {"HF_TOKEN": HF_TOKEN, "HF_HUB_ENABLE_HF_TRANSFER": "1"},
        "preferred_gpu_names": ["RTX 4090", "RTX 4080", "RTX 6000 Ada", "RTX A6000"],
    },
    # 32B Qwen Coder Instruct
    "SGLang-Qwen/Qwen2.5-Coder-32B-Instruct": {
        "name": "SGLang-Qwen/Qwen2.5-Coder-32B-Instruct",
        "image_name": "lmsysorg/sglang:latest",
        "docker_args": (
            "python3 -m sglang.launch_server "
            "--model-path Qwen/Qwen2.5-Coder-32B-Instruct "
            "--mem-fraction-static 0.95 "
            "--host 0.0.0.0 "
            "--port 8000"
        ),
        "cloud_type": "SECURE",
        "volume_in_gb": 100,
        "ports": "8000/http",
        "container_disk_in_gb": 50,
        "volume_mount_path": "/root/.cache/huggingface",
        "env": {"HF_TOKEN": HF_TOKEN, "HF_HUB_ENABLE_HF_TRANSFER": "1"},
        "preferred_gpu_names": ["H100 PCIe", "H100 NVL", "H100 SXM", "RTX A6000"],
    },
}

# Default settings for pods
DEFAULT_POD_SETTINGS = {
    "image_name": "lmsysorg/sglang:latest",
    "cloud_type": "SECURE",
    "ports": "8000/http",
    "container_disk_in_gb": 10,
    "volume_in_gb": 100,
    "volume_mount_path": "/root/.cache/huggingface",
    "env": {
        "HF_HUB_ENABLE_HF_TRANSFER": "1",
        "HF_TOKEN": HF_TOKEN,
    },
    # Autoscaling/monitoring knobs (presumably in seconds)
    "scale_cooldown": 180,
    "metrics_window": 60,
    "monitor_interval": 15,
}
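

# The helper below is a hypothetical sketch (not part of the project's API)
# showing how a MODEL_CONFIGS entry could be overlaid on DEFAULT_POD_SETTINGS
# to produce a final pod spec; the function name and merge semantics are
# assumptions.
def build_pod_settings(model_key: str) -> dict:
    """Overlay a model's config on the pod defaults (model values win)."""
    model_cfg = MODEL_CONFIGS[model_key]
    settings = {**DEFAULT_POD_SETTINGS, **model_cfg}
    # Merge the nested env dicts too, so model-specific env vars extend
    # rather than replace the defaults.
    settings["env"] = {**DEFAULT_POD_SETTINGS["env"], **model_cfg.get("env", {})}
    return settings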

# Configuration for the pipeline
pipeline_config = {
    "arango_config": {
        "host": "http://localhost:8529",
        "username": "root",
        "password": "openSesame",  # NOTE: hardcoded for local development; prefer an env var elsewhere
        "db_name": "verifaix",
        "collection_name": "test_documents",
        "cache_collection_name": "litellm_cache",  # Stores cached litellm responses
        "truncate_cache": True,  # Truncate the cache collection before starting
    },
    "llm_config": {
        "model": "openai/Qwen/Qwen2.5-0.5B",
        "max_tokens": 50,
        "temperature": 0.7,
        "api_base": "api_base",  # Placeholder; set dynamically once the pod endpoint is known
    },
}
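

# A hypothetical sketch of how llm_config might be consumed: the api_base
# placeholder is replaced with the live pod endpoint and the config is passed
# to litellm. This function and the endpoint argument are assumptions for
# illustration, not the project's actual pipeline code.
def query_llm(prompt: str, api_base: str) -> str:
    """Send a single prompt to the configured model via litellm."""
    import litellm  # deferred import so config.py stays dependency-light

    llm = pipeline_config["llm_config"]
    response = litellm.completion(
        model=llm["model"],
        messages=[{"role": "user", "content": prompt}],
        api_base=api_base,  # e.g. the pod's public 8000/http endpoint
        max_tokens=llm["max_tokens"],
        temperature=llm["temperature"],
    )
    return response.choices[0].message.content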