Merge branch 'main' into moderation-log

BabyChouSr committed Nov 28, 2024
2 parents 87b6390 + 1cd4b74 commit 4c9c98f
Showing 18 changed files with 579 additions and 308 deletions.
27 changes: 27 additions & 0 deletions README.md
@@ -237,6 +237,33 @@ This is the user interface that users will interact with.
By following these steps, you will be able to serve your models using the web UI. You can now open your browser and chat with a model.
If the models do not show up, try restarting the Gradio web server.

## Launch Chatbot Arena (side-by-side battle UI)

Currently, Chatbot Arena is powered by FastChat. Here is how you can launch an instance of Chatbot Arena locally.

FastChat supports popular API-based models such as OpenAI, Anthropic, Gemini, Mistral, and more. To add a custom API, please refer to the model support [doc](./docs/model_support.md). Below, we take OpenAI models as an example.

Create a JSON configuration file `api_endpoint.json` with the API endpoints of the models you want to serve, for example:
```
{
"gpt-4o-2024-05-13": {
"model_name": "gpt-4o-2024-05-13",
"api_base": "https://api.openai.com/v1",
"api_type": "openai",
"api_key": [Insert API Key],
"anony_only": false
}
}
```
For Anthropic models, specify `"api_type": "anthropic_message"` with your Anthropic key. Similarly, for Gemini models, specify `"api_type": "gemini"`. More details can be found in [api_provider.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/api_provider.py).
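
For illustration, Anthropic and Gemini entries might look like the following sketch (the model names are examples; the bracketed keys are placeholders):
```
{
  "claude-3-5-sonnet-20240620": {
    "model_name": "claude-3-5-sonnet-20240620",
    "api_type": "anthropic_message",
    "api_key": "[Insert Anthropic API Key]",
    "anony_only": false
  },
  "gemini-1.5-pro": {
    "model_name": "gemini-1.5-pro",
    "api_type": "gemini",
    "api_key": "[Insert Gemini API Key]",
    "anony_only": false
  }
}
```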

To serve your own model using local GPUs, follow the instructions in [Serving with Web GUI](#serving-with-web-gui).

Now you're ready to launch the server:
```
python3 -m fastchat.serve.gradio_web_server_multi --register-api-endpoint-file api_endpoint.json
```

#### (Optional): Advanced Features, Scalability, Third Party UI
- You can register multiple model workers with a single controller, either to serve a single model at higher throughput or to serve multiple models at the same time. When doing so, allocate different GPUs and ports to different model workers, as sketched below.
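
A minimal sketch, assuming the controller is running at its default address (the model paths and ports are illustrative):
```
# worker 0
CUDA_VISIBLE_DEVICES=0 python3 -m fastchat.serve.model_worker --model-path lmsys/vicuna-7b-v1.5 --controller http://localhost:21001 --port 31000 --worker http://localhost:31000

# worker 1
CUDA_VISIBLE_DEVICES=1 python3 -m fastchat.serve.model_worker --model-path lmsys/fastchat-t5-3b-v1.0 --controller http://localhost:21001 --port 31001 --worker http://localhost:31001
```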
4 changes: 2 additions & 2 deletions fastchat/constants.py
@@ -9,8 +9,8 @@

 # Survey Link URL (to be removed) #00729c
 SURVEY_LINK = """<div style='text-align: left; margin: 20px 0;'>
-    <div style='display: inline-block; border: 2px solid #C41E3A; padding: 20px; padding-bottom: 10px; padding-top: 10px; border-radius: 5px;'>
-        <span style='color: #C41E3A; font-weight: bold;'>New Launch! Jailbreak models at <a href='https://redarena.ai' style='color: #C41E3A; text-decoration: underline;'>RedTeam Arena</a>. </span>
+    <div style='display: inline-block; border: 2px solid #00729c; padding: 20px; padding-bottom: 10px; padding-top: 10px; border-radius: 5px;'>
+        <span style='color: #00729c; font-weight: bold;'>New Launch! Copilot Arena: <a href='https://marketplace.visualstudio.com/items?itemName=copilot-arena.copilot-arena' style='color: #00729c; text-decoration: underline;'>VS Code Extension</a> to compare Top LLMs</span>
     </div>
 </div>"""
 # SURVEY_LINK = ""
82 changes: 9 additions & 73 deletions fastchat/serve/api_provider.py
@@ -122,12 +122,14 @@ def get_api_provider_stream_iter(
         )
     elif model_api_dict["api_type"] == "bard":
         prompt = conv.to_openai_api_messages()
-        stream_iter = bard_api_stream_iter(
+        stream_iter = gemini_api_stream_iter(
             model_api_dict["model_name"],
             prompt,
-            temperature,
-            top_p,
-            api_key=model_api_dict["api_key"],
+            None,  # use Bard's default temperature
+            None,  # use Bard's default top_p
+            max_new_tokens,
+            api_key=(model_api_dict["api_key"] or os.environ["BARD_API_KEY"]),
+            use_stream=False,
         )
     elif model_api_dict["api_type"] == "mistral":
         if model_api_dict.get("vision-arena", False):
@@ -242,6 +244,7 @@ def get_api_provider_stream_iter(
             max_new_tokens,
             api_base=model_api_dict["api_base"],
             api_key=model_api_dict["api_key"],
+            conversation_id=state.conv_id,
         )
     else:
         raise NotImplementedError()
@@ -759,75 +762,6 @@ def gemini_api_stream_iter(
         }


-def bard_api_stream_iter(model_name, conv, temperature, top_p, api_key=None):
-    del top_p  # not supported
-    del temperature  # not supported
-
-    if api_key is None:
-        api_key = os.environ["BARD_API_KEY"]
-
-    # convert conv to conv_bard
-    conv_bard = []
-    for turn in conv:
-        if turn["role"] == "user":
-            conv_bard.append({"author": "0", "content": turn["content"]})
-        elif turn["role"] == "assistant":
-            conv_bard.append({"author": "1", "content": turn["content"]})
-        else:
-            raise ValueError(f"Unsupported role: {turn['role']}")
-
-    params = {
-        "model": model_name,
-        "prompt": conv_bard,
-    }
-    logger.info(f"==== request ====\n{params}")
-
-    try:
-        res = requests.post(
-            f"https://generativelanguage.googleapis.com/v1beta2/models/{model_name}:generateMessage?key={api_key}",
-            json={
-                "prompt": {
-                    "messages": conv_bard,
-                },
-            },
-            timeout=60,
-        )
-    except Exception as e:
-        logger.error(f"==== error ====\n{e}")
-        yield {
-            "text": f"**API REQUEST ERROR** Reason: {e}.",
-            "error_code": 1,
-        }
-
-    if res.status_code != 200:
-        logger.error(f"==== error ==== ({res.status_code}): {res.text}")
-        yield {
-            "text": f"**API REQUEST ERROR** Reason: status code {res.status_code}.",
-            "error_code": 1,
-        }
-
-    response_json = res.json()
-    if "candidates" not in response_json:
-        logger.error(f"==== error ==== response blocked: {response_json}")
-        reason = response_json["filters"][0]["reason"]
-        yield {
-            "text": f"**API REQUEST ERROR** Reason: {reason}.",
-            "error_code": 1,
-        }
-
-    response = response_json["candidates"][0]["content"]
-    pos = 0
-    while pos < len(response):
-        # simulate token streaming
-        pos += 5
-        time.sleep(0.001)
-        data = {
-            "text": response[:pos],
-            "error_code": 0,
-        }
-        yield data
-
-
 def ai2_api_stream_iter(
     model_name,
     model_id,
@@ -1262,6 +1196,7 @@ def metagen_api_stream_iter(
     max_new_tokens,
     api_key,
     api_base,
+    conversation_id,
 ):
     try:
         text_messages = []
@@ -1294,6 +1229,7 @@
"model": model_name,
"chunks_delimited": True,
"messages": messages,
"conversation_id": conversation_id,
"options": {
"max_tokens": max_new_tokens,
"generation_algorithm": "top_p",
Expand Down
12 changes: 9 additions & 3 deletions fastchat/serve/gradio_block_arena_anony.py
@@ -60,11 +60,11 @@ def load_demo_side_by_side_anony(models_, url_params):
     global models
     models = models_

-    states = (None,) * num_sides
-    selector_updates = (
+    states = [None] * num_sides
+    selector_updates = [
         gr.Markdown(visible=True),
         gr.Markdown(visible=True),
-    )
+    ]

     return states + selector_updates

@@ -522,6 +522,12 @@ def build_side_by_side_ui_anony(models):
elem_id="chatbot",
height=650,
show_copy_button=True,
latex_delimiters=[
{"left": "$", "right": "$", "display": False},
{"left": "$$", "right": "$$", "display": True},
{"left": r"\(", "right": r"\)", "display": False},
{"left": r"\[", "right": r"\]", "display": True},
],
)

with gr.Row():
12 changes: 9 additions & 3 deletions fastchat/serve/gradio_block_arena_named.py
@@ -50,7 +50,7 @@ def set_global_vars_named(enable_moderation_, use_remote_storage_):


 def load_demo_side_by_side_named(models, url_params):
-    states = (None,) * num_sides
+    states = [None] * num_sides

     model_left = models[0] if len(models) > 0 else ""
     if len(models) > 1:
@@ -60,10 +60,10 @@ def load_demo_side_by_side_named(models, url_params):
     else:
         model_right = model_left

-    selector_updates = (
+    selector_updates = [
         gr.Dropdown(choices=models, value=model_left, visible=True),
         gr.Dropdown(choices=models, value=model_right, visible=True),
-    )
+    ]

     return states + selector_updates

@@ -409,6 +409,12 @@ def build_side_by_side_ui_named(models):
elem_id=f"chatbot",
height=650,
show_copy_button=True,
latex_delimiters=[
{"left": "$", "right": "$", "display": False},
{"left": "$$", "right": "$$", "display": True},
{"left": r"\(", "right": r"\)", "display": False},
{"left": r"\[", "right": r"\]", "display": True},
],
)

with gr.Row():
6 changes: 6 additions & 0 deletions fastchat/serve/gradio_block_arena_vision.py
@@ -346,6 +346,12 @@ def build_single_vision_language_model_ui(
label="Scroll down and start chatting",
height=650,
show_copy_button=True,
latex_delimiters=[
{"left": "$", "right": "$", "display": False},
{"left": "$$", "right": "$$", "display": True},
{"left": r"\(", "right": r"\)", "display": False},
{"left": r"\[", "right": r"\]", "display": True},
],
)

with gr.Row():
6 changes: 6 additions & 0 deletions fastchat/serve/gradio_block_arena_vision_anony.py
@@ -474,6 +474,12 @@ def build_side_by_side_vision_ui_anony(context: Context, random_questions=None):
elem_id="chatbot",
height=650,
show_copy_button=True,
latex_delimiters=[
{"left": "$", "right": "$", "display": False},
{"left": "$$", "right": "$$", "display": True},
{"left": r"\(", "right": r"\)", "display": False},
{"left": r"\[", "right": r"\]", "display": True},
],
)

with gr.Row():
6 changes: 6 additions & 0 deletions fastchat/serve/gradio_block_arena_vision_named.py
@@ -409,6 +409,12 @@ def build_side_by_side_vision_ui_named(context: Context, random_questions=None):
elem_id=f"chatbot",
height=650,
show_copy_button=True,
latex_delimiters=[
{"left": "$", "right": "$", "display": False},
{"left": "$$", "right": "$$", "display": True},
{"left": r"\(", "right": r"\)", "display": False},
{"left": r"\[", "right": r"\]", "display": True},
],
)

with gr.Row():
6 changes: 6 additions & 0 deletions fastchat/serve/gradio_web_server.py
@@ -942,6 +942,12 @@ def build_single_model_ui(models, add_promotion_links=False):
label="Scroll down and start chatting",
height=650,
show_copy_button=True,
latex_delimiters=[
{"left": "$", "right": "$", "display": False},
{"left": "$$", "right": "$$", "display": True},
{"left": r"\(", "right": r"\)", "display": False},
{"left": r"\[", "right": r"\]", "display": True},
],
)
with gr.Row():
textbox = gr.Textbox(
60 changes: 60 additions & 0 deletions fastchat/serve/monitor/classify/README.md
@@ -0,0 +1,60 @@
## Download dataset
We have pre-generated several category-classifier benchmarks and ground-truth labels. You can download them (with [`git-lfs`](https://git-lfs.com) installed) to the directory `classify/` by running
```console
> git clone https://huggingface.co/datasets/lmarena-ai/categories-benchmark-eval
# cd into classify/ and then copy the label_bench directory to the current directory
> cp -r categories-benchmark-eval/label_bench .
```
Your `label_bench` directory should follow this structure:
```
├── label_bench/
│ ├── creative_writing_bench/
│ │ ├── data/
│ │ │ └── llama-v3p1-70b-instruct.json
│ │ └── test.json
│ ├── ...
│ ├── your_bench_name/
│ │ ├── data/
│ │ │ ├── your_classifier_data_1.json
│ │ │ ├── your_classifier_data_2.json
│ │ │ └── ...
│ │ └── test.json (your ground truth)
└── ...
```

## How to evaluate your category classifier

To evaluate your classifier for a new category, first make sure you have created the corresponding category child class in `category.py`. Then, to generate classification labels, make the necessary edits in `config.yaml` and run
```console
python label.py --config config.yaml --testing
```
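
For instance, after adding the new category, the `task_name` list in `config.yaml` would look like this (other fields unchanged):
```yaml
task_name:
    - criteria_v0.1
    - if_v0.1
    - math_v0.1
    - creative_writing_v0.1   # newly added category
```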

Next, add your new category bench to `tag_names` in `display_score.py`. Once you also have a correctly formatted ground-truth JSON file (`test.json`), you can report your classifier's performance by running
```console
python display_score.py --bench <your_bench>
```

To inspect conflicts between your classifier and the ground truth, use
```console
python display_score.py --bench <your_bench> --display-conflict
```

Example output:
```console
> python display_score.py --bench if_bench --display-conflict
Model: gpt-4o-mini-2024-07-18
Accuracy: 0.967
Precision: 0.684
Recall: 0.918

###### CONFLICT ######

Ground Truth = True; Pred = False
####################
...

Ground Truth = False; Pred = True
####################
...
```

40 changes: 40 additions & 0 deletions fastchat/serve/monitor/classify/category.py
@@ -24,6 +24,8 @@ def create_category(name):
         return CategoryIF()
     elif name == "math_v0.1":
         return CategoryMath()
+    elif name == "creative_writing_v0.1":
+        return CategoryCreativeWriting()

     raise Exception(f"Category name is incorrect: {name}")

@@ -134,3 +136,41 @@ def pre_process(self, prompt):
     def post_process(self, judgment):
         score = self.get_score(judgment=judgment)
         return {"math": bool(score == "yes") if score else False}
+
+
+class CategoryCreativeWriting(Category):
+    def __init__(self):
+        super().__init__()
+        self.name_tag = "creative_writing_v0.1"
+        self.pattern = re.compile(r"<decision>(\w+)<\/decision>")
+        self.system_prompt = 'You are tasked with determining whether a given user prompt is asking for creative writing. Creative writing is defined as any form of writing that goes beyond standard professional, journalistic, academic, or technical literature. It typically involves imagination, originality, and expression of thoughts and emotions. Creative writing can include, but is not limited to, the following formats:\n- Fiction (e.g., short stories, novels)\n- Poetry (e.g., sonnets, free verse)\n- Dramatic writing (e.g., screenplays, monologues, scripts)\n- Personal essays (focusing on subjective experiences or narrative storytelling)\n- Songs and lyrics\n\nCarefully analyze the user prompt and consider whether it primarily requires creative writing. Think about the following aspects:\n1. Does the prompt ask for fictional content, speculative scenarios, or the use of imagination to construct narratives?\n2. Does it encourage the expression of thoughts, emotions, or personal experiences beyond mere factual reporting or analysis?\n3. Is it asking for writing in a specific creative format (e.g., story, poem, script, etc)?\n4. Is the primary purpose of the prompt to foster creative expression or originality rather than information delivery, technical documentation, or analytical reasoning?\n5. Does the prompt request stylistic or rhetorical elements often associated with creative writing, such as metaphor, imagery, dialogue, etc?\n6. Does the prompt expect a response in natural language (e.g., sentences, paragraphs) rather than visual, mathematical, or non-linguistic output?\n\nOutput your verdict as either "yes" or "no" in the following format:\n<decision>\n[yes/no]\n</decision>. Do NOT explain.'
+        self.prompt_template = "<user_prompt>\n{PROMPT}\n</user_prompt>"
+
+    def get_score(self, judgment):
+        matches = self.pattern.findall(
+            judgment.replace("\n", "")
+            .replace("[", "")
+            .replace("]", "")
+            .replace(" ", "")
+            .lower()
+        )
+        matches = [m for m in matches if m != ""]
+        if len(set(matches)) == 0:
+            return None
+        elif len(set(matches)) == 1:
+            return matches[0]
+        else:
+            return None
+
+    def pre_process(self, prompt):
+        args = {"PROMPT": prompt}
+        conv = [
+            {"role": "system", "content": self.system_prompt},
+            {"role": "user", "content": self.prompt_template.format(**args)},
+        ]
+        return conv
+
+    def post_process(self, judgment):
+        score = self.get_score(judgment=judgment)
+        bool_score = bool(score == "yes") if score else False
+        return {"creative_writing": bool_score, "score": score}
1 change: 1 addition & 0 deletions fastchat/serve/monitor/classify/config.yaml
@@ -10,6 +10,7 @@ task_name:
     - criteria_v0.1
     - if_v0.1
     - math_v0.1
+    - creative_writing_v0.1

 model_name: null
 name: llama-3-70b-instruct