From 3ff58a3143b4092fd0b80e493f0841a75f24b68e Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Wed, 13 Nov 2024 10:45:02 +0800
Subject: [PATCH] Update README.md

---
 README.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/README.md b/README.md
index 3401deb..5204857 100644
--- a/README.md
+++ b/README.md
@@ -163,6 +163,25 @@ response = client.chat.completions.create(
 )
 ```
 
+You can also use alternate decoding techniques such as `cot_decoding` and `entropy_decoding` directly with the local inference server.
+
+```python
+response = client.chat.completions.create(
+    model="meta-llama/Llama-3.2-1B-Instruct",
+    messages=messages,
+    temperature=0.2,
+    extra_body={
+        "decoding": "cot_decoding",  # or "entropy_decoding"
+        # CoT specific params
+        "k": 10,
+        "aggregate_paths": True,
+        # OR Entropy specific params
+        "top_k": 27,
+        "min_p": 0.03,
+    }
+)
+```
+
 ### Starting the optillm proxy with an external server (e.g. llama.cpp or ollama)
 
 - Set the `OPENAI_API_KEY` env variable to a placeholder value