diff --git a/autogen/agentchat/contrib/reasoning_agent.py b/autogen/agentchat/contrib/reasoning_agent.py index c1e3391b89..c8c1c3db93 100644 --- a/autogen/agentchat/contrib/reasoning_agent.py +++ b/autogen/agentchat/contrib/reasoning_agent.py @@ -5,7 +5,12 @@ from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union from ..agent import Agent -from ..assistant_agent import AssistantAgent +from ..assistant_agent import AssistantAgent +import random +import math + +EPSILON = 1e-6 + TreeofThought_message = """ Role: Expert Planning AI Assistant @@ -63,11 +68,11 @@ def __init__(self, content: str, parent: Optional["ThinkNode"] = None) -> None: - Providing trajectory utilities to get the full path from root to this node """ self.content = content - self.value = None + self.value = 0 self.parent = parent self.depth = self.parent.depth + 1 if parent else 0 self.children = [] - self.visits = 0 # TODO: remove this line if not used. + self.visits = 0 if self.parent: self.parent.children.append(self) @@ -175,9 +180,101 @@ def add_nodes(node: ThinkNode, node_id: str = "0"): print("Make sure graphviz is installed on your system: https://graphviz.org/download/") + +def extract_sft_dataset(root): + """ + Extract the best trajectory or multiple equally good trajectories + for SFT training. + + Args: + root: The root node of the tree. + + Returns: + List of best trajectories, where each trajectory is a pair of instruction and response. + """ + instruction = root.content + idx = len("# Question: ") + len(root.content) + 1 + + def _find_leaf_nodes(node): + """Recursively find all leaf nodes.""" + if not node.children: + return [node] + leafs = [] + for child in node.children: + leafs.extend(_find_leaf_nodes(child)) + return leafs + + # Step 1: Find all leaf nodes + leaf_nodes = _find_leaf_nodes(root) + + # Step 2: Determine the highest score among leaf nodes + max_value = max(leaf_nodes, key=lambda x: x.value).value + + # Step 3: Collect all leaf nodes with the highest score + best_leafs = [leaf for leaf in leaf_nodes if leaf.value == max_value] + + # Step 4: Collect trajectories for all the best leaf nodes + best_trajectories = [{"instruction": instruction, "response": leaf.trajectory[idx:]} for leaf in best_leafs] + + return best_trajectories + + +def extract_rlhf_preference_dataset(root, contrastive_threshold=0.2): + """ + Extract and generate preference pairs for RLHF training by comparing sibling nodes. + + Args: + root: The root node of the tree. + contrastive_threshold (float): between (0, 1), a distance measure that we are confidence to call + one is positive and another is negative. + + Returns: + A list of preference pairs, where each pair contains two responses and + indicates which one is preferred. 
+ """ + preference_pairs = [] + + assert contrastive_threshold > 0 + assert contrastive_threshold < 1 + + def traverse_tree(node): + """Traverse the tree to compare sibling nodes and collect preferences.""" + if not node.children: + return # Leaf node, no comparisons needed + + # Step 1: Compare all sibling nodes + for i in range(len(node.children)): + for j in range(len(node.children)): + if i == j: + continue + child_a, child_b = node.children[i], node.children[j] + + is_a_better = False + if child_a.visits > 0 and child_b.visits > 0: + # for MCTS + is_a_better = child_a.value / child_a.visits - child_b.value / child_b.visits > contrastive_threshold + else: + # for Beam Search + is_a_better = child_a.value - child_b.value > contrastive_threshold + if is_a_better: + preference_pairs.append({ + "instruction": node.trajectory, + "preferred_response": f"Step {child_a.depth}: {child_a.content}", + "dispreferred_response": f"Step {child_b.depth}: {child_b.content}", + }) + + # Step 2: Recurse into child nodes + for child in node.children: + traverse_tree(child) + + # Start traversal from the root + traverse_tree(root) + + return preference_pairs + class ReasoningAgent(AssistantAgent): def __init__( - self, name, llm_config, max_depth=4, beam_size=3, answer_approach="pool", verbose=True, **kwargs + self, name, llm_config, max_depth=4, beam_size=3, answer_approach="pool", verbose=True, reason_config: dict=None, **kwargs ) -> None: """Initialize a ReasoningAgent that uses tree-of-thought reasoning., @@ -185,8 +282,8 @@ def __init__( name: Name of the agent llm_config: Configuration for the language model max_depth (int): Maximum depth of the reasoning tree - beam_size (int): Number of parallel reasoning paths to maintain - answer_approach (str): Either "pool" or "best" - how to generate final answer + beam_size (int): DEPRECATED. Number of parallel reasoning paths to maintain + answer_approach (str): DEPRECATED. 
Either "pool" or "best" - how to generate final answer verbose (bool): Whether to show intermediate steps """ super().__init__(name=name, llm_config=llm_config, **kwargs) @@ -202,7 +299,19 @@ def __init__( system_message="Rate the thinking trajectories for score 1 - 5 (1: worst, 5: best).", llm_config=llm_config, ) - self.register_reply([Agent, None], ReasoningAgent.generate_response) + + if reason_config: + method = reason_config.get("method", "beam_search") + if method == "beam_search": + self.register_reply([Agent, None], ReasoningAgent.generate_beam_response) + if "beam_size" in reason_config: + self.beam_size = reason_config["beam_size"] + if "answer_approach" in reason_config: + self.answer_approach = reason_config["answer_approach"] + elif method == "mcts": + self.register_reply([Agent, None], ReasoningAgent.generate_mcts_response) + self.mcts_simulations = reason_config.get("nsim", 10) + self.exploration_constant = reason_config.get("exploration_constant", 1.41) self._root = None @@ -216,7 +325,8 @@ def rate_node(self, node: ThinkNode) -> float: float: Normalized score between 0 and 1 indicating trajectory quality """ self.send( - message=f"Rate the trajectory:\n{node.trajectory}", recipient=self.grader, request_reply=True, silent=False + message=f"Rate:\n{node.trajectory}", recipient=self.grader, request_reply=True, + silent=not self.verbose, ) rating = self.grader.last_message()["content"].strip() try: @@ -226,7 +336,7 @@ def rate_node(self, node: ThinkNode) -> float: reward = 0.0 # Default reward if parsing fails return reward - def generate_response(self, messages, sender, config=None): + def generate_beam_response(self, messages, sender, config=None): """Generate a response using tree-of-thought reasoning. Implements beam search through a tree of reasoning steps, using the thinker @@ -257,29 +367,14 @@ def generate_response(self, messages, sender, config=None): while prev_leafs and len(final_answers) < self.beam_size: new_leafs = [] for node in prev_leafs: - if (self.max_depth and node.depth >= self.max_depth) or "TERMINATE" in node.content: + if self.is_terminal(node): # Reached max depth; collect possible answers if node.value is None: node.value = self.rate_node(node) final_answers.add(node) continue - self.thinker.clear_history() - self.send( - message=f"{node.trajectory}\n---\nWhat are the possible next steps?", - recipient=self.thinker, - request_reply=True, - silent=False, - ) - reply = self.thinker.last_message()["content"].strip() - - options = re.findall( - r"Option \d+:(.+?)(?=Option \d+:|$)", reply, re.DOTALL - ) # the options that the thinker provides - for option in options: - new_leafs.append( - ThinkNode(content=option.strip().rstrip(), parent=node) - ) # each option is a new leaf node + new_leafs += self.expand(node) prev_leafs = new_leafs @@ -321,3 +416,113 @@ def generate_response(self, messages, sender, config=None): final_answer = self.chat_messages[self][-1]["content"].strip() return True, final_answer + + def generate_mcts_response(self, messages, sender, config=None): + if sender == self: + return False, "" # Defer the LLM call to next reply functions. + + messages = self._oai_messages[sender] if messages is None else messages + prompt = messages[-1]["content"].strip() + if not prompt: + return True, "TERMINATE" + + # Extract the ground truth for more accurate evaluation. + # TODO: in the future, allow user to pass a callable (func) to calculate reward. 
+ if "GROUND_TRUTH" in prompt: + idx = prompt.find("GROUND_TRUTH") + prompt, ground_truth = prompt[:idx].rstrip(), prompt[idx:] + else: + ground_truth = None + + root = ThinkNode(content=prompt, parent=None) + self._root = root + answer_nodes = [] + + # TODO: future, parallelism with Swarm agent or AsyncOpenAI client. + for _ in range(self.mcts_simulations): + node = root + + # Selection + while not self.is_terminal(node) and len(node.children) > 0: + choices_weights = [ + # exploitation term + + (child.value / (child.visits + EPSILON)) + + # exploration term + self.exploration_constant * math.sqrt((2 * math.log(node.visits + EPSILON) / (child.visits + EPSILON))) + for child in node.children + ] + node = node.children[choices_weights.index(max(choices_weights))] + + # Expansion and Simulation + while not self.is_terminal(node): + if len(node.children) == 0: + self.expand(node) + node = random.choice(node.children) + + # Add answer (leaf) node and evaluate answer + self.send( + message=f"Answer the question {prompt}. Here is my thinking process:\n{node.trajectory}", + recipient=self, + request_reply=True, + silent=not self.verbose) + _answer = self.last_message(self)["content"].strip() + # We add the answer (as a node) to the leaf to help + # future logging and debugging. + _ans_node = ThinkNode(content=_answer, parent=node) + if ground_truth: + # override the system message + self.grader.update_system_message(f"Rate the answer for score 1 - 5 (1: worst, 5: best). The Ground Truth is:\n{ground_truth}") + + reward = self.rate_node(_ans_node) + _ans_node.value = reward + answer_nodes.append(_ans_node) + + # Backpropagation + while node is not None: + node.visits += 1 + if node.value is None: + node.value = reward + else: + node.value += reward + node = node.parent + + # Best action + best_ans_node = max(answer_nodes, key=lambda node: node.value) + return True, best_ans_node.content + + + def expand(self, node: ThinkNode) -> List: + """ + Expand the node by generating possible next steps based on the current trajectory. + + This method sends a message to the thinker agent, asking for possible next steps + that can be taken from the current node's trajectory. It processes the response to + extract the options provided by the thinker and creates new ThinkNode instances + for each option. + + Args: + node (ThinkNode): The node to expand, representing the current state in the reasoning process. + + Returns: + List[ThinkNode]: A list of new ThinkNode instances created from the options provided by the thinker. + """ + self.thinker.clear_history() + self.send( + message=f"{node.trajectory}\n---\nWhat are the possible next steps?", + recipient=self.thinker, + request_reply=True, + silent=not self.verbose) + reply = self.thinker.last_message()["content"].strip() + + # Extract options from reply using regex: + # - Matches text between "Option N:" and either next "Option N:" or end of string + # - (?=...) is a lookahead to match option boundary without including it + # - re.DOTALL allows . 
to match newlines + options = re.findall(r"Option \d+:(.+?)(?=Option \d+:|$)", reply, re.DOTALL) + + return [ThinkNode(content=option.strip().rstrip(), parent=node) for option in options] + + + def is_terminal(self, node): + return node.depth >= self.max_depth or "TERMINATE" in node.content + diff --git a/notebook/tree_of_thoughts.png b/notebook/tree_of_thoughts.png index 57825cdfff..85ffa38c25 100644 --- a/notebook/tree_of_thoughts.png +++ b/notebook/tree_of_thoughts.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6049d94ab2659ee43a0e50b2086c5e43a4bab419834a8c80acb1bbb5c780a65 -size 300686 +oid sha256:45a644fa66a8052cf166e9c691e45f2aa60e3ca6e30c629265b9d6d68898279b +size 396536 diff --git a/website/blog/2024-12-02-ReasoningAgent2/index.mdx b/website/blog/2024-12-02-ReasoningAgent2/index.mdx index d194c3f365..a4f747032f 100644 --- a/website/blog/2024-12-02-ReasoningAgent2/index.mdx +++ b/website/blog/2024-12-02-ReasoningAgent2/index.mdx @@ -5,7 +5,6 @@ authors: - BabyCNM - skzhang1 - sonichi - - BabyCNM - qingyunwu tags: [LLM, GPT, research] --- @@ -83,6 +82,7 @@ from autogen import ( visualize_tree ) + # Configure the model config_list = [{"model": "gpt-4", "api_key": os.environ.get("OPENAI_API_KEY")}] @@ -186,6 +186,7 @@ After asking a question to the `ReasoningAgent`, you only need to simply call th ```python import json + data = reasoning_agent._root.to_dict() with open("reasoning_tree.json", "w") as f: json.dump(data, f) @@ -208,43 +209,7 @@ new_node = pickle.load(open("reasoning_tree.pkl", "rb")) This step finds the best trajectory in the thought tree and converts it to a SFT dataset as a sequence of strings. The best trajectory is determined by following the highest-scoring path from root to leaf. ```python -def extract_sft_dataset(root): - """ - Extract the best trajectory or multiple equally good trajectories - for SFT training. - - Args: - root: The root node of the tree. - - Returns: - List of best trajectories, where each trajectory is a pair of instruction and response. - """ - instruction = root.content - idx = len("# Question: ") + len(root.content) + 1 - - def find_leaf_nodes(node): - """Recursively find all leaf nodes.""" - if not node.children: - return [node] - leafs = [] - for child in node.children: - leafs.extend(find_leaf_nodes(child)) - return leafs - - # Step 1: Find all leaf nodes - leaf_nodes = find_leaf_nodes(root) - - # Step 2: Determine the highest score among leaf nodes - max_value = max(leaf_nodes, key=lambda x: x.value).value - - # Step 3: Collect all leaf nodes with the highest score - best_leafs = [leaf for leaf in leaf_nodes if leaf.value == max_value] - - # Step 4: Collect trajectories for all the best leaf nodes - best_trajectories = [{"instruction": instruction, "response": leaf.trajectory[idx:]} for leaf in best_leafs] - - return best_trajectories - +from autogen.agentchat.contrib.reasoning_agent import extract_sft_dataset # Example usage sft_data = extract_sft_dataset(reason_agent._root) @@ -255,52 +220,7 @@ json.dump(sft_data, open("sft_data.json", "w"), indent=2) This step generates preference pairs by comparing sibling nodes in the tree. For each parent node with multiple children, we create training pairs where the higher-scored response is marked as preferred over the lower-scored one. ```python -def extract_rlhf_preference_dataset(root, contrastive_threshold=0.2): - """ - Extract and generate preference pairs for RLHF training by comparing sibling nodes. - - Args: - root: The root node of the tree. 
- contrastive_threshold (float): between (0, 1), a distance measure that we are confidence to call - one is positive and another is negative. - - Returns: - A list of preference pairs, where each pair contains two responses and - indicates which one is preferred. - """ - preference_pairs = [] - - assert contrastive_threshold > 0 - assert contrastive_threshold < 1 - - def traverse_tree(node): - """Traverse the tree to compare sibling nodes and collect preferences.""" - if not node.children: - return # Leaf node, no comparisons needed - - # Step 1: Compare all sibling nodes - for i in range(len(node.children)): - for j in range(len(node.children)): - if i == j: - continue - child_a, child_b = node.children[i], node.children[j] - if child_a.value - child_b.value > contrastive_threshold: - preference_pairs.append({ - "instruction": node.trajectory, - "preferred_response": f"Step {child_a.depth}: {child_a.content}", - "dispreferred_response": f"Step {child_b.depth}: {child_b.content}", - }) - - - # Step 2: Recurse into child nodes - for child in node.children: - traverse_tree(child) - - # Start traversal from the root - traverse_tree(root) - - return preference_pairs - +from autogen.agentchat.contrib.reasoning_agent import extract_rlhf_preference_dataset # Example usage rlhf_data = extract_rlhf_preference_dataset(reason_agent._root) diff --git a/website/blog/2024-12-18-Reasoning-and-MCTS/index.mdx b/website/blog/2024-12-18-Reasoning-and-MCTS/index.mdx new file mode 100644 index 0000000000..819062fe8f --- /dev/null +++ b/website/blog/2024-12-18-Reasoning-and-MCTS/index.mdx @@ -0,0 +1,189 @@ +--- +title: MCTS vs Beam Search in Reasoning Agent to Help LLM Post-Training +authors: + - BabyCNM + - Hk669 + - sonichi + - qingyunwu +tags: [LLM, GPT, research] +--- + +![Tree of Thoughts](img/reasoningagent_1.png) + +**TL;DR:** +* We introduce Monte Carlo Tree Search (MCTS) as an alternative to Beam Search in ReasoningAgent +* MCTS is particularly effective when ground truth evaluation is available or when LLM-based evaluation is expensive +* We provide detailed complexity analysis and comparison between MCTS and Beam Search approaches +* The resulting search trees can be used to generate high-quality training datasets for LLM fine-tuning + +## Introduction + +In our [previous post](/blog/2024-12-02-ReasoningAgent2), we introduced ReasoningAgent with Beam Search for systematic reasoning. Today, we explore an alternative approach using Monte Carlo Tree Search (MCTS) that offers unique advantages in certain scenarios, particularly when: + +1. Ground truth evaluation is available (either from human feedback or labeled data) +2. LLM-based evaluation is expensive or unreliable +3. You want to generate high-quality training data for future LLM fine-tuning + +## MCTS vs Beam Search: Key Differences + +### Search Strategy +- **Beam Search**: Maintains a fixed number (beam size) of most promising paths at each step +- **MCTS**: Dynamically explores the search space, balancing exploitation of known good paths with exploration of new possibilities + +### Evaluation Timing +- **Beam Search**: Evaluates every node at every step +- **MCTS**: Only evaluates leaf nodes during simulation, making it more efficient when evaluation is expensive + +### Memory Usage +- **Beam Search**: Memory usage is bounded by beam size × depth +- **MCTS**: Memory grows with number of simulations but focuses on promising paths + +## Implementation Details + +The MCTS implementation in ReasoningAgent follows four key steps: + +1. 
**Selection**: Choose nodes to explore using UCT (Upper Confidence Bound for Trees)
+```python
+choices_weights = [
+    # exploitation term
+    (child.value / (child.visits + EPSILON)) +
+    # exploration term
+    self.exploration_constant * math.sqrt((2 * math.log(node.visits + EPSILON) / (child.visits + EPSILON)))
+    for child in node.children
+]
+```
+
+2. **Expansion**: Generate possible next steps using the thinker agent
+```python
+# Expansion happens through the expand() method
+new_nodes = self.expand(node)
+```
+
+3. **Simulation**: Run random simulations to leaf nodes
+```python
+while not self.is_terminal(node):
+    if len(node.children) == 0:
+        self.expand(node)
+    node = random.choice(node.children)
+```
+
+4. **Backpropagation**: Update node statistics based on simulation results
+```python
+while node is not None:
+    node.visits += 1
+    if node.value is None:
+        node.value = reward
+    else:
+        node.value += reward
+    node = node.parent
+```
+
+### Ground Truth Evaluation
+
+ReasoningAgent now supports ground truth evaluation by allowing users to include a "GROUND_TRUTH" marker in their prompts. This enables more accurate evaluation of reasoning paths:
+
+```python
+# Example usage with ground truth
+prompt = """What is the expected maximum dice value if you can roll a 6-sided die three times?
+
+GROUND_TRUTH:
+We define X as the highest outcome among the three rolls.
+The probability that X is at least m is 1 - \left(\frac{m-1}{6}\right)^3 for each m from 1 to 6.
+Summing these probabilities gives the expectation E(X) = \sum_{m=1}^{6} [1 - (\frac{m-1}{6})^3].
+Calculating this sum results in E(X) = 6 - \frac{225}{216} = \frac{119}{24}, which approximates to 4.9583.
+Therefore, the expected maximum value when rolling a six-sided die three times is \frac{119}{24} or approximately 4.9583."""
+
+# The agent will use the ground truth to provide more accurate evaluation scores
+ans = user_proxy.initiate_chat(mcts_agent, message=prompt)
+```
+
+When ground truth is provided:
+1. The agent automatically splits the prompt into the question and ground truth
+2. The grader's system message is updated to include the ground truth
+3. Evaluation scores become more reliable since they're based on actual correct answers
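+
+The `mcts_agent` used above is an ordinary `ReasoningAgent` constructed with the new `reason_config` argument. A minimal sketch (the agent names here are illustrative, and `config_list` is assumed to be the one defined in the earlier ReasoningAgent post):
+
+```python
+from autogen import UserProxyAgent
+from autogen.agentchat.contrib.reasoning_agent import ReasoningAgent
+
+# MCTS-based reasoning: 10 simulations, UCT exploration constant of about 1.41
+mcts_agent = ReasoningAgent(
+    name="mcts_agent",
+    llm_config={"config_list": config_list},
+    reason_config={"method": "mcts", "nsim": 10, "exploration_constant": 1.41},
+)
+
+# Beam-search reasoning (the original behavior), for comparison
+beam_agent = ReasoningAgent(
+    name="beam_agent",
+    llm_config={"config_list": config_list},
+    reason_config={"method": "beam_search", "beam_size": 3, "answer_approach": "pool"},
+)
+
+user_proxy = UserProxyAgent(
+    name="user_proxy",
+    human_input_mode="NEVER",
+    code_execution_config=False,
+    max_consecutive_auto_reply=10,
+)
+```
+
+Passing `reason_config={"method": "beam_search", ...}` reproduces the beam-search behavior described in the previous post, so switching between the two strategies is a one-line change.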
+
+Ground truth evaluation is particularly useful for:
+- Training data generation with verified correct answers
+- Educational applications where correct solutions are known
+- Fine-tuning reward models with ground truth supervision
+
+
+## Generating Training Data
+
+Both MCTS and Beam Search can generate valuable training data, but in different ways:
+
+### From MCTS:
+```python
+from autogen.agentchat.contrib.reasoning_agent import extract_sft_dataset, extract_rlhf_preference_dataset
+
+# Get SFT data from successful paths
+sft_data = extract_sft_dataset(reason_agent._root)
+
+# Get preference pairs for RLHF
+rlhf_data = extract_rlhf_preference_dataset(reason_agent._root)
+```
+
+The MCTS approach tends to generate:
+- More diverse reasoning paths
+- Better exploration of alternative solutions
+- Stronger contrast between good and bad paths (useful for RLHF)
+
+## Complexity Analysis
+
+Let's analyze the computational complexity of both approaches, where:
+
+- $d$: maximum depth of the search tree
+- $w$: average branching factor (options per node)
+- $n$: number of Monte Carlo simulations
+- $b$: beam size
+
+### MCTS
+- **Time Complexity**: $O(n \times d)$
+  - Each simulation traverses at most depth $d$
+  - Performs $n$ simulations
+- **Memory Complexity**: $O(n \times d \times w)$ worst case (each simulation expands at most $d$ nodes, each adding up to $w$ children), but typically much lower in practice
+  - The tree grows only along visited paths
+  - Focuses on promising branches
+
+### Beam Search
+- **Time Complexity**: $O(d \times b \times (w + 1))$
+  - At each of the $d$ levels, each of the $b$ beams makes one generation call that proposes up to $w$ options
+  - Each of those options is then rated, giving $b \times (w + 1)$ LLM calls per level
+- **Memory Complexity**: $O(b \times d)$
+  - Maintains $b$ paths
+  - Each path has depth at most $d$
+
+A rough numerical comparison of these call counts is sketched in the appendix at the end of this post.
+
+## When to Use Each Approach
+
+### Use MCTS when:
+1. You have reliable ground truth evaluation
+2. LLM-based evaluation is expensive
+3. You want to generate training data with diverse, high-quality reasoning paths
+4. Exploration of the solution space is important
+
+### Use Beam Search when:
+1. Exploration is less important, because the quality of earlier steps is indicative of the quality of later steps
+2. LLM-based evaluation is cheap and reliable
+3. The problem space is well-structured
+4. Memory constraints are strict
+
+
+## Conclusion
+
+While both MCTS and Beam Search are valuable approaches for ReasoningAgent, they serve different purposes:
+
+- MCTS excels at thorough exploration and generating training data
+- Beam Search is more efficient for quick, direct problem-solving
+
+The choice between them should be based on your specific needs regarding:
+- Evaluation cost and availability
+- Time and resource constraints
+- Intended use of the results
+
+## For Further Reading
+
+* [Original ReasoningAgent with Beam Search](/blog/2024-12-02-ReasoningAgent2)
+* [Documentation about ReasoningAgent](/docs/reference/agentchat/contrib/reasoning_agent)
+* [MCTS in Wikipedia](https://en.wikipedia.org/wiki/Monte_Carlo_tree_search)
+
+*Join our [Discord](https://discord.com/invite/pAbnFJrkgZ) server to discuss your experiences with these approaches and suggest improvements.*
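+
+## Appendix: A Rough Count of LLM Calls
+
+The complexity formulas above translate directly into LLM-call budgets. The sketch below plugs illustrative values of $d$, $w$, $n$, and $b$ (assumptions chosen for the example, not measurements) into the $O(n \times d)$ and $O(d \times b \times (w + 1))$ estimates:
+
+```python
+# Illustrative parameters (assumptions, not benchmarks)
+d = 4   # maximum tree depth
+w = 3   # options proposed per expansion
+n = 10  # MCTS simulations
+b = 3   # beam size
+
+# MCTS: each simulation walks down at most d levels (roughly one thinker call per
+# newly expanded level), then makes one answer call and one grading call at the leaf.
+mcts_calls = n * (d + 2)
+
+# Beam search: at every level, each beam makes one thinker call that proposes w
+# options, and each option is rated once, i.e. b * (w + 1) calls per level.
+beam_calls = d * b * (w + 1)
+
+print(f"MCTS ~ {mcts_calls} LLM calls")         # ~60 with the values above
+print(f"Beam search ~ {beam_calls} LLM calls")  # 48 with the values above
+```
+
+The crossover point depends mostly on how many simulations MCTS is allowed and how expensive the grader is: MCTS rates only leaf answers, while beam search rates every candidate step.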