diff --git a/benchmark/configs/auxo/auxo.yml b/benchmark/configs/auxo/auxo.yml
new file mode 100644
index 00000000..aca16936
--- /dev/null
+++ b/benchmark/configs/auxo/auxo.yml
@@ -0,0 +1,52 @@
+# Configuration file of the Auxo experiment
+
+# ========== Cluster configuration ==========
+# ip address of the parameter server (needs 1 GPU process)
+ps_ip: localhost
+ps_port: 12345
+
+# ip address of each worker : number of available gpu processes on each gpu in this node
+# Note that if we collocate the ps and a worker on the same GPU, we need to decrease the number of available processes on that GPU by 1
+# E.g., if the master node has 4 available processes, 1 is used by the ps, so the worker should be set to worker:3
+worker_ips:
+    - localhost:[7,7,0,0] # worker_ip: [(# processes on gpu) for gpu in available_gpus], e.g., 10.0.0.2:[4,4,4,4] means this node has 4 GPUs with 4 processes each
+
+exp_path: $FEDSCALE_HOME/examples/auxo
+
+# Entry functions of the executor and aggregator under $exp_path
+executor_entry: executor.py
+
+aggregator_entry: aggregator.py
+
+auth:
+    ssh_user: ""
+    ssh_private_key: ~/.ssh/id_rsa
+
+# Commands to run (in order) before launching FedScale
+setup_commands:
+    - source $HOME/anaconda3/bin/activate fedscale
+
+# ========== Additional job configuration ==========
+# Default parameters are specified in config_parser.py, where more descriptions of the parameters can be found
+
+job_conf:
+    - job_name: auxo_femnist # Generate logs under this folder: log_path/job_name/time_stamp
+    - log_path: $FEDSCALE_HOME/benchmark # Path of log files
+    - num_participants: 200 # Number of participants per round; we use K=100 in our paper, and a larger K will be much slower
+    - data_set: femnist # Dataset: openImg, google_speech, stackoverflow
+    - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset
+    - data_map_file: $FEDSCALE_HOME/benchmark/dataset/data/femnist/client_data_mapping/train.csv # Allocation of data to each client; falls back to an iid setting if not provided
+    - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity # Path of the client trace
+    - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
+    - model: resnet18 # NOTE: Please refer to our model zoo README and use models suited to small image inputs (e.g., 32x32x3)
+#    - model_zoo: fedscale-torch-zoo
+    - eval_interval: 20 # Number of rounds between evaluations on the testing set
+    - rounds: 1000 # Number of training rounds; we use 1000 in our paper, though training may converge within ~400 rounds
+    - filter_less: 0 # Remove clients with fewer than this many samples
+    - num_loaders: 2
+    - local_steps: 10
+    - learning_rate: 0.05
+    - batch_size: 20
+    - test_bsz: 20
+    - use_cuda: True
+    - save_checkpoint: False
diff --git a/examples/auxo/Dockerfile b/examples/auxo/Dockerfile
new file mode 100644
index 00000000..8e5bb3e3
--- /dev/null
+++ b/examples/auxo/Dockerfile
@@ -0,0 +1,29 @@
+# Use an official CUDA image as a parent image
+FROM nvidia/cuda:11.0-base-ubuntu20.04
+
+# Set the working directory inside the container
+WORKDIR /app
+
+# Install necessary system packages (Ubuntu 20.04 ships Python 3.8)
+RUN apt-get update && apt-get install -y python3.8 python3-pip
+
+# Create a virtual environment; sourcing venv/bin/activate in a RUN layer does not persist, so put the venv on PATH instead
+RUN python3.8 -m pip install virtualenv
+RUN python3.8 -m virtualenv venv
+ENV PATH="/app/venv/bin:$PATH"
+
+# Copy the requirements file into the container
+COPY requirements.txt .
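+# NOTE (assumption): requirements.txt is expected to live next to this
+# Dockerfile and pin the FedScale Python dependencies; it is not included
+# in this diff.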
+
+# Install the Python dependencies
+RUN pip install --upgrade pip && pip install -r requirements.txt
+
+# Copy the project files into the container (assuming the project is in the current directory)
+COPY . .
+
+# Install the project using pip
+RUN pip install -e .
+
+# Command to run when the container starts
+CMD ["bash"]
diff --git a/examples/auxo/README.md b/examples/auxo/README.md
new file mode 100644
index 00000000..4ac86fdf
--- /dev/null
+++ b/examples/auxo/README.md
@@ -0,0 +1,72 @@
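+# Auxo: Efficient Federated Learning via Scalable Client Clustering
+
+Auxo is a client-heterogeneity manager for FedScale: it clusters clients with
+similar gradient behavior into cohorts and trains one model per cohort
+([paper](https://arxiv.org/abs/2210.16656)).
+
+A minimal launch sketch, assuming the standard FedScale driver:
+
+    source $HOME/anaconda3/bin/activate fedscale
+    python docker/driver.py start benchmark/configs/auxo/auxo.yml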
+ + \ No newline at end of file diff --git a/examples/auxo/aggregator.py b/examples/auxo/aggregator.py new file mode 100644 index 00000000..829bf837 --- /dev/null +++ b/examples/auxo/aggregator.py @@ -0,0 +1,600 @@ +# -*- coding: utf-8 -*- +import logging + +from fedscale.cloud.aggregation.aggregator import * +from client_manager import HeterClientManager +from resource_manager import AuxoResourceManager +from utils.helper import * + +class AuxoAggregator(Aggregator): + def __init__(self, args): + super().__init__(args) + + self.sampled_participants = [[]] + self.sampled_executors = [[]] + self.round_stragglers = [[]] + self.stats_util_accumulator = [[]] + self.loss_accumulator = [[]] + self.client_training_results = [[]] + self.test_result_accumulator = [[]] + self.virtual_client_clock = [[]] + self.testing_history = [{'data_set': args.data_set, 'model': args.model, 'sample_mode': args.sample_mode, + 'gradient_policy': args.gradient_policy, 'task': args.task, + 'perf': collections.OrderedDict()}] + + self.model_in_update = [0] + # self.last_saved_round = [0] + self.tasks_round = [0] + self.global_virtual_clock = [0.] + self.round_duration = [0.] + self.model_update_size = [0.] + self.round = [0] + + self.stop_cluster = 0 + self.split_cluster = 1 + self.num_split = 2 + self.resource_manager = AuxoResourceManager(self.experiment_mode) + + def init_model(self): + """Initialize the model""" + if self.args.engine == commons.TENSORFLOW: + self.model_wrapper = [TensorflowModelAdapter(init_model())] + elif self.args.engine == commons.PYTORCH: + self.model_wrapper = [TorchModelAdapter( + init_model(), + optimizer=TorchServerOptimizer( + self.args.gradient_policy, self.args, self.device))] + else: + raise ValueError(f"{self.args.engine} is not a supported engine.") + self.model_weights = [self.model_wrapper[0].get_weights()] + + def init_client_manager(self, args): + """ Initialize client sampler + + Args: + args (dictionary): Variable arguments for fedscale runtime config. defaults to the setup in arg_parser.py + + Returns: + ClientManager: The client manager class + + Currently we implement two client managers: + + 1. Random client sampler - it selects participants randomly in each round + [Ref]: https://arxiv.org/abs/1902.01046 + + 2. Oort sampler + Oort prioritizes the use of those clients who have both data that offers the greatest utility + in improving model accuracy and the capability to run training quickly. + [Ref]: https://www.usenix.org/conference/osdi21/presentation/lai + + 3. 
Auxo: Client Heterogeneity Manager + [Ref]: https://arxiv.org/abs/2210.16656 + """ + + # sample_mode: random or oort + client_manager = HeterClientManager(args.sample_mode, args=args) + + return client_manager + + def event_monitor(self): + """Activate event handler according to the received new message + """ + logging.info("Start monitoring events ...") + + while True: + # Broadcast events to clients + if len(self.broadcast_events_queue) > 0: + current_event = self.broadcast_events_queue.popleft() + logging.info(f"Event {current_event} is broadcasted to clients") + event_type, cohort_id = decode_msg(current_event) + + if event_type in (commons.UPDATE_MODEL, commons.MODEL_TEST): + self.dispatch_client_events(current_event) + + elif event_type == commons.START_ROUND: + self.dispatch_client_events(generate_msg(commons.CLIENT_TRAIN, cohort_id)) + + elif event_type == 'split': + self.dispatch_client_events(current_event) + + elif event_type == commons.SHUT_DOWN: + self.dispatch_client_events(current_event) + break + + # Handle events queued on the aggregator + elif len(self.sever_events_queue) > 0: + + client_id, current_event, meta, data = self.sever_events_queue.popleft() + logging.info(f"Event {current_event} is received from client {client_id}") + event_type, cohort_id = decode_msg(current_event) + + if event_type == commons.UPLOAD_MODEL: + self.client_completion_handler( + self.deserialize_response(data), cohort_id) + logging.info(f"[Cohort {cohort_id}] Client {client_id} has completed the task. {len(self.stats_util_accumulator[cohort_id])} v.s. {self.tasks_round[cohort_id]}") + if len(self.stats_util_accumulator[cohort_id]) == self.tasks_round[cohort_id]: + self.round_completion_handler(cohort_id) + + elif event_type == commons.MODEL_TEST: + self.testing_completion_handler( + client_id, self.deserialize_response(data), cohort_id) + else: + logging.error(f"Event {current_event} is not defined") + + else: + # execute every 100 ms + time.sleep(0.1) + + def CLIENT_REGISTER(self, request, context): + """FL TorchClient register to the aggregator + + Args: + request (RegisterRequest): Registeration request info from executor. + + Returns: + ServerResponse: Server response to registeration request + + """ + + # NOTE: client_id = executor_id in deployment, + # while multiple client_id uses the same executor_id (VMs) in simulations + executor_id = request.executor_id + executor_info = self.deserialize_response(request.executor_info) + if executor_id not in self.individual_client_events: + # logging.info(f"Detect new client: {executor_id}, executor info: {executor_info}") + self.individual_client_events[executor_id] = collections.deque() + else: + logging.info(f"Previous client: {executor_id} resumes connecting") + + # We can customize whether to admit the clients here + self.executor_info_handler(executor_id, executor_info) + dummy_data = self.serialize_response(generate_msg(commons.DUMMY_RESPONSE, 0)) + + return job_api_pb2.ServerResponse(event=generate_msg(commons.DUMMY_EVENT, 0), + meta=dummy_data, data=dummy_data) + + def get_test_config(self, client_id, cohort_id=0): + """FL model testing on clients, developers can further define personalized client config here. + + Args: + client_id (int): The client id. + + Returns: + dictionary: The testing config for new task. 
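+            Example (hypothetical ids): ``{'client_id': [14, 27, 83]}``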
+ + """ + num_client = self.client_manager.schedule_plan() + client_list = self.select_participants(num_client, overcommitment = 1, cohort_id = cohort_id, test=True) + return {'client_id': client_list} + + + def CLIENT_PING(self, request, context): + """Handle client ping requests + + Args: + request (PingRequest): Ping request info from executor. + + Returns: + ServerResponse: Server response to ping request + + """ + # NOTE: client_id = executor_id in deployment, + # while multiple client_id may use the same executor_id (VMs) in simulations + executor_id, client_id = request.executor_id, request.client_id + response_data = response_msg = generate_msg(commons.DUMMY_RESPONSE, 0) + + if len(self.individual_client_events[executor_id]) == 0: + # send dummy response + current_event = generate_msg(commons.DUMMY_EVENT, 0) + response_data = response_msg = current_event + else: + current_event = self.individual_client_events[executor_id].popleft() + event_type, cohort_id = decode_msg(current_event) + if event_type == commons.CLIENT_TRAIN: + response_msg, response_data = self.create_client_task( + executor_id, cohort_id) + if response_msg is None: + current_event = generate_msg(commons.DUMMY_EVENT, 0) + if self.experiment_mode != commons.SIMULATION_MODE: + self.individual_client_events[executor_id].append( + commons.CLIENT_TRAIN) + elif event_type == commons.MODEL_TEST: + # TODO: remove fedscale test and add individual client testing + response_msg = self.get_test_config(client_id, cohort_id) + elif event_type == commons.UPDATE_MODEL: + response_data = self.model_wrapper[cohort_id].get_weights() + elif event_type == commons.SHUT_DOWN: + response_msg = self.get_shutdown_config(executor_id) + + response_msg, response_data = self.serialize_response( + response_msg), self.serialize_response(response_data) + # NOTE: in simulation mode, response data is pickle for faster (de)serialization + response = job_api_pb2.ServerResponse(event=current_event, + meta=response_msg, data=response_data) + if decode_msg(current_event)[0] != commons.DUMMY_EVENT: + logging.info(f"Issue EVENT ({current_event}) to EXECUTOR ({executor_id})") + + return response + + def CLIENT_EXECUTE_COMPLETION(self, request, context): + """FL clients complete the execution task. + + Args: + request (CompleteRequest): Complete request info from executor. + + Returns: + ServerResponse: Server response to job completion request + + """ + executor_id, client_id, event = request.executor_id, request.client_id, request.event + execution_status, execution_msg = request.status, request.msg + meta_result, data_result = request.meta_result, request.data_result + event_type, cohort_id = decode_msg(event) + if event_type == commons.CLIENT_TRAIN: + # Training results may be uploaded in CLIENT_EXECUTE_RESULT request later, + # so we need to specify whether to ask client to do so (in case of straggler/timeout in real FL). 
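+            # Cohort-tagged event sketch (generate_msg/decode_msg come from
+            # utils/helper, which is not part of this diff; the exact string
+            # format below is an assumption):
+            #   generate_msg(commons.CLIENT_TRAIN, cohort_id=1)  ->  e.g. 'client_train__1'
+            #   decode_msg('client_train__1')                    ->  ('client_train', 1)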
+ if execution_status is False: + logging.error(f"Executor {executor_id} fails to run client {client_id}, due to {execution_msg}") + + if self.resource_manager.has_next_task(executor_id, cohort_id): + # NOTE: we do not pop the train immediately in simulation mode, + # since the executor may run multiple clients + if commons.CLIENT_TRAIN not in self.individual_client_events[executor_id]: + self.individual_client_events[executor_id].append(generate_msg(commons.CLIENT_TRAIN, cohort_id)) + + elif event_type in (commons.MODEL_TEST, commons.UPLOAD_MODEL): + self.add_event_handler( + executor_id, event, meta_result, data_result) + else: + logging.error(f"Received undefined event {event} from client {client_id}") + + return self.CLIENT_PING(request, context) + + + def create_client_task(self, executor_id, cohort_id): + """Issue a new client training task to specific executor + + Args: + executorId (int): Executor Id. + cohort_id (int): Cohort Id. + + Returns: + tuple: Training config for new task. (dictionary, PyTorch or TensorFlow module) + + """ + next_client_id = self.resource_manager.get_next_task(executor_id, cohort_id) + train_config = None + # NOTE: model = None then the executor will load the global model broadcasted in UPDATE_MODEL + if next_client_id is not None: + config = self.get_client_conf(next_client_id) + train_config = {'client_id': next_client_id, 'task_config': config, 'cohort_id': cohort_id} + + return train_config, self.model_wrapper[cohort_id].get_weights() + + + def client_completion_handler(self, results, cohort_id): + """We may need to keep all updates from clients, + if so, we need to append results to the cache + + Args: + results (dictionary): client's training result + + """ + # Format: + # -results = {'client_id':client_id, 'update_weight': model_param, 'moving_loss': round_train_loss, + # 'trained_size': count, 'wall_duration': time_cost, 'success': is_success 'utility': utility} + + if self.args.gradient_policy in ['q-fedavg']: + self.client_training_results[cohort_id].append(results) + # Feed metrics to client sampler + self.stats_util_accumulator[cohort_id].append(results['utility']) + self.loss_accumulator[cohort_id].append(results['moving_loss']) + + self.client_manager.register_feedback(results['client_id'], results['utility'], + auxi=math.sqrt( + results['moving_loss']), + time_stamp=self.round[cohort_id], + duration=self.virtual_client_clock[cohort_id][results['client_id']]['computation'] + + self.virtual_client_clock[cohort_id][results['client_id']]['communication'], + w_new = results['update_weight'], + w_old = self.model_wrapper[cohort_id].get_weights(), + cohort_id=cohort_id + ) + + # ================== Aggregate weights ====================== + self.update_lock.acquire() + + self.model_in_update[cohort_id] += 1 + self.update_weight_aggregation(results, cohort_id) + + self.update_lock.release() + + def tictak_client_tasks(self, sampled_clients, num_clients_to_collect, cohort_id): + """Record sampled client execution information in last round. In the SIMULATION_MODE, + further filter the sampled_client and pick the top num_clients_to_collect clients. + + Args: + sampled_clients (list of int): Sampled clients from client manager + num_clients_to_collect (int): The number of clients actually needed for next round. + + Returns: + Tuple: (the List of clients to run, the List of stragglers in the round, a Dict of the virtual clock of each + client, the duration of the aggregation round, and the durations of each client's task). 
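+
+            Unlike upstream FedScale, all bookkeeping here is indexed by
+            ``cohort_id`` so that several cohorts can progress concurrently.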
+ + """ + if self.experiment_mode == commons.SIMULATION_MODE: + # NOTE: We try to remove dummy events as much as possible in simulations, + # by removing the stragglers/offline clients in overcommitment""" + sampledClientsReal = [] + completionTimes = [] + completed_client_clock = {} + # 1. remove dummy clients that are not available to the end of training + for client_to_run in sampled_clients: + client_cfg = self.client_conf.get(client_to_run, self.args) + + exe_cost = self.client_manager.get_completion_time(client_to_run, + batch_size=client_cfg.batch_size, + local_steps=client_cfg.local_steps, + upload_size=self.model_update_size, + download_size=self.model_update_size) + + roundDuration = exe_cost['computation'] + \ + exe_cost['communication'] + # if the client is not active by the time of collection, we consider it is lost in this round + if self.client_manager.isClientActive(client_to_run, roundDuration + self.global_virtual_clock[cohort_id]): + sampledClientsReal.append(client_to_run) + completionTimes.append(roundDuration) + completed_client_clock[client_to_run] = exe_cost + + num_clients_to_collect = min( + num_clients_to_collect, len(completionTimes)) + # 2. get the top-k completions to remove stragglers + workers_sorted_by_completion_time = sorted( + range(len(completionTimes)), key=lambda k: completionTimes[k]) + top_k_index = workers_sorted_by_completion_time[:num_clients_to_collect] + clients_to_run = [sampledClientsReal[k] for k in top_k_index] + + stragglers = [sampledClientsReal[k] + for k in workers_sorted_by_completion_time[num_clients_to_collect:]] + round_duration = completionTimes[top_k_index[-1]] + completionTimes.sort() + + return (clients_to_run, stragglers, + completed_client_clock, round_duration, + completionTimes[:num_clients_to_collect]) + else: + completed_client_clock = { + client: {'computation': 1, 'communication': 1} for client in sampled_clients} + completionTimes = [1 for c in sampled_clients] + return (sampled_clients, sampled_clients, completed_client_clock, + 1, completionTimes) + + def update_default_task_config(self, cohort_id): + """Update the default task configuration after each round + """ + # TODO: fix the lr update + if self.round[cohort_id] % self.args.decay_round == 0: + self.args.learning_rate = max( + self.args.learning_rate * self.args.decay_factor, self.args.min_learning_rate) + + def select_participants(self, select_num_participants, overcommitment=1.3, cohort_id=0, test=False): + """Select clients for next round. + + Args: + select_num_participants (int): Number of clients to select. + overcommitment (float): Overcommit ration for next round. + + Returns: + list of int: The list of sampled clients id. 
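+
+            Only clients currently assigned to ``cohort_id`` are considered;
+            with ``test=True`` the client manager prioritizes pivot clients
+            (see ``HeterClientManager.select_participants``).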
+ + """ + return sorted(self.client_manager.select_participants( + int(select_num_participants * overcommitment), + cur_time=self.global_virtual_clock[cohort_id], + cohort_id=cohort_id, + test=test) + ) + + + + def _init_split_config(self, cohort_id): + def increment_config( in_list ): + in_list.append([]) + in_list[cohort_id ] = [] + + self.model_wrapper.append(copy.deepcopy(self.model_wrapper[cohort_id])) + self.global_virtual_clock.append(copy.deepcopy(self.global_virtual_clock[cohort_id])) + self.model_in_update.append(0) + increment_config(self.round_stragglers) + increment_config(self.virtual_client_clock ) + increment_config(self.round_duration) + increment_config(self.test_result_accumulator ) + increment_config(self.stats_util_accumulator) + increment_config(self.client_training_results) + increment_config(self.tasks_round) + increment_config(self.loss_accumulator) + self.model_weights.append(copy.deepcopy(self.model_weights[cohort_id])) + self.testing_history.append(copy.deepcopy(self.testing_history[cohort_id])) + self.round.append(copy.deepcopy(self.round[cohort_id])) + increment_config(self.sampled_participants) + + def _split_participant_list(self, cohort_id, num_split = 2): + + for s in range(num_split-1): + self._init_split_config(cohort_id) + cohort_id_list = [cohort_id, self.split_cluster - 1] if num_split == 2 else [*range(num_split)] + + for cid in range(num_split): + + # num_client_per_round = self.args.num_participants * self.client_manager.get_cohort_size(cohort_id_list[cid]) // self.total_clients + num_client_per_round = self.client_manager.schedule_plan(self.round[cohort_id_list[cid]] , cid ) + num_client_per_round = max(num_client_per_round,1 ) + + self.sampled_participants[cohort_id_list[cid]] = self.select_participants(select_num_participants = num_client_per_round, \ + overcommitment=self.args.overcommitment, cohort_id = cohort_id_list[cid]) + clients_to_run, round_stragglers, virtual_client_clock, round_duration, _ = \ + self.tictak_client_tasks(self.sampled_participants[cohort_id_list[cid]], num_client_per_round, cohort_id_list[cid]) + self.round_stragglers[cohort_id_list[cid]] = round_stragglers + self.resource_manager.register_tasks(clients_to_run, cohort_id_list[cid]) + self.tasks_round[cohort_id_list[cid]] = len(clients_to_run) + self.virtual_client_clock[cohort_id_list[cid]] = virtual_client_clock + self.round_duration[cohort_id_list[cid]] = round_duration + self.model_in_update[cohort_id_list[cid]] = 0 + self.test_result_accumulator[cohort_id_list[cid]] = [] + self.stats_util_accumulator[cohort_id_list[cid]] = [] + self.client_training_results[cohort_id_list[cid]] = [] + self.loss_accumulator[cohort_id_list[cid]] = [] + + return cohort_id_list + + + def _is_first_result_in_round(self, cohort_id): + return self.model_in_update[cohort_id] == 1 + + def _is_last_result_in_round(self, cohort_id): + return self.model_in_update[cohort_id] == self.tasks_round[cohort_id] + + + def update_weight_aggregation(self, results, cohort_id = 0): + """Updates the aggregation with the new results. + + :param results: the results collected from the client. 
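+        :param cohort_id: index of the cohort whose model is being aggregated.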
+ """ + update_weights = results['update_weight'] + if type(update_weights) is dict: + update_weights = [x for x in update_weights.values()] + if self._is_first_result_in_round(cohort_id): + self.model_weights[cohort_id] = update_weights + else: + self.model_weights[cohort_id] = [weight + update_weights[i] for i, weight in enumerate(self.model_weights[cohort_id])] + if self._is_last_result_in_round(cohort_id): + self.model_weights[cohort_id] = [np.divide(weight, self.tasks_round[cohort_id]) for weight in self.model_weights[cohort_id]] + self.model_wrapper[cohort_id].set_weights(copy.deepcopy(self.model_weights[cohort_id])) + + + def round_completion_handler(self, cohort_id = 0): + """Triggered upon the round completion, it registers the last round execution info, + broadcast new tasks for executors and select clients for next round. + """ + self.global_virtual_clock[cohort_id] += self.round_duration[cohort_id] + self.round[cohort_id] += 1 + last_round_avg_util = sum(self.stats_util_accumulator[cohort_id]) / max(1, len(self.stats_util_accumulator[cohort_id])) + # assign avg reward to explored, but not ran workers + for client_id in self.round_stragglers[cohort_id]: + self.client_manager.register_feedback(client_id, last_round_avg_util, + time_stamp=self.round[cohort_id], + duration=self.virtual_client_clock[cohort_id][client_id]['computation'] + + self.virtual_client_clock[cohort_id][client_id]['communication'], + success=False) + + avg_loss = sum(self.loss_accumulator[cohort_id]) / max(1, len(self.loss_accumulator[cohort_id])) + logging.info(f"[Cohort {cohort_id}] Wall clock: {round(self.global_virtual_clock[cohort_id])} s, round: {self.round[cohort_id]}, Planned participants: " + + f"{len(self.sampled_participants[cohort_id])}, Succeed participants: {len(self.stats_util_accumulator[cohort_id])}, Training loss: {avg_loss}") + + at_split = False + if self.round[cohort_id] > 1: # TODO: replace with clustering start round + at_split = self.client_manager.cohort_clustering(self.round[cohort_id], cohort_id ) + + # TODO: add split and non-split logic: update the stats and select participants + if at_split: + self.split_cluster = len(self.client_manager.feasibleClients) + self.resource_manager.split(cohort_id) + new_cohort_id_list = self._split_participant_list(cohort_id, self.num_split) + + else: + num_client_per_round = self.client_manager.schedule_plan(self.round[cohort_id], cohort_id) + # update select participants + self.sampled_participants[cohort_id] = self.select_participants( + select_num_participants=num_client_per_round, overcommitment=self.args.overcommitment, cohort_id=cohort_id) + (clients_to_run, round_stragglers, virtual_client_clock, round_duration, + flatten_client_duration) = self.tictak_client_tasks( + self.sampled_participants[cohort_id], num_client_per_round, cohort_id) + + logging.info(f"Selected participants to run: {clients_to_run}") + + # Issue requests to the resource manager; Tasks ordered by the completion time + self.resource_manager.register_tasks(clients_to_run, cohort_id) + self.tasks_round[cohort_id] = len(clients_to_run) + + # Update executors and participants + if self.experiment_mode == commons.SIMULATION_MODE: + self.sampled_executors = list( + self.individual_client_events.keys()) + else: + self.sampled_executors = [str(c_id) + for c_id in self.sampled_participants] + self.round_stragglers[cohort_id] = round_stragglers + self.virtual_client_clock[cohort_id] = virtual_client_clock + self.flatten_client_duration = np.array(flatten_client_duration) + 
self.round_duration[cohort_id] = round_duration + self.model_in_update[cohort_id] = 0 + self.test_result_accumulator[cohort_id] = [] + self.stats_util_accumulator[cohort_id] = [] + self.client_training_results[cohort_id] = [] + self.loss_accumulator[cohort_id] = [] + self.update_default_task_config(cohort_id) + + if self.round[cohort_id] >= self.args.rounds: + self.broadcast_aggregator_events(generate_msg(commons.SHUT_DOWN)) + elif at_split: + self.broadcast_aggregator_events(generate_msg('split', cohort_id=cohort_id)) + for cid in new_cohort_id_list: + self.broadcast_aggregator_events(generate_msg(commons.UPDATE_MODEL, cohort_id=cid)) + self.broadcast_aggregator_events(generate_msg(commons.START_ROUND, cohort_id=cid)) + elif self.round[cohort_id] % self.args.eval_interval == 0 or self.round[cohort_id] == 1: + self.broadcast_aggregator_events(generate_msg(commons.UPDATE_MODEL, cohort_id=cohort_id)) + self.broadcast_aggregator_events(generate_msg(commons.MODEL_TEST, cohort_id=cohort_id)) + else: + self.broadcast_aggregator_events(generate_msg(commons.UPDATE_MODEL, cohort_id=cohort_id)) + self.broadcast_aggregator_events(generate_msg(commons.START_ROUND, cohort_id=cohort_id)) + + def aggregate_test_result(self, cohort_id): + accumulator = self.test_result_accumulator[cohort_id][0] + for i in range(1, len(self.test_result_accumulator[cohort_id])): + if self.args.task == "detection": + for key in accumulator: + if key == "boxes": + for j in range(596): + accumulator[key][j] = accumulator[key][j] + \ + self.test_result_accumulator[cohort_id][i][key][j] + else: + accumulator[key] += self.test_result_accumulator[cohort_id][i][key] + else: + for key in accumulator: + accumulator[key] += self.test_result_accumulator[cohort_id][i][key] + self.testing_history[cohort_id]['perf'][self.round[cohort_id]] = {'round': self.round[cohort_id], 'clock': self.global_virtual_clock[cohort_id]} + for metric_name in accumulator.keys(): + if metric_name == 'test_loss': + self.testing_history[cohort_id]['perf'][self.round[cohort_id]]['loss'] = accumulator['test_loss'] \ + if self.args.task == "detection" else accumulator['test_loss'] / accumulator['test_len'] + elif metric_name not in ['test_len']: + self.testing_history[cohort_id]['perf'][self.round[cohort_id]][metric_name] \ + = accumulator[metric_name] / accumulator['test_len'] + + round_perf = self.testing_history[cohort_id]['perf'][self.round[cohort_id]] + logging.info( + "FL Testing in round: {}, virtual_clock: {}, results: {}" + .format(self.round[cohort_id], self.global_virtual_clock[cohort_id], round_perf)) + + def testing_completion_handler(self, client_id, results, cohort_id): + """Each executor will handle a subset of testing dataset + + Args: + client_id (int): The client id. + results (dictionary): The client test results. 
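+            cohort_id (int): The cohort id.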
+ + """ + + results = results['results'] + # List append is thread-safe + self.test_result_accumulator[cohort_id].append(results) + + # Have collected all testing results + if len(self.test_result_accumulator[cohort_id]) == len(self.executors): + self.aggregate_test_result(cohort_id) + self.broadcast_aggregator_events(generate_msg(commons.START_ROUND, cohort_id=cohort_id)) + + +if __name__ == "__main__": + aggregator = AuxoAggregator(parser.args) + aggregator.run() diff --git a/examples/auxo/client_manager.py b/examples/auxo/client_manager.py new file mode 100644 index 00000000..86409efd --- /dev/null +++ b/examples/auxo/client_manager.py @@ -0,0 +1,268 @@ + +from fedscale.cloud.client_manager import * +from collections import defaultdict +from client_metadata import AuxoClientMetadata +import logging +import numpy as np +from sklearn import preprocessing +from collections import defaultdict +import copy +from clustering import QTable +from sklearn.manifold import TSNE +from config import auxo_config + + +class HeterClientManager(ClientManager): + def __init__(self, mode, args, sample_seed=233): + ''' + Manage cohort membership; + Manane client selection; + Manage cohort training resources usage + ''' + super().__init__(mode, args, sample_seed) + + self.round_clt = defaultdict(list) + self.grad_div_dict = defaultdict(list) + # self.split_round = defaultdict(bool) + self.stop_cluster = False + self.gradient_list = defaultdict(list) + self.feasibleClients = [[]] + + self.total_res = args.num_participants + self.round_acc = defaultdict(dict) + self.num_cluster = 1 + self.latest_acc_list = {0:1} + + logging.info(f'Client manager initialized with auxo config: {auxo_config}') + + + def register_client(self, host_id: int, client_id: int, size: int, speed: Dict[str, float], + duration: float = 1) -> None: + """Register client information to the client manager. + + Args: + host_id (int): executor Id. + client_id (int): client Id. + size (int): number of samples on this client. + speed (Dict[str, float]): device speed (e.g., compuutation and communication). + duration (float): execution latency. 
+ + """ + uniqueId = self.getUniqueId(host_id, client_id) + user_trace = None if self.user_trace is None else self.user_trace[self.user_trace_keys[int( + client_id) % len(self.user_trace)]] + + self.client_metadata[uniqueId] = AuxoClientMetadata(host_id, client_id, speed, user_trace) + # remove clients + # if size >= self.filter_less and size <= self.filter_more: + self.feasibleClients[0].append(client_id) + self.feasible_samples += size + + if self.mode == "oort": + feedbacks = {'reward': min(size, self.args.local_steps * self.args.batch_size), + 'duration': duration, + } + self.ucb_sampler.register_client(client_id, feedbacks=feedbacks) + # else: + # del self.client_metadata[uniqueId] + + + def getUniqueId(self, host_id, client_id): + return int(client_id) + # return (str(host_id) + '_' + str(client_id)) + + def getFeasibleClients(self, cur_time: float, cohort_id: int = 0): + if self.user_trace is None: + clients_online = self.feasibleClients[cohort_id] + else: + clients_online = [client_id for client_id in self.feasibleClients[cohort_id] if self.client_metadata[self.getUniqueId( + 0, client_id)].is_active(cur_time)] + + logging.info(f"Wall clock time: {cur_time}, {len(clients_online)} clients online, " + + f"{len(self.feasibleClients[cohort_id]) - len(clients_online)} clients offline") + + return clients_online + + + def select_participants(self, num_of_clients: int, cur_time: float = 0, cohort_id: int = 0, test: bool = False) -> List[int]: + """Select participating clients for current execution task. + + Args: + num_of_clients (int): number of participants to select. + cur_time (float): current wall clock time. + + Returns: + List[int]: indices of selected clients. + + """ + + clients_online = self.getFeasibleClients(cur_time, cohort_id) + + if len(clients_online) <= num_of_clients: + return clients_online + + self.gradient_list[cohort_id] = [] + pickled_clients = None + clients_online_set = set(clients_online) + + if test: + pivot_client = self.reward_qtable.return_pivot_client(cohort_id) + pivot_client = list(set(pivot_client) & set(self.feasibleClients[cohort_id])) + self.rng.shuffle(pivot_client) + extra_clt = list(set(clients_online) - set(pivot_client)) + self.rng.shuffle(extra_clt) + + pickled_clients = pivot_client + extra_clt + client_len = min(num_of_clients, len(pickled_clients) - 1) + pickled_clients = pickled_clients[:client_len] + + elif self.mode == "oort": + pickled_clients = self.ucb_sampler.select_participant( + num_of_clients, feasible_clients=clients_online_set) + else: + self.rng.shuffle(clients_online) + client_len = min(num_of_clients, len(clients_online) - 1) + pickled_clients = clients_online[:client_len] + return pickled_clients + + def getDataInfo(self): + train_ratio = self.args.num_participants / len(self.feasibleClients[0]) + avg_train_times = self.args.rounds * self.args.num_participants / len( + self.feasibleClients[0]) + known_clt = len(self.feasibleClients[0]) // 20 + + split_round = auxo_config['split_round'] + exploredecay = auxo_config['exploredecay'] + explorerate = auxo_config['explorerate'] + self.reduction = auxo_config['reduction'] + metric = auxo_config['metric'] + + self.reward_qtable = QTable(1 + len(self.feasibleClients[0]), known_clt = known_clt, + split_round = split_round,\ + elbow_constant=0.97, train_ratio=train_ratio, avg_train_times=avg_train_times, \ + epsilon=explorerate, epsilon_decay = exploredecay,\ + metric=metric) + + return {'total_feasible_clients': len(self.feasibleClients[0]), 'total_num_samples': self.feasible_samples} + 
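+    # Scheduling sketch: schedule_plan() below splits the per-round training
+    # budget evenly across live cohorts, e.g. total_res = num_participants = 200
+    # and num_cluster = 4 gives 200 // 4 = 50 slots per cohort
+    # (illustrative numbers).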
+ def get_cohort_size(self, cohort_id): + return len(self.feasibleClients[cohort_id]) + + def register_feedback(self, client_id: int, reward: float, auxi: float = 1.0, time_stamp: float = 0, + duration: float = 1., success: bool = True, w_new = None, w_old = None, cohort_id = 0) -> None: + """Collect client execution feedbacks of last round. + + Args: + client_id (int): client Id. + reward (float): execution utilities (processed feedbacks). + auxi (float): unprocessed feedbacks. + time_stamp (float): current wall clock time. + duration (float): system execution duration. + success (bool): whether this client runs successfully. + + """ + # currently, we only use distance as reward + if self.mode == "oort": + feedbacks = { + 'reward': reward, + 'duration': duration, + 'status': True, + 'time_stamp': time_stamp + } + + self.ucb_sampler.update_client_util(client_id, feedbacks=feedbacks) + + if w_new is not None: + grad_norm = self._register_client_grad(client_id, w_new, w_old, cohort_id) + return + + def _register_client_grad(self, client_id, w_new, w_old, cohort_id): + if not self.stop_cluster: + grad_norm = self.client_metadata[client_id].register_gradient(w_new, w_old) + self.round_clt[cohort_id].append(client_id) + self.gradient_list[cohort_id].append(grad_norm) + return grad_norm + return 0 + + def cohort_clustering(self, round, cohort_id=0): + '''Clustering cohort results for current rounds; Update reward table + Args: + round: current round + cohort_id: current cohort + Returns: + whether to split the cohort + ''' + if round < auxo_config['start_round']: + return False + global_index = [int(gid) for gid in self.round_clt[cohort_id]] # [1,2,3,5,7] + logging.info(f'Clustering clients global index {global_index}') + if len(global_index) < 5: + return False + gradient_list = [self.client_metadata[clt].gradient for clt in self.round_clt[cohort_id]] + + # if self.reduction: + # norm_grad = TSNE(n_components=3, init='random').fit_transform(np.array(gradient_list)) + # elif self.distance == 'kl': + # norm_grad = centered_grad = np.array(gradient_list) + # else: + avg_grad = np.mean(gradient_list, axis=0) + centered_grad = gradient_list - avg_grad + norm_grad = preprocessing.normalize(centered_grad) + + # update sub reward + logging.info(f'Update intra-cluster relation ') + self.reward_qtable.knn_update_subR(cohort_id, norm_grad, global_index, False if round > 500 else True) + logging.info(f'Update inter-cluster relation ') + split = self.reward_qtable.update_mainR(cohort_id, centered_grad, global_index, False if round > 500 else True) + + if split: + self._split(len(self.feasibleClients)) + logging.info(f'SPLIT at round {round} ') + self.feasibleClients.append([]) + for i in range(len(self.feasibleClients)): + self.feasibleClients[i] = list(np.argwhere(self.reward_qtable.y_kmeans == i).reshape(-1)) + + # update feasible clients + if len(self.feasibleClients) > 1 and split == False: + for clt in self.round_clt[cohort_id]: # str + new_label = int(self.reward_qtable.y_kmeans[int(clt)]) + if int(clt) in self.feasibleClients[cohort_id] and new_label != cohort_id: + self.feasibleClients[cohort_id].remove(int(clt)) + self.feasibleClients[new_label].append(int(clt)) + elif int(clt) not in self.feasibleClients[cohort_id] and new_label == cohort_id: + for c in range(len(self.feasibleClients)): + if int(clt) in self.feasibleClients[c]: + self.feasibleClients[c].remove(int(clt)) + self.feasibleClients[cohort_id].append(int(clt)) + + self.round_clt[cohort_id] = [] + + self._print_cohort(round) + return 
split + + def _print_cohort(self, round): + size_ratio = [len(cluster) for cluster in self.feasibleClients] + logging.info(f'Round {round} FeasibleClients client number : {size_ratio}') + + def schedule_plan(self, round=0, cohort_id=0) -> int: + """ Schedule the training resources for each cohort + + Args: + round: + cohort_id: + + Returns: + int: number of training resources for each cohort + """ + # TODO: schedule based on accuracy + return self.total_res // self.num_cluster + + def _split(self, cohort_id): + self.num_cluster += 1 + if cohort_id not in self.latest_acc_list: + self.latest_acc_list[cohort_id] = 1 + + def update_eval(self, r, acc, clusterID): + self.round_acc[r][clusterID] = acc + if len(self.round_acc[r]) == self.num_cluster: + self.latest_acc_list = self.round_acc[r] \ No newline at end of file diff --git a/examples/auxo/client_metadata.py b/examples/auxo/client_metadata.py new file mode 100644 index 00000000..87c961e5 --- /dev/null +++ b/examples/auxo/client_metadata.py @@ -0,0 +1,17 @@ +import logging + +from fedscale.cloud.internal.client_metadata import * + +class AuxoClientMetadata(ClientMetadata): + def __init__(self, host_id, client_id, speed, traces=None): + super().__init__(host_id, client_id, speed, traces) + self.grad_ratio = 10 + + def register_gradient(self, W, W_old): + W_old = [dt.cpu().numpy() for dt in W_old] + W = [W[k] for k in W] + gradient = [pb - pa for pa, pb in zip(W, W_old)] + gradient = np.concatenate([v.flatten() for v in gradient]) + val_len = len(gradient) // self.grad_ratio # change grad size + self.gradient = np.float16(gradient[-val_len:]) + return self.gradient diff --git a/examples/auxo/clustering.py b/examples/auxo/clustering.py new file mode 100644 index 00000000..007124be --- /dev/null +++ b/examples/auxo/clustering.py @@ -0,0 +1,359 @@ +import matplotlib.pyplot as plt +import numpy as np +from sklearn.metrics import silhouette_score +from random import Random +from collections import defaultdict +from sklearn.cluster import KMeans +from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier +from sklearn.cluster import MiniBatchKMeans +import logging, time +from utils.klkmeans import KLKmeans + + +class QEntry(): + def __init__(self, init_R=1, r0=1, train_times_R=0): + self.R = init_R + self.sub_R = [init_R / 2, init_R / 2] + self.lr = 0.9 + self.r0 = r0 # lambda : np.random.normal(r0, r0/20) + self.train_times_R = train_times_R + + def update_R(self, new_reward): + old_R = self.R + # self.R = self.lr * new_reward + self.R * (1 - self.lr) + self.R = new_reward + self.R + delta_R = self.R - old_R + self.sub_R[0] += delta_R / 2 + self.sub_R[1] += delta_R / 2 + + def update_sub(self, sub_id): + self.sub_R[sub_id] += self.r0 # () + self.sub_R[1 - sub_id] -= self.r0 # () + + def reset_sub(self): + self.sub_R = [self.R / 2, self.R / 2] + self.train_times_R = 0 + + +class QTable(): + def __init__(self, num_client, train_ratio=0.1, base_reward=5, sample_seed=233, epsilon=0.01, epsilon_decay=0.99, \ + known_clt=50, elbow_constant=0.45, avg_train_times=4, split_round=None, merge=False, metric='cosine'): + # list dict + self.Qtable = [{0: QEntry()} for row in range(num_client)] # init with one model + self.num_client = num_client + self.y_kmeans = np.zeros(num_client) + self.base_reward = base_reward + self.known_clt = known_clt + self.num_model = 1 + self.init_round = {0: False} + self.epsilon = epsilon + self.epsilon_decay = epsilon_decay + self.rng = Random() + self.rng.seed(sample_seed) + self.sub_num = {} + self.split_counter = 
defaultdict(int) + self.epoch = defaultdict(int) + self.min_round = 1 + self.elbow_constant = elbow_constant + self.train_ratio = train_ratio + self.min_clt = 100 / train_ratio # minimal # clt per round --> 50 + self.avg_train_times = avg_train_times + self.pivot_clients = defaultdict(list) + self.pivot_clients[0] = [*range(self.num_client)] + self.min_cluster_size = num_client // 10 + self.split_round = split_round + self.merge_action = merge + self.metric = metric + if metric == 'kl': + self._initialize_kl() + + def _initialize_kl(self): + """Initialize KL divergence metric if applicable.""" + def KL(a, b): + epsilon = 0.00001 + a += epsilon + b += epsilon + return np.sum(np.where(a != 0, a * np.log(a / b), 0)) + self.kl = KL + + def update_R(self, cid, mid, new_reward): + self.Qtable[cid][mid].update_R(new_reward) + + if new_reward < 0: + self.Qtable[cid][mid].reset_sub() + + def update_R_batch(self, cid_list, mid, reward_list, remain_round): + for cid, reward in zip(cid_list, reward_list): + self.update_R(cid, mid, reward) + self.Qtable[cid][mid].train_times_R += 1 + + def update_subR(self, cid, mid, sub_id): + self.Qtable[cid][mid].update_sub(sub_id) + self.Qtable[cid][mid].train_times_R += 1 + + def update_subR_batch(self, cid_list, mid, sub_id): + for cid in cid_list: + self.update_subR(cid, mid, sub_id) + + def get_subid(self, cid, mid): + # ( cid ) prefer which subcluster in mid + if self.Qtable[cid][mid].sub_R[0] == self.Qtable[cid][mid].sub_R[1] or self.Qtable[cid][mid].R < 0: + return 2 + else: + return int(self.Qtable[cid][mid].sub_R[0] < self.Qtable[cid][mid].sub_R[1]) + + def subcluster_policy(self, mid, clt_list): + sub_num = [0, 0, 0] + sub_label_list = [] + for cid in clt_list: + sub_id = self.get_subid(cid, mid) + sub_num[sub_id] += 1 + sub_label_list.append(sub_id) + return sub_num, sub_label_list + + def grow_table(self, mid): + self.split_counter[mid] = 0 + new_mid = self.num_model + self.init_round[new_mid] = False + self.init_round[mid] = False + self.epoch[new_mid] = self.epoch[mid] + for cid in range(self.num_client): + sub_a = self.Qtable[cid][mid].sub_R[0] + sub_b = self.Qtable[cid][mid].sub_R[1] + a_R = 1 if sub_a > sub_b else 0 + b_R = 1 - a_R if sub_a != sub_b else 0 + + reward = self.Qtable[cid][mid].R + times = self.Qtable[cid][mid].train_times_R + self.Qtable[cid][mid] = QEntry(init_R=reward + a_R, train_times_R=times * a_R) + self.Qtable[cid][new_mid] = QEntry(init_R=reward + b_R, train_times_R=times * b_R) + self.num_model += 1 + + def shrink_table(self, mid): + self.num_model -= 1 + for cid in range(self.num_client): + self.Qtable[cid].pop(mid, None) + tmp_dict = self.Qtable[cid] + for m in range(self.num_model): + self.Qtable[cid][m] = list(tmp_dict.values())[m] + + def model_policy(self, cid): + + train_times = [self.Qtable[cid][m].train_times_R for m in self.Qtable[cid]] + new_client = False if np.sum(train_times) > 0 else True + + self.epsilon *= self.epsilon_decay + if new_client or self.rng.random() < self.epsilon: + return self.rng.randint(0, self.num_model - 1) + else: + reward_list = self.get_model_reward(cid) + return np.argmax(reward_list) + + def model_policy_batch(self, client_list): + for cid in client_list: + self.y_kmeans[cid] = self.model_policy(cid) + + self.y_kmeans[0] = -1 + + def get_model_reward(self, cid): + return [self.Qtable[cid][m].R for m in self.Qtable[cid]] + + def dist_to_reward(self, dist_list, known_ratio): + + avg_dist = np.mean(dist_list) + std_dist = np.std(dist_list) + + R0 = self.base_reward + slope = 
self.base_reward / (avg_dist + std_dist) + return [(R0 - slope * d) * known_ratio for d in dist_list] + + def count_known_main(self, global_index, mid): + known_index_list = [i for i, cid in enumerate(global_index) \ + if self.Qtable[cid][mid].train_times_R > 0 and self.Qtable[cid][mid].R > 1] + # local clt index + return known_index_list + + def split(self, mid): + # whether to split: enough client subreward info + minimal size satisfy + elbow test + if self.split_round is not None and self.epoch[mid] in self.split_round: + return True + elif self.split_round is None: + # whether to split: enough client subreward info + minimal size satisfy + elbow test + mid_client = np.argwhere(self.y_kmeans == mid) # global idx + mid_client = [i[0] for i in mid_client] # [1,3,5,7,9] + if len(mid_client) < self.min_clt: # ensure around 50 participate every round + return False + + sub_num = [0, 0, 0] + for cid in mid_client: + sub_num[self.get_subid(cid, mid)] += 1 + logging.info(f'sub_num : {sub_num} ') + min_size = self.known_clt * 0.9 ** (self.num_model - 1) + ratio = sub_num[0] / max(sub_num[1], 1) + + within_ratio = True if (ratio > 0.5 and ratio < 2) or self.num_model == 1 else False + return sub_num[0] > min_size and sub_num[1] > min_size and within_ratio + else: + return False + + def knn_update_subR(self, mid, X_sub, global_index, keep_split=True): + # update cluster membership for each clusters + self.epoch[mid] += 1 + if self.split_round is not None: + if self.epoch[mid] > max(self.split_round) + 1: + keep_split = False + else: + keep_split = True + + if self.init_round[mid] == False: + # First round of clustering + if self.num_model > 1: + known_clt_list = self.count_known_main(global_index, mid) + if len(known_clt_list) <= 1: + return + X_sub = X_sub[known_clt_list] + global_index = known_clt_list + + if self.metric == 'kl': + clustering = KLKmeans(n_clusters=2, init_center = X_sub[:2] ) + clustering.fit(X_sub) + labels = clustering.labels_ + + else: + clustering = MiniBatchKMeans(n_clusters=2, + random_state=0, + batch_size=10).fit(X_sub) + labels = clustering.labels_ # return labels + + for clt in range(len(global_index)): + self.update_subR(global_index[clt], mid, int(labels[clt])) + self.init_round[mid] = True + + elif keep_split: + # Continuous clustering of subsequent rounds + sub_num, sub_label_list = self.subcluster_policy(mid, global_index) + + if sub_num[0] == sub_num[1] == 0: + return + + elif sub_num[0] == 0 or sub_num[1] == 0: + sub_size = len(global_index) // 4 + if self.metric == 'kl': + neigh = NearestNeighbors(n_neighbors=sub_size, metric=lambda a, b: self.kl(a, b)).fit(X_sub) + else: + neigh = NearestNeighbors(n_neighbors=sub_size).fit(X_sub) + center_solo = np.argwhere(np.array(sub_label_list) != 2)[0][0] + near_clt_id = neigh.kneighbors([X_sub[center_solo]])[1][0] + for clt in near_clt_id: + self.update_subR(global_index[clt], mid, sub_label_list[center_solo]) + return + + else: + labeled_id = np.argwhere(np.array(sub_label_list) != 2).reshape(-1) + knn_label = list(filter(lambda score: score != 2, sub_label_list)) + knn_data = X_sub[labeled_id] + if self.metric == 'kl': + neigh = KNeighborsClassifier(n_neighbors=1, metric=lambda a, b: self.kl(a, b)).fit(knn_data, + knn_label) + else: + neigh = KNeighborsClassifier(n_neighbors=1).fit(knn_data, knn_label) + # labels = neigh.predict(X_sub) + unseen_id = np.argwhere(np.array(sub_label_list) == 2).reshape(-1) + if len(unseen_id) < 1: + return + labels = neigh.predict(X_sub[unseen_id]) + pred_prob = 
neigh.predict_proba(X_sub[unseen_id]) + for i, unseen_clt in enumerate(unseen_id): + conf = pred_prob[i][0] / max(pred_prob[i][1], 0.1) + if conf > 1.5 or conf < 0.67: + self.update_subR(global_index[unseen_clt], mid, int(labels[i])) + return + + def update_mainR(self, mid, X_sub, global_index, remain_round=True): + # update cluster membership for each clusters + if self.split_round is not None: + if self.epoch[mid] > max(self.split_round) + 1: + remain_round = False + else: + remain_round = True + + if self.num_model > 1: + known_clt_list = self.count_known_main(global_index, mid) + known_ratio = len(known_clt_list) / len(global_index) + + if len(known_clt_list) < 2 and self.train_ratio < 1: + # X_subcen = np.mean(X_sub, axis=0) + # print("know too less") + return False + else: + X_known = X_sub[known_clt_list] + X_subcen = np.mean(X_known, axis=0) + + X_ = X_sub - X_subcen + square_dist = np.sum(X_ ** 2, axis=1) + dist_list = np.sqrt(square_dist) + reward_list = self.dist_to_reward(dist_list, known_ratio) + self.update_R_batch(global_index, mid, reward_list, remain_round) + + split = False + self.split_counter[mid] += 1 + if self.split(mid): + # if self.split_counter[mid] > self.min_round : + split = True + self.grow_table(mid) + + if self.num_model > 2 and self.split_counter[mid] > 5 and self.merge(mid): + self.shrink_table(mid) + print(f"<<< Merge cluster {mid}") + logging.info(f"<<< Merge cluster {mid}") + self.model_policy_batch([*range(self.num_client)]) # update kmeans + + return split + + def update_pivot_client(self, mid): + trained_clt = set() + c = set() + for cid in range(1, self.num_client): + if self.Qtable[cid][mid].train_times_R > 0 and self.Qtable[cid][mid].R > 1: + c.add(cid) + # tmp_dict = self.Qtable[cid] + # if np.argsort(tmp_dict.values())[-1] == mid: + if self.Qtable[cid][mid].R > 1: + trained_clt.add(cid) + print(len(c), len(trained_clt)) + self.pivot_clients[mid] = list(trained_clt) + # TODO: can have many overlap clients, instead choose clients with highest score + + def return_pivot_client(self, mid): + # Return the clients that belong to the cohort + self.update_pivot_client(mid) + return self.pivot_clients[mid] + + def merge(self, mid): + if self.merge_action == False: + return False + + mid_client = np.argwhere(self.y_kmeans == mid) + if len(mid_client) < self.min_cluster_size: + return True + return False + + def count_trained_clt(self): + trained_clt = [] + for mid in range(self.num_model): + self.update_pivot_client(mid) + trained_clt += self.pivot_clients[mid] + print("Trained clients :", len(set(trained_clt))) + return list(set(trained_clt)) + + def plot(self, X, epoch): + # Visualize the clustering result + trained_clt = self.count_trained_clt() + plt.scatter(X[trained_clt, 0], X[trained_clt, 1], c=self.y_kmeans[trained_clt], s=30, cmap='viridis') + plt.title(f"Epoch {epoch}: {len(np.unique(self.y_kmeans[trained_clt], axis=0))} clusters") + plt.savefig(f"epoch_{epoch}.png") + plt.show() + + if len(np.unique(self.y_kmeans[trained_clt], axis=0)) > 1: + silhouette_avg = silhouette_score(X[trained_clt], self.y_kmeans[trained_clt]) + print("Silhouette score is ", silhouette_avg) diff --git a/examples/auxo/config.py b/examples/auxo/config.py new file mode 100644 index 00000000..e539f31e --- /dev/null +++ b/examples/auxo/config.py @@ -0,0 +1,16 @@ +import yaml, os +global auxo_config + + +# Get the value of the FEDSCALE_HOME environment variable +fedscale_home = os.environ.get('FEDSCALE_HOME') + +# Check if the environment variable is set +if fedscale_home is 
not None:
+    config_path = os.path.join(fedscale_home, 'examples', 'auxo', 'config.yml')
+
+    # Now, open the file using the constructed path
+    with open(config_path, 'r') as f:
+        auxo_config = yaml.load(f, Loader=yaml.FullLoader)
+else:
+    raise EnvironmentError("FEDSCALE_HOME environment variable is not set.")
\ No newline at end of file
diff --git a/examples/auxo/config.yml b/examples/auxo/config.yml
new file mode 100644
index 00000000..4946c84b
--- /dev/null
+++ b/examples/auxo/config.yml
@@ -0,0 +1,8 @@
+train_data_map_file: examples/auxo/train_by_clt.csv
+test_data_map_file: examples/auxo/test_by_clt.csv
+split_round: [50,100] # can be removed
+exploredecay: 0.99
+explorerate: 0.01
+reduction: False
+metric: 'cosine'
+start_round: 0
\ No newline at end of file
diff --git a/examples/auxo/executor.py b/examples/auxo/executor.py
new file mode 100644
index 00000000..4e10f0ec
--- /dev/null
+++ b/examples/auxo/executor.py
@@ -0,0 +1,253 @@
+import logging
+
+from fedscale.cloud.execution.executor import *
+from utils.helper import *
+import copy
+from config import auxo_config
+
+class AuxoExecutor(Executor):
+    def __init__(self, args):
+        super().__init__(args)
+        self.round = [0]
+        self.model_adapter = [self.get_client_trainer(args).get_model_adapter(init_model())]
+
+    def UpdateModel(self, model_weights, cohort_id):
+        """Receive the broadcasted global model for the current round
+
+        Args:
+            model_weights (PyTorch or TensorFlow weights): The broadcasted global model weights
+            cohort_id (int): The cohort id
+        """
+        self.round[cohort_id] += 1
+        self.model_adapter[cohort_id].set_weights(model_weights)
+
+    def training_handler(self, client_id, conf, model, cohort_id):
+        """Train model given client id
+
+        Args:
+            client_id (int): The client id.
+            conf (dictionary): The client runtime config.
+            cohort_id (int): The cohort id.
+
+        Returns:
+            dictionary: The train result
+
+        """
+        self.model_adapter[cohort_id].set_weights(model)
+        conf.client_id = client_id
+        conf.tokenizer = tokenizer
+        client_data = self.training_sets if self.args.task == "rl" else \
+            select_dataset(client_id, self.training_sets,
+                           batch_size=conf.batch_size, args=self.args,
+                           collate_fn=self.collate_fn
+                           )
+        client = self.get_client_trainer(self.args)
+        if len(client_data) == 0:
+            state_dicts = self.model_adapter[cohort_id].get_model().state_dict()
+            logging.info(f"Client {client_id} has no data, return empty result")
+            return {'client_id': client_id, 'moving_loss': 0,
+                    'trained_size': 0, 'utility': 0, 'wall_duration': 0,
+                    'update_weight': {p: state_dicts[p].data.cpu().numpy()
+                                      for p in state_dicts},
+                    'success': 1}
+        train_res = client.train(
+            client_data=client_data, model=self.model_adapter[cohort_id].get_model(), conf=conf)
+
+        return train_res
+
+    def Train(self, config):
+        """Load train config and data to start training on that client
+
+        Args:
+            config (dictionary): The client training config.
+
+        Returns:
+            tuple (int, dictionary): The client id and train result
+
+        """
+        client_id, train_config, cohort_id = config['client_id'], config['task_config'], config['cohort_id']
+
+        if 'model' not in config or not config['model']:
+            raise ValueError("The 'model' object must be a non-null value in the training config.")
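+        # Expected `config` shape (assembled by AuxoAggregator.create_client_task,
+        # with 'model' filled in by event_monitor below):
+        #   {'client_id': int, 'task_config': dict, 'cohort_id': int, 'model': weights}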
+ client_conf = self.override_conf(train_config) + train_res = self.training_handler( + client_id=client_id, conf=client_conf, model=config['model'], cohort_id=cohort_id) + + # Report execution completion meta information + response = self.aggregator_communicator.stub.CLIENT_EXECUTE_COMPLETION( + job_api_pb2.CompleteRequest( + client_id=str(client_id), executor_id=self.executor_id, + event=generate_msg(commons.CLIENT_TRAIN, cohort_id), status=True, msg=None, + meta_result=None, data_result=None + ) + ) + self.dispatch_worker_events(response) + logging.info(f"[Cohort {cohort_id}] Client {client_id} finished training. ") + + return client_id, train_res + + def _init_train_test_data(self): + + if self.args.data_set == 'femnist': + from utils.openimg import OpenImage + train_transform, test_transform = get_data_transform('mnist') + train_dataset = OpenImage(self.args.data_dir, dataset='femnist', transform=train_transform, client_mapping_file = auxo_config['train_data_map_file'] ) + test_dataset = OpenImage(self.args.data_dir, dataset='femnist', transform=test_transform, client_mapping_file = auxo_config['test_data_map_file'] ) + else: + raise NotImplementedError + return train_dataset, test_dataset + + + def init_data(self): + """Return the training and testing dataset + + Returns: + Tuple of DataPartitioner class: The partioned dataset class for training and testing + + """ + train_dataset, test_dataset = self._init_train_test_data() + if self.args.task == "rl": + return train_dataset, test_dataset + if self.args.task == 'nlp': + self.collate_fn = collate + elif self.args.task == 'voice': + self.collate_fn = voice_collate_fn + # load data partitionxr (entire_train_data) + logging.info("Data partitioner starts ...") + + training_sets = DataPartitioner( + data=train_dataset, args=self.args, numOfClass=self.args.num_class) + training_sets.partition_data_helper( + num_clients=self.args.num_participants, data_map_file=auxo_config['train_data_map_file']) + + testing_sets = DataPartitioner( + data=test_dataset, args=self.args, numOfClass=self.args.num_class) + testing_sets.partition_data_helper( + num_clients=self.args.num_participants, data_map_file=auxo_config['test_data_map_file']) + + logging.info("Data partitioner completes ...") + + return training_sets, testing_sets + + + def testing_handler(self, client_list, cohort_id=0): + """Test model + + Args: + args (dictionary): Variable arguments for fedscale runtime config. defaults to the setup in arg_parser.py + config (dictionary): Variable arguments from coordinator. 
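+            client_list (list of int): Clients to test; each executor evaluates
+                its own shard of this list.
+            cohort_id (int): The cohort id.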
+    def testing_handler(self, client_list, cohort_id=0):
+        """Test the model on this executor's share of the test clients
+
+        Args:
+            client_list (list): The client ids to test on, partitioned across executors.
+            cohort_id (int): The cohort id.
+
+        Returns:
+            dictionary: The accumulated test result
+        """
+        test_num_clt = max(len(client_list) // self.num_executors, 1)
+        test_client_id_list = client_list[(self.this_rank - 1) * test_num_clt: self.this_rank * test_num_clt]
+        logging.info(f"[Cohort {cohort_id}] Test client ID: {test_client_id_list}")
+        testResults_accum = {'top_1': 0, 'top_5': 0, 'test_loss': 0, 'test_len': 0}
+
+        test_config = self.override_conf({
+            'rank': self.this_rank,
+            'memory_capacity': self.args.memory_capacity,
+            'tokenizer': tokenizer
+        })
+        for clt in test_client_id_list:
+            client = self.get_client_trainer(test_config)
+            data_loader = select_dataset(clt, self.testing_sets,
+                                         batch_size=self.args.test_bsz, args=self.args,
+                                         isTest=False, collate_fn=self.collate_fn)
+            if len(data_loader) > 0:
+                test_results = client.test(data_loader, self.model_adapter[cohort_id].get_model(), test_config)
+                testResults_accum['top_1'] += test_results['top_1']
+                testResults_accum['top_5'] += test_results['top_5']
+                testResults_accum['test_loss'] += test_results['test_loss']
+                testResults_accum['test_len'] += test_results['test_len']
+
+        gc.collect()
+
+        return testResults_accum
+
+    def Test(self, config, cohort_id):
+        """Model testing. By default, we test the accuracy on all data of clients in the test group
+
+        Args:
+            config (dictionary): The client testing config.
+            cohort_id (int): The cohort id.
+        """
+        test_res = self.testing_handler(config['client_id'], cohort_id)
+        test_res = {'executorId': self.this_rank, 'results': test_res}
+
+        # Report execution completion information
+        response = self.aggregator_communicator.stub.CLIENT_EXECUTE_COMPLETION(
+            job_api_pb2.CompleteRequest(
+                client_id=self.executor_id, executor_id=self.executor_id,
+                event=generate_msg(commons.MODEL_TEST, cohort_id), status=True, msg=None,
+                meta_result=None, data_result=self.serialize_response(test_res)
+            )
+        )
+        self.dispatch_worker_events(response)
+
+    def _init_split(self, cohort_id, new_cohort_id):
+        if len(self.model_adapter) <= new_cohort_id:
+            self.model_adapter.append(copy.deepcopy(self.model_adapter[cohort_id]))
+            self.round.append(copy.deepcopy(self.round[cohort_id]))
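+    # Cohort-split sketch: on a 'split' event, the executor deep-copies the parent
+    # cohort's model adapter and round counter, so the new cohort resumes training
+    # from the parent's current state.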
+    def event_monitor(self):
+        """Activate event handler once receiving new message
+        """
+        logging.info("Start monitoring events ...")
+        self.client_register()
+
+        while not self.received_stop_request:
+            if len(self.event_queue) > 0:
+                request = self.event_queue.popleft()
+                current_event = request.event
+                event_type, cohort_id = decode_msg(current_event)
+                if event_type != commons.DUMMY_EVENT:
+                    logging.info("Received message: {}".format(current_event))
+
+                if event_type == commons.CLIENT_TRAIN:
+                    train_config = self.deserialize_response(request.meta)
+                    train_model = self.deserialize_response(request.data)
+                    train_config['model'] = train_model
+                    train_config['client_id'] = int(train_config['client_id'])
+                    client_id, train_res = self.Train(train_config)
+
+                    # Upload model updates
+                    future_call = self.aggregator_communicator.stub.CLIENT_EXECUTE_COMPLETION.future(
+                        job_api_pb2.CompleteRequest(client_id=str(client_id), executor_id=self.executor_id,
+                                                    event=generate_msg(commons.UPLOAD_MODEL, cohort_id), status=True, msg=None,
+                                                    meta_result=None, data_result=self.serialize_response(train_res)
+                                                    ))
+                    future_call.add_done_callback(lambda _response: self.dispatch_worker_events(_response.result()))
+
+                elif event_type == commons.MODEL_TEST:
+                    self.Test(self.deserialize_response(request.meta), cohort_id)
+
+                elif event_type == commons.UPDATE_MODEL:
+                    model_weights = self.deserialize_response(request.data)
+                    self.UpdateModel(model_weights, cohort_id)
+
+                elif event_type == 'split':
+                    new_cohort_id = len(self.model_adapter)
+                    self._init_split(cohort_id, new_cohort_id)
+
+                elif event_type == commons.SHUT_DOWN:
+                    self.Stop()
+
+                elif event_type == commons.DUMMY_EVENT:
+                    pass
+            else:
+                time.sleep(1)
+                try:
+                    self.client_ping()
+                except Exception as e:
+                    logging.info(f"Caught exception {e} from aggregator, terminating executor {self.this_rank} ...")
+                    self.Stop()
+
+if __name__ == "__main__":
+    executor = AuxoExecutor(parser.args)
+    executor.run()
diff --git a/examples/auxo/fig/auxo.png b/examples/auxo/fig/auxo.png
new file mode 100644
index 00000000..71458bb5
Binary files /dev/null and b/examples/auxo/fig/auxo.png differ
diff --git a/examples/auxo/fig/epoch_100.png b/examples/auxo/fig/epoch_100.png
new file mode 100644
index 00000000..305f0e61
Binary files /dev/null and b/examples/auxo/fig/epoch_100.png differ
diff --git a/examples/auxo/fig/epoch_14.png b/examples/auxo/fig/epoch_14.png
new file mode 100644
index 00000000..1323c1d4
Binary files /dev/null and b/examples/auxo/fig/epoch_14.png differ
diff --git a/examples/auxo/fig/epoch_224.png b/examples/auxo/fig/epoch_224.png
new file mode 100644
index 00000000..fa7cc029
Binary files /dev/null and b/examples/auxo/fig/epoch_224.png differ
diff --git a/examples/auxo/fig/epoch_300.png b/examples/auxo/fig/epoch_300.png
new file mode 100644
index 00000000..da20135d
Binary files /dev/null and b/examples/auxo/fig/epoch_300.png differ
diff --git a/examples/auxo/fig/epoch_500.png b/examples/auxo/fig/epoch_500.png
new file mode 100644
index 00000000..e8096cd4
Binary files /dev/null and b/examples/auxo/fig/epoch_500.png differ
diff --git a/examples/auxo/fig/epoch_700.png b/examples/auxo/fig/epoch_700.png
new file mode 100644
index 00000000..61f11aa3
Binary files /dev/null and b/examples/auxo/fig/epoch_700.png differ
diff --git a/examples/auxo/playground.py b/examples/auxo/playground.py
new file mode 100644
index 00000000..b8dad2b3
--- /dev/null
+++ b/examples/auxo/playground.py
@@ -0,0 +1,64 @@
+import time
+import numpy as np
+import random
+import sys
+from sklearn.datasets import make_blobs
+from clustering import QTable
+
+DEFAULT_SAMPLE_SIZE = 50
+DEFAULT_TOTAL_EPOCH = 800
+DEFAULT_TOTAL_SAMPLE = 1000
+DEFAULT_KNOWN_CLT = 150
+NUM_CENTERS = 4
+random.seed(100)
+
+def bipartition_cluster(total_epoch, total_sample, sample_size, known_clt):
+    start_time = time.time()
+
+    learning_rate = 0.1
+    avg_train_times = 4 if len(sys.argv) <= 3 else total_epoch * sample_size / total_sample
+    print(f"{sample_size}/{total_sample} of samples cluster for {total_epoch} epochs")
+
+    X, y_true = make_blobs(n_samples=total_sample, centers=NUM_CENTERS, cluster_std=2, random_state=100)
+
+    Q_table = QTable(
+        total_sample,
+        train_ratio=0.9,
+        elbow_constant=0.8,
+        merge=False,
+        known_clt=known_clt,
+        avg_train_times=avg_train_times,
+        # split_round=[200]
+    )
+
+    for epoch in range(total_epoch):
+        num_list = random.sample(range(total_sample), sample_size)
+
+        for mid in range(Q_table.num_model):
+            client_id = np.argwhere(Q_table.y_kmeans[num_list] == mid)
+            global_index = [num_list[i[0]] for i in client_id]
+
+            if len(client_id) < 5:
+                continue
+
+            X_sub = X[global_index]
+            Q_table.knn_update_subR(mid, X_sub, global_index)
+            split = Q_table.update_mainR(mid, X_sub, global_index, epoch <= total_epoch * 0.8)
+
+            # Plot on a split, or periodically (every 200 epochs) for model 0
+            if split or (epoch % 200 == 100 and mid == 0):
+                if split:
+                    print(f"SPLIT at round {epoch}")
+                print(f'Epoch: {epoch}')
+                Q_table.plot(X, epoch)
+
+    print(f"Split into {Q_table.num_model} models")
+    print(f"Time usage: {time.time() - start_time}")
+
+if __name__ == "__main__":
+    total_epoch = int(sys.argv[3]) if len(sys.argv) > 3 else DEFAULT_TOTAL_EPOCH
+    total_sample = int(sys.argv[1]) if len(sys.argv) > 3 else DEFAULT_TOTAL_SAMPLE
+    sample_size = int(sys.argv[2]) if len(sys.argv) > 3 else DEFAULT_SAMPLE_SIZE
+    known_clt = DEFAULT_KNOWN_CLT if len(sys.argv) <= 3 else 1000
+
+    bipartition_cluster(total_epoch, total_sample, sample_size, known_clt)
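+# Example invocations (positional args: total_sample, sample_size, total_epoch):
+#   python playground.py                 # use the defaults above
+#   python playground.py 1000 50 800     # sample 50/1000 clients per epoch for 800 epochs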
diff --git a/examples/auxo/requirements.txt b/examples/auxo/requirements.txt
new file mode 100644
index 00000000..a66a9f9a
--- /dev/null
+++ b/examples/auxo/requirements.txt
@@ -0,0 +1,32 @@
+tensorboard
+numba==0.48.0
+pip==20.0.2
+torch_optimizer
+torch
+tensorflow
+torchvision
+transformers
+scipy==1.4.1
+matplotlib==3.1.3
+torch_baidu_ctc==0.3.0
+tensorboardX==2.1
+overrides==3.1.0
+python-levenshtein==0.12.0
+pandas==1.1.0
+PyYAML
+pytest
+sox==1.3.7
+grpcio==1.40.0
+gym
+jupyter
+pillow==9
+sentencepiece
+gdown
+h5py
+librosa==0.7.2
+SoundFile
+resampy==0.3.1
+kubernetes
+wandb
+nltk
+
diff --git a/examples/auxo/resource_manager.py b/examples/auxo/resource_manager.py
new file mode 100644
index 00000000..12055df7
--- /dev/null
+++ b/examples/auxo/resource_manager.py
@@ -0,0 +1,58 @@
+from fedscale.cloud.resource_manager import *
+
+
+class AuxoResourceManager(ResourceManager):
+    def __init__(self, experiment_mode):
+        self.client_run_queue = [[]]
+        self.client_run_queue_idx = [0]
+        self.experiment_mode = experiment_mode
+        self.update_lock = threading.Lock()
+
+    def register_tasks(self, clientsToRun, cohort_id):
+        self.client_run_queue[cohort_id] = clientsToRun.copy()
+        self.client_run_queue_idx[cohort_id] = 0
+
+    def split(self, cohort_id):
+        self.client_run_queue.append(self.client_run_queue[cohort_id].copy())
+        self.client_run_queue_idx.append(0)
+
+    def get_task_length(self, cohort_id) -> int:
+        """Number of tasks left in the queue
+
+        Args:
+            cohort_id (int): The cohort id
+
+        Returns:
+            int: Number of tasks left in the queue
+        """
+        self.update_lock.acquire()
+        remaining_task_num: int = len(self.client_run_queue[cohort_id]) - self.client_run_queue_idx[cohort_id]
+        self.update_lock.release()
+        return remaining_task_num
+
+    def remove_client_task(self, client_id, cohort_id):
+        # Sanity check only; the actual dequeue happens in get_next_task
+        assert client_id in self.client_run_queue[cohort_id], \
+            f"client task {client_id} is not in task queue"
+
+    def has_next_task(self, client_id=None, cohort_id=0):
+        exist_next_task = False
+        if self.experiment_mode == commons.SIMULATION_MODE:
+            exist_next_task = self.client_run_queue_idx[cohort_id] < len(
+                self.client_run_queue[cohort_id])
+        else:
+            exist_next_task = client_id in self.client_run_queue[cohort_id]
+        return exist_next_task
+
+    def get_next_task(self, client_id=None, cohort_id=0):
+        # TODO: remove client id when finished
+        next_task_id = None
+        self.update_lock.acquire()
+        if self.experiment_mode == commons.SIMULATION_MODE:
+            if self.has_next_task(client_id, cohort_id):
+                next_task_id = self.client_run_queue[cohort_id][self.client_run_queue_idx[cohort_id]]
+                self.client_run_queue_idx[cohort_id] += 1
+        else:
+            if client_id in self.client_run_queue[cohort_id]:
+                next_task_id = client_id
+                self.client_run_queue[cohort_id].remove(next_task_id)
+
+        self.update_lock.release()
+        return next_task_id
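+# Design note: split(cohort_id) duplicates the parent cohort's pending task queue,
+# so both cohorts initially schedule the same clients; subsequent register_tasks
+# calls then let the queues diverge per cohort.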
diff --git a/examples/auxo/utils/grad_monitor.py b/examples/auxo/utils/grad_monitor.py
new file mode 100644
index 00000000..8475dcf5
--- /dev/null
+++ b/examples/auxo/utils/grad_monitor.py
@@ -0,0 +1,73 @@
+import logging
+import numpy as np
+
+class Gradient_Monitor():
+    def __init__(self, rank):
+        self.client_grad = {}
+        self.client_data = {}
+        self.grad_stability = []
+        self.rank = rank
+
+    def register_client(self, client_id, new_w, old_w, client_data):
+        # Only monitor on the first-ranked executor
+        if self.rank > 1:
+            return
+
+        old_w = [dt.cpu().numpy() for dt in old_w]
+        new_w = [new_w[k] for k in new_w]
+        # Pseudo-gradient: old weights minus new weights, flattened into one vector
+        gradient = [pb - pa for pa, pb in zip(new_w, old_w)]
+        self.client_grad[client_id] = np.concatenate([n.ravel() for n in gradient])
+        clt_data = None
+        for data_pair in client_data:
+            (data, target) = data_pair
+            if clt_data is None:
+                clt_data = [np.asarray(data.ravel())]
+            else:
+                clt_data.append(np.asarray(data.ravel()))
+
+        self.client_data[client_id] = np.mean(clt_data, axis=0)
+        logging.info(f'client_data[{client_id}] registered')
+
+    def _cal_data_similarity(self, client_1, client_2):
+        '''Calculate the cosine similarity between the data of two clients'''
+        data_1 = self.client_data[client_1].ravel()
+        data_2 = self.client_data[client_2].ravel()
+        cosine_similarity = np.dot(data_1, data_2) / (np.linalg.norm(data_1) * np.linalg.norm(data_2))
+        return cosine_similarity
+
+    def _cal_grad_similarity(self, client_1, client_2):
+        '''Calculate the cosine similarity between the gradients of two clients'''
+        grad_1 = self.client_grad[client_1]
+        grad_2 = self.client_grad[client_2]
+
+        # Calculate the magnitude (Euclidean norm) of each gradient
+        magnitude_1 = np.linalg.norm(grad_1)
+        magnitude_2 = np.linalg.norm(grad_2)
+        # Calculate the cosine similarity
+        cosine_similarity = np.dot(grad_1, grad_2) / (magnitude_1 * magnitude_2)
+
+        return cosine_similarity
+
+    def cal_pairwise_grad_stability(self):
+        '''Correlate pairwise gradient similarity with pairwise data similarity'''
+        if len(self.client_grad) > 0:
+            grad_sim = []
+            data_sim = []
+            for client_id1 in self.client_grad:
+                for client_id2 in self.client_grad:
+                    if client_id1 > client_id2:
+                        data_sim.append(self._cal_data_similarity(client_id1, client_id2))
+                        grad_sim.append(self._cal_grad_similarity(client_id1, client_id2))
+            correlation_coefficient = np.corrcoef(grad_sim, data_sim)[0, 1]
+
+            self.grad_stability.append(correlation_coefficient)
+            logging.info(f"Gradient stability: {self.grad_stability}")
+            # Reset for the next round
+            self.client_grad = {}
+            self.client_data = {}
+
diff --git a/examples/auxo/utils/helper.py b/examples/auxo/utils/helper.py
new file mode 100644
index 00000000..4c61353d
--- /dev/null
+++ b/examples/auxo/utils/helper.py
@@ -0,0 +1,15 @@
+def decode_msg(msg):
+    """Decode a message into its event type and cohort id
+
+    Args:
+        msg (string): message of the form '<event_type>-<cohort_id>'
+
+    Returns:
+        tuple (string, int): the event type and cohort id
+    """
+    msg_type, cohort_id = msg.split('-')
+    return msg_type, int(cohort_id)
+
+
+def generate_msg(msg_type, cohort_id=0):
+    """Encode an event type and cohort id into a single message string"""
+    return f'{msg_type}-{cohort_id}'
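+# Example: generate_msg(commons.CLIENT_TRAIN, 1) yields 'client_train-1'
+# (assuming commons.CLIENT_TRAIN == 'client_train'), and
+# decode_msg('client_train-1') returns ('client_train', 1).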
diff --git a/examples/auxo/utils/klkmeans.py b/examples/auxo/utils/klkmeans.py
new file mode 100644
index 00000000..7545e308
--- /dev/null
+++ b/examples/auxo/utils/klkmeans.py
@@ -0,0 +1,37 @@
+from nltk.cluster import KMeansClusterer
+import numpy as np
+
+
+class KLKmeans(object):
+    def __init__(self, n_clusters, init_center=None):
+        self.labels_ = None
+
+        def _processNegVals(x):
+            x = np.array(x)
+            minx = np.min(x)
+            if minx < 0:
+                x = x + abs(minx)
+            # 0.000001 is added to avoid zeros before taking the log ratio
+            x = x + 0.000001
+            return x
+
+        def _KL(P, Q):
+            '''KL divergence, used as the distance function for k-means'''
+            P = _processNegVals(P)
+            Q = _processNegVals(Q)
+            divergence = np.sum(P * np.log(P / Q))
+            return divergence
+
+        self.klkmeans = KMeansClusterer(n_clusters, _KL, initial_means=init_center)
+
+    def fit(self, x):
+        self.klkmeans.cluster(x)
+        self.cluster_centers_ = self.klkmeans.means()
+        self.labels_ = self.predict(x)
+
+    def predict(self, x):
+        return [self.klkmeans.classify(i) for i in x]
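+# Minimal usage sketch (assumes nltk is installed):
+#   km = KLKmeans(n_clusters=2)
+#   X = np.random.rand(10, 4)
+#   km.fit(list(X))
+#   print(km.labels_, km.cluster_centers_)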
+ """ + # index , clientID = id_clt + imgName, target = self.data[index], int(self.targets[index]) + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.open(os.path.join(self.path, imgName)) + # avoid channel error + if img.mode != 'RGB': + img = img.convert('RGB') + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.data) + + @property + def raw_folder(self): + return self.root + + @property + def processed_folder(self): + return self.root + + def _check_exists(self): + + print("Checking data path:", os.path.join(self.processed_folder, self.data_file)) + return (os.path.exists(os.path.join(self.processed_folder, + self.data_file))) + + def load_meta_data(self, path): + datas, labels = [], [] + unique_clientIds = set() + with open(path) as csv_file: + csv_reader = csv.reader(csv_file, delimiter=',') + line_count = 0 + for row in csv_reader: + if line_count != 0: + unique_clientIds.add(row[0]) + self.data_to_clientID[len(datas)] = row[0] + if len(unique_clientIds) > self.num_clt: + break + datas.append(row[1]) + labels.append(int(row[-1])) + + line_count += 1 + return datas, labels + + def load_file(self, path): + # load meta file to get labels + # datas, labels = self.load_meta_data(os.path.join(self.processed_folder, 'client_data_mapping', self.data_file+'.csv')) + if self.client_mapping_file is not None: + datas, labels = self.load_meta_data(self.client_mapping_file) + else: + datas, labels = self.load_meta_data( + os.path.join(self.processed_folder, 'client_data_mapping', self.data_file + '.csv')) + + return datas, labels + + diff --git a/examples/auxo/utils/prepare_train_test.py b/examples/auxo/utils/prepare_train_test.py new file mode 100644 index 00000000..eb528075 --- /dev/null +++ b/examples/auxo/utils/prepare_train_test.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import csv +import sys +from collections import defaultdict +from os import path +import pandas as pd + + +def partition_by_client_unsort(file_path='train.csv'): + print(f"Processing {file_path} into train and test sets.") + + clt_smp = defaultdict(list) + title = None + + # Read CSV and partition by client + with open(file_path, 'r') as fin: + csv_reader = csv.reader(fin) + for ind, row in enumerate(csv_reader): + if ind == 0: + title = row + else: + clt_smp[row[0]].append(ind - 1) + + # Check if output files already exist + no_title = path.exists('train_by_clt.csv') or path.exists('test_by_clt.csv') + + # Initialize CSV writers for train and test sets + with open('train_by_clt.csv', 'a') as write_partition_train_file, open('test_by_clt.csv', + 'a') as write_partition_test_file: + writer_train = csv.writer(write_partition_train_file) + writer_test = csv.writer(write_partition_test_file) + + if not no_title: + writer_train.writerow(title) + writer_test.writerow(title) + + # Read original CSV into a Pandas DataFrame + print("Reading into pandas DataFrame.") + df = pd.read_csv(file_path) + + cnt = 0 + clt_num = 0 + + # Partition data and write to train and test CSV files + for clt, samples in clt_smp.items(): + sample_num = len(samples) + cnt += sample_num + clt_num += 1 + + for i, sample_idx in enumerate(samples): + if i < sample_num * 0.8: + writer_train.writerow(list(df.loc[sample_idx].values)) + else: + writer_test.writerow(list(df.loc[sample_idx].values)) + + if cnt % 10000 == 0: + 
print(f"Wrote {cnt} samples.") + print(f"Running average sample: {cnt / clt_num}") + + +if __name__ == "__main__": + file_path = sys.argv[1] + partition_by_client_unsort(file_path)