diff --git a/lilac/data/clustering.py b/lilac/data/clustering.py
index a9762372f..7530aafe4 100644
--- a/lilac/data/clustering.py
+++ b/lilac/data/clustering.py
@@ -104,17 +104,7 @@ def cluster_impl(
   if output_path:
     cluster_output_path = normalize_path(output_path)
   elif path:
-    # The sibling output path is the same as the input path, but with a different suffix.
-    index = 0
-    for i, path_part in enumerate(path):
-      if path_part == PATH_WILDCARD:
-        break
-      else:
-        index = i
-
-    parent = path[:index]
-    sibling = '_'.join([p for p in path[index:] if p != PATH_WILDCARD])
-    cluster_output_path = (*parent, f'{sibling}__{FIELD_SUFFIX}')
+    cluster_output_path = default_cluster_output_path(path)
   else:
     raise ValueError('input must be provided.')
 
@@ -416,3 +406,19 @@ def _hdbscan_cluster(
     for cluster_id, membership_prob in zip(labels, memberships):
       yield {CLUSTER_ID: int(cluster_id), CLUSTER_MEMBERSHIP_PROB: float(membership_prob)}
+
+
+def default_cluster_output_path(input_path: Path) -> PathTuple:
+  """Default output path for clustering."""
+  input_path = normalize_path(input_path)
+  # The sibling output path is the same as the input path, but with a different suffix.
+  index = 0
+  for i, path_part in enumerate(input_path):
+    if path_part == PATH_WILDCARD:
+      break
+    else:
+      index = i
+
+  parent = input_path[:index]
+  sibling = '_'.join([p for p in input_path[index:] if p != PATH_WILDCARD])
+  return (*parent, f'{sibling}__{FIELD_SUFFIX}')
diff --git a/lilac/formats/openai_json.py b/lilac/formats/openai_json.py
index 3b861daec..47a198047 100644
--- a/lilac/formats/openai_json.py
+++ b/lilac/formats/openai_json.py
@@ -32,7 +32,7 @@ class OpenAIJSON(DatasetFormat):
   Taken from: https://platform.openai.com/docs/api-reference/chat
   """
 
-  name: ClassVar[str] = 'openai_json'
+  name: ClassVar[str] = 'OpenAI JSON'
   data_schema: Schema = schema(
     {
       'messages': [
@@ -88,7 +88,7 @@ class OpenAIConversationJSON(DatasetFormat):
   Note that here "messages" is "conversation" for support with common datasets.
""" - name: ClassVar[str] = 'openai_conversation_json' + name: ClassVar[str] = 'OpenAI Conversation JSON' data_schema: Schema = schema( { 'conversation': [ diff --git a/lilac/formats/openchat.py b/lilac/formats/openchat.py index 815268e0d..9bee2ee32 100644 --- a/lilac/formats/openchat.py +++ b/lilac/formats/openchat.py @@ -10,7 +10,7 @@ class OpenChat(DatasetFormat): """OpenChat format.""" - name: ClassVar[str] = 'openchat' + name: ClassVar[str] = 'OpenChat' data_schema: Schema = schema( { 'items': [ diff --git a/lilac/formats/sharegpt.py b/lilac/formats/sharegpt.py index 75c9f2e91..30134205d 100644 --- a/lilac/formats/sharegpt.py +++ b/lilac/formats/sharegpt.py @@ -37,7 +37,7 @@ def _sharegpt_selector(item: Item, conv_from: str) -> str: class ShareGPT(DatasetFormat): """ShareGPT format.""" - name: ClassVar[str] = 'sharegpt' + name: ClassVar[str] = 'ShareGPT' data_schema: Schema = schema( { 'conversations': [ @@ -59,5 +59,5 @@ class ShareGPT(DatasetFormat): input_selectors: ClassVar[dict[str, DatasetFormatInputSelector]] = { selector.name: selector - for selector in [_SYSTEM_SELECTOR, _HUMAN_SELECTOR, _GPT_SELECTOR, _TOOL_SELECTOR] + for selector in [_HUMAN_SELECTOR, _SYSTEM_SELECTOR, _GPT_SELECTOR, _TOOL_SELECTOR] } diff --git a/lilac/load_test.py b/lilac/load_test.py index 0c3ae2895..1ac93e244 100644 --- a/lilac/load_test.py +++ b/lilac/load_test.py @@ -513,7 +513,7 @@ def _test_topic_fn(docs: list[tuple[str, float]]) -> str: dataset_namespace='namespace', dataset_name='test', input_selector=ClusterInputSelectorConfig( - format='sharegpt', + format='ShareGPT', selector='human', ), output_path=('cluster',), diff --git a/lilac/router_dataset.py b/lilac/router_dataset.py index fbef6181f..76a0b08e2 100644 --- a/lilac/router_dataset.py +++ b/lilac/router_dataset.py @@ -534,3 +534,13 @@ def restore_rows( searches=options.searches, filters=sanitized_filters, ) + + +@router.get('/{namespace}/{dataset_name}/format_selectors') +def get_format_selectors(namespace: str, dataset_name: str) -> list[str]: + """Get format selectors for the dataset if a format has been inferred.""" + dataset = get_dataset(namespace, dataset_name) + manifest = dataset.manifest() + if manifest.dataset_format: + return list(manifest.dataset_format.input_selectors.keys()) + return [] diff --git a/lilac/router_dataset_signals.py b/lilac/router_dataset_signals.py index 7c90335cc..4c4f4d8df 100644 --- a/lilac/router_dataset_signals.py +++ b/lilac/router_dataset_signals.py @@ -1,5 +1,5 @@ """Routing endpoints for running signals on datasets.""" -from typing import Annotated, Optional +from typing import Annotated, Optional, Union from fastapi import APIRouter, HTTPException from fastapi.params import Depends @@ -7,9 +7,11 @@ from pydantic import Field as PydanticField from .auth import UserInfo, get_session_user, get_user_access +from .data.clustering import default_cluster_output_path +from .dataset_format import DatasetFormatInputSelector, get_dataset_format_cls from .db_manager import get_dataset from .router_utils import RouteErrorHandler -from .schema import Path +from .schema import Path, PathTuple, normalize_path from .signal import Signal, resolve_signal from .tasks import TaskId, get_task_manager, launch_task @@ -82,7 +84,9 @@ def run() -> None: class ClusterOptions(BaseModel): """The request for the cluster endpoint.""" - input: Path + input: Optional[Path] = None + input_selector: Optional[str] = None + output_path: Optional[Path] = None use_garden: bool = PydanticField( default=False, description='Accelerate 
computation by running remotely on Lilac Garden.' @@ -107,14 +111,36 @@ def cluster( if not get_user_access(user).dataset.compute_signals: raise HTTPException(401, 'User does not have access to compute clusters over this dataset.') - path_str = '.'.join(map(str, options.input)) - task_name = f'[{namespace}/{dataset_name}] Clustering "{path_str}"' - task_id = get_task_manager().task_id(name=task_name) dataset = get_dataset(namespace, dataset_name) + manifest = dataset.manifest() + + cluster_input: Optional[Union[DatasetFormatInputSelector, PathTuple]] = None + if options.input: + path_str = '.'.join(map(str, options.input)) + task_name = f'[{namespace}/{dataset_name}] Clustering "{path_str}"' + cluster_input = normalize_path(options.input) + elif options.input_selector: + dataset_format = manifest.dataset_format + if dataset_format is None: + raise ValueError('Dataset format is not defined.') + + format_cls = get_dataset_format_cls(dataset_format.name) + if format_cls is None: + raise ValueError(f'Unknown format: {dataset_format.name}') + + cluster_input = format_cls.input_selectors[options.input_selector] + + task_name = ( + f'[{namespace}/{dataset_name}] Clustering using input selector ' f'"{options.input_selector}"' + ) + else: + raise HTTPException(400, 'Either input or input_selector must be provided.') + + task_id = get_task_manager().task_id(name=task_name) def run() -> None: dataset.cluster( - options.input, + cluster_input, options.output_path, use_garden=options.use_garden, overwrite=options.overwrite, @@ -125,6 +151,18 @@ def run() -> None: return ClusterResponse(task_id=task_id) +class DefaultClusterOutputPathOptions(BaseModel): + """Request body for the default cluster output path endpoint.""" + + input_path: Path + + +@router.post('/{namespace}/{dataset_name}/default_cluster_output_path') +def get_default_cluster_output_path(options: DefaultClusterOutputPathOptions) -> Path: + """Get format selectors for the dataset if a format has been inferred.""" + return default_cluster_output_path(options.input_path) + + class DeleteSignalOptions(BaseModel): """The request for the delete signal endpoint.""" diff --git a/web/blueprint/src/lib/components/ComputeClusterModal.svelte b/web/blueprint/src/lib/components/ComputeClusterModal.svelte index 1763643c8..1380a7aff 100644 --- a/web/blueprint/src/lib/components/ComputeClusterModal.svelte +++ b/web/blueprint/src/lib/components/ComputeClusterModal.svelte @@ -18,14 +18,21 @@