diff --git a/backend/main.go b/backend/main.go
index 49dcc69..947e7da 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -529,6 +529,27 @@ func handleStageStart(ctx context.Context, w http.ResponseWriter, r *http.Reques
 	return nil
 }
 
+// handleStartConversation looks up the stage named by the sid query parameter, calls its KeepAlive
+// and sets its lastSentence to the current time. It fails if sid is empty or QueryStage returns nil.
+func handleStartConversation(ctx context.Context, w http.ResponseWriter, r *http.Request) error {
+	stageID := r.URL.Query().Get("sid")
+	if stageID == "" {
+		return errors.Errorf("empty sid")
+	}
+
+	stage := talkServer.QueryStage(stageID)
+	if stage == nil {
+		return errors.Errorf("invalid sid %v", stageID)
+	}
+
+	// Refresh the stage first, then stamp lastSentence with the time this request arrived.
+	stage.KeepAlive()
+	stage.lastSentence = time.Now()
+
+	ohttp.WriteData(ctx, w, r, nil)
+	return nil
+}
+
 // When user ask a question, which is a request with audio, which is identified by rid (request id).
 func handleUploadQuestionAudio(ctx context.Context, w http.ResponseWriter, r *http.Request) error {
 	// The stage uuid, user must create it before upload question audio.
@@ -545,7 +566,6 @@ func handleUploadQuestionAudio(ctx context.Context, w http.ResponseWriter, r *ht
 
 	// Keep alive the stage.
 	stage.KeepAlive()
-	stage.lastSentence = time.Now()
 	// Switch to the context of stage.
 	ctx = stage.loggingCtx
 
@@ -611,8 +631,8 @@ func handleUploadQuestionAudio(ctx context.Context, w http.ResponseWriter, r *ht
 			stage.lastAsrDuration = resp.Duration
 			stage.lastRequestAsrText = asrText
 		}
-		logger.Tf(ctx, "ASR ok, robot=%v(%v), lang=%v, prompt=<%v>, resp is <%v>",
-			robot.uuid, robot.label, robot.asrLanguage, stage.previousAsrText, asrText)
+		logger.Tf(ctx, "ASR ok, robot=%v(%v), lang=%v, speech=%v, prompt=<%v>, resp is <%v>",
+			robot.uuid, robot.label, robot.asrLanguage, stage.lastAsrDuration, stage.previousAsrText, asrText)
 
 		// Important trace log.
 		logger.Tf(ctx, "You: %v", asrText)
@@ -929,6 +949,13 @@ func doMain(ctx context.Context) error {
 		}
 	})
 
+	handler.HandleFunc("/api/ai-talk/conversation/", func(w http.ResponseWriter, r *http.Request) {
+		if err := handleStartConversation(ctx, w, r); err != nil {
+			logger.Ef(ctx, "Handle start conversation failed, err %+v", err)
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+		}
+	})
+
 	handler.HandleFunc("/api/ai-talk/upload/", func(w http.ResponseWriter, r *http.Request) {
 		if err := handleUploadQuestionAudio(ctx, w, r); err != nil {
 			logger.Ef(ctx, "Handle audio failed, err %+v", err)
diff --git a/backend/openai.go b/backend/openai.go
index 33d2d60..f1425a1 100644
--- a/backend/openai.go
+++ b/backend/openai.go
@@ -108,7 +108,8 @@ func (v *openaiASRService) RequestASR(ctx context.Context, inputFile, language,
 		openai.AudioRequest{
 			Model:    os.Getenv("AIT_ASR_MODEL"),
 			FilePath: outputFile,
-			Format:   openai.AudioResponseFormatJSON,
+			// Note that we must use verbose JSON to get the duration of the file.
+			Format:   openai.AudioResponseFormatVerboseJSON,
 			Language: language,
 			Prompt:   prompt,
 		},
diff --git a/src/App.js b/src/App.js
index 2924235..d9d95d0 100644
--- a/src/App.js
+++ b/src/App.js
@@ -98,6 +98,18 @@ function AppImpl({info, verbose, robot, robotReady, stageUUID, playerRef}) {
     if (!robotReady) return;
 
     const processUserInput = async(userMayInput) => {
+      // End conversation, for stat the elapsed time cost accurately.
+      await new Promise((resolve, reject) => {
+        fetch(`/api/ai-talk/conversation/?sid=${stageUUID}&robot=${robot.uuid}&umi=${userMayInput}`, {
+          method: 'POST',
+        }).then(response => {
+          return response.json();
+        }).then((data) => {
+          verbose(`TTS: Conversation started`);
+          resolve();
+        }).catch(error => reject(error));
+      });
+
       // Upload the user input audio to the server.
       const requestUUID = await new Promise((resolve, reject) => {
         verbose(`ASR: Uploading ${ref.current.audioChunks.length} chunks, robot=${robot.uuid}`);