From ec66e2d24b68ac40af047fa84672d2a0a51e6c98 Mon Sep 17 00:00:00 2001 From: boocmp Date: Thu, 18 May 2023 22:32:58 +0700 Subject: [PATCH] Text to speech in reader mode added. --- browser/speedreader/page_distiller.cc | 54 ++++++----- browser/speedreader/page_distiller.h | 5 +- browser/speedreader/speedreader_tab_helper.cc | 4 +- .../speedreader_toolbar_data_handler_impl.cc | 28 ++++-- .../speedreader_toolbar_data_handler_impl.h | 4 +- .../panel/components/lists/index.tsx | 1 - .../resources/speedreader-desktop.css | 32 ++++++- .../resources/speedreader-desktop.js | 53 ++++++++++- components/speedreader/tts_player.cc | 95 +++++++++++++++---- components/speedreader/tts_player.h | 15 +-- 10 files changed, 230 insertions(+), 61 deletions(-) diff --git a/browser/speedreader/page_distiller.cc b/browser/speedreader/page_distiller.cc index 2d7b40be13d4..3bf02b4ed302 100644 --- a/browser/speedreader/page_distiller.cc +++ b/browser/speedreader/page_distiller.cc @@ -46,6 +46,20 @@ void PageDistiller::GetDistilledText(DistillContentCallback callback) { weak_factory_.GetWeakPtr(), std::move(callback))); } +void PageDistiller::GetTextToSpeak(TextToSpeechContentCallback callback) { + if (state_ != State::kDistilled) { + return std::move(callback).Run(base::Value()); + } + + constexpr const char16_t kGetTextToSpeak[] = uR"js( extractTextToSpeak() )js"; + + web_contents_->GetPrimaryMainFrame()->ExecuteJavaScriptInIsolatedWorld( + kGetTextToSpeak, + base::BindOnce(&PageDistiller::OnGetTextToSpeak, + weak_factory_.GetWeakPtr(), std::move(callback)), + ISOLATED_WORLD_ID_BRAVE_INTERNAL); +} + void PageDistiller::UpdateState(State state) { state_ = state; for (auto& observer : observers_) { @@ -62,22 +76,17 @@ void PageDistiller::StartDistill(DistillContentCallback callback) { return std::move(callback).Run(false, {}); } - if (state_ == State::kDistilled) { - constexpr const char16_t kScript[] = uR"js( extractText() )js"; - web_contents_->GetPrimaryMainFrame()->ExecuteJavaScriptInIsolatedWorld( - kScript, - base::BindOnce(&PageDistiller::OnGetText, weak_factory_.GetWeakPtr(), - std::move(callback)), - ISOLATED_WORLD_ID_BRAVE_INTERNAL); - } else { - constexpr const char16_t kScript[] = - uR"js( document.documentElement.outerHTML )js"; - web_contents_->GetPrimaryMainFrame()->ExecuteJavaScriptInIsolatedWorld( - kScript, - base::BindOnce(&PageDistiller::OnGetOuterHTML, - weak_factory_.GetWeakPtr(), std::move(callback)), - ISOLATED_WORLD_ID_BRAVE_INTERNAL); - } + constexpr const char16_t kGetDocumentSource[] = + uR"js( document.documentElement.outerHTML )js"; + + constexpr const char16_t kGetBodySource[] = + uR"js( document.body.outerHTML )js"; + + web_contents_->GetPrimaryMainFrame()->ExecuteJavaScriptInIsolatedWorld( + (state_ != State::kDistilled) ? kGetDocumentSource : kGetBodySource, + base::BindOnce(&PageDistiller::OnGetOuterHTML, weak_factory_.GetWeakPtr(), + std::move(callback)), + ISOLATED_WORLD_ID_BRAVE_INTERNAL); } void PageDistiller::OnGetOuterHTML(DistillContentCallback callback, @@ -106,14 +115,12 @@ void PageDistiller::OnGetOuterHTML(DistillContentCallback callback, } } -void PageDistiller::OnGetText(DistillContentCallback callback, - base::Value result) { - if (!web_contents_ || !result.is_dict() || - !result.GetDict().FindString("content")) { - return std::move(callback).Run(false, {}); +void PageDistiller::OnGetTextToSpeak(TextToSpeechContentCallback callback, + base::Value result) { + if (!result.is_dict()) { + return std::move(callback).Run(base::Value()); } - std::move(callback).Run(true, - std::move(*result.GetDict().FindString("content"))); + std::move(callback).Run(std::move(result)); } void PageDistiller::OnPageDistilled(DistillContentCallback callback, @@ -149,6 +156,7 @@ void PageDistiller::ExtractText(DistillContentCallback callback, return std::move(callback).Run(false, {}); } + re2::RE2::GlobalReplace(&html_content, "<[^>]*>", " "); std::move(callback).Run(true, html_content); } diff --git a/browser/speedreader/page_distiller.h b/browser/speedreader/page_distiller.h index ed79ab9db407..913343f5ebd2 100644 --- a/browser/speedreader/page_distiller.h +++ b/browser/speedreader/page_distiller.h @@ -41,6 +41,7 @@ class PageDistiller { using DistillContentCallback = base::OnceCallback; + using TextToSpeechContentCallback = base::OnceCallback; State GetState() const; @@ -49,6 +50,7 @@ class PageDistiller { void GetDistilledHTML(DistillContentCallback callback); void GetDistilledText(DistillContentCallback callback); + void GetTextToSpeak(TextToSpeechContentCallback callback); protected: explicit PageDistiller(content::WebContents* web_contents); @@ -60,7 +62,8 @@ class PageDistiller { private: void StartDistill(DistillContentCallback callback); void OnGetOuterHTML(DistillContentCallback callback, base::Value result); - void OnGetText(DistillContentCallback callback, base::Value result); + void OnGetTextToSpeak(TextToSpeechContentCallback callback, + base::Value result); void OnPageDistilled(DistillContentCallback callback, DistillationResult result, std::string original_data, diff --git a/browser/speedreader/speedreader_tab_helper.cc b/browser/speedreader/speedreader_tab_helper.cc index 4f26de9e033b..d392832f622f 100644 --- a/browser/speedreader/speedreader_tab_helper.cc +++ b/browser/speedreader/speedreader_tab_helper.cc @@ -594,14 +594,14 @@ void SpeedreaderTabHelper::SetDocumentAttribute(const std::string& attribute, void SpeedreaderTabHelper::OnGetDocumentSource(bool success, std::string html) { DCHECK(single_shot_next_request_); - if (!success) { + if (!success || html.empty()) { // TODO(boocmp): Show error dialog [Distillation failed on this page]. SetNextRequestState(DistillState::kPageProbablyReadable); UpdateUI(); return; } - single_show_content_.swap(html); + single_show_content_ = std::move(html); ReloadContents(); } diff --git a/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.cc b/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.cc index 5800792b88ef..0620e658910c 100644 --- a/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.cc +++ b/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.cc @@ -8,6 +8,7 @@ #include #include +#include "base/strings/string_number_conversions.h" #include "brave/browser/speedreader/speedreader_service_factory.h" #include "brave/browser/speedreader/speedreader_tab_helper.h" #include "brave/browser/ui/brave_browser_window.h" @@ -22,6 +23,7 @@ #include "chrome/browser/ui/browser_window.h" #include "chrome/browser/ui/color/chrome_color_id.h" #include "chrome/browser/ui/tabs/tab_strip_model.h" +#include "chrome/common/chrome_isolated_world_ids.h" #include "ui/color/color_provider.h" namespace { @@ -31,14 +33,13 @@ class TtsPlayerDelegate : public speedreader::TtsPlayer::Delegate { void RequestReadingContent( content::WebContents* web_contents, - base::OnceCallback result_cb) - override { + base::OnceCallback result_cb) override { auto* page_distiller = speedreader::SpeedreaderTabHelper::GetPageDistiller(web_contents); if (page_distiller) { - page_distiller->GetDistilledText(std::move(result_cb)); + page_distiller->GetTextToSpeak(std::move(result_cb)); } else { - std::move(result_cb).Run(false, {}); + std::move(result_cb).Run(base::Value()); } } }; @@ -232,9 +233,24 @@ void SpeedreaderToolbarDataHandlerImpl::OnReadingStop( void SpeedreaderToolbarDataHandlerImpl::OnReadingProgress( content::WebContents* web_contents, - const std::string& element_id, + int paragraph_index, int char_index, - int length) {} + int length) { + if (!web_contents) { + return; + } + + constexpr const char16_t kHighlight[] = uR"js( highlightText($1, $2, $3) )js"; + + const auto script = base::ReplaceStringPlaceholders( + kHighlight, + {base::NumberToString16(paragraph_index), + base::NumberToString16(char_index), base::NumberToString16(length)}, + nullptr); + + web_contents->GetPrimaryMainFrame()->ExecuteJavaScriptInIsolatedWorld( + script, base::DoNothing(), ISOLATED_WORLD_ID_BRAVE_INTERNAL); +} void SpeedreaderToolbarDataHandlerImpl::OnTabStripModelChanged( TabStripModel* tab_strip_model, diff --git a/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.h b/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.h index 3b8af6a7f354..71463b84f8c9 100644 --- a/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.h +++ b/browser/ui/webui/speedreader/speedreader_toolbar_data_handler_impl.h @@ -6,8 +6,6 @@ #ifndef BRAVE_BROWSER_UI_WEBUI_SPEEDREADER_SPEEDREADER_TOOLBAR_DATA_HANDLER_IMPL_H_ #define BRAVE_BROWSER_UI_WEBUI_SPEEDREADER_SPEEDREADER_TOOLBAR_DATA_HANDLER_IMPL_H_ -#include - #include "base/scoped_observation.h" #include "brave/components/speedreader/common/speedreader_toolbar.mojom.h" #include "brave/components/speedreader/speedreader_service.h" @@ -94,7 +92,7 @@ class SpeedreaderToolbarDataHandlerImpl void OnReadingStart(content::WebContents* web_contents) override; void OnReadingStop(content::WebContents* web_contents) override; void OnReadingProgress(content::WebContents* web_contents, - const std::string& element_id, + int paragraph_index, int char_index, int length) override; diff --git a/components/speedreader/resources/panel/components/lists/index.tsx b/components/speedreader/resources/panel/components/lists/index.tsx index e2bb5860b8ee..db223d638e39 100644 --- a/components/speedreader/resources/panel/components/lists/index.tsx +++ b/components/speedreader/resources/panel/components/lists/index.tsx @@ -30,7 +30,6 @@ const mainButtonsOptions = [ id: 'tts', type: MainButtonType.TextToSpeech, iconName: 'headphones', - hidden: true, // TODO(boocmp): Enable in future PR. title: getLocale('braveReaderModeTextToSpeech') }, { diff --git a/components/speedreader/resources/speedreader-desktop.css b/components/speedreader/resources/speedreader-desktop.css index f8442012c30a..f5a96df0cecc 100644 --- a/components/speedreader/resources/speedreader-desktop.css +++ b/components/speedreader/resources/speedreader-desktop.css @@ -548,6 +548,7 @@ iframe { html, html[data-theme='light'] { + position: relative; !important; background-color: var(--background-color); --article-background-color: #FFFFFF; @@ -643,4 +644,33 @@ html[data-font-family='dyslexic'] { html[data-content-style='text-only'] img { display: none !important; -} \ No newline at end of file +} + +.tts-highlighted, .tts-highlighted * { + position: relative; +} + +@keyframes tts-fade-in { + 0% { opacity: 0; } + 100% { opacity: 1; } +} + +.tts-highlighted::after { + animation: tts-fade-in 0.75s; + + content: ''; + background: linear-gradient(90deg, rgba(168, 168, 168, 0.5), rgba(128, 128, 128, 0.1)); + mix-blend-mode: luminosity; + + background-size: 100%; + border-radius: 10px; + + position: absolute; + + left: -1.5rem; + right: -1.5rem; + top: -0.5rem; + bottom: -0.5rem; + + pointer-events: none; +} diff --git a/components/speedreader/resources/speedreader-desktop.js b/components/speedreader/resources/speedreader-desktop.js index 62895b508e4a..e29058b0ca4b 100644 --- a/components/speedreader/resources/speedreader-desktop.js +++ b/components/speedreader/resources/speedreader-desktop.js @@ -45,12 +45,61 @@ const defaultSpeedreaderData = { minutesText: 'min. read', } -const extractText = () => { +const extractTextToSpeak = () => { + const textTags = ['P', 'DIV', 'MAIN', 'ARTICLE'] + + const extractParagraphs = (node) => { + let paragraphs = [] + if (!node) { + return paragraphs + } + for (const child of node.children) { + if (textTags.indexOf(child.tagName) >= 0) { + const childParagraphs = extractParagraphs(child) + if (childParagraphs.length == 0) { + paragraphs.push(child) + } else { + paragraphs = paragraphs.concat(childParagraphs) + } + } + } + return paragraphs + } + + const paragraphs = extractParagraphs($(contentDivId)) + + const textToSpeak = [] + for (const p of paragraphs) { + const text = p.innerText.replace(/\n|\r +/g, ' ').trim() + if (text) { + p.setAttribute('tts-paragraph-index', textToSpeak.length) + textToSpeak.push(p.innerText.replace(/\n|\r +/g, ' ')) + } + } + return { title: document.title, author: $(metaDataDivId)?.querySelector('.author')?.textContent, desciption: $(metaDataDivId)?.querySelector('.subhead')?.textContent, - content: $(contentDivId)?.innerText.replace(/\n|\r +/g, ' ') + paragraphs: textToSpeak + } +} + +const highlightText = (ttsParagraphIndex, charIndex, length) => { + document.querySelectorAll('.tts-highlighted').forEach((e) => { + if (e.getAttribute('tts-paragraph-index') != ttsParagraphIndex) { + e.classList.remove('tts-highlighted') + } + }) + + const paragraph = document.querySelector( + '[tts-paragraph-index="' + ttsParagraphIndex + '"]') + if (paragraph) { + paragraph.classList.add('tts-highlighted') + const ttsContent = document.createElement('tts-content') + ttsContent.append(...paragraph.childNodes) + paragraph.append(ttsContent) + document.documentElement.style.setProperty('--tts-highlight-progress', (charIndex / paragraph.textContent.length) * 100 + '%') } } diff --git a/components/speedreader/tts_player.cc b/components/speedreader/tts_player.cc index dc42cacc90ce..4337a87d3d02 100644 --- a/components/speedreader/tts_player.cc +++ b/components/speedreader/tts_player.cc @@ -8,10 +8,15 @@ #include #include +#include "content/public/browser/navigation_handle.h" #include "content/public/browser/tts_controller.h" #include "content/public/browser/tts_utterance.h" #include "content/public/browser/web_contents.h" +namespace { +constexpr const char kParagraphsKey[] = "paragraphs"; +} + namespace speedreader { TtsPlayer::TtsPlayer() = default; @@ -98,7 +103,7 @@ void TtsPlayer::Controller::Pause() { if (IsPlayingRequestedWebContents()) { auto* tts = content::TtsController::GetInstance(); reading_start_position_ = - std::min(static_cast(reading_content_.size()), + std::min(static_cast(GetParagraphToRead().size()), reading_start_position_ + reading_position_); reading_position_ = 0; tts->Stop(); @@ -115,29 +120,46 @@ void TtsPlayer::Controller::Resume() { void TtsPlayer::Controller::Stop() { auto* tts = content::TtsController::GetInstance(); tts->Stop(); + + paragraph_index_ = -1; reading_position_ = 0; reading_start_position_ = 0; + for (auto& o : owner_->observers_) { + o.OnReadingProgress(playing_web_contents_, paragraph_index_, 0, 0); + } playing_web_contents_ = nullptr; Observe(nullptr); } void TtsPlayer::Controller::Forward() { - reading_start_position_ = - std::min(static_cast(reading_content_.size()), - reading_start_position_ + reading_position_ + 32); + if (!HasNextParagraph()) { + return; + } + ++paragraph_index_; + reading_start_position_ = 0; reading_position_ = 0; if (IsPlaying()) { Resume(true); + } else { + for (auto& o : owner_->observers_) { + o.OnReadingProgress(request_web_contents_, paragraph_index_, 0, 0); + } } } void TtsPlayer::Controller::Rewind() { - reading_start_position_ = - std::max(0, reading_start_position_ + reading_position_ - 32); + if (paragraph_index_ > 0) { + --paragraph_index_; + } + reading_start_position_ = 0; reading_position_ = 0; if (IsPlaying()) { Resume(true); + } else { + for (auto& o : owner_->observers_) { + o.OnReadingProgress(request_web_contents_, paragraph_index_, 0, 0); + } } } @@ -151,7 +173,7 @@ void TtsPlayer::Controller::Resume(bool recreate_utterance) { reading_start_position_ += reading_position_; reading_position_ = 0; - utterance->SetText(reading_content_.substr(reading_start_position_)); + utterance->SetText(GetParagraphToRead().substr(reading_start_position_)); utterance->SetShouldClearQueue(true); utterance->SetEventDelegate(this); utterance->SetVoiceName(current_voice_); @@ -162,8 +184,38 @@ void TtsPlayer::Controller::Resume(bool recreate_utterance) { } } +bool TtsPlayer::Controller::HasNextParagraph() { + if (!reading_content_.is_dict()) { + return false; + } + const auto* content = reading_content_.GetDict().FindList(kParagraphsKey); + if (!content) { + return false; + } + return paragraph_index_ + 1 < static_cast(content->size()); +} + +const std::string& TtsPlayer::Controller::GetParagraphToRead() { + if (!reading_content_.is_dict()) { + return base::EmptyString(); + } + const auto* content = reading_content_.GetDict().FindList(kParagraphsKey); + if (!content) { + return base::EmptyString(); + } + if (0 <= paragraph_index_ && + paragraph_index_ < static_cast(content->size())) { + return (*content)[paragraph_index_].GetString(); + } + return base::EmptyString(); +} + void TtsPlayer::Controller::DidStartNavigation( content::NavigationHandle* handle) { + if (!handle->IsInPrimaryMainFrame() || + handle->GetReloadType() == content::ReloadType::NONE) { + return; + } Stop(); } @@ -180,21 +232,32 @@ void TtsPlayer::Controller::OnTtsEvent(content::TtsUtterance* utterance, case content::TtsEventType::TTS_EVENT_WORD: reading_position_ = char_index; for (auto& o : owner_->observers_) { - o.OnReadingProgress(playing_web_contents_, "content", char_index, + o.OnReadingProgress(playing_web_contents_, paragraph_index_, char_index, length); } break; - case content::TtsEventType::TTS_EVENT_PAUSE: case content::TtsEventType::TTS_EVENT_ERROR: case content::TtsEventType::TTS_EVENT_INTERRUPTED: case content::TtsEventType::TTS_EVENT_CANCELLED: - case content::TtsEventType::TTS_EVENT_END: + case content::TtsEventType::TTS_EVENT_PAUSE: for (auto& o : owner_->observers_) { o.OnReadingStop(playing_web_contents_); } - if (event_type == content::TtsEventType::TTS_EVENT_END) { - reading_position_ = 0; - reading_start_position_ = 0; + break; + case content::TtsEventType::TTS_EVENT_END: + reading_position_ = 0; + reading_start_position_ = 0; + + if (HasNextParagraph()) { + ++paragraph_index_; + Resume(true); + } else { + paragraph_index_ = -1; + for (auto& o : owner_->observers_) { + o.OnReadingProgress(playing_web_contents_, paragraph_index_, + char_index, length); + o.OnReadingStop(playing_web_contents_); + } } break; case content::TtsEventType::TTS_EVENT_RESUME: @@ -210,15 +273,15 @@ void TtsPlayer::Controller::OnTtsEvent(content::TtsUtterance* utterance, } void TtsPlayer::Controller::OnContentReady(content::WebContents* web_contents, - bool success, - std::string content) { - if (!success || web_contents != request_web_contents_) { + base::Value content) { + if (!content.is_dict() || web_contents != request_web_contents_) { return; } playing_web_contents_ = web_contents; Observe(playing_web_contents_); + paragraph_index_ = 0; reading_content_ = std::move(content); reading_position_ = 0; reading_start_position_ = 0; diff --git a/components/speedreader/tts_player.h b/components/speedreader/tts_player.h index b2795a9ed1aa..079f81307004 100644 --- a/components/speedreader/tts_player.h +++ b/components/speedreader/tts_player.h @@ -14,6 +14,7 @@ #include "base/memory/raw_ptr.h" #include "base/memory/singleton.h" #include "base/observer_list.h" +#include "base/values.h" #include "content/public/browser/tts_utterance.h" #include "content/public/browser/web_contents_observer.h" @@ -34,8 +35,7 @@ class TtsPlayer { virtual void RequestReadingContent( content::WebContents* web_contents, - base::OnceCallback - result_cb) = 0; + base::OnceCallback result_cb) = 0; }; class Observer : public base::CheckedObserver { @@ -43,7 +43,7 @@ class TtsPlayer { virtual void OnReadingStart(content::WebContents* web_contents) {} virtual void OnReadingStop(content::WebContents* web_contents) {} virtual void OnReadingProgress(content::WebContents* web_contents, - const std::string& element_id, + int tts_order, int char_index, int length) {} @@ -77,6 +77,9 @@ class TtsPlayer { void Resume(bool recreate_utterance); + bool HasNextParagraph(); + const std::string& GetParagraphToRead(); + // content::WebContentsObserver: void DidStartNavigation(content::NavigationHandle* handle) override; void WebContentsDestroyed() override; @@ -89,17 +92,17 @@ class TtsPlayer { const std::string& error_message) override; void OnContentReady(content::WebContents* web_contents, - bool success, - std::string content); + base::Value content); raw_ptr owner_ = nullptr; raw_ptr playing_web_contents_ = nullptr; raw_ptr request_web_contents_ = nullptr; + int paragraph_index_ = -1; int reading_start_position_ = 0; int reading_position_ = 0; - std::string reading_content_; + base::Value reading_content_; double current_speed_ = 1.0; std::string current_voice_;