diff --git a/browser/brave_news/BUILD.gn b/browser/brave_news/BUILD.gn index 000a87f19f88..7ac8e4779a66 100644 --- a/browser/brave_news/BUILD.gn +++ b/browser/brave_news/BUILD.gn @@ -8,14 +8,19 @@ source_set("browser_tests") { defines = [ "HAS_OUT_OF_PROC_TEST_RUNNER" ] if (!is_android) { - sources = [ "brave_news_tab_helper_browsertest.cc" ] + sources = [ + "brave_news_tab_helper_browsertest.cc", + "direct_feed_fetcher_browsertest.cc", + ] deps = [ "//brave/components//brave_rewards/browser", + "//brave/components/brave_news/browser", "//brave/components/brave_news/common:common", "//brave/components/brave_news/common:mojom", "//brave/components/constants", "//chrome/browser", + "//chrome/browser:browser_process", "//chrome/browser/profiles", "//chrome/browser/profiles:profile", "//chrome/browser/ui:ui", diff --git a/browser/brave_news/direct_feed_fetcher_browsertest.cc b/browser/brave_news/direct_feed_fetcher_browsertest.cc new file mode 100644 index 000000000000..2ff577b04521 --- /dev/null +++ b/browser/brave_news/direct_feed_fetcher_browsertest.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2024 The Brave Authors. All rights reserved. +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "brave/components/brave_news/browser/direct_feed_fetcher.h" + +#include "base/test/bind.h" +#include "chrome/browser/browser_process.h" +#include "chrome/browser/net/system_network_context_manager.h" +#include "chrome/test/base/in_process_browser_test.h" +#include "content/public/test/browser_test.h" +#include "content/public/test/content_mock_cert_verifier.h" +#include "net/dns/mock_host_resolver.h" +#include "net/test/embedded_test_server/embedded_test_server.h" +#include "net/test/embedded_test_server/http_request.h" +#include "net/test/embedded_test_server/http_response.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace brave_news { + +namespace { +std::string GetBasicFeed() { + return R"( + + Hacker News + https://news.ycombinator.com/ + Links for the intellectually curious, ranked by readers. + + Enough with the dead butterflies (2017) + https://www.emilydamstra.com/please-enough-dead-butterflies/ + Sun, 3 Mar 2024 22:40:13 +0000 + https://news.ycombinator.com/item?id=39585207 + Comments]]> + + + )"; +} + +} // namespace + +class DirectFeedFetcherBrowserTest : public InProcessBrowserTest { + public: + DirectFeedFetcherBrowserTest() = default; + + void SetUpOnMainThread() override { + InProcessBrowserTest::SetUpOnMainThread(); + + mock_cert_verifier_.mock_cert_verifier()->set_default_result(net::OK); + host_resolver()->AddRule("*", "127.0.0.1"); + + https_server_.RegisterRequestHandler(base::BindLambdaForTesting( + [](const net::test_server::HttpRequest& request) + -> std::unique_ptr { + LOG(ERROR) << "request to https " << request.GetURL().path(); + if (request.GetURL().path() == "/feed") { + auto response = + std::make_unique(); + response->set_code(net::HTTP_OK); + response->set_content(GetBasicFeed()); + response->set_content_type("application/rss+xml"); + return response; + } else if (request.GetURL().path() == "/feed2") { + auto response = + std::make_unique(); + response->set_code(net::HTTP_MOVED_PERMANENTLY); + response->AddCustomHeader("Location", "/feed"); + return response; + } + return nullptr; + })); + fetcher_ = std::make_unique( + g_browser_process->system_network_context_manager() + ->GetSharedURLLoaderFactory(), + delegate_.AsWeakPtr()); + + ASSERT_TRUE(https_server_.Start()); + } + + protected: + class MockDelegate : public DirectFeedFetcher::Delegate { + public: + ~MockDelegate() override = default; + + DirectFeedFetcher::Delegate::HTTPSUpgradeInfo GetURLHTTPSUpgradeInfo( + const GURL& url) override { + HTTPSUpgradeInfo info; + info.should_upgrade = true; + info.should_force = false; + return info; + } + + base::WeakPtr AsWeakPtr() override { + return weak_ptr_factory_.GetWeakPtr(); + } + + private: + base::WeakPtrFactory weak_ptr_factory_{this}; + }; + + net::EmbeddedTestServer https_server_{net::EmbeddedTestServer::TYPE_HTTPS}; + content::ContentMockCertVerifier mock_cert_verifier_; + MockDelegate delegate_; + std::unique_ptr fetcher_; +}; + +IN_PROC_BROWSER_TEST_F(DirectFeedFetcherBrowserTest, RedirectToFeed) { + base::RunLoop run_loop; + GURL feed2_url = https_server_.GetURL("/feed2"); + + fetcher_->DownloadFeed( + feed2_url, "test_publisher", + base::BindLambdaForTesting([&](DirectFeedResponse response) { + const auto& result = absl::get(response.result); + ASSERT_EQ(1u, result.articles.size()); + EXPECT_EQ(feed2_url.spec(), response.url.spec()); + EXPECT_EQ("Hacker News", result.title); + run_loop.Quit(); + })); + + run_loop.Run(); +} + +} // namespace brave_news diff --git a/browser/brave_news/direct_feed_fetcher_delegate_impl.cc b/browser/brave_news/direct_feed_fetcher_delegate_impl.cc index 22ce8cfeeaf5..25186ad5dc31 100644 --- a/browser/brave_news/direct_feed_fetcher_delegate_impl.cc +++ b/browser/brave_news/direct_feed_fetcher_delegate_impl.cc @@ -6,6 +6,7 @@ #include "brave/browser/brave_news/direct_feed_fetcher_delegate_impl.h" #include "brave/browser/brave_browser_process.h" +#include "brave/components/brave_news/browser/direct_feed_fetcher.h" #include "brave/components/brave_shields/content/browser/brave_shields_util.h" #include "content/public/browser/browser_thread.h" @@ -15,17 +16,21 @@ DirectFeedFetcherDelegateImpl::DirectFeedFetcherDelegateImpl( HostContentSettingsMap* host_content_settings_map) : host_content_settings_map_(host_content_settings_map), https_upgrade_exceptions_service_( - g_brave_browser_process->https_upgrade_exceptions_service()) {} + g_brave_browser_process->https_upgrade_exceptions_service()) { + CHECK(host_content_settings_map_); + CHECK(https_upgrade_exceptions_service_); +} DirectFeedFetcherDelegateImpl::~DirectFeedFetcherDelegateImpl() = default; -bool DirectFeedFetcherDelegateImpl::ShouldUpgradeToHttps(const GURL& url) { - DCHECK_CURRENTLY_ON(content::BrowserThread::UI); - if (!host_content_settings_map_ || !https_upgrade_exceptions_service_) { - return true; - } - return brave_shields::ShouldUpgradeToHttps(host_content_settings_map_, url, - https_upgrade_exceptions_service_); +DirectFeedFetcher::Delegate::HTTPSUpgradeInfo +DirectFeedFetcherDelegateImpl::GetURLHTTPSUpgradeInfo(const GURL& url) { + HTTPSUpgradeInfo info; + info.should_upgrade = brave_shields::ShouldUpgradeToHttps( + host_content_settings_map_, url, https_upgrade_exceptions_service_); + info.should_force = + brave_shields::ShouldForceHttps(host_content_settings_map_, url); + return info; } base::WeakPtr diff --git a/browser/brave_news/direct_feed_fetcher_delegate_impl.h b/browser/brave_news/direct_feed_fetcher_delegate_impl.h index 339dd4af5a55..b20053a3649c 100644 --- a/browser/brave_news/direct_feed_fetcher_delegate_impl.h +++ b/browser/brave_news/direct_feed_fetcher_delegate_impl.h @@ -28,7 +28,8 @@ class DirectFeedFetcherDelegateImpl : public DirectFeedFetcher::Delegate { const DirectFeedFetcherDelegateImpl&) = delete; // Must be called on UI thread - bool ShouldUpgradeToHttps(const GURL& url) override; + DirectFeedFetcher::Delegate::HTTPSUpgradeInfo GetURLHTTPSUpgradeInfo( + const GURL& url) override; base::WeakPtr AsWeakPtr() override; diff --git a/components/brave_news/browser/direct_feed_controller_unittest.cc b/components/brave_news/browser/direct_feed_controller_unittest.cc index 04f49b0651ed..aa7845caa8a6 100644 --- a/components/brave_news/browser/direct_feed_controller_unittest.cc +++ b/components/brave_news/browser/direct_feed_controller_unittest.cc @@ -86,7 +86,13 @@ class BraveNewsDirectFeedControllerTest : public testing::Test { public: ~MockDirectFeedFetcherDelegate() override = default; - bool ShouldUpgradeToHttps(const GURL& url) override { return true; } + DirectFeedFetcher::Delegate::HTTPSUpgradeInfo GetURLHTTPSUpgradeInfo( + const GURL& url) override { + HTTPSUpgradeInfo info; + info.should_upgrade = true; + info.should_force = false; + return info; + } base::WeakPtr AsWeakPtr() override { return weak_ptr_factory_.GetWeakPtr(); diff --git a/components/brave_news/browser/direct_feed_fetcher.cc b/components/brave_news/browser/direct_feed_fetcher.cc index d3e7a7c0b1b3..085498eeaaaa 100644 --- a/components/brave_news/browser/direct_feed_fetcher.cc +++ b/components/brave_news/browser/direct_feed_fetcher.cc @@ -30,6 +30,8 @@ namespace brave_news { namespace { +constexpr size_t kMaxRedirectCount = 7; + std::string GetResponseCharset(network::SimpleURLLoader* loader) { auto* response_info = loader->ResponseInfo(); if (!response_info) { @@ -154,44 +156,53 @@ DirectFeedFetcher::~DirectFeedFetcher() = default; void DirectFeedFetcher::DownloadFeed(GURL url, std::string publisher_id, DownloadFeedCallback callback) { - if (url.SchemeIs(url::kHttpScheme)) { + DownloadFeedHelper(url, url, publisher_id, 0, std::move(callback), + std::nullopt); +} + +void DirectFeedFetcher::DownloadFeedHelper( + GURL url, + GURL original_url, + std::string publisher_id, + size_t redirect_count, + DownloadFeedCallback callback, + std::optional https_upgrade_info) { + if (!https_upgrade_info && url.SchemeIs(url::kHttpScheme)) { content::GetUIThreadTaskRunner({})->PostTask( FROM_HERE, base::BindOnce( [](base::WeakPtr delegate, scoped_refptr source_task_runner, - base::OnceCallback callback, GURL url) { + base::OnceCallback)> callback, + GURL url) { if (delegate.WasInvalidated()) { return; } - bool should_upgrade = delegate->ShouldUpgradeToHttps(url); + auto upgrade_info = delegate->GetURLHTTPSUpgradeInfo(url); source_task_runner->PostTask( FROM_HERE, base::BindOnce( - [](base::OnceCallback callback, bool result) { + [](base::OnceCallback)> + callback, + Delegate::HTTPSUpgradeInfo result) { std::move(callback).Run(result); }, - std::move(callback), should_upgrade)); + std::move(callback), upgrade_info)); }, delegate_, base::SingleThreadTaskRunner::GetCurrentDefault(), base::BindOnce(&DirectFeedFetcher::DownloadFeedHelper, - weak_ptr_factory_.GetWeakPtr(), url, publisher_id, - std::move(callback)), + weak_ptr_factory_.GetWeakPtr(), url, original_url, + publisher_id, redirect_count, std::move(callback)), url)); return; } - DownloadFeedHelper(url, publisher_id, std::move(callback), false); -} - -void DirectFeedFetcher::DownloadFeedHelper(GURL url, - std::string publisher_id, - DownloadFeedCallback callback, - bool should_https_upgrade) { // Make request auto request = std::make_unique(); bool https_upgraded = false; - if (should_https_upgrade) { + if (https_upgrade_info && https_upgrade_info->should_upgrade) { GURL::Replacements replacements; replacements.SetSchemeStr(url::kHttpsScheme); url = url.ReplaceComponents(replacements); @@ -202,6 +213,7 @@ void DirectFeedFetcher::DownloadFeedHelper(GURL url, request->load_flags = net::LOAD_DO_NOT_SAVE_COOKIES; request->credentials_mode = network::mojom::CredentialsMode::kOmit; request->method = net::HttpRequestHeaders::kGetMethod; + request->redirect_mode = network::mojom::RedirectMode::kError; auto url_loader = network::SimpleURLLoader::Create( std::move(request), GetNetworkTrafficAnnotationTag()); url_loader->SetRetryOptions( @@ -216,24 +228,37 @@ void DirectFeedFetcher::DownloadFeedHelper(GURL url, // Handle response base::BindOnce(&DirectFeedFetcher::OnFeedDownloaded, weak_ptr_factory_.GetWeakPtr(), iter, std::move(callback), - url, std::move(publisher_id), https_upgraded), + url, original_url, std::move(publisher_id), + https_upgrade_info, https_upgraded, redirect_count), 5 * 1024 * 1024); } void DirectFeedFetcher::OnFeedDownloaded( SimpleURLLoaderList::iterator iter, DownloadFeedCallback callback, - GURL feed_url, + GURL url, + GURL original_url, std::string publisher_id, + std::optional https_upgrade_info, bool https_upgraded, + size_t redirect_count, std::unique_ptr response_body) { auto* loader = iter->get(); + + if (loader->NetError() == net::ERR_FAILED && loader->GetFinalURL() != url && + redirect_count < kMaxRedirectCount) { + // Redirect detected. Make another request + DownloadFeedHelper(loader->GetFinalURL(), original_url, publisher_id, + redirect_count + 1, std::move(callback), std::nullopt); + return; + } + auto response_code = -1; auto result = DirectFeedResponse(); result.charset = GetResponseCharset(loader); - result.url = feed_url; - result.final_url = loader->GetFinalURL(); + result.url = original_url; + result.final_url = url; if (loader->ResponseInfo()) { auto headers_list = loader->ResponseInfo()->headers; @@ -248,12 +273,15 @@ void DirectFeedFetcher::OnFeedDownloaded( std::string body_content = response_body ? *response_body : ""; if (response_code < 200 || response_code >= 300 || body_content.empty()) { - VLOG(1) << feed_url.spec() << " invalid response, state: " << response_code; - if (https_upgraded) { + VLOG(1) << url.spec() << " invalid response, state: " << response_code; + if (https_upgraded && https_upgrade_info && + !https_upgrade_info->should_force) { GURL::Replacements replacements; replacements.SetSchemeStr(url::kHttpScheme); - feed_url = feed_url.ReplaceComponents(replacements); - DownloadFeedHelper(feed_url, publisher_id, std::move(callback), false); + url = url.ReplaceComponents(replacements); + https_upgrade_info->should_upgrade = false; + DownloadFeedHelper(url, original_url, publisher_id, redirect_count, + std::move(callback), https_upgrade_info); return; } DirectFeedError error; @@ -264,7 +292,7 @@ void DirectFeedFetcher::OnFeedDownloaded( } ParseFeedDataOffMainThread( - feed_url, std::move(publisher_id), std::move(body_content), + url, std::move(publisher_id), std::move(body_content), base::BindOnce(&DirectFeedFetcher::OnParsedFeedData, weak_ptr_factory_.GetWeakPtr(), std::move(callback), std::move(result))); diff --git a/components/brave_news/browser/direct_feed_fetcher.h b/components/brave_news/browser/direct_feed_fetcher.h index 5b24e7620f51..f8d1b436fcbf 100644 --- a/components/brave_news/browser/direct_feed_fetcher.h +++ b/components/brave_news/browser/direct_feed_fetcher.h @@ -71,11 +71,16 @@ class DirectFeedFetcher { public: virtual ~Delegate() = default; - virtual bool ShouldUpgradeToHttps(const GURL& url) = 0; + struct HTTPSUpgradeInfo { + bool should_upgrade; + bool should_force; + }; + + virtual HTTPSUpgradeInfo GetURLHTTPSUpgradeInfo(const GURL& url) = 0; virtual base::WeakPtr AsWeakPtr() = 0; }; - explicit DirectFeedFetcher( + DirectFeedFetcher( scoped_refptr url_loader_factory, base::WeakPtr delegate); DirectFeedFetcher(const DirectFeedFetcher&) = delete; @@ -92,16 +97,23 @@ class DirectFeedFetcher { using SimpleURLLoaderList = std::list>; - void DownloadFeedHelper(GURL url, - std::string publisher_id, - DownloadFeedCallback callback, - bool should_https_upgrade); - void OnFeedDownloaded(SimpleURLLoaderList::iterator iter, - DownloadFeedCallback callback, - GURL feed_url, - std::string publisher_id, - bool https_upgraded, - const std::unique_ptr response_body); + void DownloadFeedHelper( + GURL url, + GURL original_url, + std::string publisher_id, + size_t redirect_count, + DownloadFeedCallback callback, + std::optional https_upgrade_info); + void OnFeedDownloaded( + SimpleURLLoaderList::iterator iter, + DownloadFeedCallback callback, + GURL url, + GURL original_url, + std::string publisher_id, + std::optional https_upgrade_info, + bool https_upgraded, + size_t redirect_count, + const std::unique_ptr response_body); void OnParsedFeedData(DownloadFeedCallback callback, DirectFeedResponse result, absl::variant data);