diff --git a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc index b3f075b4654b..339a8068a852 100644 --- a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc +++ b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc @@ -227,7 +227,7 @@ TEST(BraveSiteHacksNetworkDelegateHelperTest, QueryStringExempted) { TEST(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) { const std::vector> urls( - {// { original url, expected url after filtering } + {// { original url, expected url after filtering ("" means unchanged) } {"https://example.com/?fbclid=1234", "https://example.com/"}, {"https://example.com/?fbclid=1234&", "https://example.com/"}, {"https://example.com/?&fbclid=1234", "https://example.com/"}, @@ -253,6 +253,9 @@ TEST(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) { // Conditional query parameter stripping {"https://example.com/?igshid=1234", ""}, {"https://www.instagram.com/?igshid=1234", "https://www.instagram.com/"}, + {"https://brave.com/?ref_src=example.com", ""}, + {"https://twitter.com/?ref_src=example.com", "https://twitter.com/"}, + {"https://x.com/?ref_src=example.com", "https://x.com/"}, {"https://example.com/?mkt_tok=123&foo=bar&mkt_unsubscribe=1", ""}, {"https://example.com/index.php/email/emailWebview?mkt_tok=1234&foo=bar", ""}, diff --git a/components/query_filter/utils.cc b/components/query_filter/utils.cc index 3ca91dd696d8..0621574d17c5 100644 --- a/components/query_filter/utils.cc +++ b/components/query_filter/utils.cc @@ -10,6 +10,7 @@ #include #include +#include "base/containers/contains.h" #include "base/containers/fixed_flat_map.h" #include "base/containers/fixed_flat_set.h" #include "base/strings/string_split.h" @@ -17,7 +18,7 @@ #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "third_party/re2/src/re2/re2.h" -namespace query_filter { +namespace { static constexpr auto kSimpleQueryStringTrackers = base::MakeFixedFlatSetSorted({ @@ -117,15 +118,46 @@ static constexpr auto kConditionalQueryStringTrackers = {"mkt_tok", "([uU]nsubscribe|emailWebview)"}, }); -static constexpr auto kScopedQueryStringTrackers = - base::MakeFixedFlatMapSorted({ +// The second parameter is a comma-separated list of domains. +// The domain comparison will also match on subdomains. So if the +// parameter is scoped to example.com below, it will be removed from +// https://example.com/index.php and from http://www.example.com/ for +// example. +static const auto kScopedQueryStringTrackers = + std::map>({ + // https://github.com/brave/brave-browser/issues/35094 + {"igsh", {"instagram.com"}}, // https://github.com/brave/brave-browser/issues/11580 - {"igshid", "instagram.com"}, + {"igshid", {"instagram.com"}}, // https://github.com/brave/brave-browser/issues/26966 - {"ref_src", "twitter.com"}, - {"ref_url", "twitter.com"}, + {"ref_src", {"twitter.com", "x.com"}}, + {"ref_url", {"twitter.com", "x.com"}}, + // https://github.com/brave/brave-browser/issues/34719 + {"si", {"youtube.com", "youtu.be"}}, }); +bool IsScopedTracker( + const std::string_view param_name, + const std::string& spec, + const std::map>& trackers) { + if (!base::Contains(trackers, param_name)) { + return false; + } + + const auto& domain_strings = trackers.at(param_name); + if (domain_strings.empty()) { + return false; + } + const GURL original_url = GURL(spec); + for (const auto& domain : domain_strings) { + if (original_url.DomainIs(domain)) { + return true; + } + } + + return false; +} + // Remove tracking query parameters from a GURL, leaving all // other parts untouched. std::optional StripQueryParameter(const std::string_view query, @@ -147,8 +179,7 @@ std::optional StripQueryParameter(const std::string_view query, const std::string_view key = pieces.empty() ? "" : pieces[0]; if (pieces.size() >= 2 && (kSimpleQueryStringTrackers.count(key) == 1 || - (kScopedQueryStringTrackers.count(key) == 1 && - GURL(spec).DomainIs(kScopedQueryStringTrackers.at(key).data())) || + IsScopedTracker(key, spec, kScopedQueryStringTrackers) || (kConditionalQueryStringTrackers.count(key) == 1 && !re2::RE2::PartialMatch( spec, kConditionalQueryStringTrackers.at(key).data())))) { @@ -162,6 +193,9 @@ std::optional StripQueryParameter(const std::string_view query, } return std::nullopt; } +} // namespace + +namespace query_filter { std::optional ApplyQueryFilter(const GURL& original_url) { const auto& query = original_url.query_piece(); @@ -221,4 +255,11 @@ std::optional MaybeApplyQueryStringFilter( return ApplyQueryFilter(request_url); } + +bool IsScopedTrackerForTesting( + const std::string_view param_name, + const std::string& spec, + const std::map>& trackers) { + return IsScopedTracker(param_name, spec, trackers); +} } // namespace query_filter diff --git a/components/query_filter/utils.h b/components/query_filter/utils.h index 65b79738714c..ff57f4f3de9d 100644 --- a/components/query_filter/utils.h +++ b/components/query_filter/utils.h @@ -6,8 +6,10 @@ #ifndef BRAVE_COMPONENTS_QUERY_FILTER_UTILS_H_ #define BRAVE_COMPONENTS_QUERY_FILTER_UTILS_H_ +#include #include #include +#include #include "url/gurl.h" @@ -31,5 +33,10 @@ std::optional MaybeApplyQueryStringFilter( const GURL& request_url, const std::string& request_method, const bool internal_redirect); + +bool IsScopedTrackerForTesting( + const std::string_view param_name, + const std::string& spec, + const std::map>& trackers); } // namespace query_filter #endif // BRAVE_COMPONENTS_QUERY_FILTER_UTILS_H_ diff --git a/components/query_filter/utils_unittest.cc b/components/query_filter/utils_unittest.cc index 3869c0605289..b9b139b636ad 100644 --- a/components/query_filter/utils_unittest.cc +++ b/components/query_filter/utils_unittest.cc @@ -5,6 +5,8 @@ #include "brave/components/query_filter/utils.h" +#include + #include "testing/gtest/include/gtest/gtest.h" #include "url/gurl.h" @@ -98,3 +100,48 @@ TEST(BraveQueryFilter, FilterQueryTrackers) { GURL("https://brave.com"), GURL("https://brave.com"), GURL("https://test.com/?gclid=123"), "GET", true)); } + +TEST(BraveQueryFilter, IsScopedTracker) { + auto trackers = std::map>({ + {"igshid", {"instagram.com"}}, + {"ref_src", {"twitter.com", "x.com", "y.com"}}, + {"sample1", {"", " ", "brave.com", ""}}, + {"sample2", {" "}}, + {"sample3", {""}}, + {"sample4", {}}, + }); + + // Normal case for a parameter that's not on the list + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "t", "https://twitter.com/", trackers)); + + // Normal case with a single domain + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "igshid", "https://instagram.com/", trackers)); + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "igshid", "http://www.instagram.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "igshid", "https://example.com/", trackers)); + + // Normal case with more than one domain + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://twitter.com/", trackers)); + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://x.com/", trackers)); + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://y.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://z.com/", trackers)); + + // Edge cases + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "sample1", "https://brave.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample1", "https://example.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample2", "https://brave.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample3", "https://brave.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample4", "https://brave.com/", trackers)); +}