From 5f464e315f2b3ca6ffe1e81e0dd5e361af90c8d0 Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Fri, 8 Dec 2023 11:05:49 -0800 Subject: [PATCH 1/6] Add a comment clarifying the format of the domain in scoped rules --- components/query_filter/utils.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/components/query_filter/utils.cc b/components/query_filter/utils.cc index 3ca91dd696d8..21a65c83c435 100644 --- a/components/query_filter/utils.cc +++ b/components/query_filter/utils.cc @@ -117,6 +117,10 @@ static constexpr auto kConditionalQueryStringTrackers = {"mkt_tok", "([uU]nsubscribe|emailWebview)"}, }); +// The domain comparison will also match on subdomains. So if the +// parameter is scoped to example.com below, it will be removed from +// https://example.com/index.php and from http://www.example.com/ for +// example. static constexpr auto kScopedQueryStringTrackers = base::MakeFixedFlatMapSorted({ // https://github.com/brave/brave-browser/issues/11580 From df5b206ba56c24e7189b937dda13c6fe64383aea Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Mon, 18 Dec 2023 14:54:50 -0800 Subject: [PATCH 2/6] Extend scoped domain rules to support multiple domains The ABP list format also separates domains using commas. --- ..._hacks_network_delegate_helper_unittest.cc | 5 ++- components/query_filter/utils.cc | 31 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc index b3f075b4654b..339a8068a852 100644 --- a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc +++ b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc @@ -227,7 +227,7 @@ TEST(BraveSiteHacksNetworkDelegateHelperTest, QueryStringExempted) { TEST(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) { const std::vector> urls( - {// { original url, expected url after filtering } + {// { original url, expected url after filtering ("" means unchanged) } {"https://example.com/?fbclid=1234", "https://example.com/"}, {"https://example.com/?fbclid=1234&", "https://example.com/"}, {"https://example.com/?&fbclid=1234", "https://example.com/"}, @@ -253,6 +253,9 @@ TEST(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) { // Conditional query parameter stripping {"https://example.com/?igshid=1234", ""}, {"https://www.instagram.com/?igshid=1234", "https://www.instagram.com/"}, + {"https://brave.com/?ref_src=example.com", ""}, + {"https://twitter.com/?ref_src=example.com", "https://twitter.com/"}, + {"https://x.com/?ref_src=example.com", "https://x.com/"}, {"https://example.com/?mkt_tok=123&foo=bar&mkt_unsubscribe=1", ""}, {"https://example.com/index.php/email/emailWebview?mkt_tok=1234&foo=bar", ""}, diff --git a/components/query_filter/utils.cc b/components/query_filter/utils.cc index 21a65c83c435..7f8062a34db7 100644 --- a/components/query_filter/utils.cc +++ b/components/query_filter/utils.cc @@ -10,6 +10,7 @@ #include #include +#include "base/containers/contains.h" #include "base/containers/fixed_flat_map.h" #include "base/containers/fixed_flat_set.h" #include "base/strings/string_split.h" @@ -117,6 +118,7 @@ static constexpr auto kConditionalQueryStringTrackers = {"mkt_tok", "([uU]nsubscribe|emailWebview)"}, }); +// The second parameter is a comma-separated list of domains. // The domain comparison will also match on subdomains. So if the // parameter is scoped to example.com below, it will be removed from // https://example.com/index.php and from http://www.example.com/ for @@ -126,10 +128,32 @@ static constexpr auto kScopedQueryStringTrackers = // https://github.com/brave/brave-browser/issues/11580 {"igshid", "instagram.com"}, // https://github.com/brave/brave-browser/issues/26966 - {"ref_src", "twitter.com"}, - {"ref_url", "twitter.com"}, + {"ref_src", "twitter.com,x.com"}, + {"ref_url", "twitter.com,x.com"}, }); +bool IsScopedTracker(const std::string_view param_name, + const std::string& spec) { + if (!base::Contains(trackers, param_name)) { + return false; + } + + const std::vector domain_strings = + SplitStringPiece(kScopedQueryStringTrackers.at(param_name).data(), ",", + base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); + if (domain_strings.empty()) { + return false; + } + const GURL original_url = GURL(spec); + for (const auto& domain : domain_strings) { + if (original_url.DomainIs(domain)) { + return true; + } + } + + return false; +} + // Remove tracking query parameters from a GURL, leaving all // other parts untouched. std::optional StripQueryParameter(const std::string_view query, @@ -151,8 +175,7 @@ std::optional StripQueryParameter(const std::string_view query, const std::string_view key = pieces.empty() ? "" : pieces[0]; if (pieces.size() >= 2 && (kSimpleQueryStringTrackers.count(key) == 1 || - (kScopedQueryStringTrackers.count(key) == 1 && - GURL(spec).DomainIs(kScopedQueryStringTrackers.at(key).data())) || + IsScopedTracker(key, spec) || (kConditionalQueryStringTrackers.count(key) == 1 && !re2::RE2::PartialMatch( spec, kConditionalQueryStringTrackers.at(key).data())))) { From 973af87d45e19ee09e8d0d29efdbd5b489962ac5 Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Tue, 5 Dec 2023 13:21:05 -0800 Subject: [PATCH 3/6] Remove YouTube's si URL parameter from URLs (fixes brave/brave-browser#34719) --- components/query_filter/utils.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/query_filter/utils.cc b/components/query_filter/utils.cc index 7f8062a34db7..4b584c912863 100644 --- a/components/query_filter/utils.cc +++ b/components/query_filter/utils.cc @@ -130,6 +130,8 @@ static constexpr auto kScopedQueryStringTrackers = // https://github.com/brave/brave-browser/issues/26966 {"ref_src", "twitter.com,x.com"}, {"ref_url", "twitter.com,x.com"}, + // https://github.com/brave/brave-browser/issues/34719 + {"si", "youtube.com,youtu.be"}, }); bool IsScopedTracker(const std::string_view param_name, From 74d0ab318806eeb8865b3b976f8841cdb200dc8e Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Thu, 21 Dec 2023 12:56:19 -0800 Subject: [PATCH 4/6] Make IsScopedTracker() unit-testable Use a vector instead of a comma-separated string since this is a short list and it doesn't have to be optimized prematurely. --- components/query_filter/utils.cc | 31 +++++++++------ components/query_filter/utils.h | 7 ++++ components/query_filter/utils_unittest.cc | 47 +++++++++++++++++++++++ 3 files changed, 73 insertions(+), 12 deletions(-) diff --git a/components/query_filter/utils.cc b/components/query_filter/utils.cc index 4b584c912863..c0003bcc0ab4 100644 --- a/components/query_filter/utils.cc +++ b/components/query_filter/utils.cc @@ -123,26 +123,26 @@ static constexpr auto kConditionalQueryStringTrackers = // parameter is scoped to example.com below, it will be removed from // https://example.com/index.php and from http://www.example.com/ for // example. -static constexpr auto kScopedQueryStringTrackers = - base::MakeFixedFlatMapSorted({ +static const auto kScopedQueryStringTrackers = + std::map>({ // https://github.com/brave/brave-browser/issues/11580 - {"igshid", "instagram.com"}, + {"igshid", {"instagram.com"}}, // https://github.com/brave/brave-browser/issues/26966 - {"ref_src", "twitter.com,x.com"}, - {"ref_url", "twitter.com,x.com"}, + {"ref_src", {"twitter.com", "x.com"}}, + {"ref_url", {"twitter.com", "x.com"}}, // https://github.com/brave/brave-browser/issues/34719 - {"si", "youtube.com,youtu.be"}, + {"si", {"youtube.com", "youtu.be"}}, }); -bool IsScopedTracker(const std::string_view param_name, - const std::string& spec) { +bool IsScopedTracker( + const std::string_view param_name, + const std::string& spec, + const std::map>& trackers) { if (!base::Contains(trackers, param_name)) { return false; } - const std::vector domain_strings = - SplitStringPiece(kScopedQueryStringTrackers.at(param_name).data(), ",", - base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); + const auto& domain_strings = trackers.at(param_name); if (domain_strings.empty()) { return false; } @@ -156,6 +156,13 @@ bool IsScopedTracker(const std::string_view param_name, return false; } +bool IsScopedTrackerForTesting( + const std::string_view param_name, + const std::string& spec, + const std::map>& trackers) { + return IsScopedTracker(param_name, spec, trackers); +} + // Remove tracking query parameters from a GURL, leaving all // other parts untouched. std::optional StripQueryParameter(const std::string_view query, @@ -177,7 +184,7 @@ std::optional StripQueryParameter(const std::string_view query, const std::string_view key = pieces.empty() ? "" : pieces[0]; if (pieces.size() >= 2 && (kSimpleQueryStringTrackers.count(key) == 1 || - IsScopedTracker(key, spec) || + IsScopedTracker(key, spec, kScopedQueryStringTrackers) || (kConditionalQueryStringTrackers.count(key) == 1 && !re2::RE2::PartialMatch( spec, kConditionalQueryStringTrackers.at(key).data())))) { diff --git a/components/query_filter/utils.h b/components/query_filter/utils.h index 65b79738714c..ff57f4f3de9d 100644 --- a/components/query_filter/utils.h +++ b/components/query_filter/utils.h @@ -6,8 +6,10 @@ #ifndef BRAVE_COMPONENTS_QUERY_FILTER_UTILS_H_ #define BRAVE_COMPONENTS_QUERY_FILTER_UTILS_H_ +#include #include #include +#include #include "url/gurl.h" @@ -31,5 +33,10 @@ std::optional MaybeApplyQueryStringFilter( const GURL& request_url, const std::string& request_method, const bool internal_redirect); + +bool IsScopedTrackerForTesting( + const std::string_view param_name, + const std::string& spec, + const std::map>& trackers); } // namespace query_filter #endif // BRAVE_COMPONENTS_QUERY_FILTER_UTILS_H_ diff --git a/components/query_filter/utils_unittest.cc b/components/query_filter/utils_unittest.cc index 3869c0605289..b9b139b636ad 100644 --- a/components/query_filter/utils_unittest.cc +++ b/components/query_filter/utils_unittest.cc @@ -5,6 +5,8 @@ #include "brave/components/query_filter/utils.h" +#include + #include "testing/gtest/include/gtest/gtest.h" #include "url/gurl.h" @@ -98,3 +100,48 @@ TEST(BraveQueryFilter, FilterQueryTrackers) { GURL("https://brave.com"), GURL("https://brave.com"), GURL("https://test.com/?gclid=123"), "GET", true)); } + +TEST(BraveQueryFilter, IsScopedTracker) { + auto trackers = std::map>({ + {"igshid", {"instagram.com"}}, + {"ref_src", {"twitter.com", "x.com", "y.com"}}, + {"sample1", {"", " ", "brave.com", ""}}, + {"sample2", {" "}}, + {"sample3", {""}}, + {"sample4", {}}, + }); + + // Normal case for a parameter that's not on the list + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "t", "https://twitter.com/", trackers)); + + // Normal case with a single domain + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "igshid", "https://instagram.com/", trackers)); + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "igshid", "http://www.instagram.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "igshid", "https://example.com/", trackers)); + + // Normal case with more than one domain + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://twitter.com/", trackers)); + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://x.com/", trackers)); + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://y.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "ref_src", "https://z.com/", trackers)); + + // Edge cases + EXPECT_TRUE(query_filter::IsScopedTrackerForTesting( + "sample1", "https://brave.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample1", "https://example.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample2", "https://brave.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample3", "https://brave.com/", trackers)); + EXPECT_FALSE(query_filter::IsScopedTrackerForTesting( + "sample4", "https://brave.com/", trackers)); +} From 62f21f24ec790f9cf0836475d3878cfef361f198 Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Thu, 21 Dec 2023 13:00:07 -0800 Subject: [PATCH 5/6] Move unexported functions to the anonymous namespace --- components/query_filter/utils.cc | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/components/query_filter/utils.cc b/components/query_filter/utils.cc index c0003bcc0ab4..52c5af74976e 100644 --- a/components/query_filter/utils.cc +++ b/components/query_filter/utils.cc @@ -18,7 +18,7 @@ #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "third_party/re2/src/re2/re2.h" -namespace query_filter { +namespace { static constexpr auto kSimpleQueryStringTrackers = base::MakeFixedFlatSetSorted({ @@ -156,13 +156,6 @@ bool IsScopedTracker( return false; } -bool IsScopedTrackerForTesting( - const std::string_view param_name, - const std::string& spec, - const std::map>& trackers) { - return IsScopedTracker(param_name, spec, trackers); -} - // Remove tracking query parameters from a GURL, leaving all // other parts untouched. std::optional StripQueryParameter(const std::string_view query, @@ -198,6 +191,9 @@ std::optional StripQueryParameter(const std::string_view query, } return std::nullopt; } +} // namespace + +namespace query_filter { std::optional ApplyQueryFilter(const GURL& original_url) { const auto& query = original_url.query_piece(); @@ -257,4 +253,11 @@ std::optional MaybeApplyQueryStringFilter( return ApplyQueryFilter(request_url); } + +bool IsScopedTrackerForTesting( + const std::string_view param_name, + const std::string& spec, + const std::map>& trackers) { + return IsScopedTracker(param_name, spec, trackers); +} } // namespace query_filter From 3016a599c26aabd21a615bb1f04119e3fc974855 Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Wed, 3 Jan 2024 11:35:24 -0800 Subject: [PATCH 6/6] Remove Instagram's igsh share tracker (fixes brave/brave-browser#35094) --- components/query_filter/utils.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/query_filter/utils.cc b/components/query_filter/utils.cc index 52c5af74976e..0621574d17c5 100644 --- a/components/query_filter/utils.cc +++ b/components/query_filter/utils.cc @@ -125,6 +125,8 @@ static constexpr auto kConditionalQueryStringTrackers = // example. static const auto kScopedQueryStringTrackers = std::map>({ + // https://github.com/brave/brave-browser/issues/35094 + {"igsh", {"instagram.com"}}, // https://github.com/brave/brave-browser/issues/11580 {"igshid", {"instagram.com"}}, // https://github.com/brave/brave-browser/issues/26966