Skip to content

Commit

Permalink
speedreader: port to kuchikiki 0.8.2
Browse files Browse the repository at this point in the history
We renamed our fork of the kuchiki tree-handling crate since
upstream is unmaintained. It's easier to build against released
crates now that we're vendoring, so move to the newly published
crate name. This also addresses a `cargo audit` warning about
the unmaintained dependency.

Note however that we're still using a brave-specific fork from
git here, so the crate itself is still installed from DEPS.

This adds a new transitive dependency on `indexmap`, which is used
to round-trip HTML attributes with stable ordering so unit tests
don't need trivial updates whenever we touch something.
This dependency is already available in upstream Chromium.

kuchikiki 0.8.2 specifies newer versions of its other dependencies
but seems to still work when built against the old releases, so
those are left to another commit.
  • Loading branch information
rillian committed Sep 19, 2023
1 parent e44aea2 commit 32203ba
Show file tree
Hide file tree
Showing 19 changed files with 85 additions and 86 deletions.
2 changes: 1 addition & 1 deletion DEPS
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ deps = {
"third_party/rust/star_constellation/v0_2/crate": "https://github.com/brave/constellation.git@db575edec12509ce1bda6afe68bb58e538a21d3a",
"third_party/rust/challenge_bypass_ristretto/v1/crate": "https://github.com/brave-intl/challenge-bypass-ristretto.git@a1da4641734adc8312215b38a8221962d2c8e045",
"third_party/rust/futures_retry/v0_5/crate": "https://github.com/brave-intl/futures-retry.git@2aaaafbc3d394661534d4dbd14159d164243c20e",
"third_party/rust/kuchiki/v0_8/crate": "https://github.com/brave/kuchiki.git@589eadca2c1d06ddda2919354590bfe1ace88a43",
"third_party/rust/kuchikiki/v0_8/crate": "https://github.com/brave/kuchiki.git@436b4e21274e64c5f9bc142ba6fa330fe9a8c97d",
"third_party/macholib": {
"url": "https://github.com/ronaldoussoren/macholib.git@36a6777ccd0891c5d1b44ba885573d7c90740015",
"condition": "checkout_mac",
Expand Down
4 changes: 2 additions & 2 deletions components/speedreader/rust/ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ name = "speedreader_ffi"
path = "lib.rs"
crate-type = ["rlib"]

[patch.crates-io.kuchiki]
path = "../../../../third_party/rust/kuchiki/v0_8/crate"
[patch.crates-io.kuchikiki]
path = "../../../../third_party/rust/kuchikiki/v0_8/crate"
2 changes: 1 addition & 1 deletion components/speedreader/rust/lib/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ rust_static_library("lib") {
deps = [
"src/readability:lib",
"//brave/third_party/rust/html5ever/v0_25:lib",
"//brave/third_party/rust/kuchiki/v0_8:lib",
"//brave/third_party/rust/kuchikiki/v0_8:lib",
"//brave/third_party/rust/lifeguard/v0_6:lib",
"//brave/third_party/rust/lol_html/v0_3:lib",
"//brave/third_party/rust/markup5ever/v0_10:lib",
Expand Down
7 changes: 3 additions & 4 deletions components/speedreader/rust/lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ include = [

[dependencies]
html5ever = "0.25.1"
kuchiki = "0.8.1"
kuchikiki = "0.8.2"
lol_html = "0.3.0"
regex = "1"
serde_json = "1.0.70"
Expand All @@ -44,6 +44,5 @@ criterion = "0.3"
futures = "0.3"
tokio = { version = "1.2", features = ["full"] }

[patch.crates-io.kuchiki_v0_8]
path = "../../../../third_party/rust/kuchiki/v0_8/crate"
package = "kuchiki"
[patch.crates-io.kuchikiki]
path = "../../../../third_party/rust/kuchikiki/v0_8/crate"
2 changes: 1 addition & 1 deletion components/speedreader/rust/lib/src/readability/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ rust_static_library("lib") {

deps = [
"//brave/third_party/rust/html5ever/v0_25:lib",
"//brave/third_party/rust/kuchiki/v0_8:lib",
"//brave/third_party/rust/kuchikiki/v0_8:lib",
"//brave/third_party/rust/thiserror/v1:lib",
"//brave/third_party/rust/time/v0_3:lib",
"//brave/third_party/rust/url/v2:lib",
Expand Down
7 changes: 3 additions & 4 deletions components/speedreader/rust/lib/src/readability/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ edition = "2018"
[dependencies]
bitflags = "1.2"
html5ever = "0.25.1"
kuchiki = "0.8.1"
kuchikiki = "0.8.2"
lazy_static = "1.4"
regex = "1"
serde_json = "1.0"
Expand All @@ -25,6 +25,5 @@ url = "2.2"
assert_approx_eq = "1.1.0"
reqwest = { version = "0.11.6", features = ["blocking", "stream"] }

[patch.crates-io.kuchiki_v0_8]
path = "../../../../../third_party/rust/kuchiki/v0_8/crate"
package = "kuchiki"
# Redirect the `kuchikiki` dependency to the vendored checkout. The table key
# must be the package name because no `package = "..."` override is given.
[patch.crates-io.kuchikiki]
path = "../../../../../third_party/rust/kuchikiki/v0_8/crate"
8 changes: 4 additions & 4 deletions components/speedreader/rust/lib/src/readability/src/dom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::{ElementFlags, NodeOrText};
use html5ever::{parse_document, ParseOpts};
use html5ever::{Attribute, LocalName, QualName};
use kuchiki::iter::NodeIterator;
use kuchiki::NodeData::{Element, Text};
use kuchiki::NodeRef as Handle;
use kuchiki::Sink;
use kuchikiki::iter::NodeIterator;
use kuchikiki::NodeData::{Element, Text};
use kuchikiki::NodeRef as Handle;
use kuchikiki::Sink;
use std::str::FromStr;

/// A small wrapper function that creates a NodeOrText from a Text handle or an
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use html5ever::parse_document;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::{ElementFlags, NodeOrText, TreeSink};
use html5ever::QualName;
use kuchiki::NodeRef as Handle;
use kuchiki::Sink;
use kuchikiki::NodeRef as Handle;
use kuchikiki::Sink;
use regex::Regex;
use std::collections::{HashMap, HashSet};
use std::default::Default;
Expand Down
2 changes: 1 addition & 1 deletion components/speedreader/rust/lib/src/readability/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#[macro_use]
extern crate html5ever;
extern crate kuchiki;
extern crate kuchikiki;
extern crate regex;
extern crate url;
#[macro_use]
Expand Down
8 changes: 4 additions & 4 deletions components/speedreader/rust/lib/src/readability/src/scorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ use html5ever::tendril::StrTendril;
use html5ever::tree_builder::TreeSink;
use html5ever::tree_builder::{ElementFlags, NodeOrText};
use html5ever::{LocalName, QualName};
use kuchiki::iter::NodeIterator;
use kuchiki::NodeData::{
use kuchikiki::iter::NodeIterator;
use kuchikiki::NodeData::{
Comment, Doctype, Document, DocumentFragment, Element, ProcessingInstruction, Text,
};
use kuchiki::NodeRef as Handle;
use kuchiki::{ElementData, Sink};
use kuchikiki::NodeRef as Handle;
use kuchikiki::{ElementData, Sink};
use regex::Regex;
use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{dom, util};
use kuchiki::iter::NodeIterator;
use kuchiki::NodeRef as Handle;
use kuchiki::{ElementData, Sink};
use kuchikiki::iter::NodeIterator;
use kuchikiki::NodeRef as Handle;
use kuchikiki::{ElementData, Sink};
use util::count_ignore_consecutive_whitespace;

#[derive(Default)]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use html5ever::driver::{ParseOpts, Parser};
use html5ever::tendril::{StrTendril, TendrilSink};
use kuchiki::Sink;
use kuchikiki::Sink;
use lol_html::OutputSink;
use std::collections::HashMap;
use url::Url;
Expand Down
6 changes: 3 additions & 3 deletions components/speedreader/rust/lib/tests/legacy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ use std::fs::File;
use std::io::Read;
use url::Url;

use kuchiki::NodeData::{Element, Text};
use kuchiki::NodeRef as Handle;
use kuchiki::Sink;
use kuchikiki::NodeData::{Element, Text};
use kuchikiki::NodeRef as Handle;
use kuchikiki::Sink;
use regex::Regex;
use std::vec::Vec;

Expand Down
4 changes: 2 additions & 2 deletions components/speedreader/rust/lib/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ use url::Url;

use distance::damerau_levenshtein;
use html5ever::LocalName;
use kuchiki::NodeData::{Element, Text};
use kuchiki::NodeRef as Handle;
use kuchikiki::NodeData::{Element, Text};
use kuchikiki::NodeRef as Handle;
use std::vec::Vec;

static SAMPLES_PATH: &str = "data/tests-samples/";
Expand Down
4 changes: 2 additions & 2 deletions third_party/rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions third_party/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ fvm_shared = "3"
hex = "0.4"
hex-literal = "0.4"
hmac = "0.10"
kuchiki = "0.8"
kuchikiki = "0.8"
lazy_static = "1"
libc = "0.2"
lifeguard = "0.6"
Expand Down Expand Up @@ -692,9 +692,9 @@ package = "itoa"
path = "keccak/v0_1/crate"
package = "keccak"

[patch.crates-io.kuchiki_v0_8]
path = "kuchiki/v0_8/crate"
package = "kuchiki"
[patch.crates-io.kuchikiki_v0_8]
path = "kuchikiki/v0_8/crate"
package = "kuchikiki"

[patch.crates-io.lazy_static_v1]
path = "../../../third_party/rust/lazy_static/v1/crate"
Expand Down
45 changes: 0 additions & 45 deletions third_party/rust/kuchiki/v0_8/BUILD.gn

This file was deleted.

46 changes: 46 additions & 0 deletions third_party/rust/kuchikiki/v0_8/BUILD.gn
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import("//build/rust/cargo_crate.gni")

# Build target for the vendored `kuchikiki` crate (epoch 0.8, package version
# 0.8.2), compiled as an rlib rooted at crate/src/lib.rs.
cargo_crate("lib") {
  crate_name = "kuchikiki"
  epoch = "0.8"
  crate_type = "rlib"
  crate_root = "crate/src/lib.rs"
  sources = [
    "//brave/third_party/rust/kuchikiki/v0_8/crate/examples/find_matches.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/examples/stack-overflow.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/attributes.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/cell_extras.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/iter.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/lib.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/node_data_ref.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/parser.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/select.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/serializer.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/tests.rs",
    "//brave/third_party/rust/kuchikiki/v0_8/crate/src/tree.rs",
  ]
  inputs = [ "//brave/third_party/rust/kuchikiki/v0_8/crate/README.md" ]

  # Unit tests skipped. Generate with --with-tests to include them.
  build_native_rust_unit_tests = false
  edition = "2018"

  # Package metadata for the crate.
  cargo_pkg_version = "0.8.2"
  cargo_pkg_authors = "Simon Sapin <simon.sapin@exyr.org>"
  cargo_pkg_name = "kuchikiki"
  cargo_pkg_description = "(口利き) HTML tree manipulation library"

  # Swap the `chromium_code` compiler config for `no_chromium_code` on both
  # the library and executable config lists.
  library_configs -= [ "//build/config/compiler:chromium_code" ]
  library_configs += [ "//build/config/compiler:no_chromium_code" ]
  executable_configs -= [ "//build/config/compiler:chromium_code" ]
  executable_configs += [ "//build/config/compiler:no_chromium_code" ]

  # `indexmap` is the only dependency referenced under //third_party/rust;
  # the others are referenced under //brave/third_party/rust.
  deps = [
    "//brave/third_party/rust/cssparser/v0_27:lib",
    "//brave/third_party/rust/html5ever/v0_25:lib",
    "//third_party/rust/indexmap/v1:lib",
    "//brave/third_party/rust/matches/v0_1:lib",
    "//brave/third_party/rust/selectors/v0_22:lib",
  ]
}
4 changes: 2 additions & 2 deletions third_party/rust/third_party.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ skrifa = "0.4"
libc = "0.2.107"

# speedreader
kuchiki = "0.8.1"
kuchikiki = "0.8.1"
lol_html = "0.3.0"
# regex = "1"
# serde_json = "1.0.70"
Expand All @@ -74,7 +74,7 @@ lazy_static = "1.4"

# readability
bitflags = "1.2"
kuchiki = "0.8.1"
kuchikiki = "0.8.1"
lazy_static = "1.4"
# regex = "1"
# serde_json = "1.0"
Expand Down

0 comments on commit 32203ba

Please sign in to comment.