Skip to content

Commit

Permalink
Merge pull request #18488 from brave/speedreader/kuchiki-0.8.2
Browse files Browse the repository at this point in the history
[Speedreader] Update to kuchikiki 0.8.2
  • Loading branch information
rillian authored Jan 30, 2024
2 parents f95277e + 2ecf050 commit a7a102b
Show file tree
Hide file tree
Showing 149 changed files with 4,065 additions and 840 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ patches/**/*.patchinfo
/third_party/rapidjson/src
/third_party/reclient_configs/src
/third_party/rust/challenge_bypass_ristretto/v1/crate
/third_party/rust/kuchiki/v0_8/crate
/third_party/rust/futures_retry/v0_5/crate
/third_party/cryptography
/third_party/macholib
Expand Down
1 change: 0 additions & 1 deletion DEPS
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ deps = {
"third_party/playlist_component/src": "https://github.com/brave/playlist-component.git@c043566e2ff6133d110cf516ed472451039139e2",
"third_party/rust/challenge_bypass_ristretto/v1/crate": "https://github.com/brave-intl/challenge-bypass-ristretto.git@a1da4641734adc8312215b38a8221962d2c8e045",
"third_party/rust/futures_retry/v0_5/crate": "https://github.com/brave-intl/futures-retry.git@2aaaafbc3d394661534d4dbd14159d164243c20e",
"third_party/rust/kuchiki/v0_8/crate": "https://github.com/brave/kuchiki.git@589eadca2c1d06ddda2919354590bfe1ace88a43",
"third_party/macholib": {
"url": "https://github.com/ronaldoussoren/macholib.git@36a6777ccd0891c5d1b44ba885573d7c90740015",
"condition": "checkout_mac",
Expand Down
3 changes: 3 additions & 0 deletions components/speedreader/rust/ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ panic = "abort"
name = "speedreader_ffi"
path = "lib.rs"
crate-type = ["rlib"]

[patch.crates-io.kuchikiki]
path = "../../../../third_party/rust/kuchikiki/v0_8/crate"
2 changes: 1 addition & 1 deletion components/speedreader/rust/lib/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ rust_static_library("lib") {
deps = [
"src/readability:lib",
"//brave/third_party/rust/html5ever/v0_25:lib",
"//brave/third_party/rust/kuchiki/v0_8:lib",
"//brave/third_party/rust/kuchikiki/v0_8:lib",
"//brave/third_party/rust/lifeguard/v0_6:lib",
"//brave/third_party/rust/lol_html/v0_3:lib",
"//brave/third_party/rust/markup5ever/v0_10:lib",
Expand Down
12 changes: 3 additions & 9 deletions components/speedreader/rust/lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@ include = [
"/README.md",
"/src/*",
"/examples/*.rs",
"/benches/*"
]

[dependencies]
html5ever = "0.25.1"
kuchiki = "0.8.1"
kuchikiki = "0.8.2"
lol_html = "0.3.0"
regex = "1"
serde_json = "1.0.70"
Expand All @@ -45,10 +44,5 @@ criterion = "0.3"
futures = "0.3"
tokio = { version = "1.2", features = ["full"] }

[[bench]]
name = "html5ever"
harness = false

[patch.crates-io.kuchiki_v0_8]
path = "../../../third_party/rust/kuchiki/v0_8/crate"
package = "kuchiki"
[patch.crates-io.kuchikiki]
path = "../../../../third_party/rust/kuchikiki/v0_8/crate"
2 changes: 1 addition & 1 deletion components/speedreader/rust/lib/src/readability/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ rust_static_library("lib") {

deps = [
"//brave/third_party/rust/html5ever/v0_25:lib",
"//brave/third_party/rust/kuchiki/v0_8:lib",
"//brave/third_party/rust/kuchikiki/v0_8:lib",
"//brave/third_party/rust/thiserror/v1:lib",
"//brave/third_party/rust/time/v0_3:lib",
"//brave/third_party/rust/url/v2:lib",
Expand Down
7 changes: 3 additions & 4 deletions components/speedreader/rust/lib/src/readability/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ edition = "2018"
[dependencies]
bitflags = "1.2"
html5ever = "0.25.1"
kuchiki = "0.8.1"
kuchikiki = "0.8.2"
lazy_static = "1.4"
regex = "1"
serde_json = "1.0"
Expand All @@ -25,6 +25,5 @@ url = "2.2"
assert_approx_eq = "1.1.0"
reqwest = { version = "0.11.6", features = ["blocking", "stream"] }

[patch.crates-io.kuchiki_v0_8]
path = "../../../../../third_party/rust/kuchiki/v0_8/crate"
package = "kuchiki"
[patch.crates-io.kuchikiki]
path = "../../../../../third_party/rust/kuchikiki/v0_8/crate"
8 changes: 4 additions & 4 deletions components/speedreader/rust/lib/src/readability/src/dom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::{ElementFlags, NodeOrText};
use html5ever::{parse_document, ParseOpts};
use html5ever::{Attribute, LocalName, QualName};
use kuchiki::iter::NodeIterator;
use kuchiki::NodeData::{Element, Text};
use kuchiki::NodeRef as Handle;
use kuchiki::Sink;
use kuchikiki::iter::NodeIterator;
use kuchikiki::NodeData::{Element, Text};
use kuchikiki::NodeRef as Handle;
use kuchikiki::Sink;
use std::str::FromStr;

/// A small wrapper function that creates a NodeOrText from a Text handle or an
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use html5ever::parse_document;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::{ElementFlags, NodeOrText, TreeSink};
use html5ever::QualName;
use kuchiki::NodeRef as Handle;
use kuchiki::Sink;
use kuchikiki::NodeRef as Handle;
use kuchikiki::Sink;
use regex::Regex;
use std::collections::HashSet;
use std::default::Default;
Expand Down
7 changes: 6 additions & 1 deletion components/speedreader/rust/lib/src/readability/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
// Copyright (c) 2021 The Brave Authors. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

#[macro_use]
extern crate html5ever;
extern crate kuchiki;
extern crate kuchikiki;
extern crate regex;
extern crate url;
#[macro_use]
Expand Down
9 changes: 7 additions & 2 deletions components/speedreader/rust/lib/src/readability/src/nlp.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
// Copyright (c) 2021 The Brave Authors. All rights reserved.
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

use regex::{Regex, RegexSet};
use std::cmp::max;
use std::collections::HashSet;
Expand Down Expand Up @@ -28,8 +33,8 @@ lazy_static! {
]).unwrap();
}

/// Determines if a slice is an abbreviation by checking a list of common abbreviations and some
/// simple heuristics.
/// Determines if a slice is an abbreviation by checking a list of common
/// abbreviations and some simple heuristics.
#[inline]
pub fn is_abbreviation(s: &str) -> bool {
let len = s.chars().count();
Expand Down
Loading

0 comments on commit a7a102b

Please sign in to comment.