From 9d9c3f2c699ab3da9e3caeb2e55061b3bee99767 Mon Sep 17 00:00:00 2001 From: V Date: Thu, 4 Jul 2024 07:06:26 +0800 Subject: [PATCH] feat: support geosite (#466) --- .github/workflows/ci.yml | 11 +- clash_lib/src/app/dns/system.rs | 45 ++-- .../providers/rule_provider/provider.rs | 21 +- clash_lib/src/app/router/mod.rs | 28 ++- .../src/app/router/rules/geodata/attribute.rs | 51 +++++ .../app/router/rules/geodata/geodata.proto | 64 ++++++ .../app/router/rules/geodata/matcher_group.rs | 65 ++++++ clash_lib/src/app/router/rules/geodata/mod.rs | 207 ++++++++++++++++++ .../app/router/rules/geodata/str_matcher.rs | 87 ++++++++ clash_lib/src/app/router/rules/mod.rs | 1 + clash_lib/src/common/geodata/geodata_proto.rs | 130 +++++++++++ clash_lib/src/common/geodata/mod.rs | 61 ++++++ clash_lib/src/common/mmdb.rs | 64 +----- clash_lib/src/common/mod.rs | 1 + clash_lib/src/common/utils.rs | 56 ++++- clash_lib/src/config/def.rs | 6 + clash_lib/src/config/internal/config.rs | 5 + clash_lib/src/config/internal/rule.rs | 10 + clash_lib/src/lib.rs | 33 ++- 19 files changed, 848 insertions(+), 98 deletions(-) create mode 100644 clash_lib/src/app/router/rules/geodata/attribute.rs create mode 100644 clash_lib/src/app/router/rules/geodata/geodata.proto create mode 100644 clash_lib/src/app/router/rules/geodata/matcher_group.rs create mode 100644 clash_lib/src/app/router/rules/geodata/mod.rs create mode 100644 clash_lib/src/app/router/rules/geodata/str_matcher.rs create mode 100644 clash_lib/src/common/geodata/geodata_proto.rs create mode 100644 clash_lib/src/common/geodata/mod.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a24c5fea3..1af605fa8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -146,7 +146,6 @@ jobs: - uses: actions/checkout@v4 with: submodules: true - - uses: actions/cache@v4 with: path: | @@ -217,7 +216,15 @@ jobs: with: name: ${{ matrix.release-name || matrix.target }} path: ${{ env.PACKAGE }}-${{ matrix.release-name || matrix.target }}${{ matrix.postfix }} - + + - name: Setup tmate session + if: ${{ failure() }} + uses: mxschmitt/action-tmate@v3 + with: + detached: true + timeout-minutes: 15 + limit-access-to-actor: true + release: name: Release diff --git a/clash_lib/src/app/dns/system.rs b/clash_lib/src/app/dns/system.rs index a42a148b6..c09811f53 100644 --- a/clash_lib/src/app/dns/system.rs +++ b/clash_lib/src/app/dns/system.rs @@ -1,10 +1,14 @@ use async_trait::async_trait; +use hickory_resolver::{ + name_server::{GenericConnector, TokioRuntimeProvider}, + AsyncResolver, +}; use rand::seq::IteratorRandom; use tracing::warn; use super::{ClashResolver, ResolverKind}; -pub struct SystemResolver; +pub struct SystemResolver(AsyncResolver>); /// SystemResolver is a resolver that uses libc getaddrinfo to resolve /// hostnames. @@ -14,7 +18,12 @@ impl SystemResolver { "Default dns resolver doesn't support ipv6, please enable clash dns \ resolver if you need ipv6 support." ); - Ok(Self) + + let resolver: AsyncResolver< + GenericConnector, + > = hickory_resolver::AsyncResolver::tokio_from_system_conf()?; + + Ok(Self(resolver)) } } @@ -25,12 +34,9 @@ impl ClashResolver for SystemResolver { host: &str, _: bool, ) -> anyhow::Result> { - let response = tokio::net::lookup_host(format!("{}:0", host)) - .await? - .collect::>(); + let response = self.0.lookup_ip(host).await?; Ok(response .iter() - .map(|x| x.ip()) .filter(|x| self.ipv6() || x.is_ipv4()) .choose(&mut rand::thread_rng())) } @@ -40,17 +46,8 @@ impl ClashResolver for SystemResolver { host: &str, _: bool, ) -> anyhow::Result> { - let response = tokio::net::lookup_host(format!("{}:0", host)) - .await? - .collect::>(); - Ok(response - .iter() - .map(|x| x.ip()) - .filter_map(|ip| match ip { - std::net::IpAddr::V4(ip) => Some(ip), - _ => None, - }) - .choose(&mut rand::thread_rng())) + let response = self.0.ipv4_lookup(host).await?; + Ok(response.iter().map(|x| x.0).choose(&mut rand::thread_rng())) } async fn resolve_v6( @@ -58,17 +55,8 @@ impl ClashResolver for SystemResolver { host: &str, _: bool, ) -> anyhow::Result> { - let response = tokio::net::lookup_host(format!("{}:0", host)) - .await? - .collect::>(); - Ok(response - .iter() - .map(|x| x.ip()) - .filter_map(|ip| match ip { - std::net::IpAddr::V6(ip) => Some(ip), - _ => None, - }) - .choose(&mut rand::thread_rng())) + let response = self.0.ipv6_lookup(host).await?; + Ok(response.iter().map(|x| x.0).choose(&mut rand::thread_rng())) } async fn exchange( @@ -79,6 +67,7 @@ impl ClashResolver for SystemResolver { } fn ipv6(&self) -> bool { + // TODO: support ipv6 false } diff --git a/clash_lib/src/app/remote_content_manager/providers/rule_provider/provider.rs b/clash_lib/src/app/remote_content_manager/providers/rule_provider/provider.rs index ca69f6b9d..2108b9049 100644 --- a/clash_lib/src/app/remote_content_manager/providers/rule_provider/provider.rs +++ b/clash_lib/src/app/remote_content_manager/providers/rule_provider/provider.rs @@ -20,7 +20,7 @@ use crate::{ }, router::{map_rule_type, RuleMatcher}, }, - common::{errors::map_io_error, mmdb::Mmdb, trie}, + common::{errors::map_io_error, geodata::GeoData, mmdb::Mmdb, trie}, config::internal::rule::RuleType, session::Session, Error, @@ -86,6 +86,7 @@ impl RuleProviderImpl { interval: Duration, vehicle: ThreadSafeProviderVehicle, mmdb: Arc, + geodata: Arc, ) -> Self { let inner = Arc::new(tokio::sync::RwLock::new(Inner { content: match behovior { @@ -123,7 +124,12 @@ impl RuleProviderImpl { n, x )) })?; - let rules = make_rules(behovior, scheme.payload, mmdb.clone())?; + let rules = make_rules( + behovior, + scheme.payload, + mmdb.clone(), + geodata.clone(), + )?; Ok(rules) }); @@ -233,6 +239,7 @@ fn make_rules( behavior: RuleSetBehavior, rules: Vec, mmdb: Arc, + geodata: Arc, ) -> Result { match behavior { RuleSetBehavior::Domain => { @@ -241,9 +248,9 @@ fn make_rules( RuleSetBehavior::Ipcidr => { Ok(RuleContent::Ipcidr(Box::new(make_ip_cidr_rules(rules)?))) } - RuleSetBehavior::Classical => { - Ok(RuleContent::Classical(make_classical_rules(rules, mmdb)?)) - } + RuleSetBehavior::Classical => Ok(RuleContent::Classical( + make_classical_rules(rules, mmdb, geodata)?, + )), } } @@ -266,6 +273,7 @@ fn make_ip_cidr_rules(rules: Vec) -> Result { fn make_classical_rules( rules: Vec, mmdb: Arc, + geodata: Arc, ) -> Result>, Error> { let mut rv = vec![]; for rule in rules { @@ -282,7 +290,8 @@ fn make_classical_rules( _ => Err(Error::InvalidConfig(format!("invalid rule line: {}", rule))), }?; - let rule_matcher = map_rule_type(rule_type, mmdb.clone(), None); + let rule_matcher = + map_rule_type(rule_type, mmdb.clone(), geodata.clone(), None); rv.push(rule_matcher); } Ok(rv) diff --git a/clash_lib/src/app/router/mod.rs b/clash_lib/src/app/router/mod.rs index af29e3d97..f9b1bae2a 100644 --- a/clash_lib/src/app/router/mod.rs +++ b/clash_lib/src/app/router/mod.rs @@ -27,6 +27,8 @@ use super::{ }; mod rules; + +use crate::common::geodata::GeoData; pub use rules::RuleMatcher; pub struct Router { @@ -46,6 +48,7 @@ impl Router { rule_providers: HashMap, dns_resolver: ThreadSafeDNSResolver, mmdb: Arc, + geodata: Arc, cwd: String, ) -> Self { let mut rule_provider_registry = HashMap::new(); @@ -55,6 +58,7 @@ impl Router { &mut rule_provider_registry, dns_resolver.clone(), mmdb.clone(), + geodata.clone(), cwd, ) .await @@ -64,7 +68,12 @@ impl Router { rules: rules .into_iter() .map(|r| { - map_rule_type(r, mmdb.clone(), Some(&rule_provider_registry)) + map_rule_type( + r, + mmdb.clone(), + geodata.clone(), + Some(&rule_provider_registry), + ) }) .collect(), dns_resolver, @@ -106,6 +115,7 @@ impl Router { r.target(), r.type_name() ); + debug!("matched rule details: {}", r); return (r.target(), Some(r)); } } @@ -118,6 +128,7 @@ impl Router { rule_provider_registry: &mut HashMap, resolver: ThreadSafeDNSResolver, mmdb: Arc, + geodata: Arc, cwd: String, ) -> Result<(), Error> { for (name, provider) in rule_providers.into_iter() { @@ -138,6 +149,7 @@ impl Router { Duration::from_secs(http.interval), Arc::new(vehicle), mmdb.clone(), + geodata.clone(), ); rule_provider_registry.insert(name, Arc::new(provider)); @@ -156,6 +168,7 @@ impl Router { Duration::from_secs(file.interval.unwrap_or_default()), Arc::new(vehicle), mmdb.clone(), + geodata.clone(), ); rule_provider_registry.insert(name, Arc::new(provider)); @@ -194,6 +207,7 @@ impl Router { pub fn map_rule_type( rule_type: RuleType, mmdb: Arc, + geodata: Arc, rule_provider_registry: Option<&HashMap>, ) -> Box { match rule_type { @@ -245,6 +259,18 @@ pub fn map_rule_type( no_resolve, mmdb: mmdb.clone(), }), + RuleType::GeoSite { + target, + country_code, + } => { + let res = rules::geodata::GeoSiteMatcher::new( + country_code, + target, + geodata.as_ref(), + ) + .unwrap(); + Box::new(res) as _ + } RuleType::SRCPort { target, port } => Box::new(rules::port::Port { port, target, diff --git a/clash_lib/src/app/router/rules/geodata/attribute.rs b/clash_lib/src/app/router/rules/geodata/attribute.rs new file mode 100644 index 000000000..30674eb98 --- /dev/null +++ b/clash_lib/src/app/router/rules/geodata/attribute.rs @@ -0,0 +1,51 @@ +use crate::common::geodata::geodata_proto; + +pub trait AttrMatcher { + fn matches(&self, domain: &geodata_proto::Domain) -> bool; +} + +pub struct BooleanAttrMatcher(pub String); + +impl AttrMatcher for BooleanAttrMatcher { + fn matches(&self, domain: &geodata_proto::Domain) -> bool { + for attr in &domain.attribute { + if attr.key.eq_ignore_ascii_case(&self.0) { + return true; + } + } + false + } +} + +impl From for BooleanAttrMatcher { + fn from(s: String) -> Self { + BooleanAttrMatcher(s) + } +} + +// logical AND of multiple attribute matchers +pub struct AndAttrMatcher { + list: Vec>, +} + +impl From> for AndAttrMatcher { + fn from(list: Vec) -> Self { + AndAttrMatcher { + list: list + .into_iter() + .map(|s| Box::new(BooleanAttrMatcher(s)) as Box) + .collect(), + } + } +} + +impl AttrMatcher for AndAttrMatcher { + fn matches(&self, domain: &geodata_proto::Domain) -> bool { + for matcher in &self.list { + if !matcher.matches(domain) { + return false; + } + } + true + } +} diff --git a/clash_lib/src/app/router/rules/geodata/geodata.proto b/clash_lib/src/app/router/rules/geodata/geodata.proto new file mode 100644 index 000000000..b1f1f2603 --- /dev/null +++ b/clash_lib/src/app/router/rules/geodata/geodata.proto @@ -0,0 +1,64 @@ +syntax = "proto3"; + +package geodata; + +// Domain for routing decision. +message Domain { + // Type of domain value. + enum Type { + // The value is used as is. + Plain = 0; + // The value is used as a regular expression. + Regex = 1; + // The value is a root domain. + Domain = 2; + // The value is a domain. + Full = 3; + } + + // Domain matching type. + Type type = 1; + + // Domain value. + string value = 2; + + message Attribute { + string key = 1; + + oneof typed_value { + bool bool_value = 2; + int64 int_value = 3; + } + } + + // Attributes of this domain. May be used for filtering. + repeated Attribute attribute = 3; +} + +// IP for routing decision, in CIDR form. +message CIDR { + // IP address, should be either 4 or 16 bytes. + bytes ip = 1; + + // Number of leading ones in the network mask. + uint32 prefix = 2; +} + +message GeoIP { + string country_code = 1; + repeated CIDR cidr = 2; + bool reverse_match = 3; +} + +message GeoIPList { + repeated GeoIP entry = 1; +} + +message GeoSite { + string country_code = 1; + repeated Domain domain = 2; +} + +message GeoSiteList { + repeated GeoSite entry = 1; +} \ No newline at end of file diff --git a/clash_lib/src/app/router/rules/geodata/matcher_group.rs b/clash_lib/src/app/router/rules/geodata/matcher_group.rs new file mode 100644 index 000000000..dccb380e7 --- /dev/null +++ b/clash_lib/src/app/router/rules/geodata/matcher_group.rs @@ -0,0 +1,65 @@ +use crate::{ + app::router::rules::geodata::str_matcher::{try_new_matcher, Matcher}, + common::{ + geodata::geodata_proto::{domain::Type, Domain}, + trie, + }, +}; +use std::sync::Arc; + +pub trait DomainGroupMatcher: Send + Sync { + fn apply(&self, domain: &str) -> bool; +} + +pub struct SuccinctMatcherGroup { + set: trie::StringTrie<()>, + other_matchers: Vec>, + not: bool, +} + +impl SuccinctMatcherGroup { + pub fn try_new(domains: Vec, not: bool) -> Result { + let mut set = trie::StringTrie::new(); + let mut other_matchers = Vec::new(); + for domain in domains { + let t = Type::try_from(domain.r#type)?; + match t { + Type::Plain | Type::Regex => { + let matcher = try_new_matcher(domain.value, t)?; + other_matchers.push(matcher); + } + Type::Domain => { + let domain = format!("+.{}", domain.value); + set.insert(&domain, Arc::new(())); + } + Type::Full => { + set.insert(&domain.value, Arc::new(())); + } + } + } + Ok(SuccinctMatcherGroup { + set, + other_matchers, + not, + }) + } +} + +impl DomainGroupMatcher for SuccinctMatcherGroup { + fn apply(&self, domain: &str) -> bool { + let mut is_matched = self.set.search(domain).is_some(); + if !is_matched { + for matcher in &self.other_matchers { + if matcher.matches(domain) { + is_matched = true; + break; + } + } + } + if self.not { + !is_matched + } else { + is_matched + } + } +} diff --git a/clash_lib/src/app/router/rules/geodata/mod.rs b/clash_lib/src/app/router/rules/geodata/mod.rs new file mode 100644 index 000000000..df708c704 --- /dev/null +++ b/clash_lib/src/app/router/rules/geodata/mod.rs @@ -0,0 +1,207 @@ +use crate::{app::router::RuleMatcher, session::Session, Error}; +use std::fmt::{Display, Formatter}; + +use crate::{ + app::router::rules::geodata::{ + attribute::{AndAttrMatcher, AttrMatcher}, + matcher_group::{DomainGroupMatcher, SuccinctMatcherGroup}, + }, + common::geodata::GeoData, +}; + +mod attribute; +mod matcher_group; +mod str_matcher; + +// if country_code is empty, return None +// or will return the parsed **real** code and the attr list and if the code is +// negated +fn parse(country_code: &str) -> Option<(bool, String, Box)> { + let country_code = country_code.trim().to_lowercase(); + let mut country_code = country_code.as_str(); + let mut not = false; + + if country_code.is_empty() { + return None; + } + + if country_code.as_bytes()[0] == b'!' { + not = true; + country_code = &country_code[1..]; + } + let parts = country_code.split('@').collect::>(); + let code = parts[0].to_owned(); + let attrs = if parts.len() > 1 { + parts[1].split(',').map(|x| x.to_owned()).collect() + } else { + Vec::new() + }; + let attr_matcher = Box::new(AndAttrMatcher::from(attrs)) as _; + + Some((not, code, attr_matcher)) +} + +pub struct GeoSiteMatcher { + pub country_code: String, + pub target: String, + pub matcher: Box, +} + +impl GeoSiteMatcher { + pub fn new( + country_code: String, + target: String, + loader: &GeoData, + ) -> anyhow::Result { + let (not, code, attr_matcher) = + parse(&country_code).ok_or(Error::InvalidConfig( + "invalid geosite matcher, country code is empty".to_owned(), + ))?; + let list = + loader + .get(&code) + .cloned() + .ok_or(Error::InvalidConfig(format!( + "geosite matcher, country code {} not found", + code + )))?; + let domains = list + .domain + .into_iter() + .filter(|domain| attr_matcher.matches(domain)) + .collect::>(); + + let matcher_group: Box = + Box::new(SuccinctMatcherGroup::try_new(domains, not)?); + Ok(Self { + country_code, + target, + matcher: matcher_group, + }) + } +} + +impl Display for GeoSiteMatcher { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "GeoSite({})", self.country_code) + } +} + +impl RuleMatcher for GeoSiteMatcher { + fn apply(&self, sess: &Session) -> bool { + match &sess.destination { + crate::session::SocksAddr::Ip(_) => false, + crate::session::SocksAddr::Domain(domain, _) => { + self.matcher.apply(domain.as_str()) + } + } + } + + fn target(&self) -> &str { + self.target.as_str() + } + + fn payload(&self) -> String { + self.country_code.clone() + } + + fn type_name(&self) -> &str { + "GeoSite" + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::*; + use crate::{ + app::{ + dns::SystemResolver, + router::rules::geodata::matcher_group::{ + DomainGroupMatcher, SuccinctMatcherGroup, + }, + }, + common::{geodata::GeoData, http::new_http_client, utils::download}, + Error, + }; + + const GEOSITE_URL: &str = + "https://github.com/Watfaq/v2ray-rules-dat/releases/download/test/geosite.dat"; + + struct TestSuite<'a> { + country_code: &'a str, + expected_results: Vec<(&'a str, bool)>, + } + + #[tokio::test] + async fn test_download_and_apply() -> anyhow::Result<()> { + let system_resolver = Arc::new( + SystemResolver::new() + .map_err(|x| Error::DNSError(x.to_string())) + .unwrap(), + ); + let client = new_http_client(system_resolver) + .map_err(|x| Error::DNSError(x.to_string())) + .unwrap(); + let out = tempfile::Builder::new().append(true).tempfile()?; + download(GEOSITE_URL, out.as_ref(), &client).await?; + let path = out.path().to_str().unwrap().to_owned(); + + let loader = GeoData::from_file(path).await?; + + let suites = [ + TestSuite { + country_code: "CN", + expected_results: vec![ + ("www.bilibili.com", true), + ("www.baidu.com", true), + ("www.youtube.com", false), + ("www.google.com", false), + ], + }, + TestSuite { + country_code: "microsoft@cn", + expected_results: vec![ + ("www.microsoft.com", true), + ("dcg.microsoft.com", true), + ("www.bilibili.com", false), + ], + }, + TestSuite { + country_code: "youtube", + expected_results: vec![ + ("www.youtube.com", true), + ("www.bilibili.com", false), + ], + }, + TestSuite { + country_code: "!youtube", + expected_results: vec![ + ("www.youtube.com", false), + ("www.bilibili.com", true), + ], + }, + ]; + + for suite in suites.iter() { + // the same code of GeoMatcher + let (not, code, attr_matcher) = parse(suite.country_code).unwrap(); + let list = loader.get(&code).cloned().unwrap(); + let domains = list + .domain + .into_iter() + .filter(|domain| attr_matcher.matches(domain)) + .collect::>(); + + let matcher_group: Box = + Box::new(SuccinctMatcherGroup::try_new(domains, not).unwrap()); + + for (domain, expected) in suite.expected_results.iter() { + assert_eq!(matcher_group.apply(domain), *expected); + } + } + + Ok(()) + } +} diff --git a/clash_lib/src/app/router/rules/geodata/str_matcher.rs b/clash_lib/src/app/router/rules/geodata/str_matcher.rs new file mode 100644 index 000000000..075cb9bc9 --- /dev/null +++ b/clash_lib/src/app/router/rules/geodata/str_matcher.rs @@ -0,0 +1,87 @@ +use crate::common::geodata::geodata_proto::domain::Type; + +pub trait Matcher: Send + Sync { + fn matches(&self, url: &str) -> bool; +} + +pub struct FullMatcher(pub String); + +impl Matcher for FullMatcher { + fn matches(&self, url: &str) -> bool { + self.0 == url + } +} + +pub struct SubStrMatcher(pub String); + +impl Matcher for SubStrMatcher { + fn matches(&self, url: &str) -> bool { + url.contains(&self.0) + } +} + +pub struct DomainMatcher(pub String); + +impl Matcher for DomainMatcher { + fn matches(&self, url: &str) -> bool { + let pattern = &self.0; + if !url.ends_with(pattern) { + return false; + } + if pattern.len() == url.len() { + return true; + } + let prefix_idx_end = url.len() as i32 - pattern.len() as i32 - 1; + if prefix_idx_end < 0 { + return false; + } + url.as_bytes()[prefix_idx_end as usize] == b'.' + } +} + +pub struct RegexMatcher(regex::Regex); + +impl Matcher for RegexMatcher { + fn matches(&self, url: &str) -> bool { + self.0.is_match(url) + } +} + +pub fn try_new_matcher( + domain: String, + t: Type, +) -> Result, crate::Error> { + Ok(match t { + Type::Plain => Box::new(SubStrMatcher(domain)), + Type::Regex => Box::new(RegexMatcher(regex::Regex::new(&domain)?)), + Type::Domain => Box::new(DomainMatcher(domain)), + Type::Full => Box::new(FullMatcher(domain)), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_matchers() { + let full_matcher = FullMatcher("https://google.com".to_string()); + assert_eq!(full_matcher.matches("https://google.com"), true); + assert_eq!(full_matcher.matches("https://www.google.com"), false); + + let sub_str_matcher = SubStrMatcher("google".to_string()); + assert_eq!(sub_str_matcher.matches("https://www.google.com"), true); + assert_eq!(sub_str_matcher.matches("https://www.youtube.com"), false); + + let domain_matcher = DomainMatcher("google.com".to_string()); + assert_eq!(domain_matcher.matches("https://www.google.com"), true); + assert_eq!(domain_matcher.matches("https://www.fakegoogle.com"), false); + assert_eq!(domain_matcher.matches("https://wwwgoogle.com"), false); + + let regex_matcher = + RegexMatcher(regex::Regex::new(r".*google\..*").unwrap()); + assert_eq!(regex_matcher.matches("https://www.google.com"), true); + assert_eq!(regex_matcher.matches("https://www.fakegoogle.com"), true); + assert_eq!(regex_matcher.matches("https://goo.gle.com"), false); + } +} diff --git a/clash_lib/src/app/router/rules/mod.rs b/clash_lib/src/app/router/rules/mod.rs index c0f253cf2..ed1836ff9 100644 --- a/clash_lib/src/app/router/rules/mod.rs +++ b/clash_lib/src/app/router/rules/mod.rs @@ -8,6 +8,7 @@ pub mod domain; pub mod domain_keyword; pub mod domain_suffix; pub mod final_; +pub mod geodata; pub mod geoip; pub mod ipcidr; pub mod port; diff --git a/clash_lib/src/common/geodata/geodata_proto.rs b/clash_lib/src/common/geodata/geodata_proto.rs new file mode 100644 index 000000000..a7fb9e676 --- /dev/null +++ b/clash_lib/src/common/geodata/geodata_proto.rs @@ -0,0 +1,130 @@ +// This file is @generated by prost-build. DON'T edit +// libprotoc 25.3 +// prost-build = "0.12" +/// Domain for routing decision. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Domain { + /// Domain matching type. + #[prost(enumeration = "domain::Type", tag = "1")] + pub r#type: i32, + /// Domain value. + #[prost(string, tag = "2")] + pub value: ::prost::alloc::string::String, + /// Attributes of this domain. May be used for filtering. + #[prost(message, repeated, tag = "3")] + pub attribute: ::prost::alloc::vec::Vec, +} +/// Nested message and enum types in `Domain`. +pub mod domain { + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Attribute { + #[prost(string, tag = "1")] + pub key: ::prost::alloc::string::String, + #[prost(oneof = "attribute::TypedValue", tags = "2, 3")] + pub typed_value: ::core::option::Option, + } + /// Nested message and enum types in `Attribute`. + pub mod attribute { + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum TypedValue { + #[prost(bool, tag = "2")] + BoolValue(bool), + #[prost(int64, tag = "3")] + IntValue(i64), + } + } + /// Type of domain value. + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration, + )] + #[repr(i32)] + pub enum Type { + /// The value is used as is. + Plain = 0, + /// The value is used as a regular expression. + Regex = 1, + /// The value is a root domain. + Domain = 2, + /// The value is a domain. + Full = 3, + } + impl Type { + /// String value of the enum field names used in the ProtoBuf + /// definition. + /// + /// The values are not transformed in any way and thus are considered + /// stable (if the ProtoBuf definition does not change) and safe + /// for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Type::Plain => "Plain", + Type::Regex => "Regex", + Type::Domain => "Domain", + Type::Full => "Full", + } + } + + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "Plain" => Some(Self::Plain), + "Regex" => Some(Self::Regex), + "Domain" => Some(Self::Domain), + "Full" => Some(Self::Full), + _ => None, + } + } + } +} +/// IP for routing decision, in CIDR form. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Cidr { + /// IP address, should be either 4 or 16 bytes. + #[prost(bytes = "vec", tag = "1")] + pub ip: ::prost::alloc::vec::Vec, + /// Number of leading ones in the network mask. + #[prost(uint32, tag = "2")] + pub prefix: u32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoIp { + #[prost(string, tag = "1")] + pub country_code: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub cidr: ::prost::alloc::vec::Vec, + #[prost(bool, tag = "3")] + pub reverse_match: bool, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoIpList { + #[prost(message, repeated, tag = "1")] + pub entry: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoSite { + #[prost(string, tag = "1")] + pub country_code: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub domain: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GeoSiteList { + #[prost(message, repeated, tag = "1")] + pub entry: ::prost::alloc::vec::Vec, +} diff --git a/clash_lib/src/common/geodata/mod.rs b/clash_lib/src/common/geodata/mod.rs new file mode 100644 index 000000000..659e2cba9 --- /dev/null +++ b/clash_lib/src/common/geodata/mod.rs @@ -0,0 +1,61 @@ +use crate::{ + common::{http::HttpClient, utils::download}, + Error, +}; +use prost::Message; +use std::path::Path; +use tracing::{debug, info}; + +pub(crate) mod geodata_proto; + +pub struct GeoData { + cache: geodata_proto::GeoSiteList, +} + +impl GeoData { + pub async fn new>( + path: P, + download_url: Option, + http_client: HttpClient, + ) -> Result { + debug!("geosite path: {}", path.as_ref().to_string_lossy()); + + let geosite_file = path.as_ref().to_path_buf(); + + if !geosite_file.exists() { + if let Some(url) = download_url.as_ref() { + info!("downloading geodata from {}", url); + download(url, &geosite_file, &http_client) + .await + .map_err(|x| { + Error::InvalidConfig(format!( + "geosite download failed: {}", + x + )) + })?; + } else { + return Err(Error::InvalidConfig(format!( + "geosite `{}` not found and geosite_download_url is not set", + path.as_ref().to_string_lossy() + ))); + } + } + let bytes = tokio::fs::read(path).await?; + let cache = geodata_proto::GeoSiteList::decode(bytes.as_slice())?; + Ok(Self { cache }) + } + + #[cfg(test)] + pub async fn from_file>(path: P) -> Result { + let bytes = tokio::fs::read(path).await?; + let cache = geodata_proto::GeoSiteList::decode(bytes.as_slice())?; + Ok(Self { cache }) + } + + pub fn get(&self, list: &str) -> Option<&geodata_proto::GeoSite> { + self.cache + .entry + .iter() + .find(|x| x.country_code.eq_ignore_ascii_case(list)) + } +} diff --git a/clash_lib/src/common/mmdb.rs b/clash_lib/src/common/mmdb.rs index 92f76e192..1a97fb594 100644 --- a/clash_lib/src/common/mmdb.rs +++ b/clash_lib/src/common/mmdb.rs @@ -1,15 +1,10 @@ -use std::{fs, io::Write, net::IpAddr, path::Path}; +use std::{fs, net::IpAddr, path::Path}; -use async_recursion::async_recursion; -use hyper::body::HttpBody; use maxminddb::geoip2; use tracing::{debug, info, warn}; use crate::{ - common::{ - errors::{map_io_error, new_io_error}, - http::HttpClient, - }, + common::{errors::map_io_error, http::HttpClient, utils::download}, Error, }; @@ -38,11 +33,9 @@ impl Mmdb { if !mmdb_file.exists() { if let Some(url) = download_url.as_ref() { info!("downloading mmdb from {}", url); - Self::download(url, &mmdb_file, http_client) - .await - .map_err(|x| { - Error::InvalidConfig(format!("mmdb download failed: {}", x)) - })?; + download(url, &mmdb_file, http_client).await.map_err(|x| { + Error::InvalidConfig(format!("mmdb download failed: {}", x)) + })?; } else { return Err(Error::InvalidConfig(format!( "mmdb `{}` not found and mmdb_download_url is not set", @@ -66,7 +59,7 @@ impl Mmdb { fs::remove_file(&mmdb_file)?; if let Some(url) = download_url.as_ref() { info!("downloading mmdb from {}", url); - Self::download(url, &mmdb_file, http_client).await.map_err( + download(url, &mmdb_file, http_client).await.map_err( |x| { Error::InvalidConfig(format!( "mmdb download failed: {}", @@ -97,51 +90,6 @@ impl Mmdb { } } - #[async_recursion] - async fn download

( - url: &str, - path: P, - http_client: &HttpClient, - ) -> anyhow::Result<()> - where - P: AsRef + std::marker::Send, - { - let uri = url.parse::()?; - let mut out = std::fs::File::create(&path)?; - - let mut res = http_client.get(uri).await?; - - if res.status().is_redirection() { - return Self::download( - res.headers() - .get("Location") - .ok_or(new_io_error( - format!("failed to download from {}", url).as_str(), - ))? - .to_str()?, - path, - http_client, - ) - .await; - } - - if !res.status().is_success() { - return Err(Error::InvalidConfig(format!( - "mmdb download failed: {}", - res.status() - )) - .into()); - } - - debug!("downloading mmdb to {}", path.as_ref().to_string_lossy()); - - while let Some(chunk) = res.body_mut().data().await { - out.write_all(&chunk?)?; - } - - Ok(()) - } - pub fn lookup(&self, ip: IpAddr) -> std::io::Result { self.reader .lookup::(ip) diff --git a/clash_lib/src/common/mod.rs b/clash_lib/src/common/mod.rs index 3604b7d55..7aca64696 100644 --- a/clash_lib/src/common/mod.rs +++ b/clash_lib/src/common/mod.rs @@ -1,6 +1,7 @@ pub mod auth; pub mod crypto; pub mod errors; +pub mod geodata; pub mod http; pub mod io; pub mod mmdb; diff --git a/clash_lib/src/common/utils.rs b/clash_lib/src/common/utils.rs index c69f1b58a..b83108d1f 100644 --- a/clash_lib/src/common/utils.rs +++ b/clash_lib/src/common/utils.rs @@ -1,10 +1,17 @@ -use std::{fmt::Write, num::ParseIntError}; +use async_recursion::async_recursion; +use hyper::body::HttpBody; +use std::{fmt::Write, num::ParseIntError, path::Path}; +use crate::{ + common::{errors::new_io_error, http::HttpClient}, + Error, +}; use rand::{ distributions::uniform::{SampleRange, SampleUniform}, Fill, Rng, }; use sha2::Digest; +use tracing::debug; pub fn rand_range(range: R) -> T where @@ -54,3 +61,50 @@ pub fn md5(bytes: &[u8]) -> Vec { pub fn default_bool_true() -> bool { true } + +#[async_recursion] +pub async fn download

( + url: &str, + path: P, + http_client: &HttpClient, +) -> anyhow::Result<()> +where + P: AsRef + std::marker::Send, +{ + use std::io::Write; + + let uri = url.parse::()?; + let mut out = std::fs::File::create(&path)?; + + let mut res = http_client.get(uri).await?; + + if res.status().is_redirection() { + return download( + res.headers() + .get("Location") + .ok_or(new_io_error( + format!("failed to download from {}", url).as_str(), + ))? + .to_str()?, + path, + http_client, + ) + .await; + } + + if !res.status().is_success() { + return Err(Error::InvalidConfig(format!( + "data download failed: {}", + res.status() + )) + .into()); + } + + debug!("downloading data to {}", path.as_ref().to_string_lossy()); + + while let Some(chunk) = res.body_mut().data().await { + out.write_all(&chunk?)?; + } + + Ok(()) +} diff --git a/clash_lib/src/config/def.rs b/clash_lib/src/config/def.rs index 7460ed10d..add12d1c6 100644 --- a/clash_lib/src/config/def.rs +++ b/clash_lib/src/config/def.rs @@ -270,6 +270,10 @@ pub struct Config { pub mmdb: String, /// Country database download url pub mmdb_download_url: Option, + /// Geosite database path relative to the $CWD + pub geosite: String, + /// Geosite database download url + pub geosite_download_url: Option, /// these options has default vals, /// and needs extra processing @@ -366,6 +370,8 @@ impl Default for Config { "https://github.com/Loyalsoldier/geoip/releases/download/202307271745/Country.mmdb" .to_owned(), ), + geosite: "geosite.dat".to_string(), + geosite_download_url: Some("https://github.com/Loyalsoldier/v2ray-rules-dat/releases/download/202406182210/geosite.dat".to_owned()), tun: Default::default(), } } diff --git a/clash_lib/src/config/internal/config.rs b/clash_lib/src/config/internal/config.rs index 861603d76..492508f58 100644 --- a/clash_lib/src/config/internal/config.rs +++ b/clash_lib/src/config/internal/config.rs @@ -91,6 +91,8 @@ impl TryFrom for Config { routing_mask: c.routing_mask, mmdb: c.mmdb.to_owned(), mmdb_download_url: c.mmdb_download_url.to_owned(), + geosite: c.geosite.to_owned(), + geosite_download_url: c.geosite_download_url.to_owned(), }, dns: (&c).try_into()?, experimental: c.experimental, @@ -266,6 +268,9 @@ pub struct General { pub routing_mask: Option, pub mmdb: String, pub mmdb_download_url: Option, + + pub geosite: String, + pub geosite_download_url: Option, } pub struct Profile { diff --git a/clash_lib/src/config/internal/rule.rs b/clash_lib/src/config/internal/rule.rs index 9b7a0d231..89651674a 100644 --- a/clash_lib/src/config/internal/rule.rs +++ b/clash_lib/src/config/internal/rule.rs @@ -19,6 +19,10 @@ pub enum RuleType { country_code: String, no_resolve: bool, }, + GeoSite { + target: String, + country_code: String, + }, IpCidr { ipnet: ipnet::IpNet, target: String, @@ -61,6 +65,7 @@ impl RuleType { RuleType::DomainSuffix { target, .. } => target, RuleType::DomainKeyword { target, .. } => target, RuleType::GeoIP { target, .. } => target, + RuleType::GeoSite { target, .. } => target, RuleType::IpCidr { target, .. } => target, RuleType::SrcCidr { target, .. } => target, RuleType::SRCPort { target, .. } => target, @@ -82,6 +87,7 @@ impl Display for RuleType { RuleType::DomainSuffix { .. } => write!(f, "DOMAIN-SUFFIX"), RuleType::DomainKeyword { .. } => write!(f, "DOMAIN-KEYWORD"), RuleType::GeoIP { .. } => write!(f, "GEOIP"), + RuleType::GeoSite { .. } => write!(f, "GEOSITE"), RuleType::IpCidr { .. } => write!(f, "IP-CIDR"), RuleType::SrcCidr { .. } => write!(f, "SRC-IP-CIDR"), RuleType::SRCPort { .. } => write!(f, "SRC-PORT"), @@ -114,6 +120,10 @@ impl RuleType { domain_keyword: payload.to_string(), target: target.to_string(), }), + "GEOSITE" => Ok(RuleType::GeoSite { + target: target.to_string(), + country_code: payload.to_string(), + }), "GEOIP" => Ok(RuleType::GeoIP { target: target.to_string(), country_code: payload.to_string(), diff --git a/clash_lib/src/lib.rs b/clash_lib/src/lib.rs index 12355e0a5..b5a13da29 100644 --- a/clash_lib/src/lib.rs +++ b/clash_lib/src/lib.rs @@ -33,6 +33,7 @@ mod config; mod proxy; mod session; +use crate::common::geodata; pub use config::{ def::{Config as ClashConfigDef, DNS as ClashDNSConfigDef}, DNSListen as ClashDNSListen, RuntimeConfig as ClashRuntimeConfig, @@ -44,6 +45,10 @@ pub enum Error { IpNet(#[from] ipnet::AddrParseError), #[error(transparent)] Io(#[from] io::Error), + #[error(transparent)] + Decode(#[from] prost::DecodeError), + #[error(transparent)] + Regex(#[from] regex::Error), #[error("invalid config: {0}")] InvalidConfig(String), #[error("profile error: {0}")] @@ -169,7 +174,7 @@ async fn start_async(opts: Options) -> Result<(), Error> { debug!("initializing dns resolver"); let system_resolver = Arc::new(SystemResolver::new().map_err(|x| Error::DNSError(x.to_string()))?); - let client = new_http_client(system_resolver) + let client = new_http_client(system_resolver.clone()) .map_err(|x| Error::DNSError(x.to_string()))?; debug!("initializing mmdb"); @@ -183,6 +188,17 @@ async fn start_async(opts: Options) -> Result<(), Error> { .await?, ); + let client = new_http_client(system_resolver) + .map_err(|x| Error::DNSError(x.to_string()))?; + let geodata = Arc::new( + geodata::GeoData::new( + cwd.join(&config.general.geosite), + config.general.geosite_download_url, + client, + ) + .await?, + ); + debug!("initializing cache store"); let cache_store = profile::ThreadSafeCacheFile::new( cwd.join("cache.db").as_path().to_str().unwrap(), @@ -228,6 +244,7 @@ async fn start_async(opts: Options) -> Result<(), Error> { config.rule_providers, dns_resolver.clone(), mmdb, + geodata, cwd.to_string_lossy().to_string(), ) .await, @@ -325,7 +342,7 @@ async fn start_async(opts: Options) -> Result<(), Error> { let system_resolver = Arc::new( SystemResolver::new().map_err(|x| Error::DNSError(x.to_string()))?, ); - let client = new_http_client(system_resolver) + let client = new_http_client(system_resolver.clone()) .map_err(|x| Error::DNSError(x.to_string()))?; debug!("reloading mmdb"); @@ -338,6 +355,17 @@ async fn start_async(opts: Options) -> Result<(), Error> { .await?, ); + let client = new_http_client(system_resolver) + .map_err(|x| Error::DNSError(x.to_string()))?; + let geodata = Arc::new( + geodata::GeoData::new( + cwd.join(&config.general.geosite), + config.general.geosite_download_url, + client, + ) + .await?, + ); + debug!("reloading cache store"); let cache_store = profile::ThreadSafeCacheFile::new( cwd.join("cache.db").as_path().to_str().unwrap(), @@ -386,6 +414,7 @@ async fn start_async(opts: Options) -> Result<(), Error> { config.rule_providers, dns_resolver.clone(), mmdb, + geodata, cwd.to_string_lossy().to_string(), ) .await,