feat(split chunks): split chunks should consider chunk combinations (#4280)

* feat: add bitmap structure

* feat(splitChunks): add combinations

* chore(splitChunks): add webpack combination test

* fix tests

* use hash_set::is_subset

* fix: use num_bigint

* fix: fix tests

* feat: combinations

* fix: fix tests

* fix: sort before getKey

* chore: improve tests
JSerFeng authored Oct 10, 2023
1 parent c0965f1 commit 05815ca
Showing 31 changed files with 277 additions and 121 deletions.
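The heart of this change: each chunk is assigned its own bit in an arbitrary-precision integer (num_bigint::BigUint, aliased to ChunksKey in the diff below), so a set of chunks collapses into a single integer built by OR-ing its members' bits, and chunk-set lookups and subset tests become cheap integer operations. A minimal sketch of the encoding; the u32 ids and helper names are illustrative, not part of this commit:

use num_bigint::BigUint;
use std::collections::HashMap;

// Assign each chunk id a distinct power-of-two bit, in a deterministic order.
fn index_map(mut chunk_ids: Vec<u32>) -> HashMap<u32, BigUint> {
    chunk_ids.sort_unstable(); // sort first so the bit assignment is stable
    let mut map = HashMap::new();
    let mut bit: BigUint = 1usize.into();
    for id in chunk_ids {
        map.insert(id, bit.clone());
        bit <<= 1; // the next chunk gets the next bit
    }
    map
}

// A chunk set becomes one integer: the OR of its members' bits.
fn key_of(chunks: &[u32], map: &HashMap<u32, BigUint>) -> BigUint {
    let mut key = BigUint::from(0usize);
    for c in chunks {
        key |= &map[c];
    }
    key
}

fn main() {
    let map = index_map(vec![10, 20, 30]); // bits 0b001, 0b010, 0b100
    assert_eq!(key_of(&[10, 30], &map), BigUint::from(5usize)); // 0b101
    // Subset test: a ⊆ b iff key(a) & key(b) == key(a).
    let (a, b) = (key_of(&[10], &map), key_of(&[10, 30], &map));
    assert_eq!(&a & &b, a);
}

Unlike a fixed-width usize bitmask, BigUint stays correct for builds with more than 64 chunks, which is why the commit adds the num-bigint dependency.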
9 changes: 5 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions crates/rspack_binding_options/src/options/raw_split_chunks.rs
@@ -164,9 +164,9 @@ fn create_chunks_filter(raw: Chunks) -> rspack_plugin_split_chunks_new::ChunkFil
Either::A(reg) => {
rspack_plugin_split_chunks_new::create_regex_chunk_filter_from_str(reg.to_rspack_regex())
}
Either::B(str) => {
let str = str.into_string();
rspack_plugin_split_chunks_new::create_chunk_filter_from_str(&str)
Either::B(js_str) => {
let js_str = js_str.into_string();
rspack_plugin_split_chunks_new::create_chunk_filter_from_str(&js_str)
}
}
}
1 change: 1 addition & 0 deletions crates/rspack_plugin_split_chunks_new/Cargo.toml
@@ -17,6 +17,7 @@ async-trait = { workspace = true }
dashmap = { workspace = true }
derivative = { workspace = true }
futures-util = { workspace = true }
num-bigint = "0.4.4"
rayon = { workspace = true }
rspack_util = { path = "../rspack_util" }
rustc-hash = { workspace = true }
252 changes: 184 additions & 68 deletions crates/rspack_plugin_split_chunks_new/src/plugin/module_group.rs
@@ -1,8 +1,9 @@
use async_scoped::TokioScope;
use dashmap::DashMap;
use num_bigint::BigUint as ChunksKey;
use rayon::prelude::*;
use rspack_core::{Chunk, ChunkUkey, Compilation, Module};
use rustc_hash::FxHashSet;
use rspack_core::{Chunk, ChunkByUkey, ChunkGraph, ChunkUkey, Compilation, Module, ModuleGraph};
use rustc_hash::{FxHashMap, FxHashSet};

use super::ModuleGroupMap;
use crate::SplitChunksPlugin;
@@ -32,10 +33,26 @@ impl SplitChunksPlugin {

let best_module_group = module_group_map
.remove(&best_entry_key)
.expect("item should exist");
.expect("This should never happen, please file an issue");
(best_entry_key, best_module_group)
}

fn create_chunk_index_map(&self, chunk_db: &ChunkByUkey) -> FxHashMap<ChunkUkey, ChunksKey> {
let mut chunk_index_map: FxHashMap<ChunkUkey, ChunksKey> = Default::default();

let mut idx: ChunksKey = 1usize.into();

let mut chunks: Vec<_> = chunk_db.keys().collect();
chunks.sort_unstable();

for key in chunks {
chunk_index_map.insert(*key, idx.clone());
idx <<= 1;
}

chunk_index_map
}

#[tracing::instrument(skip_all)]
pub(crate) async fn prepare_module_group_map(
&self,
@@ -51,21 +68,65 @@ impl SplitChunksPlugin {
cache_group_index: usize,
cache_group: &'a CacheGroup,
selected_chunks: Box<[&'a Chunk]>,
selected_chunks_key: ChunksKey,
}

let module_group_map: DashMap<String, ModuleGroup> = DashMap::default();

let chunk_idx_map = self.create_chunk_index_map(chunk_db);

// chunk_sets_in_graph: keyed by a module's chunk-set key; the value is the set of chunks that contain the module
// chunk_sets_by_count: the same chunk sets, grouped by their length
let (chunk_sets_in_graph, chunk_sets_by_count) = Self::prepare_combination_maps(
&compilation.module_graph,
&compilation.chunk_graph,
&chunk_idx_map,
);
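// For a toy graph where modules m1 and m2 live in chunks {a, b} and m3 lives
// in {a}, these maps would hold (writing key(S) for the BigUint that get_key
// produces for a chunk set S):
//   chunk_sets_in_graph: { key({a, b}) -> {a, b}, key({a}) -> {a} }
//   chunk_sets_by_count: { 2 -> [{a, b}], 1 -> [{a}] }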

let combinations_cache = DashMap::<ChunksKey, Vec<FxHashSet<ChunkUkey>>>::default();

let get_combination = |chunks_key: ChunksKey| {
if let Some(combs) = combinations_cache.get(&chunks_key) {
return combs.clone();
}
let chunks_set = chunk_sets_in_graph
.get(&chunks_key)
.expect("This should never happen, please file an issue");
let mut result = vec![chunks_set.clone()];

for (count, array_of_set) in &chunk_sets_by_count {
if *count < chunks_set.len() {
for set in array_of_set {
if set.is_subset(chunks_set) {
result.push(set.clone());
}
}
}
}

combinations_cache.insert(chunks_key.clone(), result);
combinations_cache
.get(&chunks_key)
.expect("This should never happen, please file an issue")
.clone()
};
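// Continuing the toy graph above: get_combination(key({a, b})) yields
// [{a, b}, {a}] -- the exact chunk set itself plus every strictly smaller
// chunk set in the graph that is a subset of it, memoized per key so the
// subset scan runs at most once per distinct chunk-set key.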

let chunk_idx_map = &chunk_idx_map;

async_scoped::Scope::scope_and_block(|scope: &mut TokioScope<'_, _>| {
for module in compilation.module_graph.modules().values() {
let module = &**module;

let belong_to_chunks = compilation
.chunk_graph
.get_module_chunks((*module).identifier());
.get_module_chunks(module.identifier());

let chunks_key = Self::get_key(belong_to_chunks.iter(), chunk_idx_map);
let module_group_map = &module_group_map;

for (cache_group_index, cache_group) in self.cache_groups.iter().enumerate() {
let chunks_key = chunks_key.clone();
scope.spawn(async move {
// Filter by `splitChunks.cacheGroups.{cacheGroup}.test`
let is_match_the_test: bool = (cache_group.test)(module);
@@ -82,72 +143,79 @@
return;
}

let selected_chunks = belong_to_chunks
.iter()
.map(|c| chunk_db.get(c).expect("Should have a chunk here"))
// Filter by `splitChunks.cacheGroups.{cacheGroup}.chunks`
.filter(|c| (cache_group.chunk_filter)(c, chunk_group_db))
.collect::<Box<[_]>>();
let combs = get_combination(chunks_key.clone());

// Filter by `splitChunks.cacheGroups.{cacheGroup}.minChunks`
if selected_chunks.len() < cache_group.min_chunks as usize {
tracing::trace!(
"Module({:?}) is ignored by CacheGroup({:?}). Reason: selected_chunks.len({:?}) < cache_group.min_chunks({:?})",
module.identifier(),
cache_group.key,
selected_chunks.len(),
cache_group.min_chunks,
);
return;
}
for chunk_combination in combs {
let selected_chunks = chunk_combination
.iter()
.map(|c| chunk_db.get(c).expect("This should never happen, please file an issue"))
// Filter by `splitChunks.cacheGroups.{cacheGroup}.chunks`
.filter(|c| (cache_group.chunk_filter)(c, chunk_group_db))
.collect::<Box<[_]>>();

// Filter by `splitChunks.cacheGroups.{cacheGroup}.minChunks`
if selected_chunks.len() < cache_group.min_chunks as usize {
tracing::trace!(
"Module({:?}) is ignored by CacheGroup({:?}). Reason: selected_chunks.len({:?}) < cache_group.min_chunks({:?})",
module.identifier(),
cache_group.key,
selected_chunks.len(),
cache_group.min_chunks,
);
continue;
}
let selected_chunks_key = Self::get_key(selected_chunks.iter().map(|chunk| &chunk.ukey), chunk_idx_map);

merge_matched_item_into_module_group_map(
MatchedItem {
module,
cache_group,
cache_group_index,
selected_chunks,
selected_chunks_key,
},
module_group_map,
)
.await;

#[tracing::instrument(skip_all)]
async fn merge_matched_item_into_module_group_map(
matched_item: MatchedItem<'_>,
module_group_map: &DashMap<String, ModuleGroup>,
) {
let MatchedItem {
module,
cache_group_index,
cache_group,
selected_chunks,
selected_chunks_key,
} = matched_item;

// `Module`s with the same chunk_name would be merged together.
// `Module`s could be in different `ModuleGroup`s.
let chunk_name: Option<String> = (cache_group.name)(module).await;

merge_matched_item_into_module_group_map(
MatchedItem {
module,
cache_group,
cache_group_index,
selected_chunks,
},
module_group_map,
)
.await;

#[tracing::instrument(skip_all)]
async fn merge_matched_item_into_module_group_map(
matched_item: MatchedItem<'_>,
module_group_map: &DashMap<String, ModuleGroup>,
) {
let MatchedItem {
module,
cache_group_index,
cache_group,
selected_chunks,
} = matched_item;

// `Module`s with the same chunk_name would be merged togother.
// `Module`s could be in different `ModuleGroup`s.
let chunk_name: Option<String> = (cache_group.name)(module).await;

let key: String = if let Some(cache_group_name) = &chunk_name {
[&cache_group.key, " name:", cache_group_name].join("")
} else {
[&cache_group.key, " index:", &cache_group_index.to_string()].join("")
};

let mut module_group = module_group_map.entry(key).or_insert_with(|| ModuleGroup {
modules: Default::default(),
cache_group_index,
cache_group_priority: cache_group.priority,
cache_group_reuse_existing_chunk: cache_group.reuse_existing_chunk,
sizes: Default::default(),
chunks: Default::default(),
chunk_name,
});

module_group.add_module(module);
module_group
.chunks
.extend(selected_chunks.iter().map(|c| c.ukey))
let key: String = if let Some(cache_group_name) = &chunk_name {
[&cache_group.key, " name:", cache_group_name].join("")
} else {
[&cache_group.key, " chunks:", selected_chunks_key.to_string().as_str()].join("")
};

let mut module_group = module_group_map.entry(key).or_insert_with(|| ModuleGroup {
modules: Default::default(),
cache_group_index,
cache_group_priority: cache_group.priority,
cache_group_reuse_existing_chunk: cache_group.reuse_existing_chunk,
sizes: Default::default(),
chunks: Default::default(),
chunk_name,
});

module_group.add_module(module);
module_group
.chunks
.extend(selected_chunks.iter().map(|c| c.ukey))
}
}
});
}
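A consequence of threading selected_chunks_key through MatchedItem: an unnamed cache group is no longer keyed by its cache-group index but by the chunk-combination key, so one cache group can now produce a separate ModuleGroup per chunk combination. A sketch of the two key shapes, assuming an illustrative helper (group_key is not part of the diff):

use num_bigint::BigUint;

// Illustrative helper mirroring the key construction above.
fn group_key(cache_group_key: &str, chunk_name: Option<&str>, chunks_key: &BigUint) -> String {
    match chunk_name {
        // Named: every combination matched under this name merges into one group.
        Some(name) => [cache_group_key, " name:", name].join(""),
        // Unnamed: each chunk combination becomes its own group, keyed by the
        // BigUint chunk-set key instead of the old cache-group index.
        None => [cache_group_key, " chunks:", chunks_key.to_string().as_str()].join(""),
    }
}

// e.g. group_key("vendors", None, &BigUint::from(5usize)) == "vendors chunks:5"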
Expand Down Expand Up @@ -240,4 +308,52 @@ impl SplitChunksPlugin {
module_group_map.remove(&key);
});
}

fn get_key<'a, I: Iterator<Item = &'a ChunkUkey>>(
chunks: I,
chunk_idx_map: &FxHashMap<ChunkUkey, ChunksKey>,
) -> ChunksKey {
let mut sorted = chunks.collect::<Vec<_>>();
sorted.sort_unstable();

let mut result: ChunksKey = 0usize.into(); // seed with zero: a seed of 1 would collide with the first chunk's bit
for chunk in sorted {
let idx = chunk_idx_map
.get(chunk)
.expect("This should never happen, please file an issue");
result |= idx;
}
result
}

#[allow(clippy::type_complexity)]
fn prepare_combination_maps(
module_graph: &ModuleGraph,
chunk_graph: &ChunkGraph,
chunk_idx_map: &FxHashMap<ChunkUkey, ChunksKey>,
) -> (
FxHashMap<ChunksKey, FxHashSet<ChunkUkey>>,
FxHashMap<usize, Vec<FxHashSet<ChunkUkey>>>,
) {
let mut chunk_sets_in_graph = FxHashMap::default();

for module in module_graph.modules().keys() {
let chunks = chunk_graph.get_module_chunks(*module);
let chunk_key = Self::get_key(chunks.iter(), chunk_idx_map);

chunk_sets_in_graph.insert(chunk_key, chunks.clone());
}

let mut chunk_sets_by_count = FxHashMap::<usize, Vec<FxHashSet<ChunkUkey>>>::default();

for chunks in chunk_sets_in_graph.values() {
let count = chunks.len();
chunk_sets_by_count
.entry(count)
.and_modify(|set| set.push(chunks.clone()))
.or_insert(vec![chunks.clone()]);
}

(chunk_sets_in_graph, chunk_sets_by_count)
}
}
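For reference, a self-contained sketch of the two combination maps and the subset walk above, using plain HashMap/HashSet with u32 ids in place of the plugin's types (all names illustrative; a sorted Vec stands in for the BigUint chunk-set key):

use std::collections::{HashMap, HashSet};

fn main() {
    // Toy graph: m1 and m2 live in chunks {0, 1}; m3 lives in {0}.
    let modules: Vec<HashSet<u32>> = vec![
        [0, 1].into_iter().collect(),
        [0, 1].into_iter().collect(),
        [0].into_iter().collect(),
    ];

    // chunk_sets_in_graph analog: one entry per distinct chunk set.
    let mut in_graph: HashMap<Vec<u32>, HashSet<u32>> = HashMap::new();
    for chunks in &modules {
        let mut key: Vec<u32> = chunks.iter().copied().collect();
        key.sort_unstable();
        in_graph.insert(key, chunks.clone());
    }

    // chunk_sets_by_count analog: the same sets, grouped by size.
    let mut by_count: HashMap<usize, Vec<HashSet<u32>>> = HashMap::new();
    for set in in_graph.values() {
        by_count.entry(set.len()).or_default().push(set.clone());
    }

    // Combination walk for {0, 1}: the set itself plus smaller subsets.
    let query: HashSet<u32> = [0, 1].into_iter().collect();
    let mut combs = vec![query.clone()];
    for (count, sets) in &by_count {
        if *count < query.len() {
            combs.extend(sets.iter().filter(|s| s.is_subset(&query)).cloned());
        }
    }
    assert_eq!(combs.len(), 2); // [{0, 1}, {0}]
}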
@@ -8,9 +8,6 @@ module.exports = {
output: {
filename: "[name].js"
},
experiments: {
newSplitChunks: true
},
optimization: {
splitChunks: {
minSize: 1,
@@ -8,9 +8,6 @@ module.exports = {
output: {
filename: "[name].js"
},
experiments: {
newSplitChunks: true
},
optimization: {
splitChunks: {
minSize: 1
@@ -8,9 +8,6 @@ module.exports = {
output: {
filename: "[name].js"
},
experiments: {
newSplitChunks: true
},
optimization: {
splitChunks: {
minSize: 1,
@@ -8,9 +8,6 @@ module.exports = {
output: {
filename: "[name].js"
},
experiments: {
newSplitChunks: true
},
optimization: {
splitChunks: {
cacheGroups: {