chunkAssignment.ts (forked from rollup/rollup)
import ExternalModule from '../ExternalModule';
import Module from '../Module';
import type { LogHandler } from '../rollup/types';
import { getNewSet, getOrCreate } from './getOrCreate';
import { concatLazy } from './iterators';
import { logOptimizeChunkStatus } from './logs';
import { timeEnd, timeStart } from './timers';
type ChunkDefinitions = { alias: string | null; modules: Module[] }[];
interface ModulesWithDependentEntries {
/**
* The indices of the entries depending on this chunk
*/
dependentEntries: Set<number>;
modules: Module[];
}
interface ChunkDescription extends ModulesWithDependentEntries {
/**
* These are the atoms (=initial chunks) that are contained in this chunk
*/
containedAtoms: bigint;
/**
* The signatures of all atoms that are included in or loaded with this
* chunk. This is the intersection of the atoms loaded by each dependent
* entry. As chunks are merged, these sets are intersected.
*/
correlatedAtoms: bigint;
dependencies: Set<ChunkDescription>;
dependentChunks: Set<ChunkDescription>;
pure: boolean;
size: number;
}
interface ChunkPartition {
big: Set<ChunkDescription>;
small: Set<ChunkDescription>;
}
/**
* At its core, the algorithm first starts from each static or dynamic entry
* point and then assigns that entry point to all modules that can be reached
* via static imports. We call this the *dependent entry points* of that
* module.
*
* Then we group all modules with the same dependent entry points into chunks
* as those modules will always be loaded together.
*
* One non-trivial optimization we can apply is that dynamic entries are
* different from static entries in so far as when a dynamic import occurs,
* some modules are already in memory. If some of these modules are also
* dependencies of the dynamic entry, then it does not make sense to create a
* separate chunk for them. Instead, the dynamic import target can load them
* from the importing chunk.
*
* With regard to chunking, if B is implicitly loaded after A, then this can be
* handled the same way as if there was a dynamic import A => B.
*
* Example:
* Assume A -> B (A imports B), A => C (A dynamically imports C) and C -> B.
* Then the initial algorithm would assign A into the A chunk, C into the C
* chunk and B into the AC chunk, i.e. the chunk with the dependent entry
* points A and C.
* However we know that C can only be loaded from A, so A and its dependency B
* must already be in memory when C is loaded. So it is enough to create only
* two chunks A containing [AB] and C containing [C].
*
* So we do not assign the dynamic entry C as dependent entry point to modules
* that are already loaded.
*
* In a more complex example, let us assume that we have entry points X and Y.
* Further, let us assume
* X -> A, X -> B, X -> C,
* Y -> A, Y -> B,
* A => D,
* D -> B, D -> C
* So without dynamic import optimization, the dependent entry points are
* A: XY, B: DXY, C: DX, D: D, X: X, Y: Y, so we would for now create six
* chunks.
*
* Now D is loaded only after A is loaded. But A is loaded if either X is
* loaded or Y is loaded. So the modules that are already in memory when D is
* loaded are the intersection of all modules that X depends on with all
* modules that Y depends on, which in this case are the modules A and B.
* We could also say they are all modules that have both X and Y as dependent
* entry points.
*
* So we can remove D as dependent entry point from A and B, which means they
* both now have only XY as dependent entry points and can be merged into the
* same chunk.
*
* Now let us extend this to the most general case where we have several
* dynamic importers for one dynamic entry point.
*
* In the most general form, it works like this:
* For each dynamic entry point, we have a number of dynamic importers, which
* are the modules importing it. Using the previous ideas, we can determine
* the modules already in memory for each dynamic importer by looking for all
* modules that have all the dependent entry points of the dynamic importer as
* dependent entry points.
* So the modules that are guaranteed to be in memory when the dynamic entry
* point is loaded are the intersection of the modules already in memory for
* each dynamic importer.
*
* Assuming that A => D and B => D and A has dependent entry points XY and B
* has dependent entry points YZ, then the modules guaranteed to be in memory
* are all modules that have at least XYZ as dependent entry points.
* We call XYZ the *dynamically dependent entry points* of D.
*
* Now there is one last case to consider: If one of the dynamically dependent
* entries is itself a dynamic entry, then any module is in memory that either
* is a dependency of that dynamic entry or again has the dynamically dependent
* entries of that dynamic entry as dependent entry points.
*
* A naive algorithm for this proved to be costly as it contained an O(n^3)
* complexity with regard to dynamic entries that blew up for very large
* projects.
*
* If we have an efficient way to do Set operations, an alternative approach
* would be to instead collect already loaded modules per dynamic entry. And as
* all chunks from the initial grouping would behave the same, we can instead
* collect already loaded chunks for a performance improvement.
*
* To do that efficiently, we need
* - a Map of dynamic imports per dynamic entry, which contains all dynamic
* imports that can be triggered by a dynamic entry
* - a Map of static dependencies per entry
* - a Map of already loaded chunks per entry that we initially populate with
* empty Sets for static entries and Sets containing all entries for dynamic
* entries
*
* For efficient operations, we assign each entry a numerical index and
* represent Sets of Chunks as BigInt values where each chunk corresponds to a
* bit index. Then the last two maps can be represented as arrays of BigInt
* values.
*
* Then we iterate through each dynamic entry. We set its already loaded
* modules to the intersection of its previously already loaded modules with,
* for each of its dynamically dependent entries, the union of that entry's
* already loaded modules and its static dependencies.
*
* If the already loaded modules changed, then we use the Map of dynamic imports
* per dynamic entry to mark all dynamic entry dependencies as "dirty" and put
* them back into the iteration. As an additional optimization, we note for
* each dynamic entry which dynamic dependent entries have changed and only
* intersect those entries again on subsequent iterations.
*
* Then we remove the dynamic entries from the list of dependent entries for
* those chunks that are already loaded for that dynamic entry and create
* another round of chunks.
*/
export function getChunkAssignments(
entries: readonly Module[],
manualChunkAliasByEntry: ReadonlyMap<Module, string>,
minChunkSize: number,
log: LogHandler
): ChunkDefinitions {
const { chunkDefinitions, modulesInManualChunks } =
getChunkDefinitionsFromManualChunks(manualChunkAliasByEntry);
const {
allEntries,
dependentEntriesByModule,
dynamicallyDependentEntriesByDynamicEntry,
dynamicImportsByEntry
} = analyzeModuleGraph(entries);
// Each atom (initial chunk) is identified by its position in this array
const chunkAtoms = getChunksWithSameDependentEntries(
getModulesWithDependentEntries(dependentEntriesByModule, modulesInManualChunks)
);
const staticDependencyAtomsByEntry = getStaticDependencyAtomsByEntry(allEntries, chunkAtoms);
// Warning: This will consume dynamicallyDependentEntriesByDynamicEntry.
// If we no longer want this, we should make a copy here.
const alreadyLoadedAtomsByEntry = getAlreadyLoadedAtomsByEntry(
staticDependencyAtomsByEntry,
dynamicallyDependentEntriesByDynamicEntry,
dynamicImportsByEntry,
allEntries
);
// This mutates the dependentEntries in chunkAtoms
removeUnnecessaryDependentEntries(chunkAtoms, alreadyLoadedAtomsByEntry);
const { chunks, sideEffectAtoms, sizeByAtom } =
getChunksWithSameDependentEntriesAndCorrelatedAtoms(
chunkAtoms,
staticDependencyAtomsByEntry,
alreadyLoadedAtomsByEntry,
minChunkSize
);
chunkDefinitions.push(
...getOptimizedChunks(chunks, minChunkSize, sideEffectAtoms, sizeByAtom, log).map(
({ modules }) => ({
alias: null,
modules
})
)
);
return chunkDefinitions;
}
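// --- Illustrative sketch (not part of the original module) ---
// The overview comment above describes representing sets of entries or atoms
// as BigInt bitmasks so that set union and intersection become single integer
// operations. A minimal standalone example of that encoding; the helper names
// below are hypothetical and unused by the real algorithm:
function exampleToBitset(indices: Iterable<number>): bigint {
	let bits = 0n;
	for (const index of indices) {
		bits |= 1n << BigInt(index);
	}
	return bits;
}
// Union and intersection of two sets are plain bitwise OR and AND:
const exampleUnion = exampleToBitset([0, 2]) | exampleToBitset([1, 2]); // {0, 1, 2} = 0b111n
const exampleIntersection = exampleToBitset([0, 2]) & exampleToBitset([1, 2]); // {2} = 0b100n
// --- end of illustrative sketch ---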
function getChunkDefinitionsFromManualChunks(
manualChunkAliasByEntry: ReadonlyMap<Module, string>
): { chunkDefinitions: ChunkDefinitions; modulesInManualChunks: Set<Module> } {
const chunkDefinitions: ChunkDefinitions = [];
const modulesInManualChunks = new Set(manualChunkAliasByEntry.keys());
const manualChunkModulesByAlias: Record<string, Module[]> = Object.create(null);
for (const [entry, alias] of manualChunkAliasByEntry) {
addStaticDependenciesToManualChunk(
entry,
(manualChunkModulesByAlias[alias] ||= []),
modulesInManualChunks
);
}
for (const [alias, modules] of Object.entries(manualChunkModulesByAlias)) {
chunkDefinitions.push({ alias, modules });
}
return { chunkDefinitions, modulesInManualChunks };
}
function addStaticDependenciesToManualChunk(
entry: Module,
manualChunkModules: Module[],
modulesInManualChunks: Set<Module>
): void {
const modulesToHandle = new Set([entry]);
for (const module of modulesToHandle) {
modulesInManualChunks.add(module);
manualChunkModules.push(module);
for (const dependency of module.dependencies) {
if (!(dependency instanceof ExternalModule || modulesInManualChunks.has(dependency))) {
modulesToHandle.add(dependency);
}
}
}
}
function analyzeModuleGraph(entries: Iterable<Module>): {
allEntries: readonly Module[];
dependentEntriesByModule: Map<Module, Set<number>>;
dynamicImportsByEntry: readonly ReadonlySet<number>[];
dynamicallyDependentEntriesByDynamicEntry: Map<number, Set<number>>;
} {
const dynamicEntryModules = new Set<Module>();
const dependentEntriesByModule = new Map<Module, Set<number>>();
const dynamicImportModulesByEntry: Set<Module>[] = [];
const allEntriesSet = new Set(entries);
let entryIndex = 0;
for (const currentEntry of allEntriesSet) {
const dynamicImportsForCurrentEntry = new Set<Module>();
dynamicImportModulesByEntry.push(dynamicImportsForCurrentEntry);
const modulesToHandle = new Set([currentEntry]);
for (const module of modulesToHandle) {
getOrCreate(dependentEntriesByModule, module, getNewSet<number>).add(entryIndex);
for (const dependency of module.getDependenciesToBeIncluded()) {
if (!(dependency instanceof ExternalModule)) {
modulesToHandle.add(dependency);
}
}
for (const { resolution } of module.dynamicImports) {
if (
resolution instanceof Module &&
resolution.includedDynamicImporters.length > 0 &&
!allEntriesSet.has(resolution)
) {
dynamicEntryModules.add(resolution);
allEntriesSet.add(resolution);
dynamicImportsForCurrentEntry.add(resolution);
}
}
for (const dependency of module.implicitlyLoadedBefore) {
if (!allEntriesSet.has(dependency)) {
dynamicEntryModules.add(dependency);
allEntriesSet.add(dependency);
}
}
}
entryIndex++;
}
const allEntries = [...allEntriesSet];
const { dynamicEntries, dynamicImportsByEntry } = getDynamicEntries(
allEntries,
dynamicEntryModules,
dynamicImportModulesByEntry
);
return {
allEntries,
dependentEntriesByModule,
dynamicallyDependentEntriesByDynamicEntry: getDynamicallyDependentEntriesByDynamicEntry(
dependentEntriesByModule,
dynamicEntries,
allEntries
),
dynamicImportsByEntry
};
}
function getDynamicEntries(
allEntries: Module[],
dynamicEntryModules: Set<Module>,
dynamicImportModulesByEntry: Set<Module>[]
) {
const entryIndexByModule = new Map<Module, number>();
const dynamicEntries = new Set<number>();
for (const [entryIndex, entry] of allEntries.entries()) {
entryIndexByModule.set(entry, entryIndex);
if (dynamicEntryModules.has(entry)) {
dynamicEntries.add(entryIndex);
}
}
const dynamicImportsByEntry: Set<number>[] = [];
for (const dynamicImportModules of dynamicImportModulesByEntry) {
const dynamicImports = new Set<number>();
for (const dynamicEntry of dynamicImportModules) {
dynamicImports.add(entryIndexByModule.get(dynamicEntry)!);
}
dynamicImportsByEntry.push(dynamicImports);
}
return { dynamicEntries, dynamicImportsByEntry };
}
function getDynamicallyDependentEntriesByDynamicEntry(
dependentEntriesByModule: ReadonlyMap<Module, ReadonlySet<number>>,
dynamicEntries: ReadonlySet<number>,
allEntries: readonly Module[]
): Map<number, Set<number>> {
const dynamicallyDependentEntriesByDynamicEntry = new Map<number, Set<number>>();
for (const dynamicEntryIndex of dynamicEntries) {
const dynamicallyDependentEntries = getOrCreate(
dynamicallyDependentEntriesByDynamicEntry,
dynamicEntryIndex,
getNewSet<number>
);
const dynamicEntry = allEntries[dynamicEntryIndex];
for (const importer of concatLazy([
dynamicEntry.includedDynamicImporters,
dynamicEntry.implicitlyLoadedAfter
])) {
for (const entry of dependentEntriesByModule.get(importer)!) {
dynamicallyDependentEntries.add(entry);
}
}
}
return dynamicallyDependentEntriesByDynamicEntry;
}
function getChunksWithSameDependentEntries(
modulesWithDependentEntries: Iterable<ModulesWithDependentEntries>
): ModulesWithDependentEntries[] {
const chunkModules: Record<string, ModulesWithDependentEntries> = Object.create(null);
for (const { dependentEntries, modules } of modulesWithDependentEntries) {
let chunkSignature = 0n;
for (const entryIndex of dependentEntries) {
chunkSignature |= 1n << BigInt(entryIndex);
}
(chunkModules[String(chunkSignature)] ||= {
dependentEntries: new Set(dependentEntries),
modules: []
}).modules.push(...modules);
}
return Object.values(chunkModules);
}
function* getModulesWithDependentEntries(
dependentEntriesByModule: Map<Module, Set<number>>,
modulesInManualChunks: Set<Module>
) {
for (const [module, dependentEntries] of dependentEntriesByModule) {
if (!modulesInManualChunks.has(module)) {
yield { dependentEntries, modules: [module] };
}
}
}
function getStaticDependencyAtomsByEntry(
allEntries: readonly Module[],
chunkAtoms: ModulesWithDependentEntries[]
) {
// The indices correspond to the indices in allEntries. The atoms correspond
// to bits in the bigint values where chunk 0 is the lowest bit.
const staticDependencyAtomsByEntry: bigint[] = allEntries.map(() => 0n);
// This sets the bit for each atom that is a dependency of an entry
let atomMask = 1n;
for (const { dependentEntries } of chunkAtoms) {
for (const entryIndex of dependentEntries) {
staticDependencyAtomsByEntry[entryIndex] |= atomMask;
}
atomMask <<= 1n;
}
return staticDependencyAtomsByEntry;
}
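// Illustrative sketch (hypothetical data, not part of the original module):
// with two entries and three atoms whose dependent entries are {0}, {0, 1}
// and {1}, the loop above would produce the per-entry bitmasks
// entry 0 -> atoms 0 and 1 -> 0b011n, entry 1 -> atoms 1 and 2 -> 0b110n.
const exampleDependentEntriesByAtom = [new Set([0]), new Set([0, 1]), new Set([1])];
const exampleStaticDependencyAtoms = [0n, 0n];
{
	let exampleAtomMask = 1n;
	for (const dependentEntries of exampleDependentEntriesByAtom) {
		for (const entryIndex of dependentEntries) {
			exampleStaticDependencyAtoms[entryIndex] |= exampleAtomMask;
		}
		exampleAtomMask <<= 1n;
	}
}
// exampleStaticDependencyAtoms is now [3n, 6n].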
// Warning: This will consume dynamicallyDependentEntriesByDynamicEntry.
function getAlreadyLoadedAtomsByEntry(
staticDependencyAtomsByEntry: bigint[],
dynamicallyDependentEntriesByDynamicEntry: Map<number, Set<number>>,
dynamicImportsByEntry: readonly ReadonlySet<number>[],
allEntries: readonly Module[]
) {
// Dynamic entries have all atoms as already loaded initially because we then
// intersect with the static dependency atoms of all dynamic importers.
// Static entries cannot have already loaded atoms.
const alreadyLoadedAtomsByEntry: bigint[] = allEntries.map((_entry, entryIndex) =>
dynamicallyDependentEntriesByDynamicEntry.has(entryIndex) ? -1n : 0n
);
for (const [
dynamicEntryIndex,
dynamicallyDependentEntries
] of dynamicallyDependentEntriesByDynamicEntry) {
// We delete the entry here so that it can be added again if necessary and
// handled once more by the loop
dynamicallyDependentEntriesByDynamicEntry.delete(dynamicEntryIndex);
const knownLoadedAtoms = alreadyLoadedAtomsByEntry[dynamicEntryIndex];
let updatedLoadedAtoms = knownLoadedAtoms;
for (const entryIndex of dynamicallyDependentEntries) {
updatedLoadedAtoms &=
staticDependencyAtomsByEntry[entryIndex] | alreadyLoadedAtomsByEntry[entryIndex];
}
// If the knownLoadedAtoms changed, all dependent dynamic entries need to be
// updated again
if (updatedLoadedAtoms !== knownLoadedAtoms) {
alreadyLoadedAtomsByEntry[dynamicEntryIndex] = updatedLoadedAtoms;
for (const dynamicImport of dynamicImportsByEntry[dynamicEntryIndex]) {
// If this adds an entry that was deleted before, it will be handled
// again. This is the reason why we delete every entry from this map
// that we processed.
getOrCreate(
dynamicallyDependentEntriesByDynamicEntry,
dynamicImport,
getNewSet<number>
).add(dynamicEntryIndex);
}
}
}
return alreadyLoadedAtomsByEntry;
}
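// Illustrative sketch (hypothetical indices, not part of the original
// module): assume static entries X (index 0) and Y (index 1) and a dynamic
// entry D (index 2) that both of them import dynamically. If X and Y each
// statically depend on atoms 0 and 1 (bitmask 0b011n), the loop above
// intersects, starting from -1n for the dynamic entry:
const exampleStaticDepsByEntry = [0b011n, 0b011n, 0b100n]; // X, Y, D
const exampleAlreadyLoadedByEntry = [0n, 0n, -1n]; // static entries start at 0n, dynamic at -1n
let exampleLoadedForD = exampleAlreadyLoadedByEntry[2];
for (const importerEntryIndex of [0, 1]) {
	exampleLoadedForD &=
		exampleStaticDepsByEntry[importerEntryIndex] | exampleAlreadyLoadedByEntry[importerEntryIndex];
}
// exampleLoadedForD === 0b011n: atoms 0 and 1 are guaranteed to be in memory
// whenever D is loaded.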
/**
* This removes a dynamic entry from the dependentEntries of each chunk atom in
* its first argument if that atom is already in memory when that entry is loaded.
*/
function removeUnnecessaryDependentEntries(
chunkAtoms: ModulesWithDependentEntries[],
alreadyLoadedAtomsByEntry: bigint[]
) {
// Remove entries from dependent entries if a chunk is already loaded without
// that entry.
let chunkMask = 1n;
for (const { dependentEntries } of chunkAtoms) {
for (const entryIndex of dependentEntries) {
if ((alreadyLoadedAtomsByEntry[entryIndex] & chunkMask) === chunkMask) {
dependentEntries.delete(entryIndex);
}
}
chunkMask <<= 1n;
}
}
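// Illustrative usage sketch (hypothetical data, modules omitted for
// brevity): an atom that is depended on by entries {0, 2} loses entry 2
// because bit 0 of alreadyLoadedAtomsByEntry[2] is set, i.e. the atom is
// already in memory whenever dynamic entry 2 is loaded.
const exampleChunkAtoms = [{ dependentEntries: new Set([0, 2]), modules: [] }];
removeUnnecessaryDependentEntries(exampleChunkAtoms, [0n, 0n, 1n]);
// exampleChunkAtoms[0].dependentEntries now only contains entry 0.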
function getChunksWithSameDependentEntriesAndCorrelatedAtoms(
chunkAtoms: ModulesWithDependentEntries[],
staticDependencyAtomsByEntry: bigint[],
alreadyLoadedAtomsByEntry: bigint[],
minChunkSize: number
) {
const chunksBySignature: Record<string, ChunkDescription> = Object.create(null);
const chunkByModule = new Map<Module, ChunkDescription>();
const sizeByAtom: number[] = [];
let sideEffectAtoms = 0n;
let atomMask = 1n;
for (const { dependentEntries, modules } of chunkAtoms) {
let chunkSignature = 0n;
let correlatedAtoms = -1n;
for (const entryIndex of dependentEntries) {
chunkSignature |= 1n << BigInt(entryIndex);
// Correlated atoms are the atoms that are guaranteed to be loaded as
// well when a given atom is loaded. It is the intersection, over all
// dependent entries, of each entry's already loaded atoms combined with
// its static dependency atoms.
correlatedAtoms &=
staticDependencyAtomsByEntry[entryIndex] | alreadyLoadedAtomsByEntry[entryIndex];
}
const chunk = (chunksBySignature[String(chunkSignature)] ||= {
containedAtoms: 0n,
correlatedAtoms,
dependencies: new Set(),
dependentChunks: new Set(),
dependentEntries: new Set(dependentEntries),
modules: [],
pure: true,
size: 0
});
let atomSize = 0;
let pure = true;
for (const module of modules) {
chunkByModule.set(module, chunk);
// Unfortunately, we cannot take tree-shaking into account here because
// rendering has not happened yet, but we can detect empty modules
if (module.isIncluded()) {
pure &&= !module.hasEffects();
// we use a trivial size for the default minChunkSize to improve
// performance
atomSize += minChunkSize > 1 ? module.estimateSize() : 1;
}
}
if (!pure) {
sideEffectAtoms |= atomMask;
}
sizeByAtom.push(atomSize);
chunk.containedAtoms |= atomMask;
chunk.modules.push(...modules);
chunk.pure &&= pure;
chunk.size += atomSize;
atomMask <<= 1n;
}
const chunks = Object.values(chunksBySignature);
sideEffectAtoms |= addChunkDependenciesAndGetExternalSideEffectAtoms(
chunks,
chunkByModule,
atomMask
);
return { chunks, sideEffectAtoms, sizeByAtom };
}
function addChunkDependenciesAndGetExternalSideEffectAtoms(
chunks: ChunkDescription[],
chunkByModule: Map<Module, ChunkDescription>,
nextAvailableAtomMask: bigint
): bigint {
const signatureByExternalModule = new Map<ExternalModule, bigint>();
let externalSideEffectAtoms = 0n;
for (const chunk of chunks) {
const { dependencies, modules } = chunk;
for (const module of modules) {
for (const dependency of module.getDependenciesToBeIncluded()) {
if (dependency instanceof ExternalModule) {
if (dependency.info.moduleSideEffects) {
const signature = getOrCreate(signatureByExternalModule, dependency, () => {
const signature = nextAvailableAtomMask;
nextAvailableAtomMask <<= 1n;
externalSideEffectAtoms |= signature;
return signature;
});
chunk.containedAtoms |= signature;
chunk.correlatedAtoms |= signature;
}
} else {
const dependencyChunk = chunkByModule.get(dependency);
if (dependencyChunk && dependencyChunk !== chunk) {
dependencies.add(dependencyChunk);
dependencyChunk.dependentChunks.add(chunk);
}
}
}
}
}
return externalSideEffectAtoms;
}
/**
* This function tries to get rid of small chunks by merging them with other
* chunks.
*
* We can only merge chunks safely if after the merge, loading any entry point
* in any allowed order will not trigger side effects that should not have been
* triggered. While side effects are usually things like global function calls,
* global variable mutations or potentially thrown errors, details do not
* matter here, and we just discern chunks without side effects (pure chunks)
* from other chunks.
*
* As a first step, we assign each pre-generated chunk with side effects a
* label. I.e. we have side effect "A" if the non-pure chunk "A" is loaded.
*
* Now to determine the side effects of loading a chunk, one also has to take
* the side effects of its dependencies into account. So if A depends on B
* (A -> B) and both have side effects, loading A triggers effects AB.
*
* Now from the previous step we know that each chunk is uniquely determined by
* the entry points that depend on it and cause it to load, which we will call
* its dependent entry points.
*
* E.g. if X -> A and Y -> A, then the dependent entry points of A are XY.
* Starting from that idea, we can determine a set of chunks—and thus a set
* of side effects—that must have been triggered if a certain chunk has been
* loaded. Basically, it is the intersection of all chunks loaded by the
* dependent entry points of a given chunk. We call the corresponding side
* effects the correlated side effects of that chunk.
*
* Example:
* X -> ABC, Y -> ADE, A-> F, B -> D
* Then taking dependencies into account, X -> ABCDF, Y -> ADEF
* The intersection is ADF. So we know that when A is loaded, D and F must also
* be in memory even though neither D nor A is a dependency of the other.
* If all have side effects, we call ADF the correlated side effects of A. The
* correlated side effects need to remain constant when merging chunks.
*
* In contrast, we have the dependency side effects of A, which represents
* the side effects we trigger if we directly load A. In this example, the
* dependency side effects are AF.
* For entry chunks, dependency and correlated side effects are the same.
*
* With these concepts, merging chunks is allowed if the correlated side
* effects of each entry do not change. Thus, we are allowed to merge two
* chunks if
*
* a) the dependency side effects of each chunk are a subset of the correlated
* side effects of the other chunk, so no additional side effects are
* triggered for any entry, or
* b) The dependent entry points of chunk A are a subset of the dependent entry
* points of chunk B while the dependency side effects of A are a subset of
* the correlated side effects of B. Because in that scenario, whenever A is
* loaded, B is loaded as well. But there are cases when B is loaded where A
* is not loaded. So if we merge the chunks, all dependency side effects of
* A will be added to the correlated side effects of B, and as the latter is
* not allowed to change, the former need to be a subset of the latter.
*
* Another consideration when merging small chunks into other chunks is to
* avoid loading too much additional code. This is achieved when the dependent
* entries of the small chunk are a subset of the dependent entries of the
* other chunk: Whenever the small chunk is loaded, the other chunk is already
* loaded or in memory anyway. So at worst, the additional size of the small
* chunk is loaded unnecessarily when only the other chunk is needed.
*
* So the algorithm performs merges in two passes:
*
* 1. First we try to merge small chunks A only into other chunks B if the
* dependent entries of A are a subset of the dependent entries of B and the
* dependency side effects of A are a subset of the correlated side effects
* of B.
* 2. Only then, for all remaining small chunks, we look for arbitrary merges
* following rule (a), starting with the smallest chunks when looking for
* possible merge targets.
*/
function getOptimizedChunks(
chunks: ChunkDescription[],
minChunkSize: number,
sideEffectAtoms: bigint,
sizeByAtom: number[],
log: LogHandler
): { modules: Module[] }[] {
timeStart('optimize chunks', 3);
const chunkPartition = getPartitionedChunks(chunks, minChunkSize);
if (!chunkPartition) {
timeEnd('optimize chunks', 3);
return chunks; // the actual modules
}
if (minChunkSize > 1) {
log('info', logOptimizeChunkStatus(chunks.length, chunkPartition.small.size, 'Initially'));
}
mergeChunks(chunkPartition, minChunkSize, sideEffectAtoms, sizeByAtom);
if (minChunkSize > 1) {
log(
'info',
logOptimizeChunkStatus(
chunkPartition.small.size + chunkPartition.big.size,
chunkPartition.small.size,
'After merging chunks'
)
);
}
timeEnd('optimize chunks', 3);
return [...chunkPartition.small, ...chunkPartition.big];
}
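// Illustrative sketch (hypothetical helper, not part of the original
// module): the merge safety conditions described above amount to subset
// checks on atom bitmasks. "The dependency side effects of A are a subset of
// the correlated side effects of B" can be tested with a single BigInt
// expression:
const exampleIsSideEffectSubset = (
	dependencySideEffectsOfA: bigint,
	correlatedSideEffectsOfB: bigint
): boolean => (dependencySideEffectsOfA & ~correlatedSideEffectsOfB) === 0n;
// e.g. side effect atoms {0} = 1n against correlated atoms {0, 2} = 5n:
// exampleIsSideEffectSubset(1n, 5n) === true, so merging would not trigger
// new side effects for entries that already load B. The actual check lives in
// getAdditionalSizeIfNoTransitiveDependencyOrNonCorrelatedSideEffect below.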
function getPartitionedChunks(
chunks: ChunkDescription[],
minChunkSize: number
): ChunkPartition | null {
const smallChunks: ChunkDescription[] = [];
const bigChunks: ChunkDescription[] = [];
for (const chunk of chunks) {
(chunk.size < minChunkSize ? smallChunks : bigChunks).push(chunk);
}
if (smallChunks.length === 0) {
return null;
}
smallChunks.sort(compareChunkSize);
bigChunks.sort(compareChunkSize);
return {
big: new Set(bigChunks),
small: new Set(smallChunks)
};
}
function compareChunkSize(
{ size: sizeA }: ChunkDescription,
{ size: sizeB }: ChunkDescription
): number {
return sizeA - sizeB;
}
function mergeChunks(
chunkPartition: ChunkPartition,
minChunkSize: number,
sideEffectAtoms: bigint,
sizeByAtom: number[]
) {
const { small } = chunkPartition;
for (const mergedChunk of small) {
const bestTargetChunk = findBestMergeTarget(
mergedChunk,
chunkPartition,
sideEffectAtoms,
sizeByAtom,
// In the default case, we do not accept size increases
minChunkSize <= 1 ? 1 : Infinity
);
if (bestTargetChunk) {
const { containedAtoms, correlatedAtoms, modules, pure, size } = mergedChunk;
small.delete(mergedChunk);
getChunksInPartition(bestTargetChunk, minChunkSize, chunkPartition).delete(bestTargetChunk);
bestTargetChunk.modules.push(...modules);
bestTargetChunk.size += size;
bestTargetChunk.pure &&= pure;
const { dependencies, dependentChunks, dependentEntries } = bestTargetChunk;
bestTargetChunk.correlatedAtoms &= correlatedAtoms;
bestTargetChunk.containedAtoms |= containedAtoms;
for (const entry of mergedChunk.dependentEntries) {
dependentEntries.add(entry);
}
for (const dependency of mergedChunk.dependencies) {
dependencies.add(dependency);
dependency.dependentChunks.delete(mergedChunk);
dependency.dependentChunks.add(bestTargetChunk);
}
for (const dependentChunk of mergedChunk.dependentChunks) {
dependentChunks.add(dependentChunk);
dependentChunk.dependencies.delete(mergedChunk);
dependentChunk.dependencies.add(bestTargetChunk);
}
dependencies.delete(bestTargetChunk);
dependentChunks.delete(bestTargetChunk);
getChunksInPartition(bestTargetChunk, minChunkSize, chunkPartition).add(bestTargetChunk);
}
}
}
function findBestMergeTarget(
mergedChunk: ChunkDescription,
{ big, small }: ChunkPartition,
sideEffectAtoms: bigint,
sizeByAtom: number[],
smallestAdditionalSize: number
): ChunkDescription | null {
let bestTargetChunk: ChunkDescription | null = null;
// In the default case, we do not accept size increases
for (const targetChunk of concatLazy([small, big])) {
if (mergedChunk === targetChunk) continue;
const additionalSizeAfterMerge = getAdditionalSizeAfterMerge(
mergedChunk,
targetChunk,
smallestAdditionalSize,
sideEffectAtoms,
sizeByAtom
);
if (additionalSizeAfterMerge < smallestAdditionalSize) {
bestTargetChunk = targetChunk;
if (additionalSizeAfterMerge === 0) break;
smallestAdditionalSize = additionalSizeAfterMerge;
}
}
return bestTargetChunk;
}
/**
* Determine the additional unused code size that would be added by merging the
* two chunks. This is not an exact measurement but rather an upper bound. If
* the merge produces cycles or adds non-correlated side effects, `Infinity`
* is returned.
* Merging will not produce cycles if none of the direct non-merged
* dependencies of a chunk have the other chunk as a transitive dependency.
*/
function getAdditionalSizeAfterMerge(
mergedChunk: ChunkDescription,
targetChunk: ChunkDescription,
// The maximum additional unused code size allowed to be added by the merge,
// taking dependencies into account, needs to be below this number
currentAdditionalSize: number,
sideEffectAtoms: bigint,
sizeByAtom: number[]
): number {
const firstSize = getAdditionalSizeIfNoTransitiveDependencyOrNonCorrelatedSideEffect(
mergedChunk,
targetChunk,
currentAdditionalSize,
sideEffectAtoms,
sizeByAtom
);
return firstSize < currentAdditionalSize
? firstSize +
getAdditionalSizeIfNoTransitiveDependencyOrNonCorrelatedSideEffect(
targetChunk,
mergedChunk,
currentAdditionalSize - firstSize,
sideEffectAtoms,
sizeByAtom
)
: Infinity;
}
function getAdditionalSizeIfNoTransitiveDependencyOrNonCorrelatedSideEffect(
dependentChunk: ChunkDescription,
dependencyChunk: ChunkDescription,
currentAdditionalSize: number,
sideEffectAtoms: bigint,
sizeByAtom: number[]
): number {
const { correlatedAtoms } = dependencyChunk;
let dependencyAtoms = dependentChunk.containedAtoms;
const dependentContainedSideEffects = dependencyAtoms & sideEffectAtoms;
if ((correlatedAtoms & dependentContainedSideEffects) !== dependentContainedSideEffects) {
return Infinity;
}
const chunksToCheck = new Set(dependentChunk.dependencies);
for (const { dependencies, containedAtoms } of chunksToCheck) {
dependencyAtoms |= containedAtoms;
const containedSideEffects = containedAtoms & sideEffectAtoms;
if ((correlatedAtoms & containedSideEffects) !== containedSideEffects) {
return Infinity;
}
for (const dependency of dependencies) {
if (dependency === dependencyChunk) {
return Infinity;
}
chunksToCheck.add(dependency);
}
}
return getAtomsSizeIfBelowLimit(
dependencyAtoms & ~correlatedAtoms,
currentAdditionalSize,
sizeByAtom
);
}
function getAtomsSizeIfBelowLimit(
atoms: bigint,
currentAdditionalSize: number,
sizeByAtom: number[]
): number {
let size = 0;
let atomIndex = 0;
let atomSignature = 1n;
const { length } = sizeByAtom;
for (; atomIndex < length; atomIndex++) {
if ((atoms & atomSignature) === atomSignature) {
size += sizeByAtom[atomIndex];
}
atomSignature <<= 1n;
if (size >= currentAdditionalSize) {
return Infinity;
}
}
return size;
}
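// Illustrative usage (hypothetical sizes, not part of the original module):
// atoms 0 and 2 (bitmask 0b101n) with per-atom sizes [10, 20, 30] sum to 40,
// which is returned because it stays below the limit of 100. With a limit of
// 40 or less, Infinity would be returned instead.
const exampleAtomsSize = getAtomsSizeIfBelowLimit(0b101n, 100, [10, 20, 30]); // === 40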
function getChunksInPartition(
chunk: ChunkDescription,
minChunkSize: number,
chunkPartition: ChunkPartition
): Set<ChunkDescription> {
return chunk.size < minChunkSize ? chunkPartition.small : chunkPartition.big;
}