-
Notifications
You must be signed in to change notification settings - Fork 668
/
Collector.java
358 lines (339 loc) · 16.5 KB
/
Collector.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
/*
* Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package java.util.stream;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
/**
* A <a href="package-summary.html#Reduction">mutable reduction operation</a> that
* accumulates input elements into a mutable result container, optionally transforming
* the accumulated result into a final representation after all input elements
* have been processed. Reduction operations can be performed either sequentially
* or in parallel.
*
* <p>Examples of mutable reduction operations include:
* accumulating elements into a {@code Collection}; concatenating
* strings using a {@code StringBuilder}; computing summary information about
* elements such as sum, min, max, or average; computing "pivot table" summaries
* such as "maximum valued transaction by seller", etc. The class {@link Collectors}
* provides implementations of many common mutable reductions.
*
* <p>A {@code Collector} is specified by four functions that work together to
* accumulate entries into a mutable result container, and optionally perform
* a final transform on the result. They are: <ul>
* <li>creation of a new result container ({@link #supplier()})</li>
* <li>incorporating a new data element into a result container ({@link #accumulator()})</li>
* <li>combining two result containers into one ({@link #combiner()})</li>
* <li>performing an optional final transform on the container ({@link #finisher()})</li>
* </ul>
*
* <p>Collectors also have a set of characteristics, such as
* {@link Characteristics#CONCURRENT}, that provide hints that can be used by a
* reduction implementation to provide better performance.
*
* <p>A sequential implementation of a reduction using a collector would
* create a single result container using the supplier function, and invoke the
* accumulator function once for each input element. A parallel implementation
* would partition the input, create a result container for each partition,
* accumulate the contents of each partition into a subresult for that partition,
* and then use the combiner function to merge the subresults into a combined
* result.
*
* <p>To ensure that sequential and parallel executions produce equivalent
* results, the collector functions must satisfy an <em>identity</em> and an
* <a href="package-summary.html#Associativity">associativity</a> constraints.
*
* <p>The identity constraint says that for any partially accumulated result,
* combining it with an empty result container must produce an equivalent
* result. That is, for a partially accumulated result {@code a} that is the
* result of any series of accumulator and combiner invocations, {@code a} must
* be equivalent to {@code combiner.apply(a, supplier.get())}.
*
* <p>The associativity constraint says that splitting the computation must
* produce an equivalent result. That is, for any input elements {@code t1}
* and {@code t2}, the results {@code r1} and {@code r2} in the computation
* below must be equivalent:
* <pre>{@code
* A a1 = supplier.get();
* accumulator.accept(a1, t1);
* accumulator.accept(a1, t2);
* R r1 = finisher.apply(a1); // result without splitting
*
* A a2 = supplier.get();
* accumulator.accept(a2, t1);
* A a3 = supplier.get();
* accumulator.accept(a3, t2);
* R r2 = finisher.apply(combiner.apply(a2, a3)); // result with splitting
* } </pre>
*
* <p>For collectors that do not have the {@code UNORDERED} characteristic,
* two accumulated results {@code a1} and {@code a2} are equivalent if
* {@code finisher.apply(a1).equals(finisher.apply(a2))}. For unordered
* collectors, equivalence is relaxed to allow for non-equality related to
* differences in order. (For example, an unordered collector that accumulated
* elements to a {@code List} would consider two lists equivalent if they
* contained the same elements, ignoring order.)
*
* <p>Libraries that implement reduction based on {@code Collector}, such as
* {@link Stream#collect(Collector)}, must adhere to the following constraints:
* <ul>
* <li>The first argument passed to the accumulator function, both
* arguments passed to the combiner function, and the argument passed to the
* finisher function must be the result of a previous invocation of the
* result supplier, accumulator, or combiner functions.</li>
* <li>The implementation should not do anything with the result of any of
* the result supplier, accumulator, or combiner functions other than to
* pass them again to the accumulator, combiner, or finisher functions,
* or return them to the caller of the reduction operation.</li>
* <li>If a result is passed to the combiner or finisher
* function, and the same object is not returned from that function, it is
* never used again.</li>
* <li>Once a result is passed to the combiner or finisher function, it
* is never passed to the accumulator function again.</li>
* <li>For non-concurrent collectors, any result returned from the result
* supplier, accumulator, or combiner functions must be serially
* thread-confined. This enables collection to occur in parallel without
* the {@code Collector} needing to implement any additional synchronization.
* The reduction implementation must manage that the input is properly
* partitioned, that partitions are processed in isolation, and combining
* happens only after accumulation is complete.</li>
* <li>For concurrent collectors, an implementation is free to (but not
* required to) implement reduction concurrently. A concurrent reduction
* is one where the accumulator function is called concurrently from
* multiple threads, using the same concurrently-modifiable result container,
* rather than keeping the result isolated during accumulation.
* A concurrent reduction should only be applied if the collector has the
* {@link Characteristics#UNORDERED} characteristics or if the
* originating data is unordered.</li>
* </ul>
*
* <p>In addition to the predefined implementations in {@link Collectors}, the
* static factory methods {@link #of(Supplier, BiConsumer, BinaryOperator, Characteristics...)}
* can be used to construct collectors. For example, you could create a collector
* that accumulates widgets into a {@code TreeSet} with:
*
* <pre>{@code
* Collector<Widget, ?, TreeSet<Widget>> intoSet =
* Collector.of(TreeSet::new, TreeSet::add,
* (left, right) -> { left.addAll(right); return left; });
* }</pre>
*
* (This behavior is also implemented by the predefined collector
* {@link Collectors#toCollection(Supplier)}).
*
* @param <T> the type of input elements to the reduction operation
* @param <A> the mutable accumulation type of the reduction operation (often
* hidden as an implementation detail)
* @param <R> the result type of the reduction operation
*
* @apiNote Performing a reduction operation with a {@code Collector} should produce a
* result equivalent to:
* <pre>{@code
* R container = collector.supplier().get();
* for (T t : data)
* collector.accumulator().accept(container, t);
* return collector.finisher().apply(container);
* }</pre>
*
* <p>However, the library is free to partition the input, perform the reduction
* on the partitions, and then use the combiner function to combine the partial
* results to achieve a parallel reduction. (Depending on the specific reduction
* operation, this may perform better or worse, depending on the relative cost
* of the accumulator and combiner functions.)
*
* <p>Collectors are designed to be <em>composed</em>; many of the methods
* in {@link Collectors} are functions that take a collector and produce
* a new collector. For example, given the following collector that computes
* the sum of the salaries of a stream of employees:
*
* <pre>{@code
* Collector<Employee, ?, Integer> summingSalaries
* = Collectors.summingInt(Employee::getSalary))
* }</pre>
*
* If we wanted to create a collector to tabulate the sum of salaries by
* department, we could reuse the "sum of salaries" logic using
* {@link Collectors#groupingBy(Function, Collector)}:
*
* <pre>{@code
* Collector<Employee, ?, Map<Department, Integer>> summingSalariesByDept
* = Collectors.groupingBy(Employee::getDepartment, summingSalaries);
* }</pre>
* @see Stream#collect(Collector)
* @see Collectors
* @since 1.8
*/
// 收集器的抽象接口,用来指示收集操作需要经过哪些工作流程
public interface Collector<T, A, R> {
/**
* A function that creates and returns a new mutable result container.
*
* @return a function which returns a new, mutable result container
*/
// 1. 容器工厂(该工厂用来构造收纳元素的容器)
Supplier<A> supplier();
/**
* A function that folds a value into a mutable result container.
*
* @return a function which folds a value into a mutable result container
*/
// 2. 择取元素(这是(子)任务中的择取操作,通常用于将元素添加到目标容器)
BiConsumer<A, T> accumulator();
/**
* A function that accepts two partial results and merges them. The
* combiner function may fold state from one argument into the other and
* return that, or may return a new result container.
*
* @return a function which combines two partial results into a combined
* result
*/
// 3. 合并容器(这是合并操作,通常用于在并行流中合并子任务)
BinaryOperator<A> combiner();
/**
* Perform the final transformation from the intermediate accumulation type
* {@code A} to the final result type {@code R}.
*
* <p>If the characteristic {@code IDENTITY_FINISH} is
* set, this function may be presumed to be an identity transform with an
* unchecked cast from {@code A} to {@code R}.
*
* @return a function which transforms the intermediate result to the final
* result
*/
// 4. 收尾操作(可选,最后执行)
Function<A, R> finisher();
/**
* Returns a {@code Set} of {@code Collector.Characteristics} indicating
* the characteristics of this Collector. This set should be immutable.
*
* @return an immutable set of collector characteristics
*/
// 5. 返回容器的参数,指示容器的特征
Set<Characteristics> characteristics();
/**
* Returns a new {@code Collector} described by the given {@code supplier},
* {@code accumulator}, and {@code combiner} functions. The resulting
* {@code Collector} has the {@code Collector.Characteristics.IDENTITY_FINISH}
* characteristic.
*
* @param supplier The supplier function for the new collector
* @param accumulator The accumulator function for the new collector
* @param combiner The combiner function for the new collector
* @param characteristics The collector characteristics for the new
* collector
* @param <T> The type of input elements for the new collector
* @param <R> The type of intermediate accumulation result, and final result,
* for the new collector
*
* @return the new {@code Collector}
*
* @throws NullPointerException if any argument is null
*/
// 返回由指定参数构造的Collector,新容器具有IDENTITY_FINISH参数
static <T, R> Collector<T, R, R> of(Supplier<R> supplier, BiConsumer<R, T> accumulator, BinaryOperator<R> combiner, Characteristics... characteristics) {
Objects.requireNonNull(supplier);
Objects.requireNonNull(accumulator);
Objects.requireNonNull(combiner);
Objects.requireNonNull(characteristics);
Set<Characteristics> cs;
if(characteristics.length == 0) {
cs = Collectors.CH_ID;
} else {
cs = Collections.unmodifiableSet(EnumSet.of(Characteristics.IDENTITY_FINISH, characteristics));
}
return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, cs);
}
/**
* Returns a new {@code Collector} described by the given {@code supplier},
* {@code accumulator}, {@code combiner}, and {@code finisher} functions.
*
* @param supplier The supplier function for the new collector
* @param accumulator The accumulator function for the new collector
* @param combiner The combiner function for the new collector
* @param finisher The finisher function for the new collector
* @param characteristics The collector characteristics for the new
* collector
* @param <T> The type of input elements for the new collector
* @param <A> The intermediate accumulation type of the new collector
* @param <R> The final result type of the new collector
*
* @return the new {@code Collector}
*
* @throws NullPointerException if any argument is null
*/
// 返回由指定参数构造的Collector
static <T, A, R> Collector<T, A, R> of(Supplier<A> supplier, BiConsumer<A, T> accumulator, BinaryOperator<A> combiner, Function<A, R> finisher, Characteristics... characteristics) {
Objects.requireNonNull(supplier);
Objects.requireNonNull(accumulator);
Objects.requireNonNull(combiner);
Objects.requireNonNull(finisher);
Objects.requireNonNull(characteristics);
Set<Characteristics> cs = Collectors.CH_NOID;
if(characteristics.length > 0) {
cs = EnumSet.noneOf(Characteristics.class);
Collections.addAll(cs, characteristics);
cs = Collections.unmodifiableSet(cs);
}
return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, finisher, cs);
}
/**
* Characteristics indicating properties of a {@code Collector}, which can
* be used to optimize reduction implementations.
*/
// Collector(收集器)的特征,可用于优化收纳操作
enum Characteristics {
/**
* Indicates that this collector is <em>concurrent</em>, meaning that
* the result container can support the accumulator function being
* called concurrently with the same result container from multiple
* threads.
*
* <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED},
* then it should only be evaluated concurrently if applied to an
* unordered data source.
*/
// 表示此收集器(的容器)是并发的,这意味着可以并发地汇总结果
CONCURRENT,
/**
* Indicates that the collection operation does not commit to preserving
* the encounter order of input elements. (This might be true if the
* result container has no intrinsic order, such as a {@link Set}.)
*/
// 表示收集操作不会保证确定的遭遇顺序(如收集到Set中)
UNORDERED,
/**
* Indicates that the finisher function is the identity function and
* can be elided. If set, it must be the case that an unchecked cast
* from A to R will succeed.
*/
// 表示最终的整理操作是标识转换,即可以省略
IDENTITY_FINISH
}
}