BAEL-6782: partitioning streams (#14833)
* BAEL-6782: partitioning streams * BAEL-6782: code reivew * BAEL-6782: line continuations
This commit is contained in:
parent
5dcd7e88d0
commit
5aea5d5c29
@ -43,6 +43,11 @@
|
|||||||
<artifactId>vavr</artifactId>
|
<artifactId>vavr</artifactId>
|
||||||
<version>${vavr.version}</version>
|
<version>${vavr.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>${guava.version}</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
@ -72,6 +77,7 @@
|
|||||||
<maven.compiler.source>12</maven.compiler.source>
|
<maven.compiler.source>12</maven.compiler.source>
|
||||||
<maven.compiler.target>12</maven.compiler.target>
|
<maven.compiler.target>12</maven.compiler.target>
|
||||||
<vavr.version>0.10.2</vavr.version>
|
<vavr.version>0.10.2</vavr.version>
|
||||||
|
<guava.version>32.1.2-jre</guava.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
</project>
|
</project>
|
@ -0,0 +1,90 @@
|
|||||||
|
package com.baeldung.streams.partitioning;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.BiConsumer;
|
||||||
|
import java.util.function.BinaryOperator;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
import java.util.stream.Collector;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import com.google.common.collect.Iterables;
|
||||||
|
|
||||||
|
public class PartitionStream {
|
||||||
|
|
||||||
|
public static <T> Stream<List<T>> partitionList(List<T> source, int batchSize) {
|
||||||
|
if (batchSize <= 0) {
|
||||||
|
throw new IllegalArgumentException(String.format("Expected the batchSize to be greater than ZERO, actual value was: %s", batchSize));
|
||||||
|
}
|
||||||
|
if (source.isEmpty()) {
|
||||||
|
return Stream.empty();
|
||||||
|
}
|
||||||
|
int nrOfFullBatches = (source.size() - 1) / batchSize;
|
||||||
|
return IntStream.rangeClosed(0, nrOfFullBatches)
|
||||||
|
.mapToObj(batch -> {
|
||||||
|
int startIndex = batch * batchSize;
|
||||||
|
int endIndex = (batch == nrOfFullBatches) ? source.size() : (batch + 1) * batchSize;
|
||||||
|
return source.subList(startIndex, endIndex);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T> Iterable<List<T>> partitionUsingGuava(Stream<T> source, int batchSize) {
|
||||||
|
return Iterables.partition(source::iterator, batchSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T> List<List<T>> partitionStream(Stream<T> source, int batchSize) {
|
||||||
|
return source.collect(partitionBySize(batchSize, Collectors.toList()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T, A, R> Collector<T, ?, R> partitionBySize(int batchSize, Collector<List<T>, A, R> downstream) {
|
||||||
|
Supplier<Accumulator<T, A>> supplier = () -> new Accumulator<>(
|
||||||
|
batchSize,
|
||||||
|
downstream.supplier().get(),
|
||||||
|
downstream.accumulator()::accept
|
||||||
|
);
|
||||||
|
|
||||||
|
BiConsumer<Accumulator<T, A>, T> accumulator = (acc, value) -> acc.add(value);
|
||||||
|
|
||||||
|
BinaryOperator<Accumulator<T, A>> combiner = (acc1, acc2) -> acc1.combine(acc2, downstream.combiner());
|
||||||
|
|
||||||
|
Function<Accumulator<T, A>, R> finisher = acc -> {
|
||||||
|
if (!acc.values.isEmpty()) {
|
||||||
|
downstream.accumulator().accept(acc.downstreamAccumulator, acc.values);
|
||||||
|
}
|
||||||
|
return downstream.finisher().apply(acc.downstreamAccumulator);
|
||||||
|
};
|
||||||
|
|
||||||
|
return Collector.of(supplier, accumulator, combiner, finisher, Collector.Characteristics.UNORDERED);
|
||||||
|
}
|
||||||
|
|
||||||
|
static class Accumulator<T, A> {
|
||||||
|
private final List<T> values = new ArrayList<>();
|
||||||
|
private final int batchSize;
|
||||||
|
private A downstreamAccumulator;
|
||||||
|
private final BiConsumer<A, List<T>> batchFullListener;
|
||||||
|
|
||||||
|
Accumulator(int batchSize, A accumulator, BiConsumer<A, List<T>> onBatchFull) {
|
||||||
|
this.batchSize = batchSize;
|
||||||
|
this.downstreamAccumulator = accumulator;
|
||||||
|
this.batchFullListener = onBatchFull;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add(T value) {
|
||||||
|
values.add(value);
|
||||||
|
if (values.size() == batchSize) {
|
||||||
|
batchFullListener.accept(downstreamAccumulator, new ArrayList<>(values));
|
||||||
|
values.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Accumulator<T, A> combine(Accumulator<T, A> other, BinaryOperator<A> accumulatorCombiner) {
|
||||||
|
this.downstreamAccumulator = accumulatorCombiner.apply(downstreamAccumulator, other.downstreamAccumulator);
|
||||||
|
other.values.forEach(this::add);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,82 @@
|
|||||||
|
package com.baeldung.partitioning;
|
||||||
|
|
||||||
|
import static com.baeldung.streams.partitioning.PartitionStream.partitionList;
|
||||||
|
import static com.baeldung.streams.partitioning.PartitionStream.partitionStream;
|
||||||
|
import static com.baeldung.streams.partitioning.PartitionStream.partitionUsingGuava;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
|
import static org.assertj.core.api.Assertions.atIndex;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
public class PartitionStreamsUnitTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void whenPartitionList_thenReturnThreeSubLists() {
|
||||||
|
List<Integer> source = List.of(1, 2, 3, 4, 5, 6, 7, 8);
|
||||||
|
|
||||||
|
Stream<List<Integer>> result = partitionList(source, 3);
|
||||||
|
|
||||||
|
assertThat(result)
|
||||||
|
.containsExactlyInAnyOrder(
|
||||||
|
List.of(1, 2, 3),
|
||||||
|
List.of(4, 5, 6),
|
||||||
|
List.of(7, 8)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void whenPartitionEmptyList_thenReturnEmptyStream() {
|
||||||
|
Stream<List<Integer>> result = partitionList(Collections.emptyList(), 3);
|
||||||
|
|
||||||
|
assertThat(result).isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void whenPartitionListWithNegativeBatchSize_thenThrowException() {
|
||||||
|
assertThatThrownBy(() -> partitionList(List.of(1,2,3), -1))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class)
|
||||||
|
.hasMessage("Expected the batchSize to be greater than ZERO, actual value was: -1");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void whenPartitionParallelStream_thenReturnThreeSubLists() {
|
||||||
|
Stream<Integer> source = Stream.of(1, 2, 3, 4, 5, 6, 7, 8).parallel();
|
||||||
|
|
||||||
|
List<List<Integer>> result = partitionStream(source, 3);
|
||||||
|
|
||||||
|
assertThat(result)
|
||||||
|
.hasSize(3)
|
||||||
|
.satisfies(batch -> assertThat(batch).hasSize(3), atIndex(0))
|
||||||
|
.satisfies(batch -> assertThat(batch).hasSize(3), atIndex(1))
|
||||||
|
.satisfies(batch -> assertThat(batch).hasSize(2), atIndex(2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void whenPartitionEmptyParallelStream_thenReturnEmptyList() {
|
||||||
|
Stream<Integer> source = Stream.<Integer>empty().parallel();
|
||||||
|
|
||||||
|
List<List<Integer>> result = partitionStream(source, 3);
|
||||||
|
|
||||||
|
assertThat(result).isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void whenPartitionParallelStreamWithGuava_thenReturnThreeSubLists() {
|
||||||
|
Stream<Integer> source = Stream.of(1, 2, 3, 4, 5, 6, 7, 8).parallel();
|
||||||
|
|
||||||
|
Iterable<List<Integer>> result = partitionUsingGuava(source, 3);
|
||||||
|
|
||||||
|
assertThat(result)
|
||||||
|
.map(ArrayList::new)
|
||||||
|
.hasSize(3)
|
||||||
|
.satisfies(batch -> assertThat(batch).asList().hasSize(3), atIndex(0))
|
||||||
|
.satisfies(batch -> assertThat(batch).asList().hasSize(3), atIndex(1))
|
||||||
|
.satisfies(batch -> assertThat(batch).asList().hasSize(2), atIndex(2));
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user