BAEL-5924 Java 8 Stream with Batch Processing Support (#13203)
This commit is contained in:
parent
a4f6d89cc7
commit
de6d2bc25c
@ -59,6 +59,36 @@
|
|||||||
<version>3.12.0</version>
|
<version>3.12.0</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.reactivex.rxjava3</groupId>
|
||||||
|
<artifactId>rxjava</artifactId>
|
||||||
|
<version>${rx.java3.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.vavr</groupId>
|
||||||
|
<artifactId>vavr</artifactId>
|
||||||
|
<version>${io.varv.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.projectreactor</groupId>
|
||||||
|
<artifactId>reactor-core</artifactId>
|
||||||
|
<version>${io.reactor3.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-collections4</artifactId>
|
||||||
|
<version>${apache.commons.collection4.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>${google.guava.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.oath.cyclops</groupId>
|
||||||
|
<artifactId>cyclops</artifactId>
|
||||||
|
<version>${cyclops.version}</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
@ -90,6 +120,12 @@
|
|||||||
<maven.compiler.target>12</maven.compiler.target>
|
<maven.compiler.target>12</maven.compiler.target>
|
||||||
<rx.java.version>1.2.5</rx.java.version>
|
<rx.java.version>1.2.5</rx.java.version>
|
||||||
<rx.java2.version>2.2.2</rx.java2.version>
|
<rx.java2.version>2.2.2</rx.java2.version>
|
||||||
|
<rx.java3.version>3.1.5</rx.java3.version>
|
||||||
|
<io.varv.version>1.0.0-alpha-4</io.varv.version>
|
||||||
|
<io.reactor3.version>3.5.1</io.reactor3.version>
|
||||||
|
<apache.commons.collection4.version>4.4</apache.commons.collection4.version>
|
||||||
|
<google.guava.version>31.1-jre</google.guava.version>
|
||||||
|
<cyclops.version>10.4.1</cyclops.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
</project>
|
</project>
|
@ -0,0 +1,47 @@
|
|||||||
|
package com.baeldung.streams.processing;
|
||||||
|
|
||||||
|
import static java.util.Spliterator.ORDERED;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Spliterators;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
|
public class CustomBatchIterator<T> implements Iterator<List<T>> {
|
||||||
|
private final int batchSize;
|
||||||
|
private List<T> currentBatch;
|
||||||
|
private final Iterator<T> iterator;
|
||||||
|
|
||||||
|
public CustomBatchIterator(Iterator<T> sourceIterator, int batchSize) {
|
||||||
|
this.batchSize = batchSize;
|
||||||
|
this.iterator = sourceIterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<T> next() {
|
||||||
|
return currentBatch;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
prepareNextBatch();
|
||||||
|
return currentBatch != null && !currentBatch.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T> Stream<List<T>> batchStreamOf(Stream<T> stream, int batchSize) {
|
||||||
|
return stream(new CustomBatchIterator<>(stream.iterator(), batchSize));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> Stream<T> stream(Iterator<T> iterator) {
|
||||||
|
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, ORDERED), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void prepareNextBatch() {
|
||||||
|
currentBatch = new ArrayList<>(batchSize);
|
||||||
|
while (iterator.hasNext() && currentBatch.size() < batchSize) {
|
||||||
|
currentBatch.add(iterator.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,141 @@
|
|||||||
|
package com.baeldung.streams.processing;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.apache.commons.collections4.ListUtils;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
|
||||||
|
import cyclops.data.LazySeq;
|
||||||
|
import cyclops.reactive.ReactiveSeq;
|
||||||
|
import io.reactivex.rxjava3.core.Observable;
|
||||||
|
import reactor.core.publisher.Flux;
|
||||||
|
|
||||||
|
public class StreamProcessingUnitTest {
|
||||||
|
public final int BATCH_SIZE = 10;
|
||||||
|
|
||||||
|
private final List<Integer> firstBatch = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||||
|
private final List<Integer> secondBatch = List.of(10, 11, 12, 13, 14, 15, 16, 17, 18, 19);
|
||||||
|
private final List<Integer> thirdBatch = List.of(20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
|
||||||
|
private final List<Integer> fourthBatch = List.of(30, 31, 32, 33);
|
||||||
|
|
||||||
|
public Stream<Integer> data;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() {
|
||||||
|
data = IntStream.range(0, 34)
|
||||||
|
.boxed();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingSpliterator_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = new ArrayList<>();
|
||||||
|
CustomBatchIterator.batchStreamOf(data, BATCH_SIZE)
|
||||||
|
.forEach(result::add);
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingCollectionAPI_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = data.collect(Collectors.groupingBy(it -> it / BATCH_SIZE))
|
||||||
|
.values();
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchParallelUsingCollectionAPI_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = data.parallel()
|
||||||
|
.collect(Collectors.groupingBy(it -> it / BATCH_SIZE))
|
||||||
|
.values();
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingRxJavaV3_thenFourBatchesAreObtained() {
|
||||||
|
// RxJava v3
|
||||||
|
Collection<List<Integer>> result = new ArrayList<>();
|
||||||
|
Observable.fromStream(data)
|
||||||
|
.buffer(BATCH_SIZE)
|
||||||
|
.subscribe(result::add);
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingReactor_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = new ArrayList<>();
|
||||||
|
Flux.fromStream(data)
|
||||||
|
.buffer(BATCH_SIZE)
|
||||||
|
.subscribe(result::add);
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingApacheCommon_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = new ArrayList<>(ListUtils.partition(data.collect(Collectors.toList()), BATCH_SIZE));
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingGuava_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = new ArrayList<>();
|
||||||
|
Iterators.partition(data.iterator(), BATCH_SIZE)
|
||||||
|
.forEachRemaining(result::add);
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingCyclops_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = new ArrayList<>();
|
||||||
|
ReactiveSeq.fromStream(data)
|
||||||
|
.grouped(BATCH_SIZE)
|
||||||
|
.toList()
|
||||||
|
.forEach(value -> result.add(value.collect(Collectors.toList())));
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingCyclopsLazy_thenFourBatchesAreObtained() {
|
||||||
|
Collection<List<Integer>> result = new ArrayList<>();
|
||||||
|
LazySeq.fromStream(data)
|
||||||
|
.grouped(BATCH_SIZE)
|
||||||
|
.toList()
|
||||||
|
.forEach(value -> result.add(value.collect(Collectors.toList())));
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
package com.baeldung.streams.processing.vavr;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.baeldung.streams.processing.StreamProcessingUnitTest;
|
||||||
|
|
||||||
|
import io.vavr.collection.List;
|
||||||
|
import io.vavr.collection.Stream;
|
||||||
|
|
||||||
|
public class StreamProcessingWithVavrUnitTest extends StreamProcessingUnitTest {
|
||||||
|
|
||||||
|
private final List<Integer> firstBatch = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
|
||||||
|
private final List<Integer> secondBatch = List.of(10, 11, 12, 13, 14, 15, 16, 17, 18, 19);
|
||||||
|
private final List<Integer> thirdBatch = List.of(20, 21, 22, 23, 24, 25, 26, 27, 28, 29);
|
||||||
|
private final List<Integer> fourthBatch = List.of(30, 31, 32, 33);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenAStreamOfData_whenIsProcessingInBatchUsingVavr_thenFourBatchesAreObtained() {
|
||||||
|
List<List<Integer>> result = Stream.ofAll(data)
|
||||||
|
.toList()
|
||||||
|
.grouped(BATCH_SIZE)
|
||||||
|
.toList();
|
||||||
|
assertTrue(result.contains(firstBatch));
|
||||||
|
assertTrue(result.contains(secondBatch));
|
||||||
|
assertTrue(result.contains(thirdBatch));
|
||||||
|
assertTrue(result.contains(fourthBatch));
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user