Improve kinesis task assignment after resharding (#12235)

Problem:
- When a kinesis stream is resharded, the original shards are closed.
   Any intermediate shard created in the process is eventually closed as well.
- If a shard is closed before any record is put into it, it can be safely ignored for ingestion.
- It is expensive to determine if a closed shard is empty, since it requires a call to the Kinesis cluster.

Changes:
- Maintain a cache of closed empty and closed non-empty shards in `KinesisSupervisor`
- Add config `skipIgnorableShards` to `KinesisSupervisorTuningConfig`
- The caches are used and updated only when `skipIgnorableShards = true`
This commit is contained in:
AmatyaAvadhanula 2022-02-18 12:37:06 +05:30 committed by GitHub
parent 70c40c4281
commit 1ec57cb935
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 253 additions and 22 deletions

View File

@ -72,7 +72,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
@ -667,28 +666,32 @@ public class KinesisRecordSupplier implements RecordSupplier<String, String, Byt
* This makes the method resilient to LimitExceeded exceptions (compared to 100 shards, 10 TPS of describeStream)
*
* @param stream name of stream
*
* @return Set of Shard ids
* @return Immutable set of shards
*/
public Set<Shard> getShards(String stream)
{
  final ImmutableSet.Builder<Shard> collected = ImmutableSet.builder();
  // Only the first page is addressed by stream name; follow-up pages are addressed by token alone
  ListShardsRequest pageRequest = new ListShardsRequest().withStreamName(stream);
  String continuationToken;
  do {
    final ListShardsResult page = kinesis.listShards(pageRequest);
    collected.addAll(page.getShards());
    continuationToken = page.getNextToken();
    if (continuationToken != null) {
      pageRequest = new ListShardsRequest().withNextToken(continuationToken);
    }
  } while (continuationToken != null);
  return collected.build();
}
@Override
public Set<String> getPartitionIds(String stream)
{
return wrapExceptions(() -> {
final Set<String> retVal = new TreeSet<>();
ListShardsRequest request = new ListShardsRequest().withStreamName(stream);
while (true) {
ListShardsResult result = kinesis.listShards(request);
retVal.addAll(result.getShards()
.stream()
.map(Shard::getShardId)
.collect(Collectors.toList())
);
String nextToken = result.getNextToken();
if (nextToken == null) {
return retVal;
}
request = new ListShardsRequest().withNextToken(nextToken);
ImmutableSet.Builder<String> partitionIds = ImmutableSet.builder();
for (Shard shard : getShards(stream)) {
partitionIds.add(shard.getShardId());
}
return partitionIds.build();
});
}
@ -750,6 +753,25 @@ public class KinesisRecordSupplier implements RecordSupplier<String, String, Byt
.anyMatch(fetch -> (fetch != null && !fetch.isDone()));
}
/**
 * Probes a closed shard by reading at most one record from its oldest available position.
 *
 * @param stream  name of the stream that the shard belongs to
 * @param shardId id of the closed shard to probe
 * @return true only if the read yields no records and no next shard iterator, false otherwise
 */
public boolean isClosedShardEmpty(String stream, String shardId)
{
  final String iteratorType = ShardIteratorType.TRIM_HORIZON.toString();
  final String startIterator = kinesis.getShardIterator(stream, shardId, iteratorType).getShardIterator();

  final GetRecordsRequest singleRecordRequest = new GetRecordsRequest().withShardIterator(startIterator)
                                                                       .withLimit(1);
  final GetRecordsResult probe = kinesis.getRecords(singleRecordRequest);

  if (!probe.getRecords().isEmpty()) {
    return false;
  }
  // No records came back; the shard counts as empty only if there is also nothing further to iterate
  return probe.getNextShardIterator() == null;
}
/**
* Check that a {@link PartitionResource} has been assigned to this record supplier, and if so call
* {@link PartitionResource#seek} to move it to the latest offsets. Note that this method does not restart background

View File

@ -19,10 +19,12 @@
package org.apache.druid.indexing.kinesis.supervisor;
import com.amazonaws.services.kinesis.model.Shard;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.aws.AWSCredentialsConfig;
import org.apache.druid.common.utils.IdUtils;
import org.apache.druid.data.input.impl.ByteEntity;
@ -64,6 +66,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
/**
@ -88,6 +91,11 @@ public class KinesisSupervisor extends SeekableStreamSupervisor<String, String,
private final AWSCredentialsConfig awsCredentialsConfig;
private volatile Map<String, Long> currentPartitionTimeLag;
// Caches of the ids of closed shards, used to find ignorable (closed and empty) shards.
// Each closed shard is checked against kinesis once and the outcome is stored in exactly one of the
// two sets below, so that the costly emptiness check is not repeated on later refreshes.
// Both sets are populated and pruned by updateClosedShardCache().
// Closed shards that were found to be empty
private final Set<String> emptyClosedShardIds = new TreeSet<>();
// Closed shards that were found to be non-empty
private final Set<String> nonEmptyClosedShardIds = new TreeSet<>();
public KinesisSupervisor(
final TaskStorage taskStorage,
final TaskMaster taskMaster,
@ -416,6 +424,52 @@ public class KinesisSupervisor extends SeekableStreamSupervisor<String, String,
return true;
}
/**
 * @return the value of {@code skipIgnorableShards} from the tuning config of the supervisor spec
 */
@Override
protected boolean shouldSkipIgnorablePartitions()
{
  final boolean skipIgnorableShards = spec.getSpec().getTuningConfig().isSkipIgnorableShards();
  return skipIgnorableShards;
}
/**
 * A kinesis shard is considered to be an ignorable partition if it is both closed and empty.
 * <p>
 * Refreshes the closed-shard cache and then returns a snapshot of the empty closed shards. The method
 * holds the same monitor as {@code updateClosedShardCache()} for its whole duration, so the snapshot is
 * never copied while another thread is in the middle of mutating the cache.
 *
 * @return immutable set of ids of the shards ignorable by kinesis ingestion
 */
@Override
protected synchronized Set<String> computeIgnorablePartitionIds()
{
  updateClosedShardCache();
  return ImmutableSet.copyOf(emptyClosedShardIds);
}
/**
 * Brings the caches of empty and non-empty closed shards in line with the shards currently listed
 * for the stream. Kinesis is asked about a closed shard's contents only if the shard is in neither cache.
 */
private synchronized void updateClosedShardCache()
{
  final KinesisRecordSupplier supplier = (KinesisRecordSupplier) recordSupplier;
  final String streamName = spec.getSource();

  // Ids of all currently listed shards that have already been closed
  final Set<String> closedShardIds = supplier.getShards(streamName)
                                             .stream()
                                             .filter(this::isShardClosed)
                                             .map(Shard::getShardId)
                                             .collect(Collectors.toSet());

  // Drop cached ids that are no longer among the currently listed closed shards
  emptyClosedShardIds.retainAll(closedShardIds);
  nonEmptyClosedShardIds.retainAll(closedShardIds);

  for (String shardId : closedShardIds) {
    final boolean alreadyClassified = emptyClosedShardIds.contains(shardId)
                                      || nonEmptyClosedShardIds.contains(shardId);
    if (!alreadyClassified) {
      // Not cached yet: ask kinesis whether the shard is empty and remember the answer
      final Set<String> cache = supplier.isClosedShardEmpty(streamName, shardId)
                                ? emptyClosedShardIds
                                : nonEmptyClosedShardIds;
      cache.add(shardId);
    }
  }
}
@Override
protected SeekableStreamDataSourceMetadata<String, String> createDataSourceMetadataWithExpiredPartitions(
SeekableStreamDataSourceMetadata<String, String> currentMetadata, Set<String> expiredPartitionIds
@ -481,4 +535,15 @@ public class KinesisSupervisor extends SeekableStreamSupervisor<String, String,
return new KinesisDataSourceMetadata(newSequences);
}
/**
 * Tells whether a shard is closed, which is the case iff its sequence number range has an
 * ending sequence number.
 *
 * @param shard shard to inspect
 * @return true if the shard is closed, false otherwise
 */
private boolean isShardClosed(Shard shard)
{
  final String endingSequenceNumber = shard.getSequenceNumberRange().getEndingSequenceNumber();
  return endingSequenceNumber != null;
}
}

View File

@ -41,6 +41,7 @@ public class KinesisSupervisorTuningConfig extends KinesisIndexTaskTuningConfig
private final Duration shutdownTimeout;
private final Duration repartitionTransitionDuration;
private final Duration offsetFetchPeriod;
private final boolean skipIgnorableShards;
public static KinesisSupervisorTuningConfig defaultConfig()
{
@ -77,6 +78,7 @@ public class KinesisSupervisorTuningConfig extends KinesisIndexTaskTuningConfig
null,
null,
null,
null,
null
);
}
@ -114,7 +116,8 @@ public class KinesisSupervisorTuningConfig extends KinesisIndexTaskTuningConfig
@JsonProperty("maxRecordsPerPoll") @Nullable Integer maxRecordsPerPoll,
@JsonProperty("intermediateHandoffPeriod") Period intermediateHandoffPeriod,
@JsonProperty("repartitionTransitionDuration") Period repartitionTransitionDuration,
@JsonProperty("offsetFetchPeriod") Period offsetFetchPeriod
@JsonProperty("offsetFetchPeriod") Period offsetFetchPeriod,
@JsonProperty("skipIgnorableShards") Boolean skipIgnorableShards
)
{
super(
@ -162,6 +165,7 @@ public class KinesisSupervisorTuningConfig extends KinesisIndexTaskTuningConfig
offsetFetchPeriod,
DEFAULT_OFFSET_FETCH_PERIOD
);
this.skipIgnorableShards = (skipIgnorableShards != null ? skipIgnorableShards : false);
}
@Override
@ -212,6 +216,12 @@ public class KinesisSupervisorTuningConfig extends KinesisIndexTaskTuningConfig
return offsetFetchPeriod;
}
/**
 * @return the configured {@code skipIgnorableShards} flag; the constructor stores {@code false}
 * when the property is not supplied
 */
@JsonProperty
public boolean isSkipIgnorableShards()
{
return skipIgnorableShards;
}
@Override
public String toString()
{
@ -246,6 +256,7 @@ public class KinesisSupervisorTuningConfig extends KinesisIndexTaskTuningConfig
", maxRecordsPerPoll=" + getMaxRecordsPerPoll() +
", intermediateHandoffPeriod=" + getIntermediateHandoffPeriod() +
", repartitionTransitionDuration=" + getRepartitionTransitionDuration() +
", skipIgnorableShards=" + isSkipIgnorableShards() +
'}';
}

View File

@ -317,6 +317,7 @@ public class KinesisIndexTaskTuningConfigTest
null,
null,
null,
null,
null
);
KinesisIndexTaskTuningConfig copy = (KinesisIndexTaskTuningConfig) original.convertToTaskTuningConfig();

View File

@ -50,6 +50,7 @@ import org.junit.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -1023,4 +1024,56 @@ public class KinesisRecordSupplierTest extends EasyMockSupport
}
verifyAll();
}
@Test
public void testIsClosedShardEmpty()
{
  final AmazonKinesis kinesisMock = EasyMock.mock(AmazonKinesis.class);
  final KinesisRecordSupplier supplier = new KinesisRecordSupplier(
      kinesisMock,
      recordsPerFetch,
      0,
      2,
      false,
      100,
      5000,
      5000,
      60000,
      5,
      true
  );
  final Record anyRecord = new Record();

  // One shard per combination of {no records, one record} x {null, non-null next iterator}
  final String noRecordsNullNext = "0";
  final String oneRecordNullNext = "1";
  final String noRecordsWithNext = "2";
  final String oneRecordWithNext = "3";

  setupMockKinesisForShardId(kinesisMock, noRecordsNullNext, new ArrayList<>(), null);
  setupMockKinesisForShardId(kinesisMock, oneRecordNullNext, Collections.singletonList(anyRecord), null);
  setupMockKinesisForShardId(kinesisMock, noRecordsWithNext, new ArrayList<>(), "nextIterator");
  setupMockKinesisForShardId(kinesisMock, oneRecordWithNext, Collections.singletonList(anyRecord), "nextIterator");

  EasyMock.replay(kinesisMock);

  // Only the combination of "no records" and "null next iterator" counts as an empty closed shard
  Assert.assertTrue(supplier.isClosedShardEmpty(STREAM, noRecordsNullNext));
  Assert.assertFalse(supplier.isClosedShardEmpty(STREAM, oneRecordNullNext));
  Assert.assertFalse(supplier.isClosedShardEmpty(STREAM, noRecordsWithNext));
  Assert.assertFalse(supplier.isClosedShardEmpty(STREAM, oneRecordWithNext));
}
/**
 * Registers the two kinesis calls made when probing the given shard: fetching a TRIM_HORIZON shard
 * iterator and reading a single record with it.
 *
 * @param kinesis              mock on which the expectations are recorded
 * @param shardId              id of the shard being probed
 * @param expectedRecords      records that the mocked read returns
 * @param expectedNextIterator next shard iterator that the mocked read returns, may be null
 */
private void setupMockKinesisForShardId(AmazonKinesis kinesis, String shardId,
                                        List<Record> expectedRecords, String expectedNextIterator)
{
  final String iteratorForShard = "shardIterator" + shardId;

  // Expectation 1: the iterator lookup for this shard, exactly once
  EasyMock.expect(kinesis.getShardIterator(STREAM, shardId, ShardIteratorType.TRIM_HORIZON.toString()))
          .andReturn(new GetShardIteratorResult().withShardIterator(iteratorForShard))
          .once();

  // Expectation 2: the single-record read issued with that iterator
  final GetRecordsRequest singleRecordRead = new GetRecordsRequest().withShardIterator(iteratorForShard)
                                                                    .withLimit(1);
  final GetRecordsResult cannedResponse = new GetRecordsResult().withRecords(expectedRecords)
                                                                .withNextShardIterator(expectedNextIterator);
  EasyMock.expect(kinesis.getRecords(singleRecordRead)).andReturn(cannedResponse);
}
}

View File

@ -19,6 +19,8 @@
package org.apache.druid.indexing.kinesis.supervisor;
import com.amazonaws.services.kinesis.model.SequenceNumberRange;
import com.amazonaws.services.kinesis.model.Shard;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
@ -106,6 +108,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -204,6 +207,7 @@ public class KinesisSupervisorTest extends EasyMockSupport
null,
null,
null,
null,
null
);
rowIngestionMetersFactory = new TestUtils().getRowIngestionMetersFactory();
@ -3941,6 +3945,7 @@ public class KinesisSupervisorTest extends EasyMockSupport
42, // This property is different from tuningConfig
null,
null,
null,
null
);
@ -4885,6 +4890,55 @@ public class KinesisSupervisorTest extends EasyMockSupport
Assert.assertEquals(expectedPartitionOffsets, supervisor.getPartitionOffsets());
}
@Test
public void testGetIgnorablePartitionIds()
{
  supervisor = getTestableSupervisor(1, 2, true, "PT1H", null, null);
  supervisor.setupRecordSupplier();
  supervisor.tryInit();
  final String streamName = supervisor.getKinesisSupervisorSpec().getSource();

  // A shard is open while its ending sequence number is null and closed once it is set
  final SequenceNumberRange rangeOfOpenShard = new SequenceNumberRange().withEndingSequenceNumber(null);
  final SequenceNumberRange rangeOfClosedShard = new SequenceNumberRange().withEndingSequenceNumber("non-null");

  final Shard openShard = new Shard().withShardId("openShard")
                                     .withSequenceNumberRange(rangeOfOpenShard);
  final Shard emptyClosedShard = new Shard().withShardId("emptyClosedShard")
                                            .withSequenceNumberRange(rangeOfClosedShard);
  final Shard nonEmptyClosedShard = new Shard().withShardId("nonEmptyClosedShard")
                                               .withSequenceNumberRange(rangeOfClosedShard);
  final String emptyClosedId = emptyClosedShard.getShardId();
  final String nonEmptyClosedId = nonEmptyClosedShard.getShardId();

  // Shard listings returned by the five successive refreshes below
  final Set<Shard> everyShard = ImmutableSet.of(openShard, nonEmptyClosedShard, emptyClosedShard);
  EasyMock.expect(supervisorRecordSupplier.getShards(streamName))
          .andReturn(everyShard).once()
          .andReturn(everyShard).once()
          .andReturn(ImmutableSet.of(openShard, emptyClosedShard)).once()
          .andReturn(ImmutableSet.of(openShard)).once()
          .andReturn(everyShard).once();

  // Each emptiness check runs twice: on the first refresh, when nothing is cached yet,
  // and on the last one, by which time both shards have been evicted from the cache
  EasyMock.expect(supervisorRecordSupplier.isClosedShardEmpty(streamName, emptyClosedId))
          .andReturn(true).times(2);
  EasyMock.expect(supervisorRecordSupplier.isClosedShardEmpty(streamName, nonEmptyClosedId))
          .andReturn(false).times(2);

  EasyMock.replay(supervisorRecordSupplier);

  final Set<String> justTheEmptyClosedShard = Collections.singleton(emptyClosedId);

  // Refresh 1: listed = {open, empty-closed, nonEmpty-closed}; both closed shards enter the cache
  Assert.assertEquals(justTheEmptyClosedShard, supervisor.computeIgnorablePartitionIds());

  // Refresh 2: same listing, answered from the cache
  Assert.assertEquals(justTheEmptyClosedShard, supervisor.computeIgnorablePartitionIds());

  // Refresh 3: listed = {open, empty-closed}; nonEmpty-closed is evicted from the cache
  Assert.assertEquals(justTheEmptyClosedShard, supervisor.computeIgnorablePartitionIds());

  // Refresh 4: listed = {open}; empty-closed is evicted as well, nothing is ignorable
  Assert.assertEquals(new HashSet<>(), supervisor.computeIgnorablePartitionIds());

  // Refresh 5: listed = {open, empty-closed, nonEmpty-closed}; both closed shards are re-checked and re-cached
  Assert.assertEquals(justTheEmptyClosedShard, supervisor.computeIgnorablePartitionIds());
}
private TestableKinesisSupervisor getTestableSupervisor(
int replicas,
int taskCount,
@ -4995,6 +5049,7 @@ public class KinesisSupervisorTest extends EasyMockSupport
null,
null,
null,
null,
null
);

View File

@ -116,11 +116,11 @@ public interface RecordSupplier<PartitionIdType, SequenceOffsetType, RecordType
SequenceOffsetType getPosition(StreamPartition<PartitionIdType> partition);
/**
* returns the set of partitions under the given stream
* returns the set of all available partitions under the given stream
*
* @param stream name of stream
*
* @return set of partitions
* @return set of partition ids belonging to the stream
*/
Set<PartitionIdType> getPartitionIds(String stream);

View File

@ -2296,9 +2296,30 @@ public abstract class SeekableStreamSupervisor<PartitionIdType, SequenceOffsetTy
return false;
}
/**
 * Whether the partitions returned by {@link #computeIgnorablePartitionIds()} should be left out of
 * the partition count and of the partition ids fetched from the record supplier.
 *
 * This base implementation never skips any partition.
 *
 * @return false
 */
protected boolean shouldSkipIgnorablePartitions()
{
return false;
}
/**
 * Computes the ids of the partitions that may be ignored. The supervisor consults this method only
 * when {@link #shouldSkipIgnorablePartitions()} returns true.
 *
 * These partitions can be safely ignored for both ingestion task assignment and autoscaler limits
 *
 * This base implementation reports no ignorable partitions.
 *
 * @return set of ids of ignorable partitions
 */
protected Set<PartitionIdType> computeIgnorablePartitionIds()
{
return ImmutableSet.of();
}
public int getPartitionCount()
{
return recordSupplier.getPartitionIds(ioConfig.getStream()).size();
int partitionCount = recordSupplier.getPartitionIds(ioConfig.getStream()).size();
if (shouldSkipIgnorablePartitions()) {
partitionCount -= computeIgnorablePartitionIds().size();
}
return partitionCount;
}
private boolean updatePartitionDataFromStream()
@ -2308,6 +2329,9 @@ public abstract class SeekableStreamSupervisor<PartitionIdType, SequenceOffsetTy
recordSupplierLock.lock();
try {
partitionIdsFromSupplier = recordSupplier.getPartitionIds(ioConfig.getStream());
if (shouldSkipIgnorablePartitions()) {
partitionIdsFromSupplier.removeAll(computeIgnorablePartitionIds());
}
}
catch (Exception e) {
stateManager.recordThrowableEvent(e);

View File

@ -176,7 +176,7 @@ public class KinesisAdminClient implements StreamAdminClient
if (nextToken == null) {
return shards.build();
}
listShardsRequest = new ListShardsRequest().withNextToken(listShardsResult.getNextToken());
listShardsRequest = new ListShardsRequest().withNextToken(nextToken);
}
}