remove the 'white lie' and track refreshing bytes explicitly
parent c66b05d9cf
commit 99e328c9bf
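The change, as reflected in the hunks below: InternalEngine drops its volatile refreshing flag and the "white lie" in indexBufferRAMBytesUsed(); IndexShard.refresh() now registers the bytes it is about to move to disk with IndexingMemoryController.addRefreshingBytes() and deregisters them in a finally block; IndexingMemoryController keeps a per-shard map of those in-flight refreshing bytes and subtracts them when summing up indexing-buffer heap use; and the hardwired per-shard translog buffer is lowered from 32kb to 8kb.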
@@ -102,8 +102,6 @@ public class InternalEngine extends Engine {

     private volatile SegmentInfos lastCommittedSegmentInfos;

-    private volatile boolean refreshing;
-
     private final IndexThrottle throttle;

     public InternalEngine(EngineConfig engineConfig, boolean skipInitialTranslogRecovery) throws EngineException {
@@ -489,7 +487,6 @@ public class InternalEngine extends Engine {
         // since it flushes the index as well (though, in terms of concurrency, we are allowed to do it)
         try (ReleasableLock lock = readLock.acquire()) {
             ensureOpen();
-            refreshing = true;
             searcherManager.maybeRefreshBlocking();
         } catch (AlreadyClosedException e) {
             ensureOpen();
@@ -499,8 +496,6 @@ public class InternalEngine extends Engine {
         } catch (Throwable t) {
             failEngine("refresh failed", t);
             throw new RefreshFailedEngineException(shardId, t);
-        } finally {
-            refreshing = false;
         }

         // TODO: maybe we should just put a scheduled job in threadPool?
@@ -759,16 +754,8 @@ public class InternalEngine extends Engine {

     @Override
     public long indexBufferRAMBytesUsed() {
-        if (refreshing) {
-            // We tell a "white lie" here, pretending that we instantaneously moved all
-            // heap to disk at the start of refresh. We do this so IMC behaves as if we
-            // are using no heap, else it will just keep asking us when it should be
-            // asking others:
-            return 0;
-        } else {
         return indexWriter.ramBytesUsed() + versionMap.ramBytesUsedForRefresh();
-        }
     }

     @Override
     public List<Segment> segments(boolean verbose) {
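Before this change, indexBufferRAMBytesUsed() reported 0 bytes while a refresh was in flight; as the removed comment explains, that "white lie" kept the IndexingMemoryController from repeatedly asking the already-refreshing shard to free heap. After the change the engine always reports its real IndexWriter and version-map bytes, and the discounting of in-flight refreshes moves into IndexingMemoryController (see the hunks below).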
@@ -506,12 +506,19 @@ public class IndexShard extends AbstractIndexShardComponent implements IndexSett

     public void refresh(String source) {
         verifyNotClosed();
+        // nocommit OK to throw EngineClosedExc?
+        long ramBytesUsed = getEngine().indexBufferRAMBytesUsed();
+        indexingMemoryController.addRefreshingBytes(shardId, ramBytesUsed);
+        try {
         if (logger.isTraceEnabled()) {
-            logger.trace("refresh with source: {}", source);
+            logger.trace("refresh with source: {} indexBufferRAMBytesUsed={}", source, ramBytesUsed);
         }
         long time = System.nanoTime();
         getEngine().refresh(source);
         refreshMetric.inc(System.nanoTime() - time);
+        } finally {
+            indexingMemoryController.removeRefreshingBytes(shardId, ramBytesUsed);
+        }
     }

     public RefreshStats refreshStats() {
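addRefreshingBytes() is called before the engine refresh starts, and removeRefreshingBytes() sits in a finally block so the controller's per-shard entry is cleared even if getEngine().refresh(source) throws; otherwise a failed refresh would leave a stale entry that the controller kept subtracting. A self-contained sketch of this register/deregister pattern and the controller-side accounting follows the IndexingMemoryController hunks below.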
@@ -58,7 +58,7 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin
     public static final String SHARD_MEMORY_INTERVAL_TIME_SETTING = "indices.memory.interval";

     /** Hardwired translog buffer size */
-    public static final ByteSizeValue SHARD_TRANSLOG_BUFFER = ByteSizeValue.parseBytesSizeValue("32kb", "SHARD_TRANSLOG_BUFFER");
+    public static final ByteSizeValue SHARD_TRANSLOG_BUFFER = ByteSizeValue.parseBytesSizeValue("8kb", "SHARD_TRANSLOG_BUFFER");

     private final ThreadPool threadPool;
     private final IndicesService indicesService;
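This is the change the removed "// nocommit lower the translog buffer to 8 KB" reminder in run() (last hunk below) pointed at: the hardwired per-shard translog buffer drops from 32kb to 8kb.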
@@ -75,6 +75,11 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin

     private final ShardsIndicesStatusChecker statusChecker;

+    /** How many bytes we are currently moving to disk by the engine to refresh */
+    private final AtomicLong bytesRefreshingNow = new AtomicLong();
+
+    private final Map<ShardId,Long> refreshingBytes = new ConcurrentHashMap<>();
+
     @Inject
     public IndexingMemoryController(Settings settings, ThreadPool threadPool, IndicesService indicesService) {
         this(settings, threadPool, indicesService, JvmInfo.jvmInfo().getMem().getHeapMax().bytes());
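Two fields are added here: bytesRefreshingNow, an AtomicLong subtracted from the total in the run() check below, and refreshingBytes, the per-shard ConcurrentHashMap maintained by addRefreshingBytes()/removeRefreshingBytes(). Within the hunks shown in this commit, bytesRefreshingNow is only read, never updated.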
@@ -117,6 +122,15 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin
                      SHARD_MEMORY_INTERVAL_TIME_SETTING, this.interval);
     }
+
+    public void addRefreshingBytes(ShardId shardId, long numBytes) {
+        refreshingBytes.put(shardId, numBytes);
+    }
+
+    public void removeRefreshingBytes(ShardId shardId, long numBytes) {
+        Long result = refreshingBytes.remove(shardId);
+        assert result != null;
+    }

     @Override
     protected void doStart() {
         // it's fine to run it on the scheduler thread, no busy work
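Note that ConcurrentHashMap.remove(key) returns the previous value mapped to the key (a Long here), not a boolean, so the assertion checks for a non-null result to verify an entry was actually removed; the numBytes argument of removeRefreshingBytes() is unused in the body shown here.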
@@ -248,29 +262,59 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin
        @Override
        public synchronized void run() {

-           // nocommit lower the translog buffer to 8 KB
-
            // nocommit add defensive try/catch-everything here? bad if an errant EngineClosedExc kills off this thread!!

            // Fast check to sum up how much heap all shards' indexing buffers are using now:
            long totalBytesUsed = 0;
            for (ShardId shardId : availableShards()) {

                // Give shard a chance to transition to inactive so sync'd flush can happen:
                checkIdle(shardId, inactiveTime.nanos());

-               totalBytesUsed += getIndexBufferRAMBytesUsed(shardId);
-               System.out.println("IMC: " + shardId + " using " + (getIndexBufferRAMBytesUsed(shardId)/1024./1024.) + " MB");
+               // nocommit explain why order is important here!
+               Long refreshingBytes = refreshingBytes.get(shardId);
+
+               long shardBytesUsed = getIndexBufferRAMBytesUsed(shardId);
+
+               if (refreshingBytes != null) {
+                   // Only count up bytes not already being refreshed:
+                   shardBytesUsed -= refreshingBytes;
+
+                   // If the refresh completed just after we pulled refreshingBytes and before we pulled index buffer bytes, then we could
+                   // have a negative value here:
+                   if (shardBytesUsed < 0) {
+                       continue;
+                   }
+               }
+
+               totalBytesUsed += shardBytesUsed;
+               System.out.println("IMC: " + shardId + " using " + (shardBytesUsed/1024./1024.) + " MB");
            }

            System.out.println(((System.currentTimeMillis() - startMS)/1000.0) + ": TOT=" + totalBytesUsed + " vs " + indexingBuffer.bytes());

-           if (totalBytesUsed > indexingBuffer.bytes()) {
+           if (totalBytesUsed - bytesRefreshingNow.get() > indexingBuffer.bytes()) {
                // OK we are using too much; make a queue and ask largest shard(s) to refresh:
                logger.debug("now refreshing some shards: total indexing bytes used [{}] vs index_buffer_size [{}]", new ByteSizeValue(totalBytesUsed), indexingBuffer);
                PriorityQueue<ShardAndBytesUsed> queue = new PriorityQueue<>();
                for (ShardId shardId : availableShards()) {
+                   // nocommit explain why order is important here!
+                   Long refreshingBytes = refreshingBytes.get(shardId);
+
                    long shardBytesUsed = getIndexBufferRAMBytesUsed(shardId);
+
+                   if (refreshingBytes != null) {
+                       // Only count up bytes not already being refreshed:
+                       shardBytesUsed -= refreshingBytes;
+
+                       // If the refresh completed just after we pulled refreshingBytes and before we pulled index buffer bytes, then we could
+                       // have a negative value here:
+                       if (shardBytesUsed < 0) {
+                           continue;
+                       }
+                   }
+
                    if (shardBytesUsed > 0) {
                        queue.add(new ShardAndBytesUsed(shardBytesUsed, shardId));
                    }
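The loop bodies above read a shard's registered refreshing bytes before sampling its current index-buffer bytes; if the refresh completes between the two reads the difference can go negative, which the continue guards against. Below is a minimal, self-contained sketch of that bookkeeping under hypothetical stand-in names (plain String shard ids and a LongSupplier for the buffer size; not the Elasticsearch classes). The local is named refreshing rather than refreshingBytes because, as extracted above, Long refreshingBytes = refreshingBytes.get(shardId) would shadow the field inside its own initializer.

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.function.LongSupplier;

    class RefreshingBytesAccounting {

        // shardId -> bytes that shard reported as currently being moved to disk by a refresh
        private final Map<String, Long> refreshingBytes = new ConcurrentHashMap<>();

        // Called by a shard just before it starts an engine refresh.
        void addRefreshingBytes(String shardId, long numBytes) {
            refreshingBytes.put(shardId, numBytes);
        }

        // Called in a finally block once the refresh finished (or failed).
        void removeRefreshingBytes(String shardId) {
            refreshingBytes.remove(shardId);
        }

        // Bytes to charge against the indexing-buffer budget for one shard.
        // Order matters: read the registered refreshing bytes first, then sample the live
        // buffer size; if the refresh completes between the two reads the difference can
        // go negative, and the shard is simply not charged this round.
        long chargeableBytes(String shardId, LongSupplier indexBufferBytes) {
            Long refreshing = refreshingBytes.get(shardId); // read before sampling the buffer
            long shardBytesUsed = indexBufferBytes.getAsLong();
            if (refreshing != null) {
                // Only count bytes not already being refreshed:
                shardBytesUsed -= refreshing;
                if (shardBytesUsed < 0) {
                    return 0;
                }
            }
            return shardBytesUsed;
        }
    }

A shard would call addRefreshingBytes(id, bytes) before its engine refresh, removeRefreshingBytes(id) in the finally block, and the controller would sum chargeableBytes(id, shard::indexBufferRAMBytesUsed) over all shards, mirroring the refresh() and run() changes above.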