remove the 'white lie' and track refreshing bytes explicitly
parent c66b05d9cf
commit 99e328c9bf
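The change, as reflected in the hunks below: InternalEngine drops its volatile refreshing flag and the "white lie" in indexBufferRAMBytesUsed(); IndexShard.refresh() now registers the bytes it is about to move to disk with IndexingMemoryController.addRefreshingBytes() and deregisters them in a finally block; IndexingMemoryController keeps a per-shard map of those in-flight refreshing bytes and subtracts them when summing up indexing-buffer heap use; and the hardwired per-shard translog buffer is lowered from 32kb to 8kb.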
@@ -102,8 +102,6 @@ public class InternalEngine extends Engine {

     private volatile SegmentInfos lastCommittedSegmentInfos;

-    private volatile boolean refreshing;
-
     private final IndexThrottle throttle;

     public InternalEngine(EngineConfig engineConfig, boolean skipInitialTranslogRecovery) throws EngineException {
@@ -489,7 +487,6 @@ public class InternalEngine extends Engine {
         // since it flushes the index as well (though, in terms of concurrency, we are allowed to do it)
         try (ReleasableLock lock = readLock.acquire()) {
             ensureOpen();
-            refreshing = true;
             searcherManager.maybeRefreshBlocking();
         } catch (AlreadyClosedException e) {
             ensureOpen();
@@ -499,8 +496,6 @@ public class InternalEngine extends Engine {
         } catch (Throwable t) {
             failEngine("refresh failed", t);
             throw new RefreshFailedEngineException(shardId, t);
-        } finally {
-            refreshing = false;
         }

         // TODO: maybe we should just put a scheduled job in threadPool?
@@ -759,16 +754,8 @@ public class InternalEngine extends Engine {

     @Override
     public long indexBufferRAMBytesUsed() {
-        if (refreshing) {
-            // We tell a "white lie" here, pretending that we instantaneously moved all
-            // heap to disk at the start of refresh. We do this so IMC behaves as if we
-            // are using no heap, else it will just keep asking us when it should be
-            // asking others:
-            return 0;
-        } else {
         return indexWriter.ramBytesUsed() + versionMap.ramBytesUsedForRefresh();
-        }
     }

     @Override
     public List<Segment> segments(boolean verbose) {
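Before this change, indexBufferRAMBytesUsed() reported 0 bytes while a refresh was in flight; as the removed comment explains, that "white lie" kept the IndexingMemoryController from repeatedly asking the already-refreshing shard to free heap. After the change the engine always reports its real IndexWriter and version-map bytes, and the discounting of in-flight refreshes moves into IndexingMemoryController (see the hunks below).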
@@ -506,12 +506,19 @@ public class IndexShard extends AbstractIndexShardComponent implements IndexSett

     public void refresh(String source) {
         verifyNotClosed();
+        // nocommit OK to throw EngineClosedExc?
+        long ramBytesUsed = getEngine().indexBufferRAMBytesUsed();
+        indexingMemoryController.addRefreshingBytes(shardId, ramBytesUsed);
+        try {
         if (logger.isTraceEnabled()) {
-            logger.trace("refresh with source: {}", source);
+            logger.trace("refresh with source: {} indexBufferRAMBytesUsed={}", source, ramBytesUsed);
         }
         long time = System.nanoTime();
         getEngine().refresh(source);
         refreshMetric.inc(System.nanoTime() - time);
+        } finally {
+            indexingMemoryController.removeRefreshingBytes(shardId, ramBytesUsed);
+        }
     }

     public RefreshStats refreshStats() {
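addRefreshingBytes() is called before the engine refresh starts, and removeRefreshingBytes() sits in a finally block so the controller's per-shard entry is cleared even if getEngine().refresh(source) throws; otherwise a failed refresh would leave a stale entry that the controller kept subtracting. A self-contained sketch of this register/deregister pattern and the controller-side accounting follows the IndexingMemoryController hunks below.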
@@ -58,7 +58,7 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin
     public static final String SHARD_MEMORY_INTERVAL_TIME_SETTING = "indices.memory.interval";

     /** Hardwired translog buffer size */
-    public static final ByteSizeValue SHARD_TRANSLOG_BUFFER = ByteSizeValue.parseBytesSizeValue("32kb", "SHARD_TRANSLOG_BUFFER");
+    public static final ByteSizeValue SHARD_TRANSLOG_BUFFER = ByteSizeValue.parseBytesSizeValue("8kb", "SHARD_TRANSLOG_BUFFER");

     private final ThreadPool threadPool;
     private final IndicesService indicesService;
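This is the change the removed "// nocommit lower the translog buffer to 8 KB" reminder in run() (last hunk below) pointed at: the hardwired per-shard translog buffer drops from 32kb to 8kb.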
@@ -75,6 +75,11 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin

     private final ShardsIndicesStatusChecker statusChecker;

+    /** How many bytes we are currently moving to disk by the engine to refresh */
+    private final AtomicLong bytesRefreshingNow = new AtomicLong();
+
+    private final Map<ShardId,Long> refreshingBytes = new ConcurrentHashMap<>();
+
     @Inject
     public IndexingMemoryController(Settings settings, ThreadPool threadPool, IndicesService indicesService) {
         this(settings, threadPool, indicesService, JvmInfo.jvmInfo().getMem().getHeapMax().bytes());
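Two fields are added here: bytesRefreshingNow, an AtomicLong subtracted from the total in the run() check below, and refreshingBytes, the per-shard ConcurrentHashMap maintained by addRefreshingBytes()/removeRefreshingBytes(). Within the hunks shown in this commit, bytesRefreshingNow is only read, never updated.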
@@ -117,6 +122,15 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin
                      SHARD_MEMORY_INTERVAL_TIME_SETTING, this.interval);
     }
+
+    public void addRefreshingBytes(ShardId shardId, long numBytes) {
+        refreshingBytes.put(shardId, numBytes);
+    }
+
+    public void removeRefreshingBytes(ShardId shardId, long numBytes) {
+        Long result = refreshingBytes.remove(shardId);
+        assert result != null;
+    }

     @Override
     protected void doStart() {
         // it's fine to run it on the scheduler thread, no busy work
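Note that ConcurrentHashMap.remove(key) returns the previous value mapped to the key (a Long here), not a boolean, so the assertion checks for a non-null result to verify an entry was actually removed; the numBytes argument of removeRefreshingBytes() is unused in the body shown here.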
@@ -248,29 +262,59 @@ public class IndexingMemoryController extends AbstractLifecycleComponent<Indexin
        @Override
        public synchronized void run() {

-           // nocommit lower the translog buffer to 8 KB
-
            // nocommit add defensive try/catch-everything here? bad if an errant EngineClosedExc kills off this thread!!

            // Fast check to sum up how much heap all shards' indexing buffers are using now:
            long totalBytesUsed = 0;
            for (ShardId shardId : availableShards()) {

                // Give shard a chance to transition to inactive so sync'd flush can happen:
                checkIdle(shardId, inactiveTime.nanos());

-               totalBytesUsed += getIndexBufferRAMBytesUsed(shardId);
-               System.out.println("IMC: " + shardId + " using " + (getIndexBufferRAMBytesUsed(shardId)/1024./1024.) + " MB");
+               // nocommit explain why order is important here!
+               Long refreshingBytes = refreshingBytes.get(shardId);
+
+               long shardBytesUsed = getIndexBufferRAMBytesUsed(shardId);
+
+               if (refreshingBytes != null) {
+                   // Only count up bytes not already being refreshed:
+                   shardBytesUsed -= refreshingBytes;
+
+                   // If the refresh completed just after we pulled refreshingBytes and before we pulled index buffer bytes, then we could
+                   // have a negative value here:
+                   if (shardBytesUsed < 0) {
+                       continue;
+                   }
+               }
+
+               totalBytesUsed += shardBytesUsed;
+               System.out.println("IMC: " + shardId + " using " + (shardBytesUsed/1024./1024.) + " MB");
            }

            System.out.println(((System.currentTimeMillis() - startMS)/1000.0) + ": TOT=" + totalBytesUsed + " vs " + indexingBuffer.bytes());

-           if (totalBytesUsed > indexingBuffer.bytes()) {
+           if (totalBytesUsed - bytesRefreshingNow.get() > indexingBuffer.bytes()) {
                // OK we are using too much; make a queue and ask largest shard(s) to refresh:
                logger.debug("now refreshing some shards: total indexing bytes used [{}] vs index_buffer_size [{}]", new ByteSizeValue(totalBytesUsed), indexingBuffer);
                PriorityQueue<ShardAndBytesUsed> queue = new PriorityQueue<>();
                for (ShardId shardId : availableShards()) {
+                   // nocommit explain why order is important here!
+                   Long refreshingBytes = refreshingBytes.get(shardId);
+
                    long shardBytesUsed = getIndexBufferRAMBytesUsed(shardId);
+
+                   if (refreshingBytes != null) {
+                       // Only count up bytes not already being refreshed:
+                       shardBytesUsed -= refreshingBytes;
+
+                       // If the refresh completed just after we pulled refreshingBytes and before we pulled index buffer bytes, then we could
+                       // have a negative value here:
+                       if (shardBytesUsed < 0) {
+                           continue;
+                       }
+                   }
+
                    if (shardBytesUsed > 0) {
                        queue.add(new ShardAndBytesUsed(shardBytesUsed, shardId));
                    }
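The loop bodies above read a shard's registered refreshing bytes before sampling its current index-buffer bytes; if the refresh completes between the two reads the difference can go negative, which the continue guards against. Below is a minimal, self-contained sketch of that bookkeeping under hypothetical stand-in names (plain String shard ids and a LongSupplier for the buffer size; not the Elasticsearch classes). The local is named refreshing rather than refreshingBytes because, as extracted above, Long refreshingBytes = refreshingBytes.get(shardId) would shadow the field inside its own initializer.

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.function.LongSupplier;

    class RefreshingBytesAccounting {

        // shardId -> bytes that shard reported as currently being moved to disk by a refresh
        private final Map<String, Long> refreshingBytes = new ConcurrentHashMap<>();

        // Called by a shard just before it starts an engine refresh.
        void addRefreshingBytes(String shardId, long numBytes) {
            refreshingBytes.put(shardId, numBytes);
        }

        // Called in a finally block once the refresh finished (or failed).
        void removeRefreshingBytes(String shardId) {
            refreshingBytes.remove(shardId);
        }

        // Bytes to charge against the indexing-buffer budget for one shard.
        // Order matters: read the registered refreshing bytes first, then sample the live
        // buffer size; if the refresh completes between the two reads the difference can
        // go negative, and the shard is simply not charged this round.
        long chargeableBytes(String shardId, LongSupplier indexBufferBytes) {
            Long refreshing = refreshingBytes.get(shardId); // read before sampling the buffer
            long shardBytesUsed = indexBufferBytes.getAsLong();
            if (refreshing != null) {
                // Only count bytes not already being refreshed:
                shardBytesUsed -= refreshing;
                if (shardBytesUsed < 0) {
                    return 0;
                }
            }
            return shardBytesUsed;
        }
    }

A shard would call addRefreshingBytes(id, bytes) before its engine refresh, removeRefreshingBytes(id) in the finally block, and the controller would sum chargeableBytes(id, shard::indexBufferRAMBytesUsed) over all shards, mirroring the refresh() and run() changes above.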