diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 5baca022a21..093e110cb82 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -183,8 +183,15 @@ public final class IndexSettings { Setting.timeSetting("index.refresh_interval", DEFAULT_REFRESH_INTERVAL, new TimeValue(-1, TimeUnit.MILLISECONDS), Property.Dynamic, Property.IndexScope); public static final Setting INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING = - Setting.byteSizeSetting("index.translog.flush_threshold_size", new ByteSizeValue(512, ByteSizeUnit.MB), Property.Dynamic, - Property.IndexScope); + Setting.byteSizeSetting("index.translog.flush_threshold_size", new ByteSizeValue(512, ByteSizeUnit.MB), + /* + * An empty translog occupies 43 bytes on disk. If the flush threshold is below this, the flush thread + * can get stuck in an infinite loop as the shouldPeriodicallyFlush can still be true after flushing. + * However, small thresholds are useful for testing so we do not add a large lower bound here. + */ + new ByteSizeValue(Translog.DEFAULT_HEADER_SIZE_IN_BYTES + 1, ByteSizeUnit.BYTES), + new ByteSizeValue(Long.MAX_VALUE, ByteSizeUnit.BYTES), + Property.Dynamic, Property.IndexScope); /** * Controls how long translog files that are no longer needed for persistence reasons @@ -219,9 +226,9 @@ public final class IndexSettings { * generation threshold. However, small thresholds are useful for testing so we * do not add a large lower bound here. */ - new ByteSizeValue(64, ByteSizeUnit.BYTES), + new ByteSizeValue(Translog.DEFAULT_HEADER_SIZE_IN_BYTES + 1, ByteSizeUnit.BYTES), new ByteSizeValue(Long.MAX_VALUE, ByteSizeUnit.BYTES), - new Property[]{Property.Dynamic, Property.IndexScope}); + Property.Dynamic, Property.IndexScope); /** * Index setting to enable / disable deletes garbage collection. diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index b01ef12d647..a447a1d123e 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -1470,6 +1470,8 @@ public class InternalEngine extends Engine { if (uncommittedSizeOfCurrentCommit < flushThreshold) { return false; } + assert translog.uncommittedOperations() > 0 : "translog required to flush periodically but not contain any uncommitted operation; " + + "uncommitted translog size [" + uncommittedSizeOfCurrentCommit + "], flush threshold [" + flushThreshold + "]"; /* * We should only flush ony if the shouldFlush condition can become false after flushing. * This condition will change if the `uncommittedSize` of the new commit is smaller than @@ -1477,14 +1479,7 @@ public class InternalEngine extends Engine { * thus the IndexWriter#hasUncommittedChanges condition is not considered. */ final long uncommittedSizeOfNewCommit = translog.sizeOfGensAboveSeqNoInBytes(localCheckpointTracker.getCheckpoint() + 1); - /* - * If flushThreshold is too small, we may repeatedly flush even there is no uncommitted operation - * as #sizeOfGensAboveSeqNoInByte and #uncommittedSizeInBytes can return different values. - * An empty translog file has non-zero `uncommittedSize` (the translog header), and method #sizeOfGensAboveSeqNoInBytes can - * return 0 now(no translog gen contains ops above local checkpoint) but method #uncommittedSizeInBytes will return an actual - * non-zero value after rolling a new translog generation. This can be avoided by checking the actual uncommitted operations. - */ - return uncommittedSizeOfNewCommit < uncommittedSizeOfCurrentCommit && translog.uncommittedOperations() > 0; + return uncommittedSizeOfNewCommit < uncommittedSizeOfCurrentCommit; } @Override diff --git a/server/src/main/java/org/elasticsearch/index/translog/Translog.java b/server/src/main/java/org/elasticsearch/index/translog/Translog.java index 3cbc8fc5305..c2c7ec7e255 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/Translog.java +++ b/server/src/main/java/org/elasticsearch/index/translog/Translog.java @@ -108,6 +108,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC public static final String CHECKPOINT_FILE_NAME = "translog" + CHECKPOINT_SUFFIX; static final Pattern PARSE_STRICT_ID_PATTERN = Pattern.compile("^" + TRANSLOG_FILE_PREFIX + "(\\d+)(\\.tlog)$"); + public static final int DEFAULT_HEADER_SIZE_IN_BYTES = TranslogWriter.getHeaderLength(UUIDs.randomBase64UUID()); // the list of translog readers is guaranteed to be in order of translog generation private final List readers = new ArrayList<>(); @@ -451,7 +452,10 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC * @throws IOException if creating the translog failed */ TranslogWriter createWriter(long fileGeneration) throws IOException { - return createWriter(fileGeneration, getMinFileGeneration(), globalCheckpointSupplier.getAsLong()); + final TranslogWriter writer = createWriter(fileGeneration, getMinFileGeneration(), globalCheckpointSupplier.getAsLong()); + assert writer.sizeInBytes() == DEFAULT_HEADER_SIZE_IN_BYTES : "Mismatch translog header size; " + + "empty translog size [" + writer.sizeInBytes() + ", header size [" + DEFAULT_HEADER_SIZE_IN_BYTES + "]"; + return writer; } /** diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardIT.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardIT.java index 601eb8e9b1d..dbbf4bac327 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardIT.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardIT.java @@ -350,7 +350,7 @@ public class IndexShardIT extends ESSingleNodeTestCase { }); assertEquals(0, translog.uncommittedOperations()); translog.sync(); - long size = translog.uncommittedSizeInBytes(); + long size = Math.max(translog.uncommittedSizeInBytes(), Translog.DEFAULT_HEADER_SIZE_IN_BYTES + 1); logger.info("--> current translog size: [{}] num_ops [{}] generation [{}]", translog.uncommittedSizeInBytes(), translog.uncommittedOperations(), translog.getGeneration()); client().admin().indices().prepareUpdateSettings("test").setSettings(Settings.builder().put(