Make _id terms optional in segment containing only noop (#30409)
Previously, only index and delete operations were indexed into Lucene, therefore every segment should have both _id and _version terms as these operations contain both terms. However, this is no longer guaranteed after noop is also indexed into Lucene. A segment which contains only no-ops has neither _id nor _version terms because a no-op does not contain these terms. This change adds a dummy version to no-ops and makes _id terms optional in PerThreadIDVersionAndSeqNoLookup. Relates #30226
This commit is contained in:
parent
5d677a00d3
commit
75719ac71b
|
@ -28,6 +28,7 @@ import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndSeqNo;
|
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndSeqNo;
|
||||||
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
|
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
|
||||||
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
|
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
|
||||||
|
@ -66,15 +67,22 @@ final class PerThreadIDVersionAndSeqNoLookup {
|
||||||
*/
|
*/
|
||||||
PerThreadIDVersionAndSeqNoLookup(LeafReader reader, String uidField) throws IOException {
|
PerThreadIDVersionAndSeqNoLookup(LeafReader reader, String uidField) throws IOException {
|
||||||
this.uidField = uidField;
|
this.uidField = uidField;
|
||||||
Terms terms = reader.terms(uidField);
|
final Terms terms = reader.terms(uidField);
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
throw new IllegalArgumentException("reader misses the [" + uidField + "] field");
|
// If a segment contains only no-ops, it does not have _uid but has both _soft_deletes and _tombstone fields.
|
||||||
|
final NumericDocValues softDeletesDV = reader.getNumericDocValues(Lucene.SOFT_DELETE_FIELD);
|
||||||
|
final NumericDocValues tombstoneDV = reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME);
|
||||||
|
if (softDeletesDV == null || tombstoneDV == null) {
|
||||||
|
throw new IllegalArgumentException("reader does not have _uid terms but not a no-op segment; " +
|
||||||
|
"_soft_deletes [" + softDeletesDV + "], _tombstone [" + tombstoneDV + "]");
|
||||||
}
|
}
|
||||||
|
termsEnum = null;
|
||||||
|
} else {
|
||||||
termsEnum = terms.iterator();
|
termsEnum = terms.iterator();
|
||||||
if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) {
|
|
||||||
throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field");
|
|
||||||
}
|
}
|
||||||
|
if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) {
|
||||||
|
throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field; _uid terms [" + terms + "]");
|
||||||
|
}
|
||||||
Object readerKey = null;
|
Object readerKey = null;
|
||||||
assert (readerKey = reader.getCoreCacheHelper().getKey()) != null;
|
assert (readerKey = reader.getCoreCacheHelper().getKey()) != null;
|
||||||
this.readerKey = readerKey;
|
this.readerKey = readerKey;
|
||||||
|
@ -111,7 +119,8 @@ final class PerThreadIDVersionAndSeqNoLookup {
|
||||||
* {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found
|
* {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found
|
||||||
* */
|
* */
|
||||||
private int getDocID(BytesRef id, Bits liveDocs) throws IOException {
|
private int getDocID(BytesRef id, Bits liveDocs) throws IOException {
|
||||||
if (termsEnum.seekExact(id)) {
|
// termsEnum can possibly be null here if this leaf contains only no-ops.
|
||||||
|
if (termsEnum != null && termsEnum.seekExact(id)) {
|
||||||
int docID = DocIdSetIterator.NO_MORE_DOCS;
|
int docID = DocIdSetIterator.NO_MORE_DOCS;
|
||||||
// there may be more than one matching docID, in the case of nested docs, so we want the last one:
|
// there may be more than one matching docID, in the case of nested docs, so we want the last one:
|
||||||
docsEnum = termsEnum.postings(docsEnum, 0);
|
docsEnum = termsEnum.postings(docsEnum, 0);
|
||||||
|
|
|
@ -1347,6 +1347,9 @@ public class InternalEngine extends Engine {
|
||||||
try {
|
try {
|
||||||
final ParsedDocument tombstone = engineConfig.getTombstoneDocSupplier().newNoopTombstoneDoc();
|
final ParsedDocument tombstone = engineConfig.getTombstoneDocSupplier().newNoopTombstoneDoc();
|
||||||
tombstone.updateSeqID(noOp.seqNo(), noOp.primaryTerm());
|
tombstone.updateSeqID(noOp.seqNo(), noOp.primaryTerm());
|
||||||
|
// A noop tombstone does not require a _version but it's added to have a fully dense docvalues for the version field.
|
||||||
|
// 1L is selected to optimize the compression because it is probably the most common value in the version field.
|
||||||
|
tombstone.version().setLongValue(1L);
|
||||||
assert tombstone.docs().size() == 1 : "Tombstone should have a single doc [" + tombstone + "]";
|
assert tombstone.docs().size() == 1 : "Tombstone should have a single doc [" + tombstone + "]";
|
||||||
final ParseContext.Document doc = tombstone.docs().get(0);
|
final ParseContext.Document doc = tombstone.docs().get(0);
|
||||||
assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null
|
assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null
|
||||||
|
|
|
@ -178,8 +178,8 @@ public class DocumentMapper implements ToXContentFragment {
|
||||||
TypeFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME);
|
TypeFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME);
|
||||||
this.deleteTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers)
|
this.deleteTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers)
|
||||||
.filter(field -> deleteTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new);
|
.filter(field -> deleteTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new);
|
||||||
final Collection<String> noopTombstoneMetadataFields =
|
final Collection<String> noopTombstoneMetadataFields = Arrays.asList(
|
||||||
Arrays.asList(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME);
|
VersionFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME, SeqNoFieldMapper.TOMBSTONE_NAME);
|
||||||
this.noopTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers)
|
this.noopTombstoneMetadataFieldMappers = Stream.of(mapping.metadataMappers)
|
||||||
.filter(field -> noopTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new);
|
.filter(field -> noopTombstoneMetadataFields.contains(field.name())).toArray(MetadataFieldMapper[]::new);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3781,6 +3781,61 @@ public class InternalEngineTests extends EngineTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that a segment containing only no-ops can be used to look up _version and _seqno.
|
||||||
|
*/
|
||||||
|
public void testSegmentContainsOnlyNoOps() throws Exception {
|
||||||
|
Engine.NoOpResult noOpResult = engine.noOp(new Engine.NoOp(1, primaryTerm.get(),
|
||||||
|
randomFrom(Engine.Operation.Origin.values()), randomNonNegativeLong(), "test"));
|
||||||
|
assertThat(noOpResult.getFailure(), nullValue());
|
||||||
|
engine.refresh("test");
|
||||||
|
Engine.DeleteResult deleteResult = engine.delete(replicaDeleteForDoc("id", 1, 2, randomNonNegativeLong()));
|
||||||
|
assertThat(deleteResult.getFailure(), nullValue());
|
||||||
|
engine.refresh("test");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple test to check that a random combination of operations can coexist in segments and be looked up.
|
||||||
|
* This is needed as some fields in Lucene may not exist if a segment misses operation types and this code is to check for that.
|
||||||
|
* For example, a segment containing only no-ops has neither _uid nor _version.
|
||||||
|
*/
|
||||||
|
public void testRandomOperations() throws Exception {
|
||||||
|
int numOps = between(10, 100);
|
||||||
|
for (int i = 0; i < numOps; i++) {
|
||||||
|
String id = Integer.toString(randomIntBetween(1, 10));
|
||||||
|
ParsedDocument doc = createParsedDoc(id, null);
|
||||||
|
Engine.Operation.TYPE type = randomFrom(Engine.Operation.TYPE.values());
|
||||||
|
switch (type) {
|
||||||
|
case INDEX:
|
||||||
|
Engine.IndexResult index = engine.index(replicaIndexForDoc(doc, between(1, 100), i, randomBoolean()));
|
||||||
|
assertThat(index.getFailure(), nullValue());
|
||||||
|
break;
|
||||||
|
case DELETE:
|
||||||
|
Engine.DeleteResult delete = engine.delete(replicaDeleteForDoc(doc.id(), between(1, 100), i, randomNonNegativeLong()));
|
||||||
|
assertThat(delete.getFailure(), nullValue());
|
||||||
|
break;
|
||||||
|
case NO_OP:
|
||||||
|
Engine.NoOpResult noOp = engine.noOp(new Engine.NoOp(i, primaryTerm.get(),
|
||||||
|
randomFrom(Engine.Operation.Origin.values()), randomNonNegativeLong(), ""));
|
||||||
|
assertThat(noOp.getFailure(), nullValue());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException("Invalid op [" + type + "]");
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
engine.refresh("test");
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
engine.flush();
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
engine.forceMerge(randomBoolean(), between(1, 10), randomBoolean(), false, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
List<Translog.Operation> operations = readAllOperationsInLucene(engine, createMapperService("test"));
|
||||||
|
assertThat(operations, hasSize(numOps));
|
||||||
|
}
|
||||||
|
|
||||||
public void testMinGenerationForSeqNo() throws IOException, BrokenBarrierException, InterruptedException {
|
public void testMinGenerationForSeqNo() throws IOException, BrokenBarrierException, InterruptedException {
|
||||||
engine.close();
|
engine.close();
|
||||||
final int numberOfTriplets = randomIntBetween(1, 32);
|
final int numberOfTriplets = randomIntBetween(1, 32);
|
||||||
|
|
|
@ -3113,8 +3113,8 @@ public class IndexShardTests extends IndexShardTestCase {
|
||||||
assertThat(noopTombstone.docs(), hasSize(1));
|
assertThat(noopTombstone.docs(), hasSize(1));
|
||||||
ParseContext.Document noopDoc = noopTombstone.docs().get(0);
|
ParseContext.Document noopDoc = noopTombstone.docs().get(0);
|
||||||
assertThat(noopDoc.getFields().stream().map(IndexableField::name).collect(Collectors.toList()),
|
assertThat(noopDoc.getFields().stream().map(IndexableField::name).collect(Collectors.toList()),
|
||||||
containsInAnyOrder(SeqNoFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME,
|
containsInAnyOrder(VersionFieldMapper.NAME, SeqNoFieldMapper.TOMBSTONE_NAME,
|
||||||
SeqNoFieldMapper.TOMBSTONE_NAME));
|
SeqNoFieldMapper.NAME, SeqNoFieldMapper.NAME, SeqNoFieldMapper.PRIMARY_TERM_NAME));
|
||||||
assertThat(noopDoc.getField(SeqNoFieldMapper.TOMBSTONE_NAME).numericValue().longValue(), equalTo(1L));
|
assertThat(noopDoc.getField(SeqNoFieldMapper.TOMBSTONE_NAME).numericValue().longValue(), equalTo(1L));
|
||||||
|
|
||||||
closeShards(shard);
|
closeShards(shard);
|
||||||
|
|
|
@ -315,7 +315,10 @@ public abstract class EngineTestCase extends ESTestCase {
|
||||||
doc.add(seqID.primaryTerm);
|
doc.add(seqID.primaryTerm);
|
||||||
seqID.tombstoneField.setLongValue(1);
|
seqID.tombstoneField.setLongValue(1);
|
||||||
doc.add(seqID.tombstoneField);
|
doc.add(seqID.tombstoneField);
|
||||||
return new ParsedDocument(null, seqID, null, null, null, Collections.singletonList(doc), null, XContentType.JSON, null);
|
Field versionField = new NumericDocValuesField(VersionFieldMapper.NAME, 0);
|
||||||
|
doc.add(versionField);
|
||||||
|
return new ParsedDocument(versionField, seqID, null, null, null,
|
||||||
|
Collections.singletonList(doc), null, XContentType.JSON, null);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -734,6 +737,7 @@ public abstract class EngineTestCase extends ESTestCase {
|
||||||
final int segmentDocID = docID - leaves.get(leafIndex).docBase;
|
final int segmentDocID = docID - leaves.get(leafIndex).docBase;
|
||||||
final long seqNo = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.NAME, segmentDocID);
|
final long seqNo = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.NAME, segmentDocID);
|
||||||
final long primaryTerm = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.PRIMARY_TERM_NAME, segmentDocID);
|
final long primaryTerm = readNumericDV(leaves.get(leafIndex), SeqNoFieldMapper.PRIMARY_TERM_NAME, segmentDocID);
|
||||||
|
final long version = readNumericDV(leaves.get(leafIndex), VersionFieldMapper.NAME, segmentDocID);
|
||||||
final FieldsVisitor fields = new FieldsVisitor(true);
|
final FieldsVisitor fields = new FieldsVisitor(true);
|
||||||
searcher.doc(docID, fields);
|
searcher.doc(docID, fields);
|
||||||
fields.postProcess(mapper);
|
fields.postProcess(mapper);
|
||||||
|
@ -743,11 +747,11 @@ public abstract class EngineTestCase extends ESTestCase {
|
||||||
op = new Translog.NoOp(seqNo, primaryTerm, "");
|
op = new Translog.NoOp(seqNo, primaryTerm, "");
|
||||||
assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1
|
assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1
|
||||||
: "Noop operation but soft_deletes field is not set";
|
: "Noop operation but soft_deletes field is not set";
|
||||||
|
assert version == 1 : "Noop tombstone should have version 1L; actual version [" + version + "]";
|
||||||
} else {
|
} else {
|
||||||
final String id = fields.uid().id();
|
final String id = fields.uid().id();
|
||||||
final String type = fields.uid().type();
|
final String type = fields.uid().type();
|
||||||
final Term uid = new Term(IdFieldMapper.NAME, Uid.encodeId(id));
|
final Term uid = new Term(IdFieldMapper.NAME, Uid.encodeId(id));
|
||||||
final long version = readNumericDV(leaves.get(leafIndex), VersionFieldMapper.NAME, segmentDocID);
|
|
||||||
if (isTombstone) {
|
if (isTombstone) {
|
||||||
op = new Translog.Delete(type, id, uid, seqNo, primaryTerm, version, VersionType.INTERNAL);
|
op = new Translog.Delete(type, id, uid, seqNo, primaryTerm, version, VersionType.INTERNAL);
|
||||||
assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1
|
assert readNumericDV(leaves.get(leafIndex), Lucene.SOFT_DELETE_FIELD, segmentDocID) == 1
|
||||||
|
|
Loading…
Reference in New Issue