Add support for index sorting with document blocks (#12829)

Today index sorting will most likely break document blocks added with `IndexWriter#addDocuments(...)` and `#updateDocuments(...)`, since the index sorter has no indication of which documents are part of a block. This change automatically adds a marker field to parent documents if one is configured in `IWC`. These marker fields are optional unless document blocks are indexed and index sorting is configured; in that case, indexing blocks will fail unless a parent field is configured. Index sorting will preserve document blocks during sort: documents within a block will not be reordered by the sorting algorithm and will sort alongside their parent documents.
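
As a hedged illustration of the resulting API (a minimal sketch; the directory, analyzer, and the `_parent` and `sort_key` field names are placeholders, not part of this change):

import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class ParentFieldExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      IndexWriterConfig iwc =
          new IndexWriterConfig(new StandardAnalyzer())
              // index sorting together with document blocks now requires a parent field
              .setIndexSort(new Sort(new SortField("sort_key", SortField.Type.LONG)))
              .setParentField("_parent");
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        Document child1 = new Document();
        child1.add(new NumericDocValuesField("sort_key", 42));
        Document child2 = new Document();
        child2.add(new NumericDocValuesField("sort_key", 42));
        Document parent = new Document();
        parent.add(new NumericDocValuesField("sort_key", 42));
        // The last document of the block is the parent; IndexWriter adds the
        // "_parent" marker field to it internally. Without setParentField above,
        // this call would fail on a newly created 10.x index because index
        // sorting is configured.
        writer.addDocuments(Arrays.asList(child1, child2, parent));
      }
    }
  }
}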

Relates to #12711
Simon Willnauer 2024-01-11 16:11:15 +01:00 committed by GitHub
parent b7728c5657
commit df6bd25ce4
47 changed files with 1173 additions and 83 deletions

View File

@@ -90,6 +90,11 @@ New Features
 * LUCENE-10626 Hunspell: add tools to aid dictionary editing:
   analysis introspection, stem expansion and stem/flag suggestion (Peter Gromov)
 
+* GITHUB#12829: For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
+  IndexWriter#addDocuments or IndexWriter#updateDocuments also when index sorting is configured. Document blocks are
+  maintained alongside their parent documents during sort and merge. IndexWriterConfig now requires a parent field to be
+  specified if index sorting is used together with document blocks. (Simon Willnauer)
+
 Improvements
 ---------------------
@@ -131,6 +136,12 @@ Bug Fixes
 * GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those
   of DoubleValues#doubleValue(). (Uwe Schindler)
 
+Changes in Backwards Compatibility Policy
+-----------------------------------------
+
+* GITHUB#12829: IndexWriter#addDocuments or IndexWriter#updateDocuments now require a parent field name to be
+  specified in IndexWriterConfig if document blocks are indexed and index time sorting is configured. (Simon Willnauer)
+
 Other
 ---------------------

View File

@@ -19,6 +19,14 @@
 ## Migration from Lucene 9.x to Lucene 10.0
 
+### IndexWriter requires a parent document field in order to use index sorting with document blocks (GITHUB#12829)
+
+For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
+IndexWriter#addDocuments or IndexWriter#updateDocuments when index sorting is configured. Document blocks are maintained
+alongside their parent documents during sort and merge. The internally used parent field must be configured in
+IndexWriterConfig only if index sorting is used together with document blocks. See `IndexWriterConfig#setParentField`
+for reference.
+
 ### Minor API changes in MatchHighlighter and MatchRegionRetriever. (GITHUB#12881)
 The API of interfaces for accepting highlights has changed to allow performance improvements. Look at the issue and the PR diff to get
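
As a hedged illustration of the migration note above: the configured parent field is exposed through FieldInfos, so an application can verify what each segment of a migrated index reports (a minimal sketch using the `getParentField` accessor added by this change; `dir` is a placeholder):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;

public class ParentFieldInspector {
  // FieldInfos#getParentField is introduced by this change; it returns null
  // for segments written without a configured parent field.
  public static void printParentFields(Directory dir) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      for (LeafReaderContext ctx : reader.leaves()) {
        System.out.println(
            "segment " + ctx.ord + ": parent field = "
                + ctx.reader().getFieldInfos().getParentField());
      }
    }
  }
}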

View File

@@ -217,7 +217,8 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
               0,
               VectorEncoding.FLOAT32,
               VectorSimilarityFunction.EUCLIDEAN,
-              isSoftDeletesField);
+              isSoftDeletesField,
+              false);
     } catch (IllegalStateException e) {
       throw new CorruptIndexException(
           "invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);

View File

@@ -194,7 +194,8 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
                 vectorDimension,
                 VectorEncoding.FLOAT32,
                 vectorDistFunc,
-                isSoftDeletesField);
+                isSoftDeletesField,
+                false);
         infos[i].checkConsistency();
       } catch (IllegalStateException e) {
         throw new CorruptIndexException(

View File

@@ -35,4 +35,9 @@ public class TestLucene70SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
   protected Codec getCodec() {
     return new Lucene84RWCodec();
   }
+
+  @Override
+  protected boolean supportsHasBlocks() {
+    return false;
+  }
 }

View File

@@ -35,4 +35,9 @@ public class TestLucene86SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
   protected Codec getCodec() {
     return new Lucene87RWCodec();
   }
+
+  @Override
+  protected boolean supportsHasBlocks() {
+    return false;
+  }
 }

View File

@@ -32,4 +32,9 @@ public class TestLucene90SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
   protected Codec getCodec() {
     return new Lucene90RWCodec();
   }
+
+  @Override
+  protected boolean supportsHasBlocks() {
+    return false;
+  }
 }

View File

@@ -99,6 +99,8 @@ import org.apache.lucene.index.TermVectors;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.FieldExistsQuery;
@@ -2162,6 +2164,83 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     }
   }
 
+  public void testSortedIndexAddDocBlocks() throws Exception {
+    for (String name : oldSortedNames) {
+      Path path = createTempDir("sorted");
+      InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name + ".zip");
+      assertNotNull("Sorted index " + name + " not found", resource);
+      TestUtil.unzip(resource, path);
+      try (Directory dir = newFSDirectory(path)) {
+        final Sort sort;
+        try (DirectoryReader reader = DirectoryReader.open(dir)) {
+          assertEquals(1, reader.leaves().size());
+          sort = reader.leaves().get(0).reader().getMetaData().getSort();
+          assertNotNull(sort);
+          searchExampleIndex(reader);
+        }
+        // open writer
+        try (IndexWriter writer =
+            new IndexWriter(
+                dir,
+                newIndexWriterConfig(new MockAnalyzer(random()))
+                    .setOpenMode(OpenMode.APPEND)
+                    .setIndexSort(sort)
+                    .setMergePolicy(newLogMergePolicy()))) {
+          // add 10 docs
+          for (int i = 0; i < 10; i++) {
+            Document child = new Document();
+            child.add(new StringField("relation", "child", Field.Store.NO));
+            child.add(new StringField("bid", "" + i, Field.Store.NO));
+            child.add(new NumericDocValuesField("dateDV", i));
+            Document parent = new Document();
+            parent.add(new StringField("relation", "parent", Field.Store.NO));
+            parent.add(new StringField("bid", "" + i, Field.Store.NO));
+            parent.add(new NumericDocValuesField("dateDV", i));
+            writer.addDocuments(Arrays.asList(child, child, parent));
+            if (random().nextBoolean()) {
+              writer.flush();
+            }
+          }
+          if (random().nextBoolean()) {
+            writer.forceMerge(1);
+          }
+          writer.commit();
+          try (IndexReader reader = DirectoryReader.open(dir)) {
+            IndexSearcher searcher = new IndexSearcher(reader);
+            for (int i = 0; i < 10; i++) {
+              TopDocs children =
+                  searcher.search(
+                      new BooleanQuery.Builder()
+                          .add(
+                              new TermQuery(new Term("relation", "child")),
+                              BooleanClause.Occur.MUST)
+                          .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST)
+                          .build(),
+                      2);
+              TopDocs parents =
+                  searcher.search(
+                      new BooleanQuery.Builder()
+                          .add(
+                              new TermQuery(new Term("relation", "parent")),
+                              BooleanClause.Occur.MUST)
+                          .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST)
+                          .build(),
+                      2);
+              assertEquals(2, children.totalHits.value);
+              assertEquals(1, parents.totalHits.value);
+              // make sure it's sorted
+              assertEquals(children.scoreDocs[0].doc + 1, children.scoreDocs[1].doc);
+              assertEquals(children.scoreDocs[1].doc + 1, parents.scoreDocs[0].doc);
+            }
+          }
+        }
+        // This will confirm the docs are really sorted
+        TestUtil.checkIndex(dir);
+      }
+    }
+  }
+
   private void searchExampleIndex(DirectoryReader reader) throws IOException {
     IndexSearcher searcher = newSearcher(reader);

View File

@@ -72,6 +72,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
   static final BytesRef VECTOR_ENCODING = new BytesRef(" vector encoding ");
   static final BytesRef VECTOR_SIMILARITY = new BytesRef(" vector similarity ");
   static final BytesRef SOFT_DELETES = new BytesRef(" soft-deletes ");
+  static final BytesRef PARENT = new BytesRef(" parent ");
 
   @Override
   public FieldInfos read(
@@ -170,6 +171,9 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
       SimpleTextUtil.readLine(input, scratch);
       assert StringHelper.startsWith(scratch.get(), SOFT_DELETES);
       boolean isSoftDeletesField = Boolean.parseBoolean(readString(SOFT_DELETES.length, scratch));
+      SimpleTextUtil.readLine(input, scratch);
+      assert StringHelper.startsWith(scratch.get(), PARENT);
+      boolean isParentField = Boolean.parseBoolean(readString(PARENT.length, scratch));
 
       infos[i] =
           new FieldInfo(
@@ -188,7 +192,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
               vectorNumDimensions,
               vectorEncoding,
               vectorDistFunc,
-              isSoftDeletesField);
+              isSoftDeletesField,
+              isParentField);
     }
 
     SimpleTextUtil.checkFooter(input);
@@ -320,6 +325,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
       SimpleTextUtil.write(out, SOFT_DELETES);
       SimpleTextUtil.write(out, Boolean.toString(fi.isSoftDeletesField()), scratch);
       SimpleTextUtil.writeNewline(out);
+
+      SimpleTextUtil.write(out, PARENT);
+      SimpleTextUtil.write(out, Boolean.toString(fi.isParentField()), scratch);
+      SimpleTextUtil.writeNewline(out);
     }
     SimpleTextUtil.writeChecksum(out, scratch);
     success = true;

View File

@@ -196,7 +196,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
         sortField[i] = SortFieldProvider.forName(provider).readSortField(bytes);
         assert bytes.eof();
       }
-      Sort indexSort = sortField.length == 0 ? null : new Sort(sortField);
+
+      final Sort indexSort;
+      if (sortField.length == 0) {
+        indexSort = null;
+      } else {
+        indexSort = new Sort(sortField);
+      }
 
       SimpleTextUtil.checkFooter(input);
@@ -335,7 +341,6 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
         SimpleTextUtil.write(output, b.bytes.get().toString(), scratch);
         SimpleTextUtil.writeNewline(output);
       }
-
       SimpleTextUtil.writeChecksum(output, scratch);
     }
   }

View File

@@ -119,6 +119,7 @@ public class TestBlockWriter extends LuceneTestCase {
         0,
         VectorEncoding.FLOAT32,
         VectorSimilarityFunction.EUCLIDEAN,
-        true);
+        true,
+        false);
   }
 }

View File

@@ -206,6 +206,7 @@ public class TestSTBlockReader extends LuceneTestCase {
         0,
         VectorEncoding.FLOAT32,
         VectorSimilarityFunction.EUCLIDEAN,
+        false,
         false);
   }

View File

@@ -131,13 +131,14 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
     Throwable priorE = null;
     FieldInfo[] infos = null;
     try {
-      CodecUtil.checkIndexHeader(
-          input,
-          Lucene94FieldInfosFormat.CODEC_NAME,
-          Lucene94FieldInfosFormat.FORMAT_START,
-          Lucene94FieldInfosFormat.FORMAT_CURRENT,
-          segmentInfo.getId(),
-          segmentSuffix);
+      int format =
+          CodecUtil.checkIndexHeader(
+              input,
+              Lucene94FieldInfosFormat.CODEC_NAME,
+              Lucene94FieldInfosFormat.FORMAT_START,
+              Lucene94FieldInfosFormat.FORMAT_CURRENT,
+              segmentInfo.getId(),
+              segmentSuffix);
 
       final int size = input.readVInt(); // read in the size
       infos = new FieldInfo[size];
@@ -157,6 +158,18 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
         boolean omitNorms = (bits & OMIT_NORMS) != 0;
         boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
         boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
+        boolean isParentField =
+            format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
+
+        if ((bits & 0xE0) != 0) {
+          throw new CorruptIndexException(
+              "unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input);
+        }
+        if (format < FORMAT_PARENT_FIELD && (bits & 0xF0) != 0) {
+          throw new CorruptIndexException(
+              "parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"",
+              input);
+        }
 
         final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
@@ -200,7 +213,8 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
                 vectorDimension,
                 vectorEncoding,
                 vectorDistFunc,
-                isSoftDeletesField);
+                isSoftDeletesField,
+                isParentField);
         infos[i].checkConsistency();
       } catch (IllegalStateException e) {
         throw new CorruptIndexException(
@@ -348,6 +362,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
       if (fi.omitsNorms()) bits |= OMIT_NORMS;
       if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
       if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
+      if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
       output.writeByte(bits);
 
       output.writeByte(indexOptionsByte(fi.getIndexOptions()));
@@ -375,11 +390,14 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
   // Codec header
   static final String CODEC_NAME = "Lucene94FieldInfos";
   static final int FORMAT_START = 0;
-  static final int FORMAT_CURRENT = FORMAT_START;
+  // this doesn't actually change the file format but uses up one more bit of an existing bit pattern
+  static final int FORMAT_PARENT_FIELD = 1;
+  static final int FORMAT_CURRENT = FORMAT_PARENT_FIELD;
 
   // Field flags
   static final byte STORE_TERMVECTOR = 0x1;
   static final byte OMIT_NORMS = 0x2;
   static final byte STORE_PAYLOADS = 0x4;
   static final byte SOFT_DELETES_FIELD = 0x8;
+  static final byte PARENT_FIELD_FIELD = 0x10;
 }

View File

@@ -1176,34 +1176,46 @@ public final class CheckIndex implements Closeable {
       comparators[i] = fields[i].getComparator(1, Pruning.NONE).getLeafComparator(readerContext);
     }
 
-    int maxDoc = reader.maxDoc();
     try {
-      for (int docID = 1; docID < maxDoc; docID++) {
+      LeafMetaData metaData = reader.getMetaData();
+      FieldInfos fieldInfos = reader.getFieldInfos();
+      if (metaData.hasBlocks()
+          && fieldInfos.getParentField() == null
+          && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) {
+        throw new IllegalStateException(
+            "parent field is not set but the index has document blocks and was created with version: "
+                + metaData.getCreatedVersionMajor());
+      }
+      final DocIdSetIterator iter;
+      if (metaData.hasBlocks() && fieldInfos.getParentField() != null) {
+        iter = reader.getNumericDocValues(fieldInfos.getParentField());
+      } else {
+        iter = DocIdSetIterator.all(reader.maxDoc());
+      }
+      int prevDoc = iter.nextDoc();
+      int nextDoc;
+      while ((nextDoc = iter.nextDoc()) != NO_MORE_DOCS) {
         int cmp = 0;
         for (int i = 0; i < comparators.length; i++) {
-          // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
+          // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
           // the segments are always the same here...
-          comparators[i].copy(0, docID - 1);
+          comparators[i].copy(0, prevDoc);
           comparators[i].setBottom(0);
-          cmp = reverseMul[i] * comparators[i].compareBottom(docID);
+          cmp = reverseMul[i] * comparators[i].compareBottom(nextDoc);
           if (cmp != 0) {
             break;
           }
         }
 
         if (cmp > 0) {
           throw new CheckIndexException(
               "segment has indexSort="
                   + sort
                   + " but docID="
-                  + (docID - 1)
+                  + (prevDoc)
                   + " sorts after docID="
-                  + docID);
+                  + nextDoc);
         }
+        prevDoc = nextDoc;
       }
 
       msg(
           infoStream,
View File

@@ -21,14 +21,17 @@ import java.text.NumberFormat;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
+import java.util.NoSuchElementException;
 import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.ReentrantLock;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
@@ -134,9 +137,11 @@ final class DocumentsWriterPerThread implements Accountable {
   private final ReentrantLock lock = new ReentrantLock();
   private int[] deleteDocIDs = new int[0];
   private int numDeletedDocIds = 0;
+  private final int indexMajorVersionCreated;
+  private final IndexingChain.ReservedField<NumericDocValuesField> parentField;
 
   DocumentsWriterPerThread(
-      int indexVersionCreated,
+      int indexMajorVersionCreated,
       String segmentName,
       Directory directoryOrig,
       Directory directory,
@@ -145,6 +150,7 @@ final class DocumentsWriterPerThread implements Accountable {
       FieldInfos.Builder fieldInfos,
       AtomicLong pendingNumDocs,
       boolean enableTestPoints) {
+    this.indexMajorVersionCreated = indexMajorVersionCreated;
    this.directory = new TrackingDirectoryWrapper(directory);
    this.fieldInfos = fieldInfos;
    this.indexWriterConfig = indexWriterConfig;
@@ -183,12 +189,19 @@ final class DocumentsWriterPerThread implements Accountable {
     this.enableTestPoints = enableTestPoints;
     indexingChain =
         new IndexingChain(
-            indexVersionCreated,
+            indexMajorVersionCreated,
             segmentInfo,
             this.directory,
             fieldInfos,
             indexWriterConfig,
             this::onAbortingException);
+    if (indexWriterConfig.getParentField() != null) {
+      this.parentField =
+          indexingChain.markAsReserved(
+              new NumericDocValuesField(indexWriterConfig.getParentField(), -1));
+    } else {
+      this.parentField = null;
+    }
   }
 
   final void testPoint(String message) {
@@ -231,7 +244,23 @@ final class DocumentsWriterPerThread implements Accountable {
     final int docsInRamBefore = numDocsInRAM;
     boolean allDocsIndexed = false;
     try {
-      for (Iterable<? extends IndexableField> doc : docs) {
+      final Iterator<? extends Iterable<? extends IndexableField>> iterator = docs.iterator();
+      while (iterator.hasNext()) {
+        Iterable<? extends IndexableField> doc = iterator.next();
+        if (parentField != null) {
+          if (iterator.hasNext() == false) {
+            doc = addParentField(doc, parentField);
+          }
+        } else if (segmentInfo.getIndexSort() != null
+            && iterator.hasNext()
+            && indexMajorVersionCreated >= Version.LUCENE_10_0_0.major) {
+          // sort is configured and we have a doc-block, but the parent field is missing;
+          // we must not fail if this index was created in an earlier version where this
+          // behavior was permitted.
+          throw new IllegalArgumentException(
+              "a parent field must be set in order to use document blocks with index sorting; see IndexWriterConfig#setParentField");
+        }
         // Even on exception, the document is still added (but marked
         // deleted), so we don't need to un-reserve at that point.
         // Aborting exceptions will actually "lose" more than one
@@ -245,10 +274,11 @@ final class DocumentsWriterPerThread implements Accountable {
           onNewDocOnRAM.run();
         }
       }
-      allDocsIndexed = true;
-      if (numDocsInRAM - docsInRamBefore > 1) {
+      final int numDocs = numDocsInRAM - docsInRamBefore;
+      if (numDocs > 1) {
         segmentInfo.setHasBlocks();
       }
+      allDocsIndexed = true;
       return finishDocuments(deleteNode, docsInRamBefore);
     } finally {
       if (!allDocsIndexed && !aborted) {
@@ -262,6 +292,34 @@ final class DocumentsWriterPerThread implements Accountable {
     }
   }
 
+  private Iterable<? extends IndexableField> addParentField(
+      Iterable<? extends IndexableField> doc, IndexableField parentField) {
+    return () -> {
+      final Iterator<? extends IndexableField> first = doc.iterator();
+      return new Iterator<>() {
+        IndexableField additionalField = parentField;
+
+        @Override
+        public boolean hasNext() {
+          return additionalField != null || first.hasNext();
+        }
+
+        @Override
+        public IndexableField next() {
+          if (additionalField != null) {
+            IndexableField field = additionalField;
+            additionalField = null;
+            return field;
+          }
+          if (first.hasNext()) {
+            return first.next();
+          }
+          throw new NoSuchElementException();
+        }
+      };
+    };
+  }
+
   private long finishDocuments(DocumentsWriterDeleteQueue.Node<?> deleteNode, int docIdUpTo) {
     /*
      * here we actually finish the document in two steps 1. push the delete into

View File

@@ -63,6 +63,8 @@ public final class FieldInfo {
   // whether this field is used as the soft-deletes field
   private final boolean softDeletesField;
 
+  private final boolean isParentField;
+
   /**
    * Sole constructor.
    *
@@ -84,7 +86,8 @@ public final class FieldInfo {
       int vectorDimension,
       VectorEncoding vectorEncoding,
       VectorSimilarityFunction vectorSimilarityFunction,
-      boolean softDeletesField) {
+      boolean softDeletesField,
+      boolean isParentField) {
     this.name = Objects.requireNonNull(name);
     this.number = number;
     this.docValuesType =
@@ -111,6 +114,7 @@ public final class FieldInfo {
     this.vectorEncoding = vectorEncoding;
     this.vectorSimilarityFunction = vectorSimilarityFunction;
     this.softDeletesField = softDeletesField;
+    this.isParentField = isParentField;
     this.checkConsistency();
   }
 
@@ -206,6 +210,13 @@ public final class FieldInfo {
       throw new IllegalArgumentException(
           "vectorDimension must be >=0; got " + vectorDimension + " (field: '" + name + "')");
     }
+    if (softDeletesField && isParentField) {
+      throw new IllegalArgumentException(
+          "field can't be used as soft-deletes field and parent document field (field: '"
+              + name
+              + "')");
+    }
   }
 
   /**
@@ -633,4 +644,12 @@ public final class FieldInfo {
   public boolean isSoftDeletesField() {
     return softDeletesField;
   }
+
+  /**
+   * Returns true if this field is configured and used as the parent document field. See
+   * {@link IndexWriterConfig#setParentField(String)}
+   */
+  public boolean isParentField() {
+    return isParentField;
+  }
 }

View File

@@ -59,6 +59,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
   private final boolean hasVectorValues;
   private final String softDeletesField;
 
+  private final String parentField;
+
   // used only by fieldInfo(int)
   private final FieldInfo[] byNumber;
@@ -78,6 +80,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     boolean hasPointValues = false;
     boolean hasVectorValues = false;
     String softDeletesField = null;
+    String parentField = null;
 
     int size = 0; // number of elements in byNumberTemp, number of used array slots
     FieldInfo[] byNumberTemp = new FieldInfo[10]; // initial array capacity of 10
@@ -132,6 +135,13 @@ public class FieldInfos implements Iterable<FieldInfo> {
         }
         softDeletesField = info.name;
       }
+      if (info.isParentField()) {
+        if (parentField != null && parentField.equals(info.name) == false) {
+          throw new IllegalArgumentException(
+              "multiple parent fields [" + info.name + ", " + parentField + "]");
+        }
+        parentField = info.name;
+      }
     }
 
     this.hasVectors = hasVectors;
@@ -145,6 +155,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     this.hasPointValues = hasPointValues;
     this.hasVectorValues = hasVectorValues;
     this.softDeletesField = softDeletesField;
+    this.parentField = parentField;
 
     List<FieldInfo> valuesTemp = new ArrayList<>();
     byNumber = new FieldInfo[size];
@@ -178,7 +189,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
             .filter(Objects::nonNull)
             .findAny()
             .orElse(null);
-    final Builder builder = new Builder(new FieldNumbers(softDeletesField));
+    final String parentField = getAndValidateParentField(leaves);
+    final Builder builder = new Builder(new FieldNumbers(softDeletesField, parentField));
     for (final LeafReaderContext ctx : leaves) {
       for (FieldInfo fieldInfo : ctx.reader().getFieldInfos()) {
         builder.add(fieldInfo);
@@ -188,6 +200,26 @@ public class FieldInfos implements Iterable<FieldInfo> {
     }
   }
 
+  private static String getAndValidateParentField(List<LeafReaderContext> leaves) {
+    boolean set = false;
+    String theField = null;
+    for (LeafReaderContext ctx : leaves) {
+      String field = ctx.reader().getFieldInfos().getParentField();
+      if (set && Objects.equals(field, theField) == false) {
+        throw new IllegalStateException(
+            "expected parent doc field to be \""
+                + theField
+                + " \" across all segments but found a segment with different field \""
+                + field
+                + "\"");
+      } else {
+        theField = field;
+        set = true;
+      }
+    }
+    return theField;
+  }
+
   /** Returns a set of names of fields that have a terms index. The order is undefined. */
   public static Collection<String> getIndexedFields(IndexReader reader) {
     return reader.leaves().stream()
@@ -254,6 +286,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
     return softDeletesField;
   }
 
+  /** Returns the parent document field name if it exists; otherwise returns null */
+  public String getParentField() {
+    return parentField;
+  }
+
   /** Returns the number of fields */
   public int size() {
     return byName.size();
@@ -345,7 +382,10 @@ public class FieldInfos implements Iterable<FieldInfo> {
     // The soft-deletes field from IWC to enforce a single soft-deletes field
     private final String softDeletesFieldName;
 
-    FieldNumbers(String softDeletesFieldName) {
+    // The parent document field from IWC to mark parent documents when indexing
+    private final String parentFieldName;
+
+    FieldNumbers(String softDeletesFieldName, String parentFieldName) {
       this.nameToNumber = new HashMap<>();
       this.numberToName = new HashMap<>();
       this.indexOptions = new HashMap<>();
@@ -355,11 +395,21 @@ public class FieldInfos implements Iterable<FieldInfo> {
       this.omitNorms = new HashMap<>();
       this.storeTermVectors = new HashMap<>();
       this.softDeletesFieldName = softDeletesFieldName;
+      this.parentFieldName = parentFieldName;
+      if (softDeletesFieldName != null
+          && parentFieldName != null
+          && parentFieldName.equals(softDeletesFieldName)) {
+        throw new IllegalArgumentException(
+            "parent document and soft-deletes field can't be the same field \""
+                + parentFieldName
+                + "\"");
+      }
     }
 
     synchronized void verifyFieldInfo(FieldInfo fi) {
       String fieldName = fi.getName();
       verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField());
+      verifyParentFieldName(fieldName, fi.isParentField());
       if (nameToNumber.containsKey(fieldName)) {
         verifySameSchema(fi);
       }
@@ -373,6 +423,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     synchronized int addOrGet(FieldInfo fi) {
       String fieldName = fi.getName();
       verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField());
+      verifyParentFieldName(fieldName, fi.isParentField());
 
       Integer fieldNumber = nameToNumber.get(fieldName);
       if (fieldNumber != null) {
@@ -437,6 +488,33 @@ public class FieldInfos implements Iterable<FieldInfo> {
       }
     }
 
+    private void verifyParentFieldName(String fieldName, boolean isParentField) {
+      if (isParentField) {
+        if (parentFieldName == null) {
+          throw new IllegalArgumentException(
+              "can't add field ["
+                  + fieldName
+                  + "] as parent document field; this IndexWriter has no parent document field configured");
+        } else if (fieldName.equals(parentFieldName) == false) {
+          throw new IllegalArgumentException(
+              "can't add field ["
+                  + fieldName
+                  + "] as parent document field; this IndexWriter is configured with ["
+                  + parentFieldName
+                  + "] as parent document field");
+        }
+      } else if (fieldName.equals(parentFieldName)) { // isParentField == false
+        // this would be the case if the current index has a parent field that is
+        // not a parent field in the incoming index (think addIndexes)
+        throw new IllegalArgumentException(
+            "can't add ["
+                + fieldName
+                + "] as non parent document field; this IndexWriter is configured with ["
+                + parentFieldName
+                + "] as parent document field");
+      }
+    }
+
     private void verifySameSchema(FieldInfo fi) {
       String fieldName = fi.getName();
       IndexOptions currentOpts = this.indexOptions.get(fieldName);
@@ -513,7 +591,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
               0,
               VectorEncoding.FLOAT32,
               VectorSimilarityFunction.EUCLIDEAN,
-              (softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)));
+              (softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)),
+              (parentFieldName != null && parentFieldName.equals(fieldName)));
           addOrGet(fi);
         }
       } else {
@@ -579,6 +658,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
       if (dvType != dvType0) return null;
 
       boolean isSoftDeletesField = fieldName.equals(softDeletesFieldName);
+      boolean isParentField = fieldName.equals(parentFieldName);
       return new FieldInfo(
           fieldName,
           newFieldNumber,
@@ -595,7 +675,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
           0,
           VectorEncoding.FLOAT32,
           VectorSimilarityFunction.EUCLIDEAN,
-          isSoftDeletesField);
+          isSoftDeletesField,
+          isParentField);
     }
 
     synchronized Set<String> getFieldNames() {
@@ -627,6 +708,14 @@ public class FieldInfos implements Iterable<FieldInfo> {
       return globalFieldNumbers.softDeletesFieldName;
     }
 
+    /**
+     * Returns the name of the parent document field or <tt>null</tt> if no parent field is
+     * configured
+     */
+    public String getParentFieldName() {
+      return globalFieldNumbers.parentFieldName;
+    }
+
     /**
      * Adds the provided FieldInfo to this Builder if this field doesn't exist in this Builder. Also
      * adds a new field with its schema options to the global FieldNumbers if the field doesn't
@@ -710,7 +799,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
             fi.getVectorDimension(),
             fi.getVectorEncoding(),
             fi.getVectorSimilarityFunction(),
-            fi.isSoftDeletesField());
+            fi.isSoftDeletesField(),
+            fi.isParentField());
     byName.put(fiNew.getName(), fiNew);
     return fiNew;
   }

View File

@@ -1261,7 +1261,8 @@ public class IndexWriter
    * If this {@link SegmentInfos} has no global field number map the returned instance is empty
    */
  private FieldNumbers getFieldNumberMap() throws IOException {
-    final FieldNumbers map = new FieldNumbers(config.softDeletesField);
+    final FieldNumbers map =
+        new FieldNumbers(config.getSoftDeletesField(), config.getParentField());
 
    for (SegmentCommitInfo info : segmentInfos) {
      FieldInfos fis = readFieldInfos(info);
@@ -6614,10 +6615,12 @@ public class IndexWriter
        }
 
        @Override
-        public FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName) {
+        public FieldInfosBuilder newFieldInfosBuilder(
+            String softDeletesFieldName, String parentFieldName) {
          return new FieldInfosBuilder() {
            private final FieldInfos.Builder builder =
-                new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName));
+                new FieldInfos.Builder(
+                    new FieldInfos.FieldNumbers(softDeletesFieldName, parentFieldName));
 
            @Override
            public FieldInfosBuilder add(FieldInfo fi) {
View File

@@ -545,4 +545,20 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
     this.eventListener = eventListener;
     return this;
   }
+
+  /**
+   * Sets the parent document field. If this optional property is set, IndexWriter will add an
+   * internal field to every root document added to the index writer. A document is considered a
+   * parent document if it's the last document in a document block indexed via {@link
+   * IndexWriter#addDocuments(Iterable)} or {@link IndexWriter#updateDocuments(Term, Iterable)} and
+   * its relatives. Additionally, all individual documents added via the single-document methods
+   * ({@link IndexWriter#addDocument(Iterable)} etc.) are also considered parent documents. This
+   * property is optional for all indices that don't use document blocks in combination with index
+   * sorting. In order to maintain the API guarantee that the document order of a block is not
+   * altered by the {@link IndexWriter}, a marker for parent documents is required.
+   */
+  public IndexWriterConfig setParentField(String parentField) {
+    this.parentField = parentField;
+    return this;
+  }
 }

View File

@@ -18,6 +18,7 @@ package org.apache.lucene.index;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -27,6 +28,7 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Consumer;
+import java.util.function.Function;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.codecs.DocValuesConsumer;
@@ -38,6 +40,7 @@ import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.PointsFormat;
 import org.apache.lucene.codecs.PointsWriter;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.InvertableType;
 import org.apache.lucene.document.KnnByteVectorField;
 import org.apache.lucene.document.KnnFloatVectorField;
 import org.apache.lucene.document.StoredValue;
@@ -49,6 +52,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
@@ -57,6 +61,7 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.Version;
 
 /** Default general purpose indexing chain, which handles indexing all types of fields. */
 final class IndexingChain implements Accountable {
@@ -219,7 +224,31 @@ final class IndexingChain implements Accountable {
     }
 
     LeafReader docValuesReader = getDocValuesLeafReader();
+    Function<IndexSorter.DocComparator, IndexSorter.DocComparator> comparatorWrapper =
+        Function.identity();
+
+    if (state.segmentInfo.getHasBlocks() && state.fieldInfos.getParentField() != null) {
+      final DocIdSetIterator readerValues =
+          docValuesReader.getNumericDocValues(state.fieldInfos.getParentField());
+      if (readerValues == null) {
+        throw new CorruptIndexException(
+            "missing doc values for parent field \"" + state.fieldInfos.getParentField() + "\"",
+            "IndexingChain");
+      }
+      BitSet parents = BitSet.of(readerValues, state.segmentInfo.maxDoc());
+      comparatorWrapper =
+          in ->
+              (docID1, docID2) ->
+                  in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2));
+    }
+    if (state.segmentInfo.getHasBlocks()
+        && state.fieldInfos.getParentField() == null
+        && indexCreatedVersionMajor >= Version.LUCENE_10_0_0.major) {
+      throw new CorruptIndexException(
+          "parent field is not set but the index has blocks and uses index sorting. indexCreatedVersionMajor: "
+              + indexCreatedVersionMajor,
+          "IndexingChain");
+    }
+
     List<IndexSorter.DocComparator> comparators = new ArrayList<>();
     for (int i = 0; i < indexSort.getSort().length; i++) {
       SortField sortField = indexSort.getSort()[i];
@@ -227,7 +256,10 @@ final class IndexingChain implements Accountable {
       if (sorter == null) {
         throw new UnsupportedOperationException("Cannot sort index using sort field " + sortField);
       }
-      comparators.add(sorter.getDocComparator(docValuesReader, state.segmentInfo.maxDoc()));
+
+      IndexSorter.DocComparator docComparator =
+          sorter.getDocComparator(docValuesReader, state.segmentInfo.maxDoc());
+      comparators.add(comparatorWrapper.apply(docComparator));
     }
     Sorter sorter = new Sorter(indexSort);
     // returns null if the documents are already sorted
@@ -546,7 +578,17 @@ final class IndexingChain implements Accountable {
     // build schema for each unique doc field
     for (IndexableField field : document) {
       IndexableFieldType fieldType = field.fieldType();
-      PerField pf = getOrAddPerField(field.name());
+      final boolean isReserved = field.getClass() == ReservedField.class;
+      PerField pf =
+          getOrAddPerField(
+              field.name(),
+              false /* we never add reserved fields during indexing; this should be done during DWPT setup */);
+      if (pf.reserved != isReserved) {
+        throw new IllegalArgumentException(
+            "\""
+                + field.name()
+                + "\" is a reserved field and should not be added to any document");
+      }
       if (pf.fieldGen != fieldGen) { // first time we see this field in this document
         fields[fieldCount++] = pf;
         pf.fieldGen = fieldGen;
@@ -556,7 +598,7 @@ final class IndexingChain implements Accountable {
       docFields[docFieldIdx++] = pf;
       updateDocFieldSchema(field.name(), pf.schema, fieldType);
     }
-    // For each field, if it the first time we see this field in this segment,
+    // For each field, if it's the first time we see this field in this segment,
     // initialize its FieldInfo.
     // If we have already seen this field, verify that its schema
     // within the current doc matches its schema in the index.
@@ -646,7 +688,8 @@ final class IndexingChain implements Accountable {
               s.vectorDimension,
               s.vectorEncoding,
               s.vectorSimilarityFunction,
-              pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName())));
+              pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName()),
+              pf.fieldName.equals(fieldInfos.getParentFieldName())));
       pf.setFieldInfo(fi);
       if (fi.getIndexOptions() != IndexOptions.NONE) {
         pf.setInvertState();
@@ -741,7 +784,7 @@ final class IndexingChain implements Accountable {
    * Returns a previously created {@link PerField}, absorbing the type information from {@link
    * FieldType}, and creates a new {@link PerField} if this field name wasn't seen yet.
    */
-  private PerField getOrAddPerField(String fieldName) {
+  private PerField getOrAddPerField(String fieldName, boolean reserved) {
     final int hashPos = fieldName.hashCode() & hashMask;
     PerField pf = fieldHash[hashPos];
     while (pf != null && pf.fieldName.equals(fieldName) == false) {
@@ -757,7 +800,8 @@ final class IndexingChain implements Accountable {
               schema,
               indexWriterConfig.getSimilarity(),
               indexWriterConfig.getInfoStream(),
-              indexWriterConfig.getAnalyzer());
+              indexWriterConfig.getAnalyzer(),
+              reserved);
       pf.next = fieldHash[hashPos];
       fieldHash[hashPos] = pf;
       totalFieldCount++;
@@ -1022,6 +1066,7 @@ final class IndexingChain implements Accountable {
     final String fieldName;
     final int indexCreatedVersionMajor;
     final FieldSchema schema;
+    final boolean reserved;
     FieldInfo fieldInfo;
     final Similarity similarity;
@@ -1059,13 +1104,15 @@ final class IndexingChain implements Accountable {
         FieldSchema schema,
         Similarity similarity,
         InfoStream infoStream,
-        Analyzer analyzer) {
+        Analyzer analyzer,
+        boolean reserved) {
       this.fieldName = fieldName;
       this.indexCreatedVersionMajor = indexCreatedVersionMajor;
       this.schema = schema;
       this.similarity = similarity;
       this.infoStream = infoStream;
       this.analyzer = analyzer;
+      this.reserved = reserved;
     }
 
     void reset(int docId) {
@@ -1512,4 +1559,77 @@ final class IndexingChain implements Accountable {
       assertSame("point num bytes", fi.getPointNumBytes(), pointNumBytes);
     }
   }
+
+  /**
+   * Wraps the given field in a reserved field and registers it as reserved. Only DWPT should do
+   * this to mark fields as private / reserved, preventing the field name from being used from
+   * outside of the IW / DWPT eco-system.
+   */
+  <T extends IndexableField> ReservedField<T> markAsReserved(T field) {
+    getOrAddPerField(field.name(), true);
+    return new ReservedField<T>(field);
+  }
+
+  static final class ReservedField<T extends IndexableField> implements IndexableField {
+
+    private final T delegate;
+
+    private ReservedField(T delegate) {
+      this.delegate = delegate;
+    }
+
+    T getDelegate() {
+      return delegate;
+    }
+
+    @Override
+    public String name() {
+      return delegate.name();
+    }
+
+    @Override
+    public IndexableFieldType fieldType() {
+      return delegate.fieldType();
+    }
+
+    @Override
+    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+      return delegate.tokenStream(analyzer, reuse);
+    }
+
+    @Override
+    public BytesRef binaryValue() {
+      return delegate.binaryValue();
+    }
+
+    @Override
+    public String stringValue() {
+      return delegate.stringValue();
+    }
+
+    @Override
+    public CharSequence getCharSequenceValue() {
+      return delegate.getCharSequenceValue();
+    }
+
+    @Override
+    public Reader readerValue() {
+      return delegate.readerValue();
+    }
+
+    @Override
+    public Number numericValue() {
+      return delegate.numericValue();
+    }
+
+    @Override
+    public StoredValue storedValue() {
+      return delegate.storedValue();
+    }
+
+    @Override
+    public InvertableType invertableType() {
+      return delegate.invertableType();
+    }
+  }
 }

View File

@@ -98,6 +98,9 @@ public class LiveIndexWriterConfig {
   /** The field names involved in the index sort */
   protected Set<String> indexSortFields = Collections.emptySet();
 
+  /** parent document field */
+  protected String parentField = null;
+
   /**
    * if an indexing thread should check for pending flushes on update in order to help out on a full
    * flush
@@ -458,6 +461,11 @@ public class LiveIndexWriterConfig {
     return eventListener;
   }
 
+  /** Returns the parent document field name if configured. */
+  public String getParentField() {
+    return parentField;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
@@ -486,6 +494,7 @@ public class LiveIndexWriterConfig {
     sb.append("maxFullFlushMergeWaitMillis=").append(getMaxFullFlushMergeWaitMillis()).append("\n");
     sb.append("leafSorter=").append(getLeafSorter()).append("\n");
     sb.append("eventListener=").append(getIndexWriterEventListener()).append("\n");
+    sb.append("parentField=").append(getParentField()).append("\n");
     return sb.toString();
   }
 }
} }

View File

@@ -22,8 +22,10 @@ import java.util.List;
 import org.apache.lucene.index.MergeState.DocMap;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.Version;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;
 
@@ -50,6 +52,31 @@ final class MultiSorter {
             "Cannot use sort field " + fields[i] + " for index sorting");
       }
       comparables[i] = sorter.getComparableProviders(readers);
+      for (int j = 0; j < readers.size(); j++) {
+        CodecReader codecReader = readers.get(j);
+        FieldInfos fieldInfos = codecReader.getFieldInfos();
+        LeafMetaData metaData = codecReader.getMetaData();
+        if (metaData.hasBlocks() && fieldInfos.getParentField() != null) {
+          NumericDocValues parentDocs =
+              codecReader.getNumericDocValues(fieldInfos.getParentField());
+          assert parentDocs != null
+              : "parent field: "
+                  + fieldInfos.getParentField()
+                  + " must be present if index sorting is used with blocks";
+          BitSet parents = BitSet.of(parentDocs, codecReader.maxDoc());
+          IndexSorter.ComparableProvider[] providers = comparables[i];
+          IndexSorter.ComparableProvider provider = providers[j];
+          providers[j] = docId -> provider.getAsComparableLong(parents.nextSetBit(docId));
+        }
+        if (metaData.hasBlocks()
+            && fieldInfos.getParentField() == null
+            && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) {
+          throw new CorruptIndexException(
+              "parent field is not set but the index has blocks and uses index sorting. indexCreatedVersionMajor: "
+                  + metaData.getCreatedVersionMajor(),
+              "MultiSorter");
+        }
+      }
       reverseMuls[i] = fields[i].getReverse() ? -1 : 1;
     }
     int leafCount = readers.size();

View File

@@ -111,9 +111,15 @@ public class ParallelLeafReader extends LeafReader {
             .filter(Objects::nonNull)
             .findAny()
             .orElse(null);
+    final String parentField =
+        completeReaderSet.stream()
+            .map(r -> r.getFieldInfos().getParentField())
+            .filter(Objects::nonNull)
+            .findAny()
+            .orElse(null);
     // TODO: make this read-only in a cleaner way?
     FieldInfos.Builder builder =
-        new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField));
+        new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField, parentField));
 
     Sort indexSort = null;
     int createdVersionMajor = -1;

View File

@@ -720,7 +720,8 @@ final class ReadersAndUpdates {
         fi.getVectorDimension(),
         fi.getVectorEncoding(),
         fi.getVectorSimilarityFunction(),
-        fi.isSoftDeletesField());
+        fi.isSoftDeletesField(),
+        fi.isParentField());
   }
 
   private SegmentReader createNewReaderWithLatestLiveDocs(SegmentReader reader) throws IOException {

View File

@ -17,9 +17,12 @@
package org.apache.lucene.index; package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.function.Function;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.TimSorter; import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -206,13 +209,33 @@ public final class Sorter {
SortField[] fields = sort.getSort();
final IndexSorter.DocComparator[] comparators = new IndexSorter.DocComparator[fields.length];
Function<IndexSorter.DocComparator, IndexSorter.DocComparator> comparatorWrapper = in -> in;
LeafMetaData metaData = reader.getMetaData();
FieldInfos fieldInfos = reader.getFieldInfos();
if (metaData.hasBlocks() && fieldInfos.getParentField() != null) {
BitSet parents =
BitSet.of(reader.getNumericDocValues(fieldInfos.getParentField()), reader.maxDoc());
comparatorWrapper =
in ->
(docID1, docID2) ->
in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2));
}
if (metaData.hasBlocks()
&& fieldInfos.getParentField() == null
&& metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) {
throw new CorruptIndexException(
"parent field is not set but the index has blocks. indexCreatedVersionMajor: "
+ metaData.getCreatedVersionMajor(),
"Sorter");
}
for (int i = 0; i < fields.length; i++) {
IndexSorter sorter = fields[i].getIndexSorter();
if (sorter == null) {
throw new IllegalArgumentException(
"Cannot use sortfield + " + fields[i] + " to sort indexes");
}
- comparators[i] = sorter.getDocComparator(reader, reader.maxDoc());
+ comparators[i] = comparatorWrapper.apply(sorter.getDocComparator(reader, reader.maxDoc()));
}
return sort(reader.maxDoc(), comparators);
}
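Together with the MultiSorter change above, this enforces the writer-side contract: an index that is sorted and contains document blocks must carry a parent field. A hedged usage sketch of that contract; the directory, the analyzer, and the field names "foo" and "_parent" are illustrative choices, not part of the patch:

import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ByteBuffersDirectory;

public class ParentFieldUsage {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
    iwc.setParentField("_parent"); // required once blocks meet index sorting
    try (IndexWriter writer = new IndexWriter(new ByteBuffersDirectory(), iwc)) {
      Document child = new Document();
      child.add(new NumericDocValuesField("foo", 1));
      Document parent = new Document();
      parent.add(new NumericDocValuesField("foo", 2));
      // the parent document goes last; the block survives sorting intact
      writer.addDocuments(Arrays.asList(child, parent));
    }
  }
}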

View File

@@ -31,7 +31,7 @@ public interface IndexPackageAccess {
void setIndexWriterMaxDocs(int limit);
- FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName);
+ FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName, String parentFieldName);
void checkImpacts(Impacts impacts, int max);

View File

@@ -85,7 +85,6 @@ public final class Sort {
*/
public Sort rewrite(IndexSearcher searcher) throws IOException {
- boolean changed = false;
SortField[] rewrittenSortFields = new SortField[fields.length];
for (int i = 0; i < fields.length; i++) {
rewrittenSortFields[i] = fields[i].rewrite(searcher);
@@ -100,7 +99,6 @@ public final class Sort {
@Override
public String toString() {
StringBuilder buffer = new StringBuilder();
for (int i = 0; i < fields.length; i++) {
buffer.append(fields[i].toString());
if ((i + 1) < fields.length) buffer.append(',');

View File

@@ -1937,4 +1937,97 @@ public class TestAddIndexes extends LuceneTestCase {
targetDir.close();
sourceDir.close();
}
public void testIllegalParentDocChange() throws Exception {
Directory dir1 = newDirectory();
IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
iwc1.setParentField("foobar");
RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
Document parent = new Document();
w1.addDocuments(Arrays.asList(new Document(), new Document(), parent));
w1.commit();
w1.addDocuments(Arrays.asList(new Document(), new Document(), parent));
w1.commit();
// so the index sort is in fact burned into the index:
w1.forceMerge(1);
w1.close();
Directory dir2 = newDirectory();
IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
iwc2.setParentField("foo");
RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
IndexReader r1 = DirectoryReader.open(dir1);
String message =
expectThrows(
IllegalArgumentException.class,
() -> {
w2.addIndexes((SegmentReader) getOnlyLeafReader(r1));
})
.getMessage();
assertEquals(
"can't add field [foobar] as parent document field; this IndexWriter is configured with [foo] as parent document field",
message);
message =
expectThrows(
IllegalArgumentException.class,
() -> {
w2.addIndexes(dir1);
})
.getMessage();
assertEquals(
"can't add field [foobar] as parent document field; this IndexWriter is configured with [foo] as parent document field",
message);
Directory dir3 = newDirectory();
IndexWriterConfig iwc3 = newIndexWriterConfig(new MockAnalyzer(random()));
iwc3.setParentField("foobar");
RandomIndexWriter w3 = new RandomIndexWriter(random(), dir3, iwc3);
w3.addIndexes((SegmentReader) getOnlyLeafReader(r1));
w3.addIndexes(dir1);
IOUtils.close(r1, dir1, w2, dir2, w3, dir3);
}
public void testIllegalNonParentField() throws IOException {
Directory dir1 = newDirectory();
IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
Document parent = new Document();
parent.add(new StringField("foo", "XXX", Field.Store.NO));
w1.addDocument(parent);
w1.close();
Directory dir2 = newDirectory();
IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
iwc2.setParentField("foo");
RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
IndexReader r1 = DirectoryReader.open(dir1);
String message =
expectThrows(
IllegalArgumentException.class,
() -> {
w2.addIndexes((SegmentReader) getOnlyLeafReader(r1));
})
.getMessage();
assertEquals(
"can't add [foo] as non parent document field; this IndexWriter is configured with [foo] as parent document field",
message);
message =
expectThrows(
IllegalArgumentException.class,
() -> {
w2.addIndexes(dir1);
})
.getMessage();
assertEquals(
"can't add [foo] as non parent document field; this IndexWriter is configured with [foo] as parent document field",
message);
IOUtils.close(r1, dir1, w2, dir2);
}
}
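Both tests pin down the rule that IndexWriter#addIndexes only accepts sources whose parent field matches the target writer's configuration. A hedged sketch of the legal setup, with an illustrative field name:

// source and target agree on the parent field, so addIndexes is legal;
// any mismatch throws IllegalArgumentException as asserted above
IndexWriterConfig sourceConfig = new IndexWriterConfig(new MockAnalyzer(random()));
sourceConfig.setParentField("_parent");
IndexWriterConfig targetConfig = new IndexWriterConfig(new MockAnalyzer(random()));
targetConfig.setParentField("_parent");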

View File

@@ -114,6 +114,7 @@ public class TestCodecs extends LuceneTestCase {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false));
}
this.terms = terms;
@@ -229,7 +230,8 @@ public class TestCodecs extends LuceneTestCase {
terms[i] = new TermData(text, docs, null);
}
- final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
+ final FieldInfos.Builder builder =
+ new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null));
final FieldData field = new FieldData("field", builder, terms, true, false);
final FieldData[] fields = new FieldData[] {field};
@@ -292,7 +294,8 @@ public class TestCodecs extends LuceneTestCase {
}
public void testRandomPostings() throws Throwable {
- final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
+ final FieldInfos.Builder builder =
+ new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null));
final FieldData[] fields = new FieldData[NUM_FIELDS];
for (int i = 0; i < NUM_FIELDS; i++) {

View File

@@ -236,7 +236,7 @@ public class TestDoc extends LuceneTestCase {
si,
InfoStream.getDefault(),
trackingDir,
- new FieldInfos.FieldNumbers(null),
+ new FieldInfos.FieldNumbers(null, null),
context);
merger.merge();

View File

@@ -239,7 +239,7 @@ public class TestFieldInfos extends LuceneTestCase {
}
public void testFieldNumbersAutoIncrement() {
- FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes");
+ FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes", "parentDoc");
for (int i = 0; i < 10; i++) {
fieldNumbers.addOrGet(
new FieldInfo(
@@ -258,6 +258,7 @@ public class TestFieldInfos extends LuceneTestCase {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false));
}
int idx =
@@ -278,6 +279,7 @@ public class TestFieldInfos extends LuceneTestCase {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false));
assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
@@ -300,6 +302,7 @@ public class TestFieldInfos extends LuceneTestCase {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false));
assertEquals("Field numbers should reset after clear()", 0, idx);
}

View File

@@ -45,8 +45,7 @@ public class TestFieldsReader extends LuceneTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
testDoc = new Document();
- final String softDeletesFieldName = null;
- fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName));
+ fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null));
DocHelper.setupDoc(testDoc);
for (IndexableField field : testDoc.getFields()) {
IndexableFieldType ift = field.fieldType();
@@ -67,7 +66,8 @@ public class TestFieldsReader extends LuceneTestCase {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
- field.name().equals(softDeletesFieldName)));
+ false,
+ false));
}
dir = newDirectory();
IndexWriterConfig conf =

View File

@@ -2122,6 +2122,10 @@ public class TestIndexSorting extends LuceneTestCase {
public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc1 = newIndexWriterConfig();
boolean useParent = rarely();
if (useParent) {
iwc1.setParentField("___parent");
}
Sort indexSort =
new Sort(
new SortField("foo", SortField.Type.LONG), new SortField("bar", SortField.Type.LONG));
@@ -2154,6 +2158,9 @@ public class TestIndexSorting extends LuceneTestCase {
} else {
iwc.setIndexSort(indexSort);
}
if (useParent) {
iwc.setParentField("___parent");
}
IndexWriter w2 = new IndexWriter(dir2, iwc);
if (useReaders) {
@@ -3165,4 +3172,277 @@ public class TestIndexSorting extends LuceneTestCase {
reader.close();
dir.close();
}
public void testParentFieldNotConfigured() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
IllegalArgumentException ex =
expectThrows(
IllegalArgumentException.class,
() -> {
writer.addDocuments(Arrays.asList(new Document(), new Document()));
});
assertEquals(
"a parent field must be set in order to use document blocks with index sorting; see IndexWriterConfig#setParentField",
ex.getMessage());
}
}
}
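The failure above is purely a configuration issue; a hedged sketch of the fix (the field name is an arbitrary choice):

IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
iwc.setParentField("_parent"); // lifts the restriction on indexing blocks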
public void testBlockContainsParentField() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
String parentField = "parent";
iwc.setParentField(parentField);
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
List<Runnable> runnables =
Arrays.asList(
() -> {
IllegalArgumentException ex =
expectThrows(
IllegalArgumentException.class,
() -> {
Document doc = new Document();
doc.add(new NumericDocValuesField("parent", 0));
writer.addDocuments(Arrays.asList(doc, new Document()));
});
assertEquals(
"\"parent\" is a reserved field and should not be added to any document",
ex.getMessage());
},
() -> {
IllegalArgumentException ex =
expectThrows(
IllegalArgumentException.class,
() -> {
Document doc = new Document();
doc.add(new NumericDocValuesField("parent", 0));
writer.addDocuments(Arrays.asList(new Document(), doc));
});
assertEquals(
"\"parent\" is a reserved field and should not be added to any document",
ex.getMessage());
});
Collections.shuffle(runnables, random());
for (Runnable runnable : runnables) {
runnable.run();
}
}
}
}
public void testIndexSortWithBlocks() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
iwc.setCodec(codec);
String parentField = "parent";
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
iwc.setParentField(parentField);
LogMergePolicy policy = newLogMergePolicy();
// make sure that merge factor is always > 2
if (policy.getMergeFactor() <= 2) {
policy.setMergeFactor(3);
}
iwc.setMergePolicy(policy);
// add already sorted documents
codec.numCalls = 0;
codec.needsIndexSort = false;
try (IndexWriter w = new IndexWriter(dir, iwc)) {
int numDocs = random().nextInt(50, 100);
for (int i = 0; i < numDocs; i++) {
Document child1 = new Document();
child1.add(new StringField("id", Integer.toString(i), Store.YES));
child1.add(new NumericDocValuesField("id", i));
child1.add(new NumericDocValuesField("child", 1));
child1.add(new NumericDocValuesField("foo", random().nextInt()));
Document child2 = new Document();
child2.add(new StringField("id", Integer.toString(i), Store.YES));
child2.add(new NumericDocValuesField("id", i));
child2.add(new NumericDocValuesField("child", 2));
child2.add(new NumericDocValuesField("foo", random().nextInt()));
Document parent = new Document();
parent.add(new StringField("id", Integer.toString(i), Store.YES));
parent.add(new NumericDocValuesField("id", i));
parent.add(new NumericDocValuesField("foo", random().nextInt()));
w.addDocuments(Arrays.asList(child1, child2, parent));
if (rarely()) {
w.commit();
}
}
w.commit();
if (random().nextBoolean()) {
w.forceMerge(1, true);
}
}
try (DirectoryReader reader = DirectoryReader.open(dir)) {
for (LeafReaderContext ctx : reader.leaves()) {
LeafReader leaf = ctx.reader();
NumericDocValues parentDISI = leaf.getNumericDocValues(parentField);
NumericDocValues ids = leaf.getNumericDocValues("id");
NumericDocValues children = leaf.getNumericDocValues("child");
int doc;
int expectedDocID = 2;
while ((doc = parentDISI.nextDoc()) != NO_MORE_DOCS) {
assertEquals(-1, parentDISI.longValue());
assertEquals(expectedDocID, doc);
int id = ids.nextDoc();
long child1ID = ids.longValue();
assertEquals(id, children.nextDoc());
long child1 = children.longValue();
assertEquals(1, child1);
id = ids.nextDoc();
long child2ID = ids.longValue();
assertEquals(id, children.nextDoc());
long child2 = children.longValue();
assertEquals(2, child2);
int idParent = ids.nextDoc();
assertEquals(id + 1, idParent);
long parent = ids.longValue();
assertEquals(child1ID, parent);
assertEquals(child2ID, parent);
expectedDocID += 3;
}
}
}
}
}
@SuppressWarnings("fallthrough")
public void testMixRandomDocumentsWithBlocks() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
iwc.setCodec(codec);
String parentField = "parent";
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
iwc.setParentField(parentField);
RandomIndexWriter randomIndexWriter = new RandomIndexWriter(random(), dir, iwc);
int numDocs = random().nextInt(100, 1000);
for (int i = 0; i < numDocs; i++) {
if (rarely()) {
randomIndexWriter.deleteDocuments(new Term("id", "" + random().nextInt(0, i + 1)));
}
List<Document> docs = new ArrayList<>();
switch (random().nextInt(100) % 5) {
case 4:
Document child3 = new Document();
child3.add(new StringField("id", Integer.toString(i), Store.YES));
child3.add(new NumericDocValuesField("type", 2));
child3.add(new NumericDocValuesField("child_ord", 3));
child3.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(child3);
case 3:
Document child2 = new Document();
child2.add(new StringField("id", Integer.toString(i), Store.YES));
child2.add(new NumericDocValuesField("type", 2));
child2.add(new NumericDocValuesField("child_ord", 2));
child2.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(child2);
case 2:
Document child1 = new Document();
child1.add(new StringField("id", Integer.toString(i), Store.YES));
child1.add(new NumericDocValuesField("type", 2));
child1.add(new NumericDocValuesField("child_ord", 1));
child1.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(child1);
case 1:
Document root = new Document();
root.add(new StringField("id", Integer.toString(i), Store.YES));
root.add(new NumericDocValuesField("type", 1));
root.add(new NumericDocValuesField("num_children", docs.size()));
root.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(root);
randomIndexWriter.addDocuments(docs);
break;
case 0:
Document single = new Document();
single.add(new StringField("id", Integer.toString(i), Store.YES));
single.add(new NumericDocValuesField("type", 0));
single.add(new NumericDocValuesField("foo", random().nextInt()));
randomIndexWriter.addDocument(single);
}
if (rarely()) {
randomIndexWriter.forceMerge(1);
}
randomIndexWriter.commit();
}
randomIndexWriter.close();
try (DirectoryReader reader = DirectoryReader.open(dir)) {
for (LeafReaderContext ctx : reader.leaves()) {
LeafReader leaf = ctx.reader();
NumericDocValues parentDISI = leaf.getNumericDocValues(parentField);
assertNotNull(parentDISI);
NumericDocValues type = leaf.getNumericDocValues("type");
NumericDocValues childOrd = leaf.getNumericDocValues("child_ord");
NumericDocValues numChildren = leaf.getNumericDocValues("num_children");
int numCurrentChildren = 0;
int totalPendingChildren = 0;
String childId = null;
for (int i = 0; i < leaf.maxDoc(); i++) {
if (leaf.getLiveDocs() == null || leaf.getLiveDocs().get(i)) {
assertTrue(type.advanceExact(i));
int typeValue = (int) type.longValue();
switch (typeValue) {
case 2:
assertFalse(parentDISI.advanceExact(i));
assertTrue(childOrd.advanceExact(i));
if (numCurrentChildren == 0) { // first child
childId = leaf.storedFields().document(i).get("id");
totalPendingChildren = (int) childOrd.longValue() - 1;
} else {
assertNotNull(childId);
assertEquals(totalPendingChildren--, childOrd.longValue());
assertEquals(childId, leaf.storedFields().document(i).get("id"));
}
numCurrentChildren++;
break;
case 1:
assertTrue(parentDISI.advanceExact(i));
assertEquals(-1, parentDISI.longValue());
if (childOrd != null) {
assertFalse(childOrd.advanceExact(i));
}
assertTrue(numChildren.advanceExact(i));
assertEquals(0, totalPendingChildren);
assertEquals(numCurrentChildren, numChildren.longValue());
if (numCurrentChildren > 0) {
assertEquals(childId, leaf.storedFields().document(i).get("id"));
} else {
assertNull(childId);
}
numCurrentChildren = 0;
childId = null;
break;
case 0:
assertTrue(parentDISI.advanceExact(i));
assertEquals(-1, parentDISI.longValue());
if (childOrd != null) {
assertFalse(childOrd.advanceExact(i));
}
if (numChildren != null) {
assertFalse(numChildren.advanceExact(i));
}
break;
default:
fail();
}
}
}
}
}
}
}
}
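The two block tests above rely on an invariant worth spelling out: the parent field's doc values mark the last document of every block (and every standalone top-level document), so block boundaries are recoverable from a leaf without extra metadata. A hedged reader-side sketch, assuming an index written with setParentField("_parent"):

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;

public class BlockBoundarySketch {
  // prints the [first, last] docID range of every block in every segment
  static void printBlocks(Directory dir) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      for (LeafReaderContext ctx : reader.leaves()) {
        NumericDocValues parents = ctx.reader().getNumericDocValues("_parent");
        if (parents == null) {
          continue; // segment written without a parent field
        }
        int blockStart = 0;
        int parentDoc;
        while ((parentDoc = parents.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          // the parent is always the last document of its block
          System.out.println("block: [" + blockStart + ", " + parentDoc + "]");
          blockStart = parentDoc + 1;
        }
      }
    }
  }
}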

View File

@@ -1722,6 +1722,44 @@ public class TestIndexWriter extends LuceneTestCase {
}
}
public void testSingleDocsDoNotTriggerHasBlocks() throws IOException {
try (Directory dir = newDirectory()) {
try (IndexWriter w =
new IndexWriter(
dir,
new IndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(Integer.MAX_VALUE)
.setRAMBufferSizeMB(100))) {
int docs = random().nextInt(1, 100);
for (int i = 0; i < docs; i++) {
Document doc = new Document();
doc.add(new StringField("id", "" + i, Field.Store.NO));
w.addDocuments(Arrays.asList(doc));
}
w.commit();
SegmentInfos si = w.cloneSegmentInfos();
assertEquals(1, si.size());
assertFalse(si.asList().get(0).info.getHasBlocks());
Document doc = new Document();
doc.add(new StringField("id", "XXX", Field.Store.NO));
w.addDocuments(Arrays.asList(doc, doc));
w.commit();
si = w.cloneSegmentInfos();
assertEquals(2, si.size());
assertFalse(si.asList().get(0).info.getHasBlocks());
assertTrue(si.asList().get(1).info.getHasBlocks());
w.forceMerge(1);
w.commit();
si = w.cloneSegmentInfos();
assertEquals(1, si.size());
assertTrue(si.asList().get(0).info.getHasBlocks());
}
}
}
public void testCarryOverHasBlocks() throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter w =
@@ -4790,4 +4828,89 @@ public class TestIndexWriter extends LuceneTestCase {
doc.add(newField(field, "value", storedTextType));
writer.addDocument(doc);
}
public void testParentAndSoftDeletesAreTheSame() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig indexWriterConfig = newIndexWriterConfig(new MockAnalyzer(random()));
indexWriterConfig.setSoftDeletesField("foo");
indexWriterConfig.setParentField("foo");
IllegalArgumentException iae =
expectThrows(
IllegalArgumentException.class, () -> new IndexWriter(dir, indexWriterConfig));
assertEquals(
"parent document and soft-deletes field can't be the same field \"foo\"",
iae.getMessage());
}
}
public void testIndexWithParentFieldIsCongruent() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setParentField("parent");
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
if (random().nextBoolean()) {
Document child1 = new Document();
child1.add(new StringField("id", Integer.toString(1), Field.Store.YES));
Document child2 = new Document();
child2.add(new StringField("id", Integer.toString(1), Field.Store.YES));
Document parent = new Document();
parent.add(new StringField("id", Integer.toString(1), Field.Store.YES));
writer.addDocuments(Arrays.asList(child1, child2, parent));
writer.flush();
if (random().nextBoolean()) {
writer.addDocuments(Arrays.asList(child1, child2, parent));
}
} else {
writer.addDocument(new Document());
}
writer.commit();
}
IllegalArgumentException ex =
expectThrows(
IllegalArgumentException.class,
() -> {
IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
config.setParentField("someOtherField");
new IndexWriter(dir, config);
});
assertEquals(
"can't add field [parent] as parent document field; this IndexWriter is configured with [someOtherField] as parent document field",
ex.getMessage());
ex =
expectThrows(
IllegalArgumentException.class,
() -> {
IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
new IndexWriter(dir, config);
});
assertEquals(
"can't add field [parent] as parent document field; this IndexWriter has no parent document field configured",
ex.getMessage());
}
}
public void testParentFieldIsAlreadyUsed() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
Document doc = new Document();
doc.add(new StringField("parent", Integer.toString(1), Field.Store.YES));
writer.addDocument(doc);
writer.commit();
}
IllegalArgumentException iae =
expectThrows(
IllegalArgumentException.class,
() -> {
IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
config.setParentField("parent");
new IndexWriter(dir, config);
});
assertEquals(
"can't add [parent] as non parent document field; this IndexWriter is configured with [parent] as parent document field",
iae.getMessage());
}
}
}
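Taken together, these tests document the IndexWriterConfig invariants: the parent field must differ from the soft-deletes field, must stay stable across writer sessions on the same index, and must never have been used as a regular field. A hedged sketch of a valid configuration, with illustrative names:

IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setSoftDeletesField("__soft_deletes"); // reserved field for soft deletes
iwc.setParentField("__parent"); // distinct reserved field marking block parents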

View File

@@ -199,7 +199,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
- true);
+ true,
+ false);
List<Integer> docsDeleted = Arrays.asList(1, 3, 7, 8, DocIdSetIterator.NO_MORE_DOCS);
List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 10, true));
for (DocValuesFieldUpdates update : updates) {
@@ -237,7 +238,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
- true);
+ true,
+ false);
for (DocValuesFieldUpdates update : updates) {
deletes.onDocValuesUpdate(fieldInfo, update.iterator());
}
@@ -301,7 +303,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
- true);
+ true,
+ false);
List<Integer> docsDeleted = Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS);
List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 3, true));
for (DocValuesFieldUpdates update : updates) {
@@ -370,7 +373,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
- true);
+ true,
+ false);
List<DocValuesFieldUpdates> updates =
Arrays.asList(singleUpdate(Arrays.asList(0, 1, DocIdSetIterator.NO_MORE_DOCS), 3, false));
for (DocValuesFieldUpdates update : updates) {
@@ -407,7 +411,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
- true);
+ true,
+ false);
updates = Arrays.asList(singleUpdate(Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS), 3, true));
for (DocValuesFieldUpdates update : updates) {
deletes.onDocValuesUpdate(fieldInfo, update.iterator());

View File

@@ -104,7 +104,7 @@ public class TestSegmentMerger extends LuceneTestCase {
si,
InfoStream.getDefault(),
mergedDir,
- new FieldInfos.FieldNumbers(null),
+ new FieldInfos.FieldNumbers(null, null),
newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
MergeState mergeState = merger.merge();
int docsMerged = mergeState.segmentInfo.maxDoc();

View File

@@ -1297,7 +1297,8 @@ public class TestSortOptimization extends LuceneTestCase {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.DOT_PRODUCT,
- fi.isSoftDeletesField());
+ fi.isSoftDeletesField(),
+ fi.isParentField());
newInfos[i] = noIndexFI;
i++;
}

View File

@@ -183,9 +183,13 @@ public class TestTopFieldCollector extends LuceneTestCase {
dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE).setIndexSort(sort));
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 3));
- w.addDocuments(Arrays.asList(doc, doc, doc, doc));
+ for (Document d : Arrays.asList(doc, doc, doc, doc)) {
+ w.addDocument(d);
+ }
w.flush();
- w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
+ for (Document d : Arrays.asList(doc, doc, doc, doc, doc, doc)) {
+ w.addDocument(d);
+ }
w.flush();
IndexReader reader = DirectoryReader.open(w);
assertEquals(2, reader.leaves().size());

View File

@@ -103,6 +103,7 @@ public class TermVectorLeafReader extends LeafReader {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
fieldInfos = new FieldInfos(new FieldInfo[] {fieldInfo});
}

View File

@@ -736,6 +736,7 @@ public class MemoryIndex {
fieldType.vectorDimension(),
fieldType.vectorEncoding(),
fieldType.vectorSimilarityFunction(),
false,
false);
}
@@ -789,7 +790,8 @@ public class MemoryIndex {
info.fieldInfo.getVectorDimension(),
info.fieldInfo.getVectorEncoding(),
info.fieldInfo.getVectorSimilarityFunction(),
- info.fieldInfo.isSoftDeletesField());
+ info.fieldInfo.isSoftDeletesField(),
+ info.fieldInfo.isParentField());
} else if (existingDocValuesType != docValuesType) {
throw new IllegalArgumentException(
"Can't add ["

View File

@@ -78,7 +78,7 @@ public class DummyCompressingCodec extends CompressingCodec {
@Override
public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException {
- out.copyBytes(buffersInput, buffersInput.size());
+ out.copyBytes(buffersInput, buffersInput.length());
}
@Override

View File

@@ -68,7 +68,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);
- FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
+ FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();
codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
@@ -96,7 +96,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
fi.putAttribute("foo", "bar");
fi.putAttribute("bar", "baz");
- FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
+ FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();
codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
@@ -136,7 +136,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);
- FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
+ FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();
fail.setDoFail();
expectThrows(
@@ -171,7 +171,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);
- FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
+ FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();
fail.setDoFail();
expectThrows(
@@ -206,7 +206,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);
- FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
+ FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();
codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
@@ -243,7 +243,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);
- FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
+ FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();
codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
@@ -276,7 +276,9 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
String softDeletesField =
random().nextBoolean() ? TestUtil.randomUnicodeString(random()) : null;
- var builder = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(softDeletesField);
+ String parentField = random().nextBoolean() ? TestUtil.randomUnicodeString(random()) : null;
+ var builder = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(softDeletesField, parentField);
for (String field : fieldNames) {
IndexableFieldType fieldType = randomFieldType(random(), field);
@@ -307,7 +309,8 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
fieldType.vectorDimension(),
fieldType.vectorEncoding(),
fieldType.vectorSimilarityFunction(),
- field.equals(softDeletesField));
+ field.equals(softDeletesField),
+ field.equals(parentField));
addAttributes(fi);
builder.add(fi);
}
@@ -431,6 +434,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
}
}

View File

@@ -365,7 +365,8 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
proto.getVectorDimension(),
proto.getVectorEncoding(),
proto.getVectorSimilarityFunction(),
- proto.isSoftDeletesField());
+ proto.isSoftDeletesField(),
+ proto.isParentField());
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] {field});

View File

@@ -80,6 +80,33 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
dir.close();
}
public void testHasBlocks() throws IOException {
assumeTrue("test requires a codec that can read/write hasBlocks", supportsHasBlocks());
Directory dir = newDirectory();
Codec codec = getCodec();
byte[] id = StringHelper.randomId();
SegmentInfo info =
new SegmentInfo(
dir,
getVersions()[0],
getVersions()[0],
"_123",
1,
false,
random().nextBoolean(),
codec,
Collections.emptyMap(),
id,
Collections.emptyMap(),
null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
assertEquals(info.getHasBlocks(), info2.getHasBlocks());
dir.close();
}
/** Tests SI writer adds itself to files... */
public void testAddsSelfToFiles() throws Exception {
Directory dir = newDirectory();
@@ -260,6 +287,10 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
return true;
}
protected boolean supportsHasBlocks() {
return true;
}
private SortField randomIndexSortField() {
boolean reversed = random().nextBoolean();
SortField sortField;
@@ -360,7 +391,11 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
for (int j = 0; j < numSortFields; ++j) {
sortFields[j] = randomIndexSortField();
}
- sort = new Sort(sortFields);
+ if (supportsHasBlocks()) {
sort = new Sort(sortFields);
} else {
sort = new Sort(sortFields);
}
}
Directory dir = newDirectory();

View File

@@ -112,7 +112,8 @@ public class MismatchedLeafReader extends FilterLeafReader {
oldInfo.getVectorEncoding(), // numeric type of vector samples
// distance function for calculating similarity of the field's vector
oldInfo.getVectorSimilarityFunction(),
- oldInfo.isSoftDeletesField()); // used as soft-deletes field
+ oldInfo.isSoftDeletesField(), // used as soft-deletes field
+ oldInfo.isParentField());
shuffled.set(i, newInfo);
}

View File

@@ -158,6 +158,7 @@ public class RandomIndexWriter implements Closeable {
} else {
softDeletesRatio = 0d;
}
w = mockIndexWriter(dir, c, r);
config = w.getConfig();
flushAt = TestUtil.nextInt(r, 10, 1000);

View File

@@ -165,6 +165,7 @@ public class RandomPostingsTester {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
fieldUpto++;
@@ -738,6 +739,7 @@ public class RandomPostingsTester {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
}