From df6bd25ce44ead0b38cffca6ef9bc3a9ce63dbea Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 11 Jan 2024 16:11:15 +0100 Subject: [PATCH] Add support for index sorting with document blocks (#12829) Today index sorting will most likely break document blocks added with `IndexWriter#addDocuments(...)` and `#updateDocuments(...)` since the index sorter has no indication of what documents are part of a block. This change automatically adds a marker field to parent documents if configured in `IWC`. These marker documents are optional unless document blocks are indexed and index sorting is configured. In this case indexing blocks will fail unless a parent field is configured. Index sorting will preserve document blocks during sort. Documents within a block not be reordered by the sorting algorithm and will sort along side their parent documents. Relates to #12711 --- lucene/CHANGES.txt | 11 + lucene/MIGRATE.md | 8 + .../lucene60/Lucene60FieldInfosFormat.java | 3 +- .../lucene90/Lucene90FieldInfosFormat.java | 3 +- .../TestLucene70SegmentInfoFormat.java | 5 + .../TestLucene86SegmentInfoFormat.java | 5 + .../TestLucene90SegmentInfoFormat.java | 5 + .../TestBackwardsCompatibility.java | 79 +++++ .../SimpleTextFieldInfosFormat.java | 11 +- .../SimpleTextSegmentInfoFormat.java | 9 +- .../codecs/uniformsplit/TestBlockWriter.java | 3 +- .../sharedterms/TestSTBlockReader.java | 1 + .../lucene94/Lucene94FieldInfosFormat.java | 36 ++- .../org/apache/lucene/index/CheckIndex.java | 36 ++- .../index/DocumentsWriterPerThread.java | 68 ++++- .../org/apache/lucene/index/FieldInfo.java | 21 +- .../org/apache/lucene/index/FieldInfos.java | 100 ++++++- .../org/apache/lucene/index/IndexWriter.java | 9 +- .../lucene/index/IndexWriterConfig.java | 16 + .../apache/lucene/index/IndexingChain.java | 134 ++++++++- .../lucene/index/LiveIndexWriterConfig.java | 9 + .../org/apache/lucene/index/MultiSorter.java | 27 ++ .../lucene/index/ParallelLeafReader.java | 8 +- 
.../lucene/index/ReadersAndUpdates.java | 3 +- .../java/org/apache/lucene/index/Sorter.java | 25 +- .../internal/tests/IndexPackageAccess.java | 2 +- .../java/org/apache/lucene/search/Sort.java | 2 - .../apache/lucene/index/TestAddIndexes.java | 93 ++++++ .../org/apache/lucene/index/TestCodecs.java | 7 +- .../test/org/apache/lucene/index/TestDoc.java | 2 +- .../apache/lucene/index/TestFieldInfos.java | 5 +- .../apache/lucene/index/TestFieldsReader.java | 6 +- .../apache/lucene/index/TestIndexSorting.java | 280 ++++++++++++++++++ .../apache/lucene/index/TestIndexWriter.java | 123 ++++++++ .../lucene/index/TestPendingSoftDeletes.java | 15 +- .../lucene/index/TestSegmentMerger.java | 2 +- .../lucene/search/TestSortOptimization.java | 3 +- .../lucene/search/TestTopFieldCollector.java | 8 +- .../highlight/TermVectorLeafReader.java | 1 + .../lucene/index/memory/MemoryIndex.java | 4 +- .../dummy/DummyCompressingCodec.java | 2 +- .../index/BaseFieldInfoFormatTestCase.java | 20 +- .../index/BaseIndexFileFormatTestCase.java | 3 +- .../index/BaseSegmentInfoFormatTestCase.java | 37 ++- .../tests/index/MismatchedLeafReader.java | 3 +- .../lucene/tests/index/RandomIndexWriter.java | 1 + .../tests/index/RandomPostingsTester.java | 2 + 47 files changed, 1173 insertions(+), 83 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2a45e9a5b10..97f61ee850a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -90,6 +90,11 @@ New Features * LUCENE-10626 Hunspell: add tools to aid dictionary editing: analysis introspection, stem expansion and stem/flag suggestion (Peter Gromov) +* GITHUB#12829: For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via + IndexWriter#addDocuments or IndexWriter#updateDocuments also when index sorting is configured. Document blocks are + maintained alongside their parent documents during sort and merge. 
IndexWriterConfig now requires a parent field to be + specified if index sorting is used together with document blocks. (Simon Willnauer) + Improvements --------------------- @@ -131,6 +136,12 @@ Bug Fixes * GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those of DoubleValues#doubleValue(). (Uwe Schindler) +Changes in Backwards Compatibility Policy +----------------------------------------- + +* GITHUB#12829: IndexWriter#addDocuments or IndexWriter#updateDocuments now require a parent field name to be + specified in IndexWriterConfig is documents blocks are indexed and index time sorting is configured. (Simon Willnauer) + Other --------------------- diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md index 75f7c5b4eeb..698548fdfd7 100644 --- a/lucene/MIGRATE.md +++ b/lucene/MIGRATE.md @@ -19,6 +19,14 @@ ## Migration from Lucene 9.x to Lucene 10.0 +### IndexWriter requires a parent document field in order to use index sorting with document blocks (GITHUB#12829) + +For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via +IndexWriter#addDocuments or IndexWriter#updateDocuments when index sorting is configured. Document blocks are maintained +alongside their parent documents during sort and merge. The internally used parent field must be configured in +IndexWriterConfig only if index sorting is used together with documents blocks. See `IndexWriterConfig#setParendField` +for reference. + ### Minor API changes in MatchHighlighter and MatchRegionRetriever. (GITHUB#12881) The API of interfaces for accepting highlights has changed to allow performance improvements. 
Look at the issue and the PR diff to get diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/Lucene60FieldInfosFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/Lucene60FieldInfosFormat.java index 63347e18bab..a3e09db8ae9 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/Lucene60FieldInfosFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/Lucene60FieldInfosFormat.java @@ -217,7 +217,8 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - isSoftDeletesField); + isSoftDeletesField, + false); } catch (IllegalStateException e) { throw new CorruptIndexException( "invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90FieldInfosFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90FieldInfosFormat.java index b2b05c49614..22eb4558e3b 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90FieldInfosFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90FieldInfosFormat.java @@ -194,7 +194,8 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat { vectorDimension, VectorEncoding.FLOAT32, vectorDistFunc, - isSoftDeletesField); + isSoftDeletesField, + false); infos[i].checkConsistency(); } catch (IllegalStateException e) { throw new CorruptIndexException( diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/TestLucene70SegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/TestLucene70SegmentInfoFormat.java index f3d777de28b..81482eff07c 100644 --- 
a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/TestLucene70SegmentInfoFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene70/TestLucene70SegmentInfoFormat.java @@ -35,4 +35,9 @@ public class TestLucene70SegmentInfoFormat extends BaseSegmentInfoFormatTestCase protected Codec getCodec() { return new Lucene84RWCodec(); } + + @Override + protected boolean supportsHasBlocks() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/TestLucene86SegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/TestLucene86SegmentInfoFormat.java index c8901616763..7388dcf8210 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/TestLucene86SegmentInfoFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene86/TestLucene86SegmentInfoFormat.java @@ -35,4 +35,9 @@ public class TestLucene86SegmentInfoFormat extends BaseSegmentInfoFormatTestCase protected Codec getCodec() { return new Lucene87RWCodec(); } + + @Override + protected boolean supportsHasBlocks() { + return false; + } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90SegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90SegmentInfoFormat.java index 53a8a01a440..9ecc3490641 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90SegmentInfoFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene90/TestLucene90SegmentInfoFormat.java @@ -32,4 +32,9 @@ public class TestLucene90SegmentInfoFormat extends BaseSegmentInfoFormatTestCase protected Codec getCodec() { return new Lucene90RWCodec(); } + + @Override + protected boolean supportsHasBlocks() { + return false; + } } diff --git 
a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java index acb3826d270..8becc84f7f0 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java @@ -99,6 +99,8 @@ import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FieldExistsQuery; @@ -2162,6 +2164,83 @@ public class TestBackwardsCompatibility extends LuceneTestCase { } } + public void testSortedIndexAddDocBlocks() throws Exception { + for (String name : oldSortedNames) { + Path path = createTempDir("sorted"); + InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name + ".zip"); + assertNotNull("Sorted index index " + name + " not found", resource); + TestUtil.unzip(resource, path); + + try (Directory dir = newFSDirectory(path)) { + final Sort sort; + try (DirectoryReader reader = DirectoryReader.open(dir)) { + assertEquals(1, reader.leaves().size()); + sort = reader.leaves().get(0).reader().getMetaData().getSort(); + assertNotNull(sort); + searchExampleIndex(reader); + } + // open writer + try (IndexWriter writer = + new IndexWriter( + dir, + newIndexWriterConfig(new MockAnalyzer(random())) + .setOpenMode(OpenMode.APPEND) + .setIndexSort(sort) + .setMergePolicy(newLogMergePolicy()))) { + // add 10 docs + for (int i = 0; i < 10; i++) { + Document child = new Document(); + child.add(new StringField("relation", "child", Field.Store.NO)); + child.add(new 
StringField("bid", "" + i, Field.Store.NO)); + child.add(new NumericDocValuesField("dateDV", i)); + Document parent = new Document(); + parent.add(new StringField("relation", "parent", Field.Store.NO)); + parent.add(new StringField("bid", "" + i, Field.Store.NO)); + parent.add(new NumericDocValuesField("dateDV", i)); + writer.addDocuments(Arrays.asList(child, child, parent)); + if (random().nextBoolean()) { + writer.flush(); + } + } + if (random().nextBoolean()) { + writer.forceMerge(1); + } + writer.commit(); + try (IndexReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = new IndexSearcher(reader); + for (int i = 0; i < 10; i++) { + TopDocs children = + searcher.search( + new BooleanQuery.Builder() + .add( + new TermQuery(new Term("relation", "child")), + BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST) + .build(), + 2); + TopDocs parents = + searcher.search( + new BooleanQuery.Builder() + .add( + new TermQuery(new Term("relation", "parent")), + BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST) + .build(), + 2); + assertEquals(2, children.totalHits.value); + assertEquals(1, parents.totalHits.value); + // make sure it's sorted + assertEquals(children.scoreDocs[0].doc + 1, children.scoreDocs[1].doc); + assertEquals(children.scoreDocs[1].doc + 1, parents.scoreDocs[0].doc); + } + } + } + // This will confirm the docs are really sorted + TestUtil.checkIndex(dir); + } + } + } + private void searchExampleIndex(DirectoryReader reader) throws IOException { IndexSearcher searcher = newSearcher(reader); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java index d2c8f563c8d..21cfe9b613f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java +++ 
b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java @@ -72,6 +72,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { static final BytesRef VECTOR_ENCODING = new BytesRef(" vector encoding "); static final BytesRef VECTOR_SIMILARITY = new BytesRef(" vector similarity "); static final BytesRef SOFT_DELETES = new BytesRef(" soft-deletes "); + static final BytesRef PARENT = new BytesRef(" parent "); @Override public FieldInfos read( @@ -170,6 +171,9 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { SimpleTextUtil.readLine(input, scratch); assert StringHelper.startsWith(scratch.get(), SOFT_DELETES); boolean isSoftDeletesField = Boolean.parseBoolean(readString(SOFT_DELETES.length, scratch)); + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), PARENT); + boolean isParentField = Boolean.parseBoolean(readString(PARENT.length, scratch)); infos[i] = new FieldInfo( @@ -188,7 +192,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { vectorNumDimensions, vectorEncoding, vectorDistFunc, - isSoftDeletesField); + isSoftDeletesField, + isParentField); } SimpleTextUtil.checkFooter(input); @@ -320,6 +325,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { SimpleTextUtil.write(out, SOFT_DELETES); SimpleTextUtil.write(out, Boolean.toString(fi.isSoftDeletesField()), scratch); SimpleTextUtil.writeNewline(out); + + SimpleTextUtil.write(out, PARENT); + SimpleTextUtil.write(out, Boolean.toString(fi.isParentField()), scratch); + SimpleTextUtil.writeNewline(out); } SimpleTextUtil.writeChecksum(out, scratch); success = true; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java index 5480c0fec0c..2f707e1241b 100644 --- 
a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java @@ -196,7 +196,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { sortField[i] = SortFieldProvider.forName(provider).readSortField(bytes); assert bytes.eof(); } - Sort indexSort = sortField.length == 0 ? null : new Sort(sortField); + + final Sort indexSort; + if (sortField.length == 0) { + indexSort = null; + } else { + indexSort = new Sort(sortField); + } SimpleTextUtil.checkFooter(input); @@ -335,7 +341,6 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { SimpleTextUtil.write(output, b.bytes.get().toString(), scratch); SimpleTextUtil.writeNewline(output); } - SimpleTextUtil.writeChecksum(output, scratch); } } diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestBlockWriter.java b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestBlockWriter.java index 46a78341a5c..2ee6b8fd2d2 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestBlockWriter.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestBlockWriter.java @@ -119,6 +119,7 @@ public class TestBlockWriter extends LuceneTestCase { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - true); + true, + false); } } diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/TestSTBlockReader.java b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/TestSTBlockReader.java index 4571a49243d..bf2b0133240 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/TestSTBlockReader.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/TestSTBlockReader.java @@ -206,6 +206,7 @@ public class TestSTBlockReader extends LuceneTestCase { 0, VectorEncoding.FLOAT32, 
VectorSimilarityFunction.EUCLIDEAN, + false, false); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java index 99352053a55..97c05435b96 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java @@ -131,13 +131,14 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat { Throwable priorE = null; FieldInfo[] infos = null; try { - CodecUtil.checkIndexHeader( - input, - Lucene94FieldInfosFormat.CODEC_NAME, - Lucene94FieldInfosFormat.FORMAT_START, - Lucene94FieldInfosFormat.FORMAT_CURRENT, - segmentInfo.getId(), - segmentSuffix); + int format = + CodecUtil.checkIndexHeader( + input, + Lucene94FieldInfosFormat.CODEC_NAME, + Lucene94FieldInfosFormat.FORMAT_START, + Lucene94FieldInfosFormat.FORMAT_CURRENT, + segmentInfo.getId(), + segmentSuffix); final int size = input.readVInt(); // read in the size infos = new FieldInfo[size]; @@ -157,6 +158,18 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat { boolean omitNorms = (bits & OMIT_NORMS) != 0; boolean storePayloads = (bits & STORE_PAYLOADS) != 0; boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0; + boolean isParentField = + format >= FORMAT_PARENT_FIELD ? 
(bits & PARENT_FIELD_FIELD) != 0 : false; + + if ((bits & 0xE0) != 0) { + throw new CorruptIndexException( + "unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input); + } + if (format < FORMAT_PARENT_FIELD && (bits & 0xF0) != 0) { + throw new CorruptIndexException( + "parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"", + input); + } final IndexOptions indexOptions = getIndexOptions(input, input.readByte()); @@ -200,7 +213,8 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat { vectorDimension, vectorEncoding, vectorDistFunc, - isSoftDeletesField); + isSoftDeletesField, + isParentField); infos[i].checkConsistency(); } catch (IllegalStateException e) { throw new CorruptIndexException( @@ -348,6 +362,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat { if (fi.omitsNorms()) bits |= OMIT_NORMS; if (fi.hasPayloads()) bits |= STORE_PAYLOADS; if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD; + if (fi.isParentField()) bits |= PARENT_FIELD_FIELD; output.writeByte(bits); output.writeByte(indexOptionsByte(fi.getIndexOptions())); @@ -375,11 +390,14 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat { // Codec header static final String CODEC_NAME = "Lucene94FieldInfos"; static final int FORMAT_START = 0; - static final int FORMAT_CURRENT = FORMAT_START; + // this doesn't actually change the file format but uses up one more bit an existing bit pattern + static final int FORMAT_PARENT_FIELD = 1; + static final int FORMAT_CURRENT = FORMAT_PARENT_FIELD; // Field flags static final byte STORE_TERMVECTOR = 0x1; static final byte OMIT_NORMS = 0x2; static final byte STORE_PAYLOADS = 0x4; static final byte SOFT_DELETES_FIELD = 0x8; + static final byte PARENT_FIELD_FIELD = 0x10; } diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index ca884be5b7e..00d79fa8934 100644 --- 
a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1176,34 +1176,46 @@ public final class CheckIndex implements Closeable { comparators[i] = fields[i].getComparator(1, Pruning.NONE).getLeafComparator(readerContext); } - int maxDoc = reader.maxDoc(); - try { - - for (int docID = 1; docID < maxDoc; docID++) { - + LeafMetaData metaData = reader.getMetaData(); + FieldInfos fieldInfos = reader.getFieldInfos(); + if (metaData.hasBlocks() + && fieldInfos.getParentField() == null + && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) { + throw new IllegalStateException( + "parent field is not set but the index has document blocks and was created with version: " + + metaData.getCreatedVersionMajor()); + } + final DocIdSetIterator iter; + if (metaData.hasBlocks() && fieldInfos.getParentField() != null) { + iter = reader.getNumericDocValues(fieldInfos.getParentField()); + } else { + iter = DocIdSetIterator.all(reader.maxDoc()); + } + int prevDoc = iter.nextDoc(); + int nextDoc; + while ((nextDoc = iter.nextDoc()) != NO_MORE_DOCS) { int cmp = 0; - for (int i = 0; i < comparators.length; i++) { - // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co, + // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co, // the segments are always the same here... 
- comparators[i].copy(0, docID - 1); + comparators[i].copy(0, prevDoc); comparators[i].setBottom(0); - cmp = reverseMul[i] * comparators[i].compareBottom(docID); + cmp = reverseMul[i] * comparators[i].compareBottom(nextDoc); if (cmp != 0) { break; } } - if (cmp > 0) { throw new CheckIndexException( "segment has indexSort=" + sort + " but docID=" - + (docID - 1) + + (prevDoc) + " sorts after docID=" - + docID); + + nextDoc); } + prevDoc = nextDoc; } msg( infoStream, diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 57ada3a5602..4a0f2f71666 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -21,14 +21,17 @@ import java.text.NumberFormat; import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Locale; +import java.util.NoSuchElementException; import java.util.Objects; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; @@ -134,9 +137,11 @@ final class DocumentsWriterPerThread implements Accountable { private final ReentrantLock lock = new ReentrantLock(); private int[] deleteDocIDs = new int[0]; private int numDeletedDocIds = 0; + private final int indexMajorVersionCreated; + private final IndexingChain.ReservedField parentField; DocumentsWriterPerThread( - int indexVersionCreated, + int indexMajorVersionCreated, String segmentName, Directory directoryOrig, Directory directory, 
@@ -145,6 +150,7 @@ final class DocumentsWriterPerThread implements Accountable { FieldInfos.Builder fieldInfos, AtomicLong pendingNumDocs, boolean enableTestPoints) { + this.indexMajorVersionCreated = indexMajorVersionCreated; this.directory = new TrackingDirectoryWrapper(directory); this.fieldInfos = fieldInfos; this.indexWriterConfig = indexWriterConfig; @@ -183,12 +189,19 @@ final class DocumentsWriterPerThread implements Accountable { this.enableTestPoints = enableTestPoints; indexingChain = new IndexingChain( - indexVersionCreated, + indexMajorVersionCreated, segmentInfo, this.directory, fieldInfos, indexWriterConfig, this::onAbortingException); + if (indexWriterConfig.getParentField() != null) { + this.parentField = + indexingChain.markAsReserved( + new NumericDocValuesField(indexWriterConfig.getParentField(), -1)); + } else { + this.parentField = null; + } } final void testPoint(String message) { @@ -231,7 +244,23 @@ final class DocumentsWriterPerThread implements Accountable { final int docsInRamBefore = numDocsInRAM; boolean allDocsIndexed = false; try { - for (Iterable doc : docs) { + final Iterator> iterator = docs.iterator(); + while (iterator.hasNext()) { + Iterable doc = iterator.next(); + if (parentField != null) { + if (iterator.hasNext() == false) { + doc = addParentField(doc, parentField); + } + } else if (segmentInfo.getIndexSort() != null + && iterator.hasNext() + && indexMajorVersionCreated >= Version.LUCENE_10_0_0.major) { + // sort is configured but parent field is missing, yet we have a doc-block + // yet we must not fail if this index was created in an earlier version where this + // behavior was permitted. + throw new IllegalArgumentException( + "a parent field must be set in order to use document blocks with index sorting; see IndexWriterConfig#setParentField"); + } + // Even on exception, the document is still added (but marked // deleted), so we don't need to un-reserve at that point. 
// Aborting exceptions will actually "lose" more than one @@ -245,10 +274,11 @@ final class DocumentsWriterPerThread implements Accountable { onNewDocOnRAM.run(); } } - allDocsIndexed = true; - if (numDocsInRAM - docsInRamBefore > 1) { + final int numDocs = numDocsInRAM - docsInRamBefore; + if (numDocs > 1) { segmentInfo.setHasBlocks(); } + allDocsIndexed = true; return finishDocuments(deleteNode, docsInRamBefore); } finally { if (!allDocsIndexed && !aborted) { @@ -262,6 +292,34 @@ final class DocumentsWriterPerThread implements Accountable { } } + private Iterable addParentField( + Iterable doc, IndexableField parentField) { + return () -> { + final Iterator first = doc.iterator(); + return new Iterator<>() { + IndexableField additionalField = parentField; + + @Override + public boolean hasNext() { + return additionalField != null || first.hasNext(); + } + + @Override + public IndexableField next() { + if (additionalField != null) { + IndexableField field = additionalField; + additionalField = null; + return field; + } + if (first.hasNext()) { + return first.next(); + } + throw new NoSuchElementException(); + } + }; + }; + } + private long finishDocuments(DocumentsWriterDeleteQueue.Node deleteNode, int docIdUpTo) { /* * here we actually finish the document in two steps 1. push the delete into diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java index 59184a6bff0..03f927a552d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java @@ -63,6 +63,8 @@ public final class FieldInfo { // whether this field is used as the soft-deletes field private final boolean softDeletesField; + private final boolean isParentField; + /** * Sole constructor. 
* @@ -84,7 +86,8 @@ public final class FieldInfo { int vectorDimension, VectorEncoding vectorEncoding, VectorSimilarityFunction vectorSimilarityFunction, - boolean softDeletesField) { + boolean softDeletesField, + boolean isParentField) { this.name = Objects.requireNonNull(name); this.number = number; this.docValuesType = @@ -111,6 +114,7 @@ public final class FieldInfo { this.vectorEncoding = vectorEncoding; this.vectorSimilarityFunction = vectorSimilarityFunction; this.softDeletesField = softDeletesField; + this.isParentField = isParentField; this.checkConsistency(); } @@ -206,6 +210,13 @@ public final class FieldInfo { throw new IllegalArgumentException( "vectorDimension must be >=0; got " + vectorDimension + " (field: '" + name + "')"); } + + if (softDeletesField && isParentField) { + throw new IllegalArgumentException( + "field can't be used as soft-deletes field and parent document field (field: '" + + name + + "')"); + } } /** @@ -633,4 +644,12 @@ public final class FieldInfo { public boolean isSoftDeletesField() { return softDeletesField; } + + /** + * Returns true if this field is configured and used as the parent document field field. 
See + * {@link IndexWriterConfig#setParentField(String)} + */ + public boolean isParentField() { + return isParentField; + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java index fd07d171010..5d483f7ece1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java @@ -59,6 +59,8 @@ public class FieldInfos implements Iterable { private final boolean hasVectorValues; private final String softDeletesField; + private final String parentField; + // used only by fieldInfo(int) private final FieldInfo[] byNumber; @@ -78,6 +80,7 @@ public class FieldInfos implements Iterable { boolean hasPointValues = false; boolean hasVectorValues = false; String softDeletesField = null; + String parentField = null; int size = 0; // number of elements in byNumberTemp, number of used array slots FieldInfo[] byNumberTemp = new FieldInfo[10]; // initial array capacity of 10 @@ -132,6 +135,13 @@ public class FieldInfos implements Iterable { } softDeletesField = info.name; } + if (info.isParentField()) { + if (parentField != null && parentField.equals(info.name) == false) { + throw new IllegalArgumentException( + "multiple parent fields [" + info.name + ", " + parentField + "]"); + } + parentField = info.name; + } } this.hasVectors = hasVectors; @@ -145,6 +155,7 @@ public class FieldInfos implements Iterable { this.hasPointValues = hasPointValues; this.hasVectorValues = hasVectorValues; this.softDeletesField = softDeletesField; + this.parentField = parentField; List valuesTemp = new ArrayList<>(); byNumber = new FieldInfo[size]; @@ -178,7 +189,8 @@ public class FieldInfos implements Iterable { .filter(Objects::nonNull) .findAny() .orElse(null); - final Builder builder = new Builder(new FieldNumbers(softDeletesField)); + final String parentField = getAndValidateParentField(leaves); + final Builder builder = new Builder(new 
FieldNumbers(softDeletesField, parentField)); for (final LeafReaderContext ctx : leaves) { for (FieldInfo fieldInfo : ctx.reader().getFieldInfos()) { builder.add(fieldInfo); @@ -188,6 +200,26 @@ public class FieldInfos implements Iterable { } } + private static String getAndValidateParentField(List leaves) { + boolean set = false; + String theField = null; + for (LeafReaderContext ctx : leaves) { + String field = ctx.reader().getFieldInfos().getParentField(); + if (set && Objects.equals(field, theField) == false) { + throw new IllegalStateException( + "expected parent doc field to be \"" + + theField + + " \" across all segments but found a segment with different field \"" + + field + + "\""); + } else { + theField = field; + set = true; + } + } + return theField; + } + /** Returns a set of names of fields that have a terms index. The order is undefined. */ public static Collection getIndexedFields(IndexReader reader) { return reader.leaves().stream() @@ -254,6 +286,11 @@ public class FieldInfos implements Iterable { return softDeletesField; } + /** Returns the parent document field name if exists; otherwise returns null */ + public String getParentField() { + return parentField; + } + /** Returns the number of fields */ public int size() { return byName.size(); @@ -345,7 +382,10 @@ public class FieldInfos implements Iterable { // The soft-deletes field from IWC to enforce a single soft-deletes field private final String softDeletesFieldName; - FieldNumbers(String softDeletesFieldName) { + // The parent document field from IWC to mark parent document when indexing + private final String parentFieldName; + + FieldNumbers(String softDeletesFieldName, String parentFieldName) { this.nameToNumber = new HashMap<>(); this.numberToName = new HashMap<>(); this.indexOptions = new HashMap<>(); @@ -355,11 +395,21 @@ public class FieldInfos implements Iterable { this.omitNorms = new HashMap<>(); this.storeTermVectors = new HashMap<>(); this.softDeletesFieldName = 
softDeletesFieldName; + this.parentFieldName = parentFieldName; + if (softDeletesFieldName != null + && parentFieldName != null + && parentFieldName.equals(softDeletesFieldName)) { + throw new IllegalArgumentException( + "parent document and soft-deletes field can't be the same field \"" + + parentFieldName + + "\""); + } } synchronized void verifyFieldInfo(FieldInfo fi) { String fieldName = fi.getName(); verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField()); + verifyParentFieldName(fieldName, fi.isParentField()); if (nameToNumber.containsKey(fieldName)) { verifySameSchema(fi); } @@ -373,6 +423,7 @@ public class FieldInfos implements Iterable { synchronized int addOrGet(FieldInfo fi) { String fieldName = fi.getName(); verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField()); + verifyParentFieldName(fieldName, fi.isParentField()); Integer fieldNumber = nameToNumber.get(fieldName); if (fieldNumber != null) { @@ -437,6 +488,33 @@ public class FieldInfos implements Iterable { } } + private void verifyParentFieldName(String fieldName, boolean isParentField) { + if (isParentField) { + if (parentFieldName == null) { + throw new IllegalArgumentException( + "can't add field [" + + fieldName + + "] as parent document field; this IndexWriter has no parent document field configured"); + } else if (fieldName.equals(parentFieldName) == false) { + throw new IllegalArgumentException( + "can't add field [" + + fieldName + + "] as parent document field; this IndexWriter is configured with [" + + parentFieldName + + "] as parent document field"); + } + } else if (fieldName.equals(parentFieldName)) { // isParent == false + // this would be the case if the current index has a parent field that is + // not a parent field in the incoming index (think addIndices) + throw new IllegalArgumentException( + "can't add [" + + fieldName + + "] as non parent document field; this IndexWriter is configured with [" + + parentFieldName + + "] as parent document field"); + } + } + 
private void verifySameSchema(FieldInfo fi) { String fieldName = fi.getName(); IndexOptions currentOpts = this.indexOptions.get(fieldName); @@ -513,7 +591,8 @@ public class FieldInfos implements Iterable { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - (softDeletesFieldName != null && softDeletesFieldName.equals(fieldName))); + (softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)), + (parentFieldName != null && parentFieldName.equals(fieldName))); addOrGet(fi); } } else { @@ -579,6 +658,7 @@ public class FieldInfos implements Iterable { if (dvType != dvType0) return null; boolean isSoftDeletesField = fieldName.equals(softDeletesFieldName); + boolean isParentField = fieldName.equals(parentFieldName); return new FieldInfo( fieldName, newFieldNumber, @@ -595,7 +675,8 @@ public class FieldInfos implements Iterable { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - isSoftDeletesField); + isSoftDeletesField, + isParentField); } synchronized Set getFieldNames() { @@ -627,6 +708,14 @@ public class FieldInfos implements Iterable { return globalFieldNumbers.softDeletesFieldName; } + /** + * Returns the name of the parent document field or null if no parent field is + * configured + */ + public String getParentFieldName() { + return globalFieldNumbers.parentFieldName; + } + /** * Adds the provided FieldInfo to this Builder if this field doesn't exist in this Builder. 
Also * adds a new field with its schema options to the global FieldNumbers if the field doesn't @@ -710,7 +799,8 @@ public class FieldInfos implements Iterable { fi.getVectorDimension(), fi.getVectorEncoding(), fi.getVectorSimilarityFunction(), - fi.isSoftDeletesField()); + fi.isSoftDeletesField(), + fi.isParentField()); byName.put(fiNew.getName(), fiNew); return fiNew; } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 417fa442be4..0c51450d2ff 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1261,7 +1261,8 @@ public class IndexWriter * If this {@link SegmentInfos} has no global field number map the returned instance is empty */ private FieldNumbers getFieldNumberMap() throws IOException { - final FieldNumbers map = new FieldNumbers(config.softDeletesField); + final FieldNumbers map = + new FieldNumbers(config.getSoftDeletesField(), config.getParentField()); for (SegmentCommitInfo info : segmentInfos) { FieldInfos fis = readFieldInfos(info); @@ -6614,10 +6615,12 @@ public class IndexWriter } @Override - public FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName) { + public FieldInfosBuilder newFieldInfosBuilder( + String softDeletesFieldName, String parentFieldName) { return new FieldInfosBuilder() { private final FieldInfos.Builder builder = - new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName)); + new FieldInfos.Builder( + new FieldInfos.FieldNumbers(softDeletesFieldName, parentFieldName)); @Override public FieldInfosBuilder add(FieldInfo fi) { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index c5e9a8bd224..e20a6371ef2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -545,4 +545,20 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig { this.eventListener = eventListener; return this; } + + /** + * Sets the parent document field. If this optional property is set, IndexWriter will add an + * internal field to every root document added to the index writer. A document is considered a + * parent document if it's the last document in a document block indexed via {@link + * IndexWriter#addDocuments(Iterable)} or {@link IndexWriter#updateDocuments(Term, Iterable)} and + * its relatives. Additionally, all individual documents added via the single document methods + * ({@link IndexWriter#addDocument(Iterable)} etc.) are also considered parent documents. This + * property is optional for all indices that don't use document blocks in combination with index + * sorting. In order to maintain the API guarantee that the document order of a block is not + * altered by the {@link IndexWriter}, a marker for parent documents is required. 
+ */ + public IndexWriterConfig setParentField(String parentField) { + this.parentField = parentField; + return this; + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java index cb2b1d1c1a8..560807d6973 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java @@ -18,6 +18,7 @@ package org.apache.lucene.index; import java.io.Closeable; import java.io.IOException; +import java.io.Reader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -27,6 +28,7 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; +import java.util.function.Function; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.codecs.DocValuesConsumer; @@ -38,6 +40,7 @@ import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PointsFormat; import org.apache.lucene.codecs.PointsWriter; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.InvertableType; import org.apache.lucene.document.KnnByteVectorField; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.document.StoredValue; @@ -49,6 +52,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSet; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException; @@ -57,6 +61,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.IntBlockPool; import org.apache.lucene.util.RamUsageEstimator; +import 
org.apache.lucene.util.Version; /** Default general purpose indexing chain, which handles indexing all types of fields. */ final class IndexingChain implements Accountable { @@ -219,7 +224,31 @@ final class IndexingChain implements Accountable { } LeafReader docValuesReader = getDocValuesLeafReader(); + Function comparatorWrapper = + Function.identity(); + if (state.segmentInfo.getHasBlocks() && state.fieldInfos.getParentField() != null) { + final DocIdSetIterator readerValues = + docValuesReader.getNumericDocValues(state.fieldInfos.getParentField()); + if (readerValues == null) { + throw new CorruptIndexException( + "missing doc values for parent field \"" + state.fieldInfos.getParentField() + "\"", + "IndexingChain"); + } + BitSet parents = BitSet.of(readerValues, state.segmentInfo.maxDoc()); + comparatorWrapper = + in -> + (docID1, docID2) -> + in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2)); + } + if (state.segmentInfo.getHasBlocks() + && state.fieldInfos.getParentField() == null + && indexCreatedVersionMajor >= Version.LUCENE_10_0_0.major) { + throw new CorruptIndexException( + "parent field is not set but the index has blocks and uses index sorting. 
indexCreatedVersionMajor: " + + indexCreatedVersionMajor, + "IndexingChain"); + } List comparators = new ArrayList<>(); for (int i = 0; i < indexSort.getSort().length; i++) { SortField sortField = indexSort.getSort()[i]; @@ -227,7 +256,10 @@ final class IndexingChain implements Accountable { if (sorter == null) { throw new UnsupportedOperationException("Cannot sort index using sort field " + sortField); } - comparators.add(sorter.getDocComparator(docValuesReader, state.segmentInfo.maxDoc())); + + IndexSorter.DocComparator docComparator = + sorter.getDocComparator(docValuesReader, state.segmentInfo.maxDoc()); + comparators.add(comparatorWrapper.apply(docComparator)); } Sorter sorter = new Sorter(indexSort); // returns null if the documents are already sorted @@ -546,7 +578,17 @@ final class IndexingChain implements Accountable { // build schema for each unique doc field for (IndexableField field : document) { IndexableFieldType fieldType = field.fieldType(); - PerField pf = getOrAddPerField(field.name()); + final boolean isReserved = field.getClass() == ReservedField.class; + PerField pf = + getOrAddPerField( + field.name(), false + /* we never add reserved fields during indexing should be done during DWPT setup*/ ); + if (pf.reserved != isReserved) { + throw new IllegalArgumentException( + "\"" + + field.name() + + "\" is a reserved field and should not be added to any document"); + } if (pf.fieldGen != fieldGen) { // first time we see this field in this document fields[fieldCount++] = pf; pf.fieldGen = fieldGen; @@ -556,7 +598,7 @@ final class IndexingChain implements Accountable { docFields[docFieldIdx++] = pf; updateDocFieldSchema(field.name(), pf.schema, fieldType); } - // For each field, if it the first time we see this field in this segment, + // For each field, if it's the first time we see this field in this segment, // initialize its FieldInfo. 
// If we have already seen this field, verify that its schema // within the current doc matches its schema in the index. @@ -646,7 +688,8 @@ final class IndexingChain implements Accountable { s.vectorDimension, s.vectorEncoding, s.vectorSimilarityFunction, - pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName()))); + pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName()), + pf.fieldName.equals(fieldInfos.getParentFieldName()))); pf.setFieldInfo(fi); if (fi.getIndexOptions() != IndexOptions.NONE) { pf.setInvertState(); @@ -741,7 +784,7 @@ final class IndexingChain implements Accountable { * Returns a previously created {@link PerField}, absorbing the type information from {@link * FieldType}, and creates a new {@link PerField} if this field name wasn't seen yet. */ - private PerField getOrAddPerField(String fieldName) { + private PerField getOrAddPerField(String fieldName, boolean reserved) { final int hashPos = fieldName.hashCode() & hashMask; PerField pf = fieldHash[hashPos]; while (pf != null && pf.fieldName.equals(fieldName) == false) { @@ -757,7 +800,8 @@ final class IndexingChain implements Accountable { schema, indexWriterConfig.getSimilarity(), indexWriterConfig.getInfoStream(), - indexWriterConfig.getAnalyzer()); + indexWriterConfig.getAnalyzer(), + reserved); pf.next = fieldHash[hashPos]; fieldHash[hashPos] = pf; totalFieldCount++; @@ -1022,6 +1066,7 @@ final class IndexingChain implements Accountable { final String fieldName; final int indexCreatedVersionMajor; final FieldSchema schema; + final boolean reserved; FieldInfo fieldInfo; final Similarity similarity; @@ -1059,13 +1104,15 @@ final class IndexingChain implements Accountable { FieldSchema schema, Similarity similarity, InfoStream infoStream, - Analyzer analyzer) { + Analyzer analyzer, + boolean reserved) { this.fieldName = fieldName; this.indexCreatedVersionMajor = indexCreatedVersionMajor; this.schema = schema; this.similarity = similarity; this.infoStream = infoStream; this.analyzer = 
analyzer; + this.reserved = reserved; } void reset(int docId) { @@ -1512,4 +1559,77 @@ final class IndexingChain implements Accountable { assertSame("point num bytes", fi.getPointNumBytes(), pointNumBytes); } } + + /** + * Wraps the given field in a reserved field and registers it as reserved. Only DWPT should do + * this to mark fields as private / reserved to prevent this fieldname to be used from the outside + * of the IW / DWPT eco-system + */ + ReservedField markAsReserved(T field) { + getOrAddPerField(field.name(), true); + return new ReservedField(field); + } + + static final class ReservedField implements IndexableField { + + private final T delegate; + + private ReservedField(T delegate) { + this.delegate = delegate; + } + + T getDelegate() { + return delegate; + } + + @Override + public String name() { + return delegate.name(); + } + + @Override + public IndexableFieldType fieldType() { + return delegate.fieldType(); + } + + @Override + public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { + return delegate.tokenStream(analyzer, reuse); + } + + @Override + public BytesRef binaryValue() { + return delegate.binaryValue(); + } + + @Override + public String stringValue() { + return delegate.stringValue(); + } + + @Override + public CharSequence getCharSequenceValue() { + return delegate.getCharSequenceValue(); + } + + @Override + public Reader readerValue() { + return delegate.readerValue(); + } + + @Override + public Number numericValue() { + return delegate.numericValue(); + } + + @Override + public StoredValue storedValue() { + return delegate.storedValue(); + } + + @Override + public InvertableType invertableType() { + return delegate.invertableType(); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index 257c429818c..d9ac4f37d84 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -98,6 +98,9 @@ public class LiveIndexWriterConfig { /** The field names involved in the index sort */ protected Set indexSortFields = Collections.emptySet(); + /** parent document field */ + protected String parentField = null; + /** * if an indexing thread should check for pending flushes on update in order to help out on a full * flush @@ -458,6 +461,11 @@ public class LiveIndexWriterConfig { return eventListener; } + /** Returns the parent document field name if configured. */ + public String getParentField() { + return parentField; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -486,6 +494,7 @@ public class LiveIndexWriterConfig { sb.append("maxFullFlushMergeWaitMillis=").append(getMaxFullFlushMergeWaitMillis()).append("\n"); sb.append("leafSorter=").append(getLeafSorter()).append("\n"); sb.append("eventListener=").append(getIndexWriterEventListener()).append("\n"); + sb.append("parentField=").append(getParentField()).append("\n"); return sb.toString(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java index 2b1fd2ee6b1..9a8e48e5d3e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java @@ -22,8 +22,10 @@ import java.util.List; import org.apache.lucene.index.MergeState.DocMap; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BitSet; import org.apache.lucene.util.Bits; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.Version; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -50,6 +52,31 @@ final class MultiSorter { "Cannot use sort field " + fields[i] + " for index sorting"); } comparables[i] = 
sorter.getComparableProviders(readers); + for (int j = 0; j < readers.size(); j++) { + CodecReader codecReader = readers.get(j); + FieldInfos fieldInfos = codecReader.getFieldInfos(); + LeafMetaData metaData = codecReader.getMetaData(); + if (metaData.hasBlocks() && fieldInfos.getParentField() != null) { + NumericDocValues parentDocs = + codecReader.getNumericDocValues(fieldInfos.getParentField()); + assert parentDocs != null + : "parent field: " + + fieldInfos.getParentField() + + " must be present if index sorting is used with blocks"; + BitSet parents = BitSet.of(parentDocs, codecReader.maxDoc()); + IndexSorter.ComparableProvider[] providers = comparables[i]; + IndexSorter.ComparableProvider provider = providers[j]; + providers[j] = docId -> provider.getAsComparableLong(parents.nextSetBit(docId)); + } + if (metaData.hasBlocks() + && fieldInfos.getParentField() == null + && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) { + throw new CorruptIndexException( + "parent field is not set but the index has blocks and uses index sorting. indexCreatedVersionMajor: " + + metaData.getCreatedVersionMajor(), + "IndexingChain"); + } + } reverseMuls[i] = fields[i].getReverse() ? -1 : 1; } int leafCount = readers.size(); diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java index 80273cdebae..374dbb31194 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java @@ -111,9 +111,15 @@ public class ParallelLeafReader extends LeafReader { .filter(Objects::nonNull) .findAny() .orElse(null); + final String parentField = + completeReaderSet.stream() + .map(r -> r.getFieldInfos().getParentField()) + .filter(Objects::nonNull) + .findAny() + .orElse(null); // TODO: make this read-only in a cleaner way? 
FieldInfos.Builder builder = - new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField)); + new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField, parentField)); Sort indexSort = null; int createdVersionMajor = -1; diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java index 04306c024cb..e5eb05b35bb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java @@ -720,7 +720,8 @@ final class ReadersAndUpdates { fi.getVectorDimension(), fi.getVectorEncoding(), fi.getVectorSimilarityFunction(), - fi.isSoftDeletesField()); + fi.isSoftDeletesField(), + fi.isParentField()); } private SegmentReader createNewReaderWithLatestLiveDocs(SegmentReader reader) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java index 7945505be92..741dfc6944a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java @@ -17,9 +17,12 @@ package org.apache.lucene.index; import java.io.IOException; +import java.util.function.Function; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BitSet; import org.apache.lucene.util.TimSorter; +import org.apache.lucene.util.Version; import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; @@ -206,13 +209,33 @@ public final class Sorter { SortField[] fields = sort.getSort(); final IndexSorter.DocComparator[] comparators = new IndexSorter.DocComparator[fields.length]; + Function comparatorWrapper = in -> in; + LeafMetaData metaData = reader.getMetaData(); + FieldInfos fieldInfos = reader.getFieldInfos(); + if (metaData.hasBlocks() && fieldInfos.getParentField() != null) { + 
BitSet parents = + BitSet.of(reader.getNumericDocValues(fieldInfos.getParentField()), reader.maxDoc()); + comparatorWrapper = + in -> + (docID1, docID2) -> + in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2)); + } + if (metaData.hasBlocks() + && fieldInfos.getParentField() == null + && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) { + throw new CorruptIndexException( + "parent field is not set but the index has blocks. indexCreatedVersionMajor: " + + metaData.getCreatedVersionMajor(), + "Sorter"); + } + for (int i = 0; i < fields.length; i++) { IndexSorter sorter = fields[i].getIndexSorter(); if (sorter == null) { throw new IllegalArgumentException( "Cannot use sortfield + " + fields[i] + " to sort indexes"); } - comparators[i] = sorter.getDocComparator(reader, reader.maxDoc()); + comparators[i] = comparatorWrapper.apply(sorter.getDocComparator(reader, reader.maxDoc())); } return sort(reader.maxDoc(), comparators); } diff --git a/lucene/core/src/java/org/apache/lucene/internal/tests/IndexPackageAccess.java b/lucene/core/src/java/org/apache/lucene/internal/tests/IndexPackageAccess.java index 545b5659933..5ca6d9d1c58 100644 --- a/lucene/core/src/java/org/apache/lucene/internal/tests/IndexPackageAccess.java +++ b/lucene/core/src/java/org/apache/lucene/internal/tests/IndexPackageAccess.java @@ -31,7 +31,7 @@ public interface IndexPackageAccess { void setIndexWriterMaxDocs(int limit); - FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName); + FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName, String parentFieldName); void checkImpacts(Impacts impacts, int max); diff --git a/lucene/core/src/java/org/apache/lucene/search/Sort.java b/lucene/core/src/java/org/apache/lucene/search/Sort.java index 868fbf16360..394dcb1a23d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Sort.java +++ b/lucene/core/src/java/org/apache/lucene/search/Sort.java @@ -85,7 +85,6 @@ public final class Sort { */ public Sort 
rewrite(IndexSearcher searcher) throws IOException { boolean changed = false; - SortField[] rewrittenSortFields = new SortField[fields.length]; for (int i = 0; i < fields.length; i++) { rewrittenSortFields[i] = fields[i].rewrite(searcher); @@ -100,7 +99,6 @@ public final class Sort { @Override public String toString() { StringBuilder buffer = new StringBuilder(); - for (int i = 0; i < fields.length; i++) { buffer.append(fields[i].toString()); if ((i + 1) < fields.length) buffer.append(','); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java index cba815fbde3..152dcdddc55 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -1937,4 +1937,97 @@ public class TestAddIndexes extends LuceneTestCase { targetDir.close(); sourceDir.close(); } + + public void testIllegalParentDocChange() throws Exception { + Directory dir1 = newDirectory(); + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc1.setParentField("foobar"); + RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1); + Document parent = new Document(); + w1.addDocuments(Arrays.asList(new Document(), new Document(), parent)); + w1.commit(); + w1.addDocuments(Arrays.asList(new Document(), new Document(), parent)); + w1.commit(); + // so the index sort is in fact burned into the index: + w1.forceMerge(1); + w1.close(); + + Directory dir2 = newDirectory(); + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc2.setParentField("foo"); + RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2); + + IndexReader r1 = DirectoryReader.open(dir1); + String message = + expectThrows( + IllegalArgumentException.class, + () -> { + w2.addIndexes((SegmentReader) getOnlyLeafReader(r1)); + }) + .getMessage(); + assertEquals( + "can't add field [foobar] as parent 
document field; this IndexWriter is configured with [foo] as parent document field", + message); + + message = + expectThrows( + IllegalArgumentException.class, + () -> { + w2.addIndexes(dir1); + }) + .getMessage(); + assertEquals( + "can't add field [foobar] as parent document field; this IndexWriter is configured with [foo] as parent document field", + message); + + Directory dir3 = newDirectory(); + IndexWriterConfig iwc3 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc3.setParentField("foobar"); + RandomIndexWriter w3 = new RandomIndexWriter(random(), dir3, iwc3); + + w3.addIndexes((SegmentReader) getOnlyLeafReader(r1)); + w3.addIndexes(dir1); + + IOUtils.close(r1, dir1, w2, dir2, w3, dir3); + } + + public void testIllegalNonParentField() throws IOException { + Directory dir1 = newDirectory(); + IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random())); + RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1); + Document parent = new Document(); + parent.add(new StringField("foo", "XXX", Field.Store.NO)); + w1.addDocument(parent); + w1.close(); + + Directory dir2 = newDirectory(); + IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random())); + iwc2.setParentField("foo"); + RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2); + + IndexReader r1 = DirectoryReader.open(dir1); + String message = + expectThrows( + IllegalArgumentException.class, + () -> { + w2.addIndexes((SegmentReader) getOnlyLeafReader(r1)); + }) + .getMessage(); + assertEquals( + "can't add [foo] as non parent document field; this IndexWriter is configured with [foo] as parent document field", + message); + + message = + expectThrows( + IllegalArgumentException.class, + () -> { + w2.addIndexes(dir1); + }) + .getMessage(); + assertEquals( + "can't add [foo] as non parent document field; this IndexWriter is configured with [foo] as parent document field", + message); + + IOUtils.close(r1, dir1, w2, dir2); + } } diff --git 
a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 40b00975805..eff9cbe763f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -114,6 +114,7 @@ public class TestCodecs extends LuceneTestCase { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false)); } this.terms = terms; @@ -229,7 +230,8 @@ public class TestCodecs extends LuceneTestCase { terms[i] = new TermData(text, docs, null); } - final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null)); + final FieldInfos.Builder builder = + new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null)); final FieldData field = new FieldData("field", builder, terms, true, false); final FieldData[] fields = new FieldData[] {field}; @@ -292,7 +294,8 @@ public class TestCodecs extends LuceneTestCase { } public void testRandomPostings() throws Throwable { - final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null)); + final FieldInfos.Builder builder = + new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null)); final FieldData[] fields = new FieldData[NUM_FIELDS]; for (int i = 0; i < NUM_FIELDS; i++) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index 9a8eda06c88..3b245dd4132 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -236,7 +236,7 @@ public class TestDoc extends LuceneTestCase { si, InfoStream.getDefault(), trackingDir, - new FieldInfos.FieldNumbers(null), + new FieldInfos.FieldNumbers(null, null), context); merger.merge(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java index 
a4a893c86aa..e647b6d2b33 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java @@ -239,7 +239,7 @@ public class TestFieldInfos extends LuceneTestCase { } public void testFieldNumbersAutoIncrement() { - FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes"); + FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes", "parentDoc"); for (int i = 0; i < 10; i++) { fieldNumbers.addOrGet( new FieldInfo( @@ -258,6 +258,7 @@ public class TestFieldInfos extends LuceneTestCase { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false)); } int idx = @@ -278,6 +279,7 @@ public class TestFieldInfos extends LuceneTestCase { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false)); assertEquals("Field numbers 0 through 9 were allocated", 10, idx); @@ -300,6 +302,7 @@ public class TestFieldInfos extends LuceneTestCase { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false)); assertEquals("Field numbers should reset after clear()", 0, idx); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java index 0c984fce0e4..b526f04f39d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java @@ -45,8 +45,7 @@ public class TestFieldsReader extends LuceneTestCase { @BeforeClass public static void beforeClass() throws Exception { testDoc = new Document(); - final String softDeletesFieldName = null; - fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName)); + fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null)); DocHelper.setupDoc(testDoc); for (IndexableField field : testDoc.getFields()) { IndexableFieldType ift = field.fieldType(); @@ -67,7 
+66,8 @@ public class TestFieldsReader extends LuceneTestCase { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - field.name().equals(softDeletesFieldName))); + false, + false)); } dir = newDirectory(); IndexWriterConfig conf = diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java index 675d5b67768..ed776b371c2 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -2122,6 +2122,10 @@ public class TestIndexSorting extends LuceneTestCase { public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc1 = newIndexWriterConfig(); + boolean useParent = rarely(); + if (useParent) { + iwc1.setParentField("___parent"); + } Sort indexSort = new Sort( new SortField("foo", SortField.Type.LONG), new SortField("bar", SortField.Type.LONG)); @@ -2154,6 +2158,9 @@ public class TestIndexSorting extends LuceneTestCase { } else { iwc.setIndexSort(indexSort); } + if (useParent) { + iwc.setParentField("___parent"); + } IndexWriter w2 = new IndexWriter(dir2, iwc); if (useReaders) { @@ -3165,4 +3172,277 @@ public class TestIndexSorting extends LuceneTestCase { reader.close(); dir.close(); } + + public void testParentFieldNotConfigured() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT)); + iwc.setIndexSort(indexSort); + try (IndexWriter writer = new IndexWriter(dir, iwc)) { + IllegalArgumentException ex = + expectThrows( + IllegalArgumentException.class, + () -> { + writer.addDocuments(Arrays.asList(new Document(), new Document())); + }); + assertEquals( + "a parent field must be set in order to use document blocks with index sorting; see 
IndexWriterConfig#setParentField", + ex.getMessage()); + } + } + } + + public void testBlockContainsParentField() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + String parentField = "parent"; + iwc.setParentField(parentField); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT)); + iwc.setIndexSort(indexSort); + try (IndexWriter writer = new IndexWriter(dir, iwc)) { + List runnabels = + Arrays.asList( + () -> { + IllegalArgumentException ex = + expectThrows( + IllegalArgumentException.class, + () -> { + Document doc = new Document(); + doc.add(new NumericDocValuesField("parent", 0)); + writer.addDocuments(Arrays.asList(doc, new Document())); + }); + assertEquals( + "\"parent\" is a reserved field and should not be added to any document", + ex.getMessage()); + }, + () -> { + IllegalArgumentException ex = + expectThrows( + IllegalArgumentException.class, + () -> { + Document doc = new Document(); + doc.add(new NumericDocValuesField("parent", 0)); + writer.addDocuments(Arrays.asList(new Document(), doc)); + }); + assertEquals( + "\"parent\" is a reserved field and should not be added to any document", + ex.getMessage()); + }); + Collections.shuffle(runnabels, random()); + for (Runnable runnable : runnabels) { + runnable.run(); + } + } + } + } + + public void testIndexSortWithBlocks() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec(); + iwc.setCodec(codec); + String parentField = "parent"; + Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT)); + iwc.setIndexSort(indexSort); + iwc.setParentField(parentField); + LogMergePolicy policy = newLogMergePolicy(); + // make sure that merge factor is always > 2 + if (policy.getMergeFactor() <= 2) { + policy.setMergeFactor(3); + } + 
iwc.setMergePolicy(policy); + + // add already sorted documents + codec.numCalls = 0; + codec.needsIndexSort = false; + try (IndexWriter w = new IndexWriter(dir, iwc)) { + int numDocs = random().nextInt(50, 100); + for (int i = 0; i < numDocs; i++) { + Document child1 = new Document(); + child1.add(new StringField("id", Integer.toString(i), Store.YES)); + child1.add(new NumericDocValuesField("id", i)); + child1.add(new NumericDocValuesField("child", 1)); + child1.add(new NumericDocValuesField("foo", random().nextInt())); + Document child2 = new Document(); + child2.add(new StringField("id", Integer.toString(i), Store.YES)); + child2.add(new NumericDocValuesField("id", i)); + child2.add(new NumericDocValuesField("child", 2)); + child2.add(new NumericDocValuesField("foo", random().nextInt())); + Document parent = new Document(); + parent.add(new StringField("id", Integer.toString(i), Store.YES)); + parent.add(new NumericDocValuesField("id", i)); + parent.add(new NumericDocValuesField("foo", random().nextInt())); + w.addDocuments(Arrays.asList(child1, child2, parent)); + if (rarely()) { + w.commit(); + } + } + w.commit(); + if (random().nextBoolean()) { + w.forceMerge(1, true); + } + } + + try (DirectoryReader reader = DirectoryReader.open(dir)) { + for (LeafReaderContext ctx : reader.leaves()) { + LeafReader leaf = ctx.reader(); + NumericDocValues parentDISI = leaf.getNumericDocValues(parentField); + NumericDocValues ids = leaf.getNumericDocValues("id"); + NumericDocValues children = leaf.getNumericDocValues("child"); + int doc; + int expectedDocID = 2; + while ((doc = parentDISI.nextDoc()) != NO_MORE_DOCS) { + assertEquals(-1, parentDISI.longValue()); + assertEquals(expectedDocID, doc); + int id = ids.nextDoc(); + long child1ID = ids.longValue(); + assertEquals(id, children.nextDoc()); + long child1 = children.longValue(); + assertEquals(1, child1); + + id = ids.nextDoc(); + long child2ID = ids.longValue(); + assertEquals(id, children.nextDoc()); + long child2 = 
children.longValue(); + assertEquals(2, child2); + + int idParent = ids.nextDoc(); + assertEquals(id + 1, idParent); + long parent = ids.longValue(); + assertEquals(child1ID, parent); + assertEquals(child2ID, parent); + expectedDocID += 3; + } + } + } + } + } + + @SuppressWarnings("fallthrough") + public void testMixRandomDocumentsWithBlocks() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec(); + iwc.setCodec(codec); + String parentField = "parent"; + Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT)); + iwc.setIndexSort(indexSort); + iwc.setParentField(parentField); + RandomIndexWriter randomIndexWriter = new RandomIndexWriter(random(), dir, iwc); + int numDocs = random().nextInt(100, 1000); + for (int i = 0; i < numDocs; i++) { + if (rarely()) { + randomIndexWriter.deleteDocuments(new Term("id", "" + random().nextInt(0, i + 1))); + } + List docs = new ArrayList<>(); + switch (random().nextInt(100) % 5) { + case 4: + Document child3 = new Document(); + child3.add(new StringField("id", Integer.toString(i), Store.YES)); + child3.add(new NumericDocValuesField("type", 2)); + child3.add(new NumericDocValuesField("child_ord", 3)); + child3.add(new NumericDocValuesField("foo", random().nextInt())); + docs.add(child3); + case 3: + Document child2 = new Document(); + child2.add(new StringField("id", Integer.toString(i), Store.YES)); + child2.add(new NumericDocValuesField("type", 2)); + child2.add(new NumericDocValuesField("child_ord", 2)); + child2.add(new NumericDocValuesField("foo", random().nextInt())); + docs.add(child2); + case 2: + Document child1 = new Document(); + child1.add(new StringField("id", Integer.toString(i), Store.YES)); + child1.add(new NumericDocValuesField("type", 2)); + child1.add(new NumericDocValuesField("child_ord", 1)); + child1.add(new NumericDocValuesField("foo", 
random().nextInt())); + docs.add(child1); + case 1: + Document root = new Document(); + root.add(new StringField("id", Integer.toString(i), Store.YES)); + root.add(new NumericDocValuesField("type", 1)); + root.add(new NumericDocValuesField("num_children", docs.size())); + root.add(new NumericDocValuesField("foo", random().nextInt())); + docs.add(root); + randomIndexWriter.addDocuments(docs); + break; + case 0: + Document single = new Document(); + single.add(new StringField("id", Integer.toString(i), Store.YES)); + single.add(new NumericDocValuesField("type", 0)); + single.add(new NumericDocValuesField("foo", random().nextInt())); + randomIndexWriter.addDocument(single); + } + if (rarely()) { + randomIndexWriter.forceMerge(1); + } + randomIndexWriter.commit(); + } + + randomIndexWriter.close(); + try (DirectoryReader reader = DirectoryReader.open(dir)) { + for (LeafReaderContext ctx : reader.leaves()) { + LeafReader leaf = ctx.reader(); + NumericDocValues parentDISI = leaf.getNumericDocValues(parentField); + assertNotNull(parentDISI); + NumericDocValues type = leaf.getNumericDocValues("type"); + NumericDocValues childOrd = leaf.getNumericDocValues("child_ord"); + NumericDocValues numChildren = leaf.getNumericDocValues("num_children"); + int numCurrentChildren = 0; + int totalPendingChildren = 0; + String childId = null; + for (int i = 0; i < leaf.maxDoc(); i++) { + if (leaf.getLiveDocs() == null || leaf.getLiveDocs().get(i)) { + assertTrue(type.advanceExact(i)); + int typeValue = (int) type.longValue(); + switch (typeValue) { + case 2: + assertFalse(parentDISI.advanceExact(i)); + assertTrue(childOrd.advanceExact(i)); + if (numCurrentChildren == 0) { // first child + childId = leaf.storedFields().document(i).get("id"); + totalPendingChildren = (int) childOrd.longValue() - 1; + } else { + assertNotNull(childId); + assertEquals(totalPendingChildren--, childOrd.longValue()); + assertEquals(childId, leaf.storedFields().document(i).get("id")); + } + numCurrentChildren++; 
+ break; + case 1: + assertTrue(parentDISI.advanceExact(i)); + assertEquals(-1, parentDISI.longValue()); + if (childOrd != null) { + assertFalse(childOrd.advanceExact(i)); + } + assertTrue(numChildren.advanceExact(i)); + assertEquals(0, totalPendingChildren); + assertEquals(numCurrentChildren, numChildren.longValue()); + if (numCurrentChildren > 0) { + assertEquals(childId, leaf.storedFields().document(i).get("id")); + } else { + assertNull(childId); + } + numCurrentChildren = 0; + childId = null; + break; + case 0: + assertEquals(-1, parentDISI.longValue()); + assertTrue(parentDISI.advanceExact(i)); + if (childOrd != null) { + assertFalse(childOrd.advanceExact(i)); + } + if (numChildren != null) { + assertFalse(numChildren.advanceExact(i)); + } + break; + default: + fail(); + } + } + } + } + } + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 1e2ac718321..7da560fdb40 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1722,6 +1722,44 @@ public class TestIndexWriter extends LuceneTestCase { } } + public void testSingleDocsDoNotTriggerHasBlocks() throws IOException { + try (Directory dir = newDirectory()) { + try (IndexWriter w = + new IndexWriter( + dir, + new IndexWriterConfig(new MockAnalyzer(random())) + .setMaxBufferedDocs(Integer.MAX_VALUE) + .setRAMBufferSizeMB(100))) { + + int docs = random().nextInt(1, 100); + for (int i = 0; i < docs; i++) { + Document doc = new Document(); + doc.add(new StringField("id", "" + i, Field.Store.NO)); + w.addDocuments(Arrays.asList(doc)); + } + w.commit(); + SegmentInfos si = w.cloneSegmentInfos(); + assertEquals(1, si.size()); + assertFalse(si.asList().get(0).info.getHasBlocks()); + + Document doc = new Document(); + doc.add(new StringField("id", "XXX", Field.Store.NO)); + w.addDocuments(Arrays.asList(doc, doc)); 
+ w.commit(); + si = w.cloneSegmentInfos(); + assertEquals(2, si.size()); + assertFalse(si.asList().get(0).info.getHasBlocks()); + assertTrue(si.asList().get(1).info.getHasBlocks()); + w.forceMerge(1); + + w.commit(); + si = w.cloneSegmentInfos(); + assertEquals(1, si.size()); + assertTrue(si.asList().get(0).info.getHasBlocks()); + } + } + } + public void testCarryOverHasBlocks() throws Exception { try (Directory dir = newDirectory()) { try (IndexWriter w = @@ -4790,4 +4828,89 @@ public class TestIndexWriter extends LuceneTestCase { doc.add(newField(field, "value", storedTextType)); writer.addDocument(doc); } + + public void testParentAndSoftDeletesAreTheSame() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig indexWriterConfig = newIndexWriterConfig(new MockAnalyzer(random())); + indexWriterConfig.setSoftDeletesField("foo"); + indexWriterConfig.setParentField("foo"); + IllegalArgumentException iae = + expectThrows( + IllegalArgumentException.class, () -> new IndexWriter(dir, indexWriterConfig)); + assertEquals( + "parent document and soft-deletes field can't be the same field \"foo\"", + iae.getMessage()); + } + } + + public void testIndexWithParentFieldIsCongruent() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + iwc.setParentField("parent"); + try (IndexWriter writer = new IndexWriter(dir, iwc)) { + if (random().nextBoolean()) { + Document child1 = new Document(); + child1.add(new StringField("id", Integer.toString(1), Field.Store.YES)); + Document child2 = new Document(); + child2.add(new StringField("id", Integer.toString(1), Field.Store.YES)); + Document parent = new Document(); + parent.add(new StringField("id", Integer.toString(1), Field.Store.YES)); + writer.addDocuments(Arrays.asList(child1, child2, parent)); + writer.flush(); + if (random().nextBoolean()) { + writer.addDocuments(Arrays.asList(child1, child2, parent)); + } + } else 
{ + writer.addDocument(new Document()); + } + writer.commit(); + } + IllegalArgumentException ex = + expectThrows( + IllegalArgumentException.class, + () -> { + IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random())); + config.setParentField("someOtherField"); + new IndexWriter(dir, config); + }); + assertEquals( + "can't add field [parent] as parent document field; this IndexWriter is configured with [someOtherField] as parent document field", + ex.getMessage()); + ex = + expectThrows( + IllegalArgumentException.class, + () -> { + IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random())); + new IndexWriter(dir, config); + }); + assertEquals( + "can't add field [parent] as parent document field; this IndexWriter has no parent document field configured", + ex.getMessage()); + } + } + + public void testParentFieldIsAlreadyUsed() throws IOException { + try (Directory dir = newDirectory()) { + + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + try (IndexWriter writer = new IndexWriter(dir, iwc)) { + Document doc = new Document(); + doc.add(new StringField("parent", Integer.toString(1), Field.Store.YES)); + writer.addDocument(doc); + writer.commit(); + } + IllegalArgumentException iae = + expectThrows( + IllegalArgumentException.class, + () -> { + IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random())); + config.setParentField("parent"); + + new IndexWriter(dir, config); + }); + assertEquals( + "can't add [parent] as non parent document field; this IndexWriter is configured with [parent] as parent document field", + iae.getMessage()); + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java index 02600f5d59d..3c6d57c5d2f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java +++ 
b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java @@ -199,7 +199,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - true); + true, + false); List docsDeleted = Arrays.asList(1, 3, 7, 8, DocIdSetIterator.NO_MORE_DOCS); List updates = Arrays.asList(singleUpdate(docsDeleted, 10, true)); for (DocValuesFieldUpdates update : updates) { @@ -237,7 +238,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - true); + true, + false); for (DocValuesFieldUpdates update : updates) { deletes.onDocValuesUpdate(fieldInfo, update.iterator()); } @@ -301,7 +303,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - true); + true, + false); List docsDeleted = Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS); List updates = Arrays.asList(singleUpdate(docsDeleted, 3, true)); for (DocValuesFieldUpdates update : updates) { @@ -370,7 +373,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - true); + true, + false); List updates = Arrays.asList(singleUpdate(Arrays.asList(0, 1, DocIdSetIterator.NO_MORE_DOCS), 3, false)); for (DocValuesFieldUpdates update : updates) { @@ -407,7 +411,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, - true); + true, + false); updates = Arrays.asList(singleUpdate(Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS), 3, true)); for (DocValuesFieldUpdates update : updates) { deletes.onDocValuesUpdate(fieldInfo, update.iterator()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java index da17f332e77..615accbb4df 100644 --- 
a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -104,7 +104,7 @@ public class TestSegmentMerger extends LuceneTestCase { si, InfoStream.getDefault(), mergedDir, - new FieldInfos.FieldNumbers(null), + new FieldInfos.FieldNumbers(null, null), newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)))); MergeState mergeState = merger.merge(); int docsMerged = mergeState.segmentInfo.maxDoc(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index 765c355e534..401bcb3e5c6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -1297,7 +1297,8 @@ public class TestSortOptimization extends LuceneTestCase { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.DOT_PRODUCT, - fi.isSoftDeletesField()); + fi.isSoftDeletesField(), + fi.isParentField()); newInfos[i] = noIndexFI; i++; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index 3b85d99c850..47c0d50a55a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -183,9 +183,13 @@ public class TestTopFieldCollector extends LuceneTestCase { dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE).setIndexSort(sort)); Document doc = new Document(); doc.add(new NumericDocValuesField("foo", 3)); - w.addDocuments(Arrays.asList(doc, doc, doc, doc)); + for (Document d : Arrays.asList(doc, doc, doc, doc)) { + w.addDocument(d); + } w.flush(); - w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc)); + for (Document d : Arrays.asList(doc, doc, doc, doc, doc, doc)) { + 
w.addDocument(d); + } w.flush(); IndexReader reader = DirectoryReader.open(w); assertEquals(2, reader.leaves().size()); diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java index aeb68267732..17ba68777fb 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java @@ -103,6 +103,7 @@ public class TermVectorLeafReader extends LeafReader { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false); fieldInfos = new FieldInfos(new FieldInfo[] {fieldInfo}); } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 09a521c880d..2bd50c5355c 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -736,6 +736,7 @@ public class MemoryIndex { fieldType.vectorDimension(), fieldType.vectorEncoding(), fieldType.vectorSimilarityFunction(), + false, false); } @@ -789,7 +790,8 @@ public class MemoryIndex { info.fieldInfo.getVectorDimension(), info.fieldInfo.getVectorEncoding(), info.fieldInfo.getVectorSimilarityFunction(), - info.fieldInfo.isSoftDeletesField()); + info.fieldInfo.isSoftDeletesField(), + info.fieldInfo.isParentField()); } else if (existingDocValuesType != docValuesType) { throw new IllegalArgumentException( "Can't add [" diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/compressing/dummy/DummyCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/compressing/dummy/DummyCompressingCodec.java index 9d7cd7e7742..ef9f58e1bb3 100644 --- 
a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/compressing/dummy/DummyCompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/compressing/dummy/DummyCompressingCodec.java @@ -78,7 +78,7 @@ public class DummyCompressingCodec extends CompressingCodec { @Override public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException { - out.copyBytes(buffersInput, buffersInput.size()); + out.copyBytes(buffersInput, buffersInput.length()); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseFieldInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseFieldInfoFormatTestCase.java index 83f8b293113..5f09348fdfe 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseFieldInfoFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseFieldInfoFormatTestCase.java @@ -68,7 +68,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes FieldInfo fi = createFieldInfo(); addAttributes(fi); - FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish(); + FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish(); codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT); @@ -96,7 +96,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes fi.putAttribute("foo", "bar"); fi.putAttribute("bar", "baz"); - FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish(); + FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish(); codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT); @@ -136,7 +136,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes FieldInfo fi = createFieldInfo(); addAttributes(fi); - FieldInfos infos = 
INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish(); + FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish(); fail.setDoFail(); expectThrows( @@ -171,7 +171,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes FieldInfo fi = createFieldInfo(); addAttributes(fi); - FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish(); + FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish(); fail.setDoFail(); expectThrows( @@ -206,7 +206,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes FieldInfo fi = createFieldInfo(); addAttributes(fi); - FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish(); + FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish(); codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT); @@ -243,7 +243,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes FieldInfo fi = createFieldInfo(); addAttributes(fi); - FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish(); + FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish(); codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT); @@ -276,7 +276,9 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes String softDeletesField = random().nextBoolean() ? TestUtil.randomUnicodeString(random()) : null; - var builder = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(softDeletesField); + String parentField = random().nextBoolean() ? 
TestUtil.randomUnicodeString(random()) : null; + + var builder = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(softDeletesField, parentField); for (String field : fieldNames) { IndexableFieldType fieldType = randomFieldType(random(), field); @@ -307,7 +309,8 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes fieldType.vectorDimension(), fieldType.vectorEncoding(), fieldType.vectorSimilarityFunction(), - field.equals(softDeletesField)); + field.equals(softDeletesField), + field.equals(parentField)); addAttributes(fi); builder.add(fi); } @@ -431,6 +434,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseIndexFileFormatTestCase.java index 3b73af0779f..17aa522bdf6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseIndexFileFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseIndexFileFormatTestCase.java @@ -365,7 +365,8 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase { proto.getVectorDimension(), proto.getVectorEncoding(), proto.getVectorSimilarityFunction(), - proto.isSoftDeletesField()); + proto.isSoftDeletesField(), + proto.isParentField()); FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] {field}); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseSegmentInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseSegmentInfoFormatTestCase.java index 0ebb3fcffdc..1d65e3245dc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseSegmentInfoFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseSegmentInfoFormatTestCase.java @@ -80,6 +80,33 @@ 
public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT dir.close(); } + public void testHasBlocks() throws IOException { + assumeTrue("test requires a codec that can read/write hasBlocks", supportsHasBlocks()); + + Directory dir = newDirectory(); + Codec codec = getCodec(); + byte[] id = StringHelper.randomId(); + SegmentInfo info = + new SegmentInfo( + dir, + getVersions()[0], + getVersions()[0], + "_123", + 1, + false, + random().nextBoolean(), + codec, + Collections.emptyMap(), + id, + Collections.emptyMap(), + null); + info.setFiles(Collections.emptySet()); + codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); + SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); + assertEquals(info.getHasBlocks(), info2.getHasBlocks()); + dir.close(); + } + /** Tests SI writer adds itself to files... */ public void testAddsSelfToFiles() throws Exception { Directory dir = newDirectory(); @@ -260,6 +287,10 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT return true; } + protected boolean supportsHasBlocks() { + return true; + } + private SortField randomIndexSortField() { boolean reversed = random().nextBoolean(); SortField sortField; @@ -360,7 +391,11 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT for (int j = 0; j < numSortFields; ++j) { sortFields[j] = randomIndexSortField(); } - sort = new Sort(sortFields); + if (supportsHasBlocks()) { + sort = new Sort(sortFields); + } else { + sort = new Sort(sortFields); + } } Directory dir = newDirectory(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java index de9a0215d1b..e18c25ee0cf 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedLeafReader.java @@ -112,7 +112,8 @@ public class MismatchedLeafReader extends FilterLeafReader { oldInfo.getVectorEncoding(), // numeric type of vector samples // distance function for calculating similarity of the field's vector oldInfo.getVectorSimilarityFunction(), - oldInfo.isSoftDeletesField()); // used as soft-deletes field + oldInfo.isSoftDeletesField(), // used as soft-deletes field + oldInfo.isParentField()); shuffled.set(i, newInfo); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java index 2988957ba29..f6443c5c498 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java @@ -158,6 +158,7 @@ public class RandomIndexWriter implements Closeable { } else { softDeletesRatio = 0d; } + w = mockIndexWriter(dir, c, r); config = w.getConfig(); flushAt = TestUtil.nextInt(r, 10, 1000); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java index 3aefda16cb6..806888e13cd 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomPostingsTester.java @@ -165,6 +165,7 @@ public class RandomPostingsTester { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false); fieldUpto++; @@ -738,6 +739,7 @@ public class RandomPostingsTester { 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, + false, false); }