Mirror of https://github.com/apache/lucene.git (synced 2025-03-07 00:39:21 +00:00)
Add support for index sorting with document blocks (#12829)
Today index sorting will most likely break document blocks added with `IndexWriter#addDocuments(...)` and `#updateDocuments(...)`, since the index sorter has no indication of which documents are part of a block. This change automatically adds a marker field to parent documents if one is configured in `IWC`. The marker field is optional unless document blocks are indexed and index sorting is configured; in that case indexing blocks will fail unless a parent field is configured. Index sorting will preserve document blocks during sort: documents within a block will not be reordered by the sorting algorithm and will sort alongside their parent documents. Relates to #12711
This commit is contained in:
parent
b7728c5657
commit
df6bd25ce4
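For illustration, here is a minimal, hedged sketch of what the new configuration looks like from user code. It is not part of the commit; the directory choice and the field names ("timestamp", "_parent") are assumptions made for the example.

```java
import java.util.Arrays;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class ParentFieldExample {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig iwc = new IndexWriterConfig(); // default analyzer
    iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG)));
    // Without this line, adding the multi-document block below to a sorted
    // 10.x index would throw an IllegalArgumentException.
    iwc.setParentField("_parent");
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, iwc)) {
      Document child1 = new Document();
      child1.add(new StringField("type", "child", Field.Store.NO));
      child1.add(new NumericDocValuesField("timestamp", 1));
      Document child2 = new Document();
      child2.add(new StringField("type", "child", Field.Store.NO));
      child2.add(new NumericDocValuesField("timestamp", 2));
      Document parent = new Document();
      parent.add(new StringField("type", "parent", Field.Store.NO));
      parent.add(new NumericDocValuesField("timestamp", 3));
      // The parent document is the last one in the block; IndexWriter marks it
      // internally via the configured parent field.
      writer.addDocuments(Arrays.asList(child1, child2, parent));
      writer.commit();
    }
  }
}
```

After sorting and merging, the two children still immediately precede their parent, in their original relative order.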
@@ -90,6 +90,11 @@ New Features
 * LUCENE-10626 Hunspell: add tools to aid dictionary editing:
   analysis introspection, stem expansion and stem/flag suggestion (Peter Gromov)

+* GITHUB#12829: For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
+  IndexWriter#addDocuments or IndexWriter#updateDocuments even when index sorting is configured. Document blocks are
+  maintained alongside their parent documents during sort and merge. IndexWriterConfig now requires a parent field to be
+  specified if index sorting is used together with document blocks. (Simon Willnauer)
+
 Improvements
 ---------------------
@@ -131,6 +136,12 @@ Bug Fixes
 * GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those
   of DoubleValues#doubleValue(). (Uwe Schindler)

+Changes in Backwards Compatibility Policy
+-----------------------------------------
+
+* GITHUB#12829: IndexWriter#addDocuments or IndexWriter#updateDocuments now require a parent field name to be
+  specified in IndexWriterConfig if document blocks are indexed and index-time sorting is configured. (Simon Willnauer)
+
 Other
 ---------------------

@@ -19,6 +19,14 @@

 ## Migration from Lucene 9.x to Lucene 10.0

+### IndexWriter requires a parent document field in order to use index sorting with document blocks (GITHUB#12829)
+
+For indices newly created as of 10.0.0 onwards, IndexWriter preserves document blocks indexed via
+IndexWriter#addDocuments or IndexWriter#updateDocuments when index sorting is configured. Document blocks are maintained
+alongside their parent documents during sort and merge. The internally used parent field must be configured in
+IndexWriterConfig only if index sorting is used together with document blocks. See `IndexWriterConfig#setParentField`
+for reference.
+
 ### Minor API changes in MatchHighlighter and MatchRegionRetriever. (GITHUB#12881)

 The API of interfaces for accepting highlights has changed to allow performance improvements. Look at the issue and the PR diff to get
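To make the migration requirement concrete, here is a hedged sketch (imports and an `IndexWriter` set up as in the earlier example) of what now fails on a newly created 10.x index. The exception message is taken from the commit; the sort field name is illustrative.

```java
IndexWriterConfig iwc = new IndexWriterConfig();
iwc.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG)));
// note: no iwc.setParentField(...) call
try (Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc)) {
  writer.addDocument(new Document()); // single documents still work
  // a block of more than one document now throws:
  // "a parent field must be set in order to use document blocks with index sorting;
  //  see IndexWriterConfig#setParentField"
  writer.addDocuments(Arrays.asList(new Document(), new Document()));
}
```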
@@ -217,7 +217,8 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
                 0,
                 VectorEncoding.FLOAT32,
                 VectorSimilarityFunction.EUCLIDEAN,
-                isSoftDeletesField);
+                isSoftDeletesField,
+                false);
       } catch (IllegalStateException e) {
         throw new CorruptIndexException(
             "invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
@@ -194,7 +194,8 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
                 vectorDimension,
                 VectorEncoding.FLOAT32,
                 vectorDistFunc,
-                isSoftDeletesField);
+                isSoftDeletesField,
+                false);
         infos[i].checkConsistency();
       } catch (IllegalStateException e) {
         throw new CorruptIndexException(
@@ -35,4 +35,9 @@ public class TestLucene70SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
   protected Codec getCodec() {
     return new Lucene84RWCodec();
   }
+
+  @Override
+  protected boolean supportsHasBlocks() {
+    return false;
+  }
 }
@@ -35,4 +35,9 @@ public class TestLucene86SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
   protected Codec getCodec() {
     return new Lucene87RWCodec();
   }
+
+  @Override
+  protected boolean supportsHasBlocks() {
+    return false;
+  }
 }
@@ -32,4 +32,9 @@ public class TestLucene90SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
   protected Codec getCodec() {
     return new Lucene90RWCodec();
   }
+
+  @Override
+  protected boolean supportsHasBlocks() {
+    return false;
+  }
 }
@@ -99,6 +99,8 @@ import org.apache.lucene.index.TermVectors;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.FieldExistsQuery;
@@ -2162,6 +2164,83 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     }
   }

+  public void testSortedIndexAddDocBlocks() throws Exception {
+    for (String name : oldSortedNames) {
+      Path path = createTempDir("sorted");
+      InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name + ".zip");
+      assertNotNull("Sorted index " + name + " not found", resource);
+      TestUtil.unzip(resource, path);
+
+      try (Directory dir = newFSDirectory(path)) {
+        final Sort sort;
+        try (DirectoryReader reader = DirectoryReader.open(dir)) {
+          assertEquals(1, reader.leaves().size());
+          sort = reader.leaves().get(0).reader().getMetaData().getSort();
+          assertNotNull(sort);
+          searchExampleIndex(reader);
+        }
+        // open writer
+        try (IndexWriter writer =
+            new IndexWriter(
+                dir,
+                newIndexWriterConfig(new MockAnalyzer(random()))
+                    .setOpenMode(OpenMode.APPEND)
+                    .setIndexSort(sort)
+                    .setMergePolicy(newLogMergePolicy()))) {
+          // add 10 docs
+          for (int i = 0; i < 10; i++) {
+            Document child = new Document();
+            child.add(new StringField("relation", "child", Field.Store.NO));
+            child.add(new StringField("bid", "" + i, Field.Store.NO));
+            child.add(new NumericDocValuesField("dateDV", i));
+            Document parent = new Document();
+            parent.add(new StringField("relation", "parent", Field.Store.NO));
+            parent.add(new StringField("bid", "" + i, Field.Store.NO));
+            parent.add(new NumericDocValuesField("dateDV", i));
+            writer.addDocuments(Arrays.asList(child, child, parent));
+            if (random().nextBoolean()) {
+              writer.flush();
+            }
+          }
+          if (random().nextBoolean()) {
+            writer.forceMerge(1);
+          }
+          writer.commit();
+          try (IndexReader reader = DirectoryReader.open(dir)) {
+            IndexSearcher searcher = new IndexSearcher(reader);
+            for (int i = 0; i < 10; i++) {
+              TopDocs children =
+                  searcher.search(
+                      new BooleanQuery.Builder()
+                          .add(
+                              new TermQuery(new Term("relation", "child")),
+                              BooleanClause.Occur.MUST)
+                          .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST)
+                          .build(),
+                      2);
+              TopDocs parents =
+                  searcher.search(
+                      new BooleanQuery.Builder()
+                          .add(
+                              new TermQuery(new Term("relation", "parent")),
+                              BooleanClause.Occur.MUST)
+                          .add(new TermQuery(new Term("bid", "" + i)), BooleanClause.Occur.MUST)
+                          .build(),
+                      2);
+              assertEquals(2, children.totalHits.value);
+              assertEquals(1, parents.totalHits.value);
+              // make sure it's sorted
+              assertEquals(children.scoreDocs[0].doc + 1, children.scoreDocs[1].doc);
+              assertEquals(children.scoreDocs[1].doc + 1, parents.scoreDocs[0].doc);
+            }
+          }
+        }
+        // This will confirm the docs are really sorted
+        TestUtil.checkIndex(dir);
+      }
+    }
+  }
+
   private void searchExampleIndex(DirectoryReader reader) throws IOException {
     IndexSearcher searcher = newSearcher(reader);
@@ -72,6 +72,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
   static final BytesRef VECTOR_ENCODING = new BytesRef(" vector encoding ");
   static final BytesRef VECTOR_SIMILARITY = new BytesRef(" vector similarity ");
   static final BytesRef SOFT_DELETES = new BytesRef(" soft-deletes ");
+  static final BytesRef PARENT = new BytesRef(" parent ");

   @Override
   public FieldInfos read(
@@ -170,6 +171,9 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
         SimpleTextUtil.readLine(input, scratch);
         assert StringHelper.startsWith(scratch.get(), SOFT_DELETES);
         boolean isSoftDeletesField = Boolean.parseBoolean(readString(SOFT_DELETES.length, scratch));
+        SimpleTextUtil.readLine(input, scratch);
+        assert StringHelper.startsWith(scratch.get(), PARENT);
+        boolean isParentField = Boolean.parseBoolean(readString(PARENT.length, scratch));

         infos[i] =
             new FieldInfo(
@@ -188,7 +192,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
                 vectorNumDimensions,
                 vectorEncoding,
                 vectorDistFunc,
-                isSoftDeletesField);
+                isSoftDeletesField,
+                isParentField);
       }

       SimpleTextUtil.checkFooter(input);
@@ -320,6 +325,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
         SimpleTextUtil.write(out, SOFT_DELETES);
         SimpleTextUtil.write(out, Boolean.toString(fi.isSoftDeletesField()), scratch);
         SimpleTextUtil.writeNewline(out);
+
+        SimpleTextUtil.write(out, PARENT);
+        SimpleTextUtil.write(out, Boolean.toString(fi.isParentField()), scratch);
+        SimpleTextUtil.writeNewline(out);
       }
       SimpleTextUtil.writeChecksum(out, scratch);
       success = true;
@@ -196,7 +196,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
         sortField[i] = SortFieldProvider.forName(provider).readSortField(bytes);
         assert bytes.eof();
       }
-      Sort indexSort = sortField.length == 0 ? null : new Sort(sortField);
+
+      final Sort indexSort;
+      if (sortField.length == 0) {
+        indexSort = null;
+      } else {
+        indexSort = new Sort(sortField);
+      }

       SimpleTextUtil.checkFooter(input);

@@ -335,7 +341,6 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
         SimpleTextUtil.write(output, b.bytes.get().toString(), scratch);
         SimpleTextUtil.writeNewline(output);
       }
-
       SimpleTextUtil.writeChecksum(output, scratch);
     }
   }
@@ -119,6 +119,7 @@ public class TestBlockWriter extends LuceneTestCase {
         0,
         VectorEncoding.FLOAT32,
         VectorSimilarityFunction.EUCLIDEAN,
-        true);
+        true,
+        false);
   }
 }
@@ -206,6 +206,7 @@ public class TestSTBlockReader extends LuceneTestCase {
         0,
         VectorEncoding.FLOAT32,
         VectorSimilarityFunction.EUCLIDEAN,
-        false);
+        false,
+        false);
   }
@@ -131,13 +131,14 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
     Throwable priorE = null;
     FieldInfo[] infos = null;
     try {
-      CodecUtil.checkIndexHeader(
-          input,
-          Lucene94FieldInfosFormat.CODEC_NAME,
-          Lucene94FieldInfosFormat.FORMAT_START,
-          Lucene94FieldInfosFormat.FORMAT_CURRENT,
-          segmentInfo.getId(),
-          segmentSuffix);
+      int format =
+          CodecUtil.checkIndexHeader(
+              input,
+              Lucene94FieldInfosFormat.CODEC_NAME,
+              Lucene94FieldInfosFormat.FORMAT_START,
+              Lucene94FieldInfosFormat.FORMAT_CURRENT,
+              segmentInfo.getId(),
+              segmentSuffix);

       final int size = input.readVInt(); // read in the size
       infos = new FieldInfo[size];
@@ -157,6 +158,18 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
         boolean omitNorms = (bits & OMIT_NORMS) != 0;
         boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
         boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
+        boolean isParentField =
+            format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
+
+        if ((bits & 0xE0) != 0) {
+          throw new CorruptIndexException(
+              "unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input);
+        }
+        if (format < FORMAT_PARENT_FIELD && (bits & 0xF0) != 0) {
+          throw new CorruptIndexException(
+              "parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"",
+              input);
+        }

         final IndexOptions indexOptions = getIndexOptions(input, input.readByte());

@@ -200,7 +213,8 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
                 vectorDimension,
                 vectorEncoding,
                 vectorDistFunc,
-                isSoftDeletesField);
+                isSoftDeletesField,
+                isParentField);
         infos[i].checkConsistency();
       } catch (IllegalStateException e) {
         throw new CorruptIndexException(
@@ -348,6 +362,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
     if (fi.omitsNorms()) bits |= OMIT_NORMS;
     if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
     if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
+    if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
     output.writeByte(bits);

     output.writeByte(indexOptionsByte(fi.getIndexOptions()));
@@ -375,11 +390,14 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
   // Codec header
   static final String CODEC_NAME = "Lucene94FieldInfos";
   static final int FORMAT_START = 0;
-  static final int FORMAT_CURRENT = FORMAT_START;
+  // this doesn't actually change the file format but uses up one more bit of an existing bit pattern
+  static final int FORMAT_PARENT_FIELD = 1;
+  static final int FORMAT_CURRENT = FORMAT_PARENT_FIELD;

   // Field flags
   static final byte STORE_TERMVECTOR = 0x1;
   static final byte OMIT_NORMS = 0x2;
   static final byte STORE_PAYLOADS = 0x4;
   static final byte SOFT_DELETES_FIELD = 0x8;
+  static final byte PARENT_FIELD_FIELD = 0x10;
 }
@@ -1176,34 +1176,46 @@ public final class CheckIndex implements Closeable {
       comparators[i] = fields[i].getComparator(1, Pruning.NONE).getLeafComparator(readerContext);
     }

-    int maxDoc = reader.maxDoc();
-
     try {
-
-      for (int docID = 1; docID < maxDoc; docID++) {
+      LeafMetaData metaData = reader.getMetaData();
+      FieldInfos fieldInfos = reader.getFieldInfos();
+      if (metaData.hasBlocks()
+          && fieldInfos.getParentField() == null
+          && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) {
+        throw new IllegalStateException(
+            "parent field is not set but the index has document blocks and was created with version: "
+                + metaData.getCreatedVersionMajor());
+      }
+      final DocIdSetIterator iter;
+      if (metaData.hasBlocks() && fieldInfos.getParentField() != null) {
+        iter = reader.getNumericDocValues(fieldInfos.getParentField());
+      } else {
+        iter = DocIdSetIterator.all(reader.maxDoc());
+      }
+      int prevDoc = iter.nextDoc();
+      int nextDoc;
+      while ((nextDoc = iter.nextDoc()) != NO_MORE_DOCS) {
         int cmp = 0;

         for (int i = 0; i < comparators.length; i++) {
-          // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
+          // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
           // the segments are always the same here...
-          comparators[i].copy(0, docID - 1);
+          comparators[i].copy(0, prevDoc);
           comparators[i].setBottom(0);
-          cmp = reverseMul[i] * comparators[i].compareBottom(docID);
+          cmp = reverseMul[i] * comparators[i].compareBottom(nextDoc);
           if (cmp != 0) {
             break;
           }
         }

         if (cmp > 0) {
           throw new CheckIndexException(
               "segment has indexSort="
                   + sort
                   + " but docID="
-                  + (docID - 1)
+                  + (prevDoc)
                   + " sorts after docID="
-                  + docID);
+                  + nextDoc);
         }
+        prevDoc = nextDoc;
       }
       msg(
           infoStream,
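The rewritten loop above no longer compares every adjacent pair of documents: when the segment has blocks, the iterator walks only the parent documents, so children are never compared against their siblings. Below is a standalone sketch of that invariant, not CheckIndex itself, assuming `parents` holds the parent doc IDs in index order and `order` encodes the index sort.

```java
import java.util.Comparator;

final class ParentOrderCheck {
  // Children between two parents are intentionally skipped, since a block
  // sorts as a unit under the position of its parent document.
  static void checkParentOrder(int[] parents, Comparator<Integer> order) {
    for (int i = 1; i < parents.length; i++) {
      if (order.compare(parents[i - 1], parents[i]) > 0) {
        throw new IllegalStateException(
            "docID=" + parents[i - 1] + " sorts after docID=" + parents[i]);
      }
    }
  }
}
```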
@@ -21,14 +21,17 @@ import java.text.NumberFormat;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
+import java.util.NoSuchElementException;
 import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.ReentrantLock;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
@@ -134,9 +137,11 @@ final class DocumentsWriterPerThread implements Accountable {
   private final ReentrantLock lock = new ReentrantLock();
   private int[] deleteDocIDs = new int[0];
   private int numDeletedDocIds = 0;
+  private final int indexMajorVersionCreated;
+  private final IndexingChain.ReservedField<NumericDocValuesField> parentField;

   DocumentsWriterPerThread(
-      int indexVersionCreated,
+      int indexMajorVersionCreated,
       String segmentName,
       Directory directoryOrig,
       Directory directory,
@@ -145,6 +150,7 @@ final class DocumentsWriterPerThread implements Accountable {
       FieldInfos.Builder fieldInfos,
       AtomicLong pendingNumDocs,
       boolean enableTestPoints) {
+    this.indexMajorVersionCreated = indexMajorVersionCreated;
     this.directory = new TrackingDirectoryWrapper(directory);
     this.fieldInfos = fieldInfos;
     this.indexWriterConfig = indexWriterConfig;
@@ -183,12 +189,19 @@ final class DocumentsWriterPerThread implements Accountable {
     this.enableTestPoints = enableTestPoints;
     indexingChain =
         new IndexingChain(
-            indexVersionCreated,
+            indexMajorVersionCreated,
            segmentInfo,
            this.directory,
            fieldInfos,
            indexWriterConfig,
            this::onAbortingException);
+    if (indexWriterConfig.getParentField() != null) {
+      this.parentField =
+          indexingChain.markAsReserved(
+              new NumericDocValuesField(indexWriterConfig.getParentField(), -1));
+    } else {
+      this.parentField = null;
+    }
   }

   final void testPoint(String message) {
@@ -231,7 +244,23 @@ final class DocumentsWriterPerThread implements Accountable {
     final int docsInRamBefore = numDocsInRAM;
     boolean allDocsIndexed = false;
     try {
-      for (Iterable<? extends IndexableField> doc : docs) {
+      final Iterator<? extends Iterable<? extends IndexableField>> iterator = docs.iterator();
+      while (iterator.hasNext()) {
+        Iterable<? extends IndexableField> doc = iterator.next();
+        if (parentField != null) {
+          if (iterator.hasNext() == false) {
+            doc = addParentField(doc, parentField);
+          }
+        } else if (segmentInfo.getIndexSort() != null
+            && iterator.hasNext()
+            && indexMajorVersionCreated >= Version.LUCENE_10_0_0.major) {
+          // sort is configured but the parent field is missing, yet we have a doc block;
+          // still, we must not fail if this index was created in an earlier version where
+          // this behavior was permitted.
+          throw new IllegalArgumentException(
+              "a parent field must be set in order to use document blocks with index sorting; see IndexWriterConfig#setParentField");
+        }

         // Even on exception, the document is still added (but marked
         // deleted), so we don't need to un-reserve at that point.
         // Aborting exceptions will actually "lose" more than one
@@ -245,10 +274,11 @@ final class DocumentsWriterPerThread implements Accountable {
           onNewDocOnRAM.run();
         }
       }
-      allDocsIndexed = true;
-      if (numDocsInRAM - docsInRamBefore > 1) {
+      final int numDocs = numDocsInRAM - docsInRamBefore;
+      if (numDocs > 1) {
         segmentInfo.setHasBlocks();
       }
+      allDocsIndexed = true;
       return finishDocuments(deleteNode, docsInRamBefore);
     } finally {
       if (!allDocsIndexed && !aborted) {
@@ -262,6 +292,34 @@ final class DocumentsWriterPerThread implements Accountable {
     }
   }

+  private Iterable<? extends IndexableField> addParentField(
+      Iterable<? extends IndexableField> doc, IndexableField parentField) {
+    return () -> {
+      final Iterator<? extends IndexableField> first = doc.iterator();
+      return new Iterator<>() {
+        IndexableField additionalField = parentField;
+
+        @Override
+        public boolean hasNext() {
+          return additionalField != null || first.hasNext();
+        }
+
+        @Override
+        public IndexableField next() {
+          if (additionalField != null) {
+            IndexableField field = additionalField;
+            additionalField = null;
+            return field;
+          }
+          if (first.hasNext()) {
+            return first.next();
+          }
+          throw new NoSuchElementException();
+        }
+      };
+    };
+  }
+
   private long finishDocuments(DocumentsWriterDeleteQueue.Node<?> deleteNode, int docIdUpTo) {
     /*
      * here we actually finish the document in two steps 1. push the delete into
@@ -63,6 +63,8 @@ public final class FieldInfo {
   // whether this field is used as the soft-deletes field
   private final boolean softDeletesField;

+  private final boolean isParentField;
+
   /**
    * Sole constructor.
    *
@@ -84,7 +86,8 @@ public final class FieldInfo {
       int vectorDimension,
       VectorEncoding vectorEncoding,
       VectorSimilarityFunction vectorSimilarityFunction,
-      boolean softDeletesField) {
+      boolean softDeletesField,
+      boolean isParentField) {
     this.name = Objects.requireNonNull(name);
     this.number = number;
     this.docValuesType =
@@ -111,6 +114,7 @@ public final class FieldInfo {
     this.vectorEncoding = vectorEncoding;
     this.vectorSimilarityFunction = vectorSimilarityFunction;
     this.softDeletesField = softDeletesField;
+    this.isParentField = isParentField;
     this.checkConsistency();
   }

@@ -206,6 +210,13 @@ public final class FieldInfo {
       throw new IllegalArgumentException(
           "vectorDimension must be >=0; got " + vectorDimension + " (field: '" + name + "')");
     }
+
+    if (softDeletesField && isParentField) {
+      throw new IllegalArgumentException(
+          "field can't be used as soft-deletes field and parent document field (field: '"
+              + name
+              + "')");
+    }
   }

   /**
@@ -633,4 +644,12 @@ public final class FieldInfo {
   public boolean isSoftDeletesField() {
     return softDeletesField;
   }
+
+  /**
+   * Returns true if this field is configured and used as the parent document field. See
+   * {@link IndexWriterConfig#setParentField(String)}
+   */
+  public boolean isParentField() {
+    return isParentField;
+  }
 }
@@ -59,6 +59,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
   private final boolean hasVectorValues;
   private final String softDeletesField;

+  private final String parentField;
+
   // used only by fieldInfo(int)
   private final FieldInfo[] byNumber;

@@ -78,6 +80,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     boolean hasPointValues = false;
     boolean hasVectorValues = false;
     String softDeletesField = null;
+    String parentField = null;

     int size = 0; // number of elements in byNumberTemp, number of used array slots
     FieldInfo[] byNumberTemp = new FieldInfo[10]; // initial array capacity of 10
@@ -132,6 +135,13 @@ public class FieldInfos implements Iterable<FieldInfo> {
         }
         softDeletesField = info.name;
       }
+      if (info.isParentField()) {
+        if (parentField != null && parentField.equals(info.name) == false) {
+          throw new IllegalArgumentException(
+              "multiple parent fields [" + info.name + ", " + parentField + "]");
+        }
+        parentField = info.name;
+      }
     }

     this.hasVectors = hasVectors;
@@ -145,6 +155,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     this.hasPointValues = hasPointValues;
     this.hasVectorValues = hasVectorValues;
     this.softDeletesField = softDeletesField;
+    this.parentField = parentField;

     List<FieldInfo> valuesTemp = new ArrayList<>();
     byNumber = new FieldInfo[size];
@@ -178,7 +189,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
             .filter(Objects::nonNull)
             .findAny()
             .orElse(null);
-    final Builder builder = new Builder(new FieldNumbers(softDeletesField));
+    final String parentField = getAndValidateParentField(leaves);
+    final Builder builder = new Builder(new FieldNumbers(softDeletesField, parentField));
     for (final LeafReaderContext ctx : leaves) {
       for (FieldInfo fieldInfo : ctx.reader().getFieldInfos()) {
         builder.add(fieldInfo);
@@ -188,6 +200,26 @@ public class FieldInfos implements Iterable<FieldInfo> {
     }
   }

+  private static String getAndValidateParentField(List<LeafReaderContext> leaves) {
+    boolean set = false;
+    String theField = null;
+    for (LeafReaderContext ctx : leaves) {
+      String field = ctx.reader().getFieldInfos().getParentField();
+      if (set && Objects.equals(field, theField) == false) {
+        throw new IllegalStateException(
+            "expected parent doc field to be \""
+                + theField
+                + "\" across all segments but found a segment with different field \""
+                + field
+                + "\"");
+      } else {
+        theField = field;
+        set = true;
+      }
+    }
+    return theField;
+  }
+
   /** Returns a set of names of fields that have a terms index. The order is undefined. */
   public static Collection<String> getIndexedFields(IndexReader reader) {
     return reader.leaves().stream()
@@ -254,6 +286,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
     return softDeletesField;
   }

+  /** Returns the parent document field name if it exists; otherwise returns null */
+  public String getParentField() {
+    return parentField;
+  }
+
   /** Returns the number of fields */
   public int size() {
     return byName.size();
@@ -345,7 +382,10 @@ public class FieldInfos implements Iterable<FieldInfo> {
     // The soft-deletes field from IWC to enforce a single soft-deletes field
     private final String softDeletesFieldName;

-    FieldNumbers(String softDeletesFieldName) {
+    // The parent document field from IWC to mark parent documents when indexing
+    private final String parentFieldName;
+
+    FieldNumbers(String softDeletesFieldName, String parentFieldName) {
       this.nameToNumber = new HashMap<>();
       this.numberToName = new HashMap<>();
       this.indexOptions = new HashMap<>();
@@ -355,11 +395,21 @@ public class FieldInfos implements Iterable<FieldInfo> {
       this.omitNorms = new HashMap<>();
       this.storeTermVectors = new HashMap<>();
       this.softDeletesFieldName = softDeletesFieldName;
+      this.parentFieldName = parentFieldName;
+      if (softDeletesFieldName != null
+          && parentFieldName != null
+          && parentFieldName.equals(softDeletesFieldName)) {
+        throw new IllegalArgumentException(
+            "parent document and soft-deletes field can't be the same field \""
+                + parentFieldName
+                + "\"");
+      }
     }

     synchronized void verifyFieldInfo(FieldInfo fi) {
       String fieldName = fi.getName();
       verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField());
+      verifyParentFieldName(fieldName, fi.isParentField());
       if (nameToNumber.containsKey(fieldName)) {
         verifySameSchema(fi);
       }
@@ -373,6 +423,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     synchronized int addOrGet(FieldInfo fi) {
       String fieldName = fi.getName();
       verifySoftDeletedFieldName(fieldName, fi.isSoftDeletesField());
+      verifyParentFieldName(fieldName, fi.isParentField());
       Integer fieldNumber = nameToNumber.get(fieldName);

       if (fieldNumber != null) {
@@ -437,6 +488,33 @@ public class FieldInfos implements Iterable<FieldInfo> {
       }
     }

+    private void verifyParentFieldName(String fieldName, boolean isParentField) {
+      if (isParentField) {
+        if (parentFieldName == null) {
+          throw new IllegalArgumentException(
+              "can't add field ["
+                  + fieldName
+                  + "] as parent document field; this IndexWriter has no parent document field configured");
+        } else if (fieldName.equals(parentFieldName) == false) {
+          throw new IllegalArgumentException(
+              "can't add field ["
+                  + fieldName
+                  + "] as parent document field; this IndexWriter is configured with ["
+                  + parentFieldName
+                  + "] as parent document field");
+        }
+      } else if (fieldName.equals(parentFieldName)) { // isParentField == false
+        // this would be the case if the current index has a parent field that is
+        // not a parent field in the incoming index (think addIndexes)
+        throw new IllegalArgumentException(
+            "can't add ["
+                + fieldName
+                + "] as non parent document field; this IndexWriter is configured with ["
+                + parentFieldName
+                + "] as parent document field");
+      }
+    }
+
     private void verifySameSchema(FieldInfo fi) {
       String fieldName = fi.getName();
       IndexOptions currentOpts = this.indexOptions.get(fieldName);
@@ -513,7 +591,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
             0,
             VectorEncoding.FLOAT32,
             VectorSimilarityFunction.EUCLIDEAN,
-            (softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)));
+            (softDeletesFieldName != null && softDeletesFieldName.equals(fieldName)),
+            (parentFieldName != null && parentFieldName.equals(fieldName)));
         addOrGet(fi);
       }
     } else {
@@ -579,6 +658,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
       if (dvType != dvType0) return null;

       boolean isSoftDeletesField = fieldName.equals(softDeletesFieldName);
+      boolean isParentField = fieldName.equals(parentFieldName);
       return new FieldInfo(
           fieldName,
           newFieldNumber,
@@ -595,7 +675,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
           0,
           VectorEncoding.FLOAT32,
           VectorSimilarityFunction.EUCLIDEAN,
-          isSoftDeletesField);
+          isSoftDeletesField,
+          isParentField);
     }

     synchronized Set<String> getFieldNames() {
@@ -627,6 +708,14 @@ public class FieldInfos implements Iterable<FieldInfo> {
       return globalFieldNumbers.softDeletesFieldName;
     }

+    /**
+     * Returns the name of the parent document field or <tt>null</tt> if no parent field is
+     * configured
+     */
+    public String getParentFieldName() {
+      return globalFieldNumbers.parentFieldName;
+    }
+
     /**
      * Adds the provided FieldInfo to this Builder if this field doesn't exist in this Builder. Also
      * adds a new field with its schema options to the global FieldNumbers if the field doesn't
@@ -710,7 +799,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
           fi.getVectorDimension(),
           fi.getVectorEncoding(),
           fi.getVectorSimilarityFunction(),
-          fi.isSoftDeletesField());
+          fi.isSoftDeletesField(),
+          fi.isParentField());
       byName.put(fiNew.getName(), fiNew);
       return fiNew;
     }
@@ -1261,7 +1261,8 @@ public class IndexWriter
    * If this {@link SegmentInfos} has no global field number map the returned instance is empty
    */
  private FieldNumbers getFieldNumberMap() throws IOException {
-    final FieldNumbers map = new FieldNumbers(config.softDeletesField);
+    final FieldNumbers map =
+        new FieldNumbers(config.getSoftDeletesField(), config.getParentField());

    for (SegmentCommitInfo info : segmentInfos) {
      FieldInfos fis = readFieldInfos(info);
@@ -6614,10 +6615,12 @@ public class IndexWriter
      }

      @Override
-      public FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName) {
+      public FieldInfosBuilder newFieldInfosBuilder(
+          String softDeletesFieldName, String parentFieldName) {
        return new FieldInfosBuilder() {
          private final FieldInfos.Builder builder =
-              new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName));
+              new FieldInfos.Builder(
+                  new FieldInfos.FieldNumbers(softDeletesFieldName, parentFieldName));

          @Override
          public FieldInfosBuilder add(FieldInfo fi) {
@@ -545,4 +545,20 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
     this.eventListener = eventListener;
     return this;
   }
+
+  /**
+   * Sets the parent document field. If this optional property is set, IndexWriter will add an
+   * internal field to every root document added to the index writer. A document is considered a
+   * parent document if it's the last document in a document block indexed via {@link
+   * IndexWriter#addDocuments(Iterable)} or {@link IndexWriter#updateDocuments(Term, Iterable)} and
+   * its relatives. Additionally, all individual documents added via the single-document methods
+   * ({@link IndexWriter#addDocument(Iterable)} etc.) are also considered parent documents. This
+   * property is optional for all indices that don't use document blocks in combination with index
+   * sorting. In order to maintain the API guarantee that the document order of a block is not
+   * altered by the {@link IndexWriter}, a marker for parent documents is required.
+   */
+  public IndexWriterConfig setParentField(String parentField) {
+    this.parentField = parentField;
+    return this;
+  }
 }
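A short hedged usage sketch for the javadoc above, given an `IndexWriter` configured as in the earlier example (the "docId" term and the document variables are illustrative): with a parent field configured, a nested block can be replaced atomically while the writer keeps its internal order.

```java
// Deletes the previously indexed block whose docs match the term, then adds
// the new block; the parent document stays last through sort and merge.
writer.updateDocuments(
    new Term("docId", "42"),
    Arrays.asList(child1, child2, parent));
```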
@@ -18,6 +18,7 @@ package org.apache.lucene.index;

 import java.io.Closeable;
 import java.io.IOException;
+import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -27,6 +28,7 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Consumer;
+import java.util.function.Function;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.codecs.DocValuesConsumer;
@@ -38,6 +40,7 @@ import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.PointsFormat;
 import org.apache.lucene.codecs.PointsWriter;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.InvertableType;
 import org.apache.lucene.document.KnnByteVectorField;
 import org.apache.lucene.document.KnnFloatVectorField;
 import org.apache.lucene.document.StoredValue;
@@ -49,6 +52,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
@@ -57,6 +61,7 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.Version;

 /** Default general purpose indexing chain, which handles indexing all types of fields. */
 final class IndexingChain implements Accountable {
@@ -219,7 +224,31 @@ final class IndexingChain implements Accountable {
     }

     LeafReader docValuesReader = getDocValuesLeafReader();
+    Function<IndexSorter.DocComparator, IndexSorter.DocComparator> comparatorWrapper =
+        Function.identity();
+
+    if (state.segmentInfo.getHasBlocks() && state.fieldInfos.getParentField() != null) {
+      final DocIdSetIterator readerValues =
+          docValuesReader.getNumericDocValues(state.fieldInfos.getParentField());
+      if (readerValues == null) {
+        throw new CorruptIndexException(
+            "missing doc values for parent field \"" + state.fieldInfos.getParentField() + "\"",
+            "IndexingChain");
+      }
+      BitSet parents = BitSet.of(readerValues, state.segmentInfo.maxDoc());
+      comparatorWrapper =
+          in ->
+              (docID1, docID2) ->
+                  in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2));
+    }
+    if (state.segmentInfo.getHasBlocks()
+        && state.fieldInfos.getParentField() == null
+        && indexCreatedVersionMajor >= Version.LUCENE_10_0_0.major) {
+      throw new CorruptIndexException(
+          "parent field is not set but the index has blocks and uses index sorting. indexCreatedVersionMajor: "
+              + indexCreatedVersionMajor,
+          "IndexingChain");
+    }
     List<IndexSorter.DocComparator> comparators = new ArrayList<>();
     for (int i = 0; i < indexSort.getSort().length; i++) {
       SortField sortField = indexSort.getSort()[i];
@@ -227,7 +256,10 @@ final class IndexingChain implements Accountable {
       if (sorter == null) {
         throw new UnsupportedOperationException("Cannot sort index using sort field " + sortField);
       }
-      comparators.add(sorter.getDocComparator(docValuesReader, state.segmentInfo.maxDoc()));
+
+      IndexSorter.DocComparator docComparator =
+          sorter.getDocComparator(docValuesReader, state.segmentInfo.maxDoc());
+      comparators.add(comparatorWrapper.apply(docComparator));
     }
     Sorter sorter = new Sorter(indexSort);
     // returns null if the documents are already sorted
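The wrapping above is the heart of block-aware sorting: every doc ID is mapped to the parent that closes its block before the comparator runs, so all documents of a block compare as equal and the stable sorter leaves their relative order untouched. Here is a standalone sketch of the same idea, assuming `parents` is a bit set with one bit set per parent document.

```java
import org.apache.lucene.index.IndexSorter;
import org.apache.lucene.util.FixedBitSet;

final class BlockComparators {
  // Children compare by the sort values of their enclosing block's parent;
  // nextSetBit(docID) finds the first parent at or after docID.
  static IndexSorter.DocComparator wrap(IndexSorter.DocComparator in, FixedBitSet parents) {
    return (docID1, docID2) ->
        in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2));
  }
}
```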
@@ -546,7 +578,17 @@ final class IndexingChain implements Accountable {
     // build schema for each unique doc field
     for (IndexableField field : document) {
       IndexableFieldType fieldType = field.fieldType();
-      PerField pf = getOrAddPerField(field.name());
+      final boolean isReserved = field.getClass() == ReservedField.class;
+      PerField pf =
+          getOrAddPerField(
+              field.name(), false
+              /* we never add reserved fields during indexing; this should be done during DWPT setup */ );
+      if (pf.reserved != isReserved) {
+        throw new IllegalArgumentException(
+            "\""
+                + field.name()
+                + "\" is a reserved field and should not be added to any document");
+      }
       if (pf.fieldGen != fieldGen) { // first time we see this field in this document
         fields[fieldCount++] = pf;
         pf.fieldGen = fieldGen;
@@ -556,7 +598,7 @@ final class IndexingChain implements Accountable {
       docFields[docFieldIdx++] = pf;
       updateDocFieldSchema(field.name(), pf.schema, fieldType);
     }
-    // For each field, if it the first time we see this field in this segment,
+    // For each field, if it's the first time we see this field in this segment,
     // initialize its FieldInfo.
     // If we have already seen this field, verify that its schema
     // within the current doc matches its schema in the index.
@@ -646,7 +688,8 @@ final class IndexingChain implements Accountable {
             s.vectorDimension,
             s.vectorEncoding,
             s.vectorSimilarityFunction,
-            pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName())));
+            pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName()),
+            pf.fieldName.equals(fieldInfos.getParentFieldName())));
     pf.setFieldInfo(fi);
     if (fi.getIndexOptions() != IndexOptions.NONE) {
       pf.setInvertState();
@@ -741,7 +784,7 @@ final class IndexingChain implements Accountable {
    * Returns a previously created {@link PerField}, absorbing the type information from {@link
    * FieldType}, and creates a new {@link PerField} if this field name wasn't seen yet.
    */
-  private PerField getOrAddPerField(String fieldName) {
+  private PerField getOrAddPerField(String fieldName, boolean reserved) {
     final int hashPos = fieldName.hashCode() & hashMask;
     PerField pf = fieldHash[hashPos];
     while (pf != null && pf.fieldName.equals(fieldName) == false) {
@@ -757,7 +800,8 @@ final class IndexingChain implements Accountable {
               schema,
               indexWriterConfig.getSimilarity(),
               indexWriterConfig.getInfoStream(),
-              indexWriterConfig.getAnalyzer());
+              indexWriterConfig.getAnalyzer(),
+              reserved);
       pf.next = fieldHash[hashPos];
       fieldHash[hashPos] = pf;
       totalFieldCount++;
@@ -1022,6 +1066,7 @@ final class IndexingChain implements Accountable {
     final String fieldName;
     final int indexCreatedVersionMajor;
     final FieldSchema schema;
+    final boolean reserved;
     FieldInfo fieldInfo;
     final Similarity similarity;

@@ -1059,13 +1104,15 @@ final class IndexingChain implements Accountable {
         FieldSchema schema,
         Similarity similarity,
         InfoStream infoStream,
-        Analyzer analyzer) {
+        Analyzer analyzer,
+        boolean reserved) {
       this.fieldName = fieldName;
       this.indexCreatedVersionMajor = indexCreatedVersionMajor;
       this.schema = schema;
       this.similarity = similarity;
       this.infoStream = infoStream;
       this.analyzer = analyzer;
+      this.reserved = reserved;
     }

     void reset(int docId) {
@@ -1512,4 +1559,77 @@ final class IndexingChain implements Accountable {
       assertSame("point num bytes", fi.getPointNumBytes(), pointNumBytes);
     }
   }
+
+  /**
+   * Wraps the given field in a reserved field and registers it as reserved. Only DWPT should do
+   * this, to mark fields as private / reserved and prevent the field name from being used outside
+   * of the IW / DWPT ecosystem.
+   */
+  <T extends IndexableField> ReservedField<T> markAsReserved(T field) {
+    getOrAddPerField(field.name(), true);
+    return new ReservedField<T>(field);
+  }
+
+  static final class ReservedField<T extends IndexableField> implements IndexableField {
+
+    private final T delegate;
+
+    private ReservedField(T delegate) {
+      this.delegate = delegate;
+    }
+
+    T getDelegate() {
+      return delegate;
+    }
+
+    @Override
+    public String name() {
+      return delegate.name();
+    }
+
+    @Override
+    public IndexableFieldType fieldType() {
+      return delegate.fieldType();
+    }
+
+    @Override
+    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+      return delegate.tokenStream(analyzer, reuse);
+    }
+
+    @Override
+    public BytesRef binaryValue() {
+      return delegate.binaryValue();
+    }
+
+    @Override
+    public String stringValue() {
+      return delegate.stringValue();
+    }
+
+    @Override
+    public CharSequence getCharSequenceValue() {
+      return delegate.getCharSequenceValue();
+    }
+
+    @Override
+    public Reader readerValue() {
+      return delegate.readerValue();
+    }
+
+    @Override
+    public Number numericValue() {
+      return delegate.numericValue();
+    }
+
+    @Override
+    public StoredValue storedValue() {
+      return delegate.storedValue();
+    }
+
+    @Override
+    public InvertableType invertableType() {
+      return delegate.invertableType();
+    }
+  }
 }
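The reservation has a user-visible effect worth noting: once a parent field is configured, application code can no longer add a field with that name itself. A hedged sketch (the "_parent" name is illustrative; the message comes from the check in processField above):

```java
Document doc = new Document();
doc.add(new NumericDocValuesField("_parent", 1L));
// throws IllegalArgumentException:
// "_parent" is a reserved field and should not be added to any document
writer.addDocument(doc);
```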
@@ -98,6 +98,9 @@ public class LiveIndexWriterConfig {
   /** The field names involved in the index sort */
   protected Set<String> indexSortFields = Collections.emptySet();

+  /** parent document field */
+  protected String parentField = null;
+
   /**
    * if an indexing thread should check for pending flushes on update in order to help out on a full
    * flush
@@ -458,6 +461,11 @@ public class LiveIndexWriterConfig {
     return eventListener;
   }

+  /** Returns the parent document field name if configured. */
+  public String getParentField() {
+    return parentField;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
@@ -486,6 +494,7 @@ public class LiveIndexWriterConfig {
     sb.append("maxFullFlushMergeWaitMillis=").append(getMaxFullFlushMergeWaitMillis()).append("\n");
     sb.append("leafSorter=").append(getLeafSorter()).append("\n");
     sb.append("eventListener=").append(getIndexWriterEventListener()).append("\n");
+    sb.append("parentField=").append(getParentField()).append("\n");
     return sb.toString();
   }
 }
@@ -22,8 +22,10 @@ import java.util.List;
 import org.apache.lucene.index.MergeState.DocMap;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.Version;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;

@@ -50,6 +52,31 @@ final class MultiSorter {
             "Cannot use sort field " + fields[i] + " for index sorting");
       }
       comparables[i] = sorter.getComparableProviders(readers);
+      for (int j = 0; j < readers.size(); j++) {
+        CodecReader codecReader = readers.get(j);
+        FieldInfos fieldInfos = codecReader.getFieldInfos();
+        LeafMetaData metaData = codecReader.getMetaData();
+        if (metaData.hasBlocks() && fieldInfos.getParentField() != null) {
+          NumericDocValues parentDocs =
+              codecReader.getNumericDocValues(fieldInfos.getParentField());
+          assert parentDocs != null
+              : "parent field: "
+                  + fieldInfos.getParentField()
+                  + " must be present if index sorting is used with blocks";
+          BitSet parents = BitSet.of(parentDocs, codecReader.maxDoc());
+          IndexSorter.ComparableProvider[] providers = comparables[i];
+          IndexSorter.ComparableProvider provider = providers[j];
+          providers[j] = docId -> provider.getAsComparableLong(parents.nextSetBit(docId));
+        }
+        if (metaData.hasBlocks()
+            && fieldInfos.getParentField() == null
+            && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) {
+          throw new CorruptIndexException(
+              "parent field is not set but the index has blocks and uses index sorting. indexCreatedVersionMajor: "
+                  + metaData.getCreatedVersionMajor(),
+              "IndexingChain");
+        }
+      }
       reverseMuls[i] = fields[i].getReverse() ? -1 : 1;
     }
     int leafCount = readers.size();
@@ -111,9 +111,15 @@ public class ParallelLeafReader extends LeafReader {
             .filter(Objects::nonNull)
             .findAny()
             .orElse(null);
+    final String parentField =
+        completeReaderSet.stream()
+            .map(r -> r.getFieldInfos().getParentField())
+            .filter(Objects::nonNull)
+            .findAny()
+            .orElse(null);
     // TODO: make this read-only in a cleaner way?
     FieldInfos.Builder builder =
-        new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField));
+        new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField, parentField));

     Sort indexSort = null;
     int createdVersionMajor = -1;
@@ -720,7 +720,8 @@ final class ReadersAndUpdates {
         fi.getVectorDimension(),
         fi.getVectorEncoding(),
         fi.getVectorSimilarityFunction(),
-        fi.isSoftDeletesField());
+        fi.isSoftDeletesField(),
+        fi.isParentField());
   }

   private SegmentReader createNewReaderWithLatestLiveDocs(SegmentReader reader) throws IOException {
@@ -17,9 +17,12 @@
 package org.apache.lucene.index;

 import java.io.IOException;
+import java.util.function.Function;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.TimSorter;
+import org.apache.lucene.util.Version;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedLongValues;

@@ -206,13 +209,33 @@ public final class Sorter {
     SortField[] fields = sort.getSort();
     final IndexSorter.DocComparator[] comparators = new IndexSorter.DocComparator[fields.length];

+    Function<IndexSorter.DocComparator, IndexSorter.DocComparator> comparatorWrapper = in -> in;
+    LeafMetaData metaData = reader.getMetaData();
+    FieldInfos fieldInfos = reader.getFieldInfos();
+    if (metaData.hasBlocks() && fieldInfos.getParentField() != null) {
+      BitSet parents =
+          BitSet.of(reader.getNumericDocValues(fieldInfos.getParentField()), reader.maxDoc());
+      comparatorWrapper =
+          in ->
+              (docID1, docID2) ->
+                  in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2));
+    }
+    if (metaData.hasBlocks()
+        && fieldInfos.getParentField() == null
+        && metaData.getCreatedVersionMajor() >= Version.LUCENE_10_0_0.major) {
+      throw new CorruptIndexException(
+          "parent field is not set but the index has blocks. indexCreatedVersionMajor: "
+              + metaData.getCreatedVersionMajor(),
+          "Sorter");
+    }
+
     for (int i = 0; i < fields.length; i++) {
       IndexSorter sorter = fields[i].getIndexSorter();
       if (sorter == null) {
         throw new IllegalArgumentException(
             "Cannot use sortfield + " + fields[i] + " to sort indexes");
       }
-      comparators[i] = sorter.getDocComparator(reader, reader.maxDoc());
+      comparators[i] = comparatorWrapper.apply(sorter.getDocComparator(reader, reader.maxDoc()));
     }
     return sort(reader.maxDoc(), comparators);
   }
@@ -31,7 +31,7 @@ public interface IndexPackageAccess {

   void setIndexWriterMaxDocs(int limit);

-  FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName);
+  FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName, String parentFieldName);

   void checkImpacts(Impacts impacts, int max);

@@ -85,7 +85,6 @@ public final class Sort {
    */
   public Sort rewrite(IndexSearcher searcher) throws IOException {
     boolean changed = false;
-
     SortField[] rewrittenSortFields = new SortField[fields.length];
     for (int i = 0; i < fields.length; i++) {
       rewrittenSortFields[i] = fields[i].rewrite(searcher);
@@ -100,7 +99,6 @@ public final class Sort {
   @Override
   public String toString() {
     StringBuilder buffer = new StringBuilder();
-
     for (int i = 0; i < fields.length; i++) {
       buffer.append(fields[i].toString());
       if ((i + 1) < fields.length) buffer.append(',');
@@ -1937,4 +1937,97 @@ public class TestAddIndexes extends LuceneTestCase {
     targetDir.close();
     sourceDir.close();
   }
+
+  public void testIllegalParentDocChange() throws Exception {
+    Directory dir1 = newDirectory();
+    IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+    iwc1.setParentField("foobar");
+    RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
+    Document parent = new Document();
+    w1.addDocuments(Arrays.asList(new Document(), new Document(), parent));
+    w1.commit();
+    w1.addDocuments(Arrays.asList(new Document(), new Document(), parent));
+    w1.commit();
+    // so the index sort is in fact burned into the index:
+    w1.forceMerge(1);
+    w1.close();
+
+    Directory dir2 = newDirectory();
+    IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+    iwc2.setParentField("foo");
+    RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
+
+    IndexReader r1 = DirectoryReader.open(dir1);
+    String message =
+        expectThrows(
+                IllegalArgumentException.class,
+                () -> {
+                  w2.addIndexes((SegmentReader) getOnlyLeafReader(r1));
+                })
+            .getMessage();
+    assertEquals(
+        "can't add field [foobar] as parent document field; this IndexWriter is configured with [foo] as parent document field",
+        message);
+
+    message =
+        expectThrows(
+                IllegalArgumentException.class,
+                () -> {
+                  w2.addIndexes(dir1);
+                })
+            .getMessage();
+    assertEquals(
+        "can't add field [foobar] as parent document field; this IndexWriter is configured with [foo] as parent document field",
+        message);
+
+    Directory dir3 = newDirectory();
+    IndexWriterConfig iwc3 = newIndexWriterConfig(new MockAnalyzer(random()));
+    iwc3.setParentField("foobar");
+    RandomIndexWriter w3 = new RandomIndexWriter(random(), dir3, iwc3);
+
+    w3.addIndexes((SegmentReader) getOnlyLeafReader(r1));
+    w3.addIndexes(dir1);
+
+    IOUtils.close(r1, dir1, w2, dir2, w3, dir3);
+  }
+
+  public void testIllegalNonParentField() throws IOException {
+    Directory dir1 = newDirectory();
+    IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+    RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1, iwc1);
+    Document parent = new Document();
+    parent.add(new StringField("foo", "XXX", Field.Store.NO));
+    w1.addDocument(parent);
+    w1.close();
+
+    Directory dir2 = newDirectory();
+    IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+    iwc2.setParentField("foo");
+    RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2, iwc2);
+
+    IndexReader r1 = DirectoryReader.open(dir1);
+    String message =
+        expectThrows(
+                IllegalArgumentException.class,
+                () -> {
+                  w2.addIndexes((SegmentReader) getOnlyLeafReader(r1));
+                })
+            .getMessage();
+    assertEquals(
+        "can't add [foo] as non parent document field; this IndexWriter is configured with [foo] as parent document field",
+        message);
+
+    message =
+        expectThrows(
+                IllegalArgumentException.class,
+                () -> {
+                  w2.addIndexes(dir1);
+                })
+            .getMessage();
+    assertEquals(
+        "can't add [foo] as non parent document field; this IndexWriter is configured with [foo] as parent document field",
+        message);
+
+    IOUtils.close(r1, dir1, w2, dir2);
+  }
 }
@@ -114,6 +114,7 @@ public class TestCodecs extends LuceneTestCase {
               0,
               VectorEncoding.FLOAT32,
               VectorSimilarityFunction.EUCLIDEAN,
+              false,
               false));
     }
     this.terms = terms;
@@ -229,7 +230,8 @@ public class TestCodecs extends LuceneTestCase {
       terms[i] = new TermData(text, docs, null);
     }

-    final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
+    final FieldInfos.Builder builder =
+        new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null));

     final FieldData field = new FieldData("field", builder, terms, true, false);
     final FieldData[] fields = new FieldData[] {field};
@@ -292,7 +294,8 @@ public class TestCodecs extends LuceneTestCase {
   }

   public void testRandomPostings() throws Throwable {
-    final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
+    final FieldInfos.Builder builder =
+        new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null));

     final FieldData[] fields = new FieldData[NUM_FIELDS];
     for (int i = 0; i < NUM_FIELDS; i++) {
@ -236,7 +236,7 @@ public class TestDoc extends LuceneTestCase {
|
||||
si,
|
||||
InfoStream.getDefault(),
|
||||
trackingDir,
|
||||
new FieldInfos.FieldNumbers(null),
|
||||
new FieldInfos.FieldNumbers(null, null),
|
||||
context);
|
||||
|
||||
merger.merge();
|
||||
|
@ -239,7 +239,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
||||
}
|
||||
|
||||
public void testFieldNumbersAutoIncrement() {
|
||||
FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes");
|
||||
FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes", "parentDoc");
|
||||
for (int i = 0; i < 10; i++) {
|
||||
fieldNumbers.addOrGet(
|
||||
new FieldInfo(
|
||||
@ -258,6 +258,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
||||
0,
|
||||
VectorEncoding.FLOAT32,
|
||||
VectorSimilarityFunction.EUCLIDEAN,
|
||||
false,
|
||||
false));
|
||||
}
|
||||
int idx =
|
||||
@ -278,6 +279,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
||||
0,
|
||||
VectorEncoding.FLOAT32,
|
||||
VectorSimilarityFunction.EUCLIDEAN,
|
||||
false,
|
||||
false));
|
||||
assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
|
||||
|
||||
@ -300,6 +302,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
||||
0,
|
||||
VectorEncoding.FLOAT32,
|
||||
VectorSimilarityFunction.EUCLIDEAN,
|
||||
false,
|
||||
false));
|
||||
assertEquals("Field numbers should reset after clear()", 0, idx);
|
||||
}
|
||||
|
@ -45,8 +45,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
testDoc = new Document();
|
||||
final String softDeletesFieldName = null;
|
||||
fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName));
|
||||
fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, null));
|
||||
DocHelper.setupDoc(testDoc);
|
||||
for (IndexableField field : testDoc.getFields()) {
|
||||
IndexableFieldType ift = field.fieldType();
|
||||
@ -67,7 +66,8 @@ public class TestFieldsReader extends LuceneTestCase {
|
||||
0,
|
||||
VectorEncoding.FLOAT32,
|
||||
VectorSimilarityFunction.EUCLIDEAN,
|
||||
field.name().equals(softDeletesFieldName)));
|
||||
false,
|
||||
false));
|
||||
}
|
||||
dir = newDirectory();
|
||||
IndexWriterConfig conf =
|
||||
|
@ -2122,6 +2122,10 @@ public class TestIndexSorting extends LuceneTestCase {
|
||||
public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc1 = newIndexWriterConfig();
|
||||
boolean useParent = rarely();
|
||||
if (useParent) {
|
||||
iwc1.setParentField("___parent");
|
||||
}
|
||||
Sort indexSort =
|
||||
new Sort(
|
||||
new SortField("foo", SortField.Type.LONG), new SortField("bar", SortField.Type.LONG));
|
||||
@ -2154,6 +2158,9 @@ public class TestIndexSorting extends LuceneTestCase {
|
||||
} else {
|
||||
iwc.setIndexSort(indexSort);
|
||||
}
|
||||
if (useParent) {
|
||||
iwc.setParentField("___parent");
|
||||
}
|
||||
IndexWriter w2 = new IndexWriter(dir2, iwc);
|
||||
|
||||
if (useReaders) {
|
||||
@ -3165,4 +3172,277 @@ public class TestIndexSorting extends LuceneTestCase {
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testParentFieldNotConfigured() throws IOException {
|
||||
try (Directory dir = newDirectory()) {
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
|
||||
iwc.setIndexSort(indexSort);
|
||||
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
writer.addDocuments(Arrays.asList(new Document(), new Document()));
|
||||
});
|
||||
assertEquals(
|
||||
"a parent field must be set in order to use document blocks with index sorting; see IndexWriterConfig#setParentField",
|
||||
ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
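testParentFieldNotConfigured shows the failure mode; on the caller side the fix is one extra line of configuration. A minimal sketch, assuming the new IndexWriterConfig#setParentField from this change; the field name "_parent" and ByteBuffersDirectory are arbitrary choices for the example:

    import java.util.Arrays;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.ByteBuffersDirectory;
    import org.apache.lucene.store.Directory;

    public final class BlocksWithIndexSorting {
      public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory()) {
          IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
          iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.INT)));
          // without this line, the addDocuments(...) call below would throw
          // IllegalArgumentException, as asserted in the test above
          iwc.setParentField("_parent");
          try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document child = new Document();
            child.add(new NumericDocValuesField("foo", 1));
            Document parent = new Document();
            parent.add(new NumericDocValuesField("foo", 2));
            writer.addDocuments(Arrays.asList(child, parent)); // parent comes last
            writer.commit();
          }
        }
      }
    }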
public void testBlockContainsParentField() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
String parentField = "parent";
iwc.setParentField(parentField);
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
List<Runnable> runnables =
Arrays.asList(
() -> {
IllegalArgumentException ex =
expectThrows(
IllegalArgumentException.class,
() -> {
Document doc = new Document();
doc.add(new NumericDocValuesField("parent", 0));
writer.addDocuments(Arrays.asList(doc, new Document()));
});
assertEquals(
"\"parent\" is a reserved field and should not be added to any document",
ex.getMessage());
},
() -> {
IllegalArgumentException ex =
expectThrows(
IllegalArgumentException.class,
() -> {
Document doc = new Document();
doc.add(new NumericDocValuesField("parent", 0));
writer.addDocuments(Arrays.asList(new Document(), doc));
});
assertEquals(
"\"parent\" is a reserved field and should not be added to any document",
ex.getMessage());
});
Collections.shuffle(runnables, random());
for (Runnable runnable : runnables) {
runnable.run();
}
}
}
}

public void testIndexSortWithBlocks() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
iwc.setCodec(codec);
String parentField = "parent";
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
iwc.setParentField(parentField);
LogMergePolicy policy = newLogMergePolicy();
// make sure that merge factor is always > 2
if (policy.getMergeFactor() <= 2) {
policy.setMergeFactor(3);
}
iwc.setMergePolicy(policy);

// add already sorted documents
codec.numCalls = 0;
codec.needsIndexSort = false;
try (IndexWriter w = new IndexWriter(dir, iwc)) {
int numDocs = random().nextInt(50, 100);
for (int i = 0; i < numDocs; i++) {
Document child1 = new Document();
child1.add(new StringField("id", Integer.toString(i), Store.YES));
child1.add(new NumericDocValuesField("id", i));
child1.add(new NumericDocValuesField("child", 1));
child1.add(new NumericDocValuesField("foo", random().nextInt()));
Document child2 = new Document();
child2.add(new StringField("id", Integer.toString(i), Store.YES));
child2.add(new NumericDocValuesField("id", i));
child2.add(new NumericDocValuesField("child", 2));
child2.add(new NumericDocValuesField("foo", random().nextInt()));
Document parent = new Document();
parent.add(new StringField("id", Integer.toString(i), Store.YES));
parent.add(new NumericDocValuesField("id", i));
parent.add(new NumericDocValuesField("foo", random().nextInt()));
w.addDocuments(Arrays.asList(child1, child2, parent));
if (rarely()) {
w.commit();
}
}
w.commit();
if (random().nextBoolean()) {
w.forceMerge(1, true);
}
}

try (DirectoryReader reader = DirectoryReader.open(dir)) {
for (LeafReaderContext ctx : reader.leaves()) {
LeafReader leaf = ctx.reader();
NumericDocValues parentDISI = leaf.getNumericDocValues(parentField);
NumericDocValues ids = leaf.getNumericDocValues("id");
NumericDocValues children = leaf.getNumericDocValues("child");
int doc;
int expectedDocID = 2;
while ((doc = parentDISI.nextDoc()) != NO_MORE_DOCS) {
assertEquals(-1, parentDISI.longValue());
assertEquals(expectedDocID, doc);
int id = ids.nextDoc();
long child1ID = ids.longValue();
assertEquals(id, children.nextDoc());
long child1 = children.longValue();
assertEquals(1, child1);

id = ids.nextDoc();
long child2ID = ids.longValue();
assertEquals(id, children.nextDoc());
long child2 = children.longValue();
assertEquals(2, child2);

int idParent = ids.nextDoc();
assertEquals(id + 1, idParent);
long parent = ids.longValue();
assertEquals(child1ID, parent);
assertEquals(child2ID, parent);
expectedDocID += 3;
}
}
}
}
}
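The read loop above doubles as documentation of the on-disk encoding: the parent field surfaces as NumericDocValues that is present only on parent documents and always carries the value -1, so iterating it visits one parent per block. A reader-side sketch under that assumption; ParentDocsWalker is an illustrative name, not an API from this change:

    import java.io.IOException;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.NumericDocValues;
    import org.apache.lucene.search.DocIdSetIterator;

    public final class ParentDocsWalker {
      // Prints the docID of every parent document per segment. In an index made
      // up solely of blocks, the docs between two consecutive parents are the
      // children of the later one, since children sort alongside their parent.
      public static void printParents(DirectoryReader reader, String parentField)
          throws IOException {
        for (LeafReaderContext ctx : reader.leaves()) {
          NumericDocValues parents = ctx.reader().getNumericDocValues(parentField);
          if (parents == null) {
            continue; // this segment holds no document blocks
          }
          for (int doc = parents.nextDoc();
              doc != DocIdSetIterator.NO_MORE_DOCS;
              doc = parents.nextDoc()) {
            System.out.println("parent docID=" + doc);
          }
        }
      }
    }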
@SuppressWarnings("fallthrough")
public void testMixRandomDocumentsWithBlocks() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
AssertingNeedsIndexSortCodec codec = new AssertingNeedsIndexSortCodec();
iwc.setCodec(codec);
String parentField = "parent";
Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
iwc.setIndexSort(indexSort);
iwc.setParentField(parentField);
RandomIndexWriter randomIndexWriter = new RandomIndexWriter(random(), dir, iwc);
int numDocs = random().nextInt(100, 1000);
for (int i = 0; i < numDocs; i++) {
if (rarely()) {
randomIndexWriter.deleteDocuments(new Term("id", "" + random().nextInt(0, i + 1)));
}
List<Document> docs = new ArrayList<>();
switch (random().nextInt(100) % 5) {
case 4:
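// intentional fallthrough (hence the @SuppressWarnings above): cases 4..2
// stack up children so that case 1 always indexes the root document last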
Document child3 = new Document();
child3.add(new StringField("id", Integer.toString(i), Store.YES));
child3.add(new NumericDocValuesField("type", 2));
child3.add(new NumericDocValuesField("child_ord", 3));
child3.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(child3);
case 3:
Document child2 = new Document();
child2.add(new StringField("id", Integer.toString(i), Store.YES));
child2.add(new NumericDocValuesField("type", 2));
child2.add(new NumericDocValuesField("child_ord", 2));
child2.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(child2);
case 2:
Document child1 = new Document();
child1.add(new StringField("id", Integer.toString(i), Store.YES));
child1.add(new NumericDocValuesField("type", 2));
child1.add(new NumericDocValuesField("child_ord", 1));
child1.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(child1);
case 1:
Document root = new Document();
root.add(new StringField("id", Integer.toString(i), Store.YES));
root.add(new NumericDocValuesField("type", 1));
root.add(new NumericDocValuesField("num_children", docs.size()));
root.add(new NumericDocValuesField("foo", random().nextInt()));
docs.add(root);
randomIndexWriter.addDocuments(docs);
break;
case 0:
Document single = new Document();
single.add(new StringField("id", Integer.toString(i), Store.YES));
single.add(new NumericDocValuesField("type", 0));
single.add(new NumericDocValuesField("foo", random().nextInt()));
randomIndexWriter.addDocument(single);
}
if (rarely()) {
randomIndexWriter.forceMerge(1);
}
randomIndexWriter.commit();
}

randomIndexWriter.close();
try (DirectoryReader reader = DirectoryReader.open(dir)) {
for (LeafReaderContext ctx : reader.leaves()) {
LeafReader leaf = ctx.reader();
NumericDocValues parentDISI = leaf.getNumericDocValues(parentField);
assertNotNull(parentDISI);
NumericDocValues type = leaf.getNumericDocValues("type");
NumericDocValues childOrd = leaf.getNumericDocValues("child_ord");
NumericDocValues numChildren = leaf.getNumericDocValues("num_children");
int numCurrentChildren = 0;
int totalPendingChildren = 0;
String childId = null;
for (int i = 0; i < leaf.maxDoc(); i++) {
if (leaf.getLiveDocs() == null || leaf.getLiveDocs().get(i)) {
assertTrue(type.advanceExact(i));
int typeValue = (int) type.longValue();
switch (typeValue) {
case 2:
assertFalse(parentDISI.advanceExact(i));
assertTrue(childOrd.advanceExact(i));
if (numCurrentChildren == 0) { // first child
childId = leaf.storedFields().document(i).get("id");
totalPendingChildren = (int) childOrd.longValue() - 1;
} else {
assertNotNull(childId);
assertEquals(totalPendingChildren--, childOrd.longValue());
assertEquals(childId, leaf.storedFields().document(i).get("id"));
}
numCurrentChildren++;
break;
case 1:
assertTrue(parentDISI.advanceExact(i));
assertEquals(-1, parentDISI.longValue());
if (childOrd != null) {
assertFalse(childOrd.advanceExact(i));
}
assertTrue(numChildren.advanceExact(i));
assertEquals(0, totalPendingChildren);
assertEquals(numCurrentChildren, numChildren.longValue());
if (numCurrentChildren > 0) {
assertEquals(childId, leaf.storedFields().document(i).get("id"));
} else {
assertNull(childId);
}
numCurrentChildren = 0;
childId = null;
break;
case 0:
assertTrue(parentDISI.advanceExact(i));
assertEquals(-1, parentDISI.longValue());
if (childOrd != null) {
assertFalse(childOrd.advanceExact(i));
}
if (numChildren != null) {
assertFalse(numChildren.advanceExact(i));
}
break;
default:
fail();
}
}
}
}
}
}
}
}
@ -1722,6 +1722,44 @@ public class TestIndexWriter extends LuceneTestCase {
}
}

public void testSingleDocsDoNotTriggerHasBlocks() throws IOException {
try (Directory dir = newDirectory()) {
try (IndexWriter w =
new IndexWriter(
dir,
new IndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(Integer.MAX_VALUE)
.setRAMBufferSizeMB(100))) {

int docs = random().nextInt(1, 100);
for (int i = 0; i < docs; i++) {
Document doc = new Document();
doc.add(new StringField("id", "" + i, Field.Store.NO));
w.addDocuments(Arrays.asList(doc));
}
w.commit();
SegmentInfos si = w.cloneSegmentInfos();
assertEquals(1, si.size());
assertFalse(si.asList().get(0).info.getHasBlocks());

Document doc = new Document();
doc.add(new StringField("id", "XXX", Field.Store.NO));
w.addDocuments(Arrays.asList(doc, doc));
w.commit();
si = w.cloneSegmentInfos();
assertEquals(2, si.size());
assertFalse(si.asList().get(0).info.getHasBlocks());
assertTrue(si.asList().get(1).info.getHasBlocks());
w.forceMerge(1);

w.commit();
si = w.cloneSegmentInfos();
assertEquals(1, si.size());
assertTrue(si.asList().get(0).info.getHasBlocks());
}
}
}
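cloneSegmentInfos() is used here to peek at the per-segment hasBlocks flag from inside the writer. The same check works against a committed index from the outside; a small sketch using SegmentInfos#readLatestCommit and the getHasBlocks() accessor exercised above (the class name is illustrative):

    import java.io.IOException;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.index.SegmentInfos;
    import org.apache.lucene.store.Directory;

    public final class HasBlocksReport {
      // Lists each committed segment and whether it was flagged as containing
      // document blocks (set once addDocuments indexes a block of size > 1).
      public static void print(Directory dir) throws IOException {
        SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
        for (SegmentCommitInfo sci : infos) {
          System.out.println(sci.info.name + " hasBlocks=" + sci.info.getHasBlocks());
        }
      }
    }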
public void testCarryOverHasBlocks() throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter w =

@ -4790,4 +4828,89 @@ public class TestIndexWriter extends LuceneTestCase {
doc.add(newField(field, "value", storedTextType));
writer.addDocument(doc);
}

public void testParentAndSoftDeletesAreTheSame() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig indexWriterConfig = newIndexWriterConfig(new MockAnalyzer(random()));
indexWriterConfig.setSoftDeletesField("foo");
indexWriterConfig.setParentField("foo");
IllegalArgumentException iae =
expectThrows(
IllegalArgumentException.class, () -> new IndexWriter(dir, indexWriterConfig));
assertEquals(
"parent document and soft-deletes field can't be the same field \"foo\"",
iae.getMessage());
}
}

public void testIndexWithParentFieldIsCongruent() throws IOException {
try (Directory dir = newDirectory()) {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setParentField("parent");
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
if (random().nextBoolean()) {
Document child1 = new Document();
child1.add(new StringField("id", Integer.toString(1), Field.Store.YES));
Document child2 = new Document();
child2.add(new StringField("id", Integer.toString(1), Field.Store.YES));
Document parent = new Document();
parent.add(new StringField("id", Integer.toString(1), Field.Store.YES));
writer.addDocuments(Arrays.asList(child1, child2, parent));
writer.flush();
if (random().nextBoolean()) {
writer.addDocuments(Arrays.asList(child1, child2, parent));
}
} else {
writer.addDocument(new Document());
}
writer.commit();
}
IllegalArgumentException ex =
expectThrows(
IllegalArgumentException.class,
() -> {
IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
config.setParentField("someOtherField");
new IndexWriter(dir, config);
});
assertEquals(
"can't add field [parent] as parent document field; this IndexWriter is configured with [someOtherField] as parent document field",
ex.getMessage());
ex =
expectThrows(
IllegalArgumentException.class,
() -> {
IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
new IndexWriter(dir, config);
});
assertEquals(
"can't add field [parent] as parent document field; this IndexWriter has no parent document field configured",
ex.getMessage());
}
}

public void testParentFieldIsAlreadyUsed() throws IOException {
try (Directory dir = newDirectory()) {

IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
Document doc = new Document();
doc.add(new StringField("parent", Integer.toString(1), Field.Store.YES));
writer.addDocument(doc);
writer.commit();
}
IllegalArgumentException iae =
expectThrows(
IllegalArgumentException.class,
() -> {
IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
config.setParentField("parent");

new IndexWriter(dir, config);
});
assertEquals(
"can't add [parent] as non parent document field; this IndexWriter is configured with [parent] as parent document field",
iae.getMessage());
}
}
}
@ -199,7 +199,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
true);
true,
false);
List<Integer> docsDeleted = Arrays.asList(1, 3, 7, 8, DocIdSetIterator.NO_MORE_DOCS);
List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 10, true));
for (DocValuesFieldUpdates update : updates) {

@ -237,7 +238,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
true);
true,
false);
for (DocValuesFieldUpdates update : updates) {
deletes.onDocValuesUpdate(fieldInfo, update.iterator());
}

@ -301,7 +303,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
true);
true,
false);
List<Integer> docsDeleted = Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS);
List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 3, true));
for (DocValuesFieldUpdates update : updates) {

@ -370,7 +373,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
true);
true,
false);
List<DocValuesFieldUpdates> updates =
Arrays.asList(singleUpdate(Arrays.asList(0, 1, DocIdSetIterator.NO_MORE_DOCS), 3, false));
for (DocValuesFieldUpdates update : updates) {

@ -407,7 +411,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
true);
true,
false);
updates = Arrays.asList(singleUpdate(Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS), 3, true));
for (DocValuesFieldUpdates update : updates) {
deletes.onDocValuesUpdate(fieldInfo, update.iterator());

@ -104,7 +104,7 @@ public class TestSegmentMerger extends LuceneTestCase {
si,
InfoStream.getDefault(),
mergedDir,
new FieldInfos.FieldNumbers(null),
new FieldInfos.FieldNumbers(null, null),
newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
MergeState mergeState = merger.merge();
int docsMerged = mergeState.segmentInfo.maxDoc();

@ -1297,7 +1297,8 @@ public class TestSortOptimization extends LuceneTestCase {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.DOT_PRODUCT,
fi.isSoftDeletesField());
fi.isSoftDeletesField(),
fi.isParentField());
newInfos[i] = noIndexFI;
i++;
}

@ -183,9 +183,13 @@ public class TestTopFieldCollector extends LuceneTestCase {
dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE).setIndexSort(sort));
Document doc = new Document();
doc.add(new NumericDocValuesField("foo", 3));
w.addDocuments(Arrays.asList(doc, doc, doc, doc));
for (Document d : Arrays.asList(doc, doc, doc, doc)) {
w.addDocument(d);
}
w.flush();
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
for (Document d : Arrays.asList(doc, doc, doc, doc, doc, doc)) {
w.addDocument(d);
}
w.flush();
IndexReader reader = DirectoryReader.open(w);
assertEquals(2, reader.leaves().size());

@ -103,6 +103,7 @@ public class TermVectorLeafReader extends LeafReader {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
fieldInfos = new FieldInfos(new FieldInfo[] {fieldInfo});
}

@ -736,6 +736,7 @@ public class MemoryIndex {
fieldType.vectorDimension(),
fieldType.vectorEncoding(),
fieldType.vectorSimilarityFunction(),
false,
false);
}

@ -789,7 +790,8 @@ public class MemoryIndex {
info.fieldInfo.getVectorDimension(),
info.fieldInfo.getVectorEncoding(),
info.fieldInfo.getVectorSimilarityFunction(),
info.fieldInfo.isSoftDeletesField());
info.fieldInfo.isSoftDeletesField(),
info.fieldInfo.isParentField());
} else if (existingDocValuesType != docValuesType) {
throw new IllegalArgumentException(
"Can't add ["

@ -78,7 +78,7 @@ public class DummyCompressingCodec extends CompressingCodec {

@Override
public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException {
out.copyBytes(buffersInput, buffersInput.size());
out.copyBytes(buffersInput, buffersInput.length());
}

@Override
@ -68,7 +68,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);

FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();

codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);

@ -96,7 +96,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
fi.putAttribute("foo", "bar");
fi.putAttribute("bar", "baz");

FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();

codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);

@ -136,7 +136,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);

FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();

fail.setDoFail();
expectThrows(

@ -171,7 +171,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);

FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();

fail.setDoFail();
expectThrows(

@ -206,7 +206,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);

FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();

codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);

@ -243,7 +243,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfo fi = createFieldInfo();
addAttributes(fi);

FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null).add(fi).finish();
FieldInfos infos = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(null, null).add(fi).finish();

codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);

@ -276,7 +279,9 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
String softDeletesField =
random().nextBoolean() ? TestUtil.randomUnicodeString(random()) : null;

var builder = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(softDeletesField);
String parentField = random().nextBoolean() ? TestUtil.randomUnicodeString(random()) : null;

var builder = INDEX_PACKAGE_ACCESS.newFieldInfosBuilder(softDeletesField, parentField);

for (String field : fieldNames) {
IndexableFieldType fieldType = randomFieldType(random(), field);

@ -307,7 +309,8 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
fieldType.vectorDimension(),
fieldType.vectorEncoding(),
fieldType.vectorSimilarityFunction(),
field.equals(softDeletesField));
field.equals(softDeletesField),
field.equals(parentField));
addAttributes(fi);
builder.add(fi);
}

@ -431,6 +434,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
}
}

@ -365,7 +365,8 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
proto.getVectorDimension(),
proto.getVectorEncoding(),
proto.getVectorSimilarityFunction(),
proto.isSoftDeletesField());
proto.isSoftDeletesField(),
proto.isParentField());

FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] {field});

@ -80,6 +80,33 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
dir.close();
}

public void testHasBlocks() throws IOException {
assumeTrue("test requires a codec that can read/write hasBlocks", supportsHasBlocks());

Directory dir = newDirectory();
Codec codec = getCodec();
byte[] id = StringHelper.randomId();
SegmentInfo info =
new SegmentInfo(
dir,
getVersions()[0],
getVersions()[0],
"_123",
1,
false,
random().nextBoolean(),
codec,
Collections.emptyMap(),
id,
Collections.emptyMap(),
null);
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
assertEquals(info.getHasBlocks(), info2.getHasBlocks());
dir.close();
}

/** Tests SI writer adds itself to files... */
public void testAddsSelfToFiles() throws Exception {
Directory dir = newDirectory();

@ -260,6 +287,10 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
return true;
}

protected boolean supportsHasBlocks() {
return true;
}

private SortField randomIndexSortField() {
boolean reversed = random().nextBoolean();
SortField sortField;

@ -360,7 +391,11 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
for (int j = 0; j < numSortFields; ++j) {
sortFields[j] = randomIndexSortField();
}
sort = new Sort(sortFields);
if (supportsHasBlocks()) {
sort = new Sort(sortFields);
} else {
sort = new Sort(sortFields);
}
}

Directory dir = newDirectory();

@ -112,7 +112,8 @@ public class MismatchedLeafReader extends FilterLeafReader {
oldInfo.getVectorEncoding(), // numeric type of vector samples
// distance function for calculating similarity of the field's vector
oldInfo.getVectorSimilarityFunction(),
oldInfo.isSoftDeletesField()); // used as soft-deletes field
oldInfo.isSoftDeletesField(), // used as soft-deletes field
oldInfo.isParentField());
shuffled.set(i, newInfo);
}

@ -158,6 +158,7 @@ public class RandomIndexWriter implements Closeable {
} else {
softDeletesRatio = 0d;
}

w = mockIndexWriter(dir, c, r);
config = w.getConfig();
flushAt = TestUtil.nextInt(r, 10, 1000);

@ -165,6 +165,7 @@ public class RandomPostingsTester {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
fieldUpto++;

@ -738,6 +739,7 @@ public class RandomPostingsTester {
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false,
false);
}