From 06973f5723e97de7b73a84b1ff3c8c35db79d7ec Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Tue, 7 May 2024 09:04:25 -0700 Subject: [PATCH] Harden BaseDocValuesFormatTestCase (#13346) We hit a Codec bug in Elasticsearch, but it went unnoticed because our tests extend from BaseDocValuesFormatTestCase, which doesn't attempt to read the doc-values of the same document twice. This change strengthens BaseDocValuesFormatTestCase checks and randomly inserts that access pattern. --- .../index/BaseDocValuesFormatTestCase.java | 305 ++++++++++++------ 1 file changed, 202 insertions(+), 103 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java index 8c412946b87..07c4507ae92 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java @@ -1414,6 +1414,98 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes ir.close(); } + protected void compareStoredFieldWithSortedNumericsDV( + DirectoryReader directoryReader, String storedField, String dvField) throws IOException { + for (LeafReaderContext leaf : directoryReader.leaves()) { + LeafReader reader = leaf.reader(); + StoredFields storedFields = reader.storedFields(); + SortedNumericDocValues docValues = reader.getSortedNumericDocValues(dvField); + if (docValues == null) { + // no stored values at all + for (int doc = 0; doc < reader.maxDoc(); doc++) { + assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField)); + } + continue; + } + for (int doc = 0; doc < reader.maxDoc(); doc++) { + String[] storedValues = storedFields.document(doc).getValues(storedField); + if (storedValues.length == 0) { + assertFalse(docValues.advanceExact(doc)); + continue; + } + switch (random().nextInt(3)) { + case 0 -> assertEquals(doc, docValues.nextDoc()); + case 1 -> assertEquals(doc, docValues.advance(doc)); + default -> assertTrue(docValues.advanceExact(doc)); + } + assertEquals(doc, docValues.docID()); + int repeats = 1 + random().nextInt(3); + for (int r = 0; r < repeats; r++) { + if (r > 0 || random().nextBoolean()) { + assertTrue(docValues.advanceExact(doc)); + } + assertEquals(storedValues.length, docValues.docValueCount()); + for (int v = 0; v < docValues.docValueCount(); v++) { + assertEquals(storedValues[v], Long.toString(docValues.nextValue())); + } + } + } + // jump with advanceExact + int iters = 1 + random().nextInt(3); + for (int i = 0; i < iters; i++) { + docValues = reader.getSortedNumericDocValues(dvField); + for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) { + String[] storedValues = storedFields.document(doc).getValues(storedField); + if (docValues.advanceExact(doc)) { + assertEquals(doc, docValues.docID()); + int repeats = 1 + random().nextInt(3); + for (int r = 0; r < repeats; r++) { + if (r > 0 || random().nextBoolean()) { + assertTrue(docValues.advanceExact(doc)); + } + assertEquals(storedValues.length, docValues.docValueCount()); + for (int v = 0; v < docValues.docValueCount(); v++) { + assertEquals(storedValues[v], Long.toString(docValues.nextValue())); + } + } + } else { + assertArrayEquals(new String[0], storedValues); + } + doc += random().nextInt(5); // skip some docs + } + } + // jump with advance + for (int i = 0; i < iters; i++) { + docValues = reader.getSortedNumericDocValues(dvField); + int doc = random().nextInt(leaf.reader().maxDoc()); + while (doc != NO_MORE_DOCS) { + int nextDoc = docValues.advance(doc); + // no stored fields in between + for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) { + String[] storedValues = storedFields.document(d).getValues(storedField); + assertArrayEquals(new String[0], storedValues); + } + doc = nextDoc; + if (doc != NO_MORE_DOCS) { + String[] storedValues = storedFields.document(doc).getValues(storedField); + int repeats = 1 + random().nextInt(3); + for (int r = 0; r < repeats; r++) { + if (r > 0 || random().nextBoolean()) { + assertTrue(docValues.advanceExact(doc)); + } + assertEquals(storedValues.length, docValues.docValueCount()); + for (int v = 0; v < docValues.docValueCount(); v++) { + assertEquals(storedValues[v], Long.toString(docValues.nextValue())); + } + } + doc = nextDoc + 1; + doc += random().nextInt(5); // skip some docs + } + } + } + } + } + private void doTestSortedNumericsVsStoredFields(LongSupplier counts, LongSupplier values) throws Exception { Directory dir = newDirectory(); @@ -1452,38 +1544,18 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes int id = random().nextInt(numDocs); writer.deleteDocuments(new Term("id", Integer.toString(id))); } - + try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) { + TestUtil.checkReader(reader); + compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv"); + } // merge some segments and ensure that at least one of them has more than // 256 values writer.forceMerge(numDocs / 256); - - writer.close(); - - // compare - DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir)); - TestUtil.checkReader(ir); - for (LeafReaderContext context : ir.leaves()) { - LeafReader r = context.reader(); - StoredFields storedFields = r.storedFields(); - SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv"); - for (int i = 0; i < r.maxDoc(); i++) { - if (i > docValues.docID()) { - docValues.nextDoc(); - } - String[] expected = storedFields.document(i).getValues("stored"); - if (i < docValues.docID()) { - assertEquals(0, expected.length); - } else { - String[] actual = new String[docValues.docValueCount()]; - for (int j = 0; j < actual.length; j++) { - actual[j] = Long.toString(docValues.nextValue()); - } - assertArrayEquals(expected, actual); - } - } + try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) { + TestUtil.checkReader(reader); + compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv"); } - ir.close(); - dir.close(); + IOUtils.close(writer, dir); } public void testBooleanNumericsVsStoredFields() throws Exception { @@ -2266,6 +2338,101 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes directory.close(); } + protected void compareStoredFieldWithSortedSetDV( + DirectoryReader directoryReader, String storedField, String dvField) throws IOException { + for (LeafReaderContext leaf : directoryReader.leaves()) { + LeafReader reader = leaf.reader(); + StoredFields storedFields = reader.storedFields(); + SortedSetDocValues docValues = reader.getSortedSetDocValues(dvField); + if (docValues == null) { + // no stored values at all + for (int doc = 0; doc < reader.maxDoc(); doc++) { + assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField)); + } + continue; + } + // sequentially + for (int doc = 0; doc < reader.maxDoc(); doc++) { + String[] storedValues = storedFields.document(doc).getValues(storedField); + if (storedValues.length == 0) { + assertFalse(docValues.advanceExact(doc)); + continue; + } + switch (random().nextInt(3)) { + case 0 -> assertEquals(doc, docValues.nextDoc()); + case 1 -> assertEquals(doc, docValues.advance(doc)); + default -> assertTrue(docValues.advanceExact(doc)); + } + assertEquals(doc, docValues.docID()); + assertEquals(storedValues.length, docValues.docValueCount()); + int repeats = 1 + random().nextInt(3); + for (int r = 0; r < repeats; r++) { + if (r > 0 || random().nextBoolean()) { + assertTrue(docValues.advanceExact(doc)); + } + for (int v = 0; v < docValues.docValueCount(); v++) { + long ord = docValues.nextOrd(); + assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString()); + } + } + } + // jump with advanceExact + int iters = 1 + random().nextInt(3); + for (int i = 0; i < iters; i++) { + docValues = reader.getSortedSetDocValues(dvField); + for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) { + String[] storedValues = storedFields.document(doc).getValues(storedField); + if (docValues.advanceExact(doc)) { + assertEquals(doc, docValues.docID()); + assertEquals(storedValues.length, docValues.docValueCount()); + int repeats = 1 + random().nextInt(3); + for (int r = 0; r < repeats; r++) { + if (r > 0 || random().nextBoolean()) { + assertTrue(docValues.advanceExact(doc)); + } + for (int v = 0; v < docValues.docValueCount(); v++) { + long ord = docValues.nextOrd(); + assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString()); + } + } + } else { + assertArrayEquals(new String[0], storedValues); + } + doc += random().nextInt(5); // skip some docs + } + } + // jump with advance + for (int i = 0; i < iters; i++) { + docValues = reader.getSortedSetDocValues(dvField); + int doc = random().nextInt(leaf.reader().maxDoc()); + while (doc != NO_MORE_DOCS) { + int nextDoc = docValues.advance(doc); + // no stored fields in between + for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) { + String[] storedValues = storedFields.document(d).getValues(storedField); + assertArrayEquals(new String[0], storedValues); + } + doc = nextDoc; + if (doc != NO_MORE_DOCS) { + int repeats = 1 + random().nextInt(3); + String[] storedValues = storedFields.document(doc).getValues(storedField); + for (int r = 0; r < repeats; r++) { + if (r > 0 || random().nextBoolean()) { + assertTrue(docValues.advanceExact(doc)); + } + for (int v = 0; v < docValues.docValueCount(); v++) { + long ord = docValues.nextOrd(); + assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString()); + } + } + doc = nextDoc + 1; + doc += random().nextInt(5); // skip some docs + } + } + } + } + } + protected void doTestSortedSetVsStoredFields( int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues) throws Exception { @@ -2312,91 +2479,23 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes writer.commit(); } } - // delete some docs int numDeletions = random().nextInt(numDocs / 10); - if (VERBOSE) { - System.out.println("\nTEST: now delete " + numDeletions + " docs"); - } for (int i = 0; i < numDeletions; i++) { int id = random().nextInt(numDocs); writer.deleteDocuments(new Term("id", Integer.toString(id))); } - // compare - if (VERBOSE) { - System.out.println("\nTEST: now get reader"); - } - DirectoryReader ir = writer.getReader(); - TestUtil.checkReader(ir); - for (LeafReaderContext context : ir.leaves()) { - LeafReader r = context.reader(); - StoredFields storedFields = r.storedFields(); - SortedSetDocValues docValues = r.getSortedSetDocValues("dv"); - for (int i = 0; i < r.maxDoc(); i++) { - String[] stringValues = storedFields.document(i).getValues("stored"); - if (docValues != null) { - if (docValues.docID() < i) { - docValues.nextDoc(); - } - } - if (docValues != null && stringValues.length > 0) { - assertEquals(i, docValues.docID()); - assertEquals(stringValues.length, docValues.docValueCount()); - for (String stringValue : stringValues) { - assert docValues != null; - long ord = docValues.nextOrd(); - BytesRef scratch = docValues.lookupOrd(ord); - assertEquals(stringValue, scratch.utf8ToString()); - } - } - } - } - if (VERBOSE) { - System.out.println("\nTEST: now close reader"); - } - ir.close(); - if (VERBOSE) { - System.out.println("TEST: force merge"); + try (DirectoryReader reader = writer.getReader()) { + TestUtil.checkReader(reader); + compareStoredFieldWithSortedSetDV(reader, "stored", "dv"); } writer.forceMerge(1); - - // compare again - ir = writer.getReader(); - TestUtil.checkReader(ir); - for (LeafReaderContext context : ir.leaves()) { - LeafReader r = context.reader(); - StoredFields storedFields = r.storedFields(); - SortedSetDocValues docValues = r.getSortedSetDocValues("dv"); - for (int i = 0; i < r.maxDoc(); i++) { - String[] stringValues = storedFields.document(i).getValues("stored"); - if (docValues.docID() < i) { - docValues.nextDoc(); - } - if (stringValues.length > 0) { - assertEquals(i, docValues.docID()); - assertEquals(stringValues.length, docValues.docValueCount()); - for (String stringValue : stringValues) { - assert docValues != null; - long ord = docValues.nextOrd(); - BytesRef scratch = docValues.lookupOrd(ord); - assertEquals(stringValue, scratch.utf8ToString()); - } - } - } + try (DirectoryReader reader = writer.getReader()) { + TestUtil.checkReader(reader); + compareStoredFieldWithSortedSetDV(reader, "stored", "dv"); } - if (VERBOSE) { - System.out.println("TEST: close reader"); - } - ir.close(); - if (VERBOSE) { - System.out.println("TEST: close writer"); - } - writer.close(); - if (VERBOSE) { - System.out.println("TEST: close dir"); - } - dir.close(); + IOUtils.close(writer, dir); } public void testSortedSetFixedLengthVsStoredFields() throws Exception {