Harden BaseDocValuesFormatTestCase (#13346)

We hit a Codec bug in Elasticsearch, but it went unnoticed because our 
tests extend from BaseDocValuesFormatTestCase, which doesn't attempt to
read the doc-values of the same document twice. This change strengthens
BaseDocValuesFormatTestCase checks and randomly inserts that access
pattern.
This commit is contained in:
Nhat Nguyen 2024-05-07 09:04:25 -07:00 committed by GitHub
parent 30da7dabe0
commit 06973f5723
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 202 additions and 103 deletions

View File

@ -1414,6 +1414,98 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
ir.close();
}
protected void compareStoredFieldWithSortedNumericsDV(
DirectoryReader directoryReader, String storedField, String dvField) throws IOException {
for (LeafReaderContext leaf : directoryReader.leaves()) {
LeafReader reader = leaf.reader();
StoredFields storedFields = reader.storedFields();
SortedNumericDocValues docValues = reader.getSortedNumericDocValues(dvField);
if (docValues == null) {
// no stored values at all
for (int doc = 0; doc < reader.maxDoc(); doc++) {
assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField));
}
continue;
}
for (int doc = 0; doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (storedValues.length == 0) {
assertFalse(docValues.advanceExact(doc));
continue;
}
switch (random().nextInt(3)) {
case 0 -> assertEquals(doc, docValues.nextDoc());
case 1 -> assertEquals(doc, docValues.advance(doc));
default -> assertTrue(docValues.advanceExact(doc));
}
assertEquals(doc, docValues.docID());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
assertEquals(storedValues.length, docValues.docValueCount());
for (int v = 0; v < docValues.docValueCount(); v++) {
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
}
}
}
// jump with advanceExact
int iters = 1 + random().nextInt(3);
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedNumericDocValues(dvField);
for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (docValues.advanceExact(doc)) {
assertEquals(doc, docValues.docID());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
assertEquals(storedValues.length, docValues.docValueCount());
for (int v = 0; v < docValues.docValueCount(); v++) {
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
}
}
} else {
assertArrayEquals(new String[0], storedValues);
}
doc += random().nextInt(5); // skip some docs
}
}
// jump with advance
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedNumericDocValues(dvField);
int doc = random().nextInt(leaf.reader().maxDoc());
while (doc != NO_MORE_DOCS) {
int nextDoc = docValues.advance(doc);
// no stored fields in between
for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) {
String[] storedValues = storedFields.document(d).getValues(storedField);
assertArrayEquals(new String[0], storedValues);
}
doc = nextDoc;
if (doc != NO_MORE_DOCS) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
assertEquals(storedValues.length, docValues.docValueCount());
for (int v = 0; v < docValues.docValueCount(); v++) {
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
}
}
doc = nextDoc + 1;
doc += random().nextInt(5); // skip some docs
}
}
}
}
}
private void doTestSortedNumericsVsStoredFields(LongSupplier counts, LongSupplier values)
throws Exception {
Directory dir = newDirectory();
@ -1452,38 +1544,18 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv");
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.forceMerge(numDocs / 256);
writer.close();
// compare
DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
StoredFields storedFields = r.storedFields();
SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
for (int i = 0; i < r.maxDoc(); i++) {
if (i > docValues.docID()) {
docValues.nextDoc();
}
String[] expected = storedFields.document(i).getValues("stored");
if (i < docValues.docID()) {
assertEquals(0, expected.length);
} else {
String[] actual = new String[docValues.docValueCount()];
for (int j = 0; j < actual.length; j++) {
actual[j] = Long.toString(docValues.nextValue());
}
assertArrayEquals(expected, actual);
}
}
try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv");
}
ir.close();
dir.close();
IOUtils.close(writer, dir);
}
public void testBooleanNumericsVsStoredFields() throws Exception {
@ -2266,6 +2338,101 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
directory.close();
}
protected void compareStoredFieldWithSortedSetDV(
DirectoryReader directoryReader, String storedField, String dvField) throws IOException {
for (LeafReaderContext leaf : directoryReader.leaves()) {
LeafReader reader = leaf.reader();
StoredFields storedFields = reader.storedFields();
SortedSetDocValues docValues = reader.getSortedSetDocValues(dvField);
if (docValues == null) {
// no stored values at all
for (int doc = 0; doc < reader.maxDoc(); doc++) {
assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField));
}
continue;
}
// sequentially
for (int doc = 0; doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (storedValues.length == 0) {
assertFalse(docValues.advanceExact(doc));
continue;
}
switch (random().nextInt(3)) {
case 0 -> assertEquals(doc, docValues.nextDoc());
case 1 -> assertEquals(doc, docValues.advance(doc));
default -> assertTrue(docValues.advanceExact(doc));
}
assertEquals(doc, docValues.docID());
assertEquals(storedValues.length, docValues.docValueCount());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
for (int v = 0; v < docValues.docValueCount(); v++) {
long ord = docValues.nextOrd();
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
}
}
}
// jump with advanceExact
int iters = 1 + random().nextInt(3);
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedSetDocValues(dvField);
for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) {
String[] storedValues = storedFields.document(doc).getValues(storedField);
if (docValues.advanceExact(doc)) {
assertEquals(doc, docValues.docID());
assertEquals(storedValues.length, docValues.docValueCount());
int repeats = 1 + random().nextInt(3);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
for (int v = 0; v < docValues.docValueCount(); v++) {
long ord = docValues.nextOrd();
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
}
}
} else {
assertArrayEquals(new String[0], storedValues);
}
doc += random().nextInt(5); // skip some docs
}
}
// jump with advance
for (int i = 0; i < iters; i++) {
docValues = reader.getSortedSetDocValues(dvField);
int doc = random().nextInt(leaf.reader().maxDoc());
while (doc != NO_MORE_DOCS) {
int nextDoc = docValues.advance(doc);
// no stored fields in between
for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) {
String[] storedValues = storedFields.document(d).getValues(storedField);
assertArrayEquals(new String[0], storedValues);
}
doc = nextDoc;
if (doc != NO_MORE_DOCS) {
int repeats = 1 + random().nextInt(3);
String[] storedValues = storedFields.document(doc).getValues(storedField);
for (int r = 0; r < repeats; r++) {
if (r > 0 || random().nextBoolean()) {
assertTrue(docValues.advanceExact(doc));
}
for (int v = 0; v < docValues.docValueCount(); v++) {
long ord = docValues.nextOrd();
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
}
}
doc = nextDoc + 1;
doc += random().nextInt(5); // skip some docs
}
}
}
}
}
protected void doTestSortedSetVsStoredFields(
int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues)
throws Exception {
@ -2312,91 +2479,23 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs / 10);
if (VERBOSE) {
System.out.println("\nTEST: now delete " + numDeletions + " docs");
}
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare
if (VERBOSE) {
System.out.println("\nTEST: now get reader");
}
DirectoryReader ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
StoredFields storedFields = r.storedFields();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
String[] stringValues = storedFields.document(i).getValues("stored");
if (docValues != null) {
if (docValues.docID() < i) {
docValues.nextDoc();
}
}
if (docValues != null && stringValues.length > 0) {
assertEquals(i, docValues.docID());
assertEquals(stringValues.length, docValues.docValueCount());
for (String stringValue : stringValues) {
assert docValues != null;
long ord = docValues.nextOrd();
BytesRef scratch = docValues.lookupOrd(ord);
assertEquals(stringValue, scratch.utf8ToString());
}
}
}
}
if (VERBOSE) {
System.out.println("\nTEST: now close reader");
}
ir.close();
if (VERBOSE) {
System.out.println("TEST: force merge");
try (DirectoryReader reader = writer.getReader()) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedSetDV(reader, "stored", "dv");
}
writer.forceMerge(1);
// compare again
ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
StoredFields storedFields = r.storedFields();
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
for (int i = 0; i < r.maxDoc(); i++) {
String[] stringValues = storedFields.document(i).getValues("stored");
if (docValues.docID() < i) {
docValues.nextDoc();
}
if (stringValues.length > 0) {
assertEquals(i, docValues.docID());
assertEquals(stringValues.length, docValues.docValueCount());
for (String stringValue : stringValues) {
assert docValues != null;
long ord = docValues.nextOrd();
BytesRef scratch = docValues.lookupOrd(ord);
assertEquals(stringValue, scratch.utf8ToString());
}
}
}
try (DirectoryReader reader = writer.getReader()) {
TestUtil.checkReader(reader);
compareStoredFieldWithSortedSetDV(reader, "stored", "dv");
}
if (VERBOSE) {
System.out.println("TEST: close reader");
}
ir.close();
if (VERBOSE) {
System.out.println("TEST: close writer");
}
writer.close();
if (VERBOSE) {
System.out.println("TEST: close dir");
}
dir.close();
IOUtils.close(writer, dir);
}
public void testSortedSetFixedLengthVsStoredFields() throws Exception {