mirror of https://github.com/apache/lucene.git
Harden BaseDocValuesFormatTestCase (#13346)
We hit a Codec bug in Elasticsearch, but it went unnoticed because our tests extend from BaseDocValuesFormatTestCase, which doesn't attempt to read the doc-values of the same document twice. This change strengthens BaseDocValuesFormatTestCase checks and randomly inserts that access pattern.
This commit is contained in:
parent
30da7dabe0
commit
06973f5723
|
@ -1414,6 +1414,98 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
ir.close();
|
||||
}
|
||||
|
||||
protected void compareStoredFieldWithSortedNumericsDV(
|
||||
DirectoryReader directoryReader, String storedField, String dvField) throws IOException {
|
||||
for (LeafReaderContext leaf : directoryReader.leaves()) {
|
||||
LeafReader reader = leaf.reader();
|
||||
StoredFields storedFields = reader.storedFields();
|
||||
SortedNumericDocValues docValues = reader.getSortedNumericDocValues(dvField);
|
||||
if (docValues == null) {
|
||||
// no stored values at all
|
||||
for (int doc = 0; doc < reader.maxDoc(); doc++) {
|
||||
assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
for (int doc = 0; doc < reader.maxDoc(); doc++) {
|
||||
String[] storedValues = storedFields.document(doc).getValues(storedField);
|
||||
if (storedValues.length == 0) {
|
||||
assertFalse(docValues.advanceExact(doc));
|
||||
continue;
|
||||
}
|
||||
switch (random().nextInt(3)) {
|
||||
case 0 -> assertEquals(doc, docValues.nextDoc());
|
||||
case 1 -> assertEquals(doc, docValues.advance(doc));
|
||||
default -> assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
assertEquals(doc, docValues.docID());
|
||||
int repeats = 1 + random().nextInt(3);
|
||||
for (int r = 0; r < repeats; r++) {
|
||||
if (r > 0 || random().nextBoolean()) {
|
||||
assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
assertEquals(storedValues.length, docValues.docValueCount());
|
||||
for (int v = 0; v < docValues.docValueCount(); v++) {
|
||||
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
|
||||
}
|
||||
}
|
||||
}
|
||||
// jump with advanceExact
|
||||
int iters = 1 + random().nextInt(3);
|
||||
for (int i = 0; i < iters; i++) {
|
||||
docValues = reader.getSortedNumericDocValues(dvField);
|
||||
for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) {
|
||||
String[] storedValues = storedFields.document(doc).getValues(storedField);
|
||||
if (docValues.advanceExact(doc)) {
|
||||
assertEquals(doc, docValues.docID());
|
||||
int repeats = 1 + random().nextInt(3);
|
||||
for (int r = 0; r < repeats; r++) {
|
||||
if (r > 0 || random().nextBoolean()) {
|
||||
assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
assertEquals(storedValues.length, docValues.docValueCount());
|
||||
for (int v = 0; v < docValues.docValueCount(); v++) {
|
||||
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assertArrayEquals(new String[0], storedValues);
|
||||
}
|
||||
doc += random().nextInt(5); // skip some docs
|
||||
}
|
||||
}
|
||||
// jump with advance
|
||||
for (int i = 0; i < iters; i++) {
|
||||
docValues = reader.getSortedNumericDocValues(dvField);
|
||||
int doc = random().nextInt(leaf.reader().maxDoc());
|
||||
while (doc != NO_MORE_DOCS) {
|
||||
int nextDoc = docValues.advance(doc);
|
||||
// no stored fields in between
|
||||
for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) {
|
||||
String[] storedValues = storedFields.document(d).getValues(storedField);
|
||||
assertArrayEquals(new String[0], storedValues);
|
||||
}
|
||||
doc = nextDoc;
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
String[] storedValues = storedFields.document(doc).getValues(storedField);
|
||||
int repeats = 1 + random().nextInt(3);
|
||||
for (int r = 0; r < repeats; r++) {
|
||||
if (r > 0 || random().nextBoolean()) {
|
||||
assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
assertEquals(storedValues.length, docValues.docValueCount());
|
||||
for (int v = 0; v < docValues.docValueCount(); v++) {
|
||||
assertEquals(storedValues[v], Long.toString(docValues.nextValue()));
|
||||
}
|
||||
}
|
||||
doc = nextDoc + 1;
|
||||
doc += random().nextInt(5); // skip some docs
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestSortedNumericsVsStoredFields(LongSupplier counts, LongSupplier values)
|
||||
throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
@ -1452,38 +1544,18 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
|
||||
try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) {
|
||||
TestUtil.checkReader(reader);
|
||||
compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv");
|
||||
}
|
||||
// merge some segments and ensure that at least one of them has more than
|
||||
// 256 values
|
||||
writer.forceMerge(numDocs / 256);
|
||||
|
||||
writer.close();
|
||||
|
||||
// compare
|
||||
DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(dir));
|
||||
TestUtil.checkReader(ir);
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
StoredFields storedFields = r.storedFields();
|
||||
SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
|
||||
for (int i = 0; i < r.maxDoc(); i++) {
|
||||
if (i > docValues.docID()) {
|
||||
docValues.nextDoc();
|
||||
}
|
||||
String[] expected = storedFields.document(i).getValues("stored");
|
||||
if (i < docValues.docID()) {
|
||||
assertEquals(0, expected.length);
|
||||
} else {
|
||||
String[] actual = new String[docValues.docValueCount()];
|
||||
for (int j = 0; j < actual.length; j++) {
|
||||
actual[j] = Long.toString(docValues.nextValue());
|
||||
}
|
||||
assertArrayEquals(expected, actual);
|
||||
}
|
||||
}
|
||||
try (DirectoryReader reader = maybeWrapWithMergingReader(DirectoryReader.open(dir))) {
|
||||
TestUtil.checkReader(reader);
|
||||
compareStoredFieldWithSortedNumericsDV(reader, "stored", "dv");
|
||||
}
|
||||
ir.close();
|
||||
dir.close();
|
||||
IOUtils.close(writer, dir);
|
||||
}
|
||||
|
||||
public void testBooleanNumericsVsStoredFields() throws Exception {
|
||||
|
@ -2266,6 +2338,101 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
directory.close();
|
||||
}
|
||||
|
||||
protected void compareStoredFieldWithSortedSetDV(
|
||||
DirectoryReader directoryReader, String storedField, String dvField) throws IOException {
|
||||
for (LeafReaderContext leaf : directoryReader.leaves()) {
|
||||
LeafReader reader = leaf.reader();
|
||||
StoredFields storedFields = reader.storedFields();
|
||||
SortedSetDocValues docValues = reader.getSortedSetDocValues(dvField);
|
||||
if (docValues == null) {
|
||||
// no stored values at all
|
||||
for (int doc = 0; doc < reader.maxDoc(); doc++) {
|
||||
assertArrayEquals(new String[0], storedFields.document(doc).getValues(storedField));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// sequentially
|
||||
for (int doc = 0; doc < reader.maxDoc(); doc++) {
|
||||
String[] storedValues = storedFields.document(doc).getValues(storedField);
|
||||
if (storedValues.length == 0) {
|
||||
assertFalse(docValues.advanceExact(doc));
|
||||
continue;
|
||||
}
|
||||
switch (random().nextInt(3)) {
|
||||
case 0 -> assertEquals(doc, docValues.nextDoc());
|
||||
case 1 -> assertEquals(doc, docValues.advance(doc));
|
||||
default -> assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
assertEquals(doc, docValues.docID());
|
||||
assertEquals(storedValues.length, docValues.docValueCount());
|
||||
int repeats = 1 + random().nextInt(3);
|
||||
for (int r = 0; r < repeats; r++) {
|
||||
if (r > 0 || random().nextBoolean()) {
|
||||
assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
for (int v = 0; v < docValues.docValueCount(); v++) {
|
||||
long ord = docValues.nextOrd();
|
||||
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
// jump with advanceExact
|
||||
int iters = 1 + random().nextInt(3);
|
||||
for (int i = 0; i < iters; i++) {
|
||||
docValues = reader.getSortedSetDocValues(dvField);
|
||||
for (int doc = random().nextInt(leaf.reader().maxDoc()); doc < reader.maxDoc(); doc++) {
|
||||
String[] storedValues = storedFields.document(doc).getValues(storedField);
|
||||
if (docValues.advanceExact(doc)) {
|
||||
assertEquals(doc, docValues.docID());
|
||||
assertEquals(storedValues.length, docValues.docValueCount());
|
||||
int repeats = 1 + random().nextInt(3);
|
||||
for (int r = 0; r < repeats; r++) {
|
||||
if (r > 0 || random().nextBoolean()) {
|
||||
assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
for (int v = 0; v < docValues.docValueCount(); v++) {
|
||||
long ord = docValues.nextOrd();
|
||||
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assertArrayEquals(new String[0], storedValues);
|
||||
}
|
||||
doc += random().nextInt(5); // skip some docs
|
||||
}
|
||||
}
|
||||
// jump with advance
|
||||
for (int i = 0; i < iters; i++) {
|
||||
docValues = reader.getSortedSetDocValues(dvField);
|
||||
int doc = random().nextInt(leaf.reader().maxDoc());
|
||||
while (doc != NO_MORE_DOCS) {
|
||||
int nextDoc = docValues.advance(doc);
|
||||
// no stored fields in between
|
||||
for (int d = doc; d < (nextDoc == NO_MORE_DOCS ? reader.maxDoc() : nextDoc); d++) {
|
||||
String[] storedValues = storedFields.document(d).getValues(storedField);
|
||||
assertArrayEquals(new String[0], storedValues);
|
||||
}
|
||||
doc = nextDoc;
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
int repeats = 1 + random().nextInt(3);
|
||||
String[] storedValues = storedFields.document(doc).getValues(storedField);
|
||||
for (int r = 0; r < repeats; r++) {
|
||||
if (r > 0 || random().nextBoolean()) {
|
||||
assertTrue(docValues.advanceExact(doc));
|
||||
}
|
||||
for (int v = 0; v < docValues.docValueCount(); v++) {
|
||||
long ord = docValues.nextOrd();
|
||||
assertEquals(storedValues[v], docValues.lookupOrd(ord).utf8ToString());
|
||||
}
|
||||
}
|
||||
doc = nextDoc + 1;
|
||||
doc += random().nextInt(5); // skip some docs
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void doTestSortedSetVsStoredFields(
|
||||
int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues)
|
||||
throws Exception {
|
||||
|
@ -2312,91 +2479,23 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
writer.commit();
|
||||
}
|
||||
}
|
||||
|
||||
// delete some docs
|
||||
int numDeletions = random().nextInt(numDocs / 10);
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: now delete " + numDeletions + " docs");
|
||||
}
|
||||
for (int i = 0; i < numDeletions; i++) {
|
||||
int id = random().nextInt(numDocs);
|
||||
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||
}
|
||||
|
||||
// compare
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: now get reader");
|
||||
}
|
||||
DirectoryReader ir = writer.getReader();
|
||||
TestUtil.checkReader(ir);
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
StoredFields storedFields = r.storedFields();
|
||||
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
|
||||
for (int i = 0; i < r.maxDoc(); i++) {
|
||||
String[] stringValues = storedFields.document(i).getValues("stored");
|
||||
if (docValues != null) {
|
||||
if (docValues.docID() < i) {
|
||||
docValues.nextDoc();
|
||||
}
|
||||
}
|
||||
if (docValues != null && stringValues.length > 0) {
|
||||
assertEquals(i, docValues.docID());
|
||||
assertEquals(stringValues.length, docValues.docValueCount());
|
||||
for (String stringValue : stringValues) {
|
||||
assert docValues != null;
|
||||
long ord = docValues.nextOrd();
|
||||
BytesRef scratch = docValues.lookupOrd(ord);
|
||||
assertEquals(stringValue, scratch.utf8ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: now close reader");
|
||||
}
|
||||
ir.close();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: force merge");
|
||||
try (DirectoryReader reader = writer.getReader()) {
|
||||
TestUtil.checkReader(reader);
|
||||
compareStoredFieldWithSortedSetDV(reader, "stored", "dv");
|
||||
}
|
||||
writer.forceMerge(1);
|
||||
|
||||
// compare again
|
||||
ir = writer.getReader();
|
||||
TestUtil.checkReader(ir);
|
||||
for (LeafReaderContext context : ir.leaves()) {
|
||||
LeafReader r = context.reader();
|
||||
StoredFields storedFields = r.storedFields();
|
||||
SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
|
||||
for (int i = 0; i < r.maxDoc(); i++) {
|
||||
String[] stringValues = storedFields.document(i).getValues("stored");
|
||||
if (docValues.docID() < i) {
|
||||
docValues.nextDoc();
|
||||
}
|
||||
if (stringValues.length > 0) {
|
||||
assertEquals(i, docValues.docID());
|
||||
assertEquals(stringValues.length, docValues.docValueCount());
|
||||
for (String stringValue : stringValues) {
|
||||
assert docValues != null;
|
||||
long ord = docValues.nextOrd();
|
||||
BytesRef scratch = docValues.lookupOrd(ord);
|
||||
assertEquals(stringValue, scratch.utf8ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
try (DirectoryReader reader = writer.getReader()) {
|
||||
TestUtil.checkReader(reader);
|
||||
compareStoredFieldWithSortedSetDV(reader, "stored", "dv");
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: close reader");
|
||||
}
|
||||
ir.close();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: close writer");
|
||||
}
|
||||
writer.close();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: close dir");
|
||||
}
|
||||
dir.close();
|
||||
IOUtils.close(writer, dir);
|
||||
}
|
||||
|
||||
public void testSortedSetFixedLengthVsStoredFields() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue