LUCENE-10401: Fix lookups on empty doc-values terms dictionaries. (#642)

This commit is contained in:
Adrien Grand 2022-02-04 09:28:35 +01:00 committed by GitHub
parent 57d9515eff
commit ed6c1b5aea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 5 deletions

View File

@ -217,6 +217,9 @@ Bug Fixes
* LUCENE-9952: Address inaccurate dim counts for SSDV faceting in cases where a dim is configured
as multi-valued. (Greg Miller)
* LUCENE-10401: Fix lookups on empty doc-value terms dictionaries to no longer
throw an ArrayIndexOutOfBoundsException. (Adrien Grand)
Other
---------------------

View File

@ -1131,7 +1131,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
private long seekTermsIndex(BytesRef text) throws IOException {
long lo = 0L;
- long hi = (entry.termsDictSize - 1) >>> entry.termsDictIndexShift;
+ long hi = (entry.termsDictSize - 1) >> entry.termsDictIndexShift;
while (lo <= hi) {
final long mid = (lo + hi) >>> 1;
getTermFromIndex(mid);
@ -1144,7 +1144,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
assert hi < 0 || getTermFromIndex(hi).compareTo(text) <= 0;
- assert hi == ((entry.termsDictSize - 1) >>> entry.termsDictIndexShift)
+ assert hi == ((entry.termsDictSize - 1) >> entry.termsDictIndexShift)
|| getTermFromIndex(hi + 1).compareTo(text) > 0;
return hi;
@ -1193,9 +1193,14 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
public SeekStatus seekCeil(BytesRef text) throws IOException {
final long block = seekBlock(text);
if (block == -1) {
- // before the first term
- seekExact(0L);
- return SeekStatus.NOT_FOUND;
+ // before the first term, or empty terms dict
+ if (entry.termsDictSize == 0) {
+ ord = 0;
+ return SeekStatus.END;
+ } else {
+ seekExact(0L);
+ return SeekStatus.NOT_FOUND;
+ }
}
final long blockAddress = blockAddresses.get(block);
this.ord = block << TERMS_DICT_BLOCK_LZ4_SHIFT;

View File

@ -887,6 +887,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
assertEquals(-1, dv.lookupTerm(new BytesRef("lucene")));
ireader.close();
directory.close();
}
@ -2148,6 +2153,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
assertEquals(-1, dv.lookupTerm(new BytesRef("lucene")));
ireader.close();
directory.close();
}
@ -3390,6 +3400,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
assertEquals(-1, dv.lookupTerm(new BytesRef("lucene")));
ireader.close();
directory.close();
}
@ -3420,6 +3435,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
assertEquals(-1, dv.lookupTerm(new BytesRef("lucene")));
ireader.close();
directory.close();
}