mirror of https://github.com/apache/lucene.git
LUCENE-10405: binary and Sorted doc values are stored as BytesRef instead of BytesRefHash in memory index (#647)
When using the MemoryIndex, binary and sorted doc values are now stored as a BytesRef instead of in a BytesRefHash, so they no longer have a limit on size.
This commit is contained in:
parent
deef3c704e
commit
4c578017af
|
@ -232,6 +232,9 @@ Bug Fixes
|
|||
|
||||
* LUCENE-10407: Containing intervals could sometimes yield incorrect matches when wrapped
|
||||
in a disjunction. (Alan Woodward, Dawid Weiss)
|
||||
|
||||
* LUCENE-10405: When using the MemoryIndex, binary and sorted doc values are stored
|
||||
as BytesRef instead of BytesRefHash so they don't have a limit on size. (Ignacio Vera)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
|
|
@ -578,17 +578,6 @@ public class MemoryIndex {
|
|||
info.numericProducer.dvLongValues[info.numericProducer.count++] = (long) docValuesValue;
|
||||
break;
|
||||
case BINARY:
|
||||
if (info.binaryProducer.dvBytesValuesSet != null) {
|
||||
throw new IllegalArgumentException(
|
||||
"Only one value per field allowed for ["
|
||||
+ docValuesType
|
||||
+ "] doc values field ["
|
||||
+ fieldName
|
||||
+ "]");
|
||||
}
|
||||
info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
|
||||
info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
|
||||
break;
|
||||
case SORTED:
|
||||
if (info.binaryProducer.dvBytesValuesSet != null) {
|
||||
throw new IllegalArgumentException(
|
||||
|
@ -598,14 +587,13 @@ public class MemoryIndex {
|
|||
+ fieldName
|
||||
+ "]");
|
||||
}
|
||||
info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
|
||||
info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
|
||||
info.binaryProducer.dvBytesValuesSet = ((BytesRef) docValuesValue).clone();
|
||||
break;
|
||||
case SORTED_SET:
|
||||
if (info.binaryProducer.dvBytesValuesSet == null) {
|
||||
info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
|
||||
if (info.bytesRefHashProducer.dvBytesRefHashValuesSet == null) {
|
||||
info.bytesRefHashProducer.dvBytesRefHashValuesSet = new BytesRefHash(byteBlockPool);
|
||||
}
|
||||
info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
|
||||
info.bytesRefHashProducer.dvBytesRefHashValuesSet.add((BytesRef) docValuesValue);
|
||||
break;
|
||||
case NONE:
|
||||
default:
|
||||
|
@ -866,6 +854,8 @@ public class MemoryIndex {
|
|||
/** the last offset encountered in this field for multi field support */
|
||||
private int lastOffset;
|
||||
|
||||
private BytesRefHashDocValuesProducer bytesRefHashProducer;
|
||||
|
||||
private BinaryDocValuesProducer binaryProducer;
|
||||
|
||||
private NumericDocValuesProducer numericProducer;
|
||||
|
@ -884,7 +874,8 @@ public class MemoryIndex {
|
|||
this.fieldInfo = fieldInfo;
|
||||
this.sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
|
||||
this.terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
|
||||
;
|
||||
|
||||
this.bytesRefHashProducer = new BytesRefHashDocValuesProducer();
|
||||
this.binaryProducer = new BinaryDocValuesProducer();
|
||||
this.numericProducer = new NumericDocValuesProducer();
|
||||
}
|
||||
|
@ -914,10 +905,8 @@ public class MemoryIndex {
|
|||
if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
|
||||
numericProducer.prepareForUsage();
|
||||
}
|
||||
if (dvType == DocValuesType.BINARY
|
||||
|| dvType == DocValuesType.SORTED
|
||||
|| dvType == DocValuesType.SORTED_SET) {
|
||||
binaryProducer.prepareForUsage();
|
||||
if (dvType == DocValuesType.SORTED_SET) {
|
||||
bytesRefHashProducer.prepareForUsage();
|
||||
}
|
||||
if (pointValues != null) {
|
||||
assert pointValues[0].bytes.length == pointValues[0].length
|
||||
|
@ -1193,12 +1182,16 @@ public class MemoryIndex {
|
|||
}
|
||||
|
||||
private static final class BinaryDocValuesProducer {
|
||||
BytesRef dvBytesValuesSet;
|
||||
}
|
||||
|
||||
BytesRefHash dvBytesValuesSet;
|
||||
private static final class BytesRefHashDocValuesProducer {
|
||||
|
||||
BytesRefHash dvBytesRefHashValuesSet;
|
||||
int[] bytesIds;
|
||||
|
||||
private void prepareForUsage() {
|
||||
bytesIds = dvBytesValuesSet.sort();
|
||||
bytesIds = dvBytesRefHashValuesSet.sort();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1316,8 +1309,7 @@ public class MemoryIndex {
|
|||
private SortedDocValues getSortedDocValues(String field, DocValuesType docValuesType) {
|
||||
Info info = getInfoForExpectedDocValuesType(field, docValuesType);
|
||||
if (info != null) {
|
||||
BytesRef value = info.binaryProducer.dvBytesValuesSet.get(0, new BytesRef());
|
||||
return sortedDocValues(value);
|
||||
return sortedDocValues(info.binaryProducer.dvBytesValuesSet);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
@ -1338,7 +1330,7 @@ public class MemoryIndex {
|
|||
Info info = getInfoForExpectedDocValuesType(field, DocValuesType.SORTED_SET);
|
||||
if (info != null) {
|
||||
return sortedSetDocValues(
|
||||
info.binaryProducer.dvBytesValuesSet, info.binaryProducer.bytesIds);
|
||||
info.bytesRefHashProducer.dvBytesRefHashValuesSet, info.bytesRefHashProducer.bytesIds);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -430,6 +430,30 @@ public class TestMemoryIndex extends LuceneTestCase {
|
|||
assertEquals("quick brown fox", binaryDocValues.binaryValue().utf8ToString());
|
||||
}
|
||||
|
||||
public void testBigBinaryDocValues() throws Exception {
|
||||
Document doc = new Document();
|
||||
byte[] bytes = new byte[33 * 1024];
|
||||
random().nextBytes(bytes);
|
||||
doc.add(new BinaryDocValuesField("binary", new BytesRef(bytes)));
|
||||
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer, true, true);
|
||||
LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
|
||||
BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
|
||||
assertEquals(0, binaryDocValues.nextDoc());
|
||||
assertArrayEquals(bytes, binaryDocValues.binaryValue().bytes);
|
||||
}
|
||||
|
||||
public void testBigSortedDocValues() throws Exception {
|
||||
Document doc = new Document();
|
||||
byte[] bytes = new byte[33 * 1024];
|
||||
random().nextBytes(bytes);
|
||||
doc.add(new SortedDocValuesField("binary", new BytesRef(bytes)));
|
||||
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer, true, true);
|
||||
LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
|
||||
SortedDocValues sortedDocValues = leafReader.getSortedDocValues("binary");
|
||||
assertEquals(0, sortedDocValues.nextDoc());
|
||||
assertArrayEquals(bytes, sortedDocValues.lookupOrd(0).bytes);
|
||||
}
|
||||
|
||||
public void testPointValues() throws Exception {
|
||||
List<Function<Long, IndexableField>> fieldFunctions =
|
||||
Arrays.asList(
|
||||
|
|
Loading…
Reference in New Issue