mirror of https://github.com/apache/lucene.git
LUCENE-9539: Use more compact datastructures for sorting doc-values (#1908)
This change cuts over from object based data-structures to primitive / compressed data-structures.
This commit is contained in:
parent
1611586417
commit
c82b99464d
|
@ -238,6 +238,9 @@ Improvements
|
||||||
* LUCENE-9523: In query shapes over shape fields, skip points while traversing the
|
* LUCENE-9523: In query shapes over shape fields, skip points while traversing the
|
||||||
BKD tree when the relationship with the document is already known. (Ignacio Vera)
|
BKD tree when the relationship with the document is already known. (Ignacio Vera)
|
||||||
|
|
||||||
|
* LUCENE-9539: Use more compact datastructures to represent sorted doc-values in memory when
|
||||||
|
sorting a segment before flush and in SortingCodecReader. (Simon Willnauer)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -24,11 +24,10 @@ import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.store.DataInput;
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BitSet;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.BytesRefArray;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
import org.apache.lucene.util.Counter;
|
import org.apache.lucene.util.Counter;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
|
||||||
import org.apache.lucene.util.PagedBytes;
|
import org.apache.lucene.util.PagedBytes;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
import org.apache.lucene.util.packed.PackedLongValues;
|
import org.apache.lucene.util.packed.PackedLongValues;
|
||||||
|
@ -58,7 +57,7 @@ class BinaryDocValuesWriter extends DocValuesWriter<BinaryDocValues> {
|
||||||
|
|
||||||
private PackedLongValues finalLengths;
|
private PackedLongValues finalLengths;
|
||||||
|
|
||||||
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
this.bytes = new PagedBytes(BLOCK_BITS);
|
this.bytes = new PagedBytes(BLOCK_BITS);
|
||||||
this.bytesOut = bytes.getDataOutput();
|
this.bytesOut = bytes.getDataOutput();
|
||||||
|
@ -100,21 +99,6 @@ class BinaryDocValuesWriter extends DocValuesWriter<BinaryDocValues> {
|
||||||
bytesUsed = newBytesUsed;
|
bytesUsed = newBytesUsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
static CachedBinaryDVs sortDocValues(int maxDoc, Sorter.DocMap sortMap, BinaryDocValues oldValues) throws IOException {
|
|
||||||
FixedBitSet docsWithField = new FixedBitSet(maxDoc);
|
|
||||||
BytesRef[] values = new BytesRef[maxDoc];
|
|
||||||
while (true) {
|
|
||||||
int docID = oldValues.nextDoc();
|
|
||||||
if (docID == NO_MORE_DOCS) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
int newDocID = sortMap.oldToNew(docID);
|
|
||||||
docsWithField.set(newDocID);
|
|
||||||
values[newDocID] = BytesRef.deepCopyOf(oldValues.binaryValue());
|
|
||||||
}
|
|
||||||
return new CachedBinaryDVs(values, docsWithField);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
BinaryDocValues getDocValues() {
|
BinaryDocValues getDocValues() {
|
||||||
if (finalLengths == null) {
|
if (finalLengths == null) {
|
||||||
|
@ -131,7 +115,7 @@ class BinaryDocValuesWriter extends DocValuesWriter<BinaryDocValues> {
|
||||||
}
|
}
|
||||||
final CachedBinaryDVs sorted;
|
final CachedBinaryDVs sorted;
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
sorted = sortDocValues(state.segmentInfo.maxDoc(), sortMap,
|
sorted = new CachedBinaryDVs(state.segmentInfo.maxDoc(), sortMap,
|
||||||
new BufferedBinaryDocValues(finalLengths, maxLength, bytes.getDataInput(), docsWithField.iterator()));
|
new BufferedBinaryDocValues(finalLengths, maxLength, bytes.getDataInput(), docsWithField.iterator()));
|
||||||
} else {
|
} else {
|
||||||
sorted = null;
|
sorted = null;
|
||||||
|
@ -206,8 +190,8 @@ class BinaryDocValuesWriter extends DocValuesWriter<BinaryDocValues> {
|
||||||
|
|
||||||
static class SortingBinaryDocValues extends BinaryDocValues {
|
static class SortingBinaryDocValues extends BinaryDocValues {
|
||||||
private final CachedBinaryDVs dvs;
|
private final CachedBinaryDVs dvs;
|
||||||
|
private final BytesRefBuilder spare = new BytesRefBuilder();
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
private long cost = -1;
|
|
||||||
|
|
||||||
SortingBinaryDocValues(CachedBinaryDVs dvs) {
|
SortingBinaryDocValues(CachedBinaryDVs dvs) {
|
||||||
this.dvs = dvs;
|
this.dvs = dvs;
|
||||||
|
@ -215,11 +199,12 @@ class BinaryDocValuesWriter extends DocValuesWriter<BinaryDocValues> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() {
|
public int nextDoc() {
|
||||||
if (docID+1 == dvs.docsWithField.length()) {
|
do {
|
||||||
docID = NO_MORE_DOCS;
|
docID++;
|
||||||
} else {
|
if (docID == dvs.offsets.length) {
|
||||||
docID = dvs.docsWithField.nextSetBit(docID+1);
|
return docID = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
} while (dvs.offsets[docID] <= 0);
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -240,26 +225,29 @@ class BinaryDocValuesWriter extends DocValuesWriter<BinaryDocValues> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef binaryValue() {
|
public BytesRef binaryValue() {
|
||||||
return dvs.values[docID];
|
dvs.values.get(spare, dvs.offsets[docID]-1);
|
||||||
|
return spare.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long cost() {
|
public long cost() {
|
||||||
if (cost == -1) {
|
return dvs.values.size();
|
||||||
cost = dvs.docsWithField.cardinality();
|
|
||||||
}
|
|
||||||
return cost;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class CachedBinaryDVs {
|
static final class CachedBinaryDVs {
|
||||||
// TODO: at least cutover to BytesRefArray here:
|
final int[] offsets;
|
||||||
private final BytesRef[] values;
|
final BytesRefArray values;
|
||||||
private final BitSet docsWithField;
|
CachedBinaryDVs(int maxDoc, Sorter.DocMap sortMap, BinaryDocValues oldValues) throws IOException {
|
||||||
|
offsets = new int[maxDoc];
|
||||||
CachedBinaryDVs(BytesRef[] values, BitSet docsWithField) {
|
values = new BytesRefArray(Counter.newCounter());
|
||||||
this.values = values;
|
int offset = 1; // 0 means no values for this document
|
||||||
this.docsWithField = docsWithField;
|
int docID;
|
||||||
|
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
||||||
|
int newDocID = sortMap.oldToNew(docID);
|
||||||
|
values.append(oldValues.binaryValue());
|
||||||
|
offsets[newDocID] = offset++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,13 +39,13 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
|
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
|
||||||
private final FieldInfo fieldInfo;
|
private final FieldInfo fieldInfo;
|
||||||
private int currentDoc = -1;
|
private int currentDoc = -1;
|
||||||
private long currentValues[] = new long[8];
|
private long[] currentValues = new long[8];
|
||||||
private int currentUpto = 0;
|
private int currentUpto = 0;
|
||||||
|
|
||||||
private PackedLongValues finalValues;
|
private PackedLongValues finalValues;
|
||||||
private PackedLongValues finalValuesCount;
|
private PackedLongValues finalValuesCount;
|
||||||
|
|
||||||
public SortedNumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
SortedNumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
this.iwBytesUsed = iwBytesUsed;
|
this.iwBytesUsed = iwBytesUsed;
|
||||||
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
|
||||||
|
@ -108,18 +108,28 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
return new BufferedSortedNumericDocValues(finalValues, finalValuesCount, docsWithField.iterator());
|
return new BufferedSortedNumericDocValues(finalValues, finalValuesCount, docsWithField.iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
static long[][] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedNumericDocValues oldValues) throws IOException {
|
static final class LongValues {
|
||||||
long[][] values = new long[maxDoc][];
|
final long[] offsets;
|
||||||
|
final PackedLongValues values;
|
||||||
|
LongValues(int maxDoc, Sorter.DocMap sortMap, SortedNumericDocValues oldValues, float acceptableOverheadRatio) throws IOException {
|
||||||
|
offsets = new long[maxDoc];
|
||||||
|
PackedLongValues.Builder valuesBuiler = PackedLongValues.packedBuilder(acceptableOverheadRatio);
|
||||||
int docID;
|
int docID;
|
||||||
|
long offsetIndex = 1; // 0 means the doc has no values
|
||||||
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
||||||
int newDocID = sortMap.oldToNew(docID);
|
int newDocID = sortMap.oldToNew(docID);
|
||||||
long[] docValues = new long[oldValues.docValueCount()];
|
int numValues = oldValues.docValueCount();
|
||||||
for (int i = 0; i < docValues.length; i++) {
|
valuesBuiler.add(numValues);
|
||||||
docValues[i] = oldValues.nextValue();
|
offsets[newDocID] = offsetIndex++;
|
||||||
|
for (int i = 0; i < numValues; i++) {
|
||||||
|
valuesBuiler.add(oldValues.nextValue());
|
||||||
|
offsetIndex++;
|
||||||
}
|
}
|
||||||
values[newDocID] = docValues;
|
|
||||||
}
|
}
|
||||||
return values;
|
values = valuesBuiler.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -135,10 +145,10 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
valueCounts = finalValuesCount;
|
valueCounts = finalValuesCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
final long[][] sorted;
|
final LongValues sorted;
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
sorted = sortDocValues(state.segmentInfo.maxDoc(), sortMap,
|
sorted = new LongValues(state.segmentInfo.maxDoc(), sortMap,
|
||||||
new BufferedSortedNumericDocValues(values, valueCounts, docsWithField.iterator()));
|
new BufferedSortedNumericDocValues(values, valueCounts, docsWithField.iterator()), PackedInts.FASTEST);
|
||||||
} else {
|
} else {
|
||||||
sorted = null;
|
sorted = null;
|
||||||
}
|
}
|
||||||
|
@ -225,11 +235,13 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
|
|
||||||
static class SortingSortedNumericDocValues extends SortedNumericDocValues {
|
static class SortingSortedNumericDocValues extends SortedNumericDocValues {
|
||||||
private final SortedNumericDocValues in;
|
private final SortedNumericDocValues in;
|
||||||
private final long[][] values;
|
private final LongValues values;
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
private int upto;
|
private long upto;
|
||||||
|
private int numValues = - 1;
|
||||||
|
private long limit;
|
||||||
|
|
||||||
SortingSortedNumericDocValues(SortedNumericDocValues in, long[][] values) {
|
SortingSortedNumericDocValues(SortedNumericDocValues in, LongValues values) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.values = values;
|
this.values = values;
|
||||||
}
|
}
|
||||||
|
@ -241,18 +253,15 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() {
|
public int nextDoc() {
|
||||||
while (true) {
|
do {
|
||||||
docID++;
|
docID++;
|
||||||
if (docID == values.length) {
|
if (docID >= values.offsets.length) {
|
||||||
docID = NO_MORE_DOCS;
|
return docID = NO_MORE_DOCS;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (values[docID] != null) {
|
} while (values.offsets[docID] <= 0);
|
||||||
break;
|
upto = values.offsets[docID];
|
||||||
}
|
numValues = Math.toIntExact(values.values.get(upto-1));
|
||||||
// skip missing docs
|
limit = upto + numValues;
|
||||||
}
|
|
||||||
upto = 0;
|
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -264,16 +273,23 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
@Override
|
@Override
|
||||||
public boolean advanceExact(int target) throws IOException {
|
public boolean advanceExact(int target) throws IOException {
|
||||||
docID = target;
|
docID = target;
|
||||||
upto = 0;
|
upto = values.offsets[docID];
|
||||||
return values[docID] != null;
|
if (values.offsets[docID] > 0) {
|
||||||
|
numValues = Math.toIntExact(values.values.get(upto-1));
|
||||||
|
limit = upto + numValues;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
limit = upto;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long nextValue() {
|
public long nextValue() {
|
||||||
if (upto == values[docID].length) {
|
if (upto == limit) {
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
} else {
|
} else {
|
||||||
return values[docID][upto++];
|
return values.values.get(upto++);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -284,7 +300,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int docValueCount() {
|
public int docValueCount() {
|
||||||
return values[docID].length;
|
return numValues;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,7 +45,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
|
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
|
||||||
private final FieldInfo fieldInfo;
|
private final FieldInfo fieldInfo;
|
||||||
private int currentDoc = -1;
|
private int currentDoc = -1;
|
||||||
private int currentValues[] = new int[8];
|
private int[] currentValues = new int[8];
|
||||||
private int currentUpto;
|
private int currentUpto;
|
||||||
private int maxCount;
|
private int maxCount;
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
private int[] finalOrdMap;
|
private int[] finalOrdMap;
|
||||||
|
|
||||||
|
|
||||||
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
|
||||||
this.fieldInfo = fieldInfo;
|
this.fieldInfo = fieldInfo;
|
||||||
this.iwBytesUsed = iwBytesUsed;
|
this.iwBytesUsed = iwBytesUsed;
|
||||||
hash = new BytesRefHash(
|
hash = new BytesRefHash(
|
||||||
|
@ -139,28 +139,6 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
bytesUsed = newBytesUsed;
|
bytesUsed = newBytesUsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
static long[][] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedSetDocValues oldValues) throws IOException {
|
|
||||||
long[][] ords = new long[maxDoc][];
|
|
||||||
int docID;
|
|
||||||
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
|
||||||
int newDocID = sortMap.oldToNew(docID);
|
|
||||||
long[] docOrds = new long[1];
|
|
||||||
int upto = 0;
|
|
||||||
while (true) {
|
|
||||||
long ord = oldValues.nextOrd();
|
|
||||||
if (ord == NO_MORE_ORDS) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (upto == docOrds.length) {
|
|
||||||
docOrds = ArrayUtil.grow(docOrds);
|
|
||||||
}
|
|
||||||
docOrds[upto++] = ord;
|
|
||||||
}
|
|
||||||
ords[newDocID] = ArrayUtil.copyOfSubArray(docOrds, 0, upto);
|
|
||||||
}
|
|
||||||
return ords;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
SortedSetDocValues getDocValues() {
|
SortedSetDocValues getDocValues() {
|
||||||
if (finalOrds == null) {
|
if (finalOrds == null) {
|
||||||
|
@ -203,12 +181,12 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
ordMap = finalOrdMap;
|
ordMap = finalOrdMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
final long[][] sorted;
|
final DocOrds docOrds;
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
sorted = sortDocValues(state.segmentInfo.maxDoc(), sortMap,
|
docOrds = new DocOrds(state.segmentInfo.maxDoc(), sortMap,
|
||||||
new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator()));
|
new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator()), PackedInts.FASTEST);
|
||||||
} else {
|
} else {
|
||||||
sorted = null;
|
docOrds = null;
|
||||||
}
|
}
|
||||||
dvConsumer.addSortedSetField(fieldInfo,
|
dvConsumer.addSortedSetField(fieldInfo,
|
||||||
new EmptyDocValuesProducer() {
|
new EmptyDocValuesProducer() {
|
||||||
|
@ -219,10 +197,10 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
}
|
}
|
||||||
final SortedSetDocValues buf =
|
final SortedSetDocValues buf =
|
||||||
new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator());
|
new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator());
|
||||||
if (sorted == null) {
|
if (docOrds == null) {
|
||||||
return buf;
|
return buf;
|
||||||
} else {
|
} else {
|
||||||
return new SortingSortedSetDocValues(buf, sorted);
|
return new SortingSortedSetDocValues(buf, docOrds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -236,7 +214,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
final PackedLongValues.Iterator ordsIter;
|
final PackedLongValues.Iterator ordsIter;
|
||||||
final PackedLongValues.Iterator ordCountsIter;
|
final PackedLongValues.Iterator ordCountsIter;
|
||||||
final DocIdSetIterator docsWithField;
|
final DocIdSetIterator docsWithField;
|
||||||
final int currentDoc[];
|
final int[] currentDoc;
|
||||||
|
|
||||||
private int ordCount;
|
private int ordCount;
|
||||||
private int ordUpto;
|
private int ordUpto;
|
||||||
|
@ -311,11 +289,11 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
static class SortingSortedSetDocValues extends SortedSetDocValues {
|
static class SortingSortedSetDocValues extends SortedSetDocValues {
|
||||||
|
|
||||||
private final SortedSetDocValues in;
|
private final SortedSetDocValues in;
|
||||||
private final long[][] ords;
|
private final DocOrds ords;
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
private int ordUpto;
|
private long ordUpto;
|
||||||
|
|
||||||
SortingSortedSetDocValues(SortedSetDocValues in, long[][] ords) {
|
SortingSortedSetDocValues(SortedSetDocValues in, DocOrds ords) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.ords = ords;
|
this.ords = ords;
|
||||||
}
|
}
|
||||||
|
@ -327,18 +305,13 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() {
|
public int nextDoc() {
|
||||||
while (true) {
|
do {
|
||||||
docID++;
|
docID++;
|
||||||
if (docID == ords.length) {
|
if (docID == ords.offsets.length) {
|
||||||
docID = NO_MORE_DOCS;
|
return docID = NO_MORE_DOCS;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (ords[docID] != null) {
|
} while (ords.offsets[docID] <= 0);
|
||||||
break;
|
ordUpto = ords.offsets[docID]-1;
|
||||||
}
|
|
||||||
// skip missing docs
|
|
||||||
}
|
|
||||||
ordUpto = 0;
|
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -351,16 +324,16 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
public boolean advanceExact(int target) throws IOException {
|
public boolean advanceExact(int target) throws IOException {
|
||||||
// needed in IndexSorter#StringSorter
|
// needed in IndexSorter#StringSorter
|
||||||
docID = target;
|
docID = target;
|
||||||
ordUpto = 0;
|
ordUpto = ords.offsets[docID]-1;
|
||||||
return ords[docID] != null;
|
return ords.offsets[docID] > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long nextOrd() {
|
public long nextOrd() {
|
||||||
if (ordUpto == ords[docID].length) {
|
long ord = ords.ords.get(ordUpto++);
|
||||||
|
if (ord == 0) {
|
||||||
return NO_MORE_ORDS;
|
return NO_MORE_ORDS;
|
||||||
} else {
|
} else {
|
||||||
return ords[docID][ordUpto++];
|
return ord - 1 ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -379,4 +352,31 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
return in.getValueCount();
|
return in.getValueCount();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static final class DocOrds {
|
||||||
|
final long[] offsets;
|
||||||
|
final PackedLongValues ords;
|
||||||
|
|
||||||
|
DocOrds(int maxDoc, Sorter.DocMap sortMap, SortedSetDocValues oldValues, float acceptableOverheadRatio) throws IOException {
|
||||||
|
offsets = new long[maxDoc];
|
||||||
|
PackedLongValues.Builder builder = PackedLongValues.packedBuilder(acceptableOverheadRatio);
|
||||||
|
long ordOffset = 1; // 0 marks docs with no values
|
||||||
|
int docID;
|
||||||
|
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
|
||||||
|
int newDocID = sortMap.oldToNew(docID);
|
||||||
|
long startOffset = ordOffset;
|
||||||
|
long ord;
|
||||||
|
while ((ord = oldValues.nextOrd()) != NO_MORE_ORDS) {
|
||||||
|
builder.add(ord + 1);
|
||||||
|
ordOffset++;
|
||||||
|
}
|
||||||
|
if (startOffset != ordOffset) { // do we have any values?
|
||||||
|
offsets[newDocID] = startOffset;
|
||||||
|
builder.add(0); // 0 ord marks next value
|
||||||
|
ordOffset++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ords = builder.build();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
|
||||||
|
@ -51,10 +52,9 @@ public final class SortingCodecReader extends FilterCodecReader {
|
||||||
|
|
||||||
private final Map<String, int[]> cachedSortedDVs = new HashMap<>();
|
private final Map<String, int[]> cachedSortedDVs = new HashMap<>();
|
||||||
|
|
||||||
// TODO: pack long[][] into an int[] (offset) and long[] instead:
|
private final Map<String, SortedSetDocValuesWriter.DocOrds> cachedSortedSetDVs = new HashMap<>();
|
||||||
private final Map<String, long[][]> cachedSortedSetDVs = new HashMap<>();
|
|
||||||
|
|
||||||
private final Map<String, long[][]> cachedSortedNumericDVs = new HashMap<>();
|
private final Map<String, SortedNumericDocValuesWriter.LongValues> cachedSortedNumericDVs = new HashMap<>();
|
||||||
|
|
||||||
private static class SortingBits implements Bits {
|
private static class SortingBits implements Bits {
|
||||||
|
|
||||||
|
@ -381,7 +381,7 @@ public final class SortingCodecReader extends FilterCodecReader {
|
||||||
synchronized (cachedBinaryDVs) {
|
synchronized (cachedBinaryDVs) {
|
||||||
dvs = cachedBinaryDVs.get(field);
|
dvs = cachedBinaryDVs.get(field);
|
||||||
if (dvs == null) {
|
if (dvs == null) {
|
||||||
dvs = BinaryDocValuesWriter.sortDocValues(maxDoc(), docMap, oldDocValues);
|
dvs = new BinaryDocValuesWriter.CachedBinaryDVs(maxDoc(), docMap, oldDocValues);
|
||||||
cachedBinaryDVs.put(field.name, dvs);
|
cachedBinaryDVs.put(field.name, dvs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -412,11 +412,11 @@ public final class SortingCodecReader extends FilterCodecReader {
|
||||||
@Override
|
@Override
|
||||||
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
|
||||||
final SortedNumericDocValues oldDocValues = delegate.getSortedNumeric(field);
|
final SortedNumericDocValues oldDocValues = delegate.getSortedNumeric(field);
|
||||||
long[][] values;
|
SortedNumericDocValuesWriter.LongValues values;
|
||||||
synchronized (cachedSortedNumericDVs) {
|
synchronized (cachedSortedNumericDVs) {
|
||||||
values = cachedSortedNumericDVs.get(field);
|
values = cachedSortedNumericDVs.get(field);
|
||||||
if (values == null) {
|
if (values == null) {
|
||||||
values = SortedNumericDocValuesWriter.sortDocValues(maxDoc(), docMap, oldDocValues);
|
values = new SortedNumericDocValuesWriter.LongValues(maxDoc(), docMap, oldDocValues, PackedInts.FAST);
|
||||||
cachedSortedNumericDVs.put(field.name, values);
|
cachedSortedNumericDVs.put(field.name, values);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -427,11 +427,11 @@ public final class SortingCodecReader extends FilterCodecReader {
|
||||||
@Override
|
@Override
|
||||||
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
|
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
|
||||||
SortedSetDocValues oldDocValues = delegate.getSortedSet(field);
|
SortedSetDocValues oldDocValues = delegate.getSortedSet(field);
|
||||||
long[][] ords;
|
SortedSetDocValuesWriter.DocOrds ords;
|
||||||
synchronized (cachedSortedSetDVs) {
|
synchronized (cachedSortedSetDVs) {
|
||||||
ords = cachedSortedSetDVs.get(field);
|
ords = cachedSortedSetDVs.get(field);
|
||||||
if (ords == null) {
|
if (ords == null) {
|
||||||
ords = SortedSetDocValuesWriter.sortDocValues(maxDoc(), docMap, oldDocValues);
|
ords = new SortedSetDocValuesWriter.DocOrds(maxDoc(), docMap, oldDocValues, PackedInts.FASTEST);
|
||||||
cachedSortedSetDVs.put(field.name, ords);
|
cachedSortedSetDVs.put(field.name, ords);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue