Compress PagedBytesAtomicFieldData's termOrdToBytesOffset.

Using a MonotonicAppendingLongBuffer instead of a GrowableWriter should help save several bits per value, especially when the bytes to store have similar lengths.

Closes #3186
commit a30d58aae2
parent 25f19f8b87
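Why this saves space, shown with a minimal standalone sketch (not code from the commit; it only assumes the MonotonicAppendingLongBuffer API that appears in the diff below): the ord-to-offset values grow by roughly one term length per entry, so a monotonic encoding can store each value as a small deviation from the expected slope instead of reserving enough bits for the largest absolute offset, which is what a GrowableWriter ends up doing. The more uniform the term lengths, the smaller the deviations.

import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

public class MonotonicOffsetsSketch {
    public static void main(String[] args) {
        MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
        termOrdToBytesOffset.add(0); // first ord is reserved for missing values
        long offset = 0;
        for (int ord = 1; ord <= 1000; ord++) {
            offset += 10 + (ord % 3); // hypothetical terms of similar length: 10-12 bytes each
            termOrdToBytesOffset.add(offset); // values must be non-decreasing
        }
        // Random access by ordinal, as the read path does; note that size() returns a long.
        System.out.println(termOrdToBytesOffset.get(500));
        System.out.println((int) termOrdToBytesOffset.size()); // 1001
    }
}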
PagedBytesAtomicFieldData.java

@@ -22,8 +22,7 @@ package org.elasticsearch.index.fielddata.plain;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.PagedBytes.Reader;
-import org.apache.lucene.util.packed.GrowableWriter;
-import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 import org.elasticsearch.index.fielddata.AtomicFieldData;
 import org.elasticsearch.index.fielddata.ScriptDocValues;
 import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
@@ -40,14 +39,14 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
 
     // 0 ordinal in values means no value (its null)
     private final PagedBytes.Reader bytes;
-    private final PackedInts.Reader termOrdToBytesOffset;
+    private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
     protected final Ordinals ordinals;
 
     private volatile int[] hashes;
     private long size = -1;
     private final long readerBytesSize;
 
-    public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, long readerBytesSize, PackedInts.Reader termOrdToBytesOffset, Ordinals ordinals) {
+    public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, long readerBytesSize, MonotonicAppendingLongBuffer termOrdToBytesOffset, Ordinals ordinals) {
         this.bytes = bytes;
         this.termOrdToBytesOffset = termOrdToBytesOffset;
         this.ordinals = ordinals;
@@ -88,7 +87,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
 
     private final int[] getHashes() {
         if (hashes == null) {
-            int numberOfValues = termOrdToBytesOffset.size();
+            int numberOfValues = (int) termOrdToBytesOffset.size();
             int[] hashes = new int[numberOfValues];
             BytesRef scratch = new BytesRef();
             for (int i = 0; i < numberOfValues; i++) {
@@ -121,12 +120,12 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
     static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
 
         protected final PagedBytes.Reader bytes;
-        protected final PackedInts.Reader termOrdToBytesOffset;
+        protected final MonotonicAppendingLongBuffer termOrdToBytesOffset;
         protected final Ordinals.Docs ordinals;
 
         protected final BytesRef scratch = new BytesRef();
 
-        BytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
+        BytesValues(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Ordinals.Docs ordinals) {
             super(ordinals);
             this.bytes = bytes;
             this.termOrdToBytesOffset = termOrdToBytesOffset;
@@ -156,7 +155,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
 
         private final Iter.Single iter;
 
-        Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
+        Single(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Ordinals.Docs ordinals) {
             super(bytes, termOrdToBytesOffset, ordinals);
             assert !ordinals.isMultiValued();
             iter = newSingleIter();
@@ -175,7 +174,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
     static final class SingleHashed extends Single {
         private final int[] hashes;
 
-        SingleHashed(int[] hashes, Reader bytes, org.apache.lucene.util.packed.PackedInts.Reader termOrdToBytesOffset, Docs ordinals) {
+        SingleHashed(int[] hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
             super(bytes, termOrdToBytesOffset, ordinals);
             this.hashes = hashes;
         }
@@ -203,7 +202,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
 
         private final Iter.Multi iter;
 
-        Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
+        Multi(PagedBytes.Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Ordinals.Docs ordinals) {
             super(bytes, termOrdToBytesOffset, ordinals);
             assert ordinals.isMultiValued();
             this.iter = newMultiIter();
@@ -219,7 +218,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
 
         private final int[] hashes;
 
-        MultiHashed(int[] hashes, Reader bytes, org.apache.lucene.util.packed.PackedInts.Reader termOrdToBytesOffset, Docs ordinals) {
+        MultiHashed(int[] hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
             super(bytes, termOrdToBytesOffset, ordinals);
             this.hashes = hashes;
         }
@@ -246,7 +245,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
     static class Empty extends PagedBytesAtomicFieldData {
 
         Empty(int numDocs) {
-            super(emptyBytes(), 0, new GrowableWriter(1, 2, PackedInts.FASTEST).getMutable(), new EmptyOrdinals(numDocs));
+            super(emptyBytes(), 0, new MonotonicAppendingLongBuffer(), new EmptyOrdinals(numDocs));
        }
 
         static PagedBytes.Reader emptyBytes() {
PagedBytesIndexFieldData.java

@@ -20,10 +20,9 @@
 package org.elasticsearch.index.fielddata.plain;
 
 import org.apache.lucene.index.*;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PagedBytes;
-import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
 import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
@@ -92,10 +91,8 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
             startNumUniqueTerms = 1;
         }
 
-        // TODO: expose this as an option..., have a nice parser for it...
-        float acceptableOverheadRatio = PackedInts.FAST;
-
-        GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1 + startNumUniqueTerms, acceptableOverheadRatio);
+        final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
+        termOrdToBytesOffset.add(0); // first ord is reserved for missing values
         boolean preDefineBitsRequired = regex == null && frequency == null;
         OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
         try {
@@ -105,13 +102,8 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
                 DocsEnum docsEnum = null;
                 for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                     final int termOrd = builder.nextOrdinal();
-                    if (termOrd == termOrdToBytesOffset.size()) {
-                        // NOTE: this code only runs if the incoming
-                        // reader impl doesn't implement
-                        // size (which should be uncommon)
-                        termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1 + termOrd, 1));
-                    }
-                    termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
+                    assert termOrd == termOrdToBytesOffset.size();
+                    termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
                     docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                     for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                         builder.addDoc(docId);
@@ -119,10 +111,9 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
                 }
                 final long sizePointer = bytes.getPointer();
                 PagedBytes.Reader bytesReader = bytes.freeze(true);
-                PackedInts.Reader termOrdToBytesOffsetReader = termOrdToBytesOffset.getMutable();
                 final Ordinals ordinals = builder.build(fieldDataType.getSettings());
 
-                return new PagedBytesAtomicFieldData(bytesReader, sizePointer, termOrdToBytesOffsetReader, ordinals);
+                return new PagedBytesAtomicFieldData(bytesReader, sizePointer, termOrdToBytesOffset, ordinals);
             } finally {
                 builder.close();
             }
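For contrast, a sketch of the write-pattern change in PagedBytesIndexFieldData (again not commit code; it uses only the APIs visible in the diff): a GrowableWriter is random access and must be sized, and possibly resized, up front, while a MonotonicAppendingLongBuffer is append-only and grows itself, which is why the resize branch above collapses into an assert plus add(). And since size() now returns a long, the read path casts it to int in getHashes().

import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;

public class WritePatternSketch {
    public static void main(String[] args) {
        long[] offsets = {0, 12, 25, 37, 50}; // hypothetical byte offsets

        // Old pattern: size the writer up front and set values by index.
        GrowableWriter writer = new GrowableWriter(1, offsets.length, PackedInts.FAST);
        for (int ord = 0; ord < offsets.length; ord++) {
            writer.set(ord, offsets[ord]);
        }

        // New pattern: append in ordinal order; the buffer sizes itself.
        MonotonicAppendingLongBuffer buffer = new MonotonicAppendingLongBuffer();
        for (int ord = 0; ord < offsets.length; ord++) {
            assert ord == buffer.size();
            buffer.add(offsets[ord]);
        }

        System.out.println(writer.get(3) + " == " + buffer.get(3)); // 37 == 37
    }
}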