mirror of
synced 2025-03-24 17:09:48 +00:00
add packed bytes variant for strings/bytes
This commit is contained in:
@ -56,6 +56,7 @@ public class IndexFieldDataService extends AbstractIndexComponent {
buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
.put(Tuple.tuple("string", "concrete_bytes"), new ConcreteBytesRefIndexFieldData.Builder())
.put(Tuple.tuple("string", "packed_bytes"), new PackedBytesIndexFieldData.Builder())
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder())
@ -0,0 +1,750 @@
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
import org.elasticsearch.index.fielddata.util.IntArrayRef;
import org.elasticsearch.index.fielddata.util.StringArrayRef;
/**
 * Atomic field data that resolves a term ordinal to its bytes by looking up an
 * offset in {@code termOrdToBytesOffset} and reading from the paged store {@code bytes}.
 */
public class PackedBytesAtomicFieldData implements AtomicOrdinalFieldData<ScriptDocValues.Strings> {
// 0 ordinal in values means no value (it's null)
private final PagedBytes.Reader bytes;
// Offset handed to bytes.fill(...) for each term ordinal.
private final PackedInts.Reader termOrdToBytesOffset;
private final Ordinals ordinals;
// Per-ordinal hash codes; built on the first getHashedBytesValues() call.
private int[] hashes;
// Memoized getMemorySizeInBytes() result; -1 means "not computed yet".
private long size = -1;
/** Keeps references to the given readers and ordinals; nothing is copied here. */
public PackedBytesAtomicFieldData(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
/** Delegates to {@code ordinals.isMultiValued()}. */
public boolean isMultiValued() {
return ordinals.isMultiValued();
/** Delegates to {@code ordinals.getNumDocs()}. */
public int getNumDocs() {
return ordinals.getNumDocs();
/** Always reports {@code true}. */
public boolean isValuesOrdered() {
return true;
public long getMemorySizeInBytes() {
if (size == -1) {
long size = ordinals.getMemorySizeInBytes();
// PackedBytes
size += RamUsage.NUM_BYTES_ARRAY_HEADER + bytes.getBlocks().length;
for (byte[] b : bytes.getBlocks()) {
size += b.length;
// PackedInts
size += termOrdToBytesOffset.ramBytesUsed();
this.size = size;
return size;
public OrdinalsBytesValues getBytesValues() {
return ordinals.isMultiValued() ? new BytesValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new BytesValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
public OrdinalsHashedBytesValues getHashedBytesValues() {
if (hashes == null) {
int numberOfValues = termOrdToBytesOffset.size();
int[] hashes = new int[numberOfValues];
BytesRef scratch = new BytesRef();
for (int i = 0; i < numberOfValues; i++) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(i));
hashes[i] = value == null ? 0 : value.hashCode();
this.hashes = hashes;
return ordinals.isMultiValued() ? new HashedBytesValues.Multi(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals()) : new HashedBytesValues.Single(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals());
public OrdinalsStringValues getStringValues() {
return ordinals.isMultiValued() ? new StringValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new StringValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
/** Wraps a fresh {@link #getStringValues()} view in a {@link ScriptDocValues.Strings}. */
public ScriptDocValues.Strings getScriptValues() {
return new ScriptDocValues.Strings(getStringValues());
/**
 * Shared part of the bytes views: resolves an ordinal to term bytes via
 * {@code bytes.fill(target, termOrdToBytesOffset.get(ord))}.
 */
static abstract class BytesValues implements org.elasticsearch.index.fielddata.OrdinalsBytesValues {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
protected final Ordinals.Docs ordinals;
// Reused by the non-"safe" accessors, so a value they return is overwritten by the next such call.
protected final BytesRef scratch = new BytesRef();
BytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
/** Returns the per-document ordinals this view reads from. */
public Ordinals.Docs ordinals() {
return this.ordinals;
/** Fills the shared scratch with the bytes for {@code ord} and returns the result of the fill. */
public BytesRef getValueByOrd(int ord) {
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
/** Fills the caller-supplied {@code ret} with the bytes for {@code ord}. */
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
return bytes.fill(ret, termOrdToBytesOffset.get(ord));
/** Like {@link #getValueByOrd(int)} but fills a newly allocated {@code BytesRef}. */
public BytesRef getSafeValueByOrd(int ord) {
return bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
/** A document has a value when its ordinal is not 0. */
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
/** Returns a deep copy of the given reference. */
public BytesRef makeSafe(BytesRef bytes) {
return BytesRef.deepCopyOf(bytes);
/** Returns the document's value in the shared scratch, or {@code null} when its ordinal is 0. */
public BytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
/**
 * Fills {@code ret} with the document's value. Unlike {@link #getValue(int)} there is no
 * special case for ordinal 0: the offset stored for ordinal 0 is read like any other.
 */
public BytesRef getValueScratch(int docId, BytesRef ret) {
return bytes.fill(ret, termOrdToBytesOffset.get(ordinals.getOrd(docId)));
/** Returns the document's value in a newly allocated {@code BytesRef}, or {@code null} when its ordinal is 0. */
public BytesRef getValueSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
/** Bytes view for the single-valued case: each document is read through {@code ordinals.getOrd(docId)}. */
static class Single extends BytesValues {
// One-slot result holder reused by every getValues(int) call.
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
// Reused single-element iterator; reset on every getIter/getIterSafe call.
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
public boolean isMultiValued() {
return false;
/** Returns {@code BytesRefArrayRef.EMPTY} for ordinal 0, otherwise the reused holder with a freshly allocated value in slot 0. */
public BytesRefArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.values[0] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
return arrayScratch;
/** Returns the empty iterator for ordinal 0, otherwise the reused iterator over the shared scratch value. */
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
/** Same as {@link #getIter(int)} but the value is filled into a newly allocated {@code BytesRef}. */
public Iter getIterSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
/** Passes the document's value (in the shared scratch) to {@code proc.onValue}. */
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
// ord == 0 means the document has no value; this branch has no statements here.
if (ord == 0) {
} else {
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
/** Same as {@link #forEachValueInDoc} but the value is filled into a newly allocated {@code BytesRef}. */
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
// ord == 0 means the document has no value; this branch has no statements here.
if (ord == 0) {
} else {
proc.onValue(docId, bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
static class Multi extends BytesValues {
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
private final ValuesIter iter;
private final SafeValuesIter safeIter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
this.iter = new ValuesIter(bytes, termOrdToBytesOffset);
this.safeIter = new SafeValuesIter(bytes, termOrdToBytesOffset);
public boolean isMultiValued() {
return true;
public BytesRefArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return BytesRefArrayRef.EMPTY;
for (int i = ords.start; i < ords.end; i++) {
arrayScratch.values[arrayScratch.end++] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ords.values[i]));
return arrayScratch;
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
public Iter getIterSafe(int docId) {
return safeIter.reset(ordinals.getIter(docId));
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
do {
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
} while ((ord = iter.next()) != 0);
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
do {
proc.onValue(docId, bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
} while ((ord = iter.next()) != 0);
/**
 * Iterator over a document's values that fills one shared scratch {@code BytesRef};
 * a value returned by {@code next()} is overwritten by the following {@code next()} call.
 */
static class ValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final BytesRef scratch = new BytesRef();
private Ordinals.Docs.Iter ordsIter;
// Ordinal that the next call to next() will resolve; 0 means exhausted.
private int ord;
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
/** Points this iterator at a new ordinal stream and pre-fetches its first ordinal. */
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
public boolean hasNext() {
return ord != 0;
/** Resolves the pre-fetched ordinal, then pre-fetches the following one. Does not check hasNext() itself. */
public BytesRef next() {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value;
/** Iterator over a document's values that fills a newly allocated {@code BytesRef} per value. */
static class SafeValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private Ordinals.Docs.Iter ordsIter;
// Ordinal that the next call to next() will resolve; 0 means exhausted.
private int ord;
SafeValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
/** Points this iterator at a new ordinal stream and pre-fetches its first ordinal. */
public SafeValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
public boolean hasNext() {
return ord != 0;
/** Resolves the pre-fetched ordinal into a new {@code BytesRef}, then pre-fetches the following one. */
public BytesRef next() {
BytesRef value = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value;
/**
 * Shared part of the hashed-bytes views: pairs the bytes resolved for an ordinal
 * with the precomputed hash at {@code hashes[ord]}.
 */
static abstract class HashedBytesValues implements org.elasticsearch.index.fielddata.OrdinalsHashedBytesValues {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
// Hash code per ordinal, indexed by ordinal.
protected final int[] hashes;
protected final Ordinals.Docs ordinals;
// scratch1 receives the bytes, scratch wraps them with the hash; both are reused by the non-"safe" accessors.
protected final BytesRef scratch1 = new BytesRef();
protected final HashedBytesRef scratch = new HashedBytesRef();
HashedBytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
this.ordinals = ordinals;
/** Returns the per-document ordinals this view reads from. */
public Ordinals.Docs ordinals() {
return this.ordinals;
/** Resets the shared scratch to the bytes and hash of {@code ord}. */
public HashedBytesRef getValueByOrd(int ord) {
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
/** Like {@link #getValueByOrd(int)} but returns newly allocated objects. */
public HashedBytesRef getSafeValueByOrd(int ord) {
return new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
/** A document has a value when its ordinal is not 0. */
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
/** Returns a new {@code HashedBytesRef} holding a deep copy of the bytes and the same hash. */
public HashedBytesRef makeSafe(HashedBytesRef bytes) {
return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
/** Returns the document's value in the shared scratch, or {@code null} when its ordinal is 0. */
public HashedBytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
/** Returns the document's value in newly allocated objects, or {@code null} when its ordinal is 0. */
public HashedBytesRef getValueSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
/** Hashed-bytes view for the single-valued case: each document is read through {@code ordinals.getOrd(docId)}. */
static class Single extends HashedBytesValues {
// Reused single-element iterator; reset on every getIter/getIterSafe call.
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, hashes, ordinals);
public boolean isMultiValued() {
return false;
/** Returns the empty iterator for ordinal 0, otherwise the reused iterator over the shared scratch value. */
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
/** Same as {@link #getIter(int)} but the value is built from newly allocated objects. */
public Iter getIterSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
/** Passes the document's value (in the shared scratch) to {@code proc.onValue}. */
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
// ord == 0 means the document has no value; this branch has no statements here.
if (ord == 0) {
} else {
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
/** Same as {@link #forEachValueInDoc} but the value is built from newly allocated objects. */
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
// ord == 0 means the document has no value; this branch has no statements here.
if (ord == 0) {
} else {
proc.onValue(docId, new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
static class Multi extends HashedBytesValues {
private final ValuesIter iter;
private final SafeValuesIter safeIter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, hashes, ordinals);
this.iter = new ValuesIter(bytes, termOrdToBytesOffset, hashes);
this.safeIter = new SafeValuesIter(bytes, termOrdToBytesOffset, hashes);
public boolean isMultiValued() {
return true;
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
public Iter getIterSafe(int docId) {
return safeIter.reset(ordinals.getIter(docId));
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
do {
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
} while ((ord = iter.next()) != 0);
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
do {
proc.onValue(docId, new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
} while ((ord = iter.next()) != 0);
/**
 * Iterator over a document's hashed values that reuses one {@code BytesRef} and one
 * {@code HashedBytesRef}; a value returned by {@code next()} is overwritten by the following call.
 */
static class ValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final int[] hashes;
private Ordinals.Docs.Iter ordsIter;
// Ordinal that the next call to next() will resolve; 0 means exhausted.
private int ord;
private final BytesRef scratch1 = new BytesRef();
private final HashedBytesRef scratch = new HashedBytesRef();
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
/** Points this iterator at a new ordinal stream and pre-fetches its first ordinal. */
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
public boolean hasNext() {
return ord != 0;
/** Resolves the pre-fetched ordinal, then pre-fetches the following one. Does not check hasNext() itself. */
public HashedBytesRef next() {
HashedBytesRef value = scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
ord = ordsIter.next();
return value;
/** Iterator over a document's hashed values that allocates new objects for every value. */
static class SafeValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final int[] hashes;
private Ordinals.Docs.Iter ordsIter;
// Ordinal that the next call to next() will resolve; 0 means exhausted.
private int ord;
SafeValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
/** Points this iterator at a new ordinal stream and pre-fetches its first ordinal. */
public SafeValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
public boolean hasNext() {
return ord != 0;
/** Resolves the pre-fetched ordinal into new objects, then pre-fetches the following one. */
public HashedBytesRef next() {
HashedBytesRef value = new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
ord = ordsIter.next();
return value;
/**
 * Shared part of the string views: resolves an ordinal to term bytes and decodes
 * them with {@code utf8ToString()}.
 */
static abstract class StringValues implements OrdinalsStringValues {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
protected final Ordinals.Docs ordinals;
// Reused as the fill target before decoding to a String.
protected final BytesRef scratch = new BytesRef();
protected StringValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
/** Returns the per-document ordinals this view reads from. */
public Ordinals.Docs ordinals() {
return ordinals;
/** Decodes the bytes stored for {@code ord}; the fill result is dereferenced without a null check. */
public String getValueByOrd(int ord) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return value.utf8ToString();
/** A document has a value when its ordinal is not 0. */
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
/** Returns the document's value as a String, or {@code null} when its ordinal is 0. */
public String getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return value.utf8ToString();
static class Single extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1);
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
public boolean isMultiValued() {
return false;
public StringArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return StringArrayRef.EMPTY;
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
arrayScratch.values[0] = value == null ? null : value.utf8ToString();
return arrayScratch;
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
static class Multi extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0);
private final ValuesIter iter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
iter = new ValuesIter(bytes, termOrdToBytesOffset);
public boolean isMultiValued() {
return true;
public StringArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return StringArrayRef.EMPTY;
for (int i = ords.start; i < ords.end; i++) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ords.values[i]));
arrayScratch.values[arrayScratch.end++] = value == null ? null : value.utf8ToString();
return arrayScratch;
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
do {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, value == null ? null : value.utf8ToString());
} while ((ord = iter.next()) != 0);
/** Iterator over a document's values that decodes each ordinal's bytes to a String. */
static class ValuesIter implements StringValues.Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
// Reused as the fill target before decoding.
private final BytesRef scratch = new BytesRef();
private Ordinals.Docs.Iter ordsIter;
// Ordinal that the next call to next() will resolve; 0 means exhausted.
private int ord;
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
/** Points this iterator at a new ordinal stream and pre-fetches its first ordinal. */
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
public boolean hasNext() {
return ord != 0;
/** Decodes the pre-fetched ordinal (null if the fill returned null), then pre-fetches the following one. */
public String next() {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value == null ? null : value.utf8ToString();
@ -0,0 +1,193 @@
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.MultiFlatArrayOrdinals;
import org.elasticsearch.index.fielddata.ordinals.SingleArrayOrdinals;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.ArrayList;
/**
 * Index-level field data that produces {@link PackedBytesAtomicFieldData} instances,
 * either through the configured cache ({@code load}) or by building them ({@code loadDirect}).
 */
public class PackedBytesIndexFieldData extends AbstractIndexFieldData<PackedBytesAtomicFieldData> implements IndexOrdinalFieldData<PackedBytesAtomicFieldData> {
/** Factory that creates a {@link PackedBytesIndexFieldData} from the given arguments. */
public static class Builder implements IndexFieldData.Builder {
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
return new PackedBytesIndexFieldData(index, indexSettings, fieldNames, type, cache);
/** Passes every argument straight to the superclass constructor. */
public PackedBytesIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
super(index, indexSettings, fieldNames, fieldDataType, cache);
/** Always reports {@code true}. */
public boolean valuesOrdered() {
return true;
public PackedBytesAtomicFieldData load(AtomicReaderContext context) {
try {
return cache.load(context, this);
} catch (Throwable e) {
if (e instanceof ElasticSearchException) {
throw (ElasticSearchException) e;
} else {
throw new ElasticSearchException(e.getMessage(), e);
public PackedBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
final PagedBytes bytes = new PagedBytes(1);
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
GrowableWriter termOrdToBytesOffset = new GrowableWriter(1, 2, PackedInts.FASTEST);
return new PackedBytesAtomicFieldData(bytes.freeze(true), termOrdToBytesOffset.getMutable(), new EmptyOrdinals(reader.maxDoc()));
final PagedBytes bytes = new PagedBytes(15);
int startBytesBPV;
int startTermsBPV;
int startNumUniqueTerms;
int maxDoc = reader.maxDoc();
final int termCountHardLimit;
if (maxDoc == Integer.MAX_VALUE) {
termCountHardLimit = Integer.MAX_VALUE;
} else {
termCountHardLimit = maxDoc + 1;
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
// is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = terms.size();
if (numUniqueTerms != -1L) {
if (numUniqueTerms > termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
// TODO: expose this as an option..., have a nice parser for it...
float acceptableOverheadRatio = PackedInts.FAST;
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1 + startNumUniqueTerms, acceptableOverheadRatio);
ArrayList<int[]> ordinals = new ArrayList<int[]>();
int[] idx = new int[reader.maxDoc()];
ordinals.add(new int[reader.maxDoc()]);
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
int termOrd = 1;
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docsEnum = null;
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
if (termOrd == termOrdToBytesOffset.size()) {
// NOTE: this code only runs if the incoming
// reader impl doesn't implement
// size (which should be uncommon)
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1 + termOrd, 1));
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
int[] ordinal;
if (idx[docId] >= ordinals.size()) {
ordinal = new int[reader.maxDoc()];
} else {
ordinal = ordinals.get(idx[docId]);
ordinal[docId] = termOrd;
} catch (RuntimeException e) {
if (e.getClass().getName().endsWith("StopFillCacheException")) {
// all is well, in case numeric parsers are used.
} else {
throw e;
PagedBytes.Reader bytesReader = bytes.freeze(true);
PackedInts.Reader termOrdToBytesOffsetReader = termOrdToBytesOffset.getMutable();
if (ordinals.size() == 1) {
return new PackedBytesAtomicFieldData(bytesReader, termOrdToBytesOffsetReader, new SingleArrayOrdinals(ordinals.get(0), termOrd));
} else {
int[][] nativeOrdinals = new int[ordinals.size()][];
for (int i = 0; i < nativeOrdinals.length; i++) {
nativeOrdinals[i] = ordinals.get(i);
return new PackedBytesAtomicFieldData(bytesReader, termOrdToBytesOffsetReader, new MultiFlatArrayOrdinals(nativeOrdinals, termOrd));
/**
 * Returns a bytes-based comparator source over this field data.
 * The {@code missingValue} argument is currently not used (see the TODO below).
 */
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue) {
// TODO support "missingValue" for sortMissingValue options here...
return new BytesRefFieldComparatorSource(this);
@ -0,0 +1,35 @@
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.test.unit.index.fielddata;
import com.google.common.collect.ImmutableMap;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.testng.annotations.Test;
/**
 * Runs the inherited {@link StringFieldDataTests} against the "packed_bytes" format
 * of the "string" field data type.
 */
public class PackedBytesStringFieldDataTests extends StringFieldDataTests {
/** Selects type "string" with format "packed_bytes" and no extra settings. */
protected FieldDataType getFieldDataType() {
return new FieldDataType("string", "packed_bytes", ImmutableMap.<String, String>of());
Reference in New Issue
Block a user