Added FST based FieldData implementation holding all data in a per segment FST.
This commit factors out a common API for BytesValues-based implementations into shared code and reduces code duplication.
This commit is contained in:
parent
72c76c2799
commit
129f02623b
|
@ -21,72 +21,89 @@ package org.elasticsearch.index.fielddata;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
|
||||
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.IntArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.StringArrayRef;
|
||||
|
||||
/**
|
||||
*/
|
||||
public interface BytesValues {
|
||||
|
||||
static final BytesValues EMPTY = new Empty();
|
||||
public abstract class BytesValues {
|
||||
|
||||
public static final BytesValues EMPTY = new Empty();
|
||||
private boolean multiValued;
|
||||
protected final BytesRef scratch = new BytesRef();
|
||||
|
||||
protected BytesValues(boolean multiValued) {
|
||||
this.multiValued = multiValued;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is one of the documents in this field data values is multi valued?
|
||||
*/
|
||||
boolean isMultiValued();
|
||||
public final boolean isMultiValued() {
|
||||
return multiValued;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is there a value for this doc?
|
||||
*/
|
||||
boolean hasValue(int docId);
|
||||
public abstract boolean hasValue(int docId);
|
||||
|
||||
/**
|
||||
* Converts the provided bytes to "safe" ones from a "non" safe call made (if needed).
|
||||
*/
|
||||
BytesRef makeSafe(BytesRef bytes);
|
||||
public BytesRef makeSafe(BytesRef bytes) {
|
||||
return BytesRef.deepCopyOf(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a bytes value for a docId. Note, the content of it might be shared across invocation.
|
||||
*/
|
||||
BytesRef getValue(int docId);
|
||||
public BytesRef getValue(int docId) {
|
||||
if (hasValue(docId)) {
|
||||
return getValueScratch(docId, scratch);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bytes value for the docId, with the provided "ret" which will be filled with the
|
||||
* result which will also be returned. If there is no value for this docId, the length will be 0.
|
||||
* Note, the bytes are not "safe".
|
||||
*/
|
||||
BytesRef getValueScratch(int docId, BytesRef ret);
|
||||
public abstract BytesRef getValueScratch(int docId, BytesRef ret);
|
||||
|
||||
/**
|
||||
* Returns an array wrapping all the bytes values for a doc. The content is guaranteed not to be shared.
|
||||
*/
|
||||
BytesRefArrayRef getValues(int docId);
|
||||
public abstract BytesRefArrayRef getValues(int docId);
|
||||
|
||||
/**
|
||||
* Returns a bytes value iterator for a docId. Note, the content of it might be shared across invocation.
|
||||
*/
|
||||
Iter getIter(int docId);
|
||||
public abstract Iter getIter(int docId);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Go over all the possible values in their BytesRef format for a specific doc.
|
||||
*/
|
||||
void forEachValueInDoc(int docId, ValueInDocProc proc);
|
||||
|
||||
public abstract void forEachValueInDoc(int docId, ValueInDocProc proc);
|
||||
public static interface ValueInDocProc {
|
||||
void onValue(int docId, BytesRef value);
|
||||
|
||||
void onMissing(int docId);
|
||||
}
|
||||
|
||||
static interface Iter {
|
||||
public static interface Iter {
|
||||
|
||||
boolean hasNext();
|
||||
|
||||
BytesRef next();
|
||||
|
||||
static class Empty implements Iter {
|
||||
public static class Empty implements Iter {
|
||||
|
||||
public static final Empty INSTANCE = new Empty();
|
||||
|
||||
|
@ -101,7 +118,7 @@ public interface BytesValues {
|
|||
}
|
||||
}
|
||||
|
||||
static class Single implements Iter {
|
||||
public final static class Single implements Iter {
|
||||
|
||||
public BytesRef value;
|
||||
public boolean done;
|
||||
|
@ -124,12 +141,43 @@ public interface BytesValues {
|
|||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Multi implements Iter {
|
||||
|
||||
private int ord;
|
||||
private BytesValues.WithOrdinals withOrds;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
public Multi(WithOrdinals withOrds) {
|
||||
this.withOrds = withOrds;
|
||||
assert withOrds.isMultiValued();
|
||||
|
||||
}
|
||||
|
||||
public Multi reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
withOrds.getValueScratchByOrd(ord, scratch);
|
||||
ord = ordsIter.next();
|
||||
return scratch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Empty implements BytesValues {
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
public static class Empty extends BytesValues {
|
||||
|
||||
public Empty() {
|
||||
super(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -137,11 +185,6 @@ public interface BytesValues {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValue(int docId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
return BytesRefArrayRef.EMPTY;
|
||||
|
@ -157,13 +200,6 @@ public interface BytesValues {
|
|||
proc.onMissing(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef makeSafe(BytesRef bytes) {
|
||||
//todo we can also throw an excepiton here as the only value this method accepts is a scratch value...
|
||||
//todo ...extracted from this ByteValues, in our case, there are not values, so this should never be called!?!?
|
||||
return BytesRef.deepCopyOf(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratch(int docId, BytesRef ret) {
|
||||
ret.length = 0;
|
||||
|
@ -171,43 +207,25 @@ public interface BytesValues {
|
|||
}
|
||||
}
|
||||
|
||||
public static class StringBased implements BytesValues {
|
||||
public static class StringBased extends BytesValues {
|
||||
|
||||
|
||||
private final StringValues values;
|
||||
|
||||
protected final BytesRef scratch = new BytesRef();
|
||||
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
|
||||
private final ValueIter valueIter = new ValueIter();
|
||||
private final Proc proc = new Proc();
|
||||
|
||||
public StringBased(StringValues values) {
|
||||
super(values.isMultiValued());
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return values.isMultiValued();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return values.hasValue(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef makeSafe(BytesRef bytes) {
|
||||
// we need to make a copy, since we use scratch to provide it
|
||||
return BytesRef.deepCopyOf(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValue(int docId) {
|
||||
String value = values.getValue(docId);
|
||||
if (value == null) return null;
|
||||
scratch.copyChars(value);
|
||||
return scratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratch(int docId, BytesRef ret) {
|
||||
String value = values.getValue(docId);
|
||||
|
@ -244,7 +262,7 @@ public interface BytesValues {
|
|||
values.forEachValueInDoc(docId, this.proc.reset(proc));
|
||||
}
|
||||
|
||||
static class ValueIter implements Iter {
|
||||
public static class ValueIter implements Iter {
|
||||
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private StringValues.Iter iter;
|
||||
|
@ -266,7 +284,7 @@ public interface BytesValues {
|
|||
}
|
||||
}
|
||||
|
||||
static class Proc implements StringValues.ValueInDocProc {
|
||||
public static class Proc implements StringValues.ValueInDocProc {
|
||||
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private BytesValues.ValueInDocProc proc;
|
||||
|
@ -292,32 +310,106 @@ public interface BytesValues {
|
|||
/**
|
||||
* Bytes values that are based on ordinals.
|
||||
*/
|
||||
static interface WithOrdinals extends BytesValues {
|
||||
public static abstract class WithOrdinals extends BytesValues {
|
||||
|
||||
protected final Docs ordinals;
|
||||
protected final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
|
||||
|
||||
Ordinals.Docs ordinals();
|
||||
protected WithOrdinals(Ordinals.Docs ordinals) {
|
||||
super(ordinals.isMultiValued());
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
BytesRef getValueByOrd(int ord);
|
||||
public Ordinals.Docs ordinals() {
|
||||
return ordinals;
|
||||
}
|
||||
|
||||
public BytesRef getValueByOrd(int ord) {
|
||||
return getValueScratchByOrd(ord, scratch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
assert !isMultiValued();
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return BytesRefArrayRef.EMPTY;
|
||||
arrayScratch.values[0] = getSafeValueByOrd(ord);
|
||||
arrayScratch.end = 1;
|
||||
arrayScratch.start = 0;
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
assert !isMultiValued();
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, getValue(docId));
|
||||
}
|
||||
}
|
||||
|
||||
protected BytesRefArrayRef getValuesMulti(int docId) {
|
||||
assert isMultiValued();
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) {
|
||||
return BytesRefArrayRef.EMPTY;
|
||||
}
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
arrayScratch.values[arrayScratch.end++] = getValueScratchByOrd(ords.values[i], new BytesRef());
|
||||
}
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
protected void forEachValueInDocMulti(int docId, ValueInDocProc proc) {
|
||||
assert isMultiValued();
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
getValueScratchByOrd(ord, scratch);
|
||||
proc.onValue(docId, scratch);
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
return getValueScratchByOrd(ord, scratch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratch(int docId, BytesRef ret) {
|
||||
return getValueScratchByOrd(ordinals.getOrd(docId), ret);
|
||||
}
|
||||
|
||||
public BytesRef getSafeValueByOrd(int ord) {
|
||||
return getValueScratchByOrd(ord, new BytesRef());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the bytes value for the docId, with the provided "ret" which will be filled with the
|
||||
* result which will also be returned. If there is no value for this docId, the length will be 0.
|
||||
* Note, the bytes are not "safe".
|
||||
*/
|
||||
BytesRef getValueScratchByOrd(int ord, BytesRef ret);
|
||||
public abstract BytesRef getValueScratchByOrd(int ord, BytesRef ret);
|
||||
|
||||
BytesRef getSafeValueByOrd(int ord);
|
||||
public static class Empty extends WithOrdinals {
|
||||
|
||||
public static class Empty extends BytesValues.Empty implements WithOrdinals {
|
||||
|
||||
private final Ordinals ordinals;
|
||||
|
||||
public Empty(EmptyOrdinals ordinals) {
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return ordinals.ordinals();
|
||||
public Empty(Ordinals.Docs ordinals) {
|
||||
super(ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -335,38 +427,32 @@ public interface BytesValues {
|
|||
public BytesRef getSafeValueByOrd(int ord) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static class StringBased extends BytesValues.StringBased implements WithOrdinals {
|
||||
|
||||
private final StringValues.WithOrdinals values;
|
||||
|
||||
public StringBased(StringValues.WithOrdinals values) {
|
||||
super(values);
|
||||
this.values = values;
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return values.ordinals();
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
return BytesRefArrayRef.EMPTY;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueByOrd(int ord) {
|
||||
scratch.copyChars(values.getValueByOrd(ord));
|
||||
return scratch;
|
||||
public Iter getIter(int docId) {
|
||||
return Iter.Empty.INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
|
||||
ret.copyChars(values.getValueByOrd(ord));
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
proc.onMissing(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratch(int docId, BytesRef ret) {
|
||||
ret.length = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getSafeValueByOrd(int ord) {
|
||||
return new BytesRef(values.getValueByOrd(ord));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.fielddata;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||
import org.elasticsearch.common.lucene.HashedBytesRef;
|
||||
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
|
||||
/**
|
||||
|
@ -346,7 +345,7 @@ public interface HashedBytesValues {
|
|||
|
||||
private final Ordinals ordinals;
|
||||
|
||||
public Empty(EmptyOrdinals ordinals) {
|
||||
public Empty(Ordinals ordinals) {
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
|
|
|
@ -61,6 +61,7 @@ public class IndexFieldDataService extends AbstractIndexComponent implements Ind
|
|||
buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
|
||||
.put(Tuple.tuple("string", "concrete_bytes"), new ConcreteBytesRefIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("string", "paged_bytes"), new PagedBytesIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("string", "fst"), new FSTPackedIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder())
|
||||
|
|
|
@ -19,11 +19,16 @@
|
|||
|
||||
package org.elasticsearch.index.fielddata;
|
||||
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.util.*;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
|
||||
import org.elasticsearch.index.fielddata.util.DoubleArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.IntArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.LongArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.StringArrayRef;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -335,7 +340,7 @@ public interface StringValues {
|
|||
|
||||
private final Ordinals ordinals;
|
||||
|
||||
public Empty(EmptyOrdinals ordinals) {
|
||||
public Empty(Ordinals ordinals) {
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
|
@ -350,4 +355,152 @@ public interface StringValues {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class BytesValuesWrapper implements StringValues.WithOrdinals {
|
||||
private org.elasticsearch.index.fielddata.BytesValues.WithOrdinals delegate;
|
||||
private final CharsRef spare = new CharsRef();
|
||||
protected final Docs ordinals;
|
||||
protected final StringArrayRef arrayScratch;
|
||||
private final OrdinalIter iter = new OrdinalIter(this);
|
||||
|
||||
BytesValuesWrapper(BytesValues.WithOrdinals delegate) {
|
||||
arrayScratch = new StringArrayRef(new String[delegate.isMultiValued() ? 10 : 1], delegate.isMultiValued() ? 0 : 1);
|
||||
this.delegate = delegate;
|
||||
this.ordinals = delegate.ordinals();
|
||||
}
|
||||
|
||||
public static StringValues.WithOrdinals wrap(BytesValues.WithOrdinals values) {
|
||||
if (values.isMultiValued()) {
|
||||
return new MultiBytesValuesWrapper(values);
|
||||
} else {
|
||||
return new BytesValuesWrapper(values);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getValue(int docId) {
|
||||
final BytesRef value = delegate.getValue(docId);
|
||||
if (value != null) {
|
||||
UnicodeUtil.UTF8toUTF16(value, spare);
|
||||
return spare.toString();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(this.ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
assert !isMultiValued();
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return StringArrayRef.EMPTY;
|
||||
arrayScratch.values[0] = getValueByOrd(ord);
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
assert !isMultiValued();
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, getValueByOrd(ord));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Docs ordinals() {
|
||||
return delegate.ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValueByOrd(int ord) {
|
||||
final BytesRef value = delegate.getValueByOrd(ord);
|
||||
if (value != null) {
|
||||
UnicodeUtil.UTF8toUTF16(value, spare);
|
||||
return spare.toString();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return delegate.isMultiValued();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return delegate.hasValue(docId);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static final class MultiBytesValuesWrapper extends BytesValuesWrapper {
|
||||
MultiBytesValuesWrapper(org.elasticsearch.index.fielddata.BytesValues.WithOrdinals delegate) {
|
||||
super(delegate);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
assert isMultiValued();
|
||||
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) return StringArrayRef.EMPTY;
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
arrayScratch.values[arrayScratch.end++] = getValueByOrd(ords.get(i));
|
||||
}
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
assert isMultiValued();
|
||||
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
do {
|
||||
proc.onValue(docId, getValueByOrd(ord));
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static final class OrdinalIter implements StringValues.Iter {
|
||||
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
private final StringValues.WithOrdinals values;
|
||||
|
||||
OrdinalIter(StringValues.WithOrdinals values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
public OrdinalIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
final String valueByOrd = values.getValueByOrd(ord);
|
||||
ord = ordsIter.next();
|
||||
return valueByOrd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -107,26 +107,19 @@ public final class BytesRefValComparator extends FieldComparator<BytesRef> {
|
|||
return docTerms.getValue(doc).compareTo(value);
|
||||
}
|
||||
|
||||
public static class FilteredByteValues implements BytesValues {
|
||||
public static class FilteredByteValues extends BytesValues {
|
||||
|
||||
protected final BytesValues delegate;
|
||||
|
||||
public FilteredByteValues(BytesValues delegate) {
|
||||
super(delegate.isMultiValued());
|
||||
this.delegate = delegate;
|
||||
}
|
||||
|
||||
public boolean isMultiValued() {
|
||||
return delegate.isMultiValued();
|
||||
}
|
||||
|
||||
public boolean hasValue(int docId) {
|
||||
return delegate.hasValue(docId);
|
||||
}
|
||||
|
||||
public BytesRef getValue(int docId) {
|
||||
return delegate.getValue(docId);
|
||||
}
|
||||
|
||||
public BytesRef makeSafe(BytesRef bytes) {
|
||||
return delegate.makeSafe(bytes);
|
||||
}
|
||||
|
@ -146,6 +139,7 @@ public final class BytesRefValComparator extends FieldComparator<BytesRef> {
|
|||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
delegate.forEachValueInDoc(docId, proc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static final class MultiValuedBytesWrapper extends FilteredByteValues {
|
||||
|
|
|
@ -139,7 +139,7 @@ public abstract class ByteArrayAtomicFieldData implements AtomicNumericFieldData
|
|||
|
||||
@Override
|
||||
public HashedBytesValues getHashedBytesValues() {
|
||||
return new HashedBytesValues.StringBased(getStringValues());
|
||||
return new HashedBytesValues.BytesBased(getBytesValues());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -406,7 +406,7 @@ public abstract class ByteArrayAtomicFieldData implements AtomicNumericFieldData
|
|||
|
||||
@Override
|
||||
public HashedBytesValues getHashedBytesValues() {
|
||||
return new HashedBytesValues.StringBased(getStringValues());
|
||||
return new HashedBytesValues.BytesBased(getBytesValues());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -24,11 +24,10 @@ import org.elasticsearch.common.RamUsage;
|
|||
import org.elasticsearch.common.lucene.HashedBytesRef;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||
import org.elasticsearch.index.fielddata.StringValues;
|
||||
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.IntArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.StringArrayRef;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -105,7 +104,7 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
|
|||
|
||||
@Override
|
||||
public StringValues.WithOrdinals getStringValues() {
|
||||
return ordinals.isMultiValued() ? new StringValues.Multi(values, ordinals.ordinals()) : new StringValues.Single(values, ordinals.ordinals());
|
||||
return StringValues.BytesValuesWrapper.wrap(getBytesValues());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -113,19 +112,13 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
|
|||
return new ScriptDocValues.Strings(getStringValues());
|
||||
}
|
||||
|
||||
static abstract class BytesValues implements org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
|
||||
static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
|
||||
|
||||
protected final BytesRef[] values;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
BytesValues(BytesRef[] values, Ordinals.Docs ordinals) {
|
||||
super(ordinals);
|
||||
this.values = values;
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return this.ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -151,22 +144,12 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
|
|||
return values[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef makeSafe(BytesRef bytes) {
|
||||
// no need to do anything, its already concrete bytes...
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValue(int docId) {
|
||||
return values[ordinals.getOrd(docId)];
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratch(int docId, BytesRef ret) {
|
||||
BytesRef value = values[ordinals.getOrd(docId)];
|
||||
|
@ -180,72 +163,35 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
|
|||
return ret;
|
||||
}
|
||||
|
||||
static class Single extends BytesValues {
|
||||
static final class Single extends BytesValues {
|
||||
|
||||
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(BytesRef[] values, Ordinals.Docs ordinals) {
|
||||
super(values, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return BytesRefArrayRef.EMPTY;
|
||||
arrayScratch.values[0] = values[ord];
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(values[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, values[ord]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends BytesValues {
|
||||
static final class Multi extends BytesValues {
|
||||
|
||||
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
|
||||
private final ValuesIter iter;
|
||||
private final Iter.Multi iter;
|
||||
|
||||
Multi(BytesRef[] values, Ordinals.Docs ordinals) {
|
||||
super(values, ordinals);
|
||||
this.iter = new ValuesIter(values);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
assert ordinals.isMultiValued();
|
||||
this.iter = new Iter.Multi(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) return BytesRefArrayRef.EMPTY;
|
||||
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
arrayScratch.values[arrayScratch.end++] = values[ords.values[i]];
|
||||
}
|
||||
return arrayScratch;
|
||||
return getValuesMulti(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -255,45 +201,10 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
|
|||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
proc.onValue(docId, values[ord]);
|
||||
} while ((ord = iter.next()) != 0);
|
||||
forEachValueInDocMulti(docId, proc);
|
||||
}
|
||||
|
||||
static class ValuesIter implements Iter {
|
||||
|
||||
private final BytesRef[] values;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
ValuesIter(BytesRef[] values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
BytesRef value = values[ord];
|
||||
ord = ordsIter.next();
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -443,160 +354,6 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
|
|||
}
|
||||
}
|
||||
|
||||
static abstract class StringValues implements org.elasticsearch.index.fielddata.StringValues.WithOrdinals {
|
||||
|
||||
protected final BytesRef[] values;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected StringValues(BytesRef[] values, Ordinals.Docs ordinals) {
|
||||
this.values = values;
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValueByOrd(int ord) {
|
||||
BytesRef value = values[ord];
|
||||
if (value == null) return null;
|
||||
return value.utf8ToString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValue(int docId) {
|
||||
BytesRef value = values[ordinals.getOrd(docId)];
|
||||
if (value == null) return null;
|
||||
return value.utf8ToString();
|
||||
}
|
||||
|
||||
static class Single extends StringValues {
|
||||
|
||||
private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1);
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(BytesRef[] values, Ordinals.Docs ordinals) {
|
||||
super(values, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return StringArrayRef.EMPTY;
|
||||
BytesRef value = values[ord];
|
||||
arrayScratch.values[0] = value == null ? null : value.utf8ToString();
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(values[ord].utf8ToString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
proc.onValue(docId, values[ord].utf8ToString());
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends StringValues {
|
||||
|
||||
private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0);
|
||||
private final ValuesIter iter;
|
||||
|
||||
Multi(BytesRef[] values, Ordinals.Docs ordinals) {
|
||||
super(values, ordinals);
|
||||
iter = new ValuesIter(values);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) return StringArrayRef.EMPTY;
|
||||
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
BytesRef value = values[ords.values[i]];
|
||||
arrayScratch.values[arrayScratch.end++] = value == null ? null : value.utf8ToString();
|
||||
}
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
BytesRef value = values[ord];
|
||||
proc.onValue(docId, value == null ? null : value.utf8ToString());
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
static class ValuesIter implements StringValues.Iter {
|
||||
|
||||
private final BytesRef[] values;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
ValuesIter(BytesRef[] values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
BytesRef value = values[ord];
|
||||
ord = ordsIter.next();
|
||||
return value == null ? null : value.utf8ToString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Empty extends ConcreteBytesRefAtomicFieldData {
|
||||
|
||||
Empty(int numDocs) {
|
||||
|
@ -625,7 +382,7 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
|
|||
|
||||
@Override
|
||||
public BytesValues.WithOrdinals getBytesValues() {
|
||||
return new BytesValues.WithOrdinals.Empty((EmptyOrdinals) ordinals);
|
||||
return new BytesValues.WithOrdinals.Empty(ordinals.ordinals());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,250 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.Arc;
|
||||
import org.apache.lucene.util.fst.FST.BytesReader;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.HashedBytesValues;
|
||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||
import org.elasticsearch.index.fielddata.StringValues;
|
||||
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class FSTPackedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> {
|
||||
|
||||
public static FSTPackedBytesAtomicFieldData empty(int numDocs) {
|
||||
return new Empty(numDocs);
|
||||
}
|
||||
|
||||
// 0 ordinal in values means no value (its null)
|
||||
protected final Ordinals ordinals;
|
||||
|
||||
private int[] hashes;
|
||||
private long size = -1;
|
||||
|
||||
private final FST<Long> fst;
|
||||
|
||||
public FSTPackedBytesAtomicFieldData(FST<Long> fst, Ordinals ordinals) {
|
||||
this.ordinals = ordinals;
|
||||
this.fst = fst;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return ordinals.isMultiValued();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDocs() {
|
||||
return ordinals.getNumDocs();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValuesOrdered() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
if (size == -1) {
|
||||
long size = ordinals.getMemorySizeInBytes();
|
||||
// FST
|
||||
size += fst == null ? 0 : fst.sizeInBytes();
|
||||
this.size = size;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesValues.WithOrdinals getBytesValues() {
|
||||
assert fst != null;
|
||||
return ordinals.isMultiValued() ? new BytesValues.Multi(fst, ordinals.ordinals()) : new BytesValues.Single(fst, ordinals.ordinals());
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesValues.WithOrdinals getHashedBytesValues() {
|
||||
assert fst != null;
|
||||
if (hashes == null) {
|
||||
BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
|
||||
int[] hashes = new int[ordinals.getMaxOrd()];
|
||||
InputOutput<Long> next;
|
||||
int i = 0;
|
||||
try {
|
||||
while((next = fstEnum.next()) != null) {
|
||||
hashes[i++] = next.input.hashCode();
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
//bogus
|
||||
}
|
||||
this.hashes = hashes;
|
||||
}
|
||||
return ordinals.isMultiValued() ? new HashedBytesValuesWithOrds.Multi(getBytesValues(), hashes) : new HashedBytesValuesWithOrds.Single(getBytesValues(), hashes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringValues.WithOrdinals getStringValues() {
|
||||
assert fst != null;
|
||||
return StringValues.BytesValuesWrapper.wrap(getBytesValues());
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScriptDocValues.Strings getScriptValues() {
|
||||
assert fst != null;
|
||||
return new ScriptDocValues.Strings(getStringValues());
|
||||
}
|
||||
|
||||
static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
|
||||
|
||||
protected final FST<Long> fst;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected final BytesRef scratch = new BytesRef();
|
||||
// per-thread resources
|
||||
protected final BytesReader in ;
|
||||
protected final Arc<Long> firstArc = new Arc<Long>();
|
||||
protected final Arc<Long> scratchArc = new Arc<Long>();
|
||||
protected final IntsRef scratchInts = new IntsRef();
|
||||
|
||||
BytesValues(FST<Long> fst, Ordinals.Docs ordinals) {
|
||||
super(ordinals);
|
||||
this.fst = fst;
|
||||
this.ordinals = ordinals;
|
||||
in = fst.getBytesReader();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
|
||||
in.setPosition(0);
|
||||
fst.getFirstArc(firstArc);
|
||||
try {
|
||||
IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
|
||||
ret.grow(output.length);
|
||||
ret.length = ret.offset = 0;
|
||||
Util.toBytesRef(output, ret);
|
||||
} catch (IOException ex) {
|
||||
//bogus
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static final class Single extends BytesValues {
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(FST<Long> fst, Ordinals.Docs ordinals) {
|
||||
super(fst, ordinals);
|
||||
assert !ordinals.isMultiValued();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(getValueByOrd(ord));
|
||||
}
|
||||
}
|
||||
|
||||
static final class Multi extends BytesValues {
|
||||
|
||||
private final Iter.Multi iter;
|
||||
|
||||
Multi(FST<Long> fst, Ordinals.Docs ordinals) {
|
||||
super(fst, ordinals);
|
||||
assert ordinals.isMultiValued();
|
||||
this.iter = new Iter.Multi(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
return getValuesMulti(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
forEachValueInDocMulti(docId, proc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static class Empty extends FSTPackedBytesAtomicFieldData {
|
||||
|
||||
Empty(int numDocs) {
|
||||
super(null, new EmptyOrdinals(numDocs));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDocs() {
|
||||
return ordinals.getNumDocs();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValuesOrdered() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesValues.WithOrdinals getBytesValues() {
|
||||
return new BytesValues.WithOrdinals.Empty(ordinals.ordinals());
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesValues.WithOrdinals getHashedBytesValues() {
|
||||
return new HashedBytesValuesWithOrds.Empty(ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringValues.WithOrdinals getStringValues() {
|
||||
return new StringValues.WithOrdinals.Empty(ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScriptDocValues.Strings getScriptValues() {
|
||||
return ScriptDocValues.EMPTY_STRINGS;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.AbstractIndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.FieldDataType;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
||||
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
||||
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class FSTPackedIndexFieldData extends AbstractIndexFieldData<FSTPackedBytesAtomicFieldData> implements IndexFieldData.WithOrdinals<FSTPackedBytesAtomicFieldData> {
|
||||
|
||||
public static class Builder implements IndexFieldData.Builder {
|
||||
|
||||
@Override
|
||||
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
|
||||
return new FSTPackedIndexFieldData(index, indexSettings, fieldNames, type, cache);
|
||||
}
|
||||
}
|
||||
|
||||
public FSTPackedIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
|
||||
super(index, indexSettings, fieldNames, fieldDataType, cache);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean valuesOrdered() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSTPackedBytesAtomicFieldData load(AtomicReaderContext context) {
|
||||
try {
|
||||
return cache.load(context, this);
|
||||
} catch (Throwable e) {
|
||||
if (e instanceof ElasticSearchException) {
|
||||
throw (ElasticSearchException) e;
|
||||
} else {
|
||||
throw new ElasticSearchException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSTPackedBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
|
||||
AtomicReader reader = context.reader();
|
||||
|
||||
Terms terms = reader.terms(getFieldNames().indexName());
|
||||
if (terms == null) {
|
||||
return FSTPackedBytesAtomicFieldData.empty(reader.maxDoc());
|
||||
}
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
|
||||
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
|
||||
final IntsRef scratch = new IntsRef();
|
||||
|
||||
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
|
||||
try {
|
||||
|
||||
// 0 is reserved for "unset"
|
||||
fstBuilder.add(Util.toIntsRef(new BytesRef(), scratch), 0l);
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
DocsEnum docsEnum = null;
|
||||
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
|
||||
final int termOrd = builder.nextOrdinal();
|
||||
fstBuilder.add(Util.toIntsRef(term, scratch), (long)termOrd);
|
||||
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
|
||||
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
|
||||
builder.addDoc(docId);
|
||||
}
|
||||
}
|
||||
|
||||
FST<Long> fst = fstBuilder.finish();
|
||||
|
||||
final Ordinals ordinals = builder.build(fieldDataType.getSettings());
|
||||
|
||||
return new FSTPackedBytesAtomicFieldData(fst, ordinals);
|
||||
} finally {
|
||||
builder.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
|
||||
// TODO support "missingValue" for sortMissingValue options here...
|
||||
return new BytesRefFieldComparatorSource(this, sortMode);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,179 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.lucene.HashedBytesRef;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.plain.FSTPackedBytesAtomicFieldData.BytesValues;
|
||||
|
||||
/**
|
||||
* shared utils class - should be factored into HashedBytesValues
|
||||
*/
|
||||
abstract class HashedBytesValuesWithOrds implements org.elasticsearch.index.fielddata.HashedBytesValues.WithOrdinals {
|
||||
|
||||
protected final int[] hashes;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected final BytesRef scratch1 = new BytesRef();
|
||||
protected final HashedBytesRef scratch = new HashedBytesRef();
|
||||
protected final BytesValues.WithOrdinals withOrds;
|
||||
|
||||
HashedBytesValuesWithOrds(BytesValues.WithOrdinals withOrds, int[] hashes) {
|
||||
this.hashes = hashes;
|
||||
this.ordinals = withOrds.ordinals();
|
||||
this.withOrds = withOrds;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return withOrds.isMultiValued();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected final void forEachValueInDocMulti(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
proc.onValue(docId, scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]));
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return this.ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getValueByOrd(int ord) {
|
||||
return scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getSafeValueByOrd(int ord) {
|
||||
return new HashedBytesRef(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef makeSafe(HashedBytesRef bytes) {
|
||||
return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
return scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
|
||||
}
|
||||
|
||||
final static class Single extends HashedBytesValuesWithOrds {
|
||||
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(BytesValues.WithOrdinals withOrds, int[] hashes) {
|
||||
super(withOrds, hashes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final static class Multi extends HashedBytesValuesWithOrds {
|
||||
private final HashedBytesValuesWithOrds.Multi.MultiIter iter;
|
||||
|
||||
Multi(BytesValues.WithOrdinals withOrds, int[] hashes) {
|
||||
super(withOrds, hashes);
|
||||
this.iter = new MultiIter(withOrds, hashes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
forEachValueInDocMulti(docId, proc);
|
||||
}
|
||||
|
||||
final static class MultiIter implements Iter {
|
||||
|
||||
private final int[] hashes;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
private final BytesRef scratch1 = new BytesRef();
|
||||
private final HashedBytesRef scratch = new HashedBytesRef();
|
||||
private final BytesValues.WithOrdinals withOrds;
|
||||
|
||||
MultiIter(BytesValues.WithOrdinals withOrds, int[] hashes) {
|
||||
this.hashes = hashes;
|
||||
this.withOrds = withOrds;
|
||||
}
|
||||
|
||||
public HashedBytesValuesWithOrds.Multi.MultiIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef next() {
|
||||
HashedBytesRef value = scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
|
||||
ord = ordsIter.next();
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -23,15 +23,13 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.elasticsearch.common.RamUsage;
|
||||
import org.elasticsearch.common.lucene.HashedBytesRef;
|
||||
import org.elasticsearch.index.fielddata.AtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.HashedBytesValues;
|
||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||
import org.elasticsearch.index.fielddata.StringValues;
|
||||
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.IntArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.StringArrayRef;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -106,12 +104,12 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
|
|||
}
|
||||
this.hashes = hashes;
|
||||
}
|
||||
return ordinals.isMultiValued() ? new HashedBytesValues.Multi(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals()) : new HashedBytesValues.Single(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals());
|
||||
return ordinals.isMultiValued() ? new HashedBytesValuesWithOrds.Multi(getBytesValues(), hashes) : new HashedBytesValuesWithOrds.Single(getBytesValues(), hashes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringValues.WithOrdinals getStringValues() {
|
||||
return ordinals.isMultiValued() ? new StringValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new StringValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
|
||||
return StringValues.BytesValuesWrapper.wrap(getBytesValues());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -119,7 +117,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
|
|||
return new ScriptDocValues.Strings(getStringValues());
|
||||
}
|
||||
|
||||
static abstract class BytesValues implements org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
|
||||
static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
|
||||
|
||||
protected final PagedBytes.Reader bytes;
|
||||
protected final PackedInts.Reader termOrdToBytesOffset;
|
||||
|
@ -128,6 +126,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
|
|||
protected final BytesRef scratch = new BytesRef();
|
||||
|
||||
BytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(ordinals);
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.ordinals = ordinals;
|
||||
|
@ -138,70 +137,20 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
|
|||
return this.ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueByOrd(int ord) {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
return scratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
|
||||
bytes.fill(ret, termOrdToBytesOffset.get(ord));
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getSafeValueByOrd(int ord) {
|
||||
final BytesRef retVal = new BytesRef();
|
||||
bytes.fill(retVal, termOrdToBytesOffset.get(ord));
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
static final class Single extends BytesValues {
|
||||
|
||||
@Override
|
||||
public BytesRef makeSafe(BytesRef bytes) {
|
||||
return BytesRef.deepCopyOf(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
return scratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratch(int docId, BytesRef ret) {
|
||||
bytes.fill(ret, termOrdToBytesOffset.get(ordinals.getOrd(docId)));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static class Single extends BytesValues {
|
||||
|
||||
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return BytesRefArrayRef.EMPTY;
|
||||
arrayScratch.values[0] = new BytesRef();
|
||||
bytes.fill(arrayScratch.values[0], termOrdToBytesOffset.get(ord));
|
||||
return arrayScratch;
|
||||
assert !ordinals.isMultiValued();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -212,46 +161,21 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
|
|||
return iter.reset(scratch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
proc.onValue(docId, scratch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends BytesValues {
|
||||
static final class Multi extends BytesValues {
|
||||
|
||||
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
|
||||
private final ValuesIter iter;
|
||||
private final Iter.Multi iter;
|
||||
|
||||
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, ordinals);
|
||||
this.iter = new ValuesIter(bytes, termOrdToBytesOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
assert ordinals.isMultiValued();
|
||||
this.iter = new Iter.Multi(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) return BytesRefArrayRef.EMPTY;
|
||||
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
final BytesRef bytesRef = new BytesRef();
|
||||
bytes.fill(bytesRef, termOrdToBytesOffset.get(ords.values[i]));
|
||||
arrayScratch.values[arrayScratch.end++] = bytesRef;
|
||||
}
|
||||
return arrayScratch;
|
||||
return getValuesMulti(docId);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -261,369 +185,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
|
|||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
proc.onValue(docId, scratch);
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
static class ValuesIter implements Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
ord = ordsIter.next();
|
||||
return scratch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class HashedBytesValues implements org.elasticsearch.index.fielddata.HashedBytesValues.WithOrdinals {
|
||||
|
||||
protected final PagedBytes.Reader bytes;
|
||||
protected final PackedInts.Reader termOrdToBytesOffset;
|
||||
protected final int[] hashes;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected final BytesRef scratch1 = new BytesRef();
|
||||
protected final HashedBytesRef scratch = new HashedBytesRef();
|
||||
|
||||
/**
 * Stores the paged bytes, the ordinal-to-offset table, the per-ordinal hashes
 * and the per-document ordinals that all lookups of this instance use.
 */
HashedBytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
    this.ordinals = ordinals;
    this.hashes = hashes;
    this.termOrdToBytesOffset = termOrdToBytesOffset;
    this.bytes = bytes;
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return this.ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getValueByOrd(int ord) {
|
||||
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
|
||||
return scratch.reset(scratch1, hashes[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getSafeValueByOrd(int ord) {
|
||||
final BytesRef bytesRef = new BytesRef();
|
||||
bytes.fill(bytesRef, termOrdToBytesOffset.get(ord));
|
||||
return new HashedBytesRef(bytesRef, hashes[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef makeSafe(HashedBytesRef bytes) {
|
||||
return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
|
||||
return scratch.reset(scratch1, hashes[ord]);
|
||||
}
|
||||
|
||||
static class Single extends HashedBytesValues {
|
||||
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, hashes, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
|
||||
return iter.reset(scratch.reset(scratch1, hashes[ord]));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
|
||||
proc.onValue(docId, scratch.reset(scratch1, hashes[ord]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends HashedBytesValues {
|
||||
|
||||
private final ValuesIter iter;
|
||||
|
||||
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, hashes, ordinals);
|
||||
this.iter = new ValuesIter(bytes, termOrdToBytesOffset, hashes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
|
||||
proc.onValue(docId, scratch.reset(scratch1, hashes[ord]));
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
static class ValuesIter implements Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final int[] hashes;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
private final BytesRef scratch1 = new BytesRef();
|
||||
private final HashedBytesRef scratch = new HashedBytesRef();
|
||||
|
||||
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.hashes = hashes;
|
||||
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef next() {
|
||||
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
|
||||
HashedBytesRef value = scratch.reset(scratch1, hashes[ord]);
|
||||
ord = ordsIter.next();
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class StringValues implements org.elasticsearch.index.fielddata.StringValues.WithOrdinals {
|
||||
|
||||
protected final PagedBytes.Reader bytes;
|
||||
protected final PackedInts.Reader termOrdToBytesOffset;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected final BytesRef scratch = new BytesRef();
|
||||
|
||||
/**
 * Stores the byte storage, the ordinal-to-offset mapping and the per-document ordinals
 * used by every lookup in this class.
 */
protected StringValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
    this.ordinals = ordinals;
    this.termOrdToBytesOffset = termOrdToBytesOffset;
    this.bytes = bytes;
}
|
||||
|
||||
/**
 * @return the per-document ordinals passed to the constructor
 */
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValueByOrd(int ord) {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
return scratch.utf8ToString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
return scratch.utf8ToString();
|
||||
}
|
||||
|
||||
static class Single extends StringValues {
|
||||
|
||||
private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1);
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return StringArrayRef.EMPTY;
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
arrayScratch.values[0] = scratch.utf8ToString();
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
return iter.reset(scratch.utf8ToString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
proc.onValue(docId, scratch.utf8ToString());
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends StringValues {
|
||||
|
||||
private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0);
|
||||
private final ValuesIter iter;
|
||||
|
||||
/**
 * Passes the shared state to the base class and creates the reusable values iterator
 * over the same byte storage and offset mapping.
 */
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
    super(bytes, termOrdToBytesOffset, ordinals);
    this.iter = new ValuesIter(bytes, termOrdToBytesOffset);
}
|
||||
|
||||
/**
 * Always reports true for this variant.
 */
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) return StringArrayRef.EMPTY;
|
||||
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ords.values[i]));
|
||||
arrayScratch.values[arrayScratch.end++] = scratch.utf8ToString();
|
||||
}
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
proc.onValue(docId, scratch.utf8ToString());
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
static class ValuesIter implements StringValues.Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
/**
 * Stores the byte storage and the ordinal-to-offset mapping used by {@code next()}.
 */
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
    this.termOrdToBytesOffset = termOrdToBytesOffset;
    this.bytes = bytes;
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Reports whether a value is still pending. One ordinal is kept read ahead in
 * {@code ord}; a value of 0 is treated as the end of the sequence.
 */
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
ord = ordsIter.next();
|
||||
return scratch.utf8ToString();
|
||||
}
|
||||
forEachValueInDocMulti(docId, proc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -657,12 +219,12 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
|
|||
|
||||
/**
 * Returns an empty ordinal-backed bytes values instance built from this object's ordinals.
 */
@Override
|
||||
public BytesValues.WithOrdinals getBytesValues() {
|
||||
// NOTE(review): two return statements appear back to back. This looks like the removed and
// added lines of a diff hunk shown without +/- markers; confirm which one the file should carry.
return new BytesValues.WithOrdinals.Empty((EmptyOrdinals) ordinals);
|
||||
return new BytesValues.WithOrdinals.Empty(ordinals.ordinals());
|
||||
}
|
||||
|
||||
/**
 * Returns an empty ordinal-backed hashed bytes values instance built from this object's ordinals.
 */
@Override
|
||||
public HashedBytesValues.WithOrdinals getHashedBytesValues() {
|
||||
// NOTE(review): two return statements appear back to back. This looks like the removed and
// added lines of a diff hunk shown without +/- markers; confirm which one the file should carry.
return new HashedBytesValues.Empty((EmptyOrdinals) ordinals);
|
||||
return new HashedBytesValuesWithOrds.Empty(ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -85,7 +85,6 @@ public class PagedBytesIndexFieldData extends AbstractIndexFieldData<PagedBytesA
|
|||
|
||||
final PagedBytes bytes = new PagedBytes(15);
|
||||
int startBytesBPV;
|
||||
int startTermsBPV;
|
||||
int startNumUniqueTerms;
|
||||
|
||||
int maxDoc = reader.maxDoc();
|
||||
|
@ -109,12 +108,10 @@ public class PagedBytesIndexFieldData extends AbstractIndexFieldData<PagedBytesA
|
|||
}
|
||||
|
||||
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4);
|
||||
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
|
||||
|
||||
startNumUniqueTerms = (int) numUniqueTerms;
|
||||
} else {
|
||||
startBytesBPV = 1;
|
||||
startTermsBPV = 1;
|
||||
startNumUniqueTerms = 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.unit.index.fielddata;
|
||||
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.index.fielddata.FieldDataType;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
*/
|
||||
@Test
|
||||
public class FSTPackedBytesStringFieldDataTests extends StringFieldDataTests {
|
||||
|
||||
@Override
|
||||
protected FieldDataType getFieldDataType() {
|
||||
return new FieldDataType("string", ImmutableSettings.builder().put("format", "fst"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue