Added FST based FieldData implementation holding all data in a per segment FST.

This commit factors out a common API for BytesValues-based implementations into shared code and reduces code duplication.
This commit is contained in:
Simon Willnauer 2013-03-26 17:28:49 +01:00
parent 72c76c2799
commit 129f02623b
13 changed files with 959 additions and 821 deletions

View File

@ -21,72 +21,89 @@ package org.elasticsearch.index.fielddata;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
import org.elasticsearch.index.fielddata.util.IntArrayRef;
import org.elasticsearch.index.fielddata.util.StringArrayRef;
/**
*/
public interface BytesValues {
static final BytesValues EMPTY = new Empty();
public abstract class BytesValues {
public static final BytesValues EMPTY = new Empty();
private boolean multiValued;
protected final BytesRef scratch = new BytesRef();
protected BytesValues(boolean multiValued) {
this.multiValued = multiValued;
}
/**
* Is one of the documents in this field data values is multi valued?
*/
boolean isMultiValued();
public final boolean isMultiValued() {
return multiValued;
}
/**
* Is there a value for this doc?
*/
boolean hasValue(int docId);
public abstract boolean hasValue(int docId);
/**
* Converts the provided bytes to "safe" ones from a "non" safe call made (if needed).
*/
BytesRef makeSafe(BytesRef bytes);
public BytesRef makeSafe(BytesRef bytes) {
return BytesRef.deepCopyOf(bytes);
}
/**
* Returns a bytes value for a docId. Note, the content of it might be shared across invocation.
*/
BytesRef getValue(int docId);
public BytesRef getValue(int docId) {
if (hasValue(docId)) {
return getValueScratch(docId, scratch);
}
return null;
}
/**
* Returns the bytes value for the docId, with the provided "ret" which will be filled with the
* result which will also be returned. If there is no value for this docId, the length will be 0.
* Note, the bytes are not "safe".
*/
BytesRef getValueScratch(int docId, BytesRef ret);
public abstract BytesRef getValueScratch(int docId, BytesRef ret);
/**
* Returns an array wrapping all the bytes values for a doc. The content is guaranteed not to be shared.
*/
BytesRefArrayRef getValues(int docId);
public abstract BytesRefArrayRef getValues(int docId);
/**
* Returns a bytes value iterator for a docId. Note, the content of it might be shared across invocation.
*/
Iter getIter(int docId);
public abstract Iter getIter(int docId);
/**
* Go over all the possible values in their BytesRef format for a specific doc.
*/
void forEachValueInDoc(int docId, ValueInDocProc proc);
public abstract void forEachValueInDoc(int docId, ValueInDocProc proc);
public static interface ValueInDocProc {
void onValue(int docId, BytesRef value);
void onMissing(int docId);
}
static interface Iter {
public static interface Iter {
boolean hasNext();
BytesRef next();
static class Empty implements Iter {
public static class Empty implements Iter {
public static final Empty INSTANCE = new Empty();
@ -101,7 +118,7 @@ public interface BytesValues {
}
}
static class Single implements Iter {
public final static class Single implements Iter {
public BytesRef value;
public boolean done;
@ -124,12 +141,43 @@ public interface BytesValues {
return value;
}
}
/**
 * Iterator over the values of one document of a multi-valued, ordinal-backed
 * {@link BytesValues.WithOrdinals} instance. Ordinals are pulled from the
 * {@link Ordinals.Docs.Iter} handed to {@link #reset}, an ordinal of {@code 0}
 * ends the iteration. Every call to {@link #next()} fills and returns the same
 * scratch {@link BytesRef}, so callers must copy the bytes if they keep them.
 */
static final class Multi implements Iter {

    // single buffer handed out by every call to next()
    private final BytesRef scratch = new BytesRef();
    private BytesValues.WithOrdinals withOrds;
    private Ordinals.Docs.Iter ordsIter;
    // ordinal resolved by the upcoming next() call; 0 means exhausted
    private int ord;

    public Multi(WithOrdinals withOrds) {
        assert withOrds.isMultiValued();
        this.withOrds = withOrds;
    }

    /**
     * Points this iterator at a new ordinal iterator and pre-fetches its first ordinal.
     */
    public Multi reset(Ordinals.Docs.Iter ordsIter) {
        this.ordsIter = ordsIter;
        ord = this.ordsIter.next();
        return this;
    }

    @Override
    public boolean hasNext() {
        final boolean exhausted = (ord == 0);
        return !exhausted;
    }

    @Override
    public BytesRef next() {
        // resolve the pre-fetched ordinal first, then advance to the following one
        withOrds.getValueScratchByOrd(ord, scratch);
        ord = ordsIter.next();
        return scratch;
    }
}
}
static class Empty implements BytesValues {
@Override
public boolean isMultiValued() {
return false;
public static class Empty extends BytesValues {
public Empty() {
super(false);
}
@Override
@ -137,11 +185,6 @@ public interface BytesValues {
return false;
}
@Override
public BytesRef getValue(int docId) {
return null;
}
@Override
public BytesRefArrayRef getValues(int docId) {
return BytesRefArrayRef.EMPTY;
@ -157,13 +200,6 @@ public interface BytesValues {
proc.onMissing(docId);
}
@Override
public BytesRef makeSafe(BytesRef bytes) {
//todo we can also throw an excepiton here as the only value this method accepts is a scratch value...
//todo ...extracted from this ByteValues, in our case, there are not values, so this should never be called!?!?
return BytesRef.deepCopyOf(bytes);
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
ret.length = 0;
@ -171,43 +207,25 @@ public interface BytesValues {
}
}
public static class StringBased implements BytesValues {
public static class StringBased extends BytesValues {
private final StringValues values;
protected final BytesRef scratch = new BytesRef();
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
private final ValueIter valueIter = new ValueIter();
private final Proc proc = new Proc();
public StringBased(StringValues values) {
super(values.isMultiValued());
this.values = values;
}
@Override
public boolean isMultiValued() {
return values.isMultiValued();
}
@Override
public boolean hasValue(int docId) {
return values.hasValue(docId);
}
@Override
public BytesRef makeSafe(BytesRef bytes) {
// we need to make a copy, since we use scratch to provide it
return BytesRef.deepCopyOf(bytes);
}
@Override
public BytesRef getValue(int docId) {
String value = values.getValue(docId);
if (value == null) return null;
scratch.copyChars(value);
return scratch;
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
String value = values.getValue(docId);
@ -244,7 +262,7 @@ public interface BytesValues {
values.forEachValueInDoc(docId, this.proc.reset(proc));
}
static class ValueIter implements Iter {
public static class ValueIter implements Iter {
private final BytesRef scratch = new BytesRef();
private StringValues.Iter iter;
@ -266,7 +284,7 @@ public interface BytesValues {
}
}
static class Proc implements StringValues.ValueInDocProc {
public static class Proc implements StringValues.ValueInDocProc {
private final BytesRef scratch = new BytesRef();
private BytesValues.ValueInDocProc proc;
@ -292,32 +310,106 @@ public interface BytesValues {
/**
* Bytes values that are based on ordinals.
*/
static interface WithOrdinals extends BytesValues {
public static abstract class WithOrdinals extends BytesValues {
protected final Docs ordinals;
protected final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
Ordinals.Docs ordinals();
/**
 * Creates values backed by the given per-document ordinals. The multi-valued flag
 * passed to the base class is taken from {@code ordinals.isMultiValued()}.
 */
protected WithOrdinals(Ordinals.Docs ordinals) {
super(ordinals.isMultiValued());
this.ordinals = ordinals;
}
BytesRef getValueByOrd(int ord);
/**
 * Returns the per-document ordinals this instance was constructed with.
 */
public Ordinals.Docs ordinals() {
return ordinals;
}
/**
 * Resolves the given ordinal into the shared scratch instance. The returned
 * reference is whatever {@link #getValueScratchByOrd} hands back and is not safe
 * to keep across calls.
 */
public BytesRef getValueByOrd(int ord) {
    final BytesRef resolved = getValueScratchByOrd(ord, scratch);
    return resolved;
}
/**
 * A document has a value when the ordinal reported for it is not {@code 0}.
 */
@Override
public boolean hasValue(int docId) {
    final int ord = ordinals.getOrd(docId);
    return ord != 0;
}
/**
 * Single-valued implementation: returns the shared array scratch holding one freshly
 * allocated copy of the document's value, or {@link BytesRefArrayRef#EMPTY} when the
 * document's ordinal is {@code 0}.
 */
@Override
public BytesRefArrayRef getValues(int docId) {
    assert !isMultiValued();
    final int ord = ordinals.getOrd(docId);
    if (ord != 0) {
        arrayScratch.values[0] = getSafeValueByOrd(ord);
        // expose exactly one slot: [0, 1)
        arrayScratch.start = 0;
        arrayScratch.end = 1;
        return arrayScratch;
    }
    return BytesRefArrayRef.EMPTY;
}
/**
 * Single-valued implementation: reports the document as missing when its ordinal is
 * {@code 0}, otherwise passes the result of {@link #getValue(int)} to the callback.
 */
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
    assert !isMultiValued();
    final boolean missing = ordinals.getOrd(docId) == 0;
    if (missing) {
        proc.onMissing(docId);
        return;
    }
    proc.onValue(docId, getValue(docId));
}
/**
 * Multi-valued helper for subclasses: collects every value of the document into the
 * shared array scratch. Each slot receives its own newly allocated {@link BytesRef};
 * {@link BytesRefArrayRef#EMPTY} is returned when the document has no ordinals.
 */
protected BytesRefArrayRef getValuesMulti(int docId) {
    assert isMultiValued();
    final IntArrayRef docOrds = ordinals.getOrds(docId);
    final int count = docOrds.size();
    if (count == 0) {
        return BytesRefArrayRef.EMPTY;
    }
    arrayScratch.reset(count);
    int idx = docOrds.start;
    while (idx < docOrds.end) {
        arrayScratch.values[arrayScratch.end++] = getValueScratchByOrd(docOrds.values[idx], new BytesRef());
        idx++;
    }
    return arrayScratch;
}
/**
 * Multi-valued helper for subclasses: feeds every value of the document to the callback,
 * or reports the document as missing when the first ordinal is {@code 0}. The same
 * scratch instance is passed to every {@code onValue} call.
 */
protected void forEachValueInDocMulti(int docId, ValueInDocProc proc) {
    assert isMultiValued();
    final Ordinals.Docs.Iter docOrds = ordinals.getIter(docId);
    final int first = docOrds.next();
    if (first == 0) {
        proc.onMissing(docId);
        return;
    }
    for (int current = first; current != 0; current = docOrds.next()) {
        getValueScratchByOrd(current, scratch);
        proc.onValue(docId, scratch);
    }
}
/**
 * Returns the document's value resolved into the shared scratch, or {@code null}
 * when the document's ordinal is {@code 0}.
 */
@Override
public BytesRef getValue(int docId) {
    final int ord = ordinals.getOrd(docId);
    return ord == 0 ? null : getValueScratchByOrd(ord, scratch);
}
/**
 * Looks up the document's ordinal and resolves it into the caller supplied {@code ret}.
 * The ordinal is forwarded as is, including {@code 0}.
 */
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
    final int ord = ordinals.getOrd(docId);
    return getValueScratchByOrd(ord, ret);
}
/**
 * Resolves the ordinal into a newly allocated {@link BytesRef} instead of the shared scratch.
 */
public BytesRef getSafeValueByOrd(int ord) {
    final BytesRef fresh = new BytesRef();
    return getValueScratchByOrd(ord, fresh);
}
/**
* Returns the bytes value for the docId, with the provided "ret" which will be filled with the
* result which will also be returned. If there is no value for this docId, the length will be 0.
* Note, the bytes are not "safe".
*/
BytesRef getValueScratchByOrd(int ord, BytesRef ret);
public abstract BytesRef getValueScratchByOrd(int ord, BytesRef ret);
BytesRef getSafeValueByOrd(int ord);
public static class Empty extends WithOrdinals {
public static class Empty extends BytesValues.Empty implements WithOrdinals {
private final Ordinals ordinals;
public Empty(EmptyOrdinals ordinals) {
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return ordinals.ordinals();
public Empty(Ordinals.Docs ordinals) {
super(ordinals);
}
@Override
@ -335,38 +427,32 @@ public interface BytesValues {
public BytesRef getSafeValueByOrd(int ord) {
return null;
}
}
public static class StringBased extends BytesValues.StringBased implements WithOrdinals {
private final StringValues.WithOrdinals values;
public StringBased(StringValues.WithOrdinals values) {
super(values);
this.values = values;
@Override
public boolean hasValue(int docId) {
return false;
}
@Override
public Ordinals.Docs ordinals() {
return values.ordinals();
public BytesRefArrayRef getValues(int docId) {
return BytesRefArrayRef.EMPTY;
}
@Override
public BytesRef getValueByOrd(int ord) {
scratch.copyChars(values.getValueByOrd(ord));
return scratch;
public Iter getIter(int docId) {
return Iter.Empty.INSTANCE;
}
@Override
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
ret.copyChars(values.getValueByOrd(ord));
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
proc.onMissing(docId);
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
ret.length = 0;
return ret;
}
@Override
public BytesRef getSafeValueByOrd(int ord) {
return new BytesRef(values.getValueByOrd(ord));
}
}
}
}

View File

@ -22,7 +22,6 @@ package org.elasticsearch.index.fielddata;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
/**
@ -346,7 +345,7 @@ public interface HashedBytesValues {
private final Ordinals ordinals;
public Empty(EmptyOrdinals ordinals) {
public Empty(Ordinals ordinals) {
this.ordinals = ordinals;
}

View File

@ -61,6 +61,7 @@ public class IndexFieldDataService extends AbstractIndexComponent implements Ind
buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
.put(Tuple.tuple("string", "concrete_bytes"), new ConcreteBytesRefIndexFieldData.Builder())
.put(Tuple.tuple("string", "paged_bytes"), new PagedBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", "fst"), new FSTPackedIndexFieldData.Builder())
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder())

View File

@ -19,11 +19,16 @@
package org.elasticsearch.index.fielddata;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.util.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.fielddata.util.DoubleArrayRef;
import org.elasticsearch.index.fielddata.util.IntArrayRef;
import org.elasticsearch.index.fielddata.util.LongArrayRef;
import org.elasticsearch.index.fielddata.util.StringArrayRef;
/**
*/
@ -335,7 +340,7 @@ public interface StringValues {
private final Ordinals ordinals;
public Empty(EmptyOrdinals ordinals) {
public Empty(Ordinals ordinals) {
this.ordinals = ordinals;
}
@ -350,4 +355,152 @@ public interface StringValues {
}
}
}
/**
 * Presents an ordinal-backed {@link BytesValues.WithOrdinals} instance as
 * {@link StringValues.WithOrdinals} by converting the UTF-8 bytes of each value to a
 * new {@link String}. This class implements the single-valued access paths;
 * {@link #wrap} picks the multi-valued subclass when the delegate is multi-valued.
 */
public static class BytesValuesWrapper implements StringValues.WithOrdinals {

    private org.elasticsearch.index.fielddata.BytesValues.WithOrdinals delegate;
    // reused UTF-16 buffer for the bytes -> String conversion
    private final CharsRef spare = new CharsRef();
    protected final Docs ordinals;
    protected final StringArrayRef arrayScratch;
    private final OrdinalIter iter = new OrdinalIter(this);

    BytesValuesWrapper(BytesValues.WithOrdinals delegate) {
        // multi-valued: 10 slots with nothing exposed; single-valued: one slot that is always exposed
        final int capacity = delegate.isMultiValued() ? 10 : 1;
        final int exposed = delegate.isMultiValued() ? 0 : 1;
        arrayScratch = new StringArrayRef(new String[capacity], exposed);
        this.delegate = delegate;
        this.ordinals = delegate.ordinals();
    }

    /**
     * Wraps the given bytes values, choosing the implementation that matches
     * {@code values.isMultiValued()}.
     */
    public static StringValues.WithOrdinals wrap(BytesValues.WithOrdinals values) {
        return values.isMultiValued() ? new MultiBytesValuesWrapper(values) : new BytesValuesWrapper(values);
    }

    /**
     * Converts UTF-8 bytes into a new String through the shared {@link #spare} buffer;
     * {@code null} stays {@code null}.
     */
    private String utf8ToString(BytesRef utf8) {
        if (utf8 == null) {
            return null;
        }
        UnicodeUtil.UTF8toUTF16(utf8, spare);
        return spare.toString();
    }

    @Override
    public String getValue(int docId) {
        return utf8ToString(delegate.getValue(docId));
    }

    @Override
    public Iter getIter(int docId) {
        return iter.reset(this.ordinals.getIter(docId));
    }

    @Override
    public StringArrayRef getValues(int docId) {
        assert !isMultiValued();
        final int ord = ordinals.getOrd(docId);
        if (ord != 0) {
            arrayScratch.values[0] = getValueByOrd(ord);
            return arrayScratch;
        }
        return StringArrayRef.EMPTY;
    }

    @Override
    public void forEachValueInDoc(int docId, ValueInDocProc proc) {
        assert !isMultiValued();
        final int ord = ordinals.getOrd(docId);
        if (ord != 0) {
            proc.onValue(docId, getValueByOrd(ord));
        } else {
            proc.onMissing(docId);
        }
    }

    @Override
    public Docs ordinals() {
        return delegate.ordinals;
    }

    @Override
    public String getValueByOrd(int ord) {
        return utf8ToString(delegate.getValueByOrd(ord));
    }

    @Override
    public boolean isMultiValued() {
        return delegate.isMultiValued();
    }

    @Override
    public boolean hasValue(int docId) {
        return delegate.hasValue(docId);
    }
}
/**
 * Multi-valued variant of {@link BytesValuesWrapper}: replaces the single-valued
 * {@code getValues} and {@code forEachValueInDoc} with versions that walk all
 * ordinals of a document.
 */
static final class MultiBytesValuesWrapper extends BytesValuesWrapper {

    MultiBytesValuesWrapper(org.elasticsearch.index.fielddata.BytesValues.WithOrdinals delegate) {
        super(delegate);
    }

    /**
     * Returns the shared array scratch filled with one String per ordinal of the
     * document, or {@link StringArrayRef#EMPTY} when the document has no ordinals.
     */
    @Override
    public StringArrayRef getValues(int docId) {
        assert isMultiValued();
        IntArrayRef ords = ordinals.getOrds(docId);
        int size = ords.size();
        if (size == 0) {
            return StringArrayRef.EMPTY;
        }
        arrayScratch.reset(size);
        for (int i = ords.start; i < ords.end; i++) {
            // i is an absolute position in the backing array (it starts at ords.start), so the
            // array is read directly, matching the other ordinal loops in this code base.
            // NOTE(review): ords.get(i) looks list-style (relative to start) - confirm against IntArrayRef.
            arrayScratch.values[arrayScratch.end++] = getValueByOrd(ords.values[i]);
        }
        return arrayScratch;
    }

    /**
     * Passes every value of the document to the callback, or reports the document as
     * missing when the first ordinal is {@code 0}.
     */
    @Override
    public void forEachValueInDoc(int docId, ValueInDocProc proc) {
        assert isMultiValued();
        Ordinals.Docs.Iter iter = ordinals.getIter(docId);
        int ord = iter.next();
        if (ord == 0) {
            proc.onMissing(docId);
        } else {
            do {
                proc.onValue(docId, getValueByOrd(ord));
            } while ((ord = iter.next()) != 0);
        }
    }
}
/**
 * String iterator over the ordinals of one document. Ordinals come from the
 * {@link Ordinals.Docs.Iter} given to {@link #reset}; each is resolved through
 * {@code values.getValueByOrd}. An ordinal of {@code 0} ends the iteration.
 */
static final class OrdinalIter implements StringValues.Iter {

    private final StringValues.WithOrdinals values;
    private Ordinals.Docs.Iter ordsIter;
    // ordinal resolved by the upcoming next() call; 0 means exhausted
    private int ord;

    OrdinalIter(StringValues.WithOrdinals values) {
        this.values = values;
    }

    /**
     * Points this iterator at a new ordinal iterator and pre-fetches its first ordinal.
     */
    public OrdinalIter reset(Ordinals.Docs.Iter ordsIter) {
        this.ordsIter = ordsIter;
        ord = this.ordsIter.next();
        return this;
    }

    @Override
    public boolean hasNext() {
        final boolean exhausted = (ord == 0);
        return !exhausted;
    }

    @Override
    public String next() {
        // resolve the pre-fetched ordinal before advancing to the following one
        final String current = values.getValueByOrd(ord);
        ord = ordsIter.next();
        return current;
    }
}
}

View File

@ -107,26 +107,19 @@ public final class BytesRefValComparator extends FieldComparator<BytesRef> {
return docTerms.getValue(doc).compareTo(value);
}
public static class FilteredByteValues implements BytesValues {
public static class FilteredByteValues extends BytesValues {
protected final BytesValues delegate;
public FilteredByteValues(BytesValues delegate) {
super(delegate.isMultiValued());
this.delegate = delegate;
}
public boolean isMultiValued() {
return delegate.isMultiValued();
}
public boolean hasValue(int docId) {
return delegate.hasValue(docId);
}
public BytesRef getValue(int docId) {
return delegate.getValue(docId);
}
public BytesRef makeSafe(BytesRef bytes) {
return delegate.makeSafe(bytes);
}
@ -146,6 +139,7 @@ public final class BytesRefValComparator extends FieldComparator<BytesRef> {
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
delegate.forEachValueInDoc(docId, proc);
}
}
private static final class MultiValuedBytesWrapper extends FilteredByteValues {

View File

@ -139,7 +139,7 @@ public abstract class ByteArrayAtomicFieldData implements AtomicNumericFieldData
@Override
public HashedBytesValues getHashedBytesValues() {
return new HashedBytesValues.StringBased(getStringValues());
return new HashedBytesValues.BytesBased(getBytesValues());
}
@Override
@ -406,7 +406,7 @@ public abstract class ByteArrayAtomicFieldData implements AtomicNumericFieldData
@Override
public HashedBytesValues getHashedBytesValues() {
return new HashedBytesValues.StringBased(getStringValues());
return new HashedBytesValues.BytesBased(getBytesValues());
}
@Override

View File

@ -24,11 +24,10 @@ import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.StringValues;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
import org.elasticsearch.index.fielddata.util.IntArrayRef;
import org.elasticsearch.index.fielddata.util.StringArrayRef;
/**
*/
@ -105,7 +104,7 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
@Override
public StringValues.WithOrdinals getStringValues() {
return ordinals.isMultiValued() ? new StringValues.Multi(values, ordinals.ordinals()) : new StringValues.Single(values, ordinals.ordinals());
return StringValues.BytesValuesWrapper.wrap(getBytesValues());
}
@Override
@ -113,19 +112,13 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
return new ScriptDocValues.Strings(getStringValues());
}
static abstract class BytesValues implements org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
protected final BytesRef[] values;
protected final Ordinals.Docs ordinals;
BytesValues(BytesRef[] values, Ordinals.Docs ordinals) {
super(ordinals);
this.values = values;
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return this.ordinals;
}
@Override
@ -151,22 +144,12 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
return values[ord];
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public BytesRef makeSafe(BytesRef bytes) {
// no need to do anything, its already concrete bytes...
return bytes;
}
@Override
public BytesRef getValue(int docId) {
return values[ordinals.getOrd(docId)];
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
BytesRef value = values[ordinals.getOrd(docId)];
@ -180,72 +163,35 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
return ret;
}
static class Single extends BytesValues {
static final class Single extends BytesValues {
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
private final Iter.Single iter = new Iter.Single();
Single(BytesRef[] values, Ordinals.Docs ordinals) {
super(values, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public BytesRefArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.values[0] = values[ord];
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(values[ord]);
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
} else {
proc.onValue(docId, values[ord]);
}
}
}
static class Multi extends BytesValues {
static final class Multi extends BytesValues {
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
private final ValuesIter iter;
private final Iter.Multi iter;
Multi(BytesRef[] values, Ordinals.Docs ordinals) {
super(values, ordinals);
this.iter = new ValuesIter(values);
}
@Override
public boolean isMultiValued() {
return true;
assert ordinals.isMultiValued();
this.iter = new Iter.Multi(this);
}
@Override
public BytesRefArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
arrayScratch.values[arrayScratch.end++] = values[ords.values[i]];
}
return arrayScratch;
return getValuesMulti(docId);
}
@Override
@ -255,45 +201,10 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
proc.onValue(docId, values[ord]);
} while ((ord = iter.next()) != 0);
forEachValueInDocMulti(docId, proc);
}
static class ValuesIter implements Iter {
private final BytesRef[] values;
private Ordinals.Docs.Iter ordsIter;
private int ord;
ValuesIter(BytesRef[] values) {
this.values = values;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public BytesRef next() {
BytesRef value = values[ord];
ord = ordsIter.next();
return value;
}
}
}
}
@ -443,160 +354,6 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
}
}
static abstract class StringValues implements org.elasticsearch.index.fielddata.StringValues.WithOrdinals {
protected final BytesRef[] values;
protected final Ordinals.Docs ordinals;
protected StringValues(BytesRef[] values, Ordinals.Docs ordinals) {
this.values = values;
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return ordinals;
}
@Override
public String getValueByOrd(int ord) {
BytesRef value = values[ord];
if (value == null) return null;
return value.utf8ToString();
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public String getValue(int docId) {
BytesRef value = values[ordinals.getOrd(docId)];
if (value == null) return null;
return value.utf8ToString();
}
static class Single extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1);
private final Iter.Single iter = new Iter.Single();
Single(BytesRef[] values, Ordinals.Docs ordinals) {
super(values, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public StringArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return StringArrayRef.EMPTY;
BytesRef value = values[ord];
arrayScratch.values[0] = value == null ? null : value.utf8ToString();
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(values[ord].utf8ToString());
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
return;
}
proc.onValue(docId, values[ord].utf8ToString());
}
}
static class Multi extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0);
private final ValuesIter iter;
Multi(BytesRef[] values, Ordinals.Docs ordinals) {
super(values, ordinals);
iter = new ValuesIter(values);
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public StringArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return StringArrayRef.EMPTY;
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
BytesRef value = values[ords.values[i]];
arrayScratch.values[arrayScratch.end++] = value == null ? null : value.utf8ToString();
}
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
BytesRef value = values[ord];
proc.onValue(docId, value == null ? null : value.utf8ToString());
} while ((ord = iter.next()) != 0);
}
static class ValuesIter implements StringValues.Iter {
private final BytesRef[] values;
private Ordinals.Docs.Iter ordsIter;
private int ord;
ValuesIter(BytesRef[] values) {
this.values = values;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public String next() {
BytesRef value = values[ord];
ord = ordsIter.next();
return value == null ? null : value.utf8ToString();
}
}
}
}
static class Empty extends ConcreteBytesRefAtomicFieldData {
Empty(int numDocs) {
@ -625,7 +382,7 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi
@Override
public BytesValues.WithOrdinals getBytesValues() {
return new BytesValues.WithOrdinals.Empty((EmptyOrdinals) ordinals);
return new BytesValues.WithOrdinals.Empty(ordinals.ordinals());
}
@Override

View File

@ -0,0 +1,250 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.Util;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.HashedBytesValues;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.StringValues;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
/**
*/
public class FSTPackedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> {
public static FSTPackedBytesAtomicFieldData empty(int numDocs) {
return new Empty(numDocs);
}
// 0 ordinal in values means no value (its null)
protected final Ordinals ordinals;
private int[] hashes;
private long size = -1;
private final FST<Long> fst;
public FSTPackedBytesAtomicFieldData(FST<Long> fst, Ordinals ordinals) {
this.ordinals = ordinals;
this.fst = fst;
}
@Override
public void close() {
}
@Override
public boolean isMultiValued() {
return ordinals.isMultiValued();
}
@Override
public int getNumDocs() {
return ordinals.getNumDocs();
}
@Override
public boolean isValuesOrdered() {
return true;
}
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
long size = ordinals.getMemorySizeInBytes();
// FST
size += fst == null ? 0 : fst.sizeInBytes();
this.size = size;
}
return size;
}
@Override
public BytesValues.WithOrdinals getBytesValues() {
assert fst != null;
return ordinals.isMultiValued() ? new BytesValues.Multi(fst, ordinals.ordinals()) : new BytesValues.Single(fst, ordinals.ordinals());
}
@Override
public HashedBytesValues.WithOrdinals getHashedBytesValues() {
assert fst != null;
if (hashes == null) {
BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
int[] hashes = new int[ordinals.getMaxOrd()];
InputOutput<Long> next;
int i = 0;
try {
while((next = fstEnum.next()) != null) {
hashes[i++] = next.input.hashCode();
}
} catch (IOException ex) {
//bogus
}
this.hashes = hashes;
}
return ordinals.isMultiValued() ? new HashedBytesValuesWithOrds.Multi(getBytesValues(), hashes) : new HashedBytesValuesWithOrds.Single(getBytesValues(), hashes);
}
@Override
public StringValues.WithOrdinals getStringValues() {
assert fst != null;
return StringValues.BytesValuesWrapper.wrap(getBytesValues());
}
@Override
public ScriptDocValues.Strings getScriptValues() {
assert fst != null;
return new ScriptDocValues.Strings(getStringValues());
}
/**
 * Ordinal-based bytes values that resolve an ordinal to its term bytes by
 * looking the ordinal up as an output in the FST ({@link Util#getByOutput}).
 * <p>
 * Each instance owns an FST bytes reader plus scratch arcs and buffers that
 * are overwritten on every lookup, so values returned through shared scratch
 * objects are only valid until the next call on the same instance.
 */
static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {

    /** FST whose outputs are the term ordinals. */
    protected final FST<Long> fst;
    /** Per-document ordinals, also handed to the super constructor. */
    protected final Ordinals.Docs ordinals;
    // NOTE(review): not referenced anywhere inside this class; the base BytesValues
    // also declares a field named "scratch" - confirm whether this one is still needed.
    protected final BytesRef scratch = new BytesRef();

    // per-thread resources
    protected final BytesReader in;
    protected final Arc<Long> firstArc = new Arc<Long>();
    protected final Arc<Long> scratchArc = new Arc<Long>();
    protected final IntsRef scratchInts = new IntsRef();

    /**
     * @param fst      FST to resolve ordinals against; a bytes reader is obtained from it here
     * @param ordinals per-document ordinals
     */
    BytesValues(FST<Long> fst, Ordinals.Docs ordinals) {
        super(ordinals);
        this.fst = fst;
        this.ordinals = ordinals;
        in = fst.getBytesReader();
    }

    /**
     * Resolves {@code ord} to its term and writes the term bytes into {@code ret}.
     *
     * @param ord ordinal to look up as an FST output
     * @param ret destination; its offset and length are reset and its content replaced
     * @return {@code ret}
     * @throws IllegalStateException if the FST lookup fails with an {@link IOException}
     */
    @Override
    public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
        // Every lookup restarts from the beginning of the FST.
        in.setPosition(0);
        fst.getFirstArc(firstArc);
        try {
            IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
            ret.grow(output.length);
            ret.length = ret.offset = 0;
            Util.toBytesRef(output, ret);
        } catch (IOException ex) {
            // Presumably unreachable for an FST held in memory, but returning a
            // partially filled ref would silently hand wrong terms to the caller.
            throw new IllegalStateException("failed to resolve ord [" + ord + "] from FST", ex);
        }
        return ret;
    }

    /** Variant for ordinals that are not multi valued (asserted in the constructor). */
    static final class Single extends BytesValues {

        private final Iter.Single iter = new Iter.Single();

        Single(FST<Long> fst, Ordinals.Docs ordinals) {
            super(fst, ordinals);
            assert !ordinals.isMultiValued();
        }

        /**
         * Returns the shared single-value iterator positioned on the document's value,
         * or the empty iterator when the document's ordinal is 0.
         */
        @Override
        public Iter getIter(int docId) {
            int ord = ordinals.getOrd(docId);
            if (ord == 0) return Iter.Empty.INSTANCE;
            return iter.reset(getValueByOrd(ord));
        }
    }

    /** Variant for multi valued ordinals (asserted in the constructor). */
    static final class Multi extends BytesValues {

        private final Iter.Multi iter;

        Multi(FST<Long> fst, Ordinals.Docs ordinals) {
            super(fst, ordinals);
            assert ordinals.isMultiValued();
            this.iter = new Iter.Multi(this);
        }

        @Override
        public BytesRefArrayRef getValues(int docId) {
            return getValuesMulti(docId);
        }

        /** Returns the shared multi-value iterator reset to the document's ordinals. */
        @Override
        public Iter getIter(int docId) {
            return iter.reset(ordinals.getIter(docId));
        }

        @Override
        public void forEachValueInDoc(int docId, ValueInDocProc proc) {
            forEachValueInDocMulti(docId, proc);
        }
    }
}
/**
 * Variant without an FST: {@code null} is passed as the FST to the super
 * constructor and the ordinals are an {@link EmptyOrdinals} sized by the
 * given number of documents. Every value view is the corresponding empty
 * implementation.
 */
static class Empty extends FSTPackedBytesAtomicFieldData {

    /**
     * @param numDocs number of documents handed to {@link EmptyOrdinals}
     */
    Empty(int numDocs) {
        super(null, new EmptyOrdinals(numDocs));
    }

    // ---- value views -------------------------------------------------

    /** Empty bytes view over this instance's ordinals. */
    @Override
    public BytesValues.WithOrdinals getBytesValues() {
        return new BytesValues.WithOrdinals.Empty(ordinals.ordinals());
    }

    /** Empty hashed bytes view. */
    @Override
    public HashedBytesValues.WithOrdinals getHashedBytesValues() {
        return new HashedBytesValuesWithOrds.Empty(ordinals);
    }

    /** Empty string view. */
    @Override
    public StringValues.WithOrdinals getStringValues() {
        return new StringValues.WithOrdinals.Empty(ordinals);
    }

    /** Shared empty script values constant. */
    @Override
    public ScriptDocValues.Strings getScriptValues() {
        return ScriptDocValues.EMPTY_STRINGS;
    }

    // ---- fixed properties --------------------------------------------

    /** Number of documents as reported by the ordinals. */
    @Override
    public int getNumDocs() {
        final int numDocs = ordinals.getNumDocs();
        return numDocs;
    }

    /** Always {@code false}. */
    @Override
    public boolean isMultiValued() {
        return false;
    }

    /** Always {@code true}. */
    @Override
    public boolean isValuesOrdered() {
        return true;
    }
}
}

View File

@ -0,0 +1,125 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.AbstractIndexFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
/**
 * Index field data whose per-segment representation is an FST mapping every
 * term of the field to its ordinal, combined with the document-to-ordinal
 * mapping produced by an {@link OrdinalsBuilder}.
 */
public class FSTPackedIndexFieldData extends AbstractIndexFieldData<FSTPackedBytesAtomicFieldData> implements IndexFieldData.WithOrdinals<FSTPackedBytesAtomicFieldData> {

    /** Factory creating {@link FSTPackedIndexFieldData} instances. */
    public static class Builder implements IndexFieldData.Builder {

        @Override
        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
            final FSTPackedIndexFieldData fieldData = new FSTPackedIndexFieldData(index, indexSettings, fieldNames, type, cache);
            return fieldData;
        }
    }

    public FSTPackedIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
        super(index, indexSettings, fieldNames, fieldDataType, cache);
    }

    /** Always {@code true}. */
    @Override
    public boolean valuesOrdered() {
        return true;
    }

    /**
     * Loads the segment's field data through the cache.
     *
     * @throws ElasticSearchException rethrown as is, or wrapping any other failure with its cause
     */
    @Override
    public FSTPackedBytesAtomicFieldData load(AtomicReaderContext context) {
        try {
            return cache.load(context, this);
        } catch (Throwable failure) {
            if (!(failure instanceof ElasticSearchException)) {
                throw new ElasticSearchException(failure.getMessage(), failure);
            }
            throw (ElasticSearchException) failure;
        }
    }

    /**
     * Builds the field data for one segment straight from its terms: every term is
     * added to the FST with its ordinal as output, and every document returned for
     * the term is registered with the ordinals builder.
     *
     * @return the empty field data when the segment has no terms for this field
     */
    @Override
    public FSTPackedBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
        final AtomicReader segmentReader = context.reader();
        final Terms fieldTerms = segmentReader.terms(getFieldNames().indexName());
        if (fieldTerms == null) {
            return FSTPackedBytesAtomicFieldData.empty(segmentReader.maxDoc());
        }

        final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton(true);
        final org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, ordOutputs);
        final IntsRef intsScratch = new IntsRef();
        final OrdinalsBuilder ordinalsBuilder = new OrdinalsBuilder(fieldTerms, segmentReader.maxDoc());
        try {
            // 0 is reserved for "unset"
            // NOTE(review): if the field can contain an empty term, that term would be added
            // to the FST a second time by the loop below - confirm the builder accepts this.
            fstBuilder.add(Util.toIntsRef(new BytesRef(), intsScratch), 0L);

            final TermsEnum termsEnum = fieldTerms.iterator(null);
            DocsEnum postings = null;
            BytesRef term;
            while ((term = termsEnum.next()) != null) {
                final int ord = ordinalsBuilder.nextOrdinal();
                fstBuilder.add(Util.toIntsRef(term, intsScratch), Long.valueOf(ord));

                // The previous enum is passed back in on each call to docs(..).
                postings = termsEnum.docs(segmentReader.getLiveDocs(), postings, DocsEnum.FLAG_NONE);
                int docId;
                while ((docId = postings.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                    ordinalsBuilder.addDoc(docId);
                }
            }

            final FST<Long> fst = fstBuilder.finish();
            final Ordinals ordinals = ordinalsBuilder.build(fieldDataType.getSettings());
            return new FSTPackedBytesAtomicFieldData(fst, ordinals);
        } finally {
            ordinalsBuilder.close();
        }
    }

    /**
     * Returns a bytes-based comparator source for this field data; {@code missingValue}
     * is currently not used.
     */
    @Override
    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
        // TODO support "missingValue" for sortMissingValue options here...
        return new BytesRefFieldComparatorSource(this, sortMode);
    }
}

View File

@ -0,0 +1,179 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.plain.FSTPackedBytesAtomicFieldData.BytesValues;
/**
 * Shared hashed-bytes view built on top of an ordinal-based bytes view: the term
 * bytes of an ordinal come from {@code withOrds}, the hash from {@code hashes[ord]}.
 * <p>
 * Except for {@link #getSafeValueByOrd(int)} and {@link #makeSafe(HashedBytesRef)},
 * returned values are backed by scratch objects that the next lookup overwrites.
 * <p>
 * shared utils class - should be factored into HashedBytesValues
 */
abstract class HashedBytesValuesWithOrds implements org.elasticsearch.index.fielddata.HashedBytesValues.WithOrdinals {

    /** Hash per ordinal, indexed by ordinal. */
    protected final int[] hashes;
    /** Ordinals taken from {@code withOrds}. */
    protected final Ordinals.Docs ordinals;
    /** Shared buffer the term bytes are resolved into. */
    protected final BytesRef scratch1 = new BytesRef();
    /** Shared value handed out by the non-"safe" accessors. */
    protected final HashedBytesRef scratch = new HashedBytesRef();
    /** Bytes view used to resolve an ordinal to its term bytes. */
    protected final BytesValues.WithOrdinals withOrds;

    /**
     * @param withOrds bytes view supplying ordinals and term bytes
     * @param hashes   hash for each ordinal, indexed by ordinal
     */
    HashedBytesValuesWithOrds(BytesValues.WithOrdinals withOrds, int[] hashes) {
        this.hashes = hashes;
        this.ordinals = withOrds.ordinals();
        this.withOrds = withOrds;
    }

    @Override
    public boolean isMultiValued() {
        return withOrds.isMultiValued();
    }

    /**
     * Single-valued traversal: reports the document's value, or a miss when its ordinal is 0.
     */
    @Override
    public void forEachValueInDoc(int docId, ValueInDocProc proc) {
        int ord = ordinals.getOrd(docId);
        if (ord == 0) {
            proc.onMissing(docId);
        } else {
            proc.onValue(docId, getValueByOrd(ord));
        }
    }

    /**
     * Multi-valued traversal: reports every value of the document, or a single miss
     * when the first ordinal returned for it is 0.
     */
    protected final void forEachValueInDocMulti(int docId, ValueInDocProc proc) {
        Ordinals.Docs.Iter iter = ordinals.getIter(docId);
        int ord = iter.next();
        if (ord == 0) {
            proc.onMissing(docId);
            return;
        }
        do {
            proc.onValue(docId, getValueByOrd(ord));
        } while ((ord = iter.next()) != 0);
    }

    @Override
    public Ordinals.Docs ordinals() {
        return this.ordinals;
    }

    /**
     * Returns the value for {@code ord} in the shared scratch object; the result is
     * overwritten by the next lookup on this instance.
     */
    @Override
    public HashedBytesRef getValueByOrd(int ord) {
        return scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
    }

    /**
     * Returns a value for {@code ord} that does not share state with this instance.
     */
    @Override
    public HashedBytesRef getSafeValueByOrd(int ord) {
        // Resolve into a fresh BytesRef: wrapping scratch1 would return a reference
        // whose content changes with the next lookup, defeating the "safe" contract.
        return new HashedBytesRef(withOrds.getValueScratchByOrd(ord, new BytesRef()), hashes[ord]);
    }

    /** A document has a value when its ordinal is not 0. */
    @Override
    public boolean hasValue(int docId) {
        return ordinals.getOrd(docId) != 0;
    }

    /** Returns a copy of {@code bytes} with its own deep copy of the term bytes. */
    @Override
    public HashedBytesRef makeSafe(HashedBytesRef bytes) {
        return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
    }

    /**
     * Returns the document's value in the shared scratch object, or {@code null}
     * when the document's ordinal is 0.
     */
    @Override
    public HashedBytesRef getValue(int docId) {
        int ord = ordinals.getOrd(docId);
        if (ord == 0) return null;
        return getValueByOrd(ord);
    }

    /** Single-valued variant. */
    final static class Single extends HashedBytesValuesWithOrds {

        private final Iter.Single iter = new Iter.Single();

        Single(BytesValues.WithOrdinals withOrds, int[] hashes) {
            super(withOrds, hashes);
        }

        /**
         * Returns the shared single-value iterator positioned on the document's value,
         * or the empty iterator when the document's ordinal is 0.
         */
        @Override
        public Iter getIter(int docId) {
            int ord = ordinals.getOrd(docId);
            if (ord == 0) return Iter.Empty.INSTANCE;
            return iter.reset(getValueByOrd(ord));
        }
    }

    /** Multi-valued variant. */
    final static class Multi extends HashedBytesValuesWithOrds {

        private final HashedBytesValuesWithOrds.Multi.MultiIter iter;

        Multi(BytesValues.WithOrdinals withOrds, int[] hashes) {
            super(withOrds, hashes);
            this.iter = new MultiIter(withOrds, hashes);
        }

        /** Always {@code true}. */
        @Override
        public boolean isMultiValued() {
            return true;
        }

        /** Returns the shared iterator reset to the document's ordinals. */
        @Override
        public Iter getIter(int docId) {
            return iter.reset(ordinals.getIter(docId));
        }

        @Override
        public void forEachValueInDoc(int docId, ValueInDocProc proc) {
            forEachValueInDocMulti(docId, proc);
        }

        /**
         * Iterator over the values of one document. It keeps scratch objects of its
         * own, separate from those of the enclosing values instance.
         */
        final static class MultiIter implements Iter {

            private final int[] hashes;
            private Ordinals.Docs.Iter ordsIter;
            // Ordinal of the value next() will return; 0 means exhausted.
            private int ord;
            private final BytesRef scratch1 = new BytesRef();
            private final HashedBytesRef scratch = new HashedBytesRef();
            private final BytesValues.WithOrdinals withOrds;

            MultiIter(BytesValues.WithOrdinals withOrds, int[] hashes) {
                this.hashes = hashes;
                this.withOrds = withOrds;
            }

            /** Points this iterator at a new ordinal sequence and pre-fetches its first ordinal. */
            public HashedBytesValuesWithOrds.Multi.MultiIter reset(Ordinals.Docs.Iter ordsIter) {
                this.ordsIter = ordsIter;
                this.ord = ordsIter.next();
                return this;
            }

            @Override
            public boolean hasNext() {
                return ord != 0;
            }

            /** Returns the value of the current ordinal (shared scratch) and advances. */
            @Override
            public HashedBytesRef next() {
                HashedBytesRef value = scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
                ord = ordsIter.next();
                return value;
            }
        }
    }
}

View File

@ -23,15 +23,13 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.HashedBytesValues;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.StringValues;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
import org.elasticsearch.index.fielddata.util.IntArrayRef;
import org.elasticsearch.index.fielddata.util.StringArrayRef;
/**
*/
@ -106,12 +104,12 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}
this.hashes = hashes;
}
return ordinals.isMultiValued() ? new HashedBytesValues.Multi(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals()) : new HashedBytesValues.Single(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals());
return ordinals.isMultiValued() ? new HashedBytesValuesWithOrds.Multi(getBytesValues(), hashes) : new HashedBytesValuesWithOrds.Single(getBytesValues(), hashes);
}
@Override
public StringValues.WithOrdinals getStringValues() {
return ordinals.isMultiValued() ? new StringValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new StringValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
return StringValues.BytesValuesWrapper.wrap(getBytesValues());
}
@Override
@ -119,7 +117,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return new ScriptDocValues.Strings(getStringValues());
}
static abstract class BytesValues implements org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
@ -128,6 +126,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
protected final BytesRef scratch = new BytesRef();
BytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(ordinals);
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
@ -138,70 +137,20 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return this.ordinals;
}
@Override
public BytesRef getValueByOrd(int ord) {
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch;
}
@Override
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
bytes.fill(ret, termOrdToBytesOffset.get(ord));
return ret;
}
@Override
public BytesRef getSafeValueByOrd(int ord) {
final BytesRef retVal = new BytesRef();
bytes.fill(retVal, termOrdToBytesOffset.get(ord));
return retVal;
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
static final class Single extends BytesValues {
@Override
public BytesRef makeSafe(BytesRef bytes) {
return BytesRef.deepCopyOf(bytes);
}
@Override
public BytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch;
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
bytes.fill(ret, termOrdToBytesOffset.get(ordinals.getOrd(docId)));
return ret;
}
static class Single extends BytesValues {
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public BytesRefArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.values[0] = new BytesRef();
bytes.fill(arrayScratch.values[0], termOrdToBytesOffset.get(ord));
return arrayScratch;
assert !ordinals.isMultiValued();
}
@Override
@ -212,46 +161,21 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return iter.reset(scratch);
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
} else {
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch);
}
}
}
static class Multi extends BytesValues {
static final class Multi extends BytesValues {
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
private final ValuesIter iter;
private final Iter.Multi iter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
this.iter = new ValuesIter(bytes, termOrdToBytesOffset);
}
@Override
public boolean isMultiValued() {
return true;
assert ordinals.isMultiValued();
this.iter = new Iter.Multi(this);
}
@Override
public BytesRefArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
final BytesRef bytesRef = new BytesRef();
bytes.fill(bytesRef, termOrdToBytesOffset.get(ords.values[i]));
arrayScratch.values[arrayScratch.end++] = bytesRef;
}
return arrayScratch;
return getValuesMulti(docId);
}
@Override
@ -261,369 +185,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch);
} while ((ord = iter.next()) != 0);
}
static class ValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final BytesRef scratch = new BytesRef();
private Ordinals.Docs.Iter ordsIter;
private int ord;
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public BytesRef next() {
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return scratch;
}
}
}
}
static abstract class HashedBytesValues implements org.elasticsearch.index.fielddata.HashedBytesValues.WithOrdinals {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
protected final int[] hashes;
protected final Ordinals.Docs ordinals;
protected final BytesRef scratch1 = new BytesRef();
protected final HashedBytesRef scratch = new HashedBytesRef();
HashedBytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return this.ordinals;
}
@Override
public HashedBytesRef getValueByOrd(int ord) {
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
return scratch.reset(scratch1, hashes[ord]);
}
@Override
public HashedBytesRef getSafeValueByOrd(int ord) {
final BytesRef bytesRef = new BytesRef();
bytes.fill(bytesRef, termOrdToBytesOffset.get(ord));
return new HashedBytesRef(bytesRef, hashes[ord]);
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public HashedBytesRef makeSafe(HashedBytesRef bytes) {
return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
}
@Override
public HashedBytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
return scratch.reset(scratch1, hashes[ord]);
}
static class Single extends HashedBytesValues {
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, hashes, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
return iter.reset(scratch.reset(scratch1, hashes[ord]));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
} else {
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.reset(scratch1, hashes[ord]));
}
}
}
static class Multi extends HashedBytesValues {
private final ValuesIter iter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, hashes, ordinals);
this.iter = new ValuesIter(bytes, termOrdToBytesOffset, hashes);
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.reset(scratch1, hashes[ord]));
} while ((ord = iter.next()) != 0);
}
static class ValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final int[] hashes;
private Ordinals.Docs.Iter ordsIter;
private int ord;
private final BytesRef scratch1 = new BytesRef();
private final HashedBytesRef scratch = new HashedBytesRef();
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public HashedBytesRef next() {
bytes.fill(scratch1, termOrdToBytesOffset.get(ord));
HashedBytesRef value = scratch.reset(scratch1, hashes[ord]);
ord = ordsIter.next();
return value;
}
}
}
}
static abstract class StringValues implements org.elasticsearch.index.fielddata.StringValues.WithOrdinals {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
protected final Ordinals.Docs ordinals;
protected final BytesRef scratch = new BytesRef();
protected StringValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return ordinals;
}
@Override
public String getValueByOrd(int ord) {
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch.utf8ToString();
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public String getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return scratch.utf8ToString();
}
static class Single extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1);
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public StringArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return StringArrayRef.EMPTY;
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
arrayScratch.values[0] = scratch.utf8ToString();
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return iter.reset(scratch.utf8ToString());
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
return;
}
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.utf8ToString());
}
}
static class Multi extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0);
private final ValuesIter iter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
iter = new ValuesIter(bytes, termOrdToBytesOffset);
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public StringArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return StringArrayRef.EMPTY;
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
bytes.fill(scratch, termOrdToBytesOffset.get(ords.values[i]));
arrayScratch.values[arrayScratch.end++] = scratch.utf8ToString();
}
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, scratch.utf8ToString());
} while ((ord = iter.next()) != 0);
}
static class ValuesIter implements StringValues.Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final BytesRef scratch = new BytesRef();
private Ordinals.Docs.Iter ordsIter;
private int ord;
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public String next() {
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return scratch.utf8ToString();
}
forEachValueInDocMulti(docId, proc);
}
}
}
@ -657,12 +219,12 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public BytesValues.WithOrdinals getBytesValues() {
return new BytesValues.WithOrdinals.Empty((EmptyOrdinals) ordinals);
return new BytesValues.WithOrdinals.Empty(ordinals.ordinals());
}
@Override
public HashedBytesValues.WithOrdinals getHashedBytesValues() {
return new HashedBytesValues.Empty((EmptyOrdinals) ordinals);
return new HashedBytesValuesWithOrds.Empty(ordinals);
}
@Override

View File

@ -85,7 +85,6 @@ public class PagedBytesIndexFieldData extends AbstractIndexFieldData<PagedBytesA
final PagedBytes bytes = new PagedBytes(15);
int startBytesBPV;
int startTermsBPV;
int startNumUniqueTerms;
int maxDoc = reader.maxDoc();
@ -109,12 +108,10 @@ public class PagedBytesIndexFieldData extends AbstractIndexFieldData<PagedBytesA
}
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}

View File

@ -0,0 +1,35 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.fielddata;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.testng.annotations.Test;
/**
 * Reuses the {@link StringFieldDataTests} suite with a "string" field data
 * type whose {@code format} setting is {@code fst}.
 */
@Test
public class FSTPackedBytesStringFieldDataTests extends StringFieldDataTests {

    /** Supplies the field data type the inherited tests run against. */
    @Override
    protected FieldDataType getFieldDataType() {
        final String format = "fst";
        return new FieldDataType("string", ImmutableSettings.builder().put("format", format));
    }
}