From 129f02623bb2e96039c7e839b0354ff61fa91ccd Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 26 Mar 2013 17:28:49 +0100 Subject: [PATCH] Added FST based FieldData implementation holding all data in a per segment FST. This commit factors out a common API for BytesValues based impl to share code and reduce code duplication. --- .../index/fielddata/BytesValues.java | 268 ++++++---- .../index/fielddata/HashedBytesValues.java | 3 +- .../fielddata/IndexFieldDataService.java | 1 + .../index/fielddata/StringValues.java | 161 +++++- .../BytesRefValComparator.java | 12 +- .../plain/ByteArrayAtomicFieldData.java | 4 +- .../ConcreteBytesRefAtomicFieldData.java | 269 +--------- .../plain/FSTPackedBytesAtomicFieldData.java | 250 ++++++++++ .../plain/FSTPackedIndexFieldData.java | 125 +++++ .../plain/HashedBytesValuesWithOrds.java | 179 +++++++ .../plain/PagedBytesAtomicFieldData.java | 470 +----------------- .../plain/PagedBytesIndexFieldData.java | 3 - .../FSTPackedBytesStringFieldDataTests.java | 35 ++ 13 files changed, 959 insertions(+), 821 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedBytesAtomicFieldData.java create mode 100644 src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedIndexFieldData.java create mode 100644 src/main/java/org/elasticsearch/index/fielddata/plain/HashedBytesValuesWithOrds.java create mode 100644 src/test/java/org/elasticsearch/test/unit/index/fielddata/FSTPackedBytesStringFieldDataTests.java diff --git a/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java b/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java index 59e1260fcd6..c7bc5734b2f 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java @@ -21,72 +21,89 @@ package org.elasticsearch.index.fielddata; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalStateException; -import
org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals; import org.elasticsearch.index.fielddata.ordinals.Ordinals; +import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs; import org.elasticsearch.index.fielddata.util.BytesRefArrayRef; +import org.elasticsearch.index.fielddata.util.IntArrayRef; import org.elasticsearch.index.fielddata.util.StringArrayRef; /** */ -public interface BytesValues { - - static final BytesValues EMPTY = new Empty(); +public abstract class BytesValues { + public static final BytesValues EMPTY = new Empty(); + private boolean multiValued; + protected final BytesRef scratch = new BytesRef(); + + protected BytesValues(boolean multiValued) { + this.multiValued = multiValued; + } + /** * Is one of the documents in this field data values is multi valued? */ - boolean isMultiValued(); + public final boolean isMultiValued() { + return multiValued; + } /** * Is there a value for this doc? */ - boolean hasValue(int docId); + public abstract boolean hasValue(int docId); /** * Converts the provided bytes to "safe" ones from a "non" safe call made (if needed). */ - BytesRef makeSafe(BytesRef bytes); + public BytesRef makeSafe(BytesRef bytes) { + return BytesRef.deepCopyOf(bytes); + } /** * Returns a bytes value for a docId. Note, the content of it might be shared across invocation. */ - BytesRef getValue(int docId); + public BytesRef getValue(int docId) { + if (hasValue(docId)) { + return getValueScratch(docId, scratch); + } + return null; + } /** * Returns the bytes value for the docId, with the provided "ret" which will be filled with the * result which will also be returned. If there is no value for this docId, the length will be 0. * Note, the bytes are not "safe". */ - BytesRef getValueScratch(int docId, BytesRef ret); + public abstract BytesRef getValueScratch(int docId, BytesRef ret); /** * Returns an array wrapping all the bytes values for a doc. The content is guaranteed not to be shared. 
*/ - BytesRefArrayRef getValues(int docId); + public abstract BytesRefArrayRef getValues(int docId); /** * Returns a bytes value iterator for a docId. Note, the content of it might be shared across invocation. */ - Iter getIter(int docId); + public abstract Iter getIter(int docId); + + /** * Go over all the possible values in their BytesRef format for a specific doc. */ - void forEachValueInDoc(int docId, ValueInDocProc proc); - + public abstract void forEachValueInDoc(int docId, ValueInDocProc proc); public static interface ValueInDocProc { void onValue(int docId, BytesRef value); void onMissing(int docId); } - static interface Iter { + public static interface Iter { boolean hasNext(); BytesRef next(); - static class Empty implements Iter { + public static class Empty implements Iter { public static final Empty INSTANCE = new Empty(); @@ -101,7 +118,7 @@ public interface BytesValues { } } - static class Single implements Iter { + public final static class Single implements Iter { public BytesRef value; public boolean done; @@ -124,12 +141,43 @@ public interface BytesValues { return value; } } + + static final class Multi implements Iter { + + private int ord; + private BytesValues.WithOrdinals withOrds; + private Ordinals.Docs.Iter ordsIter; + private final BytesRef scratch = new BytesRef(); + public Multi(WithOrdinals withOrds) { + this.withOrds = withOrds; + assert withOrds.isMultiValued(); + + } + + public Multi reset(Ordinals.Docs.Iter ordsIter) { + this.ordsIter = ordsIter; + this.ord = ordsIter.next(); + return this; + } + + @Override + public boolean hasNext() { + return ord != 0; + } + + @Override + public BytesRef next() { + withOrds.getValueScratchByOrd(ord, scratch); + ord = ordsIter.next(); + return scratch; + } + } } - static class Empty implements BytesValues { - @Override - public boolean isMultiValued() { - return false; + public static class Empty extends BytesValues { + + public Empty() { + super(false); } @Override @@ -137,11 +185,6 @@ public 
interface BytesValues { return false; } - @Override - public BytesRef getValue(int docId) { - return null; - } - @Override public BytesRefArrayRef getValues(int docId) { return BytesRefArrayRef.EMPTY; @@ -157,13 +200,6 @@ public interface BytesValues { proc.onMissing(docId); } - @Override - public BytesRef makeSafe(BytesRef bytes) { - //todo we can also throw an excepiton here as the only value this method accepts is a scratch value... - //todo ...extracted from this ByteValues, in our case, there are not values, so this should never be called!?!? - return BytesRef.deepCopyOf(bytes); - } - @Override public BytesRef getValueScratch(int docId, BytesRef ret) { ret.length = 0; @@ -171,43 +207,25 @@ public interface BytesValues { } } - public static class StringBased implements BytesValues { + public static class StringBased extends BytesValues { + private final StringValues values; - protected final BytesRef scratch = new BytesRef(); private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1); private final ValueIter valueIter = new ValueIter(); private final Proc proc = new Proc(); public StringBased(StringValues values) { + super(values.isMultiValued()); this.values = values; } - @Override - public boolean isMultiValued() { - return values.isMultiValued(); - } - @Override public boolean hasValue(int docId) { return values.hasValue(docId); } - @Override - public BytesRef makeSafe(BytesRef bytes) { - // we need to make a copy, since we use scratch to provide it - return BytesRef.deepCopyOf(bytes); - } - - @Override - public BytesRef getValue(int docId) { - String value = values.getValue(docId); - if (value == null) return null; - scratch.copyChars(value); - return scratch; - } - @Override public BytesRef getValueScratch(int docId, BytesRef ret) { String value = values.getValue(docId); @@ -244,7 +262,7 @@ public interface BytesValues { values.forEachValueInDoc(docId, this.proc.reset(proc)); } - static class ValueIter implements Iter { + public 
static class ValueIter implements Iter { private final BytesRef scratch = new BytesRef(); private StringValues.Iter iter; @@ -266,7 +284,7 @@ public interface BytesValues { } } - static class Proc implements StringValues.ValueInDocProc { + public static class Proc implements StringValues.ValueInDocProc { private final BytesRef scratch = new BytesRef(); private BytesValues.ValueInDocProc proc; @@ -292,32 +310,106 @@ public interface BytesValues { /** * Bytes values that are based on ordinals. */ - static interface WithOrdinals extends BytesValues { + public static abstract class WithOrdinals extends BytesValues { + + protected final Docs ordinals; + protected final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0); - Ordinals.Docs ordinals(); + protected WithOrdinals(Ordinals.Docs ordinals) { + super(ordinals.isMultiValued()); + this.ordinals = ordinals; + } - BytesRef getValueByOrd(int ord); + public Ordinals.Docs ordinals() { + return ordinals; + } + + public BytesRef getValueByOrd(int ord) { + return getValueScratchByOrd(ord, scratch); + } + + @Override + public boolean hasValue(int docId) { + return ordinals.getOrd(docId) != 0; + } + + @Override + public BytesRefArrayRef getValues(int docId) { + assert !isMultiValued(); + int ord = ordinals.getOrd(docId); + if (ord == 0) return BytesRefArrayRef.EMPTY; + arrayScratch.values[0] = getSafeValueByOrd(ord); + arrayScratch.end = 1; + arrayScratch.start = 0; + return arrayScratch; + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + assert !isMultiValued(); + int ord = ordinals.getOrd(docId); + if (ord == 0) { + proc.onMissing(docId); + } else { + proc.onValue(docId, getValue(docId)); + } + } + + protected BytesRefArrayRef getValuesMulti(int docId) { + assert isMultiValued(); + IntArrayRef ords = ordinals.getOrds(docId); + int size = ords.size(); + if (size == 0) { + return BytesRefArrayRef.EMPTY; + } + arrayScratch.reset(size); + for (int i = ords.start; i < 
ords.end; i++) { + arrayScratch.values[arrayScratch.end++] = getValueScratchByOrd(ords.values[i], new BytesRef()); + } + return arrayScratch; + } + + protected void forEachValueInDocMulti(int docId, ValueInDocProc proc) { + assert isMultiValued(); + Ordinals.Docs.Iter iter = ordinals.getIter(docId); + int ord = iter.next(); + if (ord == 0) { + proc.onMissing(docId); + return; + } + do { + getValueScratchByOrd(ord, scratch); + proc.onValue(docId, scratch); + } while ((ord = iter.next()) != 0); + } + + @Override + public BytesRef getValue(int docId) { + int ord = ordinals.getOrd(docId); + if (ord == 0) return null; + return getValueScratchByOrd(ord, scratch); + } + + @Override + public BytesRef getValueScratch(int docId, BytesRef ret) { + return getValueScratchByOrd(ordinals.getOrd(docId), ret); + } + + public BytesRef getSafeValueByOrd(int ord) { + return getValueScratchByOrd(ord, new BytesRef()); + } /** * Returns the bytes value for the docId, with the provided "ret" which will be filled with the * result which will also be returned. If there is no value for this docId, the length will be 0. * Note, the bytes are not "safe". 
*/ - BytesRef getValueScratchByOrd(int ord, BytesRef ret); + public abstract BytesRef getValueScratchByOrd(int ord, BytesRef ret); - BytesRef getSafeValueByOrd(int ord); + public static class Empty extends WithOrdinals { - public static class Empty extends BytesValues.Empty implements WithOrdinals { - - private final Ordinals ordinals; - - public Empty(EmptyOrdinals ordinals) { - this.ordinals = ordinals; - } - - @Override - public Ordinals.Docs ordinals() { - return ordinals.ordinals(); + public Empty(Ordinals.Docs ordinals) { + super(ordinals); } @Override @@ -335,38 +427,32 @@ public interface BytesValues { public BytesRef getSafeValueByOrd(int ord) { return null; } - } - public static class StringBased extends BytesValues.StringBased implements WithOrdinals { - - private final StringValues.WithOrdinals values; - - public StringBased(StringValues.WithOrdinals values) { - super(values); - this.values = values; + @Override + public boolean hasValue(int docId) { + return false; } @Override - public Ordinals.Docs ordinals() { - return values.ordinals(); + public BytesRefArrayRef getValues(int docId) { + return BytesRefArrayRef.EMPTY; } @Override - public BytesRef getValueByOrd(int ord) { - scratch.copyChars(values.getValueByOrd(ord)); - return scratch; + public Iter getIter(int docId) { + return Iter.Empty.INSTANCE; } @Override - public BytesRef getValueScratchByOrd(int ord, BytesRef ret) { - ret.copyChars(values.getValueByOrd(ord)); + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + proc.onMissing(docId); + } + + @Override + public BytesRef getValueScratch(int docId, BytesRef ret) { + ret.length = 0; return ret; } - - @Override - public BytesRef getSafeValueByOrd(int ord) { - return new BytesRef(values.getValueByOrd(ord)); - } } } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/HashedBytesValues.java b/src/main/java/org/elasticsearch/index/fielddata/HashedBytesValues.java index e931131605d..943c634ea16 100644 --- 
a/src/main/java/org/elasticsearch/index/fielddata/HashedBytesValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/HashedBytesValues.java @@ -22,7 +22,6 @@ package org.elasticsearch.index.fielddata; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.lucene.HashedBytesRef; -import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals; import org.elasticsearch.index.fielddata.ordinals.Ordinals; /** @@ -346,7 +345,7 @@ public interface HashedBytesValues { private final Ordinals ordinals; - public Empty(EmptyOrdinals ordinals) { + public Empty(Ordinals ordinals) { this.ordinals = ordinals; } diff --git a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java index 85c3dbaabc1..5e973c12b59 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java +++ b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java @@ -61,6 +61,7 @@ public class IndexFieldDataService extends AbstractIndexComponent implements Ind buildersByTypeAndFormat = MapBuilder., IndexFieldData.Builder>newMapBuilder() .put(Tuple.tuple("string", "concrete_bytes"), new ConcreteBytesRefIndexFieldData.Builder()) .put(Tuple.tuple("string", "paged_bytes"), new PagedBytesIndexFieldData.Builder()) + .put(Tuple.tuple("string", "fst"), new FSTPackedIndexFieldData.Builder()) .put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder()) .put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder()) .put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder()) diff --git a/src/main/java/org/elasticsearch/index/fielddata/StringValues.java b/src/main/java/org/elasticsearch/index/fielddata/StringValues.java index 283f369e2c5..7d49e658e54 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/StringValues.java +++ 
b/src/main/java/org/elasticsearch/index/fielddata/StringValues.java @@ -19,11 +19,16 @@ package org.elasticsearch.index.fielddata; -import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.ElasticSearchIllegalStateException; -import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals; import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.fielddata.util.*; +import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs; +import org.elasticsearch.index.fielddata.util.DoubleArrayRef; +import org.elasticsearch.index.fielddata.util.IntArrayRef; +import org.elasticsearch.index.fielddata.util.LongArrayRef; +import org.elasticsearch.index.fielddata.util.StringArrayRef; /** */ @@ -335,7 +340,7 @@ public interface StringValues { private final Ordinals ordinals; - public Empty(EmptyOrdinals ordinals) { + public Empty(Ordinals ordinals) { this.ordinals = ordinals; } @@ -350,4 +355,152 @@ public interface StringValues { } } } + + public static class BytesValuesWrapper implements StringValues.WithOrdinals { + private org.elasticsearch.index.fielddata.BytesValues.WithOrdinals delegate; + private final CharsRef spare = new CharsRef(); + protected final Docs ordinals; + protected final StringArrayRef arrayScratch; + private final OrdinalIter iter = new OrdinalIter(this); + + BytesValuesWrapper(BytesValues.WithOrdinals delegate) { + arrayScratch = new StringArrayRef(new String[delegate.isMultiValued() ? 10 : 1], delegate.isMultiValued() ? 
0 : 1); + this.delegate = delegate; + this.ordinals = delegate.ordinals(); + } + + public static StringValues.WithOrdinals wrap(BytesValues.WithOrdinals values) { + if (values.isMultiValued()) { + return new MultiBytesValuesWrapper(values); + } else { + return new BytesValuesWrapper(values); + } + } + @Override + public String getValue(int docId) { + final BytesRef value = delegate.getValue(docId); + if (value != null) { + UnicodeUtil.UTF8toUTF16(value, spare); + return spare.toString(); + } + return null; + } + + @Override + public Iter getIter(int docId) { + return iter.reset(this.ordinals.getIter(docId)); + } + + @Override + public StringArrayRef getValues(int docId) { + assert !isMultiValued(); + int ord = ordinals.getOrd(docId); + if (ord == 0) return StringArrayRef.EMPTY; + arrayScratch.values[0] = getValueByOrd(ord); + return arrayScratch; + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + assert !isMultiValued(); + int ord = ordinals.getOrd(docId); + if (ord == 0) { + proc.onMissing(docId); + } else { + proc.onValue(docId, getValueByOrd(ord)); + } + } + + @Override + public Docs ordinals() { + return delegate.ordinals; + } + + @Override + public String getValueByOrd(int ord) { + final BytesRef value = delegate.getValueByOrd(ord); + if (value != null) { + UnicodeUtil.UTF8toUTF16(value, spare); + return spare.toString(); + } + return null; + } + + + @Override + public boolean isMultiValued() { + return delegate.isMultiValued(); + } + + + @Override + public boolean hasValue(int docId) { + return delegate.hasValue(docId); + } + + } + + static final class MultiBytesValuesWrapper extends BytesValuesWrapper { + MultiBytesValuesWrapper(org.elasticsearch.index.fielddata.BytesValues.WithOrdinals delegate) { + super(delegate); + } + + @Override + public StringArrayRef getValues(int docId) { + assert isMultiValued(); + + IntArrayRef ords = ordinals.getOrds(docId); + int size = ords.size(); + if (size == 0) return StringArrayRef.EMPTY; 
+ arrayScratch.reset(size); + for (int i = ords.start; i < ords.end; i++) { + arrayScratch.values[arrayScratch.end++] = getValueByOrd(ords.get(i)); + } + return arrayScratch; + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + assert isMultiValued(); + + Ordinals.Docs.Iter iter = ordinals.getIter(docId); + int ord = iter.next(); + if (ord == 0) { + proc.onMissing(docId); + } else { + do { + proc.onValue(docId, getValueByOrd(ord)); + } while ((ord = iter.next()) != 0); + } + } + } + + static final class OrdinalIter implements StringValues.Iter { + + private Ordinals.Docs.Iter ordsIter; + private int ord; + private final StringValues.WithOrdinals values; + + OrdinalIter(StringValues.WithOrdinals values) { + this.values = values; + } + + public OrdinalIter reset(Ordinals.Docs.Iter ordsIter) { + this.ordsIter = ordsIter; + this.ord = ordsIter.next(); + return this; + } + + @Override + public boolean hasNext() { + return ord != 0; + } + + @Override + public String next() { + final String valueByOrd = values.getValueByOrd(ord); + ord = ordsIter.next(); + return valueByOrd; + } + } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java index c4e7253acef..ad66935c28e 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java +++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefValComparator.java @@ -107,26 +107,19 @@ public final class BytesRefValComparator extends FieldComparator { return docTerms.getValue(doc).compareTo(value); } - public static class FilteredByteValues implements BytesValues { + public static class FilteredByteValues extends BytesValues { protected final BytesValues delegate; public FilteredByteValues(BytesValues delegate) { + super(delegate.isMultiValued()); this.delegate = delegate; } - public boolean 
isMultiValued() { - return delegate.isMultiValued(); - } - public boolean hasValue(int docId) { return delegate.hasValue(docId); } - public BytesRef getValue(int docId) { - return delegate.getValue(docId); - } - public BytesRef makeSafe(BytesRef bytes) { return delegate.makeSafe(bytes); } @@ -146,6 +139,7 @@ public final class BytesRefValComparator extends FieldComparator { public void forEachValueInDoc(int docId, ValueInDocProc proc) { delegate.forEachValueInDoc(docId, proc); } + } private static final class MultiValuedBytesWrapper extends FilteredByteValues { diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/ByteArrayAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/ByteArrayAtomicFieldData.java index fb2b6a108e0..f71e377d44f 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/ByteArrayAtomicFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/ByteArrayAtomicFieldData.java @@ -139,7 +139,7 @@ public abstract class ByteArrayAtomicFieldData implements AtomicNumericFieldData @Override public HashedBytesValues getHashedBytesValues() { - return new HashedBytesValues.StringBased(getStringValues()); + return new HashedBytesValues.BytesBased(getBytesValues()); } @Override @@ -406,7 +406,7 @@ public abstract class ByteArrayAtomicFieldData implements AtomicNumericFieldData @Override public HashedBytesValues getHashedBytesValues() { - return new HashedBytesValues.StringBased(getStringValues()); + return new HashedBytesValues.BytesBased(getBytesValues()); } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/ConcreteBytesRefAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/ConcreteBytesRefAtomicFieldData.java index 14bb530b962..85889ff7b38 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/ConcreteBytesRefAtomicFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/ConcreteBytesRefAtomicFieldData.java @@ 
-24,11 +24,10 @@ import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.lucene.HashedBytesRef; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; +import org.elasticsearch.index.fielddata.StringValues; import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals; import org.elasticsearch.index.fielddata.ordinals.Ordinals; import org.elasticsearch.index.fielddata.util.BytesRefArrayRef; -import org.elasticsearch.index.fielddata.util.IntArrayRef; -import org.elasticsearch.index.fielddata.util.StringArrayRef; /** */ @@ -105,7 +104,7 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi @Override public StringValues.WithOrdinals getStringValues() { - return ordinals.isMultiValued() ? new StringValues.Multi(values, ordinals.ordinals()) : new StringValues.Single(values, ordinals.ordinals()); + return StringValues.BytesValuesWrapper.wrap(getBytesValues()); } @Override @@ -113,19 +112,13 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi return new ScriptDocValues.Strings(getStringValues()); } - static abstract class BytesValues implements org.elasticsearch.index.fielddata.BytesValues.WithOrdinals { + static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals { protected final BytesRef[] values; - protected final Ordinals.Docs ordinals; BytesValues(BytesRef[] values, Ordinals.Docs ordinals) { + super(ordinals); this.values = values; - this.ordinals = ordinals; - } - - @Override - public Ordinals.Docs ordinals() { - return this.ordinals; } @Override @@ -151,22 +144,12 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi return values[ord]; } - @Override - public boolean hasValue(int docId) { - return ordinals.getOrd(docId) != 0; - } - @Override public BytesRef makeSafe(BytesRef bytes) { // no need to do anything, its already concrete bytes... 
return bytes; } - @Override - public BytesRef getValue(int docId) { - return values[ordinals.getOrd(docId)]; - } - @Override public BytesRef getValueScratch(int docId, BytesRef ret) { BytesRef value = values[ordinals.getOrd(docId)]; @@ -180,72 +163,35 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi return ret; } - static class Single extends BytesValues { + static final class Single extends BytesValues { - private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1); private final Iter.Single iter = new Iter.Single(); Single(BytesRef[] values, Ordinals.Docs ordinals) { super(values, ordinals); } - @Override - public boolean isMultiValued() { - return false; - } - - @Override - public BytesRefArrayRef getValues(int docId) { - int ord = ordinals.getOrd(docId); - if (ord == 0) return BytesRefArrayRef.EMPTY; - arrayScratch.values[0] = values[ord]; - return arrayScratch; - } - @Override public Iter getIter(int docId) { int ord = ordinals.getOrd(docId); if (ord == 0) return Iter.Empty.INSTANCE; return iter.reset(values[ord]); } - - @Override - public void forEachValueInDoc(int docId, ValueInDocProc proc) { - int ord = ordinals.getOrd(docId); - if (ord == 0) { - proc.onMissing(docId); - } else { - proc.onValue(docId, values[ord]); - } - } } - static class Multi extends BytesValues { + static final class Multi extends BytesValues { - private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0); - private final ValuesIter iter; + private final Iter.Multi iter; Multi(BytesRef[] values, Ordinals.Docs ordinals) { super(values, ordinals); - this.iter = new ValuesIter(values); - } - - @Override - public boolean isMultiValued() { - return true; + assert ordinals.isMultiValued(); + this.iter = new Iter.Multi(this); } @Override public BytesRefArrayRef getValues(int docId) { - IntArrayRef ords = ordinals.getOrds(docId); - int size = ords.size(); - if (size == 0) return BytesRefArrayRef.EMPTY; - 
- arrayScratch.reset(size); - for (int i = ords.start; i < ords.end; i++) { - arrayScratch.values[arrayScratch.end++] = values[ords.values[i]]; - } - return arrayScratch; + return getValuesMulti(docId); } @Override @@ -255,45 +201,10 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi @Override public void forEachValueInDoc(int docId, ValueInDocProc proc) { - Ordinals.Docs.Iter iter = ordinals.getIter(docId); - int ord = iter.next(); - if (ord == 0) { - proc.onMissing(docId); - return; - } - do { - proc.onValue(docId, values[ord]); - } while ((ord = iter.next()) != 0); + forEachValueInDocMulti(docId, proc); } - static class ValuesIter implements Iter { - - private final BytesRef[] values; - private Ordinals.Docs.Iter ordsIter; - private int ord; - - ValuesIter(BytesRef[] values) { - this.values = values; - } - - public ValuesIter reset(Ordinals.Docs.Iter ordsIter) { - this.ordsIter = ordsIter; - this.ord = ordsIter.next(); - return this; - } - - @Override - public boolean hasNext() { - return ord != 0; - } - - @Override - public BytesRef next() { - BytesRef value = values[ord]; - ord = ordsIter.next(); - return value; - } - } + } } @@ -443,160 +354,6 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi } } - static abstract class StringValues implements org.elasticsearch.index.fielddata.StringValues.WithOrdinals { - - protected final BytesRef[] values; - protected final Ordinals.Docs ordinals; - - protected StringValues(BytesRef[] values, Ordinals.Docs ordinals) { - this.values = values; - this.ordinals = ordinals; - } - - @Override - public Ordinals.Docs ordinals() { - return ordinals; - } - - @Override - public String getValueByOrd(int ord) { - BytesRef value = values[ord]; - if (value == null) return null; - return value.utf8ToString(); - } - - @Override - public boolean hasValue(int docId) { - return ordinals.getOrd(docId) != 0; - } - - @Override - public String getValue(int docId) { - BytesRef 
value = values[ordinals.getOrd(docId)]; - if (value == null) return null; - return value.utf8ToString(); - } - - static class Single extends StringValues { - - private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1); - private final Iter.Single iter = new Iter.Single(); - - Single(BytesRef[] values, Ordinals.Docs ordinals) { - super(values, ordinals); - } - - @Override - public boolean isMultiValued() { - return false; - } - - @Override - public StringArrayRef getValues(int docId) { - int ord = ordinals.getOrd(docId); - if (ord == 0) return StringArrayRef.EMPTY; - BytesRef value = values[ord]; - arrayScratch.values[0] = value == null ? null : value.utf8ToString(); - return arrayScratch; - } - - @Override - public Iter getIter(int docId) { - int ord = ordinals.getOrd(docId); - if (ord == 0) return Iter.Empty.INSTANCE; - return iter.reset(values[ord].utf8ToString()); - } - - @Override - public void forEachValueInDoc(int docId, ValueInDocProc proc) { - int ord = ordinals.getOrd(docId); - if (ord == 0) { - proc.onMissing(docId); - return; - } - proc.onValue(docId, values[ord].utf8ToString()); - } - } - - static class Multi extends StringValues { - - private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0); - private final ValuesIter iter; - - Multi(BytesRef[] values, Ordinals.Docs ordinals) { - super(values, ordinals); - iter = new ValuesIter(values); - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public StringArrayRef getValues(int docId) { - IntArrayRef ords = ordinals.getOrds(docId); - int size = ords.size(); - if (size == 0) return StringArrayRef.EMPTY; - - arrayScratch.reset(size); - for (int i = ords.start; i < ords.end; i++) { - BytesRef value = values[ords.values[i]]; - arrayScratch.values[arrayScratch.end++] = value == null ? 
null : value.utf8ToString(); - } - return arrayScratch; - } - - @Override - public Iter getIter(int docId) { - return iter.reset(ordinals.getIter(docId)); - } - - @Override - public void forEachValueInDoc(int docId, ValueInDocProc proc) { - Ordinals.Docs.Iter iter = ordinals.getIter(docId); - int ord = iter.next(); - if (ord == 0) { - proc.onMissing(docId); - return; - } - do { - BytesRef value = values[ord]; - proc.onValue(docId, value == null ? null : value.utf8ToString()); - } while ((ord = iter.next()) != 0); - } - - static class ValuesIter implements StringValues.Iter { - - private final BytesRef[] values; - private Ordinals.Docs.Iter ordsIter; - private int ord; - - ValuesIter(BytesRef[] values) { - this.values = values; - } - - public ValuesIter reset(Ordinals.Docs.Iter ordsIter) { - this.ordsIter = ordsIter; - this.ord = ordsIter.next(); - return this; - } - - @Override - public boolean hasNext() { - return ord != 0; - } - - @Override - public String next() { - BytesRef value = values[ord]; - ord = ordsIter.next(); - return value == null ? null : value.utf8ToString(); - } - } - } - } - static class Empty extends ConcreteBytesRefAtomicFieldData { Empty(int numDocs) { @@ -625,7 +382,7 @@ public class ConcreteBytesRefAtomicFieldData implements AtomicFieldData.WithOrdi @Override public BytesValues.WithOrdinals getBytesValues() { - return new BytesValues.WithOrdinals.Empty((EmptyOrdinals) ordinals); + return new BytesValues.WithOrdinals.Empty(ordinals.ordinals()); } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedBytesAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedBytesAtomicFieldData.java new file mode 100644 index 00000000000..e9763b9092f --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedBytesAtomicFieldData.java @@ -0,0 +1,250 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.fielddata.plain;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.Arc;
+import org.apache.lucene.util.fst.FST.BytesReader;
+import org.apache.lucene.util.fst.Util;
+import org.elasticsearch.index.fielddata.AtomicFieldData;
+import org.elasticsearch.index.fielddata.HashedBytesValues;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.StringValues;
+import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
+import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
+
+/**
+ * Atomic (per-segment) field data that resolves term bytes from a single {@link FST}.
+ * Values are looked up by ordinal: the ordinal is used as the FST output and the
+ * matching FST input is the term. Ordinal 0 means "no value".
+ */
+public class FSTPackedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals {
+
+    /**
+     * Creates an instance that has no FST and uses {@link EmptyOrdinals} for {@code numDocs} documents.
+     */
+    public static FSTPackedBytesAtomicFieldData empty(int numDocs) {
+        return new Empty(numDocs);
+    }
+
+    // 0 ordinal in values means no value (it's null)
+    protected final Ordinals ordinals;
+
+    // lazily computed by getHashedBytesValues(), indexed by ordinal
+    private int[] hashes;
+    // lazily computed by getMemorySizeInBytes(); -1 means "not computed yet"
+    private long size = -1;
+
+    // null only for the Empty subclass
+    private final FST fst;
+
+    public FSTPackedBytesAtomicFieldData(FST fst, Ordinals ordinals) {
+        this.ordinals = ordinals;
+        this.fst = fst;
+    }
+
+    @Override
+    public void close() {
+    }
+
+    @Override
+    public boolean isMultiValued() {
+        return ordinals.isMultiValued();
+    }
+
+    @Override
+    public int getNumDocs() {
+        return ordinals.getNumDocs();
+    }
+
+    @Override
+    public boolean isValuesOrdered() {
+        return true;
+    }
+
+    /**
+     * Returns the memory used by the ordinals plus the FST (when present).
+     * The sum is computed on the first call and cached in {@code size}.
+     */
+    @Override
+    public long getMemorySizeInBytes() {
+        if (size == -1) {
+            long size = ordinals.getMemorySizeInBytes();
+            // FST
+            size += fst == null ? 0 : fst.sizeInBytes();
+            this.size = size;
+        }
+        return size;
+    }
+
+    /**
+     * Returns a new FST-backed values view; the multi- or single-valued variant is
+     * chosen from {@code ordinals.isMultiValued()}.
+     */
+    @Override
+    public BytesValues.WithOrdinals getBytesValues() {
+        assert fst != null;
+        return ordinals.isMultiValued() ? new BytesValues.Multi(fst, ordinals.ordinals()) : new BytesValues.Single(fst, ordinals.ordinals());
+    }
+
+    /**
+     * Returns hashed values backed by {@link #getBytesValues()}. The hash table is built
+     * on first use by enumerating the FST; the i-th enumerated input is stored in slot i.
+     * NOTE(review): this presumes FST enumeration order equals ordinal order and that the
+     * FST holds at most {@code ordinals.getMaxOrd()} entries - confirm against the loader.
+     *
+     * @throws IllegalStateException if enumerating the FST fails
+     */
+    @Override
+    public HashedBytesValues.WithOrdinals getHashedBytesValues() {
+        assert fst != null;
+        if (hashes == null) {
+            BytesRefFSTEnum fstEnum = new BytesRefFSTEnum(fst);
+            int[] hashes = new int[ordinals.getMaxOrd()];
+            InputOutput next;
+            int i = 0;
+            try {
+                while((next = fstEnum.next()) != null) {
+                    hashes[i++] = next.input.hashCode();
+                }
+            } catch (IOException ex) {
+                // Previously swallowed as "bogus". Fail loudly rather than caching a
+                // partially filled hash table in the field below.
+                throw new IllegalStateException("failed to enumerate FST terms while computing hashes", ex);
+            }
+            this.hashes = hashes;
+        }
+        return ordinals.isMultiValued() ? new HashedBytesValuesWithOrds.Multi(getBytesValues(), hashes) : new HashedBytesValuesWithOrds.Single(getBytesValues(), hashes);
+    }
+
+    @Override
+    public StringValues.WithOrdinals getStringValues() {
+        assert fst != null;
+        return StringValues.BytesValuesWrapper.wrap(getBytesValues());
+    }
+
+    @Override
+    public ScriptDocValues.Strings getScriptValues() {
+        assert fst != null;
+        return new ScriptDocValues.Strings(getStringValues());
+    }
+
+    /**
+     * Values view that resolves an ordinal to its term bytes through the FST.
+     * Each instance owns its own reader and scratch objects.
+     */
+    static abstract class BytesValues extends org.elasticsearch.index.fielddata.BytesValues.WithOrdinals {
+
+        protected final FST fst;
+        protected final Ordinals.Docs ordinals;
+
+        protected final BytesRef scratch = new BytesRef();
+        // per-thread resources
+        protected final BytesReader in;
+        protected final Arc firstArc = new Arc();
+        protected final Arc scratchArc = new Arc();
+        protected final IntsRef scratchInts = new IntsRef();
+
+        BytesValues(FST fst, Ordinals.Docs ordinals) {
+            super(ordinals);
+            this.fst = fst;
+            this.ordinals = ordinals;
+            in = fst.getBytesReader();
+        }
+
+        /**
+         * Looks up the FST input whose output is {@code ord} and copies it into {@code ret}.
+         *
+         * @param ord the ordinal to resolve
+         * @param ret the buffer that receives the bytes; its offset and length are reset
+         * @return {@code ret}
+         * @throws IllegalStateException if reading the FST fails
+         */
+        @Override
+        public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
+            in.setPosition(0);
+            fst.getFirstArc(firstArc);
+            try {
+                IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
+                ret.grow(output.length);
+                ret.length = ret.offset = 0;
+                Util.toBytesRef(output, ret);
+            } catch (IOException ex) {
+                // Previously swallowed as "bogus", which handed back a buffer that was
+                // never filled for this ordinal. Surface the failure instead.
+                throw new IllegalStateException("failed to read value for ordinal [" + ord + "] from FST", ex);
+            }
+            return ret;
+        }
+
+        /** Single-valued variant: at most one ordinal per document. */
+        static final class Single extends BytesValues {
+            private final Iter.Single iter = new Iter.Single();
+
+            Single(FST fst, Ordinals.Docs ordinals) {
+                super(fst, ordinals);
+                assert !ordinals.isMultiValued();
+            }
+
+            @Override
+            public Iter getIter(int docId) {
+                int ord = ordinals.getOrd(docId);
+                // ordinal 0 means the document has no value
+                if (ord == 0) return Iter.Empty.INSTANCE;
+                return iter.reset(getValueByOrd(ord));
+            }
+        }
+
+        /** Multi-valued variant: delegates to the multi-value helpers of the base class. */
+        static final class Multi extends BytesValues {
+
+            private final Iter.Multi iter;
+
+            Multi(FST fst, Ordinals.Docs ordinals) {
+                super(fst, ordinals);
+                assert ordinals.isMultiValued();
+                this.iter = new Iter.Multi(this);
+            }
+
+            @Override
+            public BytesRefArrayRef getValues(int docId) {
+                return getValuesMulti(docId);
+            }
+
+            @Override
+            public Iter getIter(int docId) {
+                return iter.reset(ordinals.getIter(docId));
+            }
+
+            @Override
+            public void forEachValueInDoc(int docId, ValueInDocProc proc) {
+                forEachValueInDocMulti(docId, proc);
+            }
+        }
+    }
+
+
+
+    /**
+     * Variant without an FST; every accessor returns an empty implementation,
+     * so the {@code fst != null} assertions of the parent are never reached.
+     */
+    static class Empty extends FSTPackedBytesAtomicFieldData {
+
+        Empty(int numDocs) {
+            super(null, new EmptyOrdinals(numDocs));
+        }
+
+        @Override
+        public boolean isMultiValued() {
+            return false;
+        }
+
+        @Override
+        public int getNumDocs() {
+            return ordinals.getNumDocs();
+        }
+
+        @Override
+        public boolean isValuesOrdered() {
+            return true;
+        }
+
+        @Override
+        public BytesValues.WithOrdinals getBytesValues() {
+            return new BytesValues.WithOrdinals.Empty(ordinals.ordinals());
+        }
+
+        @Override
+        public HashedBytesValues.WithOrdinals getHashedBytesValues() {
+            return new HashedBytesValuesWithOrds.Empty(ordinals);
+        }
+
+        @Override
+        public StringValues.WithOrdinals getStringValues() {
+            return new StringValues.WithOrdinals.Empty(ordinals);
+        }
+
+        @Override
+        public ScriptDocValues.Strings getScriptValues() {
+            return ScriptDocValues.EMPTY_STRINGS;
+        }
+    }
+
+}
diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedIndexFieldData.java
new file mode 100644
index 00000000000..f230c0867ff
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/FSTPackedIndexFieldData.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.fielddata.plain;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.INPUT_TYPE;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
+import org.elasticsearch.ElasticSearchException;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.fielddata.AbstractIndexFieldData;
+import org.elasticsearch.index.fielddata.FieldDataType;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldDataCache;
+import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
+import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
+import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * Index-level field data that loads each segment into an
+ * {@link FSTPackedBytesAtomicFieldData}: an FST mapping every term to its ordinal,
+ * plus the per-document ordinals.
+ */
+public class FSTPackedIndexFieldData extends AbstractIndexFieldData implements IndexFieldData.WithOrdinals {
+
+    /** Factory that creates {@link FSTPackedIndexFieldData} instances. */
+    public static class Builder implements IndexFieldData.Builder {
+
+        @Override
+        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
+            return new FSTPackedIndexFieldData(index, indexSettings, fieldNames, type, cache);
+        }
+    }
+
+    public FSTPackedIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
+        super(index, indexSettings, fieldNames, fieldDataType, cache);
+    }
+
+    @Override
+    public boolean valuesOrdered() {
+        return true;
+    }
+
+    /**
+     * Loads the segment through the cache. An {@link ElasticSearchException} is
+     * propagated unchanged; any other throwable is wrapped in one, keeping its
+     * message and cause.
+     */
+    @Override
+    public FSTPackedBytesAtomicFieldData load(AtomicReaderContext context) {
+        try {
+            return cache.load(context, this);
+        } catch (ElasticSearchException alreadyTyped) {
+            throw alreadyTyped;
+        } catch (Throwable other) {
+            throw new ElasticSearchException(other.getMessage(), other);
+        }
+    }
+
+    /**
+     * Builds the field data for one segment straight from its terms.
+     * Returns the empty variant when the field has no terms in this segment.
+     */
+    @Override
+    public FSTPackedBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
+        final AtomicReader segmentReader = context.reader();
+        final Terms fieldTerms = segmentReader.terms(getFieldNames().indexName());
+        if (fieldTerms == null) {
+            return FSTPackedBytesAtomicFieldData.empty(segmentReader.maxDoc());
+        }
+
+        PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(true);
+        org.apache.lucene.util.fst.Builder fstBuilder = new org.apache.lucene.util.fst.Builder(INPUT_TYPE.BYTE1, fstOutputs);
+        final IntsRef intsScratch = new IntsRef();
+
+        OrdinalsBuilder ordinalsBuilder = new OrdinalsBuilder(fieldTerms, segmentReader.maxDoc());
+        try {
+            // 0 is reserved for "unset"
+            fstBuilder.add(Util.toIntsRef(new BytesRef(), intsScratch), 0L);
+
+            TermsEnum termsEnum = fieldTerms.iterator(null);
+            DocsEnum docsEnum = null;
+            BytesRef term;
+            while ((term = termsEnum.next()) != null) {
+                // each term gets the next ordinal and is stored in the FST with it as output
+                final int termOrd = ordinalsBuilder.nextOrdinal();
+                fstBuilder.add(Util.toIntsRef(term, intsScratch), (long) termOrd);
+
+                // record every live document containing the term under that ordinal
+                docsEnum = termsEnum.docs(segmentReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
+                int docId;
+                while ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+                    ordinalsBuilder.addDoc(docId);
+                }
+            }
+
+            FST fst = fstBuilder.finish();
+            final Ordinals ordinals = ordinalsBuilder.build(fieldDataType.getSettings());
+            return new FSTPackedBytesAtomicFieldData(fst, ordinals);
+        } finally {
+            ordinalsBuilder.close();
+        }
+    }
+
+    @Override
+    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
+        // TODO support "missingValue" for sortMissingValue options here...
+        return new BytesRefFieldComparatorSource(this, sortMode);
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/HashedBytesValuesWithOrds.java b/src/main/java/org/elasticsearch/index/fielddata/plain/HashedBytesValuesWithOrds.java
new file mode 100644
index 00000000000..9e1f91f36b2
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/HashedBytesValuesWithOrds.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.fielddata.plain;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.HashedBytesRef;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
+import org.elasticsearch.index.fielddata.plain.FSTPackedBytesAtomicFieldData.BytesValues;
+
+/**
+ * shared utils class - should be factored into HashedBytesValues
+ * <p>
+ * Pairs the bytes resolved by an ordinal-based values source with a precomputed
+ * hash taken from {@code hashes[ord]}. Unless stated otherwise, returned
+ * {@link HashedBytesRef} instances are the shared scratch object and are
+ * overwritten by the next lookup on the same instance.
+ */
+abstract class HashedBytesValuesWithOrds implements org.elasticsearch.index.fielddata.HashedBytesValues.WithOrdinals {
+
+    // hash per ordinal, indexed by ordinal
+    protected final int[] hashes;
+    protected final Ordinals.Docs ordinals;
+
+    // reusable buffers: scratch1 receives the bytes, scratch wraps them with the hash
+    protected final BytesRef scratch1 = new BytesRef();
+    protected final HashedBytesRef scratch = new HashedBytesRef();
+    protected final BytesValues.WithOrdinals withOrds;
+
+    HashedBytesValuesWithOrds(BytesValues.WithOrdinals withOrds, int[] hashes) {
+        this.hashes = hashes;
+        this.ordinals = withOrds.ordinals();
+        this.withOrds = withOrds;
+    }
+
+    @Override
+    public boolean isMultiValued() {
+        return withOrds.isMultiValued();
+    }
+
+    /**
+     * Single-valued traversal: reports the document as missing when its ordinal is 0,
+     * otherwise hands the (scratch) value to {@code proc}.
+     */
+    @Override
+    public void forEachValueInDoc(int docId, ValueInDocProc proc) {
+        int ord = ordinals.getOrd(docId);
+        if (ord == 0) {
+            proc.onMissing(docId);
+        } else {
+            proc.onValue(docId, scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]));
+        }
+    }
+
+
+    /**
+     * Multi-valued traversal: walks the document's ordinals until a 0 ordinal is
+     * returned; reports the document as missing when the first ordinal is already 0.
+     */
+    protected final void forEachValueInDocMulti(int docId, ValueInDocProc proc) {
+        Ordinals.Docs.Iter iter = ordinals.getIter(docId);
+        int ord = iter.next();
+        if (ord == 0) {
+            proc.onMissing(docId);
+            return;
+        }
+        do {
+            proc.onValue(docId, scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]));
+        } while ((ord = iter.next()) != 0);
+    }
+
+
+
+    @Override
+    public Ordinals.Docs ordinals() {
+        return this.ordinals;
+    }
+
+    /** Returns the shared scratch value for {@code ord}; valid only until the next lookup. */
+    @Override
+    public HashedBytesRef getValueByOrd(int ord) {
+        return scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
+    }
+
+    /**
+     * Returns a value for {@code ord} that does not alias the shared scratch buffers.
+     * The bytes are deep-copied (as in {@link #makeSafe(HashedBytesRef)}); wrapping
+     * {@code scratch1} directly would let the next lookup overwrite the returned value.
+     */
+    @Override
+    public HashedBytesRef getSafeValueByOrd(int ord) {
+        return new HashedBytesRef(BytesRef.deepCopyOf(withOrds.getValueScratchByOrd(ord, scratch1)), hashes[ord]);
+    }
+
+    @Override
+    public boolean hasValue(int docId) {
+        return ordinals.getOrd(docId) != 0;
+    }
+
+    /** Returns a copy of {@code bytes} with its own byte array and the same hash. */
+    @Override
+    public HashedBytesRef makeSafe(HashedBytesRef bytes) {
+        return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
+    }
+
+    /** Returns the shared scratch value of the document, or {@code null} when its ordinal is 0. */
+    @Override
+    public HashedBytesRef getValue(int docId) {
+        int ord = ordinals.getOrd(docId);
+        if (ord == 0) return null;
+        return scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
+    }
+
+    /** Single-valued variant. */
+    final static class Single extends HashedBytesValuesWithOrds {
+
+        private final Iter.Single iter = new Iter.Single();
+
+        Single(BytesValues.WithOrdinals withOrds, int[] hashes) {
+            super(withOrds, hashes);
+        }
+
+        @Override
+        public Iter getIter(int docId) {
+            int ord = ordinals.getOrd(docId);
+            if (ord == 0) return Iter.Empty.INSTANCE;
+            return iter.reset(scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]));
+        }
+
+    }
+
+    /** Multi-valued variant. */
+    final static class Multi extends HashedBytesValuesWithOrds {
+        private final HashedBytesValuesWithOrds.Multi.MultiIter iter;
+
+        Multi(BytesValues.WithOrdinals withOrds, int[] hashes) {
+            super(withOrds, hashes);
+            this.iter = new MultiIter(withOrds, hashes);
+        }
+
+        @Override
+        public boolean isMultiValued() {
+            return true;
+        }
+
+        @Override
+        public Iter getIter(int docId) {
+            return iter.reset(ordinals.getIter(docId));
+        }
+
+        @Override
+        public void forEachValueInDoc(int docId, ValueInDocProc proc) {
+            forEachValueInDocMulti(docId, proc);
+        }
+
+        /**
+         * Iterator over the values of one document. It keeps its own scratch buffers,
+         * separate from those of the enclosing values instance; the value returned by
+         * {@link #next()} is overwritten by the following call.
+         */
+        final static class MultiIter implements Iter {
+
+            private final int[] hashes;
+            private Ordinals.Docs.Iter ordsIter;
+            // ordinal of the value the next call to next() will return; 0 when exhausted
+            private int ord;
+            private final BytesRef scratch1 = new BytesRef();
+            private final HashedBytesRef scratch = new HashedBytesRef();
+            private final BytesValues.WithOrdinals withOrds;
+
+            MultiIter(BytesValues.WithOrdinals withOrds, int[] hashes) {
+                this.hashes = hashes;
+                this.withOrds = withOrds;
+            }
+
+            public HashedBytesValuesWithOrds.Multi.MultiIter reset(Ordinals.Docs.Iter ordsIter) {
+                this.ordsIter = ordsIter;
+                this.ord = ordsIter.next();
+                return this;
+            }
+
+            @Override
+            public boolean hasNext() {
+                return ord != 0;
+            }
+
+            @Override
+            public HashedBytesRef next() {
+                HashedBytesRef value = scratch.reset(withOrds.getValueScratchByOrd(ord, scratch1), hashes[ord]);
+                ord = ordsIter.next();
+                return value;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java
index 6e7a7962a66..242ae190c14 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java
@@ -23,15 +23,13 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.packed.GrowableWriter;
 import org.apache.lucene.util.packed.PackedInts;
-import org.elasticsearch.common.RamUsage;
-import org.elasticsearch.common.lucene.HashedBytesRef;
 import org.elasticsearch.index.fielddata.AtomicFieldData;
+import org.elasticsearch.index.fielddata.HashedBytesValues;
 import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.StringValues;
 import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
-import org.elasticsearch.index.fielddata.util.IntArrayRef;
-import org.elasticsearch.index.fielddata.util.StringArrayRef;
 
 /**
  */
@@ -106,12 +104,12 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals