add packed bytes variant for strings/bytes
This commit is contained in:
parent
855b64a8a7
commit
45f27fe96a
|
@ -56,6 +56,7 @@ public class IndexFieldDataService extends AbstractIndexComponent {
|
|||
|
||||
buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
|
||||
.put(Tuple.tuple("string", "concrete_bytes"), new ConcreteBytesRefIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("string", "packed_bytes"), new PackedBytesIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
|
||||
.put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder())
|
||||
|
|
|
@ -0,0 +1,750 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.elasticsearch.common.RamUsage;
|
||||
import org.elasticsearch.common.lucene.HashedBytesRef;
|
||||
import org.elasticsearch.index.fielddata.*;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.IntArrayRef;
|
||||
import org.elasticsearch.index.fielddata.util.StringArrayRef;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class PackedBytesAtomicFieldData implements AtomicOrdinalFieldData<ScriptDocValues.Strings> {
|
||||
|
||||
// ordinal 0 in values means no value (it is null)
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final Ordinals ordinals;
|
||||
|
||||
private int[] hashes;
|
||||
private long size = -1;
|
||||
|
||||
/**
 * Creates atomic field data on top of already built packed structures.
 *
 * @param bytes                reader over the paged term bytes
 * @param termOrdToBytesOffset maps a term ordinal to the offset passed to {@code bytes.fill}
 * @param ordinals             per-document ordinals
 */
public PackedBytesAtomicFieldData(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals ordinals) {
    this.ordinals = ordinals;
    this.termOrdToBytesOffset = termOrdToBytesOffset;
    this.bytes = bytes;
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return ordinals.isMultiValued();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDocs() {
|
||||
return ordinals.getNumDocs();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValuesOrdered() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getMemorySizeInBytes() {
|
||||
if (size == -1) {
|
||||
long size = ordinals.getMemorySizeInBytes();
|
||||
// PackedBytes
|
||||
size += RamUsage.NUM_BYTES_ARRAY_HEADER + bytes.getBlocks().length;
|
||||
for (byte[] b : bytes.getBlocks()) {
|
||||
size += b.length;
|
||||
}
|
||||
// PackedInts
|
||||
size += termOrdToBytesOffset.ramBytesUsed();
|
||||
this.size = size;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdinalsBytesValues getBytesValues() {
|
||||
return ordinals.isMultiValued() ? new BytesValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new BytesValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdinalsHashedBytesValues getHashedBytesValues() {
|
||||
if (hashes == null) {
|
||||
int numberOfValues = termOrdToBytesOffset.size();
|
||||
int[] hashes = new int[numberOfValues];
|
||||
BytesRef scratch = new BytesRef();
|
||||
for (int i = 0; i < numberOfValues; i++) {
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(i));
|
||||
hashes[i] = value == null ? 0 : value.hashCode();
|
||||
}
|
||||
this.hashes = hashes;
|
||||
}
|
||||
return ordinals.isMultiValued() ? new HashedBytesValues.Multi(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals()) : new HashedBytesValues.Single(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals());
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdinalsStringValues getStringValues() {
|
||||
return ordinals.isMultiValued() ? new StringValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new StringValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScriptDocValues.Strings getScriptValues() {
|
||||
return new ScriptDocValues.Strings(getStringValues());
|
||||
}
|
||||
|
||||
static abstract class BytesValues implements org.elasticsearch.index.fielddata.OrdinalsBytesValues {
|
||||
|
||||
protected final PagedBytes.Reader bytes;
|
||||
protected final PackedInts.Reader termOrdToBytesOffset;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected final BytesRef scratch = new BytesRef();
|
||||
|
||||
BytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return this.ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueByOrd(int ord) {
|
||||
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
|
||||
return bytes.fill(ret, termOrdToBytesOffset.get(ord));
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getSafeValueByOrd(int ord) {
|
||||
return bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef makeSafe(BytesRef bytes) {
|
||||
return BytesRef.deepCopyOf(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueScratch(int docId, BytesRef ret) {
|
||||
return bytes.fill(ret, termOrdToBytesOffset.get(ordinals.getOrd(docId)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getValueSafe(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
return bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
|
||||
}
|
||||
|
||||
static class Single extends BytesValues {
|
||||
|
||||
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return BytesRefArrayRef.EMPTY;
|
||||
arrayScratch.values[0] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIterSafe(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends BytesValues {
|
||||
|
||||
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
|
||||
private final ValuesIter iter;
|
||||
private final SafeValuesIter safeIter;
|
||||
|
||||
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, ordinals);
|
||||
this.iter = new ValuesIter(bytes, termOrdToBytesOffset);
|
||||
this.safeIter = new SafeValuesIter(bytes, termOrdToBytesOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefArrayRef getValues(int docId) {
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) return BytesRefArrayRef.EMPTY;
|
||||
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
arrayScratch.values[arrayScratch.end++] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ords.values[i]));
|
||||
}
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIterSafe(int docId) {
|
||||
return safeIter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
proc.onValue(docId, bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
static class ValuesIter implements Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
ord = ordsIter.next();
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
static class SafeValuesIter implements Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
SafeValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
}
|
||||
|
||||
public SafeValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
BytesRef value = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
|
||||
ord = ordsIter.next();
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class HashedBytesValues implements org.elasticsearch.index.fielddata.OrdinalsHashedBytesValues {
|
||||
|
||||
protected final PagedBytes.Reader bytes;
|
||||
protected final PackedInts.Reader termOrdToBytesOffset;
|
||||
protected final int[] hashes;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected final BytesRef scratch1 = new BytesRef();
|
||||
protected final HashedBytesRef scratch = new HashedBytesRef();
|
||||
|
||||
HashedBytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.hashes = hashes;
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return this.ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getValueByOrd(int ord) {
|
||||
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getSafeValueByOrd(int ord) {
|
||||
return new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef makeSafe(HashedBytesRef bytes) {
|
||||
return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef getValueSafe(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
return new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
|
||||
}
|
||||
|
||||
static class Single extends HashedBytesValues {
|
||||
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, hashes, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIterSafe(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
} else {
|
||||
proc.onValue(docId, new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends HashedBytesValues {
|
||||
|
||||
private final ValuesIter iter;
|
||||
private final SafeValuesIter safeIter;
|
||||
|
||||
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, hashes, ordinals);
|
||||
this.iter = new ValuesIter(bytes, termOrdToBytesOffset, hashes);
|
||||
this.safeIter = new SafeValuesIter(bytes, termOrdToBytesOffset, hashes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIterSafe(int docId) {
|
||||
return safeIter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
proc.onValue(docId, new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
static class ValuesIter implements Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final int[] hashes;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
private final BytesRef scratch1 = new BytesRef();
|
||||
private final HashedBytesRef scratch = new HashedBytesRef();
|
||||
|
||||
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.hashes = hashes;
|
||||
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef next() {
|
||||
HashedBytesRef value = scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
|
||||
ord = ordsIter.next();
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
static class SafeValuesIter implements Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final int[] hashes;
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
SafeValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.hashes = hashes;
|
||||
}
|
||||
|
||||
public SafeValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashedBytesRef next() {
|
||||
HashedBytesRef value = new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
|
||||
ord = ordsIter.next();
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class StringValues implements OrdinalsStringValues {
|
||||
|
||||
protected final PagedBytes.Reader bytes;
|
||||
protected final PackedInts.Reader termOrdToBytesOffset;
|
||||
protected final Ordinals.Docs ordinals;
|
||||
|
||||
protected final BytesRef scratch = new BytesRef();
|
||||
|
||||
protected StringValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Ordinals.Docs ordinals() {
|
||||
return ordinals;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValueByOrd(int ord) {
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
return value.utf8ToString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue(int docId) {
|
||||
return ordinals.getOrd(docId) != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValue(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return null;
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
return value.utf8ToString();
|
||||
}
|
||||
|
||||
static class Single extends StringValues {
|
||||
|
||||
private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1);
|
||||
private final Iter.Single iter = new Iter.Single();
|
||||
|
||||
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, ordinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return StringArrayRef.EMPTY;
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
arrayScratch.values[0] = value == null ? null : value.utf8ToString();
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) return Iter.Empty.INSTANCE;
|
||||
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
int ord = ordinals.getOrd(docId);
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
|
||||
}
|
||||
}
|
||||
|
||||
static class Multi extends StringValues {
|
||||
|
||||
private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0);
|
||||
private final ValuesIter iter;
|
||||
|
||||
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
|
||||
super(bytes, termOrdToBytesOffset, ordinals);
|
||||
iter = new ValuesIter(bytes, termOrdToBytesOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isMultiValued() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringArrayRef getValues(int docId) {
|
||||
IntArrayRef ords = ordinals.getOrds(docId);
|
||||
int size = ords.size();
|
||||
if (size == 0) return StringArrayRef.EMPTY;
|
||||
|
||||
arrayScratch.reset(size);
|
||||
for (int i = ords.start; i < ords.end; i++) {
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ords.values[i]));
|
||||
arrayScratch.values[arrayScratch.end++] = value == null ? null : value.utf8ToString();
|
||||
}
|
||||
return arrayScratch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iter getIter(int docId) {
|
||||
return iter.reset(ordinals.getIter(docId));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
|
||||
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
|
||||
int ord = iter.next();
|
||||
if (ord == 0) {
|
||||
proc.onMissing(docId);
|
||||
return;
|
||||
}
|
||||
do {
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
proc.onValue(docId, value == null ? null : value.utf8ToString());
|
||||
} while ((ord = iter.next()) != 0);
|
||||
}
|
||||
|
||||
static class ValuesIter implements StringValues.Iter {
|
||||
|
||||
private final PagedBytes.Reader bytes;
|
||||
private final PackedInts.Reader termOrdToBytesOffset;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private Ordinals.Docs.Iter ordsIter;
|
||||
private int ord;
|
||||
|
||||
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
|
||||
this.bytes = bytes;
|
||||
this.termOrdToBytesOffset = termOrdToBytesOffset;
|
||||
}
|
||||
|
||||
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
|
||||
this.ordsIter = ordsIter;
|
||||
this.ord = ordsIter.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ord != 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
|
||||
ord = ordsIter.next();
|
||||
return value == null ? null : value.utf8ToString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.fielddata.*;
|
||||
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
||||
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.MultiFlatArrayOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.SingleArrayOrdinals;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class PackedBytesIndexFieldData extends AbstractIndexFieldData<PackedBytesAtomicFieldData> implements IndexOrdinalFieldData<PackedBytesAtomicFieldData> {
|
||||
|
||||
public static class Builder implements IndexFieldData.Builder {
|
||||
|
||||
@Override
|
||||
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
|
||||
return new PackedBytesIndexFieldData(index, indexSettings, fieldNames, type, cache);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates the index level field data; every argument is forwarded unchanged to the super class.
 */
public PackedBytesIndexFieldData(Index index,
                                 @IndexSettings Settings indexSettings,
                                 FieldMapper.Names fieldNames,
                                 FieldDataType fieldDataType,
                                 IndexFieldDataCache cache) {
    super(index, indexSettings, fieldNames, fieldDataType, cache);
}
|
||||
|
||||
@Override
|
||||
public boolean valuesOrdered() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PackedBytesAtomicFieldData load(AtomicReaderContext context) {
|
||||
try {
|
||||
return cache.load(context, this);
|
||||
} catch (Throwable e) {
|
||||
if (e instanceof ElasticSearchException) {
|
||||
throw (ElasticSearchException) e;
|
||||
} else {
|
||||
throw new ElasticSearchException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public PackedBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
|
||||
AtomicReader reader = context.reader();
|
||||
|
||||
Terms terms = reader.terms(getFieldNames().indexName());
|
||||
if (terms == null) {
|
||||
final PagedBytes bytes = new PagedBytes(1);
|
||||
// 0 is reserved for "unset"
|
||||
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||
GrowableWriter termOrdToBytesOffset = new GrowableWriter(1, 2, PackedInts.FASTEST);
|
||||
return new PackedBytesAtomicFieldData(bytes.freeze(true), termOrdToBytesOffset.getMutable(), new EmptyOrdinals(reader.maxDoc()));
|
||||
}
|
||||
|
||||
final PagedBytes bytes = new PagedBytes(15);
|
||||
int startBytesBPV;
|
||||
int startTermsBPV;
|
||||
int startNumUniqueTerms;
|
||||
|
||||
int maxDoc = reader.maxDoc();
|
||||
final int termCountHardLimit;
|
||||
if (maxDoc == Integer.MAX_VALUE) {
|
||||
termCountHardLimit = Integer.MAX_VALUE;
|
||||
} else {
|
||||
termCountHardLimit = maxDoc + 1;
|
||||
}
|
||||
|
||||
// Try for coarse estimate for number of bits; this
|
||||
// should be an underestimate most of the time, which
|
||||
// is fine -- GrowableWriter will reallocate as needed
|
||||
long numUniqueTerms = terms.size();
|
||||
if (numUniqueTerms != -1L) {
|
||||
if (numUniqueTerms > termCountHardLimit) {
|
||||
// app is misusing the API (there is more than
|
||||
// one term per doc); in this case we make best
|
||||
// effort to load what we can (see LUCENE-2142)
|
||||
numUniqueTerms = termCountHardLimit;
|
||||
}
|
||||
|
||||
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4);
|
||||
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
|
||||
|
||||
startNumUniqueTerms = (int) numUniqueTerms;
|
||||
} else {
|
||||
startBytesBPV = 1;
|
||||
startTermsBPV = 1;
|
||||
startNumUniqueTerms = 1;
|
||||
}
|
||||
|
||||
// TODO: expose this as an option..., have a nice parser for it...
|
||||
float acceptableOverheadRatio = PackedInts.FAST;
|
||||
|
||||
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1 + startNumUniqueTerms, acceptableOverheadRatio);
|
||||
|
||||
ArrayList<int[]> ordinals = new ArrayList<int[]>();
|
||||
int[] idx = new int[reader.maxDoc()];
|
||||
ordinals.add(new int[reader.maxDoc()]);
|
||||
|
||||
// 0 is reserved for "unset"
|
||||
bytes.copyUsingLengthPrefix(new BytesRef());
|
||||
int termOrd = 1;
|
||||
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
try
|
||||
|
||||
{
|
||||
DocsEnum docsEnum = null;
|
||||
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
|
||||
if (termOrd == termOrdToBytesOffset.size()) {
|
||||
// NOTE: this code only runs if the incoming
|
||||
// reader impl doesn't implement
|
||||
// size (which should be uncommon)
|
||||
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1 + termOrd, 1));
|
||||
}
|
||||
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
|
||||
|
||||
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
|
||||
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
|
||||
int[] ordinal;
|
||||
if (idx[docId] >= ordinals.size()) {
|
||||
ordinal = new int[reader.maxDoc()];
|
||||
ordinals.add(ordinal);
|
||||
} else {
|
||||
ordinal = ordinals.get(idx[docId]);
|
||||
}
|
||||
ordinal[docId] = termOrd;
|
||||
idx[docId]++;
|
||||
}
|
||||
termOrd++;
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
if (e.getClass().getName().endsWith("StopFillCacheException")) {
|
||||
// all is well, in case numeric parsers are used.
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
PagedBytes.Reader bytesReader = bytes.freeze(true);
|
||||
PackedInts.Reader termOrdToBytesOffsetReader = termOrdToBytesOffset.getMutable();
|
||||
|
||||
if (ordinals.size() == 1) {
|
||||
return new PackedBytesAtomicFieldData(bytesReader, termOrdToBytesOffsetReader, new SingleArrayOrdinals(ordinals.get(0), termOrd));
|
||||
} else {
|
||||
int[][] nativeOrdinals = new int[ordinals.size()][];
|
||||
for (int i = 0; i < nativeOrdinals.length; i++) {
|
||||
nativeOrdinals[i] = ordinals.get(i);
|
||||
}
|
||||
return new PackedBytesAtomicFieldData(bytesReader, termOrdToBytesOffsetReader, new MultiFlatArrayOrdinals(nativeOrdinals, termOrd));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue) {
|
||||
// TODO support "missingValue" for sortMissingValue options here...
|
||||
return new BytesRefFieldComparatorSource(this);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.unit.index.fielddata;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.elasticsearch.index.fielddata.FieldDataType;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
*/
|
||||
@Test
|
||||
public class PackedBytesStringFieldDataTests extends StringFieldDataTests {
|
||||
|
||||
@Override
|
||||
protected FieldDataType getFieldDataType() {
|
||||
return new FieldDataType("string", "packed_bytes", ImmutableMap.<String, String>of());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue