add packed bytes variant for strings/bytes

Shay Banon 2013-01-20 23:34:02 +01:00
parent 855b64a8a7
commit 45f27fe96a
4 changed files with 979 additions and 0 deletions

org/elasticsearch/index/fielddata/IndexFieldDataService.java

@@ -56,6 +56,7 @@ public class IndexFieldDataService extends AbstractIndexComponent {
buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
.put(Tuple.tuple("string", "concrete_bytes"), new ConcreteBytesRefIndexFieldData.Builder())
.put(Tuple.tuple("string", "packed_bytes"), new PackedBytesIndexFieldData.Builder())
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder())

org/elasticsearch/index/fielddata/plain/PackedBytesAtomicFieldData.java

@@ -0,0 +1,750 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.lucene.HashedBytesRef;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.util.BytesRefArrayRef;
import org.elasticsearch.index.fielddata.util.IntArrayRef;
import org.elasticsearch.index.fielddata.util.StringArrayRef;
/**
*/
public class PackedBytesAtomicFieldData implements AtomicOrdinalFieldData<ScriptDocValues.Strings> {
// ordinal 0 in values means no value (it's null)
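// Layout, as produced by PackedBytesIndexFieldData.loadDirect:
//  - bytes: every unique term copied once, length-prefixed, into a PagedBytes block pool
//  - termOrdToBytesOffset: packed ints mapping each term ordinal to its start offset in bytes
//  - ordinals: per-document term ordinal(s), with ordinal 0 reserved for "no value"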
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final Ordinals ordinals;
private int[] hashes;
private long size = -1;
public PackedBytesAtomicFieldData(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
}
@Override
public boolean isMultiValued() {
return ordinals.isMultiValued();
}
@Override
public int getNumDocs() {
return ordinals.getNumDocs();
}
@Override
public boolean isValuesOrdered() {
return true;
}
@Override
public long getMemorySizeInBytes() {
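// Computed lazily on first call and cached; -1 marks "not yet computed".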
if (size == -1) {
long size = ordinals.getMemorySizeInBytes();
// PackedBytes
size += RamUsage.NUM_BYTES_ARRAY_HEADER + bytes.getBlocks().length;
for (byte[] b : bytes.getBlocks()) {
size += b.length;
}
// PackedInts
size += termOrdToBytesOffset.ramBytesUsed();
this.size = size;
}
return size;
}
@Override
public OrdinalsBytesValues getBytesValues() {
return ordinals.isMultiValued() ? new BytesValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new BytesValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
}
@Override
public OrdinalsHashedBytesValues getHashedBytesValues() {
if (hashes == null) {
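// Lazily compute one hash per term ordinal so the hashed accessors below can
// reuse them instead of re-hashing the term bytes on every lookup.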
int numberOfValues = termOrdToBytesOffset.size();
int[] hashes = new int[numberOfValues];
BytesRef scratch = new BytesRef();
for (int i = 0; i < numberOfValues; i++) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(i));
hashes[i] = value == null ? 0 : value.hashCode();
}
this.hashes = hashes;
}
return ordinals.isMultiValued() ? new HashedBytesValues.Multi(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals()) : new HashedBytesValues.Single(bytes, termOrdToBytesOffset, hashes, ordinals.ordinals());
}
@Override
public OrdinalsStringValues getStringValues() {
return ordinals.isMultiValued() ? new StringValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new StringValues.Single(bytes, termOrdToBytesOffset, ordinals.ordinals());
}
@Override
public ScriptDocValues.Strings getScriptValues() {
return new ScriptDocValues.Strings(getStringValues());
}
static abstract class BytesValues implements org.elasticsearch.index.fielddata.OrdinalsBytesValues {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
protected final Ordinals.Docs ordinals;
protected final BytesRef scratch = new BytesRef();
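// Non-"safe" accessors refill this shared scratch BytesRef, so callers must copy
// (e.g. via makeSafe) before retaining a value; the *Safe variants allocate a
// fresh BytesRef per call instead.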
BytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return this.ordinals;
}
@Override
public BytesRef getValueByOrd(int ord) {
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
}
@Override
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
return bytes.fill(ret, termOrdToBytesOffset.get(ord));
}
@Override
public BytesRef getSafeValueByOrd(int ord) {
return bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public BytesRef makeSafe(BytesRef bytes) {
return BytesRef.deepCopyOf(bytes);
}
@Override
public BytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return bytes.fill(scratch, termOrdToBytesOffset.get(ord));
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
return bytes.fill(ret, termOrdToBytesOffset.get(ordinals.getOrd(docId)));
}
@Override
public BytesRef getValueSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
}
static class Single extends BytesValues {
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[1], 1);
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public BytesRefArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.values[0] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
}
@Override
public Iter getIterSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
} else {
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
}
}
@Override
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
} else {
proc.onValue(docId, bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
}
}
}
static class Multi extends BytesValues {
private final BytesRefArrayRef arrayScratch = new BytesRefArrayRef(new BytesRef[10], 0);
private final ValuesIter iter;
private final SafeValuesIter safeIter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
this.iter = new ValuesIter(bytes, termOrdToBytesOffset);
this.safeIter = new SafeValuesIter(bytes, termOrdToBytesOffset);
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public BytesRefArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return BytesRefArrayRef.EMPTY;
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
arrayScratch.values[arrayScratch.end++] = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ords.values[i]));
}
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
}
@Override
public Iter getIterSafe(int docId) {
return safeIter.reset(ordinals.getIter(docId));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)));
} while ((ord = iter.next()) != 0);
}
@Override
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
proc.onValue(docId, bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)));
} while ((ord = iter.next()) != 0);
}
static class ValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final BytesRef scratch = new BytesRef();
private Ordinals.Docs.Iter ordsIter;
private int ord;
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public BytesRef next() {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value;
}
}
static class SafeValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private Ordinals.Docs.Iter ordsIter;
private int ord;
SafeValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
}
public SafeValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public BytesRef next() {
BytesRef value = bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value;
}
}
}
}
static abstract class HashedBytesValues implements org.elasticsearch.index.fielddata.OrdinalsHashedBytesValues {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
protected final int[] hashes;
protected final Ordinals.Docs ordinals;
protected final BytesRef scratch1 = new BytesRef();
protected final HashedBytesRef scratch = new HashedBytesRef();
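// Same scratch-vs-safe contract as BytesValues above, with the hash taken from
// the precomputed per-ordinal hashes array rather than recomputed per value.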
HashedBytesValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return this.ordinals;
}
@Override
public HashedBytesRef getValueByOrd(int ord) {
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
}
@Override
public HashedBytesRef getSafeValueByOrd(int ord) {
return new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public HashedBytesRef makeSafe(HashedBytesRef bytes) {
return new HashedBytesRef(BytesRef.deepCopyOf(bytes.bytes), bytes.hash);
}
@Override
public HashedBytesRef getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
}
@Override
public HashedBytesRef getValueSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
return new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
}
static class Single extends HashedBytesValues {
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, hashes, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
}
@Override
public Iter getIterSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
} else {
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
}
}
@Override
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
} else {
proc.onValue(docId, new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
}
}
}
static class Multi extends HashedBytesValues {
private final ValuesIter iter;
private final SafeValuesIter safeIter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, hashes, ordinals);
this.iter = new ValuesIter(bytes, termOrdToBytesOffset, hashes);
this.safeIter = new SafeValuesIter(bytes, termOrdToBytesOffset, hashes);
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
}
@Override
public Iter getIterSafe(int docId) {
return safeIter.reset(ordinals.getIter(docId));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
proc.onValue(docId, scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]));
} while ((ord = iter.next()) != 0);
}
@Override
public void forEachSafeValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
proc.onValue(docId, new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]));
} while ((ord = iter.next()) != 0);
}
static class ValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final int[] hashes;
private Ordinals.Docs.Iter ordsIter;
private int ord;
private final BytesRef scratch1 = new BytesRef();
private final HashedBytesRef scratch = new HashedBytesRef();
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public HashedBytesRef next() {
HashedBytesRef value = scratch.reset(bytes.fill(scratch1, termOrdToBytesOffset.get(ord)), hashes[ord]);
ord = ordsIter.next();
return value;
}
}
static class SafeValuesIter implements Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final int[] hashes;
private Ordinals.Docs.Iter ordsIter;
private int ord;
SafeValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, int[] hashes) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.hashes = hashes;
}
public SafeValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public HashedBytesRef next() {
HashedBytesRef value = new HashedBytesRef(bytes.fill(new BytesRef(), termOrdToBytesOffset.get(ord)), hashes[ord]);
ord = ordsIter.next();
return value;
}
}
}
}
static abstract class StringValues implements OrdinalsStringValues {
protected final PagedBytes.Reader bytes;
protected final PackedInts.Reader termOrdToBytesOffset;
protected final Ordinals.Docs ordinals;
protected final BytesRef scratch = new BytesRef();
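// String accessors always materialize a new java.lang.String via utf8ToString(),
// so there are no separate *Safe variants here.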
protected StringValues(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.ordinals = ordinals;
}
@Override
public Ordinals.Docs ordinals() {
return ordinals;
}
@Override
public String getValueByOrd(int ord) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return value.utf8ToString();
}
@Override
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public String getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return null;
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return value.utf8ToString();
}
static class Single extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1);
private final Iter.Single iter = new Iter.Single();
Single(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public StringArrayRef getValues(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return StringArrayRef.EMPTY;
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
arrayScratch.values[0] = value == null ? null : value.utf8ToString();
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
proc.onMissing(docId);
return;
}
proc.onValue(docId, bytes.fill(scratch, termOrdToBytesOffset.get(ord)).utf8ToString());
}
}
static class Multi extends StringValues {
private final StringArrayRef arrayScratch = new StringArrayRef(new String[10], 0);
private final ValuesIter iter;
Multi(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, Ordinals.Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
iter = new ValuesIter(bytes, termOrdToBytesOffset);
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public StringArrayRef getValues(int docId) {
IntArrayRef ords = ordinals.getOrds(docId);
int size = ords.size();
if (size == 0) return StringArrayRef.EMPTY;
arrayScratch.reset(size);
for (int i = ords.start; i < ords.end; i++) {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ords.values[i]));
arrayScratch.values[arrayScratch.end++] = value == null ? null : value.utf8ToString();
}
return arrayScratch;
}
@Override
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
}
@Override
public void forEachValueInDoc(int docId, ValueInDocProc proc) {
Ordinals.Docs.Iter iter = ordinals.getIter(docId);
int ord = iter.next();
if (ord == 0) {
proc.onMissing(docId);
return;
}
do {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
proc.onValue(docId, value == null ? null : value.utf8ToString());
} while ((ord = iter.next()) != 0);
}
static class ValuesIter implements StringValues.Iter {
private final PagedBytes.Reader bytes;
private final PackedInts.Reader termOrdToBytesOffset;
private final BytesRef scratch = new BytesRef();
private Ordinals.Docs.Iter ordsIter;
private int ord;
ValuesIter(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset) {
this.bytes = bytes;
this.termOrdToBytesOffset = termOrdToBytesOffset;
}
public ValuesIter reset(Ordinals.Docs.Iter ordsIter) {
this.ordsIter = ordsIter;
this.ord = ordsIter.next();
return this;
}
@Override
public boolean hasNext() {
return ord != 0;
}
@Override
public String next() {
BytesRef value = bytes.fill(scratch, termOrdToBytesOffset.get(ord));
ord = ordsIter.next();
return value == null ? null : value.utf8ToString();
}
}
}
}
}

org/elasticsearch/index/fielddata/plain/PackedBytesIndexFieldData.java

@@ -0,0 +1,193 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.MultiFlatArrayOrdinals;
import org.elasticsearch.index.fielddata.ordinals.SingleArrayOrdinals;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.ArrayList;
/**
*/
public class PackedBytesIndexFieldData extends AbstractIndexFieldData<PackedBytesAtomicFieldData> implements IndexOrdinalFieldData<PackedBytesAtomicFieldData> {
public static class Builder implements IndexFieldData.Builder {
@Override
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
return new PackedBytesIndexFieldData(index, indexSettings, fieldNames, type, cache);
}
}
public PackedBytesIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
super(index, indexSettings, fieldNames, fieldDataType, cache);
}
@Override
public boolean valuesOrdered() {
return true;
}
@Override
public PackedBytesAtomicFieldData load(AtomicReaderContext context) {
try {
return cache.load(context, this);
} catch (Throwable e) {
if (e instanceof ElasticSearchException) {
throw (ElasticSearchException) e;
} else {
throw new ElasticSearchException(e.getMessage(), e);
}
}
}
@Override
public PackedBytesAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
final PagedBytes bytes = new PagedBytes(1);
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
GrowableWriter termOrdToBytesOffset = new GrowableWriter(1, 2, PackedInts.FASTEST);
return new PackedBytesAtomicFieldData(bytes.freeze(true), termOrdToBytesOffset.getMutable(), new EmptyOrdinals(reader.maxDoc()));
}
final PagedBytes bytes = new PagedBytes(15);
int startBytesBPV;
int startTermsBPV;
int startNumUniqueTerms;
int maxDoc = reader.maxDoc();
final int termCountHardLimit;
if (maxDoc == Integer.MAX_VALUE) {
termCountHardLimit = Integer.MAX_VALUE;
} else {
termCountHardLimit = maxDoc + 1;
}
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
// is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = terms.size();
if (numUniqueTerms != -1L) {
if (numUniqueTerms > termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
}
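// The "* 4" factor guesses roughly four bytes per term when sizing the offset
// writer; GrowableWriter will simply grow if the guess turns out to be low.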
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
// TODO: expose this as an option..., have a nice parser for it...
float acceptableOverheadRatio = PackedInts.FAST;
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1 + startNumUniqueTerms, acceptableOverheadRatio);
ArrayList<int[]> ordinals = new ArrayList<int[]>();
int[] idx = new int[reader.maxDoc()];
ordinals.add(new int[reader.maxDoc()]);
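// Per-document ordinals are collected in "levels": ordinals.get(n)[docId] holds the
// (n + 1)-th ordinal seen for docId, and idx[docId] counts how many have been seen.
// Documents with at most one value only ever use the first level.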
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
int termOrd = 1;
TermsEnum termsEnum = terms.iterator(null);
try {
DocsEnum docsEnum = null;
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
if (termOrd == termOrdToBytesOffset.size()) {
// NOTE: this code only runs if the incoming
// reader impl doesn't implement
// size (which should be uncommon)
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1 + termOrd, 1));
}
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
int[] ordinal;
if (idx[docId] >= ordinals.size()) {
ordinal = new int[reader.maxDoc()];
ordinals.add(ordinal);
} else {
ordinal = ordinals.get(idx[docId]);
}
ordinal[docId] = termOrd;
idx[docId]++;
}
termOrd++;
}
} catch (RuntimeException e) {
if (e.getClass().getName().endsWith("StopFillCacheException")) {
// all is well, in case numeric parsers are used.
} else {
throw e;
}
}
PagedBytes.Reader bytesReader = bytes.freeze(true);
PackedInts.Reader termOrdToBytesOffsetReader = termOrdToBytesOffset.getMutable();
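// A single level means no document had more than one value, so the cheaper
// single-valued ordinals implementation suffices; otherwise the levels are
// flattened into a multi-valued one.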
if (ordinals.size() == 1) {
return new PackedBytesAtomicFieldData(bytesReader, termOrdToBytesOffsetReader, new SingleArrayOrdinals(ordinals.get(0), termOrd));
} else {
int[][] nativeOrdinals = new int[ordinals.size()][];
for (int i = 0; i < nativeOrdinals.length; i++) {
nativeOrdinals[i] = ordinals.get(i);
}
return new PackedBytesAtomicFieldData(bytesReader, termOrdToBytesOffsetReader, new MultiFlatArrayOrdinals(nativeOrdinals, termOrd));
}
}
@Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue) {
// TODO support "missingValue" for sortMissingValue options here...
return new BytesRefFieldComparatorSource(this);
}
}

org/elasticsearch/test/unit/index/fielddata/PackedBytesStringFieldDataTests.java

@@ -0,0 +1,35 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.fielddata;
import com.google.common.collect.ImmutableMap;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.testng.annotations.Test;
/**
*/
@Test
public class PackedBytesStringFieldDataTests extends StringFieldDataTests {
@Override
protected FieldDataType getFieldDataType() {
return new FieldDataType("string", "packed_bytes", ImmutableMap.<String, String>of());
}
}
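For completeness, a hypothetical end-to-end sketch (not from this commit) tying the pieces together. Every method used below appears somewhere in this diff; the variables index, indexSettings, fieldNames, cache, readerContext and docId are assumed placeholders, and exception handling for loadDirect (declared throws Exception) is omitted.

// Build packed_bytes field data for a field, load one segment, and read a value.
FieldDataType type = new FieldDataType("string", "packed_bytes", ImmutableMap.<String, String>of());
PackedBytesIndexFieldData fieldData = (PackedBytesIndexFieldData)
        new PackedBytesIndexFieldData.Builder().build(index, indexSettings, fieldNames, type, cache);
PackedBytesAtomicFieldData atomic = fieldData.loadDirect(readerContext);
if (atomic.getBytesValues().hasValue(docId)) {
    // getValue refills a shared scratch BytesRef; copy with makeSafe to retain it
    BytesRef value = atomic.getBytesValues().getValue(docId);
}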