Use SinglePackedOrdinals over SingleArrayOrdinals to reduce the memory that ordinals take for single-valued fields in field data.
Closes #3185
Commit: 8d59ed3ab0
Parent: b995abfa80
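Note (not part of the diff): the saving targeted by this change comes from no longer storing one 32-bit int per document for single-valued fields; a packed representation only needs as many bits per document as the highest ordinal requires. A rough, self-contained sketch of that arithmetic with made-up segment sizes (plain Java, independent of the Lucene packed-ints classes used below):

public class OrdinalMemorySketch {

    // Same idea as PackedInts.bitsRequired(maxValue): bits needed to hold maxValue, floored at 1.
    static int bitsRequired(long maxValue) {
        return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue));
    }

    public static void main(String[] args) {
        long maxDoc = 10_000_000L; // documents in the segment (hypothetical)
        long numTerms = 50_000L;   // unique terms, so ordinals run from 1 to 50_000

        long arrayBytes = maxDoc * 4L;                           // int[maxDoc], the SingleArrayOrdinals layout
        long packedBytes = maxDoc * bitsRequired(numTerms) / 8L; // about 16 bits per doc, the packed layout

        System.out.println("int[] ordinals  : " + arrayBytes + " bytes");
        System.out.println("packed ordinals : " + packedBytes + " bytes (approx, ignoring block overhead)");
    }
}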
OrdinalsBuilder.java (org.elasticsearch.index.fielddata.ordinals), modified:

@@ -17,28 +17,23 @@ package org.elasticsearch.index.fielddata.ordinals;
  * specific language governing permissions and limitations
  * under the License.
  */
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FilteredTermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.IntBlockPool;
+import org.apache.lucene.util.*;
 import org.apache.lucene.util.IntBlockPool.Allocator;
 import org.apache.lucene.util.IntBlockPool.DirectAllocator;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.ElasticSearchIllegalArgumentException;
 import org.elasticsearch.common.settings.Settings;
 
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+
 /**
  * Simple class to build document ID <-> ordinal mapping. Note: Ordinals are
  * <tt>1</tt> based monotocially increasing positive integers. <tt>0</tt>

@@ -46,7 +41,10 @@ import org.elasticsearch.common.settings.Settings;
  */
 public final class OrdinalsBuilder implements Closeable {
 
-    private final int[] ords;
+    private final int maxDoc;
+    private int[] mvOrds;
+    private GrowableWriter svOrds;
+
     private int[] offsets;
     private final IntBlockPool pool;
     private final IntBlockPool.SliceWriter writer;

@@ -57,19 +55,35 @@ public final class OrdinalsBuilder implements Closeable {
     private int numMultiValuedDocs = 0;
     private int totalNumOrds = 0;
 
-    public OrdinalsBuilder(Terms terms, int maxDoc, Allocator allocator) {
-        this.ords = new int[maxDoc];
+    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator) throws IOException {
+        this.maxDoc = maxDoc;
+        // TODO: Make configurable...
+        float acceptableOverheadRatio = PackedInts.FAST;
+        if (preDefineBitsRequired) {
+            int numTerms = (int) terms.size();
+            if (numTerms == -1) {
+                svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
+            } else {
+                svOrds = new GrowableWriter(PackedInts.bitsRequired(numTerms), maxDoc, acceptableOverheadRatio);
+            }
+        } else {
+            svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
+        }
         pool = new IntBlockPool(allocator);
         reader = new IntBlockPool.SliceReader(pool);
         writer = new IntBlockPool.SliceWriter(pool);
     }
 
-    public OrdinalsBuilder(int maxDoc) {
-        this(null, maxDoc);
+    public OrdinalsBuilder(int maxDoc) throws IOException {
+        this(null, false, maxDoc);
     }
 
-    public OrdinalsBuilder(Terms terms, int maxDoc) {
-        this(terms, maxDoc, new DirectAllocator());
+    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc) throws IOException {
+        this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator());
+    }
+
+    public OrdinalsBuilder(Terms terms, int maxDoc) throws IOException {
+        this(terms, true, maxDoc, new DirectAllocator());
     }
 
     /**
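Note (not part of the diff): when the term count is known, the writer above is sized with PackedInts.bitsRequired(numTerms) and never has to widen; when terms.size() returns -1, or when pre-sizing is disabled, it starts at one bit per value and GrowableWriter widens the storage as larger ordinals arrive. A minimal sketch of that grow-on-demand idea, an assumption-level simplification rather than Lucene's actual implementation:

public class GrowOnDemandSketch {

    private final long[] values;  // stand-in storage; a real packed writer stores bits, not longs
    private int bitsPerValue = 1;

    GrowOnDemandSketch(int valueCount) {
        values = new long[valueCount];
    }

    void set(int index, long value) {
        int needed = Math.max(1, 64 - Long.numberOfLeadingZeros(value));
        if (needed > bitsPerValue) {
            // a real GrowableWriter re-packs its backing storage at this point;
            // pre-sizing with bitsRequired(numTerms) avoids ever taking this branch
            bitsPerValue = needed;
        }
        values[index] = value;
    }

    public static void main(String[] args) {
        GrowOnDemandSketch writer = new GrowOnDemandSketch(4);
        writer.set(0, 1);    // still 1 bit per value
        writer.set(1, 300);  // widens to 9 bits per value
        System.out.println("bits per value after writes: " + writer.bitsPerValue);
    }
}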
@@ -93,25 +107,42 @@ public final class OrdinalsBuilder implements Closeable {
      */
     public OrdinalsBuilder addDoc(int doc) {
         totalNumOrds++;
-        int docsOrd = ords[doc];
-        if (docsOrd == 0) {
-            ords[doc] = currentOrd;
-            numDocsWithValue++;
-        } else if (docsOrd > 0) {
-            numMultiValuedDocs++;
-            int offset = writer.startNewSlice();
-            writer.writeInt(docsOrd);
-            writer.writeInt(currentOrd);
-            if (offsets == null) {
-                offsets = new int[ords.length];
+        if (svOrds != null) {
+            int docsOrd = (int) svOrds.get(doc);
+            if (docsOrd == 0) {
+                svOrds.set(doc, currentOrd);
+                numDocsWithValue++;
+            } else {
+                // Rebuilding ords that supports mv based on sv ords.
+                mvOrds = new int[maxDoc];
+                for (int docId = 0; docId < maxDoc; docId++) {
+                    mvOrds[docId] = (int) svOrds.get(docId);
+                }
+                svOrds = null;
+            }
+        }
+
+        if (mvOrds != null) {
+            int docsOrd = mvOrds[doc];
+            if (docsOrd == 0) {
+                mvOrds[doc] = currentOrd;
+                numDocsWithValue++;
+            } else if (docsOrd > 0) {
+                numMultiValuedDocs++;
+                int offset = writer.startNewSlice();
+                writer.writeInt(docsOrd);
+                writer.writeInt(currentOrd);
+                if (offsets == null) {
+                    offsets = new int[mvOrds.length];
+                }
+                offsets[doc] = writer.getCurrentOffset();
+                mvOrds[doc] = (-1 * offset) - 1;
+            } else {
+                assert offsets != null;
+                writer.reset(offsets[doc]);
+                writer.writeInt(currentOrd);
+                offsets[doc] = writer.getCurrentOffset();
             }
-            offsets[doc] = writer.getCurrentOffset();
-            ords[doc] = (-1 * offset) - 1;
-        } else {
-            assert offsets != null;
-            writer.reset(offsets[doc]);
-            writer.writeInt(currentOrd);
-            offsets[doc] = writer.getCurrentOffset();
         }
         return this;
     }
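Note (not part of the diff): the multi-valued path keeps the encoding the old code used for ords. A positive mvOrds entry is the document's single ordinal; once a document has several ordinals, the entry stores a slice-pool offset encoded as (-1 * offset) - 1, which keeps offset 0 distinguishable from 0 meaning "no value". The matching decode, -1 * (mvOrds[doc] + 1), shows up in docOrds() below. A tiny standalone round-trip check of just that encoding:

public class SignEncodingDemo {

    static int encodeOffset(int offset) {
        return (-1 * offset) - 1;   // 0 -> -1, 1 -> -2, 7 -> -8, ...
    }

    static int decodeOffset(int encoded) {
        return -1 * (encoded + 1);  // -1 -> 0, -2 -> 1, -8 -> 7, ...
    }

    public static void main(String[] args) {
        for (int offset : new int[]{0, 1, 7, 12345}) {
            int encoded = encodeOffset(offset);
            if (encoded >= 0 || decodeOffset(encoded) != offset) {
                throw new AssertionError("round trip failed for offset " + offset);
            }
            System.out.println(offset + " -> " + encoded + " -> " + decodeOffset(encoded));
        }
    }
}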
@@ -163,12 +194,22 @@ public final class OrdinalsBuilder implements Closeable {
      * if every document has an ordinal associated with it this method returns <code>null</code>
      */
     public FixedBitSet buildDocsWithValuesSet() {
-        if (numDocsWithValue == this.ords.length)
+        if (numDocsWithValue == maxDoc) {
             return null;
-        final FixedBitSet bitSet = new FixedBitSet(this.ords.length);
-        for (int i = 0; i < ords.length; i++) {
-            if (ords[i] != 0) {
-                bitSet.set(i);
+        }
+        final FixedBitSet bitSet = new FixedBitSet(maxDoc);
+        if (svOrds != null) {
+            for (int docId = 0; docId < maxDoc; docId++) {
+                int ord = (int) svOrds.get(docId);
+                if (ord != 0) {
+                    bitSet.set(docId);
+                }
+            }
+        } else {
+            for (int docId = 0; docId < maxDoc; docId++) {
+                if (mvOrds[docId] != 0) {
+                    bitSet.set(docId);
+                }
             }
         }
         return bitSet;

@@ -179,15 +220,15 @@ public final class OrdinalsBuilder implements Closeable {
      */
     public Ordinals build(Settings settings) {
         if (numMultiValuedDocs == 0) {
-            return new SingleArrayOrdinals(ords, getNumOrds());
+            return new SinglePackedOrdinals(svOrds.getMutable(), getNumOrds());
         }
         final String multiOrdinals = settings.get("multi_ordinals", "sparse");
         if ("flat".equals(multiOrdinals)) {
             final ArrayList<int[]> ordinalBuffer = new ArrayList<int[]>();
-            for (int i = 0; i < ords.length; i++) {
+            for (int i = 0; i < mvOrds.length; i++) {
                 final IntsRef docOrds = docOrds(i);
                 while (ordinalBuffer.size() < docOrds.length) {
-                    ordinalBuffer.add(new int[ords.length]);
+                    ordinalBuffer.add(new int[mvOrds.length]);
                 }
 
                 for (int j = docOrds.offset; j < docOrds.offset+docOrds.length; j++) {
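Note (not part of the diff): a hedged usage sketch of the builder as it looks after this change, mirroring the calls exercised by SingleOrdinalsTests further down (OrdinalsBuilder, ImmutableSettings.EMPTY, Ordinals.Docs and getOrd are all taken from this diff; whether this compiles depends on the rest of the checkout):

import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;

import java.io.IOException;

public class BuildAndReadSketch {

    public static void main(String[] args) throws IOException {
        OrdinalsBuilder builder = new OrdinalsBuilder(3); // three documents, no Terms to pre-size from
        builder.nextOrdinal();   // ordinal 1
        builder.addDoc(0);
        builder.nextOrdinal();   // ordinal 2
        builder.addDoc(2);       // document 1 is deliberately left without a value

        // every document got at most one ordinal, so this builds the packed single-valued form
        Ordinals ordinals = builder.build(ImmutableSettings.EMPTY);
        Ordinals.Docs docs = ordinals.ordinals();
        for (int docId = 0; docId < docs.getNumDocs(); docId++) {
            System.out.println("doc " + docId + " -> ordinal " + docs.getOrd(docId)); // 0 means no value
        }
    }
}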
@@ -211,24 +252,35 @@ public final class OrdinalsBuilder implements Closeable {
      * Returns a shared {@link IntsRef} instance for the given doc ID holding all ordinals associated with it.
      */
     public IntsRef docOrds(int doc) {
-        int docsOrd = ords[doc];
-        intsRef.offset = 0;
-        if (docsOrd == 0) {
-            intsRef.length = 0;
-        } else if (docsOrd > 0) {
-            intsRef.ints[0] = ords[doc];
-            intsRef.length = 1;
-        } else {
-            assert offsets != null;
-            reader.reset(-1 * (ords[doc] + 1), offsets[doc]);
-            int pos = 0;
-            while (!reader.endOfSlice()) {
-                if (intsRef.ints.length <= pos) {
-                    intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1);
-                }
-                intsRef.ints[pos++] = reader.readInt();
+        if (svOrds != null) {
+            int docsOrd = (int) svOrds.get(doc);
+            intsRef.offset = 0;
+            if (docsOrd == 0) {
+                intsRef.length = 0;
+            } else if (docsOrd > 0) {
+                intsRef.ints[0] = docsOrd;
+                intsRef.length = 1;
+            }
+        } else {
+            int docsOrd = mvOrds[doc];
+            intsRef.offset = 0;
+            if (docsOrd == 0) {
+                intsRef.length = 0;
+            } else if (docsOrd > 0) {
+                intsRef.ints[0] = mvOrds[doc];
+                intsRef.length = 1;
+            } else {
+                assert offsets != null;
+                reader.reset(-1 * (mvOrds[doc] + 1), offsets[doc]);
+                int pos = 0;
+                while (!reader.endOfSlice()) {
+                    if (intsRef.ints.length <= pos) {
+                        intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1);
+                    }
+                    intsRef.ints[pos++] = reader.readInt();
+                }
+                intsRef.length = pos;
             }
-            intsRef.length = pos;
         }
         return intsRef;
     }

@@ -237,7 +289,7 @@ public final class OrdinalsBuilder implements Closeable {
      * Returns the maximum document ID this builder can associate with an ordinal
      */
     public int maxDoc() {
-        return ords.length;
+        return maxDoc;
     }
 
     /**
SingleArrayOrdinals.java (org.elasticsearch.index.fielddata.ordinals), deleted:

@@ -1,147 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.ordinals;
-
-import org.apache.lucene.util.IntsRef;
-import org.elasticsearch.common.RamUsage;
-
-/**
- */
-public class SingleArrayOrdinals implements Ordinals {
-
-    // ordinals with value 0 indicates no value
-    private final int[] ordinals;
-    private final int numOrds;
-    private final int maxOrd;
-
-    private long size = -1;
-
-    public SingleArrayOrdinals(int[] ordinals, int numOrds) {
-        this.ordinals = ordinals;
-        this.numOrds = numOrds;
-        this.maxOrd = numOrds + 1;
-    }
-
-    @Override
-    public boolean hasSingleArrayBackingStorage() {
-        return true;
-    }
-
-    @Override
-    public Object getBackingStorage() {
-        return ordinals;
-    }
-
-    @Override
-    public long getMemorySizeInBytes() {
-        if (size == -1) {
-            size = RamUsage.NUM_BYTES_INT * ordinals.length + RamUsage.NUM_BYTES_ARRAY_HEADER;
-        }
-        return size;
-    }
-
-    @Override
-    public boolean isMultiValued() {
-        return false;
-    }
-
-    @Override
-    public int getNumDocs() {
-        return ordinals.length;
-    }
-
-    @Override
-    public int getNumOrds() {
-        return numOrds;
-    }
-
-    @Override
-    public int getMaxOrd() {
-        return maxOrd;
-    }
-
-    @Override
-    public Docs ordinals() {
-        return new Docs(this, ordinals);
-    }
-
-    public static class Docs implements Ordinals.Docs {
-
-        private final SingleArrayOrdinals parent;
-        private final int[] ordinals;
-
-        private final IntsRef intsScratch = new IntsRef(1);
-        private final SingleValueIter iter = new SingleValueIter();
-
-        public Docs(SingleArrayOrdinals parent, int[] ordinals) {
-            this.parent = parent;
-            this.ordinals = ordinals;
-        }
-
-        @Override
-        public Ordinals ordinals() {
-            return parent;
-        }
-
-        @Override
-        public int getNumDocs() {
-            return parent.getNumDocs();
-        }
-
-        @Override
-        public int getNumOrds() {
-            return parent.getNumOrds();
-        }
-
-        @Override
-        public int getMaxOrd() {
-            return parent.getMaxOrd();
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public int getOrd(int docId) {
-            return ordinals[docId];
-        }
-
-        @Override
-        public IntsRef getOrds(int docId) {
-            final int ordinal = ordinals[docId];
-            if (ordinal == 0) {
-                intsScratch.length = 0;
-            } else {
-                intsScratch.ints[0] = ordinal;
-                intsScratch.offset = 0;
-                intsScratch.length = 1;
-            }
-            return intsScratch;
-        }
-
-        @Override
-        public Iter getIter(int docId) {
-            return iter.reset(ordinals[docId]);
-        }
-
-    }
-}
AbstractBytesIndexFieldData.java (org.elasticsearch.index.fielddata.plain), modified:

@@ -18,16 +18,7 @@
  */
 package org.elasticsearch.index.fielddata.plain;
 
-import java.io.IOException;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.FilteredTermsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.UnicodeUtil;

@@ -35,20 +26,20 @@ import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
-import org.elasticsearch.index.fielddata.AbstractIndexFieldData;
-import org.elasticsearch.index.fielddata.AtomicFieldData;
-import org.elasticsearch.index.fielddata.FieldDataType;
-import org.elasticsearch.index.fielddata.IndexFieldData;
-import org.elasticsearch.index.fielddata.IndexFieldDataCache;
-import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
 import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
 import org.elasticsearch.index.mapper.FieldMapper.Names;
 
+import java.io.IOException;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 public abstract class AbstractBytesIndexFieldData<FD extends AtomicFieldData.WithOrdinals<ScriptDocValues.Strings>> extends AbstractIndexFieldData<FD> implements IndexFieldData.WithOrdinals<FD> {
 
-    private Settings frequency;
-    private Settings regex;
+    protected Settings frequency;
+    protected Settings regex;
 
     protected AbstractBytesIndexFieldData(Index index, Settings indexSettings, Names fieldNames, FieldDataType fieldDataType,
                                           IndexFieldDataCache cache) {
FSTBytesIndexFieldData.java (org.elasticsearch.index.fielddata.plain), modified:

@@ -19,11 +19,7 @@
 
 package org.elasticsearch.index.fielddata.plain;
 
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.fst.FST;

@@ -67,8 +63,9 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
         PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
         org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
         final IntsRef scratch = new IntsRef();
 
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
+        boolean preDefineBitsRequired = regex == null && frequency == null;
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
         try {
 
             // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support
PagedBytesIndexFieldData.java (org.elasticsearch.index.fielddata.plain), modified:

@@ -19,11 +19,7 @@
 
 package org.elasticsearch.index.fielddata.plain;
 
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PagedBytes;

@@ -100,7 +96,8 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
         float acceptableOverheadRatio = PackedInts.FAST;
 
         GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1 + startNumUniqueTerms, acceptableOverheadRatio);
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
+        boolean preDefineBitsRequired = regex == null && frequency == null;
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
         try {
             // 0 is reserved for "unset"
             bytes.copyUsingLengthPrefix(new BytesRef());
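Note (not part of the diff): both loaders above only enable pre-sizing when no frequency or regex filter is configured. A plausible reading, inferred rather than stated in the commit: with a filter active, only part of the terms dictionary is loaded, so terms.size() would overstate the highest ordinal actually assigned and the writer would reserve more bits per document than needed. A small sketch of that overshoot with hypothetical term counts:

public class FilterOvershootSketch {

    static int bitsRequired(long maxValue) {
        return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue));
    }

    public static void main(String[] args) {
        long allTerms = 1_000_000L;  // terms.size() for the whole field (hypothetical)
        long loadedTerms = 1_000L;   // terms that survive a frequency/regex filter (hypothetical)
        System.out.println("bits per doc if pre-sized from all terms : " + bitsRequired(allTerms));    // 20
        System.out.println("bits per doc actually needed             : " + bitsRequired(loadedTerms)); // 10
    }
}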
MultiOrdinalsTests.java, modified:

@@ -25,6 +25,7 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
 import org.testng.annotations.Test;
 
+import java.io.IOException;
 import java.util.*;
 
 import static org.hamcrest.MatcherAssert.assertThat;

@@ -43,7 +44,7 @@ public abstract class MultiOrdinalsTests {
     protected abstract Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings);
 
     @Test
-    public void testRandomValues() {
+    public void testRandomValues() throws IOException {
         Random random = new Random(100);
         int numDocs = 100 + random.nextInt(1000);
         int numOrdinals = 1 + random.nextInt(200);
SingleOrdinalsTests.java (org.elasticsearch.test.unit.index.fielddata.ordinals), new file:

@@ -0,0 +1,83 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.unit.index.fielddata.ordinals;
+
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
+import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
+import org.elasticsearch.index.fielddata.ordinals.SinglePackedOrdinals;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+/**
+ */
+public class SingleOrdinalsTests {
+
+    @Test
+    public void testSvValues() throws IOException {
+        int numDocs = 1000000;
+        int numOrdinals = numDocs / 4;
+        Map<Integer, Integer> controlDocToOrdinal = new HashMap<Integer, Integer>();
+        OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
+        int ordinal = builder.nextOrdinal();
+        for (int doc = 0; doc < numDocs; doc++) {
+            if (doc % numOrdinals == 0) {
+                ordinal = builder.nextOrdinal();
+            }
+            controlDocToOrdinal.put(doc, ordinal);
+            builder.addDoc(doc);
+        }
+
+        Ordinals ords = builder.build(ImmutableSettings.EMPTY);
+        assertThat(ords, instanceOf(SinglePackedOrdinals.class));
+        Ordinals.Docs docs = ords.ordinals();
+
+        assertThat(controlDocToOrdinal.size(), equalTo(docs.getNumDocs()));
+        for (Map.Entry<Integer, Integer> entry : controlDocToOrdinal.entrySet()) {
+            assertThat(entry.getValue(), equalTo(docs.getOrd(entry.getKey())));
+        }
+
+    }
+
+    @Test
+    public void testMvOrdinalsTrigger() throws IOException {
+        int numDocs = 1000000;
+        OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
+        builder.nextOrdinal();
+        for (int doc = 0; doc < numDocs; doc++) {
+            builder.addDoc(doc);
+        }
+
+        Ordinals ords = builder.build(ImmutableSettings.EMPTY);
+        assertThat(ords, instanceOf(SinglePackedOrdinals.class));
+
+        builder.nextOrdinal();
+        builder.addDoc(0);
+        ords = builder.build(ImmutableSettings.EMPTY);
+        assertThat(ords, not(instanceOf(SinglePackedOrdinals.class)));
+    }
+
+}