diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java
index 501bcd32466..8e3547618ba 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java
@@ -17,28 +17,23 @@ package org.elasticsearch.index.fielddata.ordinals;
* specific language governing permissions and limitations
* under the License.
*/
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.IntBlockPool;
+import org.apache.lucene.util.*;
import org.apache.lucene.util.IntBlockPool.Allocator;
import org.apache.lucene.util.IntBlockPool.DirectAllocator;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.packed.GrowableWriter;
+import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.settings.Settings;
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+
/**
* Simple class to build document ID <-> ordinal mapping. Note: Ordinals are
* 1 based monotocially increasing positive integers. 0
@@ -46,7 +41,10 @@ import org.elasticsearch.common.settings.Settings;
*/
public final class OrdinalsBuilder implements Closeable {
- private final int[] ords;
+ private final int maxDoc;
+ private int[] mvOrds;
+ private GrowableWriter svOrds;
+
private int[] offsets;
private final IntBlockPool pool;
private final IntBlockPool.SliceWriter writer;
@@ -57,19 +55,35 @@ public final class OrdinalsBuilder implements Closeable {
private int numMultiValuedDocs = 0;
private int totalNumOrds = 0;
- public OrdinalsBuilder(Terms terms, int maxDoc, Allocator allocator) {
- this.ords = new int[maxDoc];
+ public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator) throws IOException {
+ this.maxDoc = maxDoc;
+ // TODO: Make configurable...
+ float acceptableOverheadRatio = PackedInts.FAST;
+ if (preDefineBitsRequired) {
+ int numTerms = (int) terms.size();
+ if (numTerms == -1) {
+ svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
+ } else {
+ svOrds = new GrowableWriter(PackedInts.bitsRequired(numTerms), maxDoc, acceptableOverheadRatio);
+ }
+ } else {
+ svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
+ }
pool = new IntBlockPool(allocator);
reader = new IntBlockPool.SliceReader(pool);
writer = new IntBlockPool.SliceWriter(pool);
}
- public OrdinalsBuilder(int maxDoc) {
- this(null, maxDoc);
+ public OrdinalsBuilder(int maxDoc) throws IOException {
+ this(null, false, maxDoc);
}
- public OrdinalsBuilder(Terms terms, int maxDoc) {
- this(terms, maxDoc, new DirectAllocator());
+ public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc) throws IOException {
+ this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator());
+ }
+
+ public OrdinalsBuilder(Terms terms, int maxDoc) throws IOException {
+ this(terms, true, maxDoc, new DirectAllocator());
}
/**
@@ -93,25 +107,42 @@ public final class OrdinalsBuilder implements Closeable {
*/
public OrdinalsBuilder addDoc(int doc) {
totalNumOrds++;
- int docsOrd = ords[doc];
- if (docsOrd == 0) {
- ords[doc] = currentOrd;
- numDocsWithValue++;
- } else if (docsOrd > 0) {
- numMultiValuedDocs++;
- int offset = writer.startNewSlice();
- writer.writeInt(docsOrd);
- writer.writeInt(currentOrd);
- if (offsets == null) {
- offsets = new int[ords.length];
+ if (svOrds != null) {
+ int docsOrd = (int) svOrds.get(doc);
+ if (docsOrd == 0) {
+ svOrds.set(doc, currentOrd);
+ numDocsWithValue++;
+ } else {
+ // This doc already has an ord: rebuild the ords as an int[] that supports mv, copied from the sv ords.
+ mvOrds = new int[maxDoc];
+ for (int docId = 0; docId < maxDoc; docId++) {
+ mvOrds[docId] = (int) svOrds.get(docId);
+ }
+ svOrds = null;
+ }
+ }
+
+ if (mvOrds != null) {
+ int docsOrd = mvOrds[doc];
+ if (docsOrd == 0) {
+ mvOrds[doc] = currentOrd;
+ numDocsWithValue++;
+ } else if (docsOrd > 0) {
+ numMultiValuedDocs++;
+ int offset = writer.startNewSlice();
+ writer.writeInt(docsOrd);
+ writer.writeInt(currentOrd);
+ if (offsets == null) {
+ offsets = new int[mvOrds.length];
+ }
+ offsets[doc] = writer.getCurrentOffset();
+ mvOrds[doc] = (-1 * offset) - 1;
+ } else {
+ assert offsets != null;
+ writer.reset(offsets[doc]);
+ writer.writeInt(currentOrd);
+ offsets[doc] = writer.getCurrentOffset();
}
- offsets[doc] = writer.getCurrentOffset();
- ords[doc] = (-1 * offset) - 1;
- } else {
- assert offsets != null;
- writer.reset(offsets[doc]);
- writer.writeInt(currentOrd);
- offsets[doc] = writer.getCurrentOffset();
}
return this;
}
@@ -163,12 +194,22 @@ public final class OrdinalsBuilder implements Closeable {
* if every document has an ordinal associated with it this method returns null
*/
public FixedBitSet buildDocsWithValuesSet() {
- if (numDocsWithValue == this.ords.length)
+ if (numDocsWithValue == maxDoc) {
return null;
- final FixedBitSet bitSet = new FixedBitSet(this.ords.length);
- for (int i = 0; i < ords.length; i++) {
- if (ords[i] != 0) {
- bitSet.set(i);
+ }
+ final FixedBitSet bitSet = new FixedBitSet(maxDoc);
+ if (svOrds != null) {
+ for (int docId = 0; docId < maxDoc; docId++) {
+ int ord = (int) svOrds.get(docId);
+ if (ord != 0) {
+ bitSet.set(docId);
+ }
+ }
+ } else {
+ for (int docId = 0; docId < maxDoc; docId++) {
+ if (mvOrds[docId] != 0) {
+ bitSet.set(docId);
+ }
}
}
return bitSet;
@@ -179,15 +220,15 @@ public final class OrdinalsBuilder implements Closeable {
*/
public Ordinals build(Settings settings) {
if (numMultiValuedDocs == 0) {
- return new SingleArrayOrdinals(ords, getNumOrds());
+ return new SinglePackedOrdinals(svOrds.getMutable(), getNumOrds());
}
final String multiOrdinals = settings.get("multi_ordinals", "sparse");
if ("flat".equals(multiOrdinals)) {
final ArrayList ordinalBuffer = new ArrayList();
- for (int i = 0; i < ords.length; i++) {
+ for (int i = 0; i < mvOrds.length; i++) {
final IntsRef docOrds = docOrds(i);
while (ordinalBuffer.size() < docOrds.length) {
- ordinalBuffer.add(new int[ords.length]);
+ ordinalBuffer.add(new int[mvOrds.length]);
}
for (int j = docOrds.offset; j < docOrds.offset+docOrds.length; j++) {
@@ -211,24 +252,35 @@ public final class OrdinalsBuilder implements Closeable {
* Returns a shared {@link IntsRef} instance for the given doc ID holding all ordinals associated with it.
*/
public IntsRef docOrds(int doc) {
- int docsOrd = ords[doc];
- intsRef.offset = 0;
- if (docsOrd == 0) {
- intsRef.length = 0;
- } else if (docsOrd > 0) {
- intsRef.ints[0] = ords[doc];
- intsRef.length = 1;
- } else {
- assert offsets != null;
- reader.reset(-1 * (ords[doc] + 1), offsets[doc]);
- int pos = 0;
- while (!reader.endOfSlice()) {
- if (intsRef.ints.length <= pos) {
- intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1);
- }
- intsRef.ints[pos++] = reader.readInt();
+ if (svOrds != null) {
+ int docsOrd = (int) svOrds.get(doc);
+ intsRef.offset = 0;
+ if (docsOrd == 0) {
+ intsRef.length = 0;
+ } else if (docsOrd > 0) {
+ intsRef.ints[0] = docsOrd;
+ intsRef.length = 1;
+ }
+ } else {
+ int docsOrd = mvOrds[doc];
+ intsRef.offset = 0;
+ if (docsOrd == 0) {
+ intsRef.length = 0;
+ } else if (docsOrd > 0) {
+ intsRef.ints[0] = mvOrds[doc];
+ intsRef.length = 1;
+ } else {
+ assert offsets != null;
+ reader.reset(-1 * (mvOrds[doc] + 1), offsets[doc]);
+ int pos = 0;
+ while (!reader.endOfSlice()) {
+ if (intsRef.ints.length <= pos) {
+ intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1);
+ }
+ intsRef.ints[pos++] = reader.readInt();
+ }
+ intsRef.length = pos;
}
- intsRef.length = pos;
}
return intsRef;
}
@@ -237,7 +289,7 @@ public final class OrdinalsBuilder implements Closeable {
* Returns the maximum document ID this builder can associate with an ordinal
*/
public int maxDoc() {
- return ords.length;
+ return maxDoc;
}
/**
diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SingleArrayOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/SingleArrayOrdinals.java
deleted file mode 100644
index f5052d453d3..00000000000
--- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SingleArrayOrdinals.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.ordinals;
-
-import org.apache.lucene.util.IntsRef;
-import org.elasticsearch.common.RamUsage;
-
-/**
- */
-public class SingleArrayOrdinals implements Ordinals {
-
- // ordinals with value 0 indicates no value
- private final int[] ordinals;
- private final int numOrds;
- private final int maxOrd;
-
- private long size = -1;
-
- public SingleArrayOrdinals(int[] ordinals, int numOrds) {
- this.ordinals = ordinals;
- this.numOrds = numOrds;
- this.maxOrd = numOrds + 1;
- }
-
- @Override
- public boolean hasSingleArrayBackingStorage() {
- return true;
- }
-
- @Override
- public Object getBackingStorage() {
- return ordinals;
- }
-
- @Override
- public long getMemorySizeInBytes() {
- if (size == -1) {
- size = RamUsage.NUM_BYTES_INT * ordinals.length + RamUsage.NUM_BYTES_ARRAY_HEADER;
- }
- return size;
- }
-
- @Override
- public boolean isMultiValued() {
- return false;
- }
-
- @Override
- public int getNumDocs() {
- return ordinals.length;
- }
-
- @Override
- public int getNumOrds() {
- return numOrds;
- }
-
- @Override
- public int getMaxOrd() {
- return maxOrd;
- }
-
- @Override
- public Docs ordinals() {
- return new Docs(this, ordinals);
- }
-
- public static class Docs implements Ordinals.Docs {
-
- private final SingleArrayOrdinals parent;
- private final int[] ordinals;
-
- private final IntsRef intsScratch = new IntsRef(1);
- private final SingleValueIter iter = new SingleValueIter();
-
- public Docs(SingleArrayOrdinals parent, int[] ordinals) {
- this.parent = parent;
- this.ordinals = ordinals;
- }
-
- @Override
- public Ordinals ordinals() {
- return parent;
- }
-
- @Override
- public int getNumDocs() {
- return parent.getNumDocs();
- }
-
- @Override
- public int getNumOrds() {
- return parent.getNumOrds();
- }
-
- @Override
- public int getMaxOrd() {
- return parent.getMaxOrd();
- }
-
- @Override
- public boolean isMultiValued() {
- return false;
- }
-
- @Override
- public int getOrd(int docId) {
- return ordinals[docId];
- }
-
- @Override
- public IntsRef getOrds(int docId) {
- final int ordinal = ordinals[docId];
- if (ordinal == 0) {
- intsScratch.length = 0;
- } else {
- intsScratch.ints[0] = ordinal;
- intsScratch.offset = 0;
- intsScratch.length = 1;
- }
- return intsScratch;
- }
-
- @Override
- public Iter getIter(int docId) {
- return iter.reset(ordinals[docId]);
- }
-
- }
-}
diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractBytesIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractBytesIndexFieldData.java
index 38f50e9a039..1a5f9f96e4f 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractBytesIndexFieldData.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractBytesIndexFieldData.java
@@ -18,16 +18,7 @@
*/
package org.elasticsearch.index.fielddata.plain;
-import java.io.IOException;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.FilteredTermsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
@@ -35,20 +26,20 @@ import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
-import org.elasticsearch.index.fielddata.AbstractIndexFieldData;
-import org.elasticsearch.index.fielddata.AtomicFieldData;
-import org.elasticsearch.index.fielddata.FieldDataType;
-import org.elasticsearch.index.fielddata.IndexFieldData;
-import org.elasticsearch.index.fielddata.IndexFieldDataCache;
-import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.mapper.FieldMapper.Names;
+import java.io.IOException;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
public abstract class AbstractBytesIndexFieldData> extends AbstractIndexFieldData implements IndexFieldData.WithOrdinals {
- private Settings frequency;
- private Settings regex;
+ protected Settings frequency;
+ protected Settings regex;
protected AbstractBytesIndexFieldData(Index index, Settings indexSettings, Names fieldNames, FieldDataType fieldDataType,
IndexFieldDataCache cache) {
diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java
index be3c7285a53..acb9480d581 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java
@@ -19,11 +19,7 @@
package org.elasticsearch.index.fielddata.plain;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.FST;
@@ -67,8 +63,9 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData fstBuilder = new org.apache.lucene.util.fst.Builder(INPUT_TYPE.BYTE1, outputs);
final IntsRef scratch = new IntsRef();
-
- OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
+
+ boolean preDefineBitsRequired = regex == null && frequency == null;
+ OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
try {
// we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support
diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesIndexFieldData.java
index 0d80884c0b4..80797c8c433 100644
--- a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesIndexFieldData.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesIndexFieldData.java
@@ -19,11 +19,7 @@
package org.elasticsearch.index.fielddata.plain;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
@@ -100,7 +96,8 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData controlDocToOrdinal = new HashMap();
+ OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
+ int ordinal = builder.nextOrdinal();
+ for (int doc = 0; doc < numDocs; doc++) {
+ if (doc % numOrdinals == 0) {
+ ordinal = builder.nextOrdinal();
+ }
+ controlDocToOrdinal.put(doc, ordinal);
+ builder.addDoc(doc);
+ }
+
+ Ordinals ords = builder.build(ImmutableSettings.EMPTY);
+ assertThat(ords, instanceOf(SinglePackedOrdinals.class));
+ Ordinals.Docs docs = ords.ordinals();
+
+ assertThat(controlDocToOrdinal.size(), equalTo(docs.getNumDocs()));
+ for (Map.Entry<Integer, Integer> entry : controlDocToOrdinal.entrySet()) {
+ assertThat(entry.getValue(), equalTo(docs.getOrd(entry.getKey())));
+ }
+
+ }
+
+ @Test
+ public void testMvOrdinalsTrigger() throws IOException {
+ int numDocs = 1000000;
+ OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
+ builder.nextOrdinal();
+ for (int doc = 0; doc < numDocs; doc++) {
+ builder.addDoc(doc);
+ }
+
+ Ordinals ords = builder.build(ImmutableSettings.EMPTY);
+ assertThat(ords, instanceOf(SinglePackedOrdinals.class));
+
+ builder.nextOrdinal();
+ builder.addDoc(0);
+ ords = builder.build(ImmutableSettings.EMPTY);
+ assertThat(ords, not(instanceOf(SinglePackedOrdinals.class)));
+ }
+
+}