comparator) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+
+ /**
+ * Source of integer (returned as java long), per document. The underlying
+ * implementation may use different numbers of bits per value; long is only
+ * used since it can handle all precisions.
+ */
+ public static abstract class Source {
+
+ /** Returns the long value for the given docID; optional operation. */
+ public long ints(int docID) {
+ throw new UnsupportedOperationException("ints are not supported");
+ }
+
+ /** Returns the double value for the given docID; optional operation. */
+ public double floats(int docID) {
+ throw new UnsupportedOperationException("floats are not supported");
+ }
+
+ /** Returns the byte[] value for the given docID; optional operation. */
+ public BytesRef bytes(int docID) {
+ throw new UnsupportedOperationException("bytes are not supported");
+ }
+
+ /** Returns number of unique values. Some impls may
+ * throw UnsupportedOperationException. */
+ public int getValueCount() {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Returns a {@link ValuesEnum} over this source, with no shared attributes. */
+ public ValuesEnum getEnum() throws IOException{
+ return getEnum(null);
+ }
+
+ // nocommit - enable obtaining enum from source since this is already in memory
+ /** Returns a {@link ValuesEnum} over this in-memory source; optional operation. */
+ public /*abstract*/ ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Returns an estimate of the RAM, in bytes, held by this source. */
+ public abstract long ramBytesUsed();
+ }
+
+ public static abstract class SortedSource extends Source {
+
+ @Override
+ public BytesRef bytes(int docID) {
+ return getByOrd(ord(docID));
+ }
+
+ /**
+ * Returns ord for specified docID. If this docID had not been added to the
+ * Writer, the ord is 0. Ord is dense, ie, starts at 0, then increments by 1
+ * for the next (as defined by the {@link Comparator}) value.
+ */
+ public abstract int ord(int docID);
+
+ /** Returns value for specified ord. */
+ public abstract BytesRef getByOrd(int ord);
+
+ /** Result holder for {@link #getByValue}; instances may be reused across calls. */
+ public static class LookupResult {
+ public boolean found;
+ public int ord;
+ }
+
+ /**
+ * Finds the largest ord whose value is &lt;= the requested value. If
+ * {@link LookupResult#found} is true, then ord is an exact match. The
+ * returned {@link LookupResult} may be reused across calls.
+ */
+ public abstract LookupResult getByValue(BytesRef value);
+ }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java
new file mode 100644
index 00000000000..c806b1650f7
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/Values.java
@@ -0,0 +1,48 @@
+package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Controls whether per-field values are stored into
+ * index. This storage is non-sparse, so it's best to
+ * use this when all docs have the field, and loads all
+ * values into RAM, exposing a random access API, when
+ * loaded.
+ *
+ * NOTE: This feature is experimental and the
+ * API is free to change in non-backwards-compatible ways. */
+ public enum Values {
+
+ /** Integral value is stored as packed ints. The bit
+ * precision is fixed across the segment, and
+ * determined by the min/max values in the field. */
+ PACKED_INTS,
+ /** Integral value stored as packed ints with a fixed bit precision. */
+ PACKED_INTS_FIXED,
+ /** Floating point value stored in 4 bytes per document. */
+ SIMPLE_FLOAT_4BYTE,
+ /** Floating point value stored in 8 bytes per document. */
+ SIMPLE_FLOAT_8BYTE,
+
+ // nocommit -- shouldn't lucene decide/detect straight vs
+ // deref, as well fixed vs var?
+ /** Fixed-length byte[], stored once per document. */
+ BYTES_FIXED_STRAIGHT,
+ /** Fixed-length byte[]; identical values share a single stored entry. */
+ BYTES_FIXED_DEREF,
+ /** Fixed-length byte[]; deduplicated and sorted, addressable by ord. */
+ BYTES_FIXED_SORTED,
+
+ /** Variable-length byte[], stored once per document. */
+ BYTES_VAR_STRAIGHT,
+ /** Variable-length byte[]; identical values share a single stored entry. */
+ BYTES_VAR_DEREF,
+ /** Variable-length byte[]; deduplicated and sorted, addressable by ord. */
+ BYTES_VAR_SORTED
+
+ // nocommit -- need STRING variants as well
+ }
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java
new file mode 100644
index 00000000000..35299482360
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java
@@ -0,0 +1,34 @@
+package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Comparator;
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+
+ /**
+ * Attribute exposing the per-document value of a {@link ValuesEnum}.
+ * Exactly one of {@link #bytes()}, {@link #floats()} or {@link #ints()} is
+ * non-null, depending on {@link #type()}.
+ */
+ public interface ValuesAttribute extends Attribute {
+ /** Returns the {@link Values} variant this attribute carries. */
+ public Values type();
+ /** Returns the bytes holder, or null if {@link #type()} is not a bytes variant. */
+ public BytesRef bytes();
+ /** Returns the floats holder, or null if {@link #type()} is not a float variant. */
+ public FloatsRef floats();
+ /** Returns the ints holder, or null if {@link #type()} is not an int variant. */
+ public LongsRef ints();
+ /** Sets the type and (re)allocates the matching value holder. */
+ public void setType(Values type);
+ /** Returns the comparator used by sorted bytes variants, if any. */
+ public Comparator<BytesRef> bytesComparator();
+ /** Sets the comparator used by sorted bytes variants. */
+ public void setBytesComparator(Comparator<BytesRef> comp);
+
+ }
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
new file mode 100644
index 00000000000..714ba4b7969
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
@@ -0,0 +1,151 @@
+package org.apache.lucene.index.values;
+
+import java.util.Comparator;
+
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.SetOnce;
+
+ /**
+ * Default {@link ValuesAttribute} implementation. {@link #setType} allocates
+ * the one value holder ({@link BytesRef}, {@link LongsRef} or
+ * {@link FloatsRef}) matching the given {@link Values} type and nulls the
+ * other two; consumers read only the accessor matching {@link #type()}.
+ */
+ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribute {
+ private Values type;
+ private BytesRef bytes = null;
+ private FloatsRef floats = null;
+ private LongsRef ints = null;
+ private Comparator<BytesRef> bytesComp;
+
+ public BytesRef bytes() {
+ return bytes;
+ }
+
+ public FloatsRef floats() {
+ return floats;
+ }
+
+ public LongsRef ints() {
+ return ints;
+ }
+
+ public Values type() {
+ return type;
+ }
+
+ public void setType(Values type) {
+ this.type = type;
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ bytes = new BytesRef();
+ ints = null;
+ floats = null;
+ break;
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED:
+ ints = new LongsRef(new long[1], 0, 1);
+ bytes = null;
+ floats = null;
+ break;
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE:
+ floats = new FloatsRef(new double[1], 0, 1);
+ ints = null;
+ bytes = null;
+ break;
+ }
+ }
+
+ @Override
+ public void clear() {
+ // deliberately a no-op: the holders are reused and overwritten by the
+ // producing enum for each document, so there is no per-doc state to reset
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ ValuesAttributeImpl other = (ValuesAttributeImpl) target;
+ if (type == null) {
+ return; // nothing recorded yet
+ }
+ other.setType(type);
+ other.bytesComp = bytesComp;
+ // clone THIS attribute's current value into the target; the previous
+ // code copied the target's freshly allocated (empty) holders over ours
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ other.bytes = (BytesRef) bytes.clone();
+ break;
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED:
+ other.ints = (LongsRef) ints.clone();
+ break;
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE:
+ other.floats = (FloatsRef) floats.clone();
+ break;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 0;
+ result = prime * result + ((bytes == null) ? 0 : bytes.hashCode());
+ result = prime * result + ((floats == null) ? 0 : floats.hashCode());
+ result = prime * result + ((ints == null) ? 0 : ints.hashCode());
+ result = prime * result + ((type == null) ? 0 : type.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null || getClass() != obj.getClass())
+ return false;
+ ValuesAttributeImpl other = (ValuesAttributeImpl) obj;
+ if (bytes == null) {
+ if (other.bytes != null)
+ return false;
+ } else if (!bytes.equals(other.bytes))
+ return false;
+ if (floats == null) {
+ if (other.floats != null)
+ return false;
+ } else if (!floats.equals(other.floats))
+ return false;
+ if (ints == null) {
+ if (other.ints != null)
+ return false;
+ } else if (!ints.equals(other.ints))
+ return false;
+ // note: bytesComp is intentionally excluded, matching hashCode()
+ return type == other.type;
+ }
+
+ public Comparator<BytesRef> bytesComparator() {
+ return bytesComp;
+ }
+
+ public void setBytesComparator(Comparator<BytesRef> comp) {
+ bytesComp = comp;
+ }
+
+ }
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
new file mode 100644
index 00000000000..eed33457380
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
@@ -0,0 +1,62 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+
+ /**
+ * Iterator over per-document values, modeled after
+ * {@link DocIdSetIterator}. The current value is exposed through the
+ * {@link ValuesAttribute} held in this enum's {@link AttributeSource}.
+ */
+ public abstract class ValuesEnum extends DocIdSetIterator {
+ private AttributeSource source;
+ protected final ValuesAttribute attr;
+
+ protected ValuesEnum(Values enumType) {
+ this(null, enumType);
+ }
+
+ protected ValuesEnum(AttributeSource source, Values enumType) {
+ this.source = source;
+ // only initialize the type if the attribute was not already present on
+ // the (possibly shared) source
+ boolean setType = !hasAttribute(ValuesAttribute.class);
+ attr = addAttribute(ValuesAttribute.class);
+ if (setType)
+ attr.setType(enumType);
+ }
+
+ /** Returns the {@link AttributeSource}, creating one lazily if necessary. */
+ public AttributeSource attributes() {
+ if (source == null)
+ source = new AttributeSource();
+ return source;
+ }
+
+ // note: the type parameter was missing, which does not compile; these
+ // mirror AttributeSource's generic signatures
+ public <T extends Attribute> T addAttribute(Class<T> attr) {
+ return attributes().addAttribute(attr);
+ }
+
+ public <T extends Attribute> T getAttribute(Class<T> attr) {
+ return attributes().getAttribute(attr);
+ }
+
+ public boolean hasAttribute(Class<? extends Attribute> attr) {
+ return attributes().hasAttribute(attr);
+ }
+
+ /** Releases resources (e.g. open files) held by this enum. */
+ public abstract void close() throws IOException;
+
+ }
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
new file mode 100644
index 00000000000..5a9f9d6093b
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -0,0 +1,255 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.BytesRefHash.ParallelArrayBase;
+import org.apache.lucene.util.BytesRefHash.ParallelBytesStartArray;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores variable-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[] and both
+// docs reference that single source
+
+class VarDerefBytesImpl {
+
+ static final String CODEC_NAME = "VarDerefBytes";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+
+
+ /** Parallel array adding, per hash entry, the data-file start address of its value. */
+ private static class AddressParallelArray extends ParallelArrayBase {
+ final int[] address;
+
+ AddressParallelArray(int size, AtomicLong bytesUsed) {
+ super(size, bytesUsed);
+ address = new int[size];
+ }
+ /** Accounts for the extra int per entry held in {@link #address}. */
+ @Override
+ protected int bytesPerEntry() {
+ return RamUsageEstimator.NUM_BYTES_INT + super.bytesPerEntry();
+ }
+
+ @Override
+ protected void copyTo(AddressParallelArray toArray, int numToCopy) {
+ super.copyTo(toArray, numToCopy);
+ // NOTE(review): copies `size` entries rather than `numToCopy`; safe only
+ // while toArray is at least as large -- confirm this is intended
+ System.arraycopy(address, 0, toArray.address, 0, size);
+
+ }
+
+ @Override
+ public AddressParallelArray newInstance(int size) {
+ return new AddressParallelArray(size, bytesUsed);
+ }
+
+ }
+
+
+ static class Writer extends BytesWriterBase {
+ private int[] docToAddress;
+ private int address = 1;
+
+ private final ParallelBytesStartArray array = new ParallelBytesStartArray(new AddressParallelArray(0, bytesUsed));
+ private final BytesRefHash hash = new BytesRefHash(pool, 16, array) ;
+
+ /** Creates a writer with a direct block allocator and a private RAM counter. */
+ public Writer(Directory dir, String id) throws IOException {
+ this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
+ new AtomicLong());
+ }
+ /** Creates a writer; {@code bytesUsed} accumulates the RAM this writer consumes. */
+ public Writer(Directory dir, String id, Allocator allocator, AtomicLong bytesUsed) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed);
+ docToAddress = new int[1];
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+ }
+
+ /**
+ * Records {@code bytes} for {@code docID}. A zero-length value is treated
+ * as the default and not stored. Duplicate values share one length-prefixed
+ * data entry; only the entry's address is recorded per document.
+ */
+ @Override
+ synchronized public void add(int docID, BytesRef bytes) throws IOException {
+ if(bytes.length == 0)
+ return; // default
+ if(datOut == null)
+ initDataOut();
+ final int e = hash.add(bytes);
+
+ if (docID >= docToAddress.length) {
+ final int oldSize = docToAddress.length;
+ docToAddress = ArrayUtil.grow(docToAddress, 1+docID);
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * (docToAddress.length - oldSize));
+ }
+ final int docAddress;
+ if (e >= 0) {
+ // first time we see this value: append length-prefixed bytes to the data file
+ docAddress = array.array.address[e] = address;
+ address += IOUtils.writeLength(datOut, bytes);
+ datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+ address += bytes.length;
+ } else {
+ // value seen before: reuse the previously recorded address
+ docAddress = array.array.address[(-e)-1];
+ }
+ docToAddress[docID] = docAddress;
+ }
+
+ /** Returns an estimate of the RAM, in bytes, used by this writer. */
+ public long ramBytesUsed() {
+ return bytesUsed.get();
+ }
+
+ // Important that we get docCount, in case there were
+ // some last docs that we didn't see
+ @Override
+ synchronized public void finish(int docCount) throws IOException {
+ if(datOut == null)
+ return;
+ initIndexOut();
+ idxOut.writeInt(address-1);
+
+ // write index
+ // nocommit -- allow forcing fixed array (not -1)
+ // TODO(simonw): check the address calculation / make it more intuitive
+ final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1));
+ final int limit;
+ if (docCount > docToAddress.length) {
+ limit = docToAddress.length;
+ } else {
+ limit = docCount;
+ }
+ for(int i=0;i comp;
+
+ private final BytesRefHash hash = new BytesRefHash(pool);
+
+ /** Creates a writer with a direct block allocator and a private RAM counter. */
+ public Writer(Directory dir, String id, Comparator comp)
+ throws IOException {
+ this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
+ new AtomicLong());
+ }
+
+ /** Creates a writer sorting values with {@code comp}; {@code bytesUsed} tracks RAM. */
+ public Writer(Directory dir, String id, Comparator comp,
+ Allocator allocator, AtomicLong bytesUsed) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
+ new ByteBlockPool(allocator), bytesUsed);
+ this.comp = comp;
+ docToEntry = new int[1];
+ docToEntry[0] = -1;
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+
+ }
+
+ /**
+ * Records {@code bytes} for {@code docID}. Zero-length values are treated
+ * as the default and not stored. Values are deduplicated via the hash; per
+ * document only the entry id is remembered, sorting happens in finish().
+ */
+ @Override
+ synchronized public void add(int docID, BytesRef bytes) throws IOException {
+ if (bytes.length == 0)
+ return;// default
+ if (docID >= docToEntry.length) {
+ // NOTE(review): oversize() is given NUM_BYTES_OBJECT_REF for an int[]
+ // -- verify NUM_BYTES_INT was intended
+ int[] newArray = new int[ArrayUtil.oversize(1 + docID,
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
+ // unassigned docs are marked -1 and later written as ord 0 (default)
+ Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
+ bytesUsed.addAndGet((newArray.length - docToEntry.length)
+ * RamUsageEstimator.NUM_BYTES_INT);
+ docToEntry = newArray;
+ }
+ final int e = hash.add(bytes);
+ docToEntry[docID] = e < 0 ? (-e) - 1 : e;
+ }
+
+ // Important that we get docCount, in case there were
+ // some last docs that we didn't see
+ /**
+ * Sorts all seen values, writes the data and index streams, and releases
+ * per-doc RAM. Index layout: total byte count, packed docID -&gt; 1+ord map
+ * (0 means unassigned/default), then packed ord -&gt; offset map.
+ */
+ @Override
+ synchronized public void finish(int docCount) throws IOException {
+ final int count = hash.size();
+ if (count == 0)
+ return;
+ initIndexOut();
+ initDataOut();
+ int[] sortedEntries = hash.sort(comp);
+
+ // first dump bytes data, recording index & offset as
+ // we go
+ long offset = 0;
+ long lastOffset = 0;
+ final int[] index = new int[count];
+ final long[] offsets = new long[count];
+ for (int i = 0; i < count; i++) {
+ final int e = sortedEntries[i];
+ offsets[i] = offset;
+ index[e] = 1 + i;
+
+ final BytesRef bytes = hash.get(e);
+ // TODO: we could prefix code...
+ datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+ lastOffset = offset;
+ offset += bytes.length;
+ }
+
+ // total bytes of data
+ idxOut.writeLong(offset);
+
+ // write index -- first doc -> 1+ord
+ // nocommit -- allow not -1:
+ final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut,
+ docCount, PackedInts.bitsRequired(count));
+ final int limit = docCount > docToEntry.length ? docToEntry.length
+ : docCount;
+ for (int i = 0; i < limit; i++) {
+ final int e = docToEntry[i];
+ indexWriter.add(e == -1 ? 0 : index[e]);
+ }
+ // docs beyond the last one seen get the default (ord 0)
+ for (int i = limit; i < docCount; i++) {
+ indexWriter.add(0);
+ }
+ indexWriter.finish();
+
+ // next ord (0-based) -> offset
+ // nocommit -- allow not -1:
+ PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
+ PackedInts.bitsRequired(lastOffset));
+ for (int i = 0; i < count; i++) {
+ offsetWriter.add(offsets[i]);
+ }
+ offsetWriter.finish();
+
+ super.finish(docCount);
+ // give back the RAM held by docToEntry
+ bytesUsed.addAndGet((-docToEntry.length)
+ * RamUsageEstimator.NUM_BYTES_INT);
+
+ }
+ }
+
+ public static class Reader extends BytesReaderBase {
+
+ Reader(Directory dir, String id, int maxDoc) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_START, true);
+ }
+
+ @Override
+ public org.apache.lucene.index.values.Reader.Source load()
+ throws IOException {
+ return loadSorted(null);
+ }
+
+ @Override
+ public SortedSource loadSorted(Comparator comp)
+ throws IOException {
+ return new Source(cloneData(), cloneIndex(), comp);
+ }
+
+ private static class Source extends BytesBaseSortedSource {
+ // TODO: paged data
+ private final byte[] data;
+ private final BytesRef bytesRef = new BytesRef();
+ private final PackedInts.Reader docToOrdIndex;
+ private final PackedInts.Reader ordToOffsetIndex; // 0-based
+ private final long totBytes;
+ private final int valueCount;
+ private final LookupResult lookupResult = new LookupResult();
+ private final Comparator comp;
+
+ public Source(IndexInput datIn, IndexInput idxIn,
+ Comparator comp) throws IOException {
+ super(datIn, idxIn);
+ totBytes = idxIn.readLong();
+ data = new byte[(int) totBytes];
+ datIn.readBytes(data, 0, (int) totBytes);
+ docToOrdIndex = PackedInts.getReader(idxIn);
+ ordToOffsetIndex = PackedInts.getReader(idxIn);
+ valueCount = ordToOffsetIndex.size();
+ bytesRef.bytes = data;
+ // default byte sort order
+ this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
+ : comp;
+
+ }
+
+ @Override
+ public BytesRef getByOrd(int ord) {
+ return ord == 0 ? defaultValue : deref(--ord);
+ }
+
+ @Override
+ public int ord(int docID) {
+ return (int) docToOrdIndex.get(docID);
+ }
+
+ @Override
+ public LookupResult getByValue(BytesRef bytes) {
+ return binarySearch(bytes, 0, valueCount - 1);
+ }
+
+ public long ramBytesUsed() {
+ // TODO(simonw): move ram usage to PackedInts?
+ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ + data.length
+ + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex
+ .getBitsPerValue()
+ * docToOrdIndex.getBitsPerValue())
+ + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + ordToOffsetIndex
+ .getBitsPerValue()
+ * ordToOffsetIndex.getBitsPerValue());
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ // ord is 0-based
+ private BytesRef deref(int ord) {
+ bytesRef.offset = (int) ordToOffsetIndex.get(ord);
+ final long nextOffset;
+ if (ord == valueCount - 1) {
+ nextOffset = totBytes;
+ } else {
+ nextOffset = ordToOffsetIndex.get(1 + ord);
+ }
+ bytesRef.length = (int) (nextOffset - bytesRef.offset);
+ return bytesRef;
+ }
+
+ // TODO: share w/ FixedSortedBytesValues?
+ private LookupResult binarySearch(BytesRef b, int low, int high) {
+
+ while (low <= high) {
+ int mid = (low + high) >>> 1;
+ deref(mid);
+ final int cmp = comp.compare(bytesRef, b);
+ if (cmp < 0) {
+ low = mid + 1;
+ } else if (cmp > 0) {
+ high = mid - 1;
+ } else {
+ lookupResult.ord = mid + 1;
+ lookupResult.found = true;
+ return lookupResult;
+ }
+ }
+ assert comp.compare(bytesRef, b) != 0;
+ lookupResult.ord = low;
+ lookupResult.found = false;
+ return lookupResult;
+ }
+ }
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ return new VarSortedBytesEnum(source, cloneData(), cloneIndex());
+ }
+
+ private static class VarSortedBytesEnum extends ValuesEnum {
+
+ private PackedInts.Reader docToOrdIndex;
+ private PackedInts.Reader ordToOffsetIndex;
+ private IndexInput idxIn;
+ private IndexInput datIn;
+ private final BytesRef bytesRef;
+ private int valueCount;
+ private long totBytes;
+ private int docCount;
+ private int pos = -1;
+ private final long fp;
+
+ protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn,
+ IndexInput idxIn) throws IOException {
+ super(source, Values.BYTES_VAR_SORTED);
+ bytesRef = attr.bytes();
+ totBytes = idxIn.readLong();
+ // keep that in memory to prevent lots of disk seeks
+ docToOrdIndex = PackedInts.getReader(idxIn);
+ ordToOffsetIndex = PackedInts.getReader(idxIn);
+ valueCount = ordToOffsetIndex.size();
+ docCount = docToOrdIndex.size();
+ fp = datIn.getFilePointer();
+ this.idxIn = idxIn;
+ this.datIn = datIn;
+ }
+
+ @Override
+ public void close() throws IOException {
+ idxIn.close();
+ datIn.close();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= docCount)
+ return pos = NO_MORE_DOCS;
+ final int ord = (int) docToOrdIndex.get(target) - 1;
+ if (ord == -1) {
+ bytesRef.length = 0;
+ bytesRef.offset = 0;
+ return pos = target;
+ }
+ final long offset = ordToOffsetIndex.get(ord);
+ final long nextOffset;
+ if (ord == valueCount - 1) {
+ nextOffset = totBytes;
+ } else {
+ nextOffset = ordToOffsetIndex.get(1 + ord);
+ }
+ final int length = (int) (nextOffset - offset);
+ datIn.seek(fp + offset);
+ if (bytesRef.bytes.length < length)
+ bytesRef.grow(length);
+ datIn.readBytes(bytesRef.bytes, 0, length);
+ bytesRef.length = length;
+ bytesRef.offset = 0;
+ return pos = target;
+ }
+
+ @Override
+ public int docID() {
+ return pos;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(pos + 1);
+ }
+ }
+ }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
new file mode 100644
index 00000000000..83b97479171
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
@@ -0,0 +1,232 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Variable length byte[] per document, no sharing
+
+class VarStraightBytesImpl {
+
+ static final String CODEC_NAME = "VarStraightBytes";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+ static class Writer extends BytesWriterBase {
+ private int address;
+ // start at -1 if the first added value is > 0
+ private int lastDocID = -1;
+ private int[] docToAddress;
+
+ public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, bytesUsed);
+ docToAddress = new int[1];
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+ }
+
+ public Writer(Directory dir, String id) throws IOException {
+ this(dir, id, new AtomicLong());
+ }
+
+ // Fills up to but not including this docID
+ private void fill(final int docID) {
+ if (docID >= docToAddress.length) {
+ int oldSize = docToAddress.length;
+ docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
+ bytesUsed.addAndGet(-(docToAddress.length-oldSize)*RamUsageEstimator.NUM_BYTES_INT);
+ }
+ for (int i = lastDocID + 1; i < docID; i++) {
+ docToAddress[i] = address;
+ }
+ lastDocID = docID;
+ }
+
+ @Override
+ synchronized public void add(int docID, BytesRef bytes) throws IOException {
+ if(bytes.length == 0)
+ return; // default
+ if (datOut == null)
+ initDataOut();
+ fill(docID);
+ docToAddress[docID] = address;
+ datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+ address += bytes.length;
+ }
+
+ @Override
+ synchronized public void finish(int docCount) throws IOException {
+ if (datOut == null)
+ return;
+ initIndexOut();
+ // write all lengths to index
+ // write index
+ fill(docCount);
+ idxOut.writeVInt(address);
+ // nocommit -- allow not -1
+ final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
+ PackedInts.bitsRequired(address));
+ for (int i = 0; i < docCount; i++) {
+ w.add(docToAddress[i]);
+ }
+ w.finish();
+ bytesUsed.addAndGet(-(docToAddress.length)*RamUsageEstimator.NUM_BYTES_INT);
+ docToAddress = null;
+ super.finish(docCount);
+ }
+
+ public long ramBytesUsed() {
+ return bytesUsed.get();
+ }
+ }
+
+ public static class Reader extends BytesReaderBase {
+ private final int maxDoc;
+
+ Reader(Directory dir, String id, int maxDoc) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_START, true);
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public Source load() throws IOException {
+ return new Source(cloneData(), cloneIndex());
+ }
+
+ private class Source extends BytesBaseSource {
+ private final int totBytes;
+ // TODO: paged data
+ private final byte[] data;
+ private final BytesRef bytesRef = new BytesRef();
+ private final PackedInts.Reader addresses;
+
+ public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
+ super(datIn, idxIn);
+ totBytes = idxIn.readVInt();
+ data = new byte[totBytes];
+ datIn.readBytes(data, 0, totBytes);
+ addresses = PackedInts.getReader(idxIn);
+ bytesRef.bytes = data;
+ }
+
+ @Override
+ public BytesRef bytes(int docID) {
+ final int address = (int) addresses.get(docID);
+ bytesRef.offset = address;
+ if (docID == maxDoc - 1) {
+ bytesRef.length = totBytes - bytesRef.offset;
+ } else {
+ bytesRef.length = (int) addresses.get(1 + docID) - bytesRef.offset;
+ }
+ return bytesRef;
+ }
+
+ @Override
+ public int getValueCount() {
+ throw new UnsupportedOperationException();
+ }
+
+ public long ramBytesUsed() {
+ // TODO(simonw): move address ram usage to PackedInts?
+ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ + data.length
+ + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + addresses
+ .getBitsPerValue()
+ * addresses.size());
+ }
+ }
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ return new VarStrainghtBytesEnum(source, cloneData(), cloneIndex());
+ }
+
+ private class VarStrainghtBytesEnum extends ValuesEnum {
+ private final PackedInts.Reader addresses;
+ private final IndexInput datIn;
+ private final IndexInput idxIn;
+ private final long fp;
+ private final int totBytes;
+ private final BytesRef ref;
+ private int pos = -1;
+
+ protected VarStrainghtBytesEnum(AttributeSource source, IndexInput datIn,
+ IndexInput idxIn) throws IOException {
+ super(source, Values.BYTES_VAR_STRAIGHT);
+ totBytes = idxIn.readVInt();
+ fp = datIn.getFilePointer();
+ addresses = PackedInts.getReader(idxIn);
+ this.datIn = datIn;
+ this.idxIn = idxIn;
+ ref = attr.bytes();
+
+ }
+
+ @Override
+ public void close() throws IOException {
+ datIn.close();
+ idxIn.close();
+ }
+
+ @Override
+ public int advance(final int target) throws IOException {
+ if (target >= maxDoc) {
+ ref.length = 0;
+ ref.offset = 0;
+ return pos = NO_MORE_DOCS;
+ }
+ final long addr = addresses.get(target);
+ if (addr == totBytes) {
+ // nocommit is that a valid default value
+ ref.length = 0;
+ ref.offset = 0;
+ return pos = target;
+ }
+ datIn.seek(fp + addr);
+ final int size = (int) (target == maxDoc - 1 ? totBytes - addr
+ : addresses.get(target + 1) - addr);
+ if (ref.bytes.length < size)
+ ref.grow(size);
+ ref.length = size;
+ datIn.readBytes(ref.bytes, 0, size);
+ return pos = target;
+ }
+
+ @Override
+ public int docID() {
+ return pos;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(pos+1);
+ }
+ }
+ }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java
new file mode 100644
index 00000000000..13bf0947614
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java
@@ -0,0 +1,92 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+public abstract class Writer {
+
+ /** Records the specified value for the docID */
+ public void add(int docID, long value) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Records the specified value for the docID */
+ public void add(int docID, double value) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Records the specified value for the docID */
+ public void add(int docID, BytesRef value) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Records the specified value for the docID */
+ protected abstract void add(int docID) throws IOException;
+
+ protected abstract void setNextAttribute(ValuesAttribute attr);
+
+ /** Finish writing, close any files */
+ public abstract void finish(int docCount) throws IOException;
+
+ public static class MergeState {
+ public final Reader reader;
+ public final int docBase;
+ public final int docCount;
+ public final Bits bits;
+
+ public MergeState(Reader reader, int docBase, int docCount, Bits bits) {
+ assert reader != null;
+ this.reader = reader;
+ this.docBase = docBase;
+ this.docCount = docCount;
+ this.bits = bits;
+ }
+ }
+
+ public void add(List states) throws IOException {
+ for (MergeState state : states) {
+ merge(state);
+ }
+ }
+
+ // enables bulk copies in subclasses per MergeState
+ protected void merge(MergeState state) throws IOException {
+ final ValuesEnum valEnum = state.reader.getEnum();
+ assert valEnum != null;
+ try {
+ final ValuesAttribute attr = valEnum.addAttribute(ValuesAttribute.class);
+ setNextAttribute(attr);
+ int docID = state.docBase;
+ final Bits bits = state.bits;
+ final int docCount = state.docCount;
+ for (int i = 0; i < docCount; i++) {
+ if (bits == null || !bits.get(i)) {
+ if (valEnum.advance(i) == ValuesEnum.NO_MORE_DOCS)
+ break;
+ add(docID++);
+ }
+ }
+ } finally {
+ valEnum.close();
+ }
+ }
+}
diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
index 2677ef0d5b3..d71b89f6fa6 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
@@ -22,8 +22,9 @@ import java.text.Collator;
import java.util.Locale;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.FieldCache.DocTermsIndex;
+import org.apache.lucene.index.values.Reader.Source;
import org.apache.lucene.search.FieldCache.DocTerms;
+import org.apache.lucene.search.FieldCache.DocTermsIndex;
import org.apache.lucene.search.cache.ByteValuesCreator;
import org.apache.lucene.search.cache.CachedArray;
import org.apache.lucene.search.cache.CachedArrayCreator;
@@ -39,9 +40,9 @@ import org.apache.lucene.search.cache.CachedArray.IntValues;
import org.apache.lucene.search.cache.CachedArray.LongValues;
import org.apache.lucene.search.cache.CachedArray.ShortValues;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.packed.Direct8;
import org.apache.lucene.util.packed.Direct16;
import org.apache.lucene.util.packed.Direct32;
+import org.apache.lucene.util.packed.Direct8;
import org.apache.lucene.util.packed.PackedInts;
/**
@@ -159,7 +160,6 @@ public abstract class FieldComparator {
* comparators can just return "this" to reuse the same
* comparator across segments
* @throws IOException
- * @throws IOException
*/
public abstract FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException;
@@ -309,6 +309,65 @@ public abstract class FieldComparator {
}
}
+ /** Uses float index values to sort by ascending value */
+ public static final class FloatIndexValuesComparator extends FieldComparator {
+ private final double[] values;
+ private Source currentReaderValues;
+ private final String field;
+ private double bottom;
+
+ FloatIndexValuesComparator(int numHits, String field) {
+ values = new double[numHits];
+ this.field = field;
+ }
+
+ @Override
+ public int compare(int slot1, int slot2) {
+ final double v1 = values[slot1];
+ final double v2 = values[slot2];
+ if (v1 > v2) {
+ return 1;
+ } else if (v1 < v2) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public int compareBottom(int doc) {
+ final double v2 = currentReaderValues.floats(doc);
+ if (bottom > v2) {
+ return 1;
+ } else if (bottom < v2) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public void copy(int slot, int doc) {
+ values[slot] = currentReaderValues.floats(doc);
+ }
+
+ @Override
+ public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException {
+ currentReaderValues = reader.getIndexValuesCache().getFloats(field);
+ return this;
+ }
+
+ @Override
+ public void setBottom(final int bottom) {
+ this.bottom = values[bottom];
+ }
+
+ @Override
+ public Comparable value(int slot) {
+ return Double.valueOf(values[slot]);
+ }
+ }
+
/** Parses field's values as float (using {@link
* FieldCache#getFloats} and sorts by ascending value */
public static final class FloatComparator extends NumericComparator {
@@ -448,6 +507,69 @@ public abstract class FieldComparator {
}
}
+ /** Loads int index values and sorts by ascending value. */
+ public static final class IntIndexValuesComparator extends FieldComparator {
+ private final long[] values;
+ private Source currentReaderValues;
+ private final String field;
+ private long bottom;
+
+ IntIndexValuesComparator(int numHits, String field) {
+ values = new long[numHits];
+ this.field = field;
+ }
+
+ @Override
+ public int compare(int slot1, int slot2) {
+ // TODO: there are sneaky non-branch ways to compute
+ // -1/+1/0 sign
+ final long v1 = values[slot1];
+ final long v2 = values[slot2];
+ if (v1 > v2) {
+ return 1;
+ } else if (v1 < v2) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public int compareBottom(int doc) {
+ // TODO: there are sneaky non-branch ways to compute
+ // -1/+1/0 sign
+ final long v2 = currentReaderValues.ints(doc);
+ if (bottom > v2) {
+ return 1;
+ } else if (bottom < v2) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public void copy(int slot, int doc) {
+ values[slot] = currentReaderValues.ints(doc);
+ }
+
+ @Override
+ public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException {
+ currentReaderValues = reader.getIndexValuesCache().getInts(field);
+ return this;
+ }
+
+ @Override
+ public void setBottom(final int bottom) {
+ this.bottom = values[bottom];
+ }
+
+ @Override
+ public Comparable value(int slot) {
+ return Long.valueOf(values[slot]);
+ }
+ }
+
/** Parses field's values as long (using {@link
* FieldCache#getLongs} and sorts by ascending value */
public static final class LongComparator extends NumericComparator {
diff --git a/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java
index a32922b1947..c365294fe55 100644
--- a/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/ReqExclScorer.java
@@ -23,7 +23,7 @@ import java.io.IOException;
/** A Scorer for queries with a required subscorer
* and an excluding (prohibited) sub DocIdSetIterator.
*
- * This Scorer
implements {@link Scorer#skipTo(int)},
+ * This Scorer
implements {@link Scorer#advance(int)},
* and it uses the skipTo() on the given scorers.
*/
class ReqExclScorer extends Scorer {
diff --git a/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java
index c8e1b81ff54..cab09c864c8 100644
--- a/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java
+++ b/lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
/** A Scorer for queries with a required part and an optional part.
* Delays skipTo() on the optional part until a score() is needed.
*
- * This Scorer
implements {@link Scorer#skipTo(int)}.
+ * This Scorer
implements {@link Scorer#advance(int)}.
*/
class ReqOptSumScorer extends Scorer {
/** The scorers passed from the constructor.
diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java
index 58c4582c0e3..623b785489a 100644
--- a/lucene/src/java/org/apache/lucene/search/SortField.java
+++ b/lucene/src/java/org/apache/lucene/search/SortField.java
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.io.Serializable;
+import java.util.Comparator;
import java.util.Locale;
import org.apache.lucene.search.cache.ByteValuesCreator;
@@ -29,6 +30,11 @@ import org.apache.lucene.search.cache.IntValuesCreator;
import org.apache.lucene.search.cache.LongValuesCreator;
import org.apache.lucene.search.cache.ShortValuesCreator;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.BytesRef;
+
+// nocommit -- for cleaner transition, maybe we should make
+// a new SortField that subclasses this one and always uses
+// index values?
/**
* Stores information about how to sort documents by terms in an individual
@@ -90,6 +96,9 @@ implements Serializable {
* uses ordinals to do the sorting. */
public static final int STRING_VAL = 11;
+ /** Sort use byte[] index values. */
+ public static final int BYTES = 12;
+
/** Represents sorting by document score (relevancy). */
public static final SortField FIELD_SCORE = new SortField (null, SCORE);
@@ -440,6 +449,26 @@ implements Serializable {
field = StringHelper.intern(field);
}
+ private boolean useIndexValues;
+
+ public void setUseIndexValues(boolean b) {
+ useIndexValues = b;
+ }
+
+ public boolean getUseIndexValues() {
+ return useIndexValues;
+ }
+
+ private Comparator bytesComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+
+ public void setBytesComparator(Comparator b) {
+ bytesComparator = b;
+ }
+
+ public Comparator getBytesComparator() {
+ return bytesComparator;
+ }
+
/** Returns the {@link FieldComparator} to use for
* sorting.
*
@@ -469,10 +498,18 @@ implements Serializable {
return new FieldComparator.DocComparator(numHits);
case SortField.INT:
- return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer)missingValue );
+ if (useIndexValues) {
+ return new FieldComparator.IntIndexValuesComparator(numHits, field);
+ } else {
+ return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer) missingValue);
+ }
case SortField.FLOAT:
- return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator)creator, (Float)missingValue );
+ if (useIndexValues) {
+ return new FieldComparator.FloatIndexValuesComparator(numHits, field);
+ } else {
+ return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator) creator, (Float) missingValue);
+ }
case SortField.LONG:
return new FieldComparator.LongComparator(numHits, (LongValuesCreator)creator, (Long)missingValue );
diff --git a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
index 151ae1a95db..4638313de98 100644
--- a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -247,6 +247,19 @@ public final class ArrayUtil {
public static short[] grow(short[] array) {
return grow(array, 1 + array.length);
}
+
+ public static double[] grow(double[] array, int minSize) {
+ if (array.length < minSize) {
+ double[] newArray = new double[oversize(minSize, RamUsageEstimator.NUM_BYTES_DOUBLE)];
+ System.arraycopy(array, 0, newArray, 0, array.length);
+ return newArray;
+ } else
+ return array;
+ }
+
+ public static double[] grow(double[] array) {
+ return grow(array, 1 + array.length);
+ }
public static short[] shrink(short[] array, int targetSize) {
final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT);
diff --git a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java
index a34e9fe7a3c..a79e6f57f33 100644
--- a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java
+++ b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java
@@ -62,6 +62,18 @@ public final class ByteBlockPool {
return new byte[blockSize];
}
}
+
+ public static class DirectAllocator extends Allocator {
+
+ public DirectAllocator(int blockSize) {
+ super(blockSize);
+ }
+
+ @Override
+ public void recycleByteBlocks(byte[][] blocks, int start, int end) {
+ }
+
+ }
public byte[][] buffers = new byte[10][];
diff --git a/lucene/src/java/org/apache/lucene/util/BytesRef.java b/lucene/src/java/org/apache/lucene/util/BytesRef.java
index 1ec291a5c65..342cc6d7aab 100644
--- a/lucene/src/java/org/apache/lucene/util/BytesRef.java
+++ b/lucene/src/java/org/apache/lucene/util/BytesRef.java
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
import java.util.Comparator;
+import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
@@ -271,12 +272,13 @@ public final class BytesRef implements Comparable, Externalizable {
}
private final static Comparator utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
-
+
public static Comparator getUTF8SortedAsUnicodeComparator() {
return utf8SortedAsUnicodeSortOrder;
}
- private static class UTF8SortedAsUnicodeComparator implements Comparator {
+ @SuppressWarnings("serial") // serializable to work with contrib/remote
+ private static final class UTF8SortedAsUnicodeComparator implements Serializable, Comparator {
// Only singleton
private UTF8SortedAsUnicodeComparator() {};
diff --git a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
index a34ad775178..5254e76f742 100644
--- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
@@ -353,6 +353,7 @@ public final class BytesRefHash {
// 1 byte to store length
buffer[bufferUpto] = (byte) length;
pool.byteUpto += length + 1;
+ assert length >= 0: "Length must be positive: " + length;
System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1,
length);
} else {
@@ -569,4 +570,64 @@ public final class BytesRefHash {
}
}
+
+ public static class ParallelBytesStartArray> extends BytesStartArray {
+ private final T prototype;
+ public T array;
+
+ public ParallelBytesStartArray(T template) {
+ this.prototype = template;
+ }
+ @Override
+ public int[] init() {
+ if(array == null) {
+ array = prototype.newInstance(2);
+ }
+ return array.textStart;
+ }
+
+ @Override
+ public int[] grow() {
+ array = array.grow();
+ return array.textStart;
+ }
+
+ @Override
+ public int[] clear() {
+ if(array != null) {
+ array.deref();
+ array = null;
+ }
+ return null;
+ }
+
+ @Override
+ public AtomicLong bytesUsed() {
+ return array.bytesUsed();
+ }
+
+ }
+
+ public abstract static class ParallelArrayBase> extends ParallelArray {
+ final int[] textStart;
+
+ protected ParallelArrayBase(int size, AtomicLong bytesUsed) {
+ super(size, bytesUsed);
+ textStart = new int[size];
+ }
+
+ @Override
+ protected int bytesPerEntry() {
+ return RamUsageEstimator.NUM_BYTES_INT;
+ }
+
+ @Override
+ protected void copyTo(T toArray, int numToCopy) {
+ System.arraycopy(textStart, 0, toArray.textStart, 0, size);
+ }
+
+ @Override
+ public abstract T newInstance(int size);
+
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/util/FloatsRef.java b/lucene/src/java/org/apache/lucene/util/FloatsRef.java
new file mode 100644
index 00000000000..9dd107e323d
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/FloatsRef.java
@@ -0,0 +1,91 @@
+/**
+ *
+ */
+package org.apache.lucene.util;
+
+
+public final class FloatsRef implements Cloneable{
+ public double[] floats;
+ public int offset;
+ public int length;
+
+ public FloatsRef() {
+ }
+
+ public FloatsRef(int capacity) {
+ floats = new double[capacity];
+ }
+
+ public void set(double value) {
+ floats[offset] = value;
+ }
+
+ public double get() {
+ return floats[offset];
+ }
+
+ public FloatsRef(double[] floats, int offset, int length) {
+ this.floats = floats;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ public FloatsRef(FloatsRef other) {
+ copy(other);
+ }
+
+ @Override
+ public Object clone() {
+ return new FloatsRef(this);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 0;
+ final int end = offset + length;
+ for(int i = offset; i < end; i++) {
+ long value = Double.doubleToLongBits(floats[i]);
+ result = prime * result + (int) (value ^ (value >>> 32));
+ }
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof FloatsRef && this.floatsEquals((FloatsRef) other);
+ }
+
+ public boolean floatsEquals(FloatsRef other) {
+ if (length == other.length) {
+ int otherUpto = other.offset;
+ final double[] otherFloats = other.floats;
+ final int end = offset + length;
+ for(int upto=offset;upto> 7) & 0xff));
+ return 2;
+ }
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/util/LongsRef.java b/lucene/src/java/org/apache/lucene/util/LongsRef.java
new file mode 100644
index 00000000000..c5dee1a15b1
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/LongsRef.java
@@ -0,0 +1,91 @@
+/**
+ *
+ */
+package org.apache.lucene.util;
+
+
+public final class LongsRef implements Cloneable {
+ public long[] ints;
+ public int offset;
+ public int length;
+
+ public LongsRef() {
+ }
+
+ public LongsRef(int capacity) {
+ ints = new long[capacity];
+ }
+
+ public LongsRef(long[] ints, int offset, int length) {
+ this.ints = ints;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ public LongsRef(LongsRef other) {
+ copy(other);
+ }
+
+ @Override
+ public Object clone() {
+ return new LongsRef(this);
+ }
+
+ public void set(long value) {
+ ints[offset] = value;
+ }
+
+ public long get() {
+ return ints[offset];
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 0;
+ final int end = offset + length;
+ for(int i = offset; i < end; i++) {
+ long value = ints[i];
+ result = prime * result + (int) (value ^ (value >>> 32));
+ }
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return this.intsEquals((LongsRef) other);
+ }
+
+ public boolean intsEquals(LongsRef other) {
+ if (length == other.length) {
+ int otherUpto = other.offset;
+ final long[] otherInts = other.ints;
+ final int end = offset + length;
+ for(int upto=offset;upto> {
+
+ public final int size;
+ protected final AtomicLong bytesUsed;
+
+ protected ParallelArray(final int size, AtomicLong bytesUsed) {
+ this.size = size;
+ this.bytesUsed = bytesUsed;
+ bytesUsed.addAndGet((size) * bytesPerEntry());
+
+ }
+
+ protected abstract int bytesPerEntry();
+
+ public AtomicLong bytesUsed() {
+ return bytesUsed;
+ }
+
+ public void deref() {
+ bytesUsed.addAndGet((-size) * bytesPerEntry());
+ }
+
+ public abstract T newInstance(int size);
+
+ public final T grow() {
+ int newSize = ArrayUtil.oversize(size + 1, bytesPerEntry());
+ T newArray = newInstance(newSize);
+ copyTo(newArray, size);
+ bytesUsed.addAndGet((newSize - size) * bytesPerEntry());
+ return newArray;
+ }
+
+ protected abstract void copyTo(T toArray, int numToCopy);
+}
diff --git a/lucene/src/java/org/apache/lucene/util/packed/Packed64.java b/lucene/src/java/org/apache/lucene/util/packed/Packed64.java
index b3826676503..691cec42a77 100644
--- a/lucene/src/java/org/apache/lucene/util/packed/Packed64.java
+++ b/lucene/src/java/org/apache/lucene/util/packed/Packed64.java
@@ -182,7 +182,7 @@ class Packed64 extends PackedInts.ReaderImpl implements PackedInts.Mutable {
final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE);
final int base = bitPos * FAC_BITPOS;
-
+ assert elementPos < blocks.length : "elementPos: " + elementPos + "; blocks.len: " + blocks.length;
return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) |
((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]);
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
index db77cb88d4d..07098d41e59 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
@@ -47,7 +47,7 @@ public class TestIndexWriterConfig extends LuceneTestCase {
// Does not implement anything - used only for type checking on IndexWriterConfig.
@Override
- DocConsumer getChain(DocumentsWriter documentsWriter) {
+ public DocConsumer getChain(DocumentsWriter documentsWriter) {
return null;
}
diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java
index 782cd3a2a01..97e1963ccd9 100644
--- a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/TermInfosWriter.java
@@ -61,7 +61,7 @@ final class TermInfosWriter {
int indexInterval = 128;
/** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
- * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in
+ * used to accelerate {@link TermDocs#advance(int)}. Larger values result in
* smaller indexes, greater acceleration, but fewer accelerable cases, while
* smaller values result in bigger indexes, less acceleration and more
* accelerable cases. More detailed experiments would be useful here. */
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
new file mode 100644
index 00000000000..87efd06e5c4
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
@@ -0,0 +1,658 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.EnumSet;
+import java.util.List;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.ValuesField;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.values.Reader.SortedSource;
+import org.apache.lucene.index.values.Reader.Source;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util._TestUtil;
+
+public class TestIndexValues extends LuceneTestCase {
+
+ public void testBytesStraight() throws IOException {
+ runTestBytes(Bytes.Mode.STRAIGHT, true);
+ runTestBytes(Bytes.Mode.STRAIGHT, false);
+ }
+
+ public void testBytesDeref() throws IOException {
+ runTestBytes(Bytes.Mode.DEREF, true);
+ runTestBytes(Bytes.Mode.DEREF, false);
+ }
+
+ public void testBytesSorted() throws IOException {
+ runTestBytes(Bytes.Mode.SORTED, true);
+ runTestBytes(Bytes.Mode.SORTED, false);
+ }
+
+ // nocommit -- for sorted test, do our own Sort of the
+ // values and verify it's identical
+ public void runTestBytes(final Bytes.Mode mode,
+ final boolean fixedSize) throws IOException {
+
+ final BytesRef bytesRef = new BytesRef();
+
+ final Comparator comp = mode == Bytes.Mode.SORTED ? BytesRef
+ .getUTF8SortedAsUnicodeComparator()
+ : null;
+
+ Directory dir = newDirectory();
+ Writer w = Bytes
+ .getWriter(dir, "test", mode, comp, fixedSize);
+ int maxDoc = 220;
+ final String[] values = new String[maxDoc];
+ final int lenMin, lenMax;
+ if (fixedSize) {
+ lenMin = lenMax = 3 + random.nextInt(7);
+ } else {
+ lenMin = 1;
+ lenMax = 15 + random.nextInt(6);
+ }
+ for (int i = 0; i < 100; i++) {
+ final String s;
+ if (i > 0 && random.nextInt(5) <= 2) {
+ // use prior value
+ s = values[2 * random.nextInt(i)];
+ } else {
+ s = _TestUtil.randomUnicodeString(random, lenMin, lenMax);
+ }
+ values[2 * i] = s;
+
+ UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef);
+ w.add(2 * i, bytesRef);
+ }
+ w.finish(maxDoc);
+
+ Reader r = Bytes.getReader(dir, "test", mode, fixedSize, maxDoc);
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum bytesEnum = r.getEnum();
+ assertNotNull("enum is null", bytesEnum);
+ ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
+ assertNotNull("attribute is null", attr);
+ BytesRef ref = attr.bytes();
+ assertNotNull("BytesRef is null - enum not initialized to use bytes", attr);
+
+ for (int i = 0; i < 2; i++) {
+ final int idx = 2 * i;
+ assertEquals("doc: " + idx, idx, bytesEnum.advance(idx));
+ String utf8String = ref.utf8ToString();
+ assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + " lenRight: " + utf8String.length() , values[idx], utf8String);
+ }
+ assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
+ assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc+1));
+
+ bytesEnum.close();
+ }
+
+
+ // Verify we can load source twice:
+ for (int iter = 0; iter < 2; iter++) {
+ Source s;
+ Reader.SortedSource ss;
+ if (mode == Bytes.Mode.SORTED) {
+ s = ss = r.loadSorted(comp);
+ } else {
+ s = r.load();
+ ss = null;
+ }
+
+ for (int i = 0; i < 100; i++) {
+ final int idx = 2 * i;
+ assertNotNull("doc " + idx + "; value=" + values[idx], s.bytes(idx));
+ assertEquals("doc " + idx, values[idx], s.bytes(idx).utf8ToString());
+ if (ss != null) {
+ assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
+ .utf8ToString());
+ Reader.SortedSource.LookupResult result = ss.getByValue(new BytesRef(
+ values[idx]));
+ assertTrue(result.found);
+ assertEquals(ss.ord(idx), result.ord);
+ }
+ }
+
+ // Lookup random strings:
+ if (mode == Bytes.Mode.SORTED) {
+ final int numValues = ss.getValueCount();
+ for (int i = 0; i < 1000; i++) {
+ BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString(
+ random, lenMin, lenMax));
+ SortedSource.LookupResult result = ss.getByValue(bytesValue);
+ if (result.found) {
+ assert result.ord > 0;
+ assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord)));
+ int count = 0;
+ for (int k = 0; k < 100; k++) {
+ if (bytesValue.utf8ToString().equals(values[2 * k])) {
+ assertEquals(ss.ord(2 * k), result.ord);
+ count++;
+ }
+ }
+ assertTrue(count > 0);
+ } else {
+ assert result.ord >= 0;
+ if (result.ord == 0) {
+ final BytesRef firstRef = ss.getByOrd(1);
+ // random string was before our first
+ assertTrue(firstRef.compareTo(bytesValue) > 0);
+ } else if (result.ord == numValues) {
+ final BytesRef lastRef = ss.getByOrd(numValues);
+ // random string was after our last
+ assertTrue(lastRef.compareTo(bytesValue) < 0);
+ } else {
+ // random string fell between two of our values
+ final BytesRef before = (BytesRef) ss.getByOrd(result.ord)
+ .clone();
+ final BytesRef after = ss.getByOrd(result.ord + 1);
+ assertTrue(before.compareTo(bytesValue) < 0);
+ assertTrue(bytesValue.compareTo(after) < 0);
+
+ }
+ }
+ }
+ }
+ }
+
+ r.close();
+ dir.close();
+ }
+
+ public void testInts() throws IOException {
+ long maxV = 1;
+ final int NUM_VALUES = 1000;
+ final long[] values = new long[NUM_VALUES];
+ for (int rx = 1; rx < 63; rx++, maxV *= 2) {
+ for (int b = 0; b < 2; b++) {
+ Directory dir = newDirectory();
+ boolean useFixedArrays = b == 0;
+ Writer w = Ints.getWriter(dir, "test", useFixedArrays);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = random.nextLong() % (1 + maxV);
+ values[i] = v;
+ w.add(i, v);
+ }
+ final int additionalDocs = 1 + random.nextInt(9);
+ w.finish(NUM_VALUES + additionalDocs);
+
+ Reader r = Ints.getReader(dir, "test", useFixedArrays);
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = r.load();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = s.ints(i);
+ assertEquals("index " + i + " b: " + b, values[i], v);
+ }
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ LongsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ assertEquals(values[i], ints.get());
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ assertEquals("" + i, 0, ints.get());
+ }
+
+ iEnum.close();
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ LongsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
+ assertEquals(i, iEnum.advance(i));
+ assertEquals(values[i], ints.get());
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(i, iEnum.advance(i));
+ assertEquals("" + i, 0, ints.get());
+ }
+
+ iEnum.close();
+ }
+ r.close();
+ dir.close();
+ }
+ }
+ }
+
+ public void testFloats4() throws IOException {
+ runTestFloats(4, 0.00001);
+ }
+
+ private void runTestFloats(int precision, double delta)
+ throws IOException {
+ Directory dir = newDirectory();
+ Writer w = Floats.getWriter(dir, "test", precision);
+ final int NUM_VALUES = 1000;
+ final double[] values = new double[NUM_VALUES];
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final double v = precision == 4 ? random.nextFloat() : random.nextDouble();
+ values[i] = v;
+ w.add(i, v);
+ }
+ final int additionalValues = 1 + random.nextInt(10);
+ w.finish(NUM_VALUES + additionalValues);
+
+ Reader r = Floats.getReader(dir, "test", NUM_VALUES
+ + additionalValues);
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = r.load();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(values[i], s.floats(i), 0.0f);
+ }
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum fEnum = r.getEnum();
+ ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
+ FloatsRef floats = attr.floats();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(i, fEnum.nextDoc());
+ assertEquals(values[i], floats.get(), delta);
+ }
+ for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ assertEquals(i, fEnum.nextDoc());
+ assertEquals(0.0, floats.get(), delta);
+ }
+ fEnum.close();
+ }
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum fEnum = r.getEnum();
+ ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
+ FloatsRef floats = attr.floats();
+ for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
+ assertEquals(i, fEnum.advance(i));
+ assertEquals(values[i], floats.get(), delta);
+ }
+ for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ assertEquals(i, fEnum.advance(i));
+ assertEquals(0.0, floats.get(), delta);
+ }
+ fEnum.close();
+ }
+
+ r.close();
+ dir.close();
+ }
+
+ public void testFloats8() throws IOException {
+ runTestFloats(8, 0.0);
+ }
+
+ /**
+ * Tests complete indexing of {@link Values} including deletions, merging and
+ * sparse value fields on Compound-File
+ */
+ public void testCFSIndex() throws IOException {
+ // without deletions
+ IndexWriterConfig cfg = writerConfig(true);
+ // primitives - no deletes
+ runTestNumerics(cfg,false);
+
+ cfg = writerConfig(true);
+ // bytes - no deletes
+ runTestIndexBytes(cfg, false);
+
+ // with deletions
+ cfg = writerConfig(true);
+ // primitives
+ runTestNumerics(cfg, true);
+
+ cfg = writerConfig(true);
+ // bytes
+ runTestIndexBytes(cfg, true);
+ }
+
+ /**
+ * Tests complete indexing of {@link Values} including deletions, merging and
+ * sparse value fields on non-compound-file
+ */
+ public void testIndex() throws IOException {
+ //
+ // without deletions
+ IndexWriterConfig cfg = writerConfig(false);
+ // primitives - no deletes
+ runTestNumerics(cfg, false);
+
+ cfg = writerConfig(false);
+ // bytes - no deletes
+ runTestIndexBytes(cfg, false);
+
+ // with deletions
+ cfg = writerConfig(false);
+ // primitives
+ runTestNumerics(cfg, true);
+
+ cfg = writerConfig(false);
+ // bytes
+ runTestIndexBytes(cfg, true);
+ }
+
+ private IndexWriterConfig writerConfig(boolean useCompoundFile) {
+ final IndexWriterConfig cfg = newIndexWriterConfig(
+ TEST_VERSION_CURRENT, new MockAnalyzer());
+ MergePolicy mergePolicy = cfg.getMergePolicy();
+ if(mergePolicy instanceof LogMergePolicy) {
+ ((LogMergePolicy)mergePolicy).setUseCompoundFile(useCompoundFile);
+ } else if(useCompoundFile) {
+ LogMergePolicy policy = new LogDocMergePolicy();
+ policy.setUseCompoundFile(useCompoundFile);
+ cfg.setMergePolicy(policy);
+ }
+ return cfg;
+ }
+
+ public void runTestNumerics(IndexWriterConfig cfg,
+ boolean withDeletions) throws IOException {
+ Directory d = newDirectory();
+ IndexWriter w = new IndexWriter(d, cfg);
+ final int numValues = 350;
+ final List numVariantList = new ArrayList(NUMERICS);
+
+ // run in random order to test if fill works correctly during merges
+ Collections.shuffle(numVariantList, random);
+ for (Values val : numVariantList) {
+ OpenBitSet deleted = indexValues(w, numValues, val, numVariantList,
+ withDeletions, 7);
+ List closeables = new ArrayList();
+ IndexReader r = IndexReader.open(w);
+ final int numRemainingValues = (int) (numValues - deleted.cardinality());
+ final int base = r.numDocs() - numRemainingValues;
+ switch (val) {
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED: {
+ Reader intsReader = r.getIndexValues(val.name());
+ Source ints = intsReader.load();
+ ValuesEnum intsEnum = intsReader.getEnum();
+ assertNotNull(intsEnum);
+ LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
+ for (int i = 0; i < base; i++) {
+ assertEquals(0, ints.ints(i));
+ assertEquals(val.name() + " base: " + base + " index: " + i, i, random.nextBoolean()?intsEnum.advance(i): intsEnum.nextDoc());
+ assertEquals(0, enumRef.get());
+ }
+ int expected = 0;
+ for (int i = base; i < r.numDocs(); i++, expected++) {
+ while (deleted.get(expected)) {
+ expected++;
+ }
+ assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs", i, intsEnum.advance(i));
+ assertEquals(expected, ints.ints(i));
+ assertEquals(expected, enumRef.get());
+
+ }
+ }
+ break;
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE: {
+ Reader floatReader = r.getIndexValues(val.name());
+ Source floats = floatReader.load();
+ ValuesEnum floatEnum = floatReader.getEnum();
+ assertNotNull(floatEnum);
+ FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class).floats();
+
+ for (int i = 0; i < base; i++) {
+ assertEquals(0.0d, floats.floats(i), 0.0d);
+ assertEquals(i, random.nextBoolean()?floatEnum.advance(i): floatEnum.nextDoc());
+ assertEquals("index " + i, 0.0 ,enumRef.get(), 0.0);
+ }
+ int expected = 0;
+ for (int i = base; i < r.numDocs(); i++, expected++) {
+ while (deleted.get(expected)) {
+ expected++;
+ }
+ assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs base:" + base, i, floatEnum.advance(i));
+ assertEquals("index " + i, 2.0 * expected ,enumRef.get() , 0.00001);
+ assertEquals("index " + i, 2.0 * expected, floats.floats(i), 0.00001);
+ }
+ }
+ break;
+ default:
+ fail("unexpected value " + val);
+ }
+
+ closeables.add(r);
+ for (Closeable toClose : closeables) {
+ toClose.close();
+ }
+ }
+ w.close();
+ d.close();
+ }
+
+ private static EnumSet BYTES = EnumSet.of(
+ Values.BYTES_FIXED_DEREF,
+ Values.BYTES_FIXED_SORTED,
+ Values.BYTES_FIXED_STRAIGHT,
+ Values.BYTES_VAR_DEREF ,
+ Values.BYTES_VAR_SORTED,
+ Values.BYTES_VAR_STRAIGHT
+ );
+
+ private static EnumSet STRAIGHT_BYTES = EnumSet.of(
+ Values.BYTES_FIXED_STRAIGHT,
+ Values.BYTES_VAR_STRAIGHT
+ );
+
+ private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE);
+
+ private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS};
+ private OpenBitSet indexValues(IndexWriter w, int numValues,
+ Values value, List valueVarList, boolean withDeletions,
+ int multOfSeven) throws CorruptIndexException, IOException {
+ final boolean isNumeric = NUMERICS.contains(value);
+ OpenBitSet deleted = new OpenBitSet(numValues);
+ Document doc = new Document();
+ Fieldable field = random.nextBoolean()? new ValuesField(value.name()):newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]);
+ doc.add(field);
+
+ ValuesAttribute valuesAttribute = ValuesField.values(field);
+ valuesAttribute.setType(value);
+ final LongsRef intsRef = valuesAttribute.ints();
+ final FloatsRef floatsRef = valuesAttribute.floats();
+ final BytesRef bytesRef = valuesAttribute.bytes();
+
+ final String idBase = value.name() + "_";
+ final byte[] b = new byte[multOfSeven];
+ if (bytesRef != null) {
+ bytesRef.bytes = b;
+ bytesRef.length = b.length;
+ bytesRef.offset = 0;
+ }
+ //
+ byte upto = 0;
+ for (int i = 0; i < numValues; i++) {
+ if (isNumeric) {
+ switch (value) {
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED:
+ intsRef.set(i);
+ break;
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE:
+ floatsRef.set(2.0f * i);
+ break;
+ default:
+ fail("unexpected value " + value);
+ }
+ } else {
+ for (int j = 0; j < b.length; j++) {
+ b[j] = upto++;
+ }
+ }
+ doc.removeFields("id");
+ doc.add(new Field("id", idBase + i, Store.YES,
+ Index.NOT_ANALYZED_NO_NORMS));
+ w.addDocument(doc);
+
+ if (i % 7 == 0) {
+ if (withDeletions && random.nextBoolean()) {
+ Values val = valueVarList.get(random.nextInt(1 + valueVarList
+ .indexOf(value)));
+ final int randInt = val == value ? random.nextInt(1 + i) : random
+ .nextInt(numValues);
+ w.deleteDocuments(new Term("id", val.name() + "_" + randInt));
+ if (val == value) {
+ deleted.set(randInt);
+ }
+ }
+ w.commit();
+
+ }
+ }
+ w.commit();
+
+ // nocommit test unoptimized with deletions
+ if(withDeletions || random.nextBoolean())
+ w.optimize();
+ return deleted;
+ }
+
+ public void runTestIndexBytes(IndexWriterConfig cfg,
+ boolean withDeletions) throws CorruptIndexException,
+ LockObtainFailedException, IOException {
+ Directory d = newDirectory();
+ IndexWriter w = new IndexWriter(d, cfg);
+ final List byteVariantList = new ArrayList(BYTES);
+
+ // run in random order to test if fill works correctly during merges
+ Collections.shuffle(byteVariantList, random);
+ final int numValues = 350;
+ for (Values byteIndexValue : byteVariantList) {
+ List closeables = new ArrayList();
+
+ int bytesSize = 7 + random.nextInt(128);
+ OpenBitSet deleted = indexValues(w, numValues, byteIndexValue,
+ byteVariantList, withDeletions, bytesSize);
+ final IndexReader r = IndexReader.open(w);
+ assertEquals(0, r.numDeletedDocs());
+ final int numRemainingValues = (int) (numValues - deleted.cardinality());
+ final int base = r.numDocs() - numRemainingValues;
+
+ Reader bytesReader = r.getIndexValues(byteIndexValue.name());
+// closeables.add(bytesReader);
+ assertNotNull("field " + byteIndexValue.name()
+ + " returned null reader - maybe merged failed", bytesReader);
+ Source bytes = bytesReader.load();
+ ValuesEnum bytesEnum = bytesReader.getEnum();
+ assertNotNull(bytesEnum);
+ final ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
+ byte upto = 0;
+ // test the filled up slots for correctness
+ for (int i = 0; i < base; i++) {
+ final BytesRef br = bytes.bytes(i);
+ String msg = " field: " + byteIndexValue.name() + " at index: " + i
+ + " base: " + base + " numDocs:" + r.numDocs();
+ switch (byteIndexValue) {
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_FIXED_STRAIGHT:
+ assertEquals(i, bytesEnum.advance(i));
+ // fixed straight returns bytesref with zero bytes all of fixed
+ // length
+ assertNotNull("expected none null - " + msg, br);
+ if(br.length != 0) {
+ assertEquals("expected zero bytes of length " + bytesSize + " - "
+ + msg, bytesSize, br.length);
+ for (int j = 0; j < br.length; j++) {
+ assertEquals("Byte at index " + j + " doesn't match - " + msg, 0,
+ br.bytes[br.offset + j]);
+ }
+ }
+ break;
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_DEREF:
+ case BYTES_FIXED_DEREF:
+ default:
+ assertNotNull("expected none null - " + msg, br);
+ if(br.length != 0){
+ bytes.bytes(i);
+ }
+ assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0, br.length);
+ }
+ }
+ final BytesRef enumRef = attr.bytes();
+
+
+ // test the actual doc values added in this iteration
+ assertEquals(base + numRemainingValues, r.numDocs());
+ int v = 0;
+ for (int i = base; i < r.numDocs(); i++) {
+
+ String msg = " field: " + byteIndexValue.name() + " at index: " + i
+ + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + bytesSize;
+ while (withDeletions && deleted.get(v++)) {
+ upto += bytesSize;
+ }
+
+ BytesRef br = bytes.bytes(i);
+ if(bytesEnum.docID() != i)
+ assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum.advance(i));
+ for (int j = 0; j < br.length; j++, upto++) {
+ assertEquals("EnumRef Byte at index " + j + " doesn't match - " + msg,
+ upto, enumRef.bytes[enumRef.offset + j]);
+ assertEquals("SourceRef Byte at index " + j + " doesn't match - " + msg,
+ upto, br.bytes[br.offset + j]);
+ }
+ }
+
+ // clean up
+ closeables.add(r);
+ for (Closeable toClose : closeables) {
+ toClose.close();
+ }
+ }
+
+ w.close();
+ d.close();
+ }
+
+}
diff --git a/lucene/src/test/org/apache/lucene/util/_TestUtil.java b/lucene/src/test/org/apache/lucene/util/_TestUtil.java
index 380e321f02d..c19044b5578 100644
--- a/lucene/src/test/org/apache/lucene/util/_TestUtil.java
+++ b/lucene/src/test/org/apache/lucene/util/_TestUtil.java
@@ -117,6 +117,37 @@ public class _TestUtil {
}
return new String(buffer, 0, end);
}
+
+ public static String randomUnicodeString(Random r, int minLength, int maxLength) {
+ if(minLength > maxLength)
+ throw new IllegalArgumentException("minLength must be >= maxLength");
+ final boolean lenEqual = minLength==maxLength;
+ final int end = lenEqual?minLength:minLength + r.nextInt(maxLength-minLength+1);
+ if (end == 0) {
+ // allow 0 length
+ return "";
+ }
+
+ // TODO(simonw): check this
+ final int fixedPlane = 5;//minLength % 5;
+ final char[] buffer = new char[end];
+ for (int i = 0; i < end; i++) {
+ int t = lenEqual? fixedPlane: r.nextInt(5);
+ //buffer[i] = (char) (97 + r.nextInt(26));
+ if (0 == t && i < end - 1 && !lenEqual) {
+ // Make a surrogate pair
+ // High surrogate
+ buffer[i++] = (char) nextInt(r, 0xd800, 0xdbff);
+ // Low surrogate
+ buffer[i] = (char) nextInt(r, 0xdc00, 0xdfff);
+ }
+ else if (t <= 1) buffer[i] = (char) r.nextInt(0x80);
+ else if (2 == t) buffer[i] = (char) nextInt(r, 0x80, 0x800);
+ else if (3 == t) buffer[i] = (char) nextInt(r, 0x800, 0xd7ff);
+ else if (4 == t) buffer[i] = (char) nextInt(r, 0xe000, 0xffff);
+ }
+ return new String(buffer, 0, end);
+ }
private static final int[] blockStarts = {
0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400,
From 26833e93156f57011732d50859302941c6a3eba9 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Thu, 21 Oct 2010 07:09:39 +0000
Subject: [PATCH 003/116] Merged from trunk due to SOLR-2185:
QueryElevationComponentTest depends on execution order - failed on Hudson
only on this branch
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1025859 13f79535-47bb-0310-9956-ffa450edef68
---
.../handler/component/QueryElevationComponentTest.java | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java
index ebaacdfdb0e..725abebcd6f 100644
--- a/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java
+++ b/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java
@@ -49,6 +49,15 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 {
public static void beforeClass() throws Exception {
initCore("solrconfig-elevate.xml","schema12.xml");
}
+
+ @Before
+ @Override
+ public void setUp() throws Exception{
+ super.setUp();
+ clearIndex();
+ assertU(commit());
+ assertU(optimize());
+ }
@Test
public void testInterface() throws Exception
From 6528add69fe199a2feaad555c09e632a380d142c Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Fri, 22 Oct 2010 10:19:37 +0000
Subject: [PATCH 004/116] Taking SolrInfoMBeanTest out of the loop see
SOLR-2160
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1026275 13f79535-47bb-0310-9956-ffa450edef68
---
solr/src/test/org/apache/solr/SolrInfoMBeanTest.java | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java b/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java
index 2177f823a7f..d712c6cb2bd 100644
--- a/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java
+++ b/solr/src/test/org/apache/solr/SolrInfoMBeanTest.java
@@ -41,7 +41,9 @@ public class SolrInfoMBeanTest extends LuceneTestCase
* Gets a list of everything we can find in the classpath and makes sure it has
* a name, description, etc...
*/
+ @Ignore // TODO: reenable once SOLR-2160 is fixed
public void testCallMBeanInfo() throws Exception {
+// Object[] init = org.apache.solr.search.QParserPlugin.standardPlugins;
List classes = new ArrayList();
classes.addAll(getClassesForPackage(StandardRequestHandler.class.getPackage().getName()));
classes.addAll(getClassesForPackage(SearchHandler.class.getPackage().getName()));
@@ -82,7 +84,7 @@ public class SolrInfoMBeanTest extends LuceneTestCase
}
}
assertTrue( "there are at least 10 SolrInfoMBean that should be found in the classpath, found " + checked, checked > 10 );
- }
+ }
static final String FOLDER = File.separator + "build" + File.separator + "solr" + File.separator + "org" + File.separator + "apache" + File.separator + "solr" + File.separator;
From 3a4d4aa0c6e10a522c713ed1f9768b205845aca3 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Fri, 22 Oct 2010 19:12:12 +0000
Subject: [PATCH 005/116] fixed copy method in ValuesAttribute
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1026453 13f79535-47bb-0310-9956-ffa450edef68
---
.../apache/lucene/benchmark/byTask/feeds/DocMaker.java | 8 ++++----
.../src/java/org/apache/lucene/document/ValuesField.java | 1 -
.../apache/lucene/index/values/ValuesAttributeImpl.java | 6 +++---
3 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
index de030dadf23..b28300713ed 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
@@ -175,7 +175,8 @@ public class DocMaker {
Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);
idField.setValue("doc" + (r != null ? r.nextInt(updateDocIDLimit) : incrNumDocsCreated()));
doc.add(idField);
-
+ trySetIndexValues(idField);
+
// Set NAME_FIELD
String name = docData.getName();
if (name == null) name = "";
@@ -390,16 +391,15 @@ public class DocMaker {
String[] split = fields.split(";");
Map result = new HashMap();
for (String tuple : split) {
- final String[] nameValue = tuple.split(":");
+ final String[] nameValue = tuple.split("=");
if (nameValue.length != 2) {
throw new IllegalArgumentException("illegal doc.stored.values format: "
- + fields + " expected fieldname:ValuesType;...;...;");
+ + fields + " expected fieldname=ValuesType;...;...;");
}
result.put(nameValue[0].trim(), Values.valueOf(nameValue[1]));
}
return result;
}
-
/** Set the configuration parameters of this doc maker. */
public void setConfig(Config config) {
this.config = config;
diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java
index df9332dc42d..43fc6bd6623 100644
--- a/lucene/src/java/org/apache/lucene/document/ValuesField.java
+++ b/lucene/src/java/org/apache/lucene/document/ValuesField.java
@@ -130,7 +130,6 @@ public class ValuesField extends AbstractField {
default:
throw new IllegalArgumentException("unknown type: " + type);
}
-
return valField.set(field);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
index 714ba4b7969..fe504514ba7 100644
--- a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
@@ -77,15 +77,15 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
- bytes = (BytesRef) other.bytes.clone();
+ other.bytes.copy(bytes);
break;
case PACKED_INTS:
case PACKED_INTS_FIXED:
- ints = (LongsRef) other.ints.clone();
+ other.ints.copy(ints);
break;
case SIMPLE_FLOAT_4BYTE:
case SIMPLE_FLOAT_8BYTE:
- floats = (FloatsRef) other.floats.clone();
+ other.floats.copy(floats);
break;
}
From 99fd795df2dd4d2545ff20cdb4973187b7fa588a Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 26 Oct 2010 08:29:35 +0000
Subject: [PATCH 006/116] First step towards LUCENE-2700: Expose DocValues via
Fields
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1027401 13f79535-47bb-0310-9956-ffa450edef68
---
.../instantiated/InstantiatedIndexReader.java | 11 +
.../lucene/index/memory/MemoryIndex.java | 13 +
.../apache/lucene/index/DirectoryReader.java | 266 +-----------------
.../lucene/index/DocFieldProcessor.java | 171 ++---------
.../index/DocFieldProcessorPerThread.java | 11 +-
.../apache/lucene/index/DocumentsWriter.java | 10 +-
.../org/apache/lucene/index/FieldInfo.java | 2 +-
.../java/org/apache/lucene/index/Fields.java | 12 +
.../org/apache/lucene/index/FieldsEnum.java | 16 ++
.../lucene/index/FilterIndexReader.java | 11 +
.../org/apache/lucene/index/IndexReader.java | 16 +-
.../org/apache/lucene/index/MultiFields.java | 41 +++
.../apache/lucene/index/MultiFieldsEnum.java | 19 ++
.../apache/lucene/index/ParallelReader.java | 25 +-
.../org/apache/lucene/index/SegmentInfo.java | 7 +-
.../apache/lucene/index/SegmentMerger.java | 95 +------
.../apache/lucene/index/SegmentReader.java | 73 +----
.../lucene/index/codecs/FieldsConsumer.java | 50 +++-
.../lucene/index/codecs/FieldsProducer.java | 38 ++-
.../index/codecs/PerFieldCodecWrapper.java | 27 ++
.../index/codecs/PrefixCodedTermsReader.java | 7 +
.../index/codecs/PrefixCodedTermsWriter.java | 2 +-
.../index/codecs/preflex/PreFlexFields.java | 7 +
.../simpletext/SimpleTextFieldsReader.java | 7 +
.../org/apache/lucene/index/values/Bytes.java | 55 ++--
.../org/apache/lucene/index/values/Cache.java | 12 +-
.../values/{Reader.java => DocValues.java} | 6 +-
.../index/values/FixedDerefBytesImpl.java | 5 +
.../index/values/FixedSortedBytesImpl.java | 7 +-
.../index/values/FixedStraightBytesImpl.java | 5 +
.../apache/lucene/index/values/Floats.java | 30 +-
.../org/apache/lucene/index/values/Ints.java | 14 +-
.../lucene/index/values/PackedIntsImpl.java | 24 +-
.../index/values/VarDerefBytesImpl.java | 5 +
.../index/values/VarSortedBytesImpl.java | 7 +-
.../index/values/VarStraightBytesImpl.java | 5 +
.../apache/lucene/index/values/Writer.java | 58 ++--
.../apache/lucene/search/FieldComparator.java | 2 +-
.../org/apache/lucene/TestExternalCodecs.java | 14 +
.../codecs/preflexrw/PreFlexFieldsWriter.java | 7 +
.../lucene/index/values/TestIndexValues.java | 231 +++++++++------
41 files changed, 661 insertions(+), 763 deletions(-)
rename lucene/src/java/org/apache/lucene/index/values/{Reader.java => DocValues.java} (95%)
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
index 8fede649e66..96d9198146d 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
@@ -31,6 +31,7 @@ import java.util.Comparator;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.*;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.BytesRef;
@@ -394,6 +395,11 @@ public class InstantiatedIndexReader extends IndexReader {
public TermsEnum terms() {
return new InstantiatedTermsEnum(orderedTerms, upto, currentField);
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ return null;
+ }
};
}
@@ -422,6 +428,11 @@ public class InstantiatedIndexReader extends IndexReader {
}
};
}
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ return null;
+ }
};
}
diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 11dd692ff83..5072cf3a762 100644
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -48,6 +48,7 @@ import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -789,6 +790,12 @@ public class MemoryIndex implements Serializable {
public TermsEnum terms() {
return new MemoryTermsEnum(sortedFields[upto].getValue());
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ // TODO
+ throw new UnsupportedOperationException("not implemented");
+ }
};
}
@@ -819,6 +826,12 @@ public class MemoryIndex implements Serializable {
};
}
}
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ // TODO
+ throw new UnsupportedOperationException("not implemented");
+ }
};
}
diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
index 2d29a248847..9863fb961d6 100644
--- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
+++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
@@ -36,14 +36,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.values.Reader;
-import org.apache.lucene.index.values.Values;
-import org.apache.lucene.index.values.ValuesEnum;
-import org.apache.lucene.index.values.Reader.Source;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FloatsRef;
-import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.BytesRef;
@@ -996,264 +989,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
}
return commits;
- }
-
- public Reader getIndexValues(String field) {
- ensureOpen();
- if (subReaders.length == 1) {
- return subReaders[0].getIndexValues(field);
- }
- return new MultiValueReader(field);
- }
-
- private class MultiValueReader extends Reader {
-
- private String id;
- private Values value;
-
- public MultiValueReader(String id) {
- this.id = id;
- for (SegmentReader reader : subReaders) {
- FieldInfo fieldInfo = reader.fieldInfos().fieldInfo(id);
- if(fieldInfo != null){
- value = fieldInfo.getIndexValues();
- break;
- }
- }
- }
-
- @Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
- return new MultiValuesEnum(id, value);
- }
-
- @Override
- public Source load() throws IOException {
- return new MultiSource(id);
- }
-
- public void close() throws IOException {
- //
- }
-
- }
-
- private class MultiValuesEnum extends ValuesEnum {
- private int numDocs_ = 0;
- private int pos = -1;
- private int start = 0;
- private final String id;
- private final ValuesEnum[] enumCache;
- private ValuesEnum current;
-
- protected MultiValuesEnum(String id, Values enumType) {
- super(enumType);
- enumCache = new ValuesEnum[subReaders.length];
- this.id = id;
- }
-
- @Override
- public void close() throws IOException {
- for (ValuesEnum valuesEnum : enumCache) {
- if(valuesEnum != null)
- valuesEnum.close();
- }
- }
-
- @Override
- public int advance( int target) throws IOException {
- int n = target - start;
- do {
- if(target >= maxDoc)
- return pos = NO_MORE_DOCS;
- if (n >= numDocs_) {
- int idx = readerIndex(target);
- if (enumCache[idx] == null) {
- try {
- Reader indexValues = subReaders[idx].getIndexValues(id);
- if (indexValues != null) // nocommit does that work with default
- // values?
- enumCache[idx] = indexValues.getEnum(this.attributes());
- else
- enumCache[idx] = new DummyEnum(this.attributes(),
- subReaders[idx].maxDoc(), attr.type());
- } catch (IOException ex) {
- // nocommit what to do here?
- throw new RuntimeException(ex);
- }
- }
- current = enumCache[idx];
- start = starts[idx];
- numDocs_ = subReaders[idx].maxDoc();
- n = target - start;
- }
- target = start+numDocs_;
- } while ((n = current.advance(n)) == NO_MORE_DOCS);
- return pos = start+current.docID();
- }
-
-
- @Override
- public int docID() {
- return pos;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(pos+1);
- }
- }
-
- private class MultiSource extends Source {
- private int numDocs_ = 0;
- private int start = 0;
- private Source current;
- private final String id;
-
- MultiSource(String id) {
- this.id = id;
- }
-
- public long ints(int docID) {
- int n = docID - start;
- if(n >= numDocs_) {
- int idx = readerIndex(docID);
- try{
- current = subReaders[idx].getIndexValuesCache().getInts(id);
- if(current == null) //nocommit does that work with default values?
- current = new DummySource();
- }catch(IOException ex) {
- // nocommit what to do here?
- throw new RuntimeException(ex);
- }
- start = starts[idx];
- numDocs_ = subReaders[idx].maxDoc();
- n = docID - start;
- }
- return current.ints(n);
- }
-
- public double floats(int docID) {
- int n = docID - start;
- if(n >= numDocs_) {
- int idx = readerIndex(docID);
- try{
- current = subReaders[idx].getIndexValuesCache().getFloats(id);
- if(current == null) //nocommit does that work with default values?
- current = new DummySource();
- }catch(IOException ex) {
- // nocommit what to do here?
- throw new RuntimeException(ex);
- }
- numDocs_ = subReaders[idx].maxDoc();
-
- start = starts[idx];
- n = docID - start;
- }
- return current.floats(n);
- }
-
- public BytesRef bytes(int docID) {
- int n = docID - start;
- if(n >= numDocs_) {
- int idx = readerIndex(docID);
- try{
- current = subReaders[idx].getIndexValuesCache().getBytes(id);
- if(current == null) //nocommit does that work with default values?
- current = new DummySource();
- }catch(IOException ex) {
- // nocommit what to do here?
- throw new RuntimeException(ex);
- }
- numDocs_ = subReaders[idx].maxDoc();
- start = starts[idx];
- n = docID - start;
- }
- return current.bytes(n);
- }
-
- public long ramBytesUsed() {
- return current.ramBytesUsed();
- }
-
- }
-
- private static class DummySource extends Source {
- private final BytesRef ref = new BytesRef();
- @Override
- public BytesRef bytes(int docID) {
- return ref;
- }
-
-
- @Override
- public double floats(int docID) {
- return 0.0d;
- }
-
- @Override
- public long ints(int docID) {
- return 0;
- }
-
- public long ramBytesUsed() {
- return 0;
- }
- }
-
- private static class DummyEnum extends ValuesEnum {
- private int pos = -1;
- private final int maxDoc;
-
- public DummyEnum(AttributeSource source, int maxDoc, Values type) {
- super(source, type);
- this.maxDoc = maxDoc;
- switch (type) {
- case BYTES_VAR_STRAIGHT:
- case BYTES_FIXED_STRAIGHT:
- case BYTES_FIXED_DEREF:
- case BYTES_FIXED_SORTED:
- case BYTES_VAR_DEREF:
- case BYTES_VAR_SORTED:
- // nocommit - this is not correct for Fixed_straight
- BytesRef bytes = attr.bytes();
- bytes.length = 0;
- bytes.offset = 0;
- break;
- case PACKED_INTS:
- case PACKED_INTS_FIXED:
- LongsRef ints = attr.ints();
- ints.set(0);
- break;
-
- case SIMPLE_FLOAT_4BYTE:
- case SIMPLE_FLOAT_8BYTE:
- FloatsRef floats = attr.floats();
- floats.set(0d);
- break;
- default:
- throw new IllegalArgumentException("unknown Values type: " + type);
- }
- }
- @Override
- public void close() throws IOException {
- }
-
- @Override
- public int advance(int target) throws IOException {
- return pos = (pos < maxDoc ? target: NO_MORE_DOCS);
- }
- @Override
- public int docID() {
- return pos;
- }
- @Override
- public int nextDoc() throws IOException {
- return advance(pos+1);
- }
-
- }
-
+ }
private static final class ReaderCommit extends IndexCommit {
private String segmentsFileName;
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index dee8168937f..4db1363e722 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -18,11 +18,13 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.store.Directory;
+import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.values.Ints;
import org.apache.lucene.index.values.Floats;
import org.apache.lucene.index.values.Bytes;
import org.apache.lucene.index.values.ValuesAttribute;
import org.apache.lucene.index.values.Writer;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
@@ -48,154 +50,33 @@ final class DocFieldProcessor extends DocConsumer {
final FieldInfos fieldInfos = new FieldInfos();
final DocFieldConsumer consumer;
final StoredFieldsWriter fieldsWriter;
- final private Map indexValues = new HashMap();
+ final private Map docValues = new HashMap();
+ private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter
- synchronized IndexValuesProcessor getProcessor(Directory dir, String segment, String name, ValuesAttribute attr, FieldInfo fieldInfo)
- throws IOException {
- if(attr == null)
- return null;
- IndexValuesProcessor p = indexValues.get(name);
- if (p == null) {
- org.apache.lucene.index.values.Values v = attr.type();
- final String id = segment + "_" + fieldInfo.number;
- switch(v) {
- case PACKED_INTS:
- p = new IntValuesProcessor(dir, id, false);
- break;
- case PACKED_INTS_FIXED:
- p = new IntValuesProcessor(dir, id, true);
- break;
- case SIMPLE_FLOAT_4BYTE:
- p = new FloatValuesProcessor(dir, id, 4);
- break;
- case SIMPLE_FLOAT_8BYTE:
- p = new FloatValuesProcessor(dir, id, 8);
- break;
- case BYTES_FIXED_STRAIGHT:
- p = new BytesValuesProcessor(dir, id, true, null, Bytes.Mode.STRAIGHT);
- break;
- case BYTES_FIXED_DEREF:
- p = new BytesValuesProcessor(dir, id, true, null, Bytes.Mode.DEREF);
- break;
- case BYTES_FIXED_SORTED:
- p = new BytesValuesProcessor(dir, id, true, attr.bytesComparator(), Bytes.Mode.SORTED);
- break;
- case BYTES_VAR_STRAIGHT:
- p = new BytesValuesProcessor(dir, id, false, null, Bytes.Mode.STRAIGHT);
- break;
- case BYTES_VAR_DEREF:
- p = new BytesValuesProcessor(dir, id, false, null, Bytes.Mode.DEREF);
- break;
- case BYTES_VAR_SORTED:
- p = new BytesValuesProcessor(dir, id, false, attr.bytesComparator(), Bytes.Mode.SORTED);
- break;
- }
- fieldInfo.setIndexValues(v);
- indexValues.put(name, p);
- }
+ synchronized DocValuesConsumer docValuesConsumer(Directory dir,
+ String segment, String name, ValuesAttribute attr, FieldInfo fieldInfo)
+ throws IOException {
+ DocValuesConsumer valuesConsumer;
+ if ((valuesConsumer = docValues.get(name)) == null) {
+ fieldInfo.setIndexValues(attr.type());
- return p;
- }
-
- static abstract class IndexValuesProcessor {
- public abstract void add(int docID, String name, ValuesAttribute attr) throws IOException;
- public abstract void finish(int docCount) throws IOException;
- public abstract void files(Collection files) throws IOException;
- }
-
- static class FloatValuesProcessor extends IndexValuesProcessor {
- private final Writer writer;
- private final String id;
-
- public FloatValuesProcessor(Directory dir, String id, int precision) throws IOException {
- this.id = id;
- writer = Floats.getWriter(dir, id, precision);
- }
-
- @Override
- public void add(int docID, String name, ValuesAttribute attr) throws IOException {
- final FloatsRef floats = attr.floats();
- if(floats != null) {
- writer.add(docID, floats.get());
- return;
- }
- throw new IllegalArgumentException("could not extract float/double from field " + name);
- }
-
- @Override
- public void finish(int docCount) throws IOException {
- writer.finish(docCount);
- }
-
- @Override
- public void files(Collection files) {
- Floats.files(id, files);
- }
- }
-
- static class IntValuesProcessor extends IndexValuesProcessor {
- private final Writer writer;
- private final String id;
-
- public IntValuesProcessor(Directory dir, String id, boolean fixedArray) throws IOException {
- this.id = id;
- writer = Ints.getWriter(dir, id, fixedArray);
- }
-
- @Override
- public void add(int docID, String name, ValuesAttribute attr) throws IOException {
- final LongsRef ints = attr.ints();
- if(ints != null) {
- writer.add(docID, ints.get());
- return;
- }
- throw new IllegalArgumentException("could not extract int/long from field " + name);
- }
-
- @Override
- public void finish(int docCount) throws IOException {
- writer.finish(docCount);
- }
-
- @Override
- public void files(Collection files) throws IOException {
- Ints.files(id, files);
- }
- }
-
- static class BytesValuesProcessor extends IndexValuesProcessor {
- private final Writer writer;
- private final String id;
- private final Directory dir;
-
- public BytesValuesProcessor(Directory dir, String id, boolean fixedSize, Comparator comp, Bytes.Mode mode) throws IOException {
- this.id = id;
- writer = Bytes.getWriter(dir, id, mode,comp, fixedSize);
- this.dir = dir;
- }
-
- // nocommit -- make this thread private and not sync'd
- @Override
- public synchronized void add(int docID, String name, ValuesAttribute attr) throws IOException {
- final BytesRef bytes = attr.bytes();
- if(bytes != null) {
- writer.add(docID, bytes);
- return;
+ if(fieldsConsumer == null) {
+ /* nocommit -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice.
+ * we need to find a way that allows us to obtain a FieldsConsumer per DocumentsWriter. Currently some codecs rely on
+ * the SegmentsWriteState passed in right at the moment when the segment is flushed (doccount etc) but we need the consumer earlier
+ * to support docvalues and later on stored fields too.
+ */
+ SegmentWriteState state = docWriter.segWriteState();
+ fieldsConsumer = state.codec.fieldsConsumer(state);
}
- throw new IllegalArgumentException("could not extract byte[] from field " + name);
+ valuesConsumer = fieldsConsumer.addValuesField(fieldInfo);
+ docValues.put(name, valuesConsumer);
}
+ return valuesConsumer;
- @Override
- public void finish(int docCount) throws IOException {
- writer.finish(docCount);
- }
-
- @Override
- public void files(Collection files) throws IOException {
- Bytes.files(dir, id, files);
- }
}
+
public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
this.docWriter = docWriter;
this.consumer = consumer;
@@ -221,13 +102,17 @@ final class DocFieldProcessor extends DocConsumer {
fieldsWriter.flush(state);
consumer.flush(childThreadsAndFields, state);
- for(IndexValuesProcessor p : indexValues.values()) {
+ for(DocValuesConsumer p : docValues.values()) {
if (p != null) {
p.finish(state.numDocs);
p.files(state.flushedFiles);
}
}
- indexValues.clear();
+ docValues.clear();
+ if(fieldsConsumer != null) {
+ fieldsConsumer.close(); // nocommit this should go away
+ fieldsConsumer = null;
+ }
// Important to save after asking consumer to flush so
// consumer can alter the FieldInfo* if necessary. EG,
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
index 56e7dea9597..0f2fed91c6d 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
@@ -20,14 +20,12 @@ package org.apache.lucene.index;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
-import java.util.Set;
-import java.util.Map.Entry;
import java.io.IOException;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.values.ValuesAttribute;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RamUsageEstimator;
@@ -255,17 +253,16 @@ final class DocFieldProcessorPerThread extends DocConsumerPerThread {
final DocFieldProcessorPerField perField = fields[i];
final Fieldable fieldable = perField.fields[0];
perField.consumer.processFields(perField.fields, perField.fieldCount);
+
if(!fieldable.hasFieldAttribute())
continue;
final AttributeSource attrSource = fieldable.getFieldAttributes();
if(!attrSource.hasAttribute(ValuesAttribute.class))
continue;
final ValuesAttribute attribute = attrSource.getAttribute(ValuesAttribute.class);
- final DocFieldProcessor.IndexValuesProcessor processor = docFieldProcessor
- .getProcessor(docState.docWriter.directory,
+ final DocValuesConsumer consumer = docFieldProcessor.docValuesConsumer(docState.docWriter.directory,
docState.docWriter.segment, fieldable.name(), attribute, perField.fieldInfo);
- if (processor != null)
- processor.add(docState.docID, fieldable.name(), attribute);
+ consumer.add(docState.docID, attribute);
}
if (docState.maxTermPrefix != null && docState.infoStream != null) {
docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
index 27784c261a3..acc20d4b842 100644
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -604,9 +604,13 @@ final class DocumentsWriter {
synchronized private void initFlushState(boolean onlyDocStore) {
initSegmentName(onlyDocStore);
- flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos,
- docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(),
- writer.codecs);
+ flushState = segWriteState();
+ }
+
+ SegmentWriteState segWriteState() {
+ return new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos,
+ docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(),
+ writer.codecs);
}
/** Returns the codec used to flush the last segment */
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
index d0bdd1cc68f..d7529874599 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
@@ -103,7 +103,7 @@ public final class FieldInfo {
}
}
- Values getIndexValues() {
+ public Values getIndexValues() {
return indexValues;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/Fields.java b/lucene/src/java/org/apache/lucene/index/Fields.java
index a14ca1d52c3..f3fe6542775 100644
--- a/lucene/src/java/org/apache/lucene/index/Fields.java
+++ b/lucene/src/java/org/apache/lucene/index/Fields.java
@@ -19,6 +19,8 @@ package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.index.values.DocValues;
+
/** Flex API for access to fields and terms
* @lucene.experimental */
@@ -31,6 +33,16 @@ public abstract class Fields {
/** Get the {@link Terms} for this field. This may return
* null if the field does not exist. */
public abstract Terms terms(String field) throws IOException;
+
+ /**
+ * Returns {@link DocValues} for the current field.
+ *
+ * @param field the field name
+ * @return the {@link DocValues} for this field or null
if not
+ * applicable.
+ * @throws IOException
+ */
+ public abstract DocValues docValues(String field) throws IOException;
public final static Fields[] EMPTY_ARRAY = new Fields[0];
}
diff --git a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java
index 4a2d2dc0d35..e3112ca8b5b 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.ValuesEnum;
import org.apache.lucene.util.AttributeSource;
@@ -57,6 +58,16 @@ public abstract class FieldsEnum {
* will not return null. */
public abstract TermsEnum terms() throws IOException;
+ /**
+ * Returns {@link DocValues} for the current field.
+ *
+ * @return the {@link DocValues} for this field or null
if not
+ * applicable.
+ * @throws IOException
+ */
+ public abstract DocValues docValues() throws IOException;
+
+
public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0];
/** Provides zero fields */
@@ -71,5 +82,10 @@ public abstract class FieldsEnum {
public TermsEnum terms() {
throw new IllegalStateException("this method should never be called");
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ throw new IllegalStateException("this method should never be called");
+ }
};
}
diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
index 0731a1c3553..838e939945e 100644
--- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.ValuesEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
@@ -59,6 +60,11 @@ public class FilterIndexReader extends IndexReader {
public Terms terms(String field) throws IOException {
return in.terms(field);
}
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ return in.docValues(field);
+ }
}
/** Base class for filtering {@link Terms}
@@ -117,6 +123,11 @@ public class FilterIndexReader extends IndexReader {
public TermsEnum terms() throws IOException {
return in.terms();
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ return in.docValues();
+ }
}
/** Base class for filtering {@link TermsEnum} implementations. */
diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java
index c28f13856ee..2cb8d6d9d72 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java
@@ -22,7 +22,7 @@ import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.values.Cache;
-import org.apache.lucene.index.values.Reader;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.store.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -117,6 +117,9 @@ public abstract class IndexReader implements Cloneable,Closeable {
public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption ("TERMVECTOR_WITH_OFFSET");
/** All fields with termvectors with offset values and position values enabled */
public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET");
+ /** All fields holding doc values */
+ public static final FieldOption DOC_VALUES = new FieldOption ("DOC_VALUES");
+
}
private boolean closed;
@@ -1374,10 +1377,13 @@ public abstract class IndexReader implements Cloneable,Closeable {
public int getTermInfosIndexDivisor() {
throw new UnsupportedOperationException("This reader does not support this method.");
}
-
- // nocommit -- should this expose the iterator API via Fields and access Source only via getIndexValuesCache?
- public Reader getIndexValues(String field) {
- throw new UnsupportedOperationException();
+
+ public DocValues docValues(String field) throws IOException {
+ final Fields fields = fields();
+ if (fields == null) {
+ return null;
+ }
+ return fields.docValues(field);
}
private final Cache indexValuesCache = new Cache(this);
diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java
index fc3beb7e74c..f642383c36b 100644
--- a/lucene/src/java/org/apache/lucene/index/MultiFields.java
+++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java
@@ -22,6 +22,10 @@ import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
+
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.MultiDocValues;
+import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs
import org.apache.lucene.util.Bits;
@@ -46,6 +50,7 @@ public final class MultiFields extends Fields {
private final Fields[] subs;
private final ReaderUtil.Slice[] subSlices;
private final Map terms = new HashMap();
+ private final Map docValues = new HashMap();
/** Returns a single {@link Fields} instance for this
* reader, merging fields/terms/docs/positions on the
@@ -186,6 +191,12 @@ public final class MultiFields extends Fields {
return fields.terms(field);
}
}
+
+ /** This method may return null if the field does not exist.*/
+ public static DocValues getDocValues(IndexReader r, String field) throws IOException {
+ final Fields fields = getFields(r);
+ return fields == null? null: fields.docValues(field);
+ }
/** Returns {@link DocsEnum} for the specified field &
* term. This may return null if the term does not
@@ -270,5 +281,35 @@ public final class MultiFields extends Fields {
return result;
}
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ final DocValues result;
+
+ if (!docValues.containsKey(field)) {
+
+ // Lazy init: first time this field is requested, we
+ // create & add to docValues:
+ final List subs2 = new ArrayList();
+ final List slices2 = new ArrayList();
+
+ // Gather all sub-readers that share this field
+ for(int i=0;i values = new ArrayList();
+ for (int i = 0; i < numTop; i++) {
+ final DocValues docValues = top[i].fields.docValues();
+ if (docValues != null) {
+ values.add(new MultiDocValues.DocValuesIndex(docValues,
+ top[i].index));
+ }
+ }
+ // TODO return an empty docvalues instance if values are empty
+ return docValues.reset(values.toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY));
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java
index 0aa19ae4d5d..e553f2998a8 100644
--- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java
+++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java
@@ -21,7 +21,9 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.Pair;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
@@ -174,14 +176,22 @@ public class ParallelReader extends IndexReader {
return TermsEnum.EMPTY;
}
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ assert currentReader != null;
+ return MultiFields.getDocValues(currentReader, currentField);
+ }
}
// Single instance of this, per ParallelReader instance
private class ParallelFields extends Fields {
- final HashMap fields = new HashMap();
+ final HashMap> fields = new HashMap>();
public void addField(String field, IndexReader r) throws IOException {
- fields.put(field, MultiFields.getFields(r).terms(field));
+ Fields multiFields = MultiFields.getFields(r);
+ fields.put(field, new Pair( multiFields.terms(field),
+ multiFields.docValues(field)));
}
@Override
@@ -190,11 +200,16 @@ public class ParallelReader extends IndexReader {
}
@Override
public Terms terms(String field) throws IOException {
- return fields.get(field);
+ return fields.get(field).cur;
+ }
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ return fields.get(field).cud;
}
}
-
- @Override
+
+ @Override
public Bits getDeletedDocs() {
return MultiFields.getDeletedDocs(readers.get(0));
}
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
index e8804653432..830072e7329 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -478,12 +478,7 @@ public final class SegmentInfo {
if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
fileSet.add(delFileName);
}
- //nocommit - is there a better way to get all the dat / idx files?
- for(String file : dir.listAll()) {
- if(file.startsWith(name) && (file.endsWith("dat") || file.endsWith("idx"))){
- fileSet.add(file);
- }
- }
+
if (normGen != null) {
for (int i = 0; i < normGen.length; i++) {
long gen = normGen[i];
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index 4546b3d8278..95577954852 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -33,7 +33,7 @@ import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.values.Bytes;
import org.apache.lucene.index.values.Ints;
-import org.apache.lucene.index.values.Reader;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.Floats;
import org.apache.lucene.index.values.Values;
import org.apache.lucene.index.values.Writer;
@@ -162,9 +162,6 @@ final class SegmentMerger {
if (mergeDocStores && fieldInfos.hasVectors())
mergeVectors();
-
- mergeIndexValues();
-
return mergedDocs;
}
@@ -178,12 +175,6 @@ final class SegmentMerger {
reader.close();
}
}
-
- private void addIfExists(Set files, String file, Directory dir) throws IOException{
- if(dir.fileExists(file)){
- files.add(file);
- }
- }
final List createCompoundFile(String fileName, final SegmentInfo info)
throws IOException {
@@ -203,14 +194,6 @@ final class SegmentMerger {
final int numFIs = fieldInfos.size();
for (int i = 0; i < numFIs; i++) {
final FieldInfo fi = fieldInfos.fieldInfo(i);
- // Index Values aka. CSF
- if (fi.indexValues != null) {
- addIfExists(fileSet, IndexFileNames.segmentFileName(segment, Integer
- .toString(fi.number), IndexFileNames.CSF_DATA_EXTENSION), directory);
- addIfExists(fileSet, IndexFileNames.segmentFileName(segment, Integer
- .toString(fi.number), IndexFileNames.CSF_INDEX_EXTENSION),
- directory);
- }
if (fi.isIndexed && !fi.omitNorms) {
fileSet.add(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
break;
@@ -318,7 +301,7 @@ final class SegmentMerger {
if (mergedIndexValues == null) {
merged.setIndexValues(fiIndexValues);
} else if (mergedIndexValues != fiIndexValues) {
- // nocommit -- what to do?
+ // TODO -- can we recover from this?
throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedIndexValues + " to " + fiIndexValues);
}
}
@@ -331,8 +314,7 @@ final class SegmentMerger {
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false);
-
- // nocommit -- how should we handle index values here?
+ fieldInfos.add(reader.getFieldNames(FieldOption.DOC_VALUES), false);
}
}
fieldInfos.write(directory, segment + ".fnm");
@@ -393,77 +375,6 @@ final class SegmentMerger {
return docCount;
}
- private void mergeIndexValues() throws IOException {
- final int numFields = fieldInfos.size();
- for (int i = 0; i < numFields; i++) {
- final FieldInfo fieldInfo = fieldInfos.fieldInfo(i);
- final Values v = fieldInfo.indexValues;
- // nocommit we need some kind of compatibility notation for values such
- // that two slighly different segments can be merged eg. fixed vs.
- // variable byte len or float32 vs. float64
-
- if (v != null) {
- int docBase = 0;
- final List mergeStates = new ArrayList();
- for (IndexReader reader : readers) {
- Reader r = reader.getIndexValues(fieldInfo.name);
- if (r != null) {
- mergeStates.add(new Writer.MergeState(r, docBase, reader
- .maxDoc(), reader.getDeletedDocs()));
- }
- docBase += reader.numDocs();
- }
- if (mergeStates.isEmpty()) {
- continue;
- }
- final String id = segment + "_" + fieldInfo.number;
- final Writer writer;
- switch (v) {
- case PACKED_INTS:
- case PACKED_INTS_FIXED:
- writer = Ints.getWriter(directory, id, true);
- break;
- case SIMPLE_FLOAT_4BYTE:
- writer = Floats.getWriter(directory, id, 4);
- break;
- case SIMPLE_FLOAT_8BYTE:
- writer = Floats.getWriter(directory, id, 8);
- break;
- case BYTES_FIXED_STRAIGHT:
- writer = Bytes.getWriter(directory, id,
- Bytes.Mode.STRAIGHT, null, true);
- break;
- case BYTES_FIXED_DEREF:
- writer = Bytes.getWriter(directory, id,
- Bytes.Mode.DEREF, null, true);
- break;
- case BYTES_FIXED_SORTED:
- // nocommit -- enable setting Comparator
- writer = Bytes.getWriter(directory, id,
- Bytes.Mode.SORTED, null, true);
- break;
- case BYTES_VAR_STRAIGHT:
- writer = Bytes.getWriter(directory, id,
- Bytes.Mode.STRAIGHT, null, false);
- break;
- case BYTES_VAR_DEREF:
- writer = Bytes.getWriter(directory, id,
- Bytes.Mode.DEREF, null, false);
- break;
- case BYTES_VAR_SORTED:
- // nocommit -- enable setting Comparator
- writer = Bytes.getWriter(directory, id,
- Bytes.Mode.SORTED, null, false);
- break;
- default:
- continue;
- }
- writer.add(mergeStates);
- writer.finish(mergedDocs);
- }
- }
- }
-
private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
final FieldsReader matchingFieldsReader)
throws IOException, MergeAbortedException, CorruptIndexException {
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java
index bbbd90e43b2..9c854662a24 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java
@@ -44,7 +44,7 @@ import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.values.Bytes;
import org.apache.lucene.index.values.Ints;
-import org.apache.lucene.index.values.Reader;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.Floats;
import org.apache.lucene.index.values.Values;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
@@ -141,7 +141,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
// Ask codec for its Fields
fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor));
assert fields != null;
- openIndexValuesReaders(cfsDir, si);
success = true;
} finally {
if (!success) {
@@ -155,57 +154,8 @@ public class SegmentReader extends IndexReader implements Cloneable {
// not assigned yet).
this.origInstance = origInstance;
}
-
- final Map indexValues = new HashMap();
-
- // Only opens files... doesn't actually load any values
- private void openIndexValuesReaders(Directory dir, SegmentInfo si) throws IOException {
- final int numFields = fieldInfos.size();
- for(int i=0;i mergeStates = new ArrayList();
+ for (IndexReader reader : mergeState.readers) {
+ DocValues r = reader.docValues(mergeState.fieldInfo.name);
+ if (r != null) {
+ mergeStates.add(new Writer.MergeState(r, docBase, reader
+ .maxDoc(), reader.getDeletedDocs()));
+ }
+ docBase += reader.numDocs();
+ }
+ if (mergeStates.isEmpty()) {
+ continue;
+ }
+ final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo);
+ docValuesConsumer.merge(mergeStates);
+ docValuesConsumer.finish(mergeState.mergedDocCount);
+ }
+
+ // merge doc values
+//
}
}
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java
index a378680328e..a4ce963828b 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsProducer.java
@@ -17,10 +17,13 @@ package org.apache.lucene.index.codecs;
* limitations under the License.
*/
-import org.apache.lucene.index.Fields;
-
-import java.io.IOException;
import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.values.DocValues;
/** Abstract API that consumes terms, doc, freq, prox and
* payloads postings. Concrete implementations of this
@@ -33,4 +36,33 @@ import java.io.Closeable;
public abstract class FieldsProducer extends Fields implements Closeable {
public abstract void close() throws IOException;
public abstract void loadTermsIndex(int indexDivisor) throws IOException;
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ return null;
+ }
+
+ public static final FieldsProducer EMPTY = new FieldsProducer() {
+
+ @Override
+ public Terms terms(String field) throws IOException {
+ return null;
+ }
+
+ @Override
+ public FieldsEnum iterator() throws IOException {
+ return FieldsEnum.EMPTY;
+ }
+
+ @Override
+ public void loadTermsIndex(int indexDivisor) throws IOException {
+
+ }
+
+ @Override
+ public void close() throws IOException {
+
+ }
+ };
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java
index 8839e8f2c30..cf21d6c3620 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java
@@ -35,6 +35,8 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
import org.apache.lucene.store.Directory;
@@ -112,6 +114,18 @@ public class PerFieldCodecWrapper extends Codec {
throw err;
}
}
+
+ @Override
+ public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+ fieldsSeen.add(field.name);
+ Codec codec = getCodec(field.name);
+ FieldsConsumer fields = codecs.get(codec);
+ if (fields == null) {
+ fields = codec.fieldsConsumer(state);
+ codecs.put(codec, fields);
+ }
+ return fields.addValuesField(field);
+ }
}
private class FieldsReader extends FieldsProducer {
@@ -164,6 +178,11 @@ public class PerFieldCodecWrapper extends Codec {
return null;
}
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ return codecs.get(getCodec(current)).docValues(current);
+ }
}
@Override
@@ -207,6 +226,14 @@ public class PerFieldCodecWrapper extends Codec {
it.next().loadTermsIndex(indexDivisor);
}
}
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ final Codec codec = getCodec(field);
+ FieldsProducer fields = codecs.get(codec);
+ assert fields != null;
+ return fields.docValues(field);
+ }
}
public FieldsProducer fieldsProducer(SegmentReadState state)
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
index d3cd2ce7597..5f5f607cb1a 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
@@ -41,6 +41,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs
+import org.apache.lucene.index.values.DocValues;
/** Handles a terms dict, but decouples all details of
* doc/freqs/positions reading to an instance of {@link
@@ -245,6 +246,12 @@ public class PrefixCodedTermsReader extends FieldsProducer {
public TermsEnum terms() throws IOException {
return current.iterator();
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
private class FieldReader extends Terms implements Closeable {
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
index 198ed7dac50..c9929903fd0 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
@@ -94,7 +94,7 @@ public class PrefixCodedTermsWriter extends FieldsConsumer {
@Override
public TermsConsumer addField(FieldInfo field) {
- assert currentField == null || currentField.name.compareTo(field.name) < 0;
+ assert currentField == null || currentField.name.compareTo(field.name) < 0 : "current field name " + (currentField == null? null: currentField.name) + " given: " +field.name;
currentField = field;
TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field);
TermsConsumer terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
index 57072463ca0..61781aeecca 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
@@ -37,6 +37,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.CompoundFileReader;
import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
@@ -222,6 +223,12 @@ public class PreFlexFields extends FieldsProducer {
termsEnum.reset(current);
return termsEnum;
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ //DocValues are not available on PreFlex indices
+ return null;
+ }
}
private class PreTerms extends Terms {
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
index 14c72b8a919..cc9f7de9be8 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.simpletext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.Terms;
@@ -103,6 +104,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
public TermsEnum terms() throws IOException {
return new SimpleTextTermsEnum(in.getFilePointer(), omitTF);
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
private class SimpleTextTermsEnum extends TermsEnum {
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index 34e79758293..bd9fd4544f4 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -25,8 +25,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.values.Reader.SortedSource;
-import org.apache.lucene.index.values.Reader.Source;
+import org.apache.lucene.index.values.DocValues.SortedSource;
+import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -47,8 +47,7 @@ import org.apache.lucene.util.CodecUtil;
* NOTE: Each byte[] must be <= 32768 bytes in length
*
*/
-//nocommit - add mmap version
-//nocommti - add bulk copy where possible
+//TODO - add bulk copy where possible
public final class Bytes {
// don't instantiate!
@@ -59,17 +58,7 @@ public final class Bytes {
STRAIGHT, DEREF, SORTED
};
- public static void files(Directory dir, String id, Collection files)
- throws IOException {
- files.add(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
- final String idxFile = IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_INDEX_EXTENSION);
- if (dir.fileExists(idxFile)) {
- files.add(idxFile);
- }
- }
-
+
// nocommit -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
public static Writer getWriter(Directory dir, String id, Mode mode,
@@ -101,7 +90,7 @@ public final class Bytes {
}
// nocommit -- I can peek @ header to determing fixed/mode?
- public static Reader getReader(Directory dir, String id, Mode mode,
+ public static DocValues getValues(Directory dir, String id, Mode mode,
boolean fixedSize, int maxDoc) throws IOException {
if (fixedSize) {
if (mode == Mode.STRAIGHT) {
@@ -172,6 +161,7 @@ public final class Bytes {
static abstract class BytesWriterBase extends Writer {
+
private final Directory dir;
private final String id;
protected IndexOutput idxOut;
@@ -239,13 +229,32 @@ public final class Bytes {
bytesRef = attr.bytes();
assert bytesRef != null;
}
+
+ @Override
+ public void add(int docID, ValuesAttribute attr) throws IOException {
+ final BytesRef ref;
+ if((ref = attr.bytes()) != null) {
+ add(docID, ref);
+ }
+ }
+
+ @Override
+ public void files(Collection files) throws IOException {
+ files.add(IndexFileNames.segmentFileName(id, "",
+ IndexFileNames.CSF_DATA_EXTENSION));
+ final String idxFile = IndexFileNames.segmentFileName(id, "",
+ IndexFileNames.CSF_INDEX_EXTENSION);
+ if (dir.fileExists(idxFile)) { // TODO is this correct? could be initialized lazily
+ files.add(idxFile);
+ }
+ }
}
/**
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
*/
- static abstract class BytesReaderBase extends Reader {
+ static abstract class BytesReaderBase extends DocValues {
protected final IndexInput idxIn;
protected final IndexInput datIn;
protected final int version;
@@ -270,20 +279,15 @@ public final class Bytes {
}
protected final IndexInput cloneData() {
- assert !isClosed.get():printEx();
// is never NULL
return (IndexInput) datIn.clone();
}
protected final IndexInput cloneIndex() {
- assert !isClosed.get():printEx();
return idxIn == null ? null : (IndexInput) idxIn.clone();
}
- private final AtomicBoolean isClosed = new AtomicBoolean(false);
- Exception ex;
+
public void close() throws IOException {
- assert !isClosed.getAndSet(true);
- ex =new Exception();
if (datIn != null) {
datIn.close();
}
@@ -291,11 +295,6 @@ public final class Bytes {
idxIn.close();
}
}
-
- private String printEx() {
- ex.printStackTrace();
- return ex.getMessage();
- }
}
}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/index/values/Cache.java b/lucene/src/java/org/apache/lucene/index/values/Cache.java
index 3f3b9dc4890..711e11cdb3d 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Cache.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Cache.java
@@ -23,8 +23,8 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.values.Reader.SortedSource;
-import org.apache.lucene.index.values.Reader.Source;
+import org.apache.lucene.index.values.DocValues.SortedSource;
+import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.util.BytesRef;
public class Cache {
@@ -42,7 +42,7 @@ public class Cache {
synchronized public Source getInts(String id) throws IOException {
Source s = ints.get(id);
if (s == null) {
- final Reader indexValues = r.getIndexValues(id);
+ final DocValues indexValues = r.docValues(id);
if (indexValues == null) {
return null;
}
@@ -56,7 +56,7 @@ public class Cache {
synchronized public Source getFloats(String id) throws IOException {
Source s = floats.get(id);
if (s == null) {
- final Reader indexValues = r.getIndexValues(id);
+ final DocValues indexValues = r.docValues(id);
if (indexValues == null) {
return null;
}
@@ -71,7 +71,7 @@ public class Cache {
Comparator comp) throws IOException {
SortedSource s = sortedBytes.get(id);
if (s == null) {
- final Reader indexValues = r.getIndexValues(id);
+ final DocValues indexValues = r.docValues(id);
if (indexValues == null) {
return null;
}
@@ -87,7 +87,7 @@ public class Cache {
synchronized public Source getBytes(String id) throws IOException {
Source s = bytes.get(id);
if (s == null) {
- final Reader indexValues = r.getIndexValues(id);
+ final DocValues indexValues = r.docValues(id);
if (indexValues == null) {
return null;
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Reader.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
similarity index 95%
rename from lucene/src/java/org/apache/lucene/index/values/Reader.java
rename to lucene/src/java/org/apache/lucene/index/values/DocValues.java
index 0bbd90f3a59..501a2c981fe 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Reader.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
@@ -23,9 +23,11 @@ import java.util.Comparator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
-public abstract class Reader implements Closeable {
+public abstract class DocValues implements Closeable {
+ public static final DocValues[] EMPTY_ARRAY = new DocValues[0];
+
public ValuesEnum getEnum() throws IOException{
return getEnum(null);
}
@@ -38,6 +40,8 @@ public abstract class Reader implements Closeable {
throw new UnsupportedOperationException();
}
+ public abstract Values type();
+
/**
* Source of integer (returned as java long), per document. The underlying
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
index 3cac5b20ac2..7e30711b465 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
@@ -257,6 +257,11 @@ class FixedDerefBytesImpl {
}
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_FIXED_DEREF;
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
index 75e26eb588c..810c6a0a82c 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
@@ -156,7 +156,7 @@ class FixedSortedBytesImpl {
}
@Override
- public org.apache.lucene.index.values.Reader.Source load() throws IOException {
+ public org.apache.lucene.index.values.DocValues.Source load() throws IOException {
return loadSorted(null);
}
@@ -254,5 +254,10 @@ class FixedSortedBytesImpl {
// do unsorted
return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, size);
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_FIXED_SORTED;
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
index 00564264178..3566e336764 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
@@ -217,5 +217,10 @@ class FixedStraightBytesImpl {
return advance(pos+1);
}
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_FIXED_STRAIGHT;
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index 3caccdb7eb0..e343565c9b5 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -28,10 +28,6 @@ public class Floats {
private static final int INT_ZERO = Float.floatToRawIntBits(0.0f);
private static final long LONG_ZERO = Double.doubleToRawLongBits(0.0);
- public static void files(String id, Collection files) {
- files.add(id + "." + IndexFileNames.CSF_DATA_EXTENSION);
- }
-
public static Writer getWriter(Directory dir, String id, int precisionBytes)
throws IOException {
if (precisionBytes != 4 && precisionBytes != 8) {
@@ -45,12 +41,14 @@ public class Floats {
}
}
- public static Reader getReader(Directory dir, String id, int maxDoc)
+ public static DocValues getValues(Directory dir, String id, int maxDoc)
throws IOException {
return new FloatsReader(dir, id, maxDoc);
}
abstract static class FloatsWriter extends Writer {
+
+
private final Directory dir;
private final String id;
private FloatsRef floatsRef;
@@ -81,6 +79,13 @@ public class Floats {
protected void add(int docID) throws IOException {
add(docID, floatsRef.get());
}
+
+ @Override
+ public void add(int docID, ValuesAttribute attr) throws IOException {
+ final FloatsRef ref;
+ if((ref = attr.floats()) != null)
+ add(docID, ref.get());
+ }
@Override
protected void setNextAttribute(ValuesAttribute attr) {
@@ -109,6 +114,13 @@ public class Floats {
} else
super.merge(state);
}
+
+ @Override
+ public void files(Collection files) throws IOException {
+ files.add(IndexFileNames.segmentFileName(id, "",
+ IndexFileNames.CSF_DATA_EXTENSION));
+ }
+
}
@@ -203,7 +215,7 @@ public class Floats {
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
*/
- static class FloatsReader extends Reader {
+ static class FloatsReader extends DocValues {
private final IndexInput datIn;
private final int precisionBytes;
@@ -303,6 +315,12 @@ public class Floats {
return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc)
: new Floats8EnumImpl(source, indexInput, maxDoc);
}
+
+ @Override
+ public Values type() {
+ return precisionBytes == 4 ? Values.SIMPLE_FLOAT_4BYTE
+ : Values.SIMPLE_FLOAT_8BYTE;
+ }
}
static final class Floats4Enum extends FloatsEnumImpl {
diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java
index a5ea55204c5..9b4e585c64d 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Ints.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java
@@ -1,32 +1,24 @@
package org.apache.lucene.index.values;
import java.io.IOException;
-import java.util.Collection;
-import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.PackedIntsImpl.IntsReader;
import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter;
import org.apache.lucene.store.Directory;
-//nocommit - add mmap version
-//nocommti - add bulk copy where possible
+//TODO - add bulk copy where possible
public class Ints {
private Ints() {
}
- public static void files(String id, Collection files)
- throws IOException {
- files.add(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
- }
public static Writer getWriter(Directory dir, String id, boolean useFixedArray)
throws IOException {
- //nocommit - implement fixed?!
+ //TODO - implement fixed?!
return new IntsWriter(dir, id);
}
- public static Reader getReader(Directory dir, String id, boolean useFixedArray) throws IOException {
+ public static DocValues getValues(Directory dir, String id, boolean useFixedArray) throws IOException {
return new IntsReader(dir, id);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
index ac843859a97..64735a6597c 100644
--- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
@@ -17,6 +17,7 @@ package org.apache.lucene.index.values;
* limitations under the License.
*/
import java.io.IOException;
+import java.util.Collection;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.Directory;
@@ -38,6 +39,8 @@ class PackedIntsImpl {
static final int VERSION_CURRENT = VERSION_START;
static class IntsWriter extends Writer {
+
+
// nocommit - can we bulkcopy this on a merge?
private LongsRef intsRef;
private long[] docToValue;
@@ -125,13 +128,27 @@ class PackedIntsImpl {
protected void setNextAttribute(ValuesAttribute attr) {
intsRef = attr.ints();
}
+
+ @Override
+ public void add(int docID, ValuesAttribute attr) throws IOException {
+ final LongsRef ref;
+ if((ref = attr.ints()) != null) {
+ add(docID, ref.get());
+ }
+ }
+
+ @Override
+ public void files(Collection files) throws IOException {
+ files.add(IndexFileNames.segmentFileName(id, "",
+ IndexFileNames.CSF_DATA_EXTENSION));
+ }
}
/**
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
*/
- static class IntsReader extends Reader {
+ static class IntsReader extends DocValues {
private final IndexInput datIn;
protected IntsReader(Directory dir, String id) throws IOException {
@@ -186,6 +203,11 @@ class PackedIntsImpl {
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new IntsEnumImpl(source, (IndexInput) datIn.clone());
}
+
+ @Override
+ public Values type() {
+ return Values.PACKED_INTS;
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
index 5a9f9d6093b..dccbd3bba08 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -251,5 +251,10 @@ class VarDerefBytesImpl {
datIn.readBytes(ref.bytes, 0, size);
}
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_VAR_DEREF;
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
index 9987343d684..c8536d8dc0c 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
@@ -157,7 +157,7 @@ class VarSortedBytesImpl {
}
@Override
- public org.apache.lucene.index.values.Reader.Source load()
+ public org.apache.lucene.index.values.DocValues.Source load()
throws IOException {
return loadSorted(null);
}
@@ -340,5 +340,10 @@ class VarSortedBytesImpl {
return advance(pos + 1);
}
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_VAR_SORTED;
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
index 83b97479171..436a9799fc4 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
@@ -228,5 +228,10 @@ class VarStraightBytesImpl {
return advance(pos+1);
}
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_VAR_STRAIGHT;
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java
index 13bf0947614..ae081778769 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Writer.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java
@@ -17,12 +17,17 @@ package org.apache.lucene.index.values;
* limitations under the License.
*/
import java.io.IOException;
-import java.util.List;
+import java.util.Comparator;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-public abstract class Writer {
+public abstract class Writer extends DocValuesConsumer {
+
+ public static final String INDEX_EXTENSION = "idx";
+ public static final String DATA_EXTENSION = "dat";
/** Records the specfied value for the docID */
public void add(int docID, long value) throws IOException {
@@ -47,28 +52,8 @@ public abstract class Writer {
/** Finish writing, close any files */
public abstract void finish(int docCount) throws IOException;
- public static class MergeState {
- public final Reader reader;
- public final int docBase;
- public final int docCount;
- public final Bits bits;
-
- public MergeState(Reader reader, int docBase, int docCount, Bits bits) {
- assert reader != null;
- this.reader = reader;
- this.docBase = docBase;
- this.docCount = docCount;
- this.bits = bits;
- }
- }
-
- public void add(List states) throws IOException {
- for (MergeState state : states) {
- merge(state);
- }
- }
-
// enables bulk copies in subclasses per MergeState
+ @Override
protected void merge(MergeState state) throws IOException {
final ValuesEnum valEnum = state.reader.getEnum();
assert valEnum != null;
@@ -89,4 +74,31 @@ public abstract class Writer {
valEnum.close();
}
}
+
+ public static Writer create(Values v, String id,
+ Directory directory, Comparator comp) throws IOException {
+ switch (v) {
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED:
+ return Ints.getWriter(directory, id, true);
+ case SIMPLE_FLOAT_4BYTE:
+ return Floats.getWriter(directory, id, 4);
+ case SIMPLE_FLOAT_8BYTE:
+ return Floats.getWriter(directory, id, 8);
+ case BYTES_FIXED_STRAIGHT:
+ return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true);
+ case BYTES_FIXED_DEREF:
+ return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true);
+ case BYTES_FIXED_SORTED:
+ return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true);
+ case BYTES_VAR_STRAIGHT:
+ return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false);
+ case BYTES_VAR_DEREF:
+ return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false);
+ case BYTES_VAR_SORTED:
+ return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false);
+ default:
+ throw new IllegalArgumentException("Unknown Values: " + v);
+ }
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
index d71b89f6fa6..2b322d6d4f8 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
@@ -22,7 +22,7 @@ import java.text.Collator;
import java.util.Locale;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.values.Reader.Source;
+import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.search.FieldCache.DocTermsIndex;
import org.apache.lucene.search.cache.ByteValuesCreator;
diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
index 2d421b03808..cf552bf058d 100644
--- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
+++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
@@ -25,6 +25,8 @@ import org.apache.lucene.analysis.*;
import org.apache.lucene.index.codecs.*;
import org.apache.lucene.index.codecs.standard.*;
import org.apache.lucene.index.codecs.pulsing.*;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
import org.apache.lucene.store.*;
import java.util.*;
import java.io.*;
@@ -159,6 +161,13 @@ public class TestExternalCodecs extends LuceneTestCase {
public void close() {
// TODO: finalize stuff
}
+
+ @Override
+ public DocValuesConsumer addValuesField(FieldInfo field)
+ throws IOException {
+ //TODO(simonw): can we fix this easily?
+ throw new UnsupportedOperationException("not implemented");
+ }
}
private static class RAMTermsConsumer extends TermsConsumer {
@@ -257,6 +266,11 @@ public class TestExternalCodecs extends LuceneTestCase {
public TermsEnum terms() {
return new RAMTermsEnum(postings.fieldToTerms.get(current));
}
+
+ @Override
+ public DocValues docValues() throws IOException {
+ throw new UnsupportedOperationException("not implemented");
+ }
}
static class RAMTermsEnum extends TermsEnum {
diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
index 2b0a4167174..deb23f079f9 100644
--- a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.codecs.preflex.TermInfo;
+import org.apache.lucene.index.values.codec.DocValuesConsumer;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
@@ -209,4 +210,10 @@ class PreFlexFieldsWriter extends FieldsConsumer {
return BytesRef.getUTF8SortedAsUTF16Comparator();
}
}
+
+ @Override
+ public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+ //TODO(simonw): can we fix this easily?
+ throw new UnsupportedOperationException("not implemented");
+ }
}
\ No newline at end of file
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
index 87efd06e5c4..5bc064965c1 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
@@ -33,15 +33,20 @@ import org.apache.lucene.document.ValuesField;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.values.Reader.SortedSource;
-import org.apache.lucene.index.values.Reader.Source;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.DocValues.SortedSource;
+import org.apache.lucene.index.values.DocValues.Source;
+import org.apache.lucene.index.values.codec.DocValuesCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
@@ -51,9 +56,33 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
public class TestIndexValues extends LuceneTestCase {
+ // TODO test addIndexes
+ private static DocValuesCodec docValuesCodec;
+
+ @BeforeClass
+ public static void beforeClassLuceneTestCaseJ4() {
+ LuceneTestCase.beforeClassLuceneTestCaseJ4();
+ final CodecProvider cp = CodecProvider.getDefault();
+ docValuesCodec = new DocValuesCodec(cp.lookup(CodecProvider.getDefaultCodec()));
+ cp.register(docValuesCodec);
+ CodecProvider.setDefaultCodec(docValuesCodec.name);
+ }
+
+ @AfterClass
+ public static void afterClassLuceneTestCaseJ4() {
+ final CodecProvider cp = CodecProvider.getDefault();
+ cp.unregister(docValuesCodec);
+ LuceneTestCase.afterClassLuceneTestCaseJ4();
+ }
+
+
public void testBytesStraight() throws IOException {
runTestBytes(Bytes.Mode.STRAIGHT, true);
runTestBytes(Bytes.Mode.STRAIGHT, false);
@@ -71,18 +100,16 @@ public class TestIndexValues extends LuceneTestCase {
// nocommit -- for sorted test, do our own Sort of the
// values and verify it's identical
- public void runTestBytes(final Bytes.Mode mode,
- final boolean fixedSize) throws IOException {
+ public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize)
+ throws IOException {
final BytesRef bytesRef = new BytesRef();
final Comparator comp = mode == Bytes.Mode.SORTED ? BytesRef
- .getUTF8SortedAsUnicodeComparator()
- : null;
+ .getUTF8SortedAsUnicodeComparator() : null;
Directory dir = newDirectory();
- Writer w = Bytes
- .getWriter(dir, "test", mode, comp, fixedSize);
+ Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize);
int maxDoc = 220;
final String[] values = new String[maxDoc];
final int lenMin, lenMax;
@@ -107,32 +134,33 @@ public class TestIndexValues extends LuceneTestCase {
}
w.finish(maxDoc);
- Reader r = Bytes.getReader(dir, "test", mode, fixedSize, maxDoc);
+ DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc);
for (int iter = 0; iter < 2; iter++) {
ValuesEnum bytesEnum = r.getEnum();
assertNotNull("enum is null", bytesEnum);
ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
assertNotNull("attribute is null", attr);
BytesRef ref = attr.bytes();
- assertNotNull("BytesRef is null - enum not initialized to use bytes", attr);
+ assertNotNull("BytesRef is null - enum not initialized to use bytes",
+ attr);
for (int i = 0; i < 2; i++) {
final int idx = 2 * i;
assertEquals("doc: " + idx, idx, bytesEnum.advance(idx));
String utf8String = ref.utf8ToString();
- assertEquals("doc: " + idx + " lenLeft: " + values[idx].length() + " lenRight: " + utf8String.length() , values[idx], utf8String);
+ assertEquals("doc: " + idx + " lenLeft: " + values[idx].length()
+ + " lenRight: " + utf8String.length(), values[idx], utf8String);
}
assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
- assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc+1));
+ assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1));
bytesEnum.close();
}
-
-
+
// Verify we can load source twice:
for (int iter = 0; iter < 2; iter++) {
Source s;
- Reader.SortedSource ss;
+ DocValues.SortedSource ss;
if (mode == Bytes.Mode.SORTED) {
s = ss = r.loadSorted(comp);
} else {
@@ -147,8 +175,8 @@ public class TestIndexValues extends LuceneTestCase {
if (ss != null) {
assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
.utf8ToString());
- Reader.SortedSource.LookupResult result = ss.getByValue(new BytesRef(
- values[idx]));
+ DocValues.SortedSource.LookupResult result = ss
+ .getByValue(new BytesRef(values[idx]));
assertTrue(result.found);
assertEquals(ss.ord(idx), result.ord);
}
@@ -217,7 +245,7 @@ public class TestIndexValues extends LuceneTestCase {
final int additionalDocs = 1 + random.nextInt(9);
w.finish(NUM_VALUES + additionalDocs);
- Reader r = Ints.getReader(dir, "test", useFixedArrays);
+ DocValues r = Ints.getValues(dir, "test", useFixedArrays);
for (int iter = 0; iter < 2; iter++) {
Source s = r.load();
for (int i = 0; i < NUM_VALUES; i++) {
@@ -254,7 +282,7 @@ public class TestIndexValues extends LuceneTestCase {
assertEquals(i, iEnum.advance(i));
assertEquals("" + i, 0, ints.get());
}
-
+
iEnum.close();
}
r.close();
@@ -267,22 +295,21 @@ public class TestIndexValues extends LuceneTestCase {
runTestFloats(4, 0.00001);
}
- private void runTestFloats(int precision, double delta)
- throws IOException {
+ private void runTestFloats(int precision, double delta) throws IOException {
Directory dir = newDirectory();
Writer w = Floats.getWriter(dir, "test", precision);
final int NUM_VALUES = 1000;
final double[] values = new double[NUM_VALUES];
for (int i = 0; i < NUM_VALUES; i++) {
- final double v = precision == 4 ? random.nextFloat() : random.nextDouble();
+ final double v = precision == 4 ? random.nextFloat() : random
+ .nextDouble();
values[i] = v;
w.add(i, v);
}
final int additionalValues = 1 + random.nextInt(10);
w.finish(NUM_VALUES + additionalValues);
- Reader r = Floats.getReader(dir, "test", NUM_VALUES
- + additionalValues);
+ DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues);
for (int iter = 0; iter < 2; iter++) {
Source s = r.load();
for (int i = 0; i < NUM_VALUES; i++) {
@@ -298,7 +325,7 @@ public class TestIndexValues extends LuceneTestCase {
assertEquals(i, fEnum.nextDoc());
assertEquals(values[i], floats.get(), delta);
}
- for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
assertEquals(i, fEnum.nextDoc());
assertEquals(0.0, floats.get(), delta);
}
@@ -312,7 +339,7 @@ public class TestIndexValues extends LuceneTestCase {
assertEquals(i, fEnum.advance(i));
assertEquals(values[i], floats.get(), delta);
}
- for(int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
assertEquals(i, fEnum.advance(i));
assertEquals(0.0, floats.get(), delta);
}
@@ -335,7 +362,7 @@ public class TestIndexValues extends LuceneTestCase {
// without deletions
IndexWriterConfig cfg = writerConfig(true);
// primitives - no deletes
- runTestNumerics(cfg,false);
+ runTestNumerics(cfg, false);
cfg = writerConfig(true);
// bytes - no deletes
@@ -377,12 +404,12 @@ public class TestIndexValues extends LuceneTestCase {
}
private IndexWriterConfig writerConfig(boolean useCompoundFile) {
- final IndexWriterConfig cfg = newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer());
+ final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer());
MergePolicy mergePolicy = cfg.getMergePolicy();
- if(mergePolicy instanceof LogMergePolicy) {
- ((LogMergePolicy)mergePolicy).setUseCompoundFile(useCompoundFile);
- } else if(useCompoundFile) {
+ if (mergePolicy instanceof LogMergePolicy) {
+ ((LogMergePolicy) mergePolicy).setUseCompoundFile(useCompoundFile);
+ } else if (useCompoundFile) {
LogMergePolicy policy = new LogDocMergePolicy();
policy.setUseCompoundFile(useCompoundFile);
cfg.setMergePolicy(policy);
@@ -390,8 +417,8 @@ public class TestIndexValues extends LuceneTestCase {
return cfg;
}
- public void runTestNumerics(IndexWriterConfig cfg,
- boolean withDeletions) throws IOException {
+ public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions)
+ throws IOException {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
final int numValues = 350;
@@ -409,14 +436,15 @@ public class TestIndexValues extends LuceneTestCase {
switch (val) {
case PACKED_INTS:
case PACKED_INTS_FIXED: {
- Reader intsReader = r.getIndexValues(val.name());
+ DocValues intsReader = getDocValues(r, val.name());
Source ints = intsReader.load();
ValuesEnum intsEnum = intsReader.getEnum();
assertNotNull(intsEnum);
LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
for (int i = 0; i < base; i++) {
assertEquals(0, ints.ints(i));
- assertEquals(val.name() + " base: " + base + " index: " + i, i, random.nextBoolean()?intsEnum.advance(i): intsEnum.nextDoc());
+ assertEquals(val.name() + " base: " + base + " index: " + i, i,
+ random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc());
assertEquals(0, enumRef.get());
}
int expected = 0;
@@ -424,7 +452,8 @@ public class TestIndexValues extends LuceneTestCase {
while (deleted.get(expected)) {
expected++;
}
- assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs", i, intsEnum.advance(i));
+ assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ + " docs", i, intsEnum.advance(i));
assertEquals(expected, ints.ints(i));
assertEquals(expected, enumRef.get());
@@ -433,24 +462,27 @@ public class TestIndexValues extends LuceneTestCase {
break;
case SIMPLE_FLOAT_4BYTE:
case SIMPLE_FLOAT_8BYTE: {
- Reader floatReader = r.getIndexValues(val.name());
+ DocValues floatReader = getDocValues(r, val.name());
Source floats = floatReader.load();
ValuesEnum floatEnum = floatReader.getEnum();
assertNotNull(floatEnum);
- FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class).floats();
+ FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class)
+ .floats();
for (int i = 0; i < base; i++) {
assertEquals(0.0d, floats.floats(i), 0.0d);
- assertEquals(i, random.nextBoolean()?floatEnum.advance(i): floatEnum.nextDoc());
- assertEquals("index " + i, 0.0 ,enumRef.get(), 0.0);
+ assertEquals(i, random.nextBoolean() ? floatEnum.advance(i)
+ : floatEnum.nextDoc());
+ assertEquals("index " + i, 0.0, enumRef.get(), 0.0);
}
int expected = 0;
for (int i = base; i < r.numDocs(); i++, expected++) {
while (deleted.get(expected)) {
expected++;
}
- assertEquals("advance failed at index: " + i + " of " + r.numDocs() + " docs base:" + base, i, floatEnum.advance(i));
- assertEquals("index " + i, 2.0 * expected ,enumRef.get() , 0.00001);
+ assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ + " docs base:" + base, i, floatEnum.advance(i));
+ assertEquals("index " + i, 2.0 * expected, enumRef.get(), 0.00001);
assertEquals("index " + i, 2.0 * expected, floats.floats(i), 0.00001);
}
}
@@ -468,30 +500,30 @@ public class TestIndexValues extends LuceneTestCase {
d.close();
}
- private static EnumSet BYTES = EnumSet.of(
- Values.BYTES_FIXED_DEREF,
- Values.BYTES_FIXED_SORTED,
- Values.BYTES_FIXED_STRAIGHT,
- Values.BYTES_VAR_DEREF ,
- Values.BYTES_VAR_SORTED,
- Values.BYTES_VAR_STRAIGHT
- );
-
- private static EnumSet STRAIGHT_BYTES = EnumSet.of(
- Values.BYTES_FIXED_STRAIGHT,
- Values.BYTES_VAR_STRAIGHT
- );
+ private static EnumSet BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF,
+ Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT,
+ Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED,
+ Values.BYTES_VAR_STRAIGHT);
- private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS, Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE);
-
- private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS};
- private OpenBitSet indexValues(IndexWriter w, int numValues,
- Values value, List valueVarList, boolean withDeletions,
- int multOfSeven) throws CorruptIndexException, IOException {
+ private static EnumSet STRAIGHT_BYTES = EnumSet.of(
+ Values.BYTES_FIXED_STRAIGHT, Values.BYTES_VAR_STRAIGHT);
+
+ private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS,
+ Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE,
+ Values.SIMPLE_FLOAT_8BYTE);
+
+ private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
+ Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS };
+
+ private OpenBitSet indexValues(IndexWriter w, int numValues, Values value,
+ List valueVarList, boolean withDeletions, int multOfSeven)
+ throws CorruptIndexException, IOException {
final boolean isNumeric = NUMERICS.contains(value);
OpenBitSet deleted = new OpenBitSet(numValues);
Document doc = new Document();
- Fieldable field = random.nextBoolean()? new ValuesField(value.name()):newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]);
+ Fieldable field = random.nextBoolean() ? new ValuesField(value.name())
+ : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random,
+ 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]);
doc.add(field);
ValuesAttribute valuesAttribute = ValuesField.values(field);
@@ -549,16 +581,15 @@ public class TestIndexValues extends LuceneTestCase {
}
}
w.commit();
-
+
// nocommit test unoptimized with deletions
- if(withDeletions || random.nextBoolean())
+ if (true || withDeletions || random.nextBoolean())
w.optimize();
return deleted;
}
- public void runTestIndexBytes(IndexWriterConfig cfg,
- boolean withDeletions) throws CorruptIndexException,
- LockObtainFailedException, IOException {
+ public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
final List byteVariantList = new ArrayList(BYTES);
@@ -577,14 +608,14 @@ public class TestIndexValues extends LuceneTestCase {
final int numRemainingValues = (int) (numValues - deleted.cardinality());
final int base = r.numDocs() - numRemainingValues;
- Reader bytesReader = r.getIndexValues(byteIndexValue.name());
-// closeables.add(bytesReader);
+ DocValues bytesReader = getDocValues(r, byteIndexValue.name());
assertNotNull("field " + byteIndexValue.name()
+ " returned null reader - maybe merged failed", bytesReader);
Source bytes = bytesReader.load();
ValuesEnum bytesEnum = bytesReader.getEnum();
assertNotNull(bytesEnum);
- final ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
+ final ValuesAttribute attr = bytesEnum
+ .addAttribute(ValuesAttribute.class);
byte upto = 0;
// test the filled up slots for correctness
for (int i = 0; i < base; i++) {
@@ -598,7 +629,7 @@ public class TestIndexValues extends LuceneTestCase {
// fixed straight returns bytesref with zero bytes all of fixed
// length
assertNotNull("expected none null - " + msg, br);
- if(br.length != 0) {
+ if (br.length != 0) {
assertEquals("expected zero bytes of length " + bytesSize + " - "
+ msg, bytesSize, br.length);
for (int j = 0; j < br.length; j++) {
@@ -613,35 +644,38 @@ public class TestIndexValues extends LuceneTestCase {
case BYTES_FIXED_DEREF:
default:
assertNotNull("expected none null - " + msg, br);
- if(br.length != 0){
+ if (br.length != 0) {
bytes.bytes(i);
}
- assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0, br.length);
+ assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0,
+ br.length);
}
}
final BytesRef enumRef = attr.bytes();
-
// test the actual doc values added in this iteration
assertEquals(base + numRemainingValues, r.numDocs());
int v = 0;
for (int i = base; i < r.numDocs(); i++) {
-
+
String msg = " field: " + byteIndexValue.name() + " at index: " + i
- + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + bytesSize;
+ + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: "
+ + bytesSize;
while (withDeletions && deleted.get(v++)) {
upto += bytesSize;
}
-
+
BytesRef br = bytes.bytes(i);
- if(bytesEnum.docID() != i)
- assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum.advance(i));
+ if (bytesEnum.docID() != i)
+ assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum
+ .advance(i));
for (int j = 0; j < br.length; j++, upto++) {
- assertEquals("EnumRef Byte at index " + j + " doesn't match - " + msg,
- upto, enumRef.bytes[enumRef.offset + j]);
- assertEquals("SourceRef Byte at index " + j + " doesn't match - " + msg,
- upto, br.bytes[br.offset + j]);
- }
+ assertEquals(
+ "EnumRef Byte at index " + j + " doesn't match - " + msg, upto,
+ enumRef.bytes[enumRef.offset + j]);
+ assertEquals("SourceRef Byte at index " + j + " doesn't match - "
+ + msg, upto, br.bytes[br.offset + j]);
+ }
}
// clean up
@@ -650,9 +684,32 @@ public class TestIndexValues extends LuceneTestCase {
toClose.close();
}
}
-
+
w.close();
d.close();
}
-
+
+ private DocValues getDocValues(IndexReader reader, String field)
+ throws IOException {
+ boolean optimized = reader.isOptimized();
+ Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() : MultiFields
+ .getFields(reader);
+// return fields.docValues(field);
+ switch (random.nextInt(optimized ? 3 : 2)) {
+ case 0:
+ return fields.docValues(field);
+ case 1:
+ FieldsEnum iterator = fields.iterator();
+ String name;
+ while ((name = iterator.next()) != null) {
+ if (name.equals(field))
+ return iterator.docValues();
+ }
+ throw new RuntimeException("no such field " + field);
+ case 2:
+ return reader.getSequentialSubReaders()[0].docValues(field);
+ }
+throw new RuntimeException();
+}
+
}
From 211ab616b04954659abb37a1c1027114ad948bc8 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 26 Oct 2010 09:28:52 +0000
Subject: [PATCH 007/116] LUCENE-2700: added missing files
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1027415 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/index/DocFieldProcessor.java | 19 +-
.../index/DocFieldProcessorPerThread.java | 4 +-
.../lucene/index/codecs/FieldsConsumer.java | 3 +-
.../index/codecs/PerFieldCodecWrapper.java | 3 +-
.../codecs/docvalues/DocValuesCodec.java | 298 +++++++++++++++++
.../codecs/docvalues/DocValuesConsumer.java | 67 ++++
.../docvalues/DocValuesProducerBase.java | 99 ++++++
.../lucene/index/values/MultiDocValues.java | 300 ++++++++++++++++++
.../src/java/org/apache/lucene/util/Pair.java | 36 +++
9 files changed, 809 insertions(+), 20 deletions(-)
create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
create mode 100644 lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
create mode 100644 lucene/src/java/org/apache/lucene/util/Pair.java
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index 4db1363e722..030979cb5e7 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -17,24 +17,15 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.index.codecs.FieldsConsumer;
-import org.apache.lucene.index.values.Ints;
-import org.apache.lucene.index.values.Floats;
-import org.apache.lucene.index.values.Bytes;
-import org.apache.lucene.index.values.ValuesAttribute;
-import org.apache.lucene.index.values.Writer;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FloatsRef;
-import org.apache.lucene.util.LongsRef;
-
import java.io.IOException;
import java.util.Collection;
-import java.util.Comparator;
-import java.util.Map;
import java.util.HashMap;
+import java.util.Map;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
+import org.apache.lucene.index.values.ValuesAttribute;
+import org.apache.lucene.store.Directory;
/**
* This is a DocConsumer that gathers all fields under the
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
index 0f2fed91c6d..5bd7321fce1 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
@@ -24,8 +24,8 @@ import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.index.values.ValuesAttribute;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RamUsageEstimator;
@@ -407,4 +407,4 @@ final class DocFieldProcessorPerThread extends DocConsumerPerThread {
}
}
}
-}
\ No newline at end of file
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
index 5bc0b48b6ef..0f90deeeff1 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
@@ -22,10 +22,9 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.Writer;
-import org.apache.lucene.index.values.Values;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
import java.io.IOException;
import java.io.Closeable;
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java
index cf21d6c3620..b00d4dbc774 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/PerFieldCodecWrapper.java
@@ -23,7 +23,6 @@ import java.util.Set;
import java.util.HashSet;
import java.util.Iterator;
import java.util.IdentityHashMap;
-import java.util.TreeMap;
import java.util.TreeSet;
import java.io.IOException;
@@ -35,8 +34,8 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.index.values.DocValues;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
import org.apache.lucene.store.Directory;
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
new file mode 100644
index 00000000000..821f766bd83
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
@@ -0,0 +1,298 @@
+package org.apache.lucene.index.codecs.docvalues;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.Writer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * A codec that adds DocValues support to a given codec transparently.
+ */
+public class DocValuesCodec extends Codec {
+ private final Map consumers = new HashMap();
+ private final Codec other;
+
+ public DocValuesCodec(Codec other) {
+ this.name = "docvalues_" + other.name;
+ this.other = other;
+ }
+
+ @Override
+ public FieldsConsumer fieldsConsumer(SegmentWriteState state)
+ throws IOException {
+ WrappingFieldsConsumer consumer;
+ if ((consumer = consumers.get(state.segmentName)) == null) {
+ consumer = new WrappingFieldsConsumer(other);
+ }
+ consumer.state = state; // nocommit this is a hack and only necessary since
+ // we want to initialize the wrapped
+ // fieldsConsumer lazily with a SegmentWriteState created after the docvalue
+ // one is. We should fix this in DocumentWriter I guess. See
+ // DocFieldProcessor too!
+ return consumer;
+ }
+
+ private static class WrappingFieldsConsumer extends FieldsConsumer {
+ SegmentWriteState state;
+ private final List docValuesConsumers = new ArrayList();
+ private FieldsConsumer wrappedConsumer;
+ private final Codec other;
+
+ public WrappingFieldsConsumer(Codec other) {
+ this.other = other;
+ }
+
+ @Override
+ public void close() throws IOException {
+ synchronized (this) {
+ if (wrappedConsumer != null)
+ wrappedConsumer.close();
+ }
+ }
+
+ @Override
+ public synchronized DocValuesConsumer addValuesField(FieldInfo field)
+ throws IOException {
+ DocValuesConsumer consumer = DocValuesConsumer.create(state.segmentName,
+ state.directory, field, null); // TODO: set comparator here
+ docValuesConsumers.add(consumer);
+ return consumer;
+ }
+
+ @Override
+ public TermsConsumer addField(FieldInfo field) throws IOException {
+ synchronized (this) {
+ if (wrappedConsumer == null)
+ wrappedConsumer = other.fieldsConsumer(state);
+ }
+ return wrappedConsumer.addField(field);
+ }
+ }
+
+ @Override
+ public FieldsProducer fieldsProducer(SegmentReadState state)
+ throws IOException {
+ Directory dir = state.dir;
+ Set files = new HashSet();
+
+ other.files(dir, state.segmentInfo, files);
+ for (String string : files) {
+ if (dir.fileExists(string))
+ return new WrappingFielsdProducer(state.segmentInfo, state.dir,
+ state.fieldInfos, other.fieldsProducer(state));
+ }
+ return new WrappingFielsdProducer(state.segmentInfo, state.dir,
+ state.fieldInfos, FieldsProducer.EMPTY);
+
+ }
+
+ @Override
+ public void files(Directory dir, SegmentInfo segmentInfo, Set files)
+ throws IOException {
+ Set otherFiles = new HashSet();
+ other.files(dir, segmentInfo, otherFiles);
+ for (String string : otherFiles) {
+ if (dir.fileExists(string))
+ files.add(string);
+ }
+
+ for (String file : dir.listAll()) {
+ if (file.startsWith(segmentInfo.name)
+ && (file.endsWith(Writer.DATA_EXTENSION) || file
+ .endsWith(Writer.INDEX_EXTENSION))) {
+ files.add(file);
+ }
+ }
+ // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
+ // Writer.DATA_EXTENSION));
+ // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
+ // Writer.INDEX_EXTENSION));
+
+ }
+
+ @Override
+ public void getExtensions(Set extensions) {
+ other.getExtensions(extensions);
+ extensions.add(Writer.DATA_EXTENSION);
+ extensions.add(Writer.INDEX_EXTENSION);
+ }
+
+ static class WrappingFielsdProducer extends DocValuesProducerBase {
+
+ private final FieldsProducer other;
+
+ WrappingFielsdProducer(SegmentInfo si, Directory dir, FieldInfos fieldInfo,
+ FieldsProducer other) throws IOException {
+ super(si, dir, fieldInfo);
+ this.other = other;
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ other.close();
+ } finally {
+ super.close();
+ }
+ }
+
+ @Override
+ public void loadTermsIndex(int indexDivisor) throws IOException {
+ other.loadTermsIndex(indexDivisor);
+ }
+
+ @Override
+ public FieldsEnum iterator() throws IOException {
+ return new WrappingFieldsEnum(other.iterator(), docValues.entrySet()
+ .iterator());
+ }
+
+ @Override
+ public Terms terms(String field) throws IOException {
+ return other.terms(field);
+ }
+ }
+
+ static abstract class NameValue {
+ String name;
+ V value;
+
+ NameValue> smaller(NameValue> other) throws IOException {
+ if (other.name == null) {
+ if (this.name == null) {
+ return null;
+ }
+ return this;
+ } else if (this.name == null) {
+ return other;
+ }
+ final int res = this.name.compareTo(other.name);
+ if (res < 0)
+ return this;
+ if (res == 0)
+ other.name = this.name;
+ return other;
+ }
+
+ abstract NameValue next() throws IOException;
+ }
+
+ static class FieldsEnumNameValue extends NameValue {
+ @Override
+ NameValue next() throws IOException {
+ name = value.next();
+ return this;
+ }
+
+ }
+
+ static class DocValueNameValue extends NameValue {
+ Iterator> iter;
+
+ @Override
+ NameValue next() {
+ if (iter.hasNext()) {
+ Entry next = iter.next();
+ value = next.getValue();
+ name = next.getKey();
+ } else {
+ name = null;
+ }
+ return this;
+ }
+
+ }
+
+ static class WrappingFieldsEnum extends FieldsEnum {
+ private final DocValueNameValue docValues = new DocValueNameValue();
+ private final NameValue fieldsEnum = new FieldsEnumNameValue();
+ private NameValue> coordinator;
+
+ @Override
+ public AttributeSource attributes() {
+ return fieldsEnum.value.attributes();
+ }
+
+ public WrappingFieldsEnum(FieldsEnum wrapped,
+ Iterator> docValues) {
+ this.docValues.iter = docValues;
+ this.fieldsEnum.value = wrapped;
+ coordinator = null;
+
+ }
+
+ @Override
+ public DocValues docValues() throws IOException {
+ if (docValues.name == coordinator.name)
+ return docValues.value;
+ return null;
+ }
+
+ @Override
+ public String next() throws IOException {
+ if (coordinator == null) {
+ coordinator = fieldsEnum.next().smaller(docValues.next());
+ // old = coordinator.name;
+ } else {
+ String current = coordinator.name;
+ if (current == docValues.name) {
+ docValues.next();
+ }
+ if (current == fieldsEnum.name) {
+ fieldsEnum.next();
+ }
+ coordinator = docValues.smaller(fieldsEnum);
+
+ }
+ return coordinator == null ? null : coordinator.name;
+
+ }
+
+ @Override
+ public TermsEnum terms() throws IOException {
+ if (fieldsEnum.name == coordinator.name)
+ return fieldsEnum.value.terms();
+ return null;
+ }
+
+ }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
new file mode 100644
index 00000000000..22b04137d27
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
@@ -0,0 +1,67 @@
+package org.apache.lucene.index.codecs.docvalues;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.ValuesAttribute;
+import org.apache.lucene.index.values.Writer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+public abstract class DocValuesConsumer {
+ public abstract void add(int docID, ValuesAttribute attr) throws IOException;
+
+ public abstract void finish(int docCount) throws IOException;
+
+ public abstract void files(Collection files) throws IOException;
+
+ public void merge(List states) throws IOException {
+ for (MergeState state : states) {
+ merge(state);
+ }
+ }
+
+ protected abstract void merge(MergeState mergeState) throws IOException;
+
+
+ public static class MergeState {
+ public final DocValues reader;
+ public final int docBase;
+ public final int docCount;
+ public final Bits bits;
+
+ public MergeState(DocValues reader, int docBase, int docCount, Bits bits) {
+ assert reader != null;
+ this.reader = reader;
+ this.docBase = docBase;
+ this.docCount = docCount;
+ this.bits = bits;
+ }
+ }
+
+ public static DocValuesConsumer create(String segmentName, Directory directory,
+ FieldInfo field, Comparator comp) throws IOException {
+ final String id = segmentName + "_" + field.number;
+ return Writer.create(field.getIndexValues(), id, directory, comp);
+ }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
new file mode 100644
index 00000000000..ce016755455
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
@@ -0,0 +1,99 @@
+package org.apache.lucene.index.codecs.docvalues;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Collection;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.values.Bytes;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.Ints;
+import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Writer;
+import org.apache.lucene.store.Directory;
+
+public abstract class DocValuesProducerBase extends FieldsProducer{
+
+ protected final TreeMap<String, DocValues> docValues = new TreeMap<String, DocValues>();
+
+ protected DocValuesProducerBase(SegmentInfo si, Directory dir, FieldInfos fieldInfo) throws IOException {
+ load(fieldInfo, si.name, si.docCount, dir);
+ }
+
+ @Override
+ public DocValues docValues(String field) throws IOException {
+ return docValues.get(field);
+ }
+
+ // Only opens files... doesn't actually load any values
+ protected void load(FieldInfos fieldInfos, String segment, int docCount,
+ Directory dir) throws IOException {
+ final int numFields = fieldInfos.size();
+ for (int i = 0; i < numFields; i++) {
+ final FieldInfo fieldInfo = fieldInfos.fieldInfo(i);
+ final Values v = fieldInfo.getIndexValues();
+ final String field = fieldInfo.name;
+ final String id = IndexFileNames.segmentFileName(segment, Integer
+ .toString(fieldInfo.number), "");
+ if (v != null && dir.fileExists(id + "." + Writer.DATA_EXTENSION)) {
+ docValues.put(field, loadDocValues(docCount, dir, id, v));
+ }
+ }
+ }
+
+ protected DocValues loadDocValues(int docCount, Directory dir, String id,
+ Values v) throws IOException {
+ switch (v) {
+ case PACKED_INTS:
+ return Ints.getValues(dir, id, false);
+ case PACKED_INTS_FIXED:
+ return Ints.getValues(dir, id, true);
+ case SIMPLE_FLOAT_4BYTE:
+ return Floats.getValues(dir, id, docCount);
+ case SIMPLE_FLOAT_8BYTE:
+ return Floats.getValues(dir, id, docCount);
+ case BYTES_FIXED_STRAIGHT:
+ return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount);
+ case BYTES_FIXED_DEREF:
+ return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount);
+ case BYTES_FIXED_SORTED:
+ return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount);
+ case BYTES_VAR_STRAIGHT:
+ return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount);
+ case BYTES_VAR_DEREF:
+ return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount);
+ case BYTES_VAR_SORTED:
+ return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount);
+ default:
+ throw new IllegalStateException("unrecognized index values mode " + v);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ Collection<DocValues> values = docValues.values();
+ for (DocValues docValues : values) {
+ docValues.close();
+ }
+ }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
new file mode 100644
index 00000000000..cd6216a2530
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
@@ -0,0 +1,300 @@
+package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.ReaderUtil.Slice;
+
+public class MultiDocValues extends DocValues {
+
+ public static class DocValuesIndex {
+ public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0];
+ final int subIndex;
+ final DocValues docValues;
+
+ public DocValuesIndex(DocValues docValues, int subIndex) {
+ this.docValues = docValues;
+ this.subIndex = subIndex;
+ }
+ }
+
+ private DocValuesIndex[] docValuesIdx;
+ private Slice[] subSlices;
+
+ public MultiDocValues(Slice[] subSlices) {
+ this.subSlices = subSlices;
+ }
+
+ public MultiDocValues(DocValuesIndex[] docValuesIdx, Slice[] subSlices) {
+ this(subSlices);
+ reset(docValuesIdx);
+ }
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ return new MultiValuesEnum(subSlices, docValuesIdx, docValuesIdx[0].docValues.type());
+ }
+
+ @Override
+ public Source load() throws IOException {
+ return new MultiSource(subSlices, docValuesIdx);
+ }
+
+ public void close() throws IOException {
+ //
+ }
+
+ public DocValues reset(DocValuesIndex[] docValuesIdx) {
+ this.docValuesIdx = docValuesIdx;
+ return this;
+ }
+
+ private static class MultiValuesEnum extends ValuesEnum {
+ private int numDocs_ = 0;
+ private int pos = -1;
+ private int start = 0;
+ private ValuesEnum current;
+ private Slice[] subSlices;
+ private DocValuesIndex[] docValuesIdx;
+ private final int maxDoc;
+
+ public MultiValuesEnum(Slice[] subSlices, DocValuesIndex[] docValuesIdx, Values type) {
+ super(type);
+ this.subSlices = subSlices;
+ this.docValuesIdx = docValuesIdx;
+ Slice slice = subSlices[subSlices.length-1];
+ maxDoc = slice.start + slice.length;
+ }
+
+ @Override
+ public void close() throws IOException {
+
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+// int n = target - start;
+// do {
+// if (target >= maxDoc)
+// return pos = NO_MORE_DOCS;
+// if (n >= numDocs_) {
+// int idx = readerIndex(target);
+// if (enumCache[idx] == null) {
+// try {
+// DocValues indexValues = subReaders[idx].docValues(id);
+// if (indexValues != null) // nocommit does that work with default
+// // values?
+// enumCache[idx] = indexValues.getEnum(this.attributes());
+// else
+// enumCache[idx] = new DummyEnum(this.attributes(),
+// subSlices[idx].length, attr.type());
+// } catch (IOException ex) {
+// // nocommit what to do here?
+// throw new RuntimeException(ex);
+// }
+// }
+// current = enumCache[idx];
+// start = subSlices[idx].start;
+// numDocs_ = subSlices[idx].length;
+// n = target - start;
+// }
+// target = start + numDocs_;
+// } while ((n = current.advance(n)) == NO_MORE_DOCS);
+ return pos = start + current.docID();
+ }
+
+ @Override
+ public int docID() {
+ return pos;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(pos + 1);
+ }
+ }
+
+ private class MultiSource extends Source {
+ private int numDocs_ = 0;
+ private int start = 0;
+ private Source current;
+ private Slice[] subSlices;
+ private DocValuesIndex[] docVAluesIdx;
+
+ public MultiSource(Slice[] subSlices, DocValuesIndex[] docValuesIdx) {
+ this.subSlices = subSlices;
+ this.docVAluesIdx = docValuesIdx;
+ }
+
+ public long ints(int docID) {
+// int n = docID - start;
+// if (n >= numDocs_) {
+// int idx = readerIndex(docID);
+// try {
+// current = subReaders[idx].getIndexValuesCache().getInts(id);
+// if (current == null) // nocommit does that work with default values?
+// current = new DummySource();
+// } catch (IOException ex) {
+// // nocommit what to do here?
+// throw new RuntimeException(ex);
+// }
+// start = starts[idx];
+// numDocs_ = subReaders[idx].maxDoc();
+// n = docID - start;
+// }
+// return current.ints(n);
+ return 0l;
+ }
+
+ public double floats(int docID) {
+// int n = docID - start;
+// if (n >= numDocs_) {
+// int idx = readerIndex(docID);
+// try {
+// current = subReaders[idx].getIndexValuesCache().getFloats(id);
+// if (current == null) // nocommit does that work with default values?
+// current = new DummySource();
+// } catch (IOException ex) {
+// // nocommit what to do here?
+// throw new RuntimeException(ex);
+// }
+// numDocs_ = subReaders[idx].maxDoc();
+//
+// start = starts[idx];
+// n = docID - start;
+// }
+// return current.floats(n);
+ return 0d;
+ }
+
+ public BytesRef bytes(int docID) {
+// int n = docID - start;
+// if (n >= numDocs_) {
+// int idx = readerIndex(docID);
+// try {
+// current = subReaders[idx].getIndexValuesCache().getBytes(id);
+// if (current == null) // nocommit does that work with default values?
+// current = new DummySource();
+// } catch (IOException ex) {
+// // nocommit what to do here?
+// throw new RuntimeException(ex);
+// }
+// numDocs_ = subReaders[idx].maxDoc();
+// start = starts[idx];
+// n = docID - start;
+// }
+// return current.bytes(n);
+ return null;
+ }
+
+ public long ramBytesUsed() {
+ return current.ramBytesUsed();
+ }
+
+ }
+
+ private static class DummySource extends Source {
+ private final BytesRef ref = new BytesRef();
+
+ @Override
+ public BytesRef bytes(int docID) {
+ return ref;
+ }
+
+ @Override
+ public double floats(int docID) {
+ return 0.0d;
+ }
+
+ @Override
+ public long ints(int docID) {
+ return 0;
+ }
+
+ public long ramBytesUsed() {
+ return 0;
+ }
+ }
+
+ private static class DummyEnum extends ValuesEnum {
+ private int pos = -1;
+ private final int maxDoc;
+
+ public DummyEnum(AttributeSource source, int maxDoc, Values type) {
+ super(source, type);
+ this.maxDoc = maxDoc;
+ switch (type) {
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ // nocommit - this is not correct for Fixed_straight
+ BytesRef bytes = attr.bytes();
+ bytes.length = 0;
+ bytes.offset = 0;
+ break;
+ case PACKED_INTS:
+ case PACKED_INTS_FIXED:
+ LongsRef ints = attr.ints();
+ ints.set(0);
+ break;
+
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE:
+ FloatsRef floats = attr.floats();
+ floats.set(0d);
+ break;
+ default:
+ throw new IllegalArgumentException("unknown Values type: " + type);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return pos = (pos < maxDoc ? target : NO_MORE_DOCS);
+ }
+
+ @Override
+ public int docID() {
+ return pos;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(pos + 1);
+ }
+ }
+
+ @Override
+ public Values type() {
+ return this.docValuesIdx[0].docValues.type();
+ }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/util/Pair.java b/lucene/src/java/org/apache/lucene/util/Pair.java
new file mode 100644
index 00000000000..9459a7548db
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/Pair.java
@@ -0,0 +1,36 @@
+package org.apache.lucene.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Simple Pair
+ * @lucene.internal
+ */
+public class Pair<Cur, Cud> {
+ public final Cur cur;
+ public final Cud cud;
+
+ /**
+ * Create a simple pair
+ * @param cur the first element
+ * @param cud the second element
+ */
+ public Pair(Cur cur, Cud cud) {
+ this.cur = cur;
+ this.cud = cud;
+ }
+}
\ No newline at end of file
From beb1bb74fa56760ef3f5ce0d3037f35b335165c5 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 26 Oct 2010 09:37:49 +0000
Subject: [PATCH 008/116] fixed remaining imports - sorry for the noise
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1027420 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/src/java/org/apache/lucene/index/values/Writer.java | 2 +-
lucene/src/test/org/apache/lucene/TestExternalCodecs.java | 2 +-
.../lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java | 2 +-
.../test/org/apache/lucene/index/values/TestIndexValues.java | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java
index ae081778769..b73b8ab4113 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Writer.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java
@@ -19,7 +19,7 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.Comparator;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
index cf552bf058d..ef6596e43ca 100644
--- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
+++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
@@ -26,7 +26,7 @@ import org.apache.lucene.index.codecs.*;
import org.apache.lucene.index.codecs.standard.*;
import org.apache.lucene.index.codecs.pulsing.*;
import org.apache.lucene.index.values.DocValues;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.store.*;
import java.util.*;
import java.io.*;
diff --git a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
index deb23f079f9..4a196d9b7f5 100644
--- a/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
@@ -28,7 +28,7 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.codecs.preflex.TermInfo;
-import org.apache.lucene.index.values.codec.DocValuesConsumer;
+import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
index 5bc064965c1..3e046296f4b 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
@@ -46,7 +46,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.values.DocValues.SortedSource;
import org.apache.lucene.index.values.DocValues.Source;
-import org.apache.lucene.index.values.codec.DocValuesCodec;
+import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
From 1fced2aa40b2500ccb6cb3a02cea33b5b543d459 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Fri, 29 Oct 2010 20:33:36 +0000
Subject: [PATCH 009/116] LUCENE-2700: Added support for MultiFields,
MultiFieldsEnum and moved actual merge code to DocValuesCodec
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1028899 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/index/DocFieldProcessor.java | 2 +-
.../org/apache/lucene/index/FieldInfo.java | 20 +-
.../org/apache/lucene/index/FieldInfos.java | 30 +-
.../org/apache/lucene/index/IndexReader.java | 10 +-
.../org/apache/lucene/index/MultiFields.java | 28 +-
.../apache/lucene/index/MultiFieldsEnum.java | 48 +++-
.../apache/lucene/index/SegmentMerger.java | 17 +-
.../apache/lucene/index/SegmentReader.java | 2 +-
.../lucene/index/codecs/FieldsConsumer.java | 29 +-
.../codecs/docvalues/DocValuesConsumer.java | 52 +++-
.../docvalues/DocValuesProducerBase.java | 8 +-
.../org/apache/lucene/index/values/Bytes.java | 2 +
.../org/apache/lucene/index/values/Cache.java | 116 --------
.../apache/lucene/index/values/DocValues.java | 66 +++--
.../index/values/FixedDerefBytesImpl.java | 2 +-
.../index/values/FixedStraightBytesImpl.java | 2 +-
.../apache/lucene/index/values/Floats.java | 8 +-
.../lucene/index/values/MultiDocValues.java | 264 ++++++++++--------
.../lucene/index/values/PackedIntsImpl.java | 6 +-
.../apache/lucene/index/values/Values.java | 4 +-
.../index/values/VarDerefBytesImpl.java | 4 +-
.../index/values/VarSortedBytesImpl.java | 4 +-
.../index/values/VarStraightBytesImpl.java | 7 +-
.../apache/lucene/search/FieldComparator.java | 12 +-
.../org/apache/lucene/search/SortField.java | 2 +-
.../org/apache/lucene/util/ReaderUtil.java | 22 ++
.../lucene/index/values/TestIndexValues.java | 84 +++---
27 files changed, 428 insertions(+), 423 deletions(-)
delete mode 100644 lucene/src/java/org/apache/lucene/index/values/Cache.java
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index 030979cb5e7..d1b46e1b9c9 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -49,7 +49,7 @@ final class DocFieldProcessor extends DocConsumer {
throws IOException {
DocValuesConsumer valuesConsumer;
if ((valuesConsumer = docValues.get(name)) == null) {
- fieldInfo.setIndexValues(attr.type());
+ fieldInfo.setDocValues(attr.type());
if(fieldsConsumer == null) {
/* nocommit -- this is a hack and only works since DocValuesCodec supports initializing the FieldsConsumer twice.
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
index d7529874599..f07f73764c9 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
@@ -24,7 +24,7 @@ public final class FieldInfo {
public String name;
public boolean isIndexed;
public int number;
- Values indexValues;
+ Values docValues;
// true if term vector for this field should be stored
@@ -93,17 +93,21 @@ public final class FieldInfo {
}
}
- void setIndexValues(Values v) {
- if (indexValues != null) {
- if (indexValues != v) {
- throw new IllegalArgumentException("indexValues is already set to " + indexValues + "; cannot change to " + v);
+ void setDocValues(Values v) {
+ if (docValues != null) {
+ if (docValues != v) {
+ throw new IllegalArgumentException("indexValues is already set to " + docValues + "; cannot change to " + v);
}
} else{
- indexValues = v;
+ docValues = v;
}
}
+
+ public boolean hasDocValues() {
+ return docValues != null;
+ }
- public Values getIndexValues() {
- return indexValues;
+ public Values getDocValues() {
+ return docValues;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
index 26ed7136d97..aa11aa77fbd 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
@@ -311,10 +311,10 @@ public final class FieldInfos {
final byte b;
- if (fi.indexValues == null) {
+ if (fi.docValues == null) {
b = 0;
} else {
- switch(fi.indexValues) {
+ switch(fi.docValues) {
case PACKED_INTS:
b = 1;
break;
@@ -346,7 +346,7 @@ public final class FieldInfos {
b = 10;
break;
default:
- throw new IllegalStateException("unhandled indexValues type " + fi.indexValues);
+ throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
}
}
output.writeByte(b);
@@ -377,43 +377,41 @@ public final class FieldInfos {
boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
FieldInfo fi = addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
-
if (format <= FORMAT_INDEX_VALUES) {
final byte b = input.readByte();
-
switch(b) {
case 0:
- fi.indexValues = null;
+ fi.docValues = null;
break;
case 1:
- fi.indexValues = Values.PACKED_INTS;
+ fi.docValues = Values.PACKED_INTS;
break;
case 2:
- fi.indexValues = Values.SIMPLE_FLOAT_4BYTE;
+ fi.docValues = Values.SIMPLE_FLOAT_4BYTE;
break;
case 3:
- fi.indexValues = Values.SIMPLE_FLOAT_8BYTE;
+ fi.docValues = Values.SIMPLE_FLOAT_8BYTE;
break;
case 4:
- fi.indexValues = Values.BYTES_FIXED_STRAIGHT;
+ fi.docValues = Values.BYTES_FIXED_STRAIGHT;
break;
case 5:
- fi.indexValues = Values.BYTES_FIXED_DEREF;
+ fi.docValues = Values.BYTES_FIXED_DEREF;
break;
case 6:
- fi.indexValues = Values.BYTES_FIXED_SORTED;
+ fi.docValues = Values.BYTES_FIXED_SORTED;
break;
case 7:
- fi.indexValues = Values.BYTES_VAR_STRAIGHT;
+ fi.docValues = Values.BYTES_VAR_STRAIGHT;
break;
case 8:
- fi.indexValues = Values.BYTES_VAR_DEREF;
+ fi.docValues = Values.BYTES_VAR_DEREF;
break;
case 9:
- fi.indexValues = Values.BYTES_VAR_SORTED;
+ fi.docValues = Values.BYTES_VAR_SORTED;
break;
case 10:
- fi.indexValues = Values.PACKED_INTS_FIXED;
+ fi.docValues = Values.PACKED_INTS_FIXED;
break;
default:
throw new IllegalStateException("unhandled indexValues type " + b);
diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java
index 2cb8d6d9d72..c292460d2f5 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java
@@ -21,7 +21,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.values.Cache;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.store.*;
import org.apache.lucene.util.Bits;
@@ -1090,7 +1089,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
if (docs == null) return 0;
int n = 0;
int doc;
- while ((doc = docs.nextDoc()) != docs.NO_MORE_DOCS) {
+ while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
deleteDocument(doc);
n++;
}
@@ -1386,13 +1385,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
return fields.docValues(field);
}
- private final Cache indexValuesCache = new Cache(this);
-
- // nocommit -- don't expose readers if we have this?
- public Cache getIndexValuesCache() {
- return indexValuesCache;
- }
-
private Fields fields;
/** @lucene.internal */
diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java
index f642383c36b..9137d6077ff 100644
--- a/lucene/src/java/org/apache/lucene/index/MultiFields.java
+++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.MultiDocValues;
+import org.apache.lucene.index.values.Values;
import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs
@@ -290,19 +291,30 @@ public final class MultiFields extends Fields {
// Lazy init: first time this field is requested, we
// create & add to docValues:
- final List subs2 = new ArrayList();
- final List slices2 = new ArrayList();
-
+ final List<DocValuesIndex> docValuesIndex = new ArrayList<DocValuesIndex>();
+ int docsUpto = 0;
+ Values type = null;
// Gather all sub-readers that share this field
for(int i=0;i enumWithSlices = new ArrayList();
// Init q
for(int i=0;i values = new ArrayList();
- for (int i = 0; i < numTop; i++) {
- final DocValues docValues = top[i].fields.docValues();
- if (docValues != null) {
- values.add(new MultiDocValues.DocValuesIndex(docValues,
- top[i].index));
+ final List<DocValuesIndex> docValuesIndex = new ArrayList<DocValuesIndex>();
+ int docsUpto = 0;
+ Values type = null;
+ final int numEnums = enumWithSlices.length;
+ for (int i = 0; i < numEnums; i++) {
+ FieldsEnumWithSlice withSlice = enumWithSlices[i];
+ Slice slice = withSlice.slice;
+ final DocValues values = withSlice.fields.docValues();
+
+ final int start = slice.start;
+ final int length = slice.length;
+ if (values != null) {
+ if (docsUpto != start) {
+ type = values.type();
+ docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+ new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+ - docsUpto));
+ }
+ docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
+ length));
+ docsUpto = start + length;
+
+
+ } else if (i+1 == numEnums && !docValuesIndex.isEmpty()) {
+ docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+ new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+ - docsUpto));
}
}
- // TODO return an empty docvalues instance if values are empty
- return docValues.reset(values.toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY));
+ return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex
+ .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY));
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index 95577954852..64a7b479f41 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -31,12 +31,7 @@ import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.index.codecs.FieldsConsumer;
-import org.apache.lucene.index.values.Bytes;
-import org.apache.lucene.index.values.Ints;
-import org.apache.lucene.index.values.DocValues;
-import org.apache.lucene.index.values.Floats;
import org.apache.lucene.index.values.Values;
-import org.apache.lucene.index.values.Writer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -296,13 +291,13 @@ final class SegmentMerger {
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
!reader.hasNorms(fi.name), fi.storePayloads,
fi.omitTermFreqAndPositions);
- final Values fiIndexValues = fi.indexValues;
- final Values mergedIndexValues = merged.indexValues;
- if (mergedIndexValues == null) {
- merged.setIndexValues(fiIndexValues);
- } else if (mergedIndexValues != fiIndexValues) {
+ final Values fiIndexValues = fi.docValues;
+ final Values mergedDocValues = merged.docValues;
+ if (mergedDocValues == null) {
+ merged.setDocValues(fiIndexValues);
+ } else if (mergedDocValues != fiIndexValues) {
// TODO -- can we recover from this?
- throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedIndexValues + " to " + fiIndexValues);
+ throw new IllegalStateException("cannot merge field " + fi.name + " indexValues changed from " + mergedDocValues + " to " + fiIndexValues);
}
}
} else {
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java
index 9c854662a24..b1b73929caf 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java
@@ -968,7 +968,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
fieldSet.add(fi.name);
}
- else if (fi.indexValues != null && fieldOption == IndexReader.FieldOption.DOC_VALUES) {
+ else if (fi.docValues != null && fieldOption == IndexReader.FieldOption.DOC_VALUES) {
fieldSet.add(fi.name);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
index 0f90deeeff1..de57c651824 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
@@ -65,32 +65,13 @@ public abstract class FieldsConsumer implements Closeable {
final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
termsConsumer.merge(mergeState, terms);
}
-
- DocValues docValues = fieldsEnum.docValues(); // fix this - does not work due to multi fields
- if(docValues != null) {
- // TODO we need some kind of compatibility notation for values such
- // that two slighly different segments can be merged eg. fixed vs.
- // variable byte len or float32 vs. float64
- int docBase = 0;
- final List mergeStates = new ArrayList();
- for (IndexReader reader : mergeState.readers) {
- DocValues r = reader.docValues(mergeState.fieldInfo.name);
- if (r != null) {
- mergeStates.add(new Writer.MergeState(r, docBase, reader
- .maxDoc(), reader.getDeletedDocs()));
- }
- docBase += reader.numDocs();
- }
- if (mergeStates.isEmpty()) {
- continue;
- }
+ if (mergeState.fieldInfo.hasDocValues()) {
+ final DocValues docValues = fieldsEnum.docValues();
+ assert docValues != null : "DocValues are null for " + mergeState.fieldInfo.getDocValues();
final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo);
- docValuesConsumer.merge(mergeStates);
- docValuesConsumer.finish(mergeState.mergedDocCount);
+ assert docValuesConsumer != null;
+ docValuesConsumer.merge(mergeState, docValues);
}
-
- // merge doc values
-//
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
index 22b04137d27..0ca72d38261 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
@@ -1,4 +1,5 @@
package org.apache.lucene.index.codecs.docvalues;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -18,9 +19,9 @@ package org.apache.lucene.index.codecs.docvalues;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
-import java.util.List;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.ValuesAttribute;
import org.apache.lucene.index.values.Writer;
@@ -28,22 +29,50 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+/**
+ * @lucene.experimental
+ */
+// TODO this might need to go in the codec package since it is a direct relative to
+// TermsConsumer
public abstract class DocValuesConsumer {
+
public abstract void add(int docID, ValuesAttribute attr) throws IOException;
public abstract void finish(int docCount) throws IOException;
public abstract void files(Collection files) throws IOException;
-
- public void merge(List states) throws IOException {
- for (MergeState state : states) {
- merge(state);
+
+ public void merge(org.apache.lucene.index.codecs.MergeState mergeState,
+ DocValues values) throws IOException {
+ // TODO we need some kind of compatibility notation for values such
+ // that two slightly different segments can be merged e.g. fixed vs.
+ // variable byte len or float32 vs. float64
+ int docBase = 0;
+ boolean merged = false;
+ /*
+ * We ignore the given DocValues here and merge from the subReaders directly
+ * to support bulk copies on the DocValues Writer level. If this gets merged
+ * with MultiDocValues the writer cannot optimize for bulk-copyable data
+ */
+ for (final IndexReader reader : mergeState.readers) {
+ final DocValues r = reader.docValues(mergeState.fieldInfo.name);
+ if (r != null) {
+ merged = true;
+ merge(new Writer.MergeState(r, docBase, reader.maxDoc(), reader
+ .getDeletedDocs()));
+ }
+ docBase += reader.numDocs();
}
+ if (merged)
+ finish(mergeState.mergedDocCount);
}
-
+
protected abstract void merge(MergeState mergeState) throws IOException;
-
-
+
+ /*
+ * specialized auxiliary MergeState is necessary since we don't want to
+ * expose index internals up to the codec level
+ */
public static class MergeState {
public final DocValues reader;
public final int docBase;
@@ -59,9 +88,10 @@ public abstract class DocValuesConsumer {
}
}
- public static DocValuesConsumer create(String segmentName, Directory directory,
- FieldInfo field, Comparator comp) throws IOException {
+ public static DocValuesConsumer create(String segmentName,
+ Directory directory, FieldInfo field, Comparator comp)
+ throws IOException {
final String id = segmentName + "_" + field.number;
- return Writer.create(field.getIndexValues(), id, directory, comp);
+ return Writer.create(field.getDocValues(), id, directory, comp);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
index ce016755455..8cdc41b5161 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
@@ -51,13 +51,13 @@ public abstract class DocValuesProducerBase extends FieldsProducer{
final int numFields = fieldInfos.size();
for (int i = 0; i < numFields; i++) {
final FieldInfo fieldInfo = fieldInfos.fieldInfo(i);
- final Values v = fieldInfo.getIndexValues();
+ final Values v = fieldInfo.getDocValues();
final String field = fieldInfo.name;
final String id = IndexFileNames.segmentFileName(segment, Integer
- .toString(fieldInfo.number), "");
- if (v != null && dir.fileExists(id + "." + Writer.DATA_EXTENSION)) {
+ .toString(fieldInfo.number),"");
+ if (v != null && dir.fileExists(id + "." + Writer.DATA_EXTENSION)) {
docValues.put(field, loadDocValues(docCount, dir, id, v));
- }
+ }
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index bd9fd4544f4..fb1d2738d8e 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -287,7 +287,9 @@ public final class Bytes {
return idxIn == null ? null : (IndexInput) idxIn.clone();
}
+ @Override
public void close() throws IOException {
+ super.close();
if (datIn != null) {
datIn.close();
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Cache.java b/lucene/src/java/org/apache/lucene/index/values/Cache.java
deleted file mode 100644
index 711e11cdb3d..00000000000
--- a/lucene/src/java/org/apache/lucene/index/values/Cache.java
+++ /dev/null
@@ -1,116 +0,0 @@
-package org.apache.lucene.index.values;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.values.DocValues.SortedSource;
-import org.apache.lucene.index.values.DocValues.Source;
-import org.apache.lucene.util.BytesRef;
-
-public class Cache {
- final IndexReader r;
- // TODO(simonw): use WeakHashMaps instead here?
- final Map ints = new HashMap();
- final Map floats = new HashMap();
- final Map bytes = new HashMap();
- final Map sortedBytes = new HashMap();
-
- public Cache(IndexReader r) {
- this.r = r;
- }
-
- synchronized public Source getInts(String id) throws IOException {
- Source s = ints.get(id);
- if (s == null) {
- final DocValues indexValues = r.docValues(id);
- if (indexValues == null) {
- return null;
- }
- s = indexValues.load();
- ints.put(id, s);
- }
-
- return s;
- }
-
- synchronized public Source getFloats(String id) throws IOException {
- Source s = floats.get(id);
- if (s == null) {
- final DocValues indexValues = r.docValues(id);
- if (indexValues == null) {
- return null;
- }
- s = indexValues.load();
- floats.put(id, s);
- }
-
- return s;
- }
-
- synchronized public SortedSource getSortedBytes(String id,
- Comparator comp) throws IOException {
- SortedSource s = sortedBytes.get(id);
- if (s == null) {
- final DocValues indexValues = r.docValues(id);
- if (indexValues == null) {
- return null;
- }
- s = indexValues.loadSorted(comp);
- sortedBytes.put(id, s);
- } else {
- // TODO(simonw): verify comp is the same!
- }
-
- return s;
- }
-
- synchronized public Source getBytes(String id) throws IOException {
- Source s = bytes.get(id);
- if (s == null) {
- final DocValues indexValues = r.docValues(id);
- if (indexValues == null) {
- return null;
- }
- s = indexValues.load();
- bytes.put(id, s);
- }
-
- return s;
- }
-
- public void purgeInts(String id) {
- ints.remove(id);
- }
-
- public void purgeFloats(String id) {
- floats.remove(id);
- }
-
- public void purgeBytes(String id) {
- bytes.remove(id);
- }
-
- public void purgeSortedBytes(String id) {
- sortedBytes.remove(id);
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
index 501a2c981fe..44a2ae05722 100644
--- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
@@ -24,24 +24,48 @@ import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
public abstract class DocValues implements Closeable {
-
-
+
+ private final Object lock = new Object();
+
+ private Source cachedReference;
+
public static final DocValues[] EMPTY_ARRAY = new DocValues[0];
- public ValuesEnum getEnum() throws IOException{
+ public ValuesEnum getEnum() throws IOException {
return getEnum(null);
}
- public abstract ValuesEnum getEnum(AttributeSource attrSource) throws IOException;
+ public abstract ValuesEnum getEnum(AttributeSource attrSource)
+ throws IOException;
public abstract Source load() throws IOException;
- public SortedSource loadSorted(Comparator comparator) throws IOException {
+ public Source getCached(boolean load) throws IOException {
+ synchronized (lock) { // TODO make sorted source cachable too
+ if (load && cachedReference == null)
+ cachedReference = load();
+ return cachedReference;
+ }
+ }
+
+ public Source releaseCached() {
+ synchronized (lock) {
+ final Source retVal = cachedReference;
+ cachedReference = null;
+ return retVal;
+ }
+ }
+
+ public SortedSource loadSorted(Comparator comparator)
+ throws IOException {
throw new UnsupportedOperationException();
}
-
+
public abstract Values type();
+ public void close() throws IOException {
+ releaseCached();
+ }
/**
* Source of integer (returned as java long), per document. The underlying
@@ -50,30 +74,34 @@ public abstract class DocValues implements Closeable {
*/
public static abstract class Source {
- public long ints(int docID) {
+ public long getInt(int docID) {
throw new UnsupportedOperationException("ints are not supported");
}
- public double floats(int docID) {
+ public double getFloat(int docID) {
throw new UnsupportedOperationException("floats are not supported");
}
- public BytesRef bytes(int docID) {
+ public BytesRef getBytes(int docID) {
throw new UnsupportedOperationException("bytes are not supported");
}
-
- /** Returns number of unique values. Some impls may
- * throw UnsupportedOperationException. */
+
+ /**
+ * Returns number of unique values. Some impls may throw
+ * UnsupportedOperationException.
+ */
public int getValueCount() {
throw new UnsupportedOperationException();
}
-
- public ValuesEnum getEnum() throws IOException{
+
+ public ValuesEnum getEnum() throws IOException {
return getEnum(null);
}
-
- // nocommit - enable obtaining enum from source since this is already in memory
- public /*abstract*/ ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+
+ // nocommit - enable obtaining enum from source since this is already in
+ // memory
+ public/* abstract */ValuesEnum getEnum(AttributeSource attrSource)
+ throws IOException {
throw new UnsupportedOperationException();
}
@@ -83,7 +111,7 @@ public abstract class DocValues implements Closeable {
public static abstract class SortedSource extends Source {
@Override
- public BytesRef bytes(int docID) {
+ public BytesRef getBytes(int docID) {
return getByOrd(ord(docID));
}
@@ -109,5 +137,5 @@ public abstract class DocValues implements Closeable {
*/
public abstract LookupResult getByValue(BytesRef value);
}
-
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
index 7e30711b465..b1e2449cbfd 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
@@ -157,7 +157,7 @@ class FixedDerefBytesImpl {
}
@Override
- public BytesRef bytes(int docID) {
+ public BytesRef getBytes(int docID) {
final int id = (int) index.get(docID);
if (id == 0) {
return defaultValue;
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
index 3566e336764..6df5217788b 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
@@ -150,7 +150,7 @@ class FixedStraightBytesImpl {
}
@Override
- public BytesRef bytes(int docID) {
+ public BytesRef getBytes(int docID) {
bytesRef.offset = docID * size;
return bytesRef;
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index e343565c9b5..36dc00fadd5 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -270,7 +270,7 @@ public class Floats {
}
@Override
- public double floats(int docID) {
+ public double getFloat(int docID) {
final float f = values.get(docID);
// nocommit should we return NaN as default instead of 0.0?
return Float.isNaN(f) ? 0.0f : f;
@@ -290,9 +290,9 @@ public class Floats {
}
@Override
- public double floats(int docID) {
+ public double getFloat(int docID) {
final double d = values.get(docID);
- // nocommit should we return NaN as default instead of 0.0?
+ // TODO should we return NaN as default instead of 0.0?
return Double.isNaN(d) ? 0.0d : d;
}
@@ -302,7 +302,9 @@ public class Floats {
}
}
+ @Override
public void close() throws IOException {
+ super.close();
datIn.close();
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
index cd6216a2530..77a78c25121 100644
--- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
@@ -1,4 +1,5 @@
package org.apache.lucene.index.values;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,196 +17,214 @@ package org.apache.lucene.index.values;
* limitations under the License.
*/
import java.io.IOException;
-import java.util.List;
+import java.util.Arrays;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
+import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
-import org.apache.lucene.util.ReaderUtil.Slice;
+import org.apache.lucene.util.ReaderUtil;
public class MultiDocValues extends DocValues {
- public static class DocValuesIndex {
+ public static class DocValuesIndex { // nocommit is this necessary?
public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0];
- final int subIndex;
+ final int start;
+ final int length;
final DocValues docValues;
- public DocValuesIndex(DocValues docValues, int subIndex) {
+ public DocValuesIndex(DocValues docValues, int start, int length) {
this.docValues = docValues;
- this.subIndex = subIndex;
+ this.start = start;
+ this.length = length;
}
}
private DocValuesIndex[] docValuesIdx;
- private Slice[] subSlices;
+ private int[] starts;
- public MultiDocValues(Slice[] subSlices) {
- this.subSlices = subSlices;
+ public MultiDocValues() {
+ starts = new int[0];
+ docValuesIdx = new DocValuesIndex[0];
}
- public MultiDocValues(DocValuesIndex[] docValuesIdx, Slice[] subSlices) {
- this(subSlices);
+ public MultiDocValues(DocValuesIndex[] docValuesIdx) {
reset(docValuesIdx);
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
- return new MultiValuesEnum(subSlices, docValuesIdx, docValuesIdx[0].docValues.type());
+ return new MultiValuesEnum(docValuesIdx, starts);
}
@Override
public Source load() throws IOException {
- return new MultiSource(subSlices, docValuesIdx);
+ return new MultiSource(docValuesIdx, starts);
}
public void close() throws IOException {
- //
+ super.close();
}
public DocValues reset(DocValuesIndex[] docValuesIdx) {
+ int[] start = new int[docValuesIdx.length];
+ for (int i = 0; i < docValuesIdx.length; i++) {
+ start[i] = docValuesIdx[i].start;
+ }
+ this.starts = start;
this.docValuesIdx = docValuesIdx;
return this;
}
+ public static class DummyDocValues extends DocValues {
+ final int maxDoc;
+ final Values type;
+ static final Source DUMMY = new DummySource();
+
+ public DummyDocValues(int maxDoc, Values type) {
+ this.type = type;
+ this.maxDoc = maxDoc;
+ }
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ return new DummyEnum(attrSource, maxDoc, type);
+ }
+
+ @Override
+ public Source load() throws IOException {
+ return DUMMY;
+ }
+
+ @Override
+ public Source getCached(boolean load) throws IOException {
+ return DUMMY;
+ }
+
+ @Override
+ public Source releaseCached() {
+ return DUMMY;
+ }
+
+ @Override
+ public Values type() {
+ return type;
+ }
+
+ public void close() throws IOException {
+ super.close();
+ }
+
+ }
+
private static class MultiValuesEnum extends ValuesEnum {
- private int numDocs_ = 0;
- private int pos = -1;
- private int start = 0;
- private ValuesEnum current;
- private Slice[] subSlices;
private DocValuesIndex[] docValuesIdx;
private final int maxDoc;
+ private int currentStart;
+ private int currentMax;
+ private int currentDoc = -1;
+ private ValuesEnum currentEnum;
+ private final int[] starts;
- public MultiValuesEnum(Slice[] subSlices, DocValuesIndex[] docValuesIdx, Values type) {
- super(type);
- this.subSlices = subSlices;
+ public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts)
+ throws IOException {
+ super(docValuesIdx[0].docValues.type());
this.docValuesIdx = docValuesIdx;
- Slice slice = subSlices[subSlices.length-1];
- maxDoc = slice.start + slice.length;
+ final DocValuesIndex last = docValuesIdx[docValuesIdx.length - 1];
+ maxDoc = last.start + last.length;
+ final DocValuesIndex idx = docValuesIdx[0];
+ currentEnum = idx.docValues.getEnum(this.attributes());
+ currentMax = idx.length;
+ currentStart = 0;
+ this.starts = starts;
}
@Override
public void close() throws IOException {
-
+ currentEnum.close();
}
@Override
public int advance(int target) throws IOException {
-// int n = target - start;
-// do {
-// if (target >= maxDoc)
-// return pos = NO_MORE_DOCS;
-// if (n >= numDocs_) {
-// int idx = readerIndex(target);
-// if (enumCache[idx] == null) {
-// try {
-// DocValues indexValues = subReaders[idx].docValues(id);
-// if (indexValues != null) // nocommit does that work with default
-// // values?
-// enumCache[idx] = indexValues.getEnum(this.attributes());
-// else
-// enumCache[idx] = new DummyEnum(this.attributes(),
-// subSlices[idx].length, attr.type());
-// } catch (IOException ex) {
-// // nocommit what to do here?
-// throw new RuntimeException(ex);
-// }
-// }
-// current = enumCache[idx];
-// start = subSlices[idx].start;
-// numDocs_ = subSlices[idx].length;
-// n = target - start;
-// }
-// target = start + numDocs_;
-// } while ((n = current.advance(n)) == NO_MORE_DOCS);
- return pos = start + current.docID();
+ assert target > currentDoc : "target " + target
+ + " must be > than the current doc " + currentDoc;
+ int relativeDoc = target - currentStart;
+ do {
+ if (target >= maxDoc) // we are beyond max doc
+ return currentDoc = NO_MORE_DOCS;
+ if (target >= currentMax) {
+ final int idx = ReaderUtil.subIndex(target, starts);
+ currentEnum.close();
+ currentEnum = docValuesIdx[idx].docValues.getEnum(this.attributes());
+ currentStart = docValuesIdx[idx].start;
+ currentMax = currentStart + docValuesIdx[idx].length;
+ relativeDoc = target - currentStart;
+ } else {
+ return currentDoc = currentStart + currentEnum.advance(relativeDoc);
+ }
+ } while ((relativeDoc = currentEnum.advance(relativeDoc)) == NO_MORE_DOCS);
+ return currentDoc = currentStart + relativeDoc;
}
@Override
public int docID() {
- return pos;
+ return currentDoc;
}
@Override
public int nextDoc() throws IOException {
- return advance(pos + 1);
+ return advance(currentDoc + 1);
}
}
- private class MultiSource extends Source {
- private int numDocs_ = 0;
+ private static class MultiSource extends Source {
+ private int numDocs = 0;
private int start = 0;
private Source current;
- private Slice[] subSlices;
- private DocValuesIndex[] docVAluesIdx;
+ private final int[] starts;
+ private final DocValuesIndex[] docValuesIdx;
+
+ public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts) {
+ this.docValuesIdx = docValuesIdx;
+ this.starts = starts;
- public MultiSource(Slice[] subSlices, DocValuesIndex[] docValuesIdx) {
- this.subSlices = subSlices;
- this.docVAluesIdx = docValuesIdx;
}
- public long ints(int docID) {
-// int n = docID - start;
-// if (n >= numDocs_) {
-// int idx = readerIndex(docID);
-// try {
-// current = subReaders[idx].getIndexValuesCache().getInts(id);
-// if (current == null) // nocommit does that work with default values?
-// current = new DummySource();
-// } catch (IOException ex) {
-// // nocommit what to do here?
-// throw new RuntimeException(ex);
-// }
-// start = starts[idx];
-// numDocs_ = subReaders[idx].maxDoc();
-// n = docID - start;
-// }
-// return current.ints(n);
- return 0l;
+ public long getInt(int docID) {
+ final int doc = ensureSource(docID);
+ return current.getInt(doc);
}
- public double floats(int docID) {
-// int n = docID - start;
-// if (n >= numDocs_) {
-// int idx = readerIndex(docID);
-// try {
-// current = subReaders[idx].getIndexValuesCache().getFloats(id);
-// if (current == null) // nocommit does that work with default values?
-// current = new DummySource();
-// } catch (IOException ex) {
-// // nocommit what to do here?
-// throw new RuntimeException(ex);
-// }
-// numDocs_ = subReaders[idx].maxDoc();
-//
-// start = starts[idx];
-// n = docID - start;
-// }
-// return current.floats(n);
- return 0d;
+ private final int ensureSource(int docID) {
+ int n = docID - start;
+ if (n >= numDocs) {
+ final int idx = ReaderUtil.subIndex(docID, starts);
+ assert idx >= 0 && idx < docValuesIdx.length : "idx was " + idx
+ + " for doc id: " + docID + " slices : " + Arrays.toString(starts);
+ assert docValuesIdx[idx] != null;
+ try {
+ current = docValuesIdx[idx].docValues.load();
+ } catch (IOException e) {
+ throw new RuntimeException("load failed", e); // TODO how should we
+ // handle this
+ }
+
+ start = docValuesIdx[idx].start;
+ numDocs = docValuesIdx[idx].length;
+ n = docID - start;
+ }
+ return n;
}
- public BytesRef bytes(int docID) {
-// int n = docID - start;
-// if (n >= numDocs_) {
-// int idx = readerIndex(docID);
-// try {
-// current = subReaders[idx].getIndexValuesCache().getBytes(id);
-// if (current == null) // nocommit does that work with default values?
-// current = new DummySource();
-// } catch (IOException ex) {
-// // nocommit what to do here?
-// throw new RuntimeException(ex);
-// }
-// numDocs_ = subReaders[idx].maxDoc();
-// start = starts[idx];
-// n = docID - start;
-// }
-// return current.bytes(n);
- return null;
+ public double getFloat(int docID) {
+ final int doc = ensureSource(docID);
+ return current.getFloat(doc);
+ }
+
+ public BytesRef getBytes(int docID) {
+ final int doc = ensureSource(docID);
+ return current.getBytes(doc);
}
public long ramBytesUsed() {
@@ -218,17 +237,17 @@ public class MultiDocValues extends DocValues {
private final BytesRef ref = new BytesRef();
@Override
- public BytesRef bytes(int docID) {
+ public BytesRef getBytes(int docID) {
return ref;
}
@Override
- public double floats(int docID) {
+ public double getFloat(int docID) {
return 0.0d;
}
@Override
- public long ints(int docID) {
+ public long getInt(int docID) {
return 0;
}
@@ -296,5 +315,4 @@ public class MultiDocValues extends DocValues {
public Values type() {
return this.docValuesIdx[0].docValues.type();
}
-
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
index 64735a6597c..f0c7a6cdcf5 100644
--- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
@@ -179,8 +179,8 @@ class PackedIntsImpl {
}
@Override
- public long ints(int docID) {
- // nocommit -- can we somehow avoid 2X method calls
+ public long getInt(int docID) {
+ // TODO -- can we somehow avoid 2X method calls
// on each get? must push minValue down, and make
// PackedInts implement Ints.Source
final long val = values.get(docID);
@@ -195,7 +195,9 @@ class PackedIntsImpl {
}
}
+ @Override
public void close() throws IOException {
+ super.close();
datIn.close();
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java
index c806b1650f7..d7d613c0510 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Values.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Values.java
@@ -34,7 +34,7 @@ public enum Values {
SIMPLE_FLOAT_4BYTE,
SIMPLE_FLOAT_8BYTE,
- // nocommit -- shouldn't lucene decide/detect straight vs
+ // TODO(simonw): -- shouldn't lucene decide/detect straight vs
// deref, as well fixed vs var?
BYTES_FIXED_STRAIGHT,
BYTES_FIXED_DEREF,
@@ -44,5 +44,5 @@ public enum Values {
BYTES_VAR_DEREF,
BYTES_VAR_SORTED
- // nocommit -- need STRING variants as well
+ // TODO(simonw): -- need STRING variants as well
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
index dccbd3bba08..9ab2adc6dde 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -135,7 +135,7 @@ class VarDerefBytesImpl {
idxOut.writeInt(address-1);
// write index
- // nocommit -- allow forcing fixed array (not -1)
+ // TODO(simonw): -- allow forcing fixed array (not -1)
// TODO(simonw): check the address calculation / make it more intuitive
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1));
final int limit;
@@ -188,7 +188,7 @@ class VarDerefBytesImpl {
}
@Override
- public BytesRef bytes(int docID) {
+ public BytesRef getBytes(int docID) {
int address = (int) index.get(docID);
if (address == 0) {
assert defaultValue.length == 0: " default value manipulated";
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
index c8536d8dc0c..7b291520a16 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
@@ -120,7 +120,7 @@ class VarSortedBytesImpl {
idxOut.writeLong(offset);
// write index -- first doc -> 1+ord
- // nocommit -- allow not -1:
+ // TODO(simonw): allow not -1:
final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut,
docCount, PackedInts.bitsRequired(count));
final int limit = docCount > docToEntry.length ? docToEntry.length
@@ -135,7 +135,7 @@ class VarSortedBytesImpl {
indexWriter.finish();
// next ord (0-based) -> offset
- // nocommit -- allow not -1:
+ // TODO(simonw): -- allow not -1:
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
PackedInts.bitsRequired(lastOffset));
for (int i = 0; i < count; i++) {
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
index 436a9799fc4..f747bb06139 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
@@ -82,14 +82,15 @@ class VarStraightBytesImpl {
@Override
synchronized public void finish(int docCount) throws IOException {
- if (datOut == null)
+ if (datOut == null) {
return;
+ }
initIndexOut();
// write all lengths to index
// write index
fill(docCount);
idxOut.writeVInt(address);
- // nocommit -- allow not -1
+ // TODO(simonw): allow not -1
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(address));
for (int i = 0; i < docCount; i++) {
@@ -136,7 +137,7 @@ class VarStraightBytesImpl {
}
@Override
- public BytesRef bytes(int docID) {
+ public BytesRef getBytes(int docID) {
final int address = (int) addresses.get(docID);
bytesRef.offset = address;
if (docID == maxDoc - 1) {
diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
index 2b322d6d4f8..49ae53060b6 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
@@ -336,7 +336,7 @@ public abstract class FieldComparator {
@Override
public int compareBottom(int doc) {
- final double v2 = currentReaderValues.floats(doc);
+ final double v2 = currentReaderValues.getFloat(doc);
if (bottom > v2) {
return 1;
} else if (bottom < v2) {
@@ -348,12 +348,12 @@ public abstract class FieldComparator {
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues.floats(doc);
+ values[slot] = currentReaderValues.getFloat(doc);
}
@Override
public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException {
- currentReaderValues = reader.getIndexValuesCache().getFloats(field);
+ currentReaderValues = reader.docValues(field).getCached(true);
return this;
}
@@ -538,7 +538,7 @@ public abstract class FieldComparator {
public int compareBottom(int doc) {
// TODO: there are sneaky non-branch ways to compute
// -1/+1/0 sign
- final long v2 = currentReaderValues.ints(doc);
+ final long v2 = currentReaderValues.getInt(doc);
if (bottom > v2) {
return 1;
} else if (bottom < v2) {
@@ -550,12 +550,12 @@ public abstract class FieldComparator {
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues.ints(doc);
+ values[slot] = currentReaderValues.getInt(doc);
}
@Override
public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException {
- currentReaderValues = reader.getIndexValuesCache().getInts(field);
+ currentReaderValues = reader.docValues(field).getCached(true);
return this;
}
diff --git a/lucene/src/java/org/apache/lucene/search/SortField.java b/lucene/src/java/org/apache/lucene/search/SortField.java
index 623b785489a..0e512de4a9f 100644
--- a/lucene/src/java/org/apache/lucene/search/SortField.java
+++ b/lucene/src/java/org/apache/lucene/search/SortField.java
@@ -32,7 +32,7 @@ import org.apache.lucene.search.cache.ShortValuesCreator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.BytesRef;
-// nocommit -- for cleaner transition, maybe we should make
+// TODO(simonw) -- for cleaner transition, maybe we should make
// a new SortField that subclasses this one and always uses
// index values?
diff --git a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java
index 7d971e989b7..875e6208195 100644
--- a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java
+++ b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java
@@ -173,4 +173,26 @@ public class ReaderUtil {
}
return hi;
}
+
+ public static int subIndex(int n, Slice[] slices) {
+ // searcher/reader for doc n:
+ int size = slices.length;
+ int lo = 0; // search starts array
+ int hi = size - 1; // for first element less than n, return its index
+ while (hi >= lo) {
+ int mid = (lo + hi) >>> 1;
+ int midValue = slices[mid].start;
+ if (n < midValue)
+ hi = mid - 1;
+ else if (n > midValue)
+ lo = mid + 1;
+ else { // found a match
+ while (mid + 1 < size && slices[mid + 1].start == midValue) {
+ mid++; // scan to last match
+ }
+ return mid;
+ }
+ }
+ return hi;
+ }
}
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
index 3e046296f4b..4677935740d 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
@@ -70,19 +70,19 @@ public class TestIndexValues extends LuceneTestCase {
public static void beforeClassLuceneTestCaseJ4() {
LuceneTestCase.beforeClassLuceneTestCaseJ4();
final CodecProvider cp = CodecProvider.getDefault();
- docValuesCodec = new DocValuesCodec(cp.lookup(CodecProvider.getDefaultCodec()));
+ docValuesCodec = new DocValuesCodec(cp.lookup(CodecProvider
+ .getDefaultCodec()));
cp.register(docValuesCodec);
CodecProvider.setDefaultCodec(docValuesCodec.name);
}
-
+
@AfterClass
public static void afterClassLuceneTestCaseJ4() {
final CodecProvider cp = CodecProvider.getDefault();
cp.unregister(docValuesCodec);
- LuceneTestCase.afterClassLuceneTestCaseJ4();
+ LuceneTestCase.afterClassLuceneTestCaseJ4();
}
-
-
+
public void testBytesStraight() throws IOException {
runTestBytes(Bytes.Mode.STRAIGHT, true);
runTestBytes(Bytes.Mode.STRAIGHT, false);
@@ -164,14 +164,14 @@ public class TestIndexValues extends LuceneTestCase {
if (mode == Bytes.Mode.SORTED) {
s = ss = r.loadSorted(comp);
} else {
- s = r.load();
+ s = getSource(r);
ss = null;
}
for (int i = 0; i < 100; i++) {
final int idx = 2 * i;
- assertNotNull("doc " + idx + "; value=" + values[idx], s.bytes(idx));
- assertEquals("doc " + idx, values[idx], s.bytes(idx).utf8ToString());
+ assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx));
+ assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString());
if (ss != null) {
assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
.utf8ToString());
@@ -247,9 +247,9 @@ public class TestIndexValues extends LuceneTestCase {
DocValues r = Ints.getValues(dir, "test", useFixedArrays);
for (int iter = 0; iter < 2; iter++) {
- Source s = r.load();
+ Source s = getSource(r);
for (int i = 0; i < NUM_VALUES; i++) {
- final long v = s.ints(i);
+ final long v = s.getInt(i);
assertEquals("index " + i + " b: " + b, values[i], v);
}
}
@@ -311,9 +311,9 @@ public class TestIndexValues extends LuceneTestCase {
DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues);
for (int iter = 0; iter < 2; iter++) {
- Source s = r.load();
+ Source s = getSource(r);
for (int i = 0; i < NUM_VALUES; i++) {
- assertEquals(values[i], s.floats(i), 0.0f);
+ assertEquals(values[i], s.getFloat(i), 0.0f);
}
}
@@ -437,12 +437,12 @@ public class TestIndexValues extends LuceneTestCase {
case PACKED_INTS:
case PACKED_INTS_FIXED: {
DocValues intsReader = getDocValues(r, val.name());
- Source ints = intsReader.load();
+ Source ints = getSource(intsReader);
ValuesEnum intsEnum = intsReader.getEnum();
assertNotNull(intsEnum);
LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
for (int i = 0; i < base; i++) {
- assertEquals(0, ints.ints(i));
+ assertEquals(0, ints.getInt(i));
assertEquals(val.name() + " base: " + base + " index: " + i, i,
random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc());
assertEquals(0, enumRef.get());
@@ -454,8 +454,8 @@ public class TestIndexValues extends LuceneTestCase {
}
assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ " docs", i, intsEnum.advance(i));
- assertEquals(expected, ints.ints(i));
assertEquals(expected, enumRef.get());
+ assertEquals(expected, ints.getInt(i));
}
}
@@ -463,14 +463,16 @@ public class TestIndexValues extends LuceneTestCase {
case SIMPLE_FLOAT_4BYTE:
case SIMPLE_FLOAT_8BYTE: {
DocValues floatReader = getDocValues(r, val.name());
- Source floats = floatReader.load();
+ assertNotNull(floatReader);
+ Source floats = getSource(floatReader);
ValuesEnum floatEnum = floatReader.getEnum();
assertNotNull(floatEnum);
FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class)
.floats();
for (int i = 0; i < base; i++) {
- assertEquals(0.0d, floats.floats(i), 0.0d);
+ assertEquals(" floats failed for doc: " + i + " base: " + base, 0.0d,
+ floats.getFloat(i), 0.0d);
assertEquals(i, random.nextBoolean() ? floatEnum.advance(i)
: floatEnum.nextDoc());
assertEquals("index " + i, 0.0, enumRef.get(), 0.0);
@@ -483,7 +485,8 @@ public class TestIndexValues extends LuceneTestCase {
assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ " docs base:" + base, i, floatEnum.advance(i));
assertEquals("index " + i, 2.0 * expected, enumRef.get(), 0.00001);
- assertEquals("index " + i, 2.0 * expected, floats.floats(i), 0.00001);
+ assertEquals("index " + i, 2.0 * expected, floats.getFloat(i),
+ 0.00001);
}
}
break;
@@ -505,15 +508,13 @@ public class TestIndexValues extends LuceneTestCase {
Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED,
Values.BYTES_VAR_STRAIGHT);
- private static EnumSet STRAIGHT_BYTES = EnumSet.of(
- Values.BYTES_FIXED_STRAIGHT, Values.BYTES_VAR_STRAIGHT);
-
private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS,
Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE,
Values.SIMPLE_FLOAT_8BYTE);
private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
- Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS };
+ Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
+ Index.NO };
private OpenBitSet indexValues(IndexWriter w, int numValues, Values value,
List valueVarList, boolean withDeletions, int multOfSeven)
@@ -521,9 +522,10 @@ public class TestIndexValues extends LuceneTestCase {
final boolean isNumeric = NUMERICS.contains(value);
OpenBitSet deleted = new OpenBitSet(numValues);
Document doc = new Document();
+ Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)];
Fieldable field = random.nextBoolean() ? new ValuesField(value.name())
: newField(value.name(), _TestUtil.randomRealisticUnicodeString(random,
- 10), IDX_VALUES[random.nextInt(IDX_VALUES.length)]);
+ 10), idx == Index.NO ? Store.YES : Store.NO, idx);
doc.add(field);
ValuesAttribute valuesAttribute = ValuesField.values(field);
@@ -582,9 +584,10 @@ public class TestIndexValues extends LuceneTestCase {
}
w.commit();
- // nocommit test unoptimized with deletions
- if (true || withDeletions || random.nextBoolean())
- w.optimize();
+ // TODO test unoptimized with deletions
+ if (withDeletions || random.nextBoolean())
+ ;
+ w.optimize();
return deleted;
}
@@ -593,10 +596,9 @@ public class TestIndexValues extends LuceneTestCase {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
final List byteVariantList = new ArrayList(BYTES);
-
// run in random order to test if fill works correctly during merges
Collections.shuffle(byteVariantList, random);
- final int numValues = 350;
+ final int numValues = 333 + random.nextInt(150);
for (Values byteIndexValue : byteVariantList) {
List closeables = new ArrayList();
@@ -607,11 +609,10 @@ public class TestIndexValues extends LuceneTestCase {
assertEquals(0, r.numDeletedDocs());
final int numRemainingValues = (int) (numValues - deleted.cardinality());
final int base = r.numDocs() - numRemainingValues;
-
DocValues bytesReader = getDocValues(r, byteIndexValue.name());
assertNotNull("field " + byteIndexValue.name()
+ " returned null reader - maybe merged failed", bytesReader);
- Source bytes = bytesReader.load();
+ Source bytes = getSource(bytesReader);
ValuesEnum bytesEnum = bytesReader.getEnum();
assertNotNull(bytesEnum);
final ValuesAttribute attr = bytesEnum
@@ -619,7 +620,7 @@ public class TestIndexValues extends LuceneTestCase {
byte upto = 0;
// test the filled up slots for correctness
for (int i = 0; i < base; i++) {
- final BytesRef br = bytes.bytes(i);
+ final BytesRef br = bytes.getBytes(i);
String msg = " field: " + byteIndexValue.name() + " at index: " + i
+ " base: " + base + " numDocs:" + r.numDocs();
switch (byteIndexValue) {
@@ -645,7 +646,7 @@ public class TestIndexValues extends LuceneTestCase {
default:
assertNotNull("expected none null - " + msg, br);
if (br.length != 0) {
- bytes.bytes(i);
+ bytes.getBytes(i);
}
assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0,
br.length);
@@ -665,7 +666,7 @@ public class TestIndexValues extends LuceneTestCase {
upto += bytesSize;
}
- BytesRef br = bytes.bytes(i);
+ BytesRef br = bytes.getBytes(i);
if (bytesEnum.docID() != i)
assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum
.advance(i));
@@ -692,10 +693,9 @@ public class TestIndexValues extends LuceneTestCase {
private DocValues getDocValues(IndexReader reader, String field)
throws IOException {
boolean optimized = reader.isOptimized();
- Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields() : MultiFields
- .getFields(reader);
-// return fields.docValues(field);
- switch (random.nextInt(optimized ? 3 : 2)) {
+ Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields()
+ : MultiFields.getFields(reader);
+ switch (random.nextInt(optimized ? 3 : 2)) { // case 2 only if optimized
case 0:
return fields.docValues(field);
case 1:
@@ -706,10 +706,14 @@ public class TestIndexValues extends LuceneTestCase {
return iterator.docValues();
}
throw new RuntimeException("no such field " + field);
- case 2:
+ case 2:// this only works if we are on an optimized index!
return reader.getSequentialSubReaders()[0].docValues(field);
}
-throw new RuntimeException();
-}
+ throw new RuntimeException();
+ }
+
+ private Source getSource(DocValues values) throws IOException {
+ return random.nextBoolean() ? values.load() : values.getCached(true);
+ }
}
From 977bb062846ba65fe3c837efa8d8e601e60f0617 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Sat, 30 Oct 2010 06:58:15 +0000
Subject: [PATCH 010/116] removed unused function
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1029001 13f79535-47bb-0310-9956-ffa450edef68
---
.../org/apache/lucene/util/ReaderUtil.java | 22 -------------------
1 file changed, 22 deletions(-)
diff --git a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java
index 875e6208195..7d971e989b7 100644
--- a/lucene/src/java/org/apache/lucene/util/ReaderUtil.java
+++ b/lucene/src/java/org/apache/lucene/util/ReaderUtil.java
@@ -173,26 +173,4 @@ public class ReaderUtil {
}
return hi;
}
-
- public static int subIndex(int n, Slice[] slices) {
- // searcher/reader for doc n:
- int size = slices.length;
- int lo = 0; // search starts array
- int hi = size - 1; // for first element less than n, return its index
- while (hi >= lo) {
- int mid = (lo + hi) >>> 1;
- int midValue = slices[mid].start;
- if (n < midValue)
- hi = mid - 1;
- else if (n > midValue)
- lo = mid + 1;
- else { // found a match
- while (mid + 1 < size && slices[mid + 1].start == midValue) {
- mid++; // scan to last match
- }
- return mid;
- }
- }
- return hi;
- }
}
From aff234e85e438fdae512079565ffab82ae173ef6 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Sat, 30 Oct 2010 10:56:01 +0000
Subject: [PATCH 011/116] LUCENE-2700: Enabled MultiField tests & introduced
SourceCache
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1029017 13f79535-47bb-0310-9956-ffa450edef68
---
.../apache/lucene/index/MultiFieldsEnum.java | 6 +-
.../lucene/index/codecs/FieldsConsumer.java | 6 +-
.../apache/lucene/index/values/DocValues.java | 34 +++++-----
.../lucene/index/values/MultiDocValues.java | 15 +----
.../lucene/index/values/SourceCache.java | 65 +++++++++++++++++++
.../index/values/ValuesAttributeImpl.java | 12 ++--
.../apache/lucene/search/FieldComparator.java | 4 +-
.../lucene/index/values/TestIndexValues.java | 24 ++++---
8 files changed, 110 insertions(+), 56 deletions(-)
create mode 100644 lucene/src/java/org/apache/lucene/index/values/SourceCache.java
diff --git a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java
index 57505592a1b..f59587107e1 100644
--- a/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java
+++ b/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java
@@ -163,10 +163,9 @@ public final class MultiFieldsEnum extends FieldsEnum {
FieldsEnumWithSlice withSlice = enumWithSlices[i];
Slice slice = withSlice.slice;
final DocValues values = withSlice.fields.docValues();
-
final int start = slice.start;
final int length = slice.length;
- if (values != null) {
+ if (values != null && currentField.equals(withSlice.current)) {
if (docsUpto != start) {
type = values.type();
docValuesIndex.add(new MultiDocValues.DocValuesIndex(
@@ -176,9 +175,8 @@ public final class MultiFieldsEnum extends FieldsEnum {
docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
length));
docsUpto = start + length;
-
- } else if (i+1 == numEnums && !docValuesIndex.isEmpty()) {
+ } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) {
docValuesIndex.add(new MultiDocValues.DocValuesIndex(
new MultiDocValues.DummyDocValues(start, type), docsUpto, start
- docsUpto));
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
index de57c651824..e4e29b79bd1 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
@@ -67,7 +67,11 @@ public abstract class FieldsConsumer implements Closeable {
}
if (mergeState.fieldInfo.hasDocValues()) {
final DocValues docValues = fieldsEnum.docValues();
- assert docValues != null : "DocValues are null for " + mergeState.fieldInfo.getDocValues();
+ // TODO: is this assert values and if so when?
+// assert docValues != null : "DocValues are null for " + mergeState.fieldInfo.getDocValues();
+ if(docValues == null) { // for now just continue
+ continue;
+ }
final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo);
assert docValuesConsumer != null;
docValuesConsumer.merge(mergeState, docValues);
diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
index 44a2ae05722..2ed2192f831 100644
--- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
@@ -25,11 +25,8 @@ import org.apache.lucene.util.BytesRef;
public abstract class DocValues implements Closeable {
- private final Object lock = new Object();
-
- private Source cachedReference;
-
public static final DocValues[] EMPTY_ARRAY = new DocValues[0];
+ private SourceCache cache = new SourceCache.DirectSourceCache();
public ValuesEnum getEnum() throws IOException {
return getEnum(null);
@@ -40,20 +37,12 @@ public abstract class DocValues implements Closeable {
public abstract Source load() throws IOException;
- public Source getCached(boolean load) throws IOException {
- synchronized (lock) { // TODO make sorted source cachable too
- if (load && cachedReference == null)
- cachedReference = load();
- return cachedReference;
- }
+ public Source getSource() throws IOException {
+ return cache.load(this);
}
-
- public Source releaseCached() {
- synchronized (lock) {
- final Source retVal = cachedReference;
- cachedReference = null;
- return retVal;
- }
+
+ public SortedSource getSortedSorted(Comparator comparator) throws IOException {
+ return cache.laodSorted(this, comparator);
}
public SortedSource loadSorted(Comparator comparator)
@@ -64,7 +53,14 @@ public abstract class DocValues implements Closeable {
public abstract Values type();
public void close() throws IOException {
- releaseCached();
+ this.cache.close(this);
+ }
+
+ public void setCache(SourceCache cache) {
+ synchronized (this.cache) {
+ this.cache.close(this);
+ this.cache = cache;
+ }
}
/**
@@ -137,5 +133,5 @@ public abstract class DocValues implements Closeable {
*/
public abstract LookupResult getByValue(BytesRef value);
}
-
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
index 77a78c25121..0c458cf205e 100644
--- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
@@ -19,7 +19,6 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FloatsRef;
@@ -28,7 +27,7 @@ import org.apache.lucene.util.ReaderUtil;
public class MultiDocValues extends DocValues {
- public static class DocValuesIndex { // nocommit is this necessary?
+ public static class DocValuesIndex {
public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0];
final int start;
final int length;
@@ -96,17 +95,7 @@ public class MultiDocValues extends DocValues {
public Source load() throws IOException {
return DUMMY;
}
-
- @Override
- public Source getCached(boolean load) throws IOException {
- return DUMMY;
- }
-
- @Override
- public Source releaseCached() {
- return DUMMY;
- }
-
+
@Override
public Values type() {
return type;
diff --git a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java
new file mode 100644
index 00000000000..69037b28a68
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java
@@ -0,0 +1,65 @@
+package org.apache.lucene.index.values;
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.values.DocValues.SortedSource;
+import org.apache.lucene.index.values.DocValues.Source;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Per {@link DocValues} {@link Source} cache.
+ */
+public abstract class SourceCache {
+ public abstract Source load(DocValues values) throws IOException;
+
+ public abstract SortedSource laodSorted(DocValues values,
+ Comparator comp) throws IOException;
+
+ public abstract void invalidate(DocValues values);
+
+ public synchronized void close(DocValues values) {
+ invalidate(values);
+ }
+
+ public static final class DirectSourceCache extends SourceCache {
+ private Source ref;
+ private SortedSource sortedRef;
+
+ public synchronized Source load(DocValues values) throws IOException {
+ if (ref == null)
+ ref = values.load();
+ return ref;
+ }
+
+ public synchronized SortedSource laodSorted(DocValues values,
+ Comparator comp) throws IOException {
+ if (sortedRef == null)
+ sortedRef = values.loadSorted(comp);
+ return sortedRef;
+ }
+
+ public synchronized void invalidate(DocValues values) {
+ ref = null;
+ sortedRef = null;
+ }
+ }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
index fe504514ba7..6cd1e0294c7 100644
--- a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
@@ -62,7 +62,11 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut
@Override
public void clear() {
- // TODO
+ bytes = null;
+ ints = null;
+ floats = null;
+ type = null;
+ bytesComp = null;
}
@Override
@@ -91,9 +95,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut
}
}
- /* (non-Javadoc)
- * @see java.lang.Object#hashCode()
- */
@Override
public int hashCode() {
final int prime = 31;
@@ -105,9 +106,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut
return result;
}
- /* (non-Javadoc)
- * @see java.lang.Object#equals(java.lang.Object)
- */
@Override
public boolean equals(Object obj) {
if (this == obj)
diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
index 49ae53060b6..d5f34bb4e72 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
@@ -353,7 +353,7 @@ public abstract class FieldComparator {
@Override
public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException {
- currentReaderValues = reader.docValues(field).getCached(true);
+ currentReaderValues = reader.docValues(field).getSource();
return this;
}
@@ -555,7 +555,7 @@ public abstract class FieldComparator {
@Override
public FieldComparator setNextReader(IndexReader reader, int docBase) throws IOException {
- currentReaderValues = reader.docValues(field).getCached(true);
+ currentReaderValues = reader.docValues(field).getSource();
return this;
}
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
index 4677935740d..0912e0d1204 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
@@ -44,9 +44,9 @@ import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
import org.apache.lucene.index.values.DocValues.SortedSource;
import org.apache.lucene.index.values.DocValues.Source;
-import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
@@ -56,9 +56,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
-import org.junit.After;
import org.junit.AfterClass;
-import org.junit.Before;
import org.junit.BeforeClass;
public class TestIndexValues extends LuceneTestCase {
@@ -162,7 +160,7 @@ public class TestIndexValues extends LuceneTestCase {
Source s;
DocValues.SortedSource ss;
if (mode == Bytes.Mode.SORTED) {
- s = ss = r.loadSorted(comp);
+ s = ss = getSortedSource(r, comp);
} else {
s = getSource(r);
ss = null;
@@ -436,13 +434,16 @@ public class TestIndexValues extends LuceneTestCase {
switch (val) {
case PACKED_INTS:
case PACKED_INTS_FIXED: {
+ if(val == Values.PACKED_INTS_FIXED)
+ getDocValues(r, val.name());
DocValues intsReader = getDocValues(r, val.name());
Source ints = getSource(intsReader);
+
ValuesEnum intsEnum = intsReader.getEnum();
assertNotNull(intsEnum);
LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
for (int i = 0; i < base; i++) {
- assertEquals(0, ints.getInt(i));
+ assertEquals("index " + i, 0, ints.getInt(i));
assertEquals(val.name() + " base: " + base + " index: " + i, i,
random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc());
assertEquals(0, enumRef.get());
@@ -586,8 +587,7 @@ public class TestIndexValues extends LuceneTestCase {
// TODO test unoptimized with deletions
if (withDeletions || random.nextBoolean())
- ;
- w.optimize();
+ w.optimize();
return deleted;
}
@@ -598,7 +598,7 @@ public class TestIndexValues extends LuceneTestCase {
final List byteVariantList = new ArrayList(BYTES);
// run in random order to test if fill works correctly during merges
Collections.shuffle(byteVariantList, random);
- final int numValues = 333 + random.nextInt(150);
+ final int numValues = 179 + random.nextInt(151);
for (Values byteIndexValue : byteVariantList) {
List closeables = new ArrayList();
@@ -713,7 +713,11 @@ public class TestIndexValues extends LuceneTestCase {
}
private Source getSource(DocValues values) throws IOException {
- return random.nextBoolean() ? values.load() : values.getCached(true);
+ // getSource uses cache internally
+ return random.nextBoolean() ? values.load() : values.getSource();
+ }
+ private SortedSource getSortedSource(DocValues values, Comparator comparator) throws IOException {
+ // getSortedSource uses cache internally
+ return random.nextBoolean() ? values.loadSorted(comparator) : values.getSortedSorted(comparator);
}
-
}
From ee763f345b597660ada713c523b261dd21dbe9f4 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Fri, 12 Nov 2010 17:07:39 +0000
Subject: [PATCH 012/116] split DocValues TestCase into two and converted
 some nocommits to TODOs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1034471 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/index/DocFieldProcessor.java | 2 +-
.../codecs/docvalues/DocValuesCodec.java | 7 +-
.../org/apache/lucene/index/values/Bytes.java | 12 +-
.../apache/lucene/index/values/Floats.java | 3 +-
.../lucene/index/values/PackedIntsImpl.java | 2 +-
.../lucene/index/values/TestDocValues.java | 318 ++++++++++++++++++
...Values.java => TestDocValuesIndexing.java} | 297 +---------------
7 files changed, 342 insertions(+), 299 deletions(-)
create mode 100644 lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
rename lucene/src/test/org/apache/lucene/index/values/{TestIndexValues.java => TestDocValuesIndexing.java} (61%)
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index 097113a1532..dee260a976d 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -41,7 +41,7 @@ final class DocFieldProcessor extends DocConsumer {
final FieldInfos fieldInfos;
final DocFieldConsumer consumer;
final StoredFieldsWriter fieldsWriter;
- final private Map docValues = new HashMap();
+ final private Map docValues = new HashMap();
private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter
synchronized DocValuesConsumer docValuesConsumer(Directory dir,
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
index 821f766bd83..dec6204f30b 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
@@ -129,7 +129,8 @@ public class DocValuesCodec extends Codec {
throws IOException {
Set otherFiles = new HashSet();
other.files(dir, segmentInfo, otherFiles);
- for (String string : otherFiles) {
+ for (String string : otherFiles) { // under some circumstances we only write DocValues
+ // so other files will be added even if they don't exist
if (dir.fileExists(string))
files.add(string);
}
@@ -141,10 +142,6 @@ public class DocValuesCodec extends Codec {
files.add(file);
}
}
- // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
- // Writer.DATA_EXTENSION));
- // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
- // Writer.INDEX_EXTENSION));
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index fb1d2738d8e..3080cbecdb0 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -21,7 +21,6 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
-import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
@@ -59,7 +58,7 @@ public final class Bytes {
};
- // nocommit -- i shouldn't have to specify fixed? can
+ // TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
public static Writer getWriter(Directory dir, String id, Mode mode,
Comparator comp, boolean fixedSize) throws IOException {
@@ -240,11 +239,12 @@ public final class Bytes {
@Override
public void files(Collection files) throws IOException {
+ assert datOut != null;
files.add(IndexFileNames.segmentFileName(id, "",
IndexFileNames.CSF_DATA_EXTENSION));
- final String idxFile = IndexFileNames.segmentFileName(id, "",
+ if(idxOut != null) { // called after flush - so this must be initialized if needed or present
+ final String idxFile = IndexFileNames.segmentFileName(id, "",
IndexFileNames.CSF_INDEX_EXTENSION);
- if (dir.fileExists(idxFile)) { // TODO is this correct? could be initialized lazy
files.add(idxFile);
}
}
@@ -279,11 +279,11 @@ public final class Bytes {
}
protected final IndexInput cloneData() {
- // is never NULL
+ assert datIn != null;
return (IndexInput) datIn.clone();
}
- protected final IndexInput cloneIndex() {
+ protected final IndexInput cloneIndex() { // TODO assert here for null rather than return null
return idxIn == null ? null : (IndexInput) idxIn.clone();
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index 36dc00fadd5..f844bba40e4 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -19,8 +19,7 @@ import org.apache.lucene.util.RamUsageEstimator;
* Exposes writer/reader for floating point values. You can specify 4 (java
* float) or 8 (java double) byte precision.
*/
-//nocommit - add mmap version
-//nocommti - add bulk copy where possible
+//TODO - add bulk copy where possible
public class Floats {
private static final String CODEC_NAME = "SimpleFloats";
static final int VERSION_START = 0;
diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
index f0c7a6cdcf5..f37f7bbbdff 100644
--- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
@@ -41,7 +41,7 @@ class PackedIntsImpl {
static class IntsWriter extends Writer {
- // nocommit - can we bulkcopy this on a merge?
+ // TODO: can we bulkcopy this on a merge?
private LongsRef intsRef;
private long[] docToValue;
private long minValue;
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
new file mode 100644
index 00000000000..8086871678c
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
@@ -0,0 +1,318 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.values.DocValues.SortedSource;
+import org.apache.lucene.index.values.DocValues.Source;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util._TestUtil;
+
+public class TestDocValues extends LuceneTestCase {
+
+ // TODO -- for sorted test, do our own Sort of the
+ // values and verify it's identical
+
+ public void testBytesStraight() throws IOException {
+ runTestBytes(Bytes.Mode.STRAIGHT, true);
+ runTestBytes(Bytes.Mode.STRAIGHT, false);
+ }
+
+ public void testBytesDeref() throws IOException {
+ runTestBytes(Bytes.Mode.DEREF, true);
+ runTestBytes(Bytes.Mode.DEREF, false);
+ }
+
+ public void testBytesSorted() throws IOException {
+ runTestBytes(Bytes.Mode.SORTED, true);
+ runTestBytes(Bytes.Mode.SORTED, false);
+ }
+
+ public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize)
+ throws IOException {
+
+ final BytesRef bytesRef = new BytesRef();
+
+ final Comparator comp = mode == Bytes.Mode.SORTED ? BytesRef
+ .getUTF8SortedAsUnicodeComparator() : null;
+
+ Directory dir = newDirectory();
+ Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize);
+ int maxDoc = 220;
+ final String[] values = new String[maxDoc];
+ final int lenMin, lenMax;
+ if (fixedSize) {
+ lenMin = lenMax = 3 + random.nextInt(7);
+ } else {
+ lenMin = 1;
+ lenMax = 15 + random.nextInt(6);
+ }
+ for (int i = 0; i < 100; i++) {
+ final String s;
+ if (i > 0 && random.nextInt(5) <= 2) {
+ // use prior value
+ s = values[2 * random.nextInt(i)];
+ } else {
+ s = _TestUtil.randomUnicodeString(random, lenMin, lenMax);
+ }
+ values[2 * i] = s;
+
+ UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef);
+ w.add(2 * i, bytesRef);
+ }
+ w.finish(maxDoc);
+
+ DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc);
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum bytesEnum = r.getEnum();
+ assertNotNull("enum is null", bytesEnum);
+ ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
+ assertNotNull("attribute is null", attr);
+ BytesRef ref = attr.bytes();
+ assertNotNull("BytesRef is null - enum not initialized to use bytes",
+ attr);
+
+ for (int i = 0; i < 2; i++) {
+ final int idx = 2 * i;
+ assertEquals("doc: " + idx, idx, bytesEnum.advance(idx));
+ String utf8String = ref.utf8ToString();
+ assertEquals("doc: " + idx + " lenLeft: " + values[idx].length()
+ + " lenRight: " + utf8String.length(), values[idx], utf8String);
+ }
+ assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
+ assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1));
+
+ bytesEnum.close();
+ }
+
+ // Verify we can load source twice:
+ for (int iter = 0; iter < 2; iter++) {
+ Source s;
+ DocValues.SortedSource ss;
+ if (mode == Bytes.Mode.SORTED) {
+ s = ss = getSortedSource(r, comp);
+ } else {
+ s = getSource(r);
+ ss = null;
+ }
+
+ for (int i = 0; i < 100; i++) {
+ final int idx = 2 * i;
+ assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx));
+ assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString());
+ if (ss != null) {
+ assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
+ .utf8ToString());
+ DocValues.SortedSource.LookupResult result = ss
+ .getByValue(new BytesRef(values[idx]));
+ assertTrue(result.found);
+ assertEquals(ss.ord(idx), result.ord);
+ }
+ }
+
+ // Lookup random strings:
+ if (mode == Bytes.Mode.SORTED) {
+ final int numValues = ss.getValueCount();
+ for (int i = 0; i < 1000; i++) {
+ BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString(
+ random, lenMin, lenMax));
+ SortedSource.LookupResult result = ss.getByValue(bytesValue);
+ if (result.found) {
+ assert result.ord > 0;
+ assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord)));
+ int count = 0;
+ for (int k = 0; k < 100; k++) {
+ if (bytesValue.utf8ToString().equals(values[2 * k])) {
+ assertEquals(ss.ord(2 * k), result.ord);
+ count++;
+ }
+ }
+ assertTrue(count > 0);
+ } else {
+ assert result.ord >= 0;
+ if (result.ord == 0) {
+ final BytesRef firstRef = ss.getByOrd(1);
+ // random string was before our first
+ assertTrue(firstRef.compareTo(bytesValue) > 0);
+ } else if (result.ord == numValues) {
+ final BytesRef lastRef = ss.getByOrd(numValues);
+ // random string was after our last
+ assertTrue(lastRef.compareTo(bytesValue) < 0);
+ } else {
+ // random string fell between two of our values
+ final BytesRef before = (BytesRef) ss.getByOrd(result.ord)
+ .clone();
+ final BytesRef after = ss.getByOrd(result.ord + 1);
+ assertTrue(before.compareTo(bytesValue) < 0);
+ assertTrue(bytesValue.compareTo(after) < 0);
+
+ }
+ }
+ }
+ }
+ }
+
+ r.close();
+ dir.close();
+ }
+
+ public void testInts() throws IOException {
+ long maxV = 1;
+ final int NUM_VALUES = 1000;
+ final long[] values = new long[NUM_VALUES];
+ for (int rx = 1; rx < 63; rx++, maxV *= 2) {
+ for (int b = 0; b < 2; b++) {
+ Directory dir = newDirectory();
+ boolean useFixedArrays = b == 0;
+ Writer w = Ints.getWriter(dir, "test", useFixedArrays);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = random.nextLong() % (1 + maxV);
+ values[i] = v;
+ w.add(i, v);
+ }
+ final int additionalDocs = 1 + random.nextInt(9);
+ w.finish(NUM_VALUES + additionalDocs);
+
+ DocValues r = Ints.getValues(dir, "test", useFixedArrays);
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = getSource(r);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = s.getInt(i);
+ assertEquals("index " + i + " b: " + b, values[i], v);
+ }
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ LongsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ assertEquals(values[i], ints.get());
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ assertEquals("" + i, 0, ints.get());
+ }
+
+ iEnum.close();
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ LongsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
+ assertEquals(i, iEnum.advance(i));
+ assertEquals(values[i], ints.get());
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(i, iEnum.advance(i));
+ assertEquals("" + i, 0, ints.get());
+ }
+
+ iEnum.close();
+ }
+ r.close();
+ dir.close();
+ }
+ }
+ }
+
+ public void testFloats4() throws IOException {
+ runTestFloats(4, 0.00001);
+ }
+
+ private void runTestFloats(int precision, double delta) throws IOException {
+ Directory dir = newDirectory();
+ Writer w = Floats.getWriter(dir, "test", precision);
+ final int NUM_VALUES = 1000;
+ final double[] values = new double[NUM_VALUES];
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final double v = precision == 4 ? random.nextFloat() : random
+ .nextDouble();
+ values[i] = v;
+ w.add(i, v);
+ }
+ final int additionalValues = 1 + random.nextInt(10);
+ w.finish(NUM_VALUES + additionalValues);
+
+ DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues);
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = getSource(r);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(values[i], s.getFloat(i), 0.0f);
+ }
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum fEnum = r.getEnum();
+ ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
+ FloatsRef floats = attr.floats();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(i, fEnum.nextDoc());
+ assertEquals(values[i], floats.get(), delta);
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ assertEquals(i, fEnum.nextDoc());
+ assertEquals(0.0, floats.get(), delta);
+ }
+ fEnum.close();
+ }
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum fEnum = r.getEnum();
+ ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
+ FloatsRef floats = attr.floats();
+ for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
+ assertEquals(i, fEnum.advance(i));
+ assertEquals(values[i], floats.get(), delta);
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
+ assertEquals(i, fEnum.advance(i));
+ assertEquals(0.0, floats.get(), delta);
+ }
+ fEnum.close();
+ }
+
+ r.close();
+ dir.close();
+ }
+
+ public void testFloats8() throws IOException {
+ runTestFloats(8, 0.0);
+ }
+
+ private Source getSource(DocValues values) throws IOException {
+ // getSource uses cache internally
+ return random.nextBoolean() ? values.load() : values.getSource();
+ }
+
+ private SortedSource getSortedSource(DocValues values,
+ Comparator comparator) throws IOException {
+ // getSortedSource uses cache internally
+ return random.nextBoolean() ? values.loadSorted(comparator) : values
+ .getSortedSorted(comparator);
+ }
+}
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
similarity index 61%
rename from lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
rename to lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
index 5728c396600..e2f3a206256 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
@@ -16,12 +16,10 @@ package org.apache.lucene.index.values;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Comparator;
import java.util.EnumSet;
import java.util.List;
@@ -45,7 +43,6 @@ import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
-import org.apache.lucene.index.values.DocValues.SortedSource;
import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
@@ -54,22 +51,28 @@ import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.OpenBitSet;
-import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-public class TestIndexValues extends LuceneTestCase {
+/**
+ *
+ * Tests DocValues integration into IndexWriter & Codecs
+ *
+ */
+public class TestDocValuesIndexing extends LuceneTestCase {
+ // TODO Add a test for addIndexes
+ // TODO add test for unoptimized case with deletes
- // TODO test addIndexes
private static DocValuesCodec docValuesCodec;
private static CodecProvider provider;
+
@BeforeClass
public static void beforeClassLuceneTestCaseJ4() {
LuceneTestCase.beforeClassLuceneTestCaseJ4();
provider = new CodecProvider();
- docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup(CodecProvider
- .getDefaultCodec()));
+ docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup(
+ CodecProvider.getDefaultCodec()));
provider.register(docValuesCodec);
provider.setDefaultFieldCodec(docValuesCodec.name);
}
@@ -79,277 +82,6 @@ public class TestIndexValues extends LuceneTestCase {
LuceneTestCase.afterClassLuceneTestCaseJ4();
}
- public void testBytesStraight() throws IOException {
- runTestBytes(Bytes.Mode.STRAIGHT, true);
- runTestBytes(Bytes.Mode.STRAIGHT, false);
- }
-
- public void testBytesDeref() throws IOException {
- runTestBytes(Bytes.Mode.DEREF, true);
- runTestBytes(Bytes.Mode.DEREF, false);
- }
-
- public void testBytesSorted() throws IOException {
- runTestBytes(Bytes.Mode.SORTED, true);
- runTestBytes(Bytes.Mode.SORTED, false);
- }
-
- // nocommit -- for sorted test, do our own Sort of the
- // values and verify it's identical
- public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize)
- throws IOException {
-
- final BytesRef bytesRef = new BytesRef();
-
- final Comparator comp = mode == Bytes.Mode.SORTED ? BytesRef
- .getUTF8SortedAsUnicodeComparator() : null;
-
- Directory dir = newDirectory();
- Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize);
- int maxDoc = 220;
- final String[] values = new String[maxDoc];
- final int lenMin, lenMax;
- if (fixedSize) {
- lenMin = lenMax = 3 + random.nextInt(7);
- } else {
- lenMin = 1;
- lenMax = 15 + random.nextInt(6);
- }
- for (int i = 0; i < 100; i++) {
- final String s;
- if (i > 0 && random.nextInt(5) <= 2) {
- // use prior value
- s = values[2 * random.nextInt(i)];
- } else {
- s = _TestUtil.randomUnicodeString(random, lenMin, lenMax);
- }
- values[2 * i] = s;
-
- UnicodeUtil.UTF16toUTF8(s, 0, s.length(), bytesRef);
- w.add(2 * i, bytesRef);
- }
- w.finish(maxDoc);
-
- DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc);
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum bytesEnum = r.getEnum();
- assertNotNull("enum is null", bytesEnum);
- ValuesAttribute attr = bytesEnum.addAttribute(ValuesAttribute.class);
- assertNotNull("attribute is null", attr);
- BytesRef ref = attr.bytes();
- assertNotNull("BytesRef is null - enum not initialized to use bytes",
- attr);
-
- for (int i = 0; i < 2; i++) {
- final int idx = 2 * i;
- assertEquals("doc: " + idx, idx, bytesEnum.advance(idx));
- String utf8String = ref.utf8ToString();
- assertEquals("doc: " + idx + " lenLeft: " + values[idx].length()
- + " lenRight: " + utf8String.length(), values[idx], utf8String);
- }
- assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
- assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1));
-
- bytesEnum.close();
- }
-
- // Verify we can load source twice:
- for (int iter = 0; iter < 2; iter++) {
- Source s;
- DocValues.SortedSource ss;
- if (mode == Bytes.Mode.SORTED) {
- s = ss = getSortedSource(r, comp);
- } else {
- s = getSource(r);
- ss = null;
- }
-
- for (int i = 0; i < 100; i++) {
- final int idx = 2 * i;
- assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx));
- assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString());
- if (ss != null) {
- assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
- .utf8ToString());
- DocValues.SortedSource.LookupResult result = ss
- .getByValue(new BytesRef(values[idx]));
- assertTrue(result.found);
- assertEquals(ss.ord(idx), result.ord);
- }
- }
-
- // Lookup random strings:
- if (mode == Bytes.Mode.SORTED) {
- final int numValues = ss.getValueCount();
- for (int i = 0; i < 1000; i++) {
- BytesRef bytesValue = new BytesRef(_TestUtil.randomUnicodeString(
- random, lenMin, lenMax));
- SortedSource.LookupResult result = ss.getByValue(bytesValue);
- if (result.found) {
- assert result.ord > 0;
- assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord)));
- int count = 0;
- for (int k = 0; k < 100; k++) {
- if (bytesValue.utf8ToString().equals(values[2 * k])) {
- assertEquals(ss.ord(2 * k), result.ord);
- count++;
- }
- }
- assertTrue(count > 0);
- } else {
- assert result.ord >= 0;
- if (result.ord == 0) {
- final BytesRef firstRef = ss.getByOrd(1);
- // random string was before our first
- assertTrue(firstRef.compareTo(bytesValue) > 0);
- } else if (result.ord == numValues) {
- final BytesRef lastRef = ss.getByOrd(numValues);
- // random string was after our last
- assertTrue(lastRef.compareTo(bytesValue) < 0);
- } else {
- // random string fell between two of our values
- final BytesRef before = (BytesRef) ss.getByOrd(result.ord)
- .clone();
- final BytesRef after = ss.getByOrd(result.ord + 1);
- assertTrue(before.compareTo(bytesValue) < 0);
- assertTrue(bytesValue.compareTo(after) < 0);
-
- }
- }
- }
- }
- }
-
- r.close();
- dir.close();
- }
-
- public void testInts() throws IOException {
- long maxV = 1;
- final int NUM_VALUES = 1000;
- final long[] values = new long[NUM_VALUES];
- for (int rx = 1; rx < 63; rx++, maxV *= 2) {
- for (int b = 0; b < 2; b++) {
- Directory dir = newDirectory();
- boolean useFixedArrays = b == 0;
- Writer w = Ints.getWriter(dir, "test", useFixedArrays);
- for (int i = 0; i < NUM_VALUES; i++) {
- final long v = random.nextLong() % (1 + maxV);
- values[i] = v;
- w.add(i, v);
- }
- final int additionalDocs = 1 + random.nextInt(9);
- w.finish(NUM_VALUES + additionalDocs);
-
- DocValues r = Ints.getValues(dir, "test", useFixedArrays);
- for (int iter = 0; iter < 2; iter++) {
- Source s = getSource(r);
- for (int i = 0; i < NUM_VALUES; i++) {
- final long v = s.getInt(i);
- assertEquals("index " + i + " b: " + b, values[i], v);
- }
- }
-
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = r.getEnum();
- ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
- LongsRef ints = attr.ints();
- for (int i = 0; i < NUM_VALUES; i++) {
- assertEquals(i, iEnum.nextDoc());
- assertEquals(values[i], ints.get());
- }
- for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
- assertEquals(i, iEnum.nextDoc());
- assertEquals("" + i, 0, ints.get());
- }
-
- iEnum.close();
- }
-
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = r.getEnum();
- ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
- LongsRef ints = attr.ints();
- for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
- assertEquals(i, iEnum.advance(i));
- assertEquals(values[i], ints.get());
- }
- for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
- assertEquals(i, iEnum.advance(i));
- assertEquals("" + i, 0, ints.get());
- }
-
- iEnum.close();
- }
- r.close();
- dir.close();
- }
- }
- }
-
- public void testFloats4() throws IOException {
- runTestFloats(4, 0.00001);
- }
-
- private void runTestFloats(int precision, double delta) throws IOException {
- Directory dir = newDirectory();
- Writer w = Floats.getWriter(dir, "test", precision);
- final int NUM_VALUES = 1000;
- final double[] values = new double[NUM_VALUES];
- for (int i = 0; i < NUM_VALUES; i++) {
- final double v = precision == 4 ? random.nextFloat() : random
- .nextDouble();
- values[i] = v;
- w.add(i, v);
- }
- final int additionalValues = 1 + random.nextInt(10);
- w.finish(NUM_VALUES + additionalValues);
-
- DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues);
- for (int iter = 0; iter < 2; iter++) {
- Source s = getSource(r);
- for (int i = 0; i < NUM_VALUES; i++) {
- assertEquals(values[i], s.getFloat(i), 0.0f);
- }
- }
-
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum fEnum = r.getEnum();
- ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
- FloatsRef floats = attr.floats();
- for (int i = 0; i < NUM_VALUES; i++) {
- assertEquals(i, fEnum.nextDoc());
- assertEquals(values[i], floats.get(), delta);
- }
- for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
- assertEquals(i, fEnum.nextDoc());
- assertEquals(0.0, floats.get(), delta);
- }
- fEnum.close();
- }
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum fEnum = r.getEnum();
- ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
- FloatsRef floats = attr.floats();
- for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
- assertEquals(i, fEnum.advance(i));
- assertEquals(values[i], floats.get(), delta);
- }
- for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
- assertEquals(i, fEnum.advance(i));
- assertEquals(0.0, floats.get(), delta);
- }
- fEnum.close();
- }
-
- r.close();
- dir.close();
- }
-
- public void testFloats8() throws IOException {
- runTestFloats(8, 0.0);
- }
-
/**
* Tests complete indexing of {@link Values} including deletions, merging and
* sparse value fields on Compound-File
@@ -437,7 +169,7 @@ public class TestIndexValues extends LuceneTestCase {
assertNotNull(intsReader);
Source ints = getSource(intsReader);
-
+
ValuesEnum intsEnum = intsReader.getEnum();
assertNotNull(intsEnum);
LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
@@ -715,8 +447,5 @@ public class TestIndexValues extends LuceneTestCase {
// getSource uses cache internally
return random.nextBoolean() ? values.load() : values.getSource();
}
- private SortedSource getSortedSource(DocValues values, Comparator comparator) throws IOException {
- // getSortedSource uses cache internally
- return random.nextBoolean() ? values.loadSorted(comparator) : values.getSortedSorted(comparator);
- }
+
}
From eabf2867ebbfdce70494056807048a25f8a4ecdb Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 16 Nov 2010 18:34:16 +0000
Subject: [PATCH 013/116] Use pagedBytes in DocValues Byte variants instead of
direct byte arrays
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1035731 13f79535-47bb-0310-9956-ffa450edef68
---
.../apache/lucene/index/DocumentsWriter.java | 2 +-
.../org/apache/lucene/index/values/Bytes.java | 98 +++++++++---
.../index/values/FixedDerefBytesImpl.java | 33 ++---
.../index/values/FixedSortedBytesImpl.java | 139 +++++++-----------
.../index/values/FixedStraightBytesImpl.java | 23 +--
.../index/values/VarDerefBytesImpl.java | 40 ++---
.../index/values/VarSortedBytesImpl.java | 57 ++-----
.../index/values/VarStraightBytesImpl.java | 49 +++---
8 files changed, 194 insertions(+), 247 deletions(-)
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
index ea971b93370..175ab09bc68 100644
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -601,7 +601,7 @@ final class DocumentsWriter {
}
synchronized private void initFlushState(boolean onlyDocStore) {
- initSegmentName(onlyDocStore); // nocommit - this does not work for docValues
+ initSegmentName(onlyDocStore);
final SegmentCodecs info = SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs);
flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos,
docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info);
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index 3080cbecdb0..691abe588bf 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -32,6 +32,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.PagedBytes;
/**
* Provides concrete Writer/Reader impls for byte[] value per document. There
@@ -46,7 +47,7 @@ import org.apache.lucene.util.CodecUtil;
* NOTE: Each byte[] must be <= 32768 bytes in length
*
*/
-//TODO - add bulk copy where possible
+// TODO - add bulk copy where possible
public final class Bytes {
// don't instantiate!
@@ -57,7 +58,6 @@ public final class Bytes {
STRAIGHT, DEREF, SORTED
};
-
// TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
public static Writer getWriter(Directory dir, String id, Mode mode,
@@ -124,29 +124,65 @@ public final class Bytes {
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final BytesRef defaultValue = new BytesRef();
+ protected final static int PAGED_BYTES_BITS = 15;
+ private final PagedBytes pagedBytes;
+ protected final PagedBytes.Reader data;
+ protected final long totalLengthInBytes;
- protected BytesBaseSource(IndexInput datIn, IndexInput idxIn) {
+ protected BytesBaseSource(IndexInput datIn, IndexInput idxIn, PagedBytes pagedBytes, long bytesToRead)
+ throws IOException {
+ assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
+ this.totalLengthInBytes = bytesToRead;
+ this.pagedBytes = pagedBytes;
+ this.pagedBytes.copy(datIn, bytesToRead);
+ data = pagedBytes.freeze(true);
this.idxIn = idxIn;
}
public void close() throws IOException {
- if (datIn != null)
- datIn.close();
- if (idxIn != null) // if straight
- idxIn.close();
-
+ data.close();
+ try {
+ if (datIn != null)
+ datIn.close();
+ } finally {
+ if (idxIn != null) // if straight
+ idxIn.close();
+ }
}
+ public long ramBytesUsed() {
+ return 0; //TOODO
+ }
+
}
static abstract class BytesBaseSortedSource extends SortedSource {
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final BytesRef defaultValue = new BytesRef();
+ protected final static int PAGED_BYTES_BITS = 15;
+ private final PagedBytes pagedBytes;
+ protected final PagedBytes.Reader data;
+ protected final BytesRef bytesRef = new BytesRef();
+ protected final LookupResult lookupResult = new LookupResult();
+ private final Comparator comp;
- protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn) {
+
+ protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn, Comparator comp, PagedBytes pagedBytes, long bytesToRead) throws IOException {
+ assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
+ this.pagedBytes = pagedBytes;
+ this.pagedBytes.copy(datIn, bytesToRead);
+ data = pagedBytes.freeze(true);
this.idxIn = idxIn;
+ this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
+ : comp;
+
+ }
+
+ @Override
+ public BytesRef getByOrd(int ord) {
+ return ord == 0 ? defaultValue : deref(--ord);
}
public void close() throws IOException {
@@ -154,13 +190,35 @@ public final class Bytes {
datIn.close();
if (idxIn != null) // if straight
idxIn.close();
+ }
+
+ protected abstract BytesRef deref(int ord);
+
+ protected LookupResult binarySearch(BytesRef b, int low, int high) {
+ while (low <= high) {
+ int mid = (low + high) >>> 1;
+ deref(mid);
+ final int cmp = comp.compare(bytesRef, b);
+ if (cmp < 0) {
+ low = mid + 1;
+ } else if (cmp > 0) {
+ high = mid - 1;
+ } else {
+ lookupResult.ord = mid + 1;
+ lookupResult.found = true;
+ return lookupResult;
+ }
+ }
+ assert comp.compare(bytesRef, b) != 0;
+ lookupResult.ord = low;
+ lookupResult.found = false;
+ return lookupResult;
}
}
static abstract class BytesWriterBase extends Writer {
-
private final Directory dir;
private final String id;
protected IndexOutput idxOut;
@@ -172,7 +230,8 @@ public final class Bytes {
protected final AtomicLong bytesUsed;
protected BytesWriterBase(Directory dir, String id, String codecName,
- int version, boolean initIndex, boolean initData, ByteBlockPool pool, AtomicLong bytesUsed) throws IOException {
+ int version, boolean initIndex, boolean initData, ByteBlockPool pool,
+ AtomicLong bytesUsed) throws IOException {
this.dir = dir;
this.id = id;
this.codecName = codecName;
@@ -214,7 +273,7 @@ public final class Bytes {
datOut.close();
if (idxOut != null)
idxOut.close();
- if(pool != null)
+ if (pool != null)
pool.reset();
}
@@ -228,11 +287,11 @@ public final class Bytes {
bytesRef = attr.bytes();
assert bytesRef != null;
}
-
+
@Override
public void add(int docID, ValuesAttribute attr) throws IOException {
final BytesRef ref;
- if((ref = attr.bytes()) != null) {
+ if ((ref = attr.bytes()) != null) {
add(docID, ref);
}
}
@@ -242,9 +301,10 @@ public final class Bytes {
assert datOut != null;
files.add(IndexFileNames.segmentFileName(id, "",
IndexFileNames.CSF_DATA_EXTENSION));
- if(idxOut != null) { // called after flush - so this must be initialized if needed or present
+ if (idxOut != null) { // called after flush - so this must be initialized
+ // if needed or present
final String idxFile = IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_INDEX_EXTENSION);
+ IndexFileNames.CSF_INDEX_EXTENSION);
files.add(idxFile);
}
}
@@ -254,7 +314,7 @@ public final class Bytes {
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
*/
- static abstract class BytesReaderBase extends DocValues {
+ static abstract class BytesReaderBase extends DocValues {
protected final IndexInput idxIn;
protected final IndexInput datIn;
protected final int version;
@@ -276,6 +336,7 @@ public final class Bytes {
} else {
idxIn = null;
}
+
}
protected final IndexInput cloneData() {
@@ -283,7 +344,8 @@ public final class Bytes {
return (IndexInput) datIn.clone();
}
- protected final IndexInput cloneIndex() { // TODO assert here for null rather than return null
+ protected final IndexInput cloneIndex() { // TODO assert here for null
+ // rather than return null
return idxIn == null ? null : (IndexInput) idxIn.clone();
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
index b1e2449cbfd..3be9918d55d 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
@@ -30,6 +30,7 @@ import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -133,27 +134,22 @@ class FixedDerefBytesImpl {
@Override
public Source load() throws IOException {
- return new Source(cloneData(), cloneIndex(), size);
+ final IndexInput index = cloneIndex();
+ return new Source(cloneData(), index , size, index.readInt());
}
private static class Source extends BytesBaseSource {
- // TODO: paged data or mmap?
- private final byte[] data;
private final BytesRef bytesRef = new BytesRef();
private final PackedInts.Reader index;
- private final int numValue;
private final int size;
+ private final int numValues;
- protected Source(IndexInput datIn, IndexInput idxIn, int size)
+ protected Source(IndexInput datIn, IndexInput idxIn, int size, int numValues)
throws IOException {
- super(datIn, idxIn);
+ super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues);
this.size = size;
- numValue = idxIn.readInt();
- data = new byte[size * numValue];
- datIn.readBytes(data, 0, size * numValue);
+ this.numValues = numValues;
index = PackedInts.getReader(idxIn);
- bytesRef.bytes = data;
- bytesRef.length = size;
}
@Override
@@ -162,22 +158,13 @@ class FixedDerefBytesImpl {
if (id == 0) {
return defaultValue;
}
- bytesRef.offset = ((id - 1) * size);
- return bytesRef;
- }
-
- public long ramBytesUsed() {
- // TODO(simonw): move ram calculation to PackedInts?!
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
- + data.length
- + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index
- .getBitsPerValue()
- * index.size());
+ return data.fill(bytesRef, ((id - 1) * size), size);
}
+
@Override
public int getValueCount() {
- return numValue;
+ return numValues;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
index 810c6a0a82c..e445cfb29bd 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
@@ -33,6 +33,7 @@ import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -52,88 +53,93 @@ class FixedSortedBytesImpl {
private int[] docToEntry;
private final Comparator<BytesRef> comp;
-
private final BytesRefHash hash = new BytesRefHash(pool);
-
- public Writer(Directory dir, String id, Comparator<BytesRef> comp) throws IOException {
+
+ public Writer(Directory dir, String id, Comparator<BytesRef> comp)
+ throws IOException {
this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
new AtomicLong());
}
- public Writer(Directory dir, String id, Comparator<BytesRef> comp, Allocator allocator, AtomicLong bytesUsed) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed);
+ public Writer(Directory dir, String id, Comparator<BytesRef> comp,
+ Allocator allocator, AtomicLong bytesUsed) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
+ new ByteBlockPool(allocator), bytesUsed);
docToEntry = new int[1];
-// docToEntry[0] = -1;
+ // docToEntry[0] = -1;
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
this.comp = comp;
}
@Override
synchronized public void add(int docID, BytesRef bytes) throws IOException {
- if(bytes.length == 0)
+ if (bytes.length == 0)
return; // default - skip it
if (size == -1) {
size = bytes.length;
initDataOut();
datOut.writeInt(size);
} else if (bytes.length != size) {
- throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length);
+ throw new IllegalArgumentException("expected bytes size=" + size
+ + " but got " + bytes.length);
}
if (docID >= docToEntry.length) {
int[] newArray = new int[ArrayUtil.oversize(1 + docID,
RamUsageEstimator.NUM_BYTES_INT)];
System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
-// Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
+ // Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
- bytesUsed.addAndGet((newArray.length - docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT);
+ bytesUsed.addAndGet((newArray.length - docToEntry.length)
+ * RamUsageEstimator.NUM_BYTES_INT);
docToEntry = newArray;
}
int e = hash.add(bytes);
- docToEntry[docID] = 1+(e < 0? (-e)-1: e);
+ docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
}
-
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
synchronized public void finish(int docCount) throws IOException {
- if(datOut == null)// no data added
+ if (datOut == null)// no data added
return;
initIndexOut();
final int[] sortedEntries = hash.sort(comp);
final int count = hash.size();
- int[] address= new int[count];
+ int[] address = new int[count];
// first dump bytes data, recording address as we go
- for(int i=0;i docToEntry.length) {
limit = docToEntry.length;
} else {
limit = docCount;
}
- for(int i=0;i 0 && e <= count: "index must 0 > && <= " + count + " was: " + e;
- w.add(address[e-1]);
+ assert e > 0 && e <= count : "index must 0 > && <= " + count
+ + " was: " + e;
+ w.add(address[e - 1]);
}
}
- for(int i=limit;i comp) throws IOException {
- return new Source(cloneData(), cloneIndex(), size, comp);
+ public SortedSource loadSorted(Comparator<BytesRef> comp)
+ throws IOException {
+ final IndexInput idxInput = cloneIndex();
+ final IndexInput datInput = cloneData();
+ datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
+ idxInput.seek(CodecUtil.headerLength(CODEC_NAME));
+ return new Source(datInput, idxInput, size, idxInput.readInt(), comp);
}
private static class Source extends BytesBaseSortedSource {
- // TODO: paged data
- private final byte[] data;
- private final BytesRef bytesRef = new BytesRef();
private final PackedInts.Reader index;
- private final LookupResult lookupResult = new LookupResult();
private final int numValue;
- private final Comparator<BytesRef> comp;
private final int size;
- public Source(IndexInput datIn, IndexInput idxIn, int size, Comparator<BytesRef> comp) throws IOException {
- super(datIn, idxIn);
+ public Source(IndexInput datIn, IndexInput idxIn, int size, int numValues,
+ Comparator<BytesRef> comp) throws IOException {
+ super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size*numValues );
this.size = size;
- datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
- idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
-
- numValue = idxIn.readInt();
- data = new byte[size*numValue];
- datIn.readBytes(data, 0, size*numValue);
- datIn.close();
-
+ this.numValue = numValues;
index = PackedInts.getReader(idxIn);
- idxIn.close(); // do we need to close that here?
- bytesRef.bytes = data;
bytesRef.length = size;
- // default byte sort order
- this.comp = comp==null?BytesRef.getUTF8SortedAsUnicodeComparator():comp;
}
@Override
@@ -201,60 +196,38 @@ class FixedSortedBytesImpl {
return (int) index.get(docID);
}
- @Override
- public BytesRef getByOrd(int ord) {
- if (ord == 0) {
- return defaultValue;
- } else {
- bytesRef.offset = ((ord-1) * size);
- return bytesRef;
- }
- }
-
@Override
public LookupResult getByValue(BytesRef bytes) {
- return binarySearch(bytes, 0, numValue-1);
+ return binarySearch(bytes, 0, numValue - 1);
}
public long ramBytesUsed() {
// TODO(simonw): move ram calcultation to PackedInts?
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length +
- (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
+ return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ + size
+ * numValue
+ + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index
+ .getBitsPerValue()
+ * index.size());
}
@Override
public int getValueCount() {
return numValue;
}
-
- private LookupResult binarySearch(BytesRef b, int low, int high) {
-
- while (low <= high) {
- int mid = (low + high) >>> 1;
- bytesRef.offset = mid * size;
- int cmp = comp.compare(bytesRef, b);
- if (cmp < 0) {
- low = mid + 1;
- } else if (cmp > 0) {
- high = mid - 1;
- } else {
- lookupResult.ord = mid+1;
- lookupResult.found = true;
- return lookupResult;
- }
- }
- lookupResult.ord = low;
- lookupResult.found = false;
- return lookupResult;
+ @Override
+ protected BytesRef deref(int ord) {
+ return data.fill(bytesRef, (ord* size), size);
}
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
- // do unsorted
- return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, size);
+ // do unsorted
+ return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME,
+ size);
}
-
+
@Override
public Values type() {
return Values.BYTES_FIXED_SORTED;
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
index 6df5217788b..202947c5eef 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
@@ -26,7 +26,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.PagedBytes;
// Simplest storage: stores fixed length byte[] per
// document, with no dedup and no sorting.
@@ -133,30 +133,17 @@ class FixedStraightBytesImpl {
}
private static class Source extends BytesBaseSource {
- // TODO: paged data
- private final byte[] data;
private final BytesRef bytesRef = new BytesRef();
private final int size;
public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException {
- super(datIn, idxIn);
+ super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size*maxDoc);
this.size = size;
- final int sizeInBytes = size*maxDoc;
- data = new byte[sizeInBytes];
- assert data.length <= datIn.length() : " file size is less than the expected size diff: " + (data.length - datIn.length()) + " size: " + size + " maxDoc " + maxDoc + " pos: " + datIn.getFilePointer();
- datIn.readBytes(data, 0, sizeInBytes);
- bytesRef.bytes = data;
- bytesRef.length = size;
}
-
+
@Override
- public BytesRef getBytes(int docID) {
- bytesRef.offset = docID * size;
- return bytesRef;
- }
-
- public long ramBytesUsed() {
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length;
+ public BytesRef getBytes(int docID) {
+ return data.fill(bytesRef, docID * size, size);
}
@Override
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
index 9ab2adc6dde..e07104f5d0e 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -33,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -165,44 +166,31 @@ class VarDerefBytesImpl {
@Override
public Source load() throws IOException {
- return new Source(cloneData(), cloneIndex());
+ final IndexInput data = cloneData();
+ final IndexInput index = cloneIndex();
+ data.seek(CodecUtil.headerLength(CODEC_NAME));
+ index.seek(CodecUtil.headerLength(CODEC_NAME));
+ final long totalBytes = index.readInt(); // should be long
+ return new Source(data,index, totalBytes);
}
private static class Source extends BytesBaseSource {
- // TODO: paged data
- private final byte[] data;
private final BytesRef bytesRef = new BytesRef();
private final PackedInts.Reader index;
- public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
- super(datIn, idxIn);
- datIn.seek(CodecUtil.headerLength(CODEC_NAME));
- idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
-
- final int totBytes = idxIn.readInt();
- data = new byte[totBytes];
- datIn.readBytes(data, 0, totBytes);
-
+ public Source(IndexInput datIn, IndexInput idxIn, long totalBytes) throws IOException {
+ super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes);
index = PackedInts.getReader(idxIn);
- bytesRef.bytes = data;
}
@Override
public BytesRef getBytes(int docID) {
- int address = (int) index.get(docID);
+ long address = index.get(docID);
if (address == 0) {
assert defaultValue.length == 0: " default value manipulated";
return defaultValue;
} else {
- address--;
- if ((data[address] & 0x80) == 0) {
- // length is 1 byte
- bytesRef.length = data[address];
- bytesRef.offset = address+1;
- } else {
- bytesRef.length = (data[address]&0x7f) + ((data[address+1]&0xff)<<7);
- bytesRef.offset = address+2;
- }
+ data.fillUsingLengthPrefix2(bytesRef, --address);
return bytesRef;
}
}
@@ -211,12 +199,6 @@ class VarDerefBytesImpl {
public int getValueCount() {
return index.size();
}
-
- public long ramBytesUsed() {
- // TODO(simonw): move address ram usage to PackedInts?
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length +
- (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
- }
}
@Override
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
index 7b291520a16..a6eb7d0deb0 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
@@ -32,6 +32,7 @@ import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -165,41 +166,28 @@ class VarSortedBytesImpl {
@Override
public SortedSource loadSorted(Comparator<BytesRef> comp)
throws IOException {
- return new Source(cloneData(), cloneIndex(), comp);
+ IndexInput indexIn = cloneIndex();
+ return new Source(cloneData(), indexIn , comp, indexIn.readLong());
}
private static class Source extends BytesBaseSortedSource {
// TODO: paged data
- private final byte[] data;
- private final BytesRef bytesRef = new BytesRef();
private final PackedInts.Reader docToOrdIndex;
private final PackedInts.Reader ordToOffsetIndex; // 0-based
private final long totBytes;
private final int valueCount;
- private final LookupResult lookupResult = new LookupResult();
- private final Comparator<BytesRef> comp;
public Source(IndexInput datIn, IndexInput idxIn,
- Comparator<BytesRef> comp) throws IOException {
- super(datIn, idxIn);
- totBytes = idxIn.readLong();
- data = new byte[(int) totBytes];
- datIn.readBytes(data, 0, (int) totBytes);
+ Comparator<BytesRef> comp, long dataLength) throws IOException {
+ super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), dataLength);
+ totBytes = dataLength;
docToOrdIndex = PackedInts.getReader(idxIn);
ordToOffsetIndex = PackedInts.getReader(idxIn);
valueCount = ordToOffsetIndex.size();
- bytesRef.bytes = data;
// default byte sort order
- this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
- : comp;
}
- @Override
- public BytesRef getByOrd(int ord) {
- return ord == 0 ? defaultValue : deref(--ord);
- }
-
@Override
public int ord(int docID) {
return (int) docToOrdIndex.get(docID);
@@ -213,7 +201,7 @@ class VarSortedBytesImpl {
public long ramBytesUsed() {
// TODO(simonw): move ram usage to PackedInts?
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
- + data.length
+ + totBytes
+ (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex
.getBitsPerValue()
* docToOrdIndex.getBitsPerValue())
@@ -228,40 +216,21 @@ class VarSortedBytesImpl {
}
// ord is 0-based
- private BytesRef deref(int ord) {
- bytesRef.offset = (int) ordToOffsetIndex.get(ord);
+ @Override
+ protected BytesRef deref(int ord) {
+
final long nextOffset;
if (ord == valueCount - 1) {
nextOffset = totBytes;
} else {
nextOffset = ordToOffsetIndex.get(1 + ord);
}
- bytesRef.length = (int) (nextOffset - bytesRef.offset);
+ final long offset = ordToOffsetIndex.get(ord);
+ data.fill(bytesRef, offset , (int)(nextOffset - offset));
return bytesRef;
}
- // TODO: share w/ FixedSortedBytesValues?
- private LookupResult binarySearch(BytesRef b, int low, int high) {
-
- while (low <= high) {
- int mid = (low + high) >>> 1;
- deref(mid);
- final int cmp = comp.compare(bytesRef, b);
- if (cmp < 0) {
- low = mid + 1;
- } else if (cmp > 0) {
- high = mid - 1;
- } else {
- lookupResult.ord = mid + 1;
- lookupResult.found = true;
- return lookupResult;
- }
- }
- assert comp.compare(bytesRef, b) != 0;
- lookupResult.ord = low;
- lookupResult.found = false;
- return lookupResult;
- }
+
}
@Override
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
index f747bb06139..04fd5939d34 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
@@ -28,6 +28,7 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
@@ -44,8 +45,9 @@ class VarStraightBytesImpl {
// start at -1 if the first added value is > 0
private int lastDocID = -1;
private int[] docToAddress;
-
- public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException {
+
+ public Writer(Directory dir, String id, AtomicLong bytesUsed)
+ throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, bytesUsed);
docToAddress = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
@@ -60,7 +62,8 @@ class VarStraightBytesImpl {
if (docID >= docToAddress.length) {
int oldSize = docToAddress.length;
docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
- bytesUsed.addAndGet(-(docToAddress.length-oldSize)*RamUsageEstimator.NUM_BYTES_INT);
+ bytesUsed.addAndGet(-(docToAddress.length - oldSize)
+ * RamUsageEstimator.NUM_BYTES_INT);
}
for (int i = lastDocID + 1; i < docID; i++) {
docToAddress[i] = address;
@@ -70,7 +73,7 @@ class VarStraightBytesImpl {
@Override
synchronized public void add(int docID, BytesRef bytes) throws IOException {
- if(bytes.length == 0)
+ if (bytes.length == 0)
return; // default
if (datOut == null)
initDataOut();
@@ -97,7 +100,8 @@ class VarStraightBytesImpl {
w.add(docToAddress[i]);
}
w.finish();
- bytesUsed.addAndGet(-(docToAddress.length)*RamUsageEstimator.NUM_BYTES_INT);
+ bytesUsed.addAndGet(-(docToAddress.length)
+ * RamUsageEstimator.NUM_BYTES_INT);
docToAddress = null;
super.finish(docCount);
}
@@ -121,46 +125,29 @@ class VarStraightBytesImpl {
}
private class Source extends BytesBaseSource {
- private final int totBytes;
- // TODO: paged data
- private final byte[] data;
private final BytesRef bytesRef = new BytesRef();
private final PackedInts.Reader addresses;
public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
- super(datIn, idxIn);
- totBytes = idxIn.readVInt();
- data = new byte[totBytes];
- datIn.readBytes(data, 0, totBytes);
+ super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVInt()); // TODO
+ // should
+ // be
+ // long
addresses = PackedInts.getReader(idxIn);
- bytesRef.bytes = data;
}
@Override
public BytesRef getBytes(int docID) {
final int address = (int) addresses.get(docID);
- bytesRef.offset = address;
- if (docID == maxDoc - 1) {
- bytesRef.length = totBytes - bytesRef.offset;
- } else {
- bytesRef.length = (int) addresses.get(1 + docID) - bytesRef.offset;
- }
- return bytesRef;
+ final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address)
+ : (int) (addresses.get(1 + docID) - address);
+ return data.fill(bytesRef, address, length);
}
@Override
public int getValueCount() {
throw new UnsupportedOperationException();
}
-
- public long ramBytesUsed() {
- // TODO(simonw): move address ram usage to PackedInts?
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
- + data.length
- + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + addresses
- .getBitsPerValue()
- * addresses.size());
- }
}
@Override
@@ -226,10 +213,10 @@ class VarStraightBytesImpl {
@Override
public int nextDoc() throws IOException {
- return advance(pos+1);
+ return advance(pos + 1);
}
}
-
+
@Override
public Values type() {
return Values.BYTES_VAR_STRAIGHT;
From 0aa416f0cb4cae123bd32d89ffe0d6bdcf1d2278 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Wed, 17 Nov 2010 14:29:59 +0000
Subject: [PATCH 014/116] fixed length prefix for use with PagedBytes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1036060 13f79535-47bb-0310-9956-ffa450edef68
---
.../org/apache/lucene/index/values/Bytes.java | 2 +-
.../index/values/VarDerefBytesImpl.java | 22 +++++++++----
.../java/org/apache/lucene/util/IOUtils.java | 33 ++-----------------
.../index/values/TestDocValuesIndexing.java | 4 +--
4 files changed, 20 insertions(+), 41 deletions(-)
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index 691abe588bf..70343ca1eca 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -243,7 +243,7 @@ public final class Bytes {
if (initIndex)
initIndexOut();
}
-
+
protected void initDataOut() throws IOException {
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
IndexFileNames.CSF_DATA_EXTENSION));
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
index e07104f5d0e..beb0c14704c 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -24,6 +24,7 @@ import org.apache.lucene.index.values.Bytes.BytesBaseSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@@ -32,7 +33,6 @@ import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
@@ -113,7 +113,7 @@ class VarDerefBytesImpl {
final int docAddress;
if (e >= 0) {
docAddress = array.array.address[e] = address;
- address += IOUtils.writeLength(datOut, bytes);
+ address += writePrefixLength(datOut, bytes);
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
address += bytes.length;
} else {
@@ -121,7 +121,18 @@ class VarDerefBytesImpl {
}
docToAddress[docID] = docAddress;
}
-
+
+ private static int writePrefixLength(DataOutput datOut, BytesRef bytes) throws IOException{
+ if (bytes.length < 128) {
+ datOut.writeByte((byte) bytes.length);
+ return 1;
+ } else {
+ datOut.writeByte((byte) (0x80 | (bytes.length >> 8)));
+ datOut.writeByte((byte) (bytes.length & 0xff));
+ return 2;
+ }
+ }
+
public long ramBytesUsed() {
return bytesUsed.get();
}
@@ -216,15 +227,14 @@ class VarDerefBytesImpl {
@Override
protected void fill(long address, BytesRef ref) throws IOException {
- // TODO(simonw): use pages here
datIn.seek(fp + --address);
final byte sizeByte = datIn.readByte();
final int size;
- if ((sizeByte & 0x80) == 0) {
+ if ((sizeByte & 128) == 0) {
// length is 1 byte
size = sizeByte;
} else {
- size = (sizeByte & 0x7f) + ((datIn.readByte() & 0xff) << 7);
+ size = ((sizeByte & 0x7f)<<8) | ((datIn.readByte() & 0xff));
}
if(ref.bytes.length < size)
ref.grow(size);
diff --git a/lucene/src/java/org/apache/lucene/util/IOUtils.java b/lucene/src/java/org/apache/lucene/util/IOUtils.java
index 43120dde1bd..40cde1b9095 100644
--- a/lucene/src/java/org/apache/lucene/util/IOUtils.java
+++ b/lucene/src/java/org/apache/lucene/util/IOUtils.java
@@ -20,8 +20,6 @@ package org.apache.lucene.util;
import java.io.Closeable;
import java.io.IOException;
-import org.apache.lucene.store.DataOutput;
-
/** @lucene.internal */
public final class IOUtils {
@@ -61,37 +59,10 @@ public final class IOUtils {
}
}
- if (priorException != null)
+ if (priorException != null) {
throw priorException;
- else if (firstIOE != null)
+ } else if (firstIOE != null) {
throw firstIOE;
- }
-
- /**
- * Writes the length of the {@link BytesRef} as either a one or two bytes to
- * the {@link DataOutput} and returns the number of bytes used.
- *
- * @param datOut
- * the output to write to
- * @param bytes
- * the length to write
- * @return the length of the {@link BytesRef} as either a one or two bytes to
- * the {@link DataOutput} and returns the number of bytes used.
- * @throws IOException
- * if datOut throws an {@link IOException}
- */
- public static int writeLength(DataOutput datOut, BytesRef bytes)
- throws IOException {
- final int length = bytes.length;
- if (length < 128) {
- // 1 byte to store length
- datOut.writeByte((byte) length);
- return 1;
- } else {
- // 2 byte to store length
- datOut.writeByte((byte) (0x80 | (length & 0x7f)));
- datOut.writeByte((byte) ((length >> 7) & 0xff));
- return 2;
}
}
}
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
index e2f3a206256..fcac9640ef7 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
@@ -273,7 +273,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
bytesRef.length = b.length;
bytesRef.offset = 0;
}
- //
byte upto = 0;
for (int i = 0; i < numValues; i++) {
if (isNumeric) {
@@ -324,7 +323,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
throws CorruptIndexException, LockObtainFailedException, IOException {
- Directory d = newDirectory();
+ final Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
final List byteVariantList = new ArrayList(BYTES);
// run in random order to test if fill works correctly during merges
@@ -389,7 +388,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
assertEquals(base + numRemainingValues, r.numDocs());
int v = 0;
for (int i = base; i < r.numDocs(); i++) {
-
String msg = " field: " + byteIndexValue.name() + " at index: " + i
+ " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: "
+ bytesSize;
From 8dbe034b5928d41136fa04879157fb841da143ba Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 23 Nov 2010 22:33:50 +0000
Subject: [PATCH 015/116] - Fixed IndexOutOfBoundException due to PagedBytes
Boundary - Added Source based ValuesEnums for all types - Cut over to
PagedBytes - Share binary search impl with all sorted implementations -
DocValuesCodec now directly computes the file names belonging to it due to
per-codec metadata - split up test cases into more and smaller tests -
removed several nocommit
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1038364 13f79535-47bb-0310-9956-ffa450edef68
---
.../apache/lucene/document/ValuesField.java | 2 -
.../lucene/index/DocFieldProcessor.java | 10 +-
.../org/apache/lucene/index/FieldInfos.java | 6 -
.../apache/lucene/index/IndexFileNames.java | 8 -
.../apache/lucene/index/SegmentCodecs.java | 1 -
.../codecs/docvalues/DocValuesCodec.java | 94 +++--
.../codecs/docvalues/DocValuesCodecInfo.java | 119 ++++++
.../codecs/docvalues/DocValuesConsumer.java | 5 +-
.../docvalues/DocValuesProducerBase.java | 20 +-
.../org/apache/lucene/index/values/Bytes.java | 110 ++++--
.../apache/lucene/index/values/DocValues.java | 79 +++-
.../index/values/FixedDerefBytesImpl.java | 60 +--
.../index/values/FixedSortedBytesImpl.java | 20 +-
.../index/values/FixedStraightBytesImpl.java | 23 +-
.../apache/lucene/index/values/Floats.java | 123 +++++--
.../lucene/index/values/MultiDocValues.java | 109 ++----
.../lucene/index/values/PackedIntsImpl.java | 112 ++++--
.../apache/lucene/index/values/Values.java | 1 -
.../index/values/ValuesAttributeImpl.java | 3 -
.../lucene/index/values/ValuesEnum.java | 48 ++-
.../index/values/VarDerefBytesImpl.java | 111 +++---
.../index/values/VarSortedBytesImpl.java | 38 +-
.../index/values/VarStraightBytesImpl.java | 43 +--
.../apache/lucene/index/values/Writer.java | 28 +-
.../org/apache/lucene/util/PagedBytes.java | 45 ++-
.../lucene/index/values/TestDocValues.java | 149 ++++----
.../index/values/TestDocValuesIndexing.java | 343 ++++++++++--------
27 files changed, 1072 insertions(+), 638 deletions(-)
create mode 100644 lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java
diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java
index 43fc6bd6623..d71a273a3d9 100644
--- a/lucene/src/java/org/apache/lucene/document/ValuesField.java
+++ b/lucene/src/java/org/apache/lucene/document/ValuesField.java
@@ -16,7 +16,6 @@ package org.apache.lucene.document;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-import java.io.IOException;
import java.io.Reader;
import java.util.Comparator;
@@ -118,7 +117,6 @@ public class ValuesField extends AbstractField {
valField.setBytes(ref, type);
break;
case PACKED_INTS:
- case PACKED_INTS_FIXED:
valField.setInt(Long.parseLong(field.stringValue()));
break;
case SIMPLE_FLOAT_4BYTE:
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index dee260a976d..1eeeacb1176 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -43,6 +43,8 @@ final class DocFieldProcessor extends DocConsumer {
final StoredFieldsWriter fieldsWriter;
final private Map<String, DocValuesConsumer> docValues = new HashMap<String, DocValuesConsumer>();
private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter
+ private SegmentWriteState docValuesConsumerState; // TODO this should be encapsulated in DocumentsWriter
+
synchronized DocValuesConsumer docValuesConsumer(Directory dir,
String segment, String name, ValuesAttribute attr, FieldInfo fieldInfo)
@@ -57,8 +59,8 @@ final class DocFieldProcessor extends DocConsumer {
* the SegmentsWriteState passed in right at the moment when the segment is flushed (doccount etc) but we need the consumer earlier
* to support docvalues and later on stored fields too.
*/
- SegmentWriteState state = docWriter.segWriteState();
- fieldsConsumer = state.segmentCodecs.codec().fieldsConsumer(state);
+ docValuesConsumerState = docWriter.segWriteState();
+ fieldsConsumer = docValuesConsumerState.segmentCodecs.codec().fieldsConsumer(docValuesConsumerState);
}
valuesConsumer = fieldsConsumer.addValuesField(fieldInfo);
docValues.put(name, valuesConsumer);
@@ -102,7 +104,9 @@ final class DocFieldProcessor extends DocConsumer {
}
docValues.clear();
if(fieldsConsumer != null) {
- fieldsConsumer.close(); // nocommit this should go away
+ fieldsConsumer.close(); // TODO remove this once docvalues are fully supported by codecs
+ state.flushedFiles.addAll(docValuesConsumerState.flushedFiles);
+ docValuesConsumerState = null;
fieldsConsumer = null;
}
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
index 0c8aef3b11e..a6baae51324 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
@@ -344,9 +344,6 @@ public final class FieldInfos {
case BYTES_VAR_SORTED:
b = 9;
break;
- case PACKED_INTS_FIXED:
- b = 10;
- break;
default:
throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
}
@@ -413,9 +410,6 @@ public final class FieldInfos {
case 9:
fi.docValues = Values.BYTES_VAR_SORTED;
break;
- case 10:
- fi.docValues = Values.PACKED_INTS_FIXED;
- break;
default:
throw new IllegalStateException("unhandled indexValues type " + b);
}
diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
index 1917b1e7dd8..ef9c4b419c6 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
@@ -79,12 +79,6 @@ public final class IndexFileNames {
/** Extension of separate norms */
public static final String SEPARATE_NORMS_EXTENSION = "s";
- /** Extension of Column-Stride Filed data files */
- public static final String CSF_DATA_EXTENSION = "dat";
-
- /** Extension of Column-Stride Filed index files */
- public static final String CSF_INDEX_EXTENSION = "idx";
-
/**
* This array contains all filename extensions used by
* Lucene's index files, with one exception, namely the
@@ -104,8 +98,6 @@ public final class IndexFileNames {
GEN_EXTENSION,
NORMS_EXTENSION,
COMPOUND_FILE_STORE_EXTENSION,
- CSF_DATA_EXTENSION,
- CSF_INDEX_EXTENSION
};
public static final String[] STORE_INDEX_EXTENSIONS = new String[] {
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java b/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
index 3c707e0d281..0f31e7854e2 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
@@ -92,7 +92,6 @@ final class SegmentCodecs implements Cloneable {
}
}
return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
-
}
Codec codec() {
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
index 75b330daa65..2a4a880b790 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
@@ -17,12 +17,9 @@ package org.apache.lucene.index.codecs.docvalues;
* limitations under the License.
*/
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
@@ -41,28 +38,32 @@ import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.Writer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
/**
* A codec that adds DocValues support to a given codec transparently.
*/
public class DocValuesCodec extends Codec {
- private final Map consumers = new HashMap();
private final Codec other;
+ private final Comparator comparator;
- public DocValuesCodec(Codec other) {
+ public DocValuesCodec(Codec other, Comparator comparator) {
this.name = "docvalues_" + other.name;
this.other = other;
+ this.comparator = comparator;
+ }
+
+ public DocValuesCodec(Codec other) {
+ this(other, null);
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
- WrappingFieldsConsumer consumer;
- if ((consumer = consumers.get(state.segmentName)) == null) {
- consumer = new WrappingFieldsConsumer(other);
- }
- consumer.state = state; // nocommit this is a hack and only necessary since
- // we want to initialized the wrapped
+ final WrappingFieldsConsumer consumer;
+ consumer = new WrappingFieldsConsumer(other, comparator, state);
+ // nocommit this is a hack and only necessary since
+ // we want to initialize the wrapped
// fieldsConsumer lazily with a SegmentWriteState created after the docvalue
// ones is. We should fix this in DocumentWriter I guess. See
// DocFieldProcessor too!
@@ -70,31 +71,44 @@ public class DocValuesCodec extends Codec {
}
private static class WrappingFieldsConsumer extends FieldsConsumer {
- SegmentWriteState state;
- private final List docValuesConsumers = new ArrayList();
+ private final SegmentWriteState state;
private FieldsConsumer wrappedConsumer;
private final Codec other;
+ private final Comparator comparator;
+ private DocValuesCodecInfo info;
- public WrappingFieldsConsumer(Codec other) {
+ public WrappingFieldsConsumer(Codec other, Comparator comparator, SegmentWriteState state) {
this.other = other;
+ this.comparator = comparator;
+ this.state = state;
}
@Override
public void close() throws IOException {
synchronized (this) {
- if (wrappedConsumer != null)
+ if (info != null) {
+ info.write(state);
+ info = null;
+ }
+ if (wrappedConsumer != null) {
wrappedConsumer.close();
+ }
}
+
}
@Override
public synchronized DocValuesConsumer addValuesField(FieldInfo field)
throws IOException {
- DocValuesConsumer consumer = DocValuesConsumer.create(state.segmentName,
- // TODO: set comparator here
- //TODO can we have a compound file per segment and codec for docvalues?
- state.directory, field, state.codecId +"-"+ field.number, null);
- docValuesConsumers.add(consumer);
+ if(info == null) {
+ info = new DocValuesCodecInfo();
+ }
+ final DocValuesConsumer consumer = DocValuesConsumer.create(info.docValuesId(state.segmentName, state.codecId, ""
+ + field.number),
+ // TODO can we have a compound file per segment and codec for
+ // docvalues?
+ state.directory, field, comparator);
+ info.add(field.number);
return consumer;
}
@@ -115,35 +129,23 @@ public class DocValuesCodec extends Codec {
Set files = new HashSet();
other.files(dir, state.segmentInfo, state.codecId, files);
- for (String string : files) {
+ for (String string : files) { // for now we just check if one of the files
+ // exists and open the producer
if (dir.fileExists(string))
return new WrappingFielsdProducer(state, other.fieldsProducer(state));
}
return new WrappingFielsdProducer(state, FieldsProducer.EMPTY);
-
}
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId,
Set files) throws IOException {
- Set otherFiles = new HashSet();
- other.files(dir, segmentInfo, codecId, otherFiles);
- for (String string : otherFiles) { // under some circumstances we only write
- // DocValues
- // so other files will be added even if
- // they don't exist
- if (dir.fileExists(string))
- files.add(string);
- }
- //TODO can we have a compound file per segment and codec for docvalues?
- for (String file : dir.listAll()) {
- if (file.startsWith(segmentInfo.name+"_" + codecId)
- && (file.endsWith(Writer.DATA_EXTENSION) || file
- .endsWith(Writer.INDEX_EXTENSION))) {
- files.add(file);
- }
- }
-
+ other.files(dir, segmentInfo, codecId, files);
+ // TODO can we have a compound file per segment and codec for docvalues?
+ DocValuesCodecInfo info = new DocValuesCodecInfo(); // TODO can we do that
+ // only once?
+ info.read(dir, segmentInfo, codecId);
+ info.files(dir, segmentInfo, codecId, files);
}
@Override
@@ -151,6 +153,7 @@ public class DocValuesCodec extends Codec {
other.getExtensions(extensions);
extensions.add(Writer.DATA_EXTENSION);
extensions.add(Writer.INDEX_EXTENSION);
+ extensions.add(DocValuesCodecInfo.INFO_FILE_EXT);
}
static class WrappingFielsdProducer extends DocValuesProducerBase {
@@ -219,7 +222,6 @@ public class DocValuesCodec extends Codec {
name = value.next();
return this;
}
-
}
static class DocValueNameValue extends NameValue {
@@ -236,7 +238,6 @@ public class DocValuesCodec extends Codec {
}
return this;
}
-
}
static class WrappingFieldsEnum extends FieldsEnum {
@@ -254,7 +255,6 @@ public class DocValuesCodec extends Codec {
this.docValues.iter = docValues;
this.fieldsEnum.value = wrapped;
coordinator = null;
-
}
@Override
@@ -268,7 +268,6 @@ public class DocValuesCodec extends Codec {
public String next() throws IOException {
if (coordinator == null) {
coordinator = fieldsEnum.next().smaller(docValues.next());
- // old = coordinator.name;
} else {
String current = coordinator.name;
if (current == docValues.name) {
@@ -281,16 +280,15 @@ public class DocValuesCodec extends Codec {
}
return coordinator == null ? null : coordinator.name;
-
}
@Override
public TermsEnum terms() throws IOException {
- if (fieldsEnum.name == coordinator.name)
+ if (fieldsEnum.name == coordinator.name) {
return fieldsEnum.value.terms();
+ }
return null;
}
-
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java
new file mode 100644
index 00000000000..cbe9ca6c0fd
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodecInfo.java
@@ -0,0 +1,119 @@
+package org.apache.lucene.index.codecs.docvalues;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedInts.Reader;
+import org.apache.lucene.util.packed.PackedInts.Writer;
+
+/**
+ * @lucene.internal
+ */
+class DocValuesCodecInfo {
+ public static final int FORMAT_CURRENT = 0;
+ static final String INFO_FILE_EXT = "inf";
+ private int[] docValuesFields = new int[1];
+ private int max;
+ private int pos;
+
+ public DocValuesCodecInfo() {
+ }
+
+ void add(int fieldId) {
+ if (pos >= docValuesFields.length) {
+ docValuesFields = ArrayUtil.grow(docValuesFields, pos + 1);
+ }
+ docValuesFields[pos++] = fieldId;
+ if (fieldId > max) {
+ max = fieldId;
+ }
+ }
+
+ String docValuesId(String segmentsName, String codecID, String fieldId) {
+ return segmentsName + "_" + codecID + "-" + fieldId;
+ }
+
+ void files(Directory dir, SegmentInfo segmentInfo, String codecId,
+ Set files) throws IOException {
+ final String file = IndexFileNames.segmentFileName(segmentInfo.name, codecId,
+ INFO_FILE_EXT);
+ files.add(file);
+ for (int i = 0; i < pos; i++) {
+ int field = docValuesFields[i];
+ String docValuesID = docValuesId(segmentInfo.name, codecId, "" + field);
+ files.add(IndexFileNames.segmentFileName(docValuesID, "",
+ org.apache.lucene.index.values.Writer.DATA_EXTENSION));
+ String idxFile = IndexFileNames.segmentFileName(docValuesID, "",
+ org.apache.lucene.index.values.Writer.INDEX_EXTENSION);
+ if (dir.fileExists(idxFile)) {
+ files.add(idxFile);
+ }
+ }
+ }
+
+ void write(SegmentWriteState state) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(state.segmentName,
+ state.codecId, INFO_FILE_EXT);
+ final IndexOutput out = state.directory.createOutput(fileName);
+ state.flushedFiles.add(fileName);
+ try {
+ out.writeInt(FORMAT_CURRENT);
+ Writer writer = PackedInts.getWriter(out, pos, PackedInts
+ .bitsRequired(max));
+ for (int i = 0; i < pos; i++) {
+ writer.add(docValuesFields[i]);
+ }
+ writer.finish();
+ } finally {
+ out.close();
+ }
+
+ }
+
+ void read(Directory directory, SegmentInfo info, String codecId)
+ throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(info.name, codecId,
+ INFO_FILE_EXT);
+ final IndexInput in = directory.openInput(fileName);
+ try {
+ in.readInt();
+ final Reader reader = PackedInts.getReader(in);
+ docValuesFields = new int[reader.size()];
+ for (int i = 0; i < docValuesFields.length; i++) {
+ docValuesFields[i] = (int) reader.get(i);
+ }
+ pos = docValuesFields.length;
+ } finally {
+ in.close();
+ }
+ }
+
+ IntsRef fieldIDs() {
+ return new IntsRef(docValuesFields, 0, pos);
+ }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
index 3d39701779d..5f9cd9702b9 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
@@ -88,10 +88,9 @@ public abstract class DocValuesConsumer {
}
}
- public static DocValuesConsumer create(String segmentName,
- Directory directory, FieldInfo field, String codecId, Comparator comp)
+ public static DocValuesConsumer create(String id,
+ Directory directory, FieldInfo field, Comparator comp)
throws IOException {
- final String id = segmentName + "_" + codecId;
return Writer.create(field.getDocValues(), id, directory, comp);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
index 426f9277894..adf9349e766 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
@@ -31,12 +31,15 @@ import org.apache.lucene.index.values.Ints;
import org.apache.lucene.index.values.Values;
import org.apache.lucene.index.values.Writer;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IntsRef;
public abstract class DocValuesProducerBase extends FieldsProducer{
protected final TreeMap docValues = new TreeMap();
+ private final DocValuesCodecInfo info = new DocValuesCodecInfo();
protected DocValuesProducerBase(SegmentInfo si, Directory dir, FieldInfos fieldInfo, String codecId) throws IOException {
+ info.read(dir, si, codecId);
load(fieldInfo, si.name, si.docCount, dir, codecId);
}
@@ -48,16 +51,15 @@ public abstract class DocValuesProducerBase extends FieldsProducer{
// Only opens files... doesn't actually load any values
protected void load(FieldInfos fieldInfos, String segment, int docCount,
Directory dir, String codecId) throws IOException {
- final int numFields = fieldInfos.size();
- for (int i = 0; i < numFields; i++) {
- final FieldInfo fieldInfo = fieldInfos.fieldInfo(i);
- final Values v = fieldInfo.getDocValues();
+ final IntsRef valueFields = info.fieldIDs();
+ for (int i = valueFields.offset; i < valueFields.offset + valueFields.length; i++) {
+ final int fieldNumber = valueFields.ints[i];
+ final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
+ assert fieldInfo.hasDocValues();
final String field = fieldInfo.name;
//TODO can we have a compound file per segment and codec for docvalues?
- final String id = IndexFileNames.segmentFileName(segment, codecId+"-"+fieldInfo.number, "");
- if (v != null && dir.fileExists(id + "." + Writer.DATA_EXTENSION)) {
- docValues.put(field, loadDocValues(docCount, dir, id, v));
- }
+ final String id = info.docValuesId( segment, codecId, fieldNumber+"");
+ docValues.put(field, loadDocValues(docCount, dir, id, fieldInfo.getDocValues()));
}
}
@@ -66,8 +68,6 @@ public abstract class DocValuesProducerBase extends FieldsProducer{
switch (v) {
case PACKED_INTS:
return Ints.getValues(dir, id, false);
- case PACKED_INTS_FIXED:
- return Ints.getValues(dir, id, true);
case SIMPLE_FLOAT_4BYTE:
return Floats.getValues(dir, id, docCount);
case SIMPLE_FLOAT_8BYTE:
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index 70343ca1eca..f9eeff57093 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -24,11 +24,14 @@ import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.values.DocValues.MissingValues;
import org.apache.lucene.index.values.DocValues.SortedSource;
import org.apache.lucene.index.values.DocValues.Source;
+import org.apache.lucene.index.values.DocValues.SourceEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
@@ -88,7 +91,7 @@ public final class Bytes {
throw new IllegalArgumentException("");
}
- // nocommit -- I can peek @ header to determing fixed/mode?
+ // TODO -- I can peek @ header to determine fixed/mode?
public static DocValues getValues(Directory dir, String id, Mode mode,
boolean fixedSize, int maxDoc) throws IOException {
if (fixedSize) {
@@ -123,15 +126,15 @@ public final class Bytes {
static abstract class BytesBaseSource extends Source {
protected final IndexInput datIn;
protected final IndexInput idxIn;
- protected final BytesRef defaultValue = new BytesRef();
protected final static int PAGED_BYTES_BITS = 15;
private final PagedBytes pagedBytes;
protected final PagedBytes.Reader data;
protected final long totalLengthInBytes;
- protected BytesBaseSource(IndexInput datIn, IndexInput idxIn, PagedBytes pagedBytes, long bytesToRead)
- throws IOException {
- assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
+ protected BytesBaseSource(IndexInput datIn, IndexInput idxIn,
+ PagedBytes pagedBytes, long bytesToRead) throws IOException {
+ assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
this.totalLengthInBytes = bytesToRead;
this.pagedBytes = pagedBytes;
@@ -146,12 +149,36 @@ public final class Bytes {
if (datIn != null)
datIn.close();
} finally {
- if (idxIn != null) // if straight
+ if (idxIn != null) // if straight - no index needed
idxIn.close();
}
}
+
+ protected abstract int maxDoc();
+
public long ramBytesUsed() {
- return 0; //TOODO
+ return 0; // TODO
+ }
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ final MissingValues missing = getMissing();
+ return new SourceEnum(attrSource, type(), this, maxDoc()) {
+ final BytesRef bytesRef = attr.bytes();
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ while (source.getBytes(target, bytesRef) == missing.bytesValue) {
+ if (++target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ }
+ return pos = target;
+ }
+ };
}
}
@@ -163,13 +190,14 @@ public final class Bytes {
protected final static int PAGED_BYTES_BITS = 15;
private final PagedBytes pagedBytes;
protected final PagedBytes.Reader data;
- protected final BytesRef bytesRef = new BytesRef();
protected final LookupResult lookupResult = new LookupResult();
private final Comparator comp;
-
- protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn, Comparator comp, PagedBytes pagedBytes, long bytesToRead) throws IOException {
- assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
+ protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn,
+ Comparator comp, PagedBytes pagedBytes, long bytesToRead)
+ throws IOException {
+ assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
this.pagedBytes = pagedBytes;
this.pagedBytes.copy(datIn, bytesToRead);
@@ -177,12 +205,12 @@ public final class Bytes {
this.idxIn = idxIn;
this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
: comp;
-
+
}
-
+
@Override
- public BytesRef getByOrd(int ord) {
- return ord == 0 ? defaultValue : deref(--ord);
+ public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+ return ord == 0 ? null : deref(--ord, bytesRef);
}
public void close() throws IOException {
@@ -191,14 +219,16 @@ public final class Bytes {
if (idxIn != null) // if straight
idxIn.close();
}
-
- protected abstract BytesRef deref(int ord);
-
- protected LookupResult binarySearch(BytesRef b, int low, int high) {
+ protected abstract int maxDoc();
+
+ protected abstract BytesRef deref(int ord, BytesRef bytesRef);
+
+ protected LookupResult binarySearch(BytesRef b, BytesRef bytesRef, int low,
+ int high) {
while (low <= high) {
int mid = (low + high) >>> 1;
- deref(mid);
+ deref(mid, bytesRef);
final int cmp = comp.compare(bytesRef, b);
if (cmp < 0) {
low = mid + 1;
@@ -215,6 +245,27 @@ public final class Bytes {
lookupResult.found = false;
return lookupResult;
}
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ final MissingValues missing = getMissing();
+ return new SourceEnum(attrSource, type(), this, maxDoc()) {
+ final BytesRef bytesRef = attr.bytes();
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ while (source.getBytes(target, bytesRef) == missing.bytesValue) {
+ if (++target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ }
+ return pos = target;
+ }
+ };
+ }
}
static abstract class BytesWriterBase extends Writer {
@@ -243,16 +294,16 @@ public final class Bytes {
if (initIndex)
initIndexOut();
}
-
+
protected void initDataOut() throws IOException {
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ DATA_EXTENSION));
CodecUtil.writeHeader(datOut, codecName, version);
}
protected void initIndexOut() throws IOException {
idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_INDEX_EXTENSION));
+ INDEX_EXTENSION));
CodecUtil.writeHeader(idxOut, codecName, version);
}
@@ -299,12 +350,11 @@ public final class Bytes {
@Override
public void files(Collection files) throws IOException {
assert datOut != null;
- files.add(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
if (idxOut != null) { // called after flush - so this must be initialized
- // if needed or present
+ // if needed or present
final String idxFile = IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_INDEX_EXTENSION);
+ INDEX_EXTENSION);
files.add(idxFile);
}
}
@@ -324,12 +374,12 @@ public final class Bytes {
int maxVersion, boolean doIndex) throws IOException {
this.id = id;
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ Writer.DATA_EXTENSION));
version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion);
if (doIndex) {
idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_INDEX_EXTENSION));
+ Writer.INDEX_EXTENSION));
final int version2 = CodecUtil.checkHeader(idxIn, codecName,
maxVersion, maxVersion);
assert version == version2;
@@ -345,7 +395,7 @@ public final class Bytes {
}
protected final IndexInput cloneIndex() { // TODO assert here for null
- // rather than return null
+ // rather than return null
return idxIn == null ? null : (IndexInput) idxIn.clone();
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
index 2ed2192f831..a0d84ff1d79 100644
--- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
@@ -40,8 +40,9 @@ public abstract class DocValues implements Closeable {
public Source getSource() throws IOException {
return cache.load(this);
}
-
- public SortedSource getSortedSorted(Comparator comparator) throws IOException {
+
+ public SortedSource getSortedSorted(Comparator comparator)
+ throws IOException {
return cache.laodSorted(this, comparator);
}
@@ -51,7 +52,7 @@ public abstract class DocValues implements Closeable {
}
public abstract Values type();
-
+
public void close() throws IOException {
this.cache.close(this);
}
@@ -69,6 +70,7 @@ public abstract class DocValues implements Closeable {
* used since it can handle all precisions.
*/
public static abstract class Source {
+ protected final MissingValues missingValues = new MissingValues();
public long getInt(int docID) {
throw new UnsupportedOperationException("ints are not supported");
@@ -78,7 +80,7 @@ public abstract class DocValues implements Closeable {
throw new UnsupportedOperationException("floats are not supported");
}
- public BytesRef getBytes(int docID) {
+ public BytesRef getBytes(int docID, BytesRef ref) {
throw new UnsupportedOperationException("bytes are not supported");
}
@@ -91,24 +93,56 @@ public abstract class DocValues implements Closeable {
}
public ValuesEnum getEnum() throws IOException {
- return getEnum(null);
+ return getEnum(new AttributeSource());
}
+
+ public MissingValues getMissing() {
+ return missingValues;
+ }
+
+ public abstract Values type();
- // nocommit - enable obtaining enum from source since this is already in
- // memory
- public/* abstract */ValuesEnum getEnum(AttributeSource attrSource)
- throws IOException {
- throw new UnsupportedOperationException();
- }
+ public abstract ValuesEnum getEnum(AttributeSource attrSource)
+ throws IOException;
public abstract long ramBytesUsed();
+
+ }
+
+ abstract static class SourceEnum extends ValuesEnum {
+ protected final Source source;
+ protected final int numDocs;
+ protected int pos = -1;
+
+ SourceEnum(AttributeSource attrs, Values type, Source source, int numDocs) {
+ super(attrs, type);
+
+ this.source = source;
+ this.numDocs = numDocs;
+ }
+
+ @Override
+ public void close() throws IOException {
+ }
+
+ @Override
+ public int docID() {
+ return pos;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if(pos == NO_MORE_DOCS)
+ return NO_MORE_DOCS;
+ return advance(pos + 1);
+ }
}
public static abstract class SortedSource extends Source {
@Override
- public BytesRef getBytes(int docID) {
- return getByOrd(ord(docID));
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
+ return getByOrd(ord(docID), bytesRef);
}
/**
@@ -119,7 +153,7 @@ public abstract class DocValues implements Closeable {
public abstract int ord(int docID);
/** Returns value for specified ord. */
- public abstract BytesRef getByOrd(int ord);
+ public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
public static class LookupResult {
public boolean found;
@@ -131,7 +165,22 @@ public abstract class DocValues implements Closeable {
* {@link LookupResult#found} is true, then ord is an exact match. The
* returned {@link LookupResult} may be reused across calls.
*/
- public abstract LookupResult getByValue(BytesRef value);
+ public final LookupResult getByValue(BytesRef value) {
+ return getByValue(value, new BytesRef());
+ }
+ public abstract LookupResult getByValue(BytesRef value, BytesRef tmpRef);
}
+ public final static class MissingValues {
+ public long longValue;
+ public double doubleValue;
+ public BytesRef bytesValue;
+
+ public final void copy(MissingValues values) {
+ longValue = values.longValue;
+ doubleValue = values.doubleValue;
+ bytesValue = values.bytesValue;
+ }
+ }
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
index 3be9918d55d..06a322b9972 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
@@ -49,7 +49,7 @@ class FixedDerefBytesImpl {
private int size = -1;
private int[] docToID;
private final BytesRefHash hash = new BytesRefHash(pool);
-
+
public Writer(Directory dir, String id) throws IOException {
this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
new AtomicLong());
@@ -65,7 +65,7 @@ class FixedDerefBytesImpl {
@Override
synchronized public void add(int docID, BytesRef bytes) throws IOException {
- if(bytes.length == 0) // default value - skip it
+ if (bytes.length == 0) // default value - skip it
return;
if (size == -1) {
size = bytes.length;
@@ -81,18 +81,18 @@ class FixedDerefBytesImpl {
// new added entry
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
} else {
- ord = (-ord)-1;
+ ord = (-ord) - 1;
}
if (docID >= docToID.length) {
int size = docToID.length;
docToID = ArrayUtil.grow(docToID, 1 + docID);
- bytesUsed.addAndGet((docToID.length - size) * RamUsageEstimator.NUM_BYTES_INT);
+ bytesUsed.addAndGet((docToID.length - size)
+ * RamUsageEstimator.NUM_BYTES_INT);
}
- docToID[docID] = 1+ord;
+ docToID[docID] = 1 + ord;
}
-
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
@@ -100,7 +100,7 @@ class FixedDerefBytesImpl {
if (datOut == null) // no added data
return;
initIndexOut();
- final int count = 1+hash.size();
+ final int count = 1 + hash.size();
idxOut.writeInt(count - 1);
// write index
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
@@ -135,17 +135,16 @@ class FixedDerefBytesImpl {
@Override
public Source load() throws IOException {
final IndexInput index = cloneIndex();
- return new Source(cloneData(), index , size, index.readInt());
+ return new Source(cloneData(), index, size, index.readInt());
}
private static class Source extends BytesBaseSource {
- private final BytesRef bytesRef = new BytesRef();
private final PackedInts.Reader index;
private final int size;
private final int numValues;
- protected Source(IndexInput datIn, IndexInput idxIn, int size, int numValues)
- throws IOException {
+ protected Source(IndexInput datIn, IndexInput idxIn, int size,
+ int numValues) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues);
this.size = size;
this.numValues = numValues;
@@ -153,24 +152,33 @@ class FixedDerefBytesImpl {
}
@Override
- public BytesRef getBytes(int docID) {
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int id = (int) index.get(docID);
if (id == 0) {
- return defaultValue;
+ return null;
}
return data.fill(bytesRef, ((id - 1) * size), size);
}
-
@Override
public int getValueCount() {
return numValues;
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_FIXED_DEREF;
+ }
+
+ @Override
+ protected int maxDoc() {
+ return index.size();
+ }
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
- return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME,
+ return new DerefBytesEnum(source, cloneData(), cloneIndex(),
size);
}
@@ -184,12 +192,12 @@ class FixedDerefBytesImpl {
private int pos = -1;
public DerefBytesEnum(AttributeSource source, IndexInput datIn,
- IndexInput idxIn, String codecName, int size) throws IOException {
- this(source, datIn, idxIn, codecName, size, Values.BYTES_FIXED_DEREF);
+ IndexInput idxIn, int size) throws IOException {
+ this(source, datIn, idxIn, size, Values.BYTES_FIXED_DEREF);
}
protected DerefBytesEnum(AttributeSource source, IndexInput datIn,
- IndexInput idxIn, String codecName, int size, Values enumType)
+ IndexInput idxIn, int size, Values enumType)
throws IOException {
super(source, enumType);
ref = attr.bytes();
@@ -207,14 +215,13 @@ class FixedDerefBytesImpl {
@Override
public int advance(int target) throws IOException {
if (target < valueCount) {
- final long address = idx.advance(target);
- pos = idx.ord();
- if(address == 0) {
- // default is empty
- ref.length = 0;
- ref.offset = 0;
- return pos;
+ long address;
+ while ((address = idx.advance(target)) == 0) {
+ if (++target >= valueCount) {
+ return pos = NO_MORE_DOCS;
+ }
}
+ pos = idx.ord();
fill(address, ref);
return pos;
}
@@ -223,6 +230,9 @@ class FixedDerefBytesImpl {
@Override
public int nextDoc() throws IOException {
+ if (pos >= valueCount) {
+ return pos = NO_MORE_DOCS;
+ }
return advance(pos + 1);
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
index e445cfb29bd..e826a709ee9 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
@@ -187,8 +187,6 @@ class FixedSortedBytesImpl {
this.size = size;
this.numValue = numValues;
index = PackedInts.getReader(idxIn);
-
- bytesRef.length = size;
}
@Override
@@ -197,8 +195,8 @@ class FixedSortedBytesImpl {
}
@Override
- public LookupResult getByValue(BytesRef bytes) {
- return binarySearch(bytes, 0, numValue - 1);
+ public LookupResult getByValue(BytesRef bytes, BytesRef tmpRef) {
+ return binarySearch(bytes, tmpRef, 0, numValue - 1);
}
public long ramBytesUsed() {
@@ -216,15 +214,25 @@ class FixedSortedBytesImpl {
return numValue;
}
@Override
- protected BytesRef deref(int ord) {
+ protected BytesRef deref(int ord, BytesRef bytesRef) {
return data.fill(bytesRef, (ord* size), size);
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_FIXED_SORTED;
+ }
+
+ @Override
+ protected int maxDoc() {
+ return index.size();
+ }
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
// do unsorted
- return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME,
+ return new DerefBytesEnum(source, cloneData(), cloneIndex(),
size);
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
index 202947c5eef..1ee7b6e996a 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
@@ -47,7 +47,7 @@ class FixedStraightBytesImpl {
super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, null);
}
- // nocommit - impl bulk copy here!
+ // TODO - impl bulk copy here!
@Override
synchronized public void add(int docID, BytesRef bytes) throws IOException {
@@ -133,16 +133,18 @@ class FixedStraightBytesImpl {
}
private static class Source extends BytesBaseSource {
- private final BytesRef bytesRef = new BytesRef();
private final int size;
+ private final int maxDoc;
public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size*maxDoc);
this.size = size;
+ this.missingValues.bytesValue = new BytesRef(size);
+ this.maxDoc = maxDoc;
}
@Override
- public BytesRef getBytes(int docID) {
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
return data.fill(bytesRef, docID * size, size);
}
@@ -150,6 +152,16 @@ class FixedStraightBytesImpl {
public int getValueCount() {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_FIXED_STRAIGHT;
+ }
+
+ @Override
+ protected int maxDoc() {
+ return maxDoc;
+ }
}
@Override
@@ -184,8 +196,6 @@ class FixedStraightBytesImpl {
@Override
public int advance(int target) throws IOException {
if(target >= maxDoc){
- ref.length = 0;
- ref.offset = 0;
return pos = NO_MORE_DOCS;
}
if((target-1) != pos) // pos inc == 1
@@ -201,6 +211,9 @@ class FixedStraightBytesImpl {
@Override
public int nextDoc() throws IOException {
+ if(pos >= maxDoc){
+ return pos = NO_MORE_DOCS;
+ }
return advance(pos+1);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index f844bba40e4..dcf984b0774 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -19,13 +19,15 @@ import org.apache.lucene.util.RamUsageEstimator;
* Exposes writer/reader for floating point values. You can specify 4 (java
* float) or 8 (java double) byte precision.
*/
-//TODO - add bulk copy where possible
+// TODO - add bulk copy where possible
public class Floats {
private static final String CODEC_NAME = "SimpleFloats";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
- private static final int INT_ZERO = Float.floatToRawIntBits(0.0f);
- private static final long LONG_ZERO = Double.doubleToRawLongBits(0.0);
+ private static final int INT_DEFAULT = Float
+ .floatToRawIntBits(Float.NEGATIVE_INFINITY);
+ private static final long LONG_DEFAULT = Double
+ .doubleToRawLongBits(Double.NEGATIVE_INFINITY);
public static Writer getWriter(Directory dir, String id, int precisionBytes)
throws IOException {
@@ -47,7 +49,6 @@ public class Floats {
abstract static class FloatsWriter extends Writer {
-
private final Directory dir;
private final String id;
private FloatsRef floatsRef;
@@ -64,7 +65,7 @@ public class Floats {
protected void initDatOut() throws IOException {
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ Writer.DATA_EXTENSION));
CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
datOut.writeByte(precision);
@@ -78,12 +79,12 @@ public class Floats {
protected void add(int docID) throws IOException {
add(docID, floatsRef.get());
}
-
+
@Override
public void add(int docID, ValuesAttribute attr) throws IOException {
final FloatsRef ref;
- if((ref = attr.floats()) != null)
- add(docID, ref.get());
+ if ((ref = attr.floats()) != null)
+ add(docID, ref.get());
}
@Override
@@ -113,14 +114,12 @@ public class Floats {
} else
super.merge(state);
}
-
+
@Override
public void files(Collection files) throws IOException {
- files.add(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION));
}
-
}
// Writes 4 bytes (float) per value
@@ -153,7 +152,7 @@ public class Floats {
return; // no data added - don't create file!
if (docCount > lastDocId + 1)
for (int i = lastDocId; i < docCount; i++) {
- datOut.writeInt(INT_ZERO); // default value
+ datOut.writeInt(INT_DEFAULT); // default value
}
datOut.close();
}
@@ -161,7 +160,7 @@ public class Floats {
@Override
protected int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
- datOut.writeInt(INT_ZERO);
+ datOut.writeInt(INT_DEFAULT);
}
return numValues;
}
@@ -196,7 +195,7 @@ public class Floats {
return; // no data added - don't create file!
if (docCount > lastDocId + 1)
for (int i = lastDocId; i < docCount; i++) {
- datOut.writeLong(LONG_ZERO); // default value
+ datOut.writeLong(LONG_DEFAULT); // default value
}
datOut.close();
}
@@ -204,7 +203,7 @@ public class Floats {
@Override
protected int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
- datOut.writeLong(LONG_ZERO);
+ datOut.writeLong(LONG_DEFAULT);
}
return numValues;
}
@@ -224,7 +223,7 @@ public class Floats {
protected FloatsReader(Directory dir, String id, int maxDoc)
throws IOException {
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ Writer.DATA_EXTENSION));
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
precisionBytes = datIn.readByte();
assert precisionBytes == 4 || precisionBytes == 8;
@@ -266,19 +265,43 @@ public class Floats {
Source4(ByteBuffer buffer) {
values = buffer.asFloatBuffer();
+ missingValues.doubleValue = Float.NEGATIVE_INFINITY;
}
@Override
public double getFloat(int docID) {
- final float f = values.get(docID);
- // nocommit should we return NaN as default instead of 0.0?
- return Float.isNaN(f) ? 0.0f : f;
+ return values.get(docID);
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit()
* RamUsageEstimator.NUM_BYTES_FLOAT;
}
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ final MissingValues missing = getMissing();
+ return new SourceEnum(attrSource, Values.SIMPLE_FLOAT_4BYTE, this, maxDoc) {
+ private final FloatsRef ref = attr.floats();
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= numDocs)
+ return pos = NO_MORE_DOCS;
+ while (missing.doubleValue == source.getFloat(target)) {
+ if (++target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ }
+ ref.floats[ref.offset] = source.getFloat(target);
+ return pos = target;
+ }
+ };
+ }
+
+ @Override
+ public Values type() {
+ return Values.SIMPLE_FLOAT_4BYTE;
+ }
}
private class Source8 extends Source {
@@ -286,19 +309,44 @@ public class Floats {
Source8(ByteBuffer buffer) {
values = buffer.asDoubleBuffer();
+ missingValues.doubleValue = Double.NEGATIVE_INFINITY;
+
}
@Override
public double getFloat(int docID) {
- final double d = values.get(docID);
- // TODO should we return NaN as default instead of 0.0?
- return Double.isNaN(d) ? 0.0d : d;
+ return values.get(docID);
}
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit()
* RamUsageEstimator.NUM_BYTES_DOUBLE;
}
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ final MissingValues missing = getMissing();
+ return new SourceEnum(attrSource, type(), this, maxDoc) {
+ private final FloatsRef ref = attr.floats();
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= numDocs)
+ return pos = NO_MORE_DOCS;
+ while (missing.doubleValue == source.getFloat(target)) {
+ if (++target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ }
+ ref.floats[ref.offset] = source.getFloat(target);
+ return pos = target;
+ }
+ };
+ }
+
+ @Override
+ public Values type() {
+ return Values.SIMPLE_FLOAT_8BYTE;
+ }
}
@Override
@@ -316,7 +364,7 @@ public class Floats {
return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc)
: new Floats8EnumImpl(source, indexInput, maxDoc);
}
-
+
@Override
public Values type() {
return precisionBytes == 4 ? Values.SIMPLE_FLOAT_4BYTE
@@ -336,8 +384,13 @@ public class Floats {
if (target >= maxDoc)
return pos = NO_MORE_DOCS;
dataIn.seek(fp + (target * precision));
- ref.floats[0] = Float.intBitsToFloat(dataIn.readInt());
- ref.offset = 0; // nocommit -- can we igore this?
+ int intBits;
+ while ((intBits = dataIn.readInt()) == INT_DEFAULT) {
+ if (++target >= maxDoc)
+ return pos = NO_MORE_DOCS;
+ }
+ ref.floats[0] = Float.intBitsToFloat(intBits);
+ ref.offset = 0;
return pos = target;
}
@@ -348,6 +401,9 @@ public class Floats {
@Override
public int nextDoc() throws IOException {
+ if (pos >= maxDoc) {
+ return pos = NO_MORE_DOCS;
+ }
return advance(pos + 1);
}
}
@@ -361,11 +417,17 @@ public class Floats {
@Override
public int advance(int target) throws IOException {
- if (target >= maxDoc)
+ if (target >= maxDoc) {
return pos = NO_MORE_DOCS;
+ }
dataIn.seek(fp + (target * precision));
- ref.floats[0] = Double.longBitsToDouble(dataIn.readLong());
- ref.offset = 0; // nocommit -- can we igore this?
+ long value;
+ while ((value = dataIn.readLong()) == LONG_DEFAULT) {
+ if (++target >= maxDoc)
+ return pos = NO_MORE_DOCS;
+ }
+ ref.floats[0] = Double.longBitsToDouble(value);
+ ref.offset = 0;
return pos = target;
}
@@ -376,6 +438,9 @@ public class Floats {
@Override
public int nextDoc() throws IOException {
+ if (pos >= maxDoc) {
+ return pos = NO_MORE_DOCS;
+ }
return advance(pos + 1);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
index 0c458cf205e..d1780936f5c 100644
--- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
@@ -21,8 +21,6 @@ import java.util.Arrays;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FloatsRef;
-import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.ReaderUtil;
public class MultiDocValues extends DocValues {
@@ -78,27 +76,26 @@ public class MultiDocValues extends DocValues {
public static class DummyDocValues extends DocValues {
final int maxDoc;
- final Values type;
- static final Source DUMMY = new DummySource();
+ final Source emptySoruce;
public DummyDocValues(int maxDoc, Values type) {
- this.type = type;
this.maxDoc = maxDoc;
+ this.emptySoruce = new EmptySource(type);
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- return new DummyEnum(attrSource, maxDoc, type);
+ return emptySoruce.getEnum(attrSource);
}
@Override
public Source load() throws IOException {
- return DUMMY;
+ return emptySoruce;
}
-
+
@Override
public Values type() {
- return type;
+ return emptySoruce.type();
}
public void close() throws IOException {
@@ -177,6 +174,7 @@ public class MultiDocValues extends DocValues {
public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts) {
this.docValuesIdx = docValuesIdx;
this.starts = starts;
+ assert docValuesIdx.length != 0;
}
@@ -193,7 +191,8 @@ public class MultiDocValues extends DocValues {
+ " for doc id: " + docID + " slices : " + Arrays.toString(starts);
assert docValuesIdx[idx] != null;
try {
- current = docValuesIdx[idx].docValues.load();
+ current = docValuesIdx[idx].docValues.getSource();
+ missingValues.copy(current.getMissing());
} catch (IOException e) {
throw new RuntimeException("load failed", e); // TODO how should we
// handle this
@@ -211,92 +210,62 @@ public class MultiDocValues extends DocValues {
return current.getFloat(doc);
}
- public BytesRef getBytes(int docID) {
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int doc = ensureSource(docID);
- return current.getBytes(doc);
+ return current.getBytes(doc, bytesRef);
}
public long ramBytesUsed() {
return current.ramBytesUsed();
}
- }
-
- private static class DummySource extends Source {
- private final BytesRef ref = new BytesRef();
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ throw new UnsupportedOperationException(); // TODO
+ }
@Override
- public BytesRef getBytes(int docID) {
- return ref;
+ public Values type() {
+ return docValuesIdx[0].docValues.type();
+ }
+
+ }
+
+ private static class EmptySource extends Source {
+ private final Values type;
+
+ public EmptySource(Values type) {
+ this.type = type;
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ return this.missingValues.bytesValue;
+
}
@Override
public double getFloat(int docID) {
- return 0.0d;
+ return missingValues.doubleValue;
}
@Override
public long getInt(int docID) {
- return 0;
+ return missingValues.longValue;
}
public long ramBytesUsed() {
return 0;
}
- }
- private static class DummyEnum extends ValuesEnum {
- private int pos = -1;
- private final int maxDoc;
-
- public DummyEnum(AttributeSource source, int maxDoc, Values type) {
- super(source, type);
- this.maxDoc = maxDoc;
- switch (type) {
- case BYTES_VAR_STRAIGHT:
- case BYTES_FIXED_STRAIGHT:
- case BYTES_FIXED_DEREF:
- case BYTES_FIXED_SORTED:
- case BYTES_VAR_DEREF:
- case BYTES_VAR_SORTED:
- // nocommit - this is not correct for Fixed_straight
- BytesRef bytes = attr.bytes();
- bytes.length = 0;
- bytes.offset = 0;
- break;
- case PACKED_INTS:
- case PACKED_INTS_FIXED:
- LongsRef ints = attr.ints();
- ints.set(0);
- break;
-
- case SIMPLE_FLOAT_4BYTE:
- case SIMPLE_FLOAT_8BYTE:
- FloatsRef floats = attr.floats();
- floats.set(0d);
- break;
- default:
- throw new IllegalArgumentException("unknown Values type: " + type);
- }
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ return ValuesEnum.emptyEnum(type);
}
@Override
- public void close() throws IOException {
- }
-
- @Override
- public int advance(int target) throws IOException {
- return pos = (pos < maxDoc ? target : NO_MORE_DOCS);
- }
-
- @Override
- public int docID() {
- return pos;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(pos + 1);
+ public Values type() {
+ return type;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
index f37f7bbbdff..91f56bdd660 100644
--- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.values.DocValues.MissingValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -27,6 +28,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
@@ -39,7 +41,6 @@ class PackedIntsImpl {
static final int VERSION_CURRENT = VERSION_START;
static class IntsWriter extends Writer {
-
// TODO: can we bulkcopy this on a merge?
private LongsRef intsRef;
@@ -49,8 +50,8 @@ class PackedIntsImpl {
private boolean started;
private final Directory dir;
private final String id;
- private int maxDocID;
- private int minDocID;
+ private OpenBitSet defaultValues = new OpenBitSet(1);
+ private int lastDocId = -1;
protected IntsWriter(Directory dir, String id) throws IOException {
this.dir = dir;
@@ -59,54 +60,58 @@ class PackedIntsImpl {
}
@Override
- synchronized public void add(int docID, long v) throws IOException {
-
+ public synchronized void add(int docID, long v) throws IOException {
+ assert lastDocId < docID;
if (!started) {
- minValue = maxValue = v;
- minDocID = maxDocID = docID;
started = true;
-
+ minValue = maxValue = v;
} else {
if (v < minValue) {
minValue = v;
} else if (v > maxValue) {
maxValue = v;
}
- if (docID < minDocID) {
- minDocID = docID;
- } else if (docID > maxDocID) {
- maxDocID = docID;
- }
}
+ defaultValues.set(docID);
+ lastDocId = docID;
+
if (docID >= docToValue.length) {
docToValue = ArrayUtil.grow(docToValue, 1 + docID);
+ defaultValues.ensureCapacity(docToValue.length);
+
}
docToValue[docID] = v;
}
@Override
- synchronized public void finish(int docCount) throws IOException {
- if(!started)
+ public synchronized void finish(int docCount) throws IOException {
+ if (!started)
return;
final IndexOutput datOut = dir.createOutput(IndexFileNames
- .segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+ .segmentFileName(id, "", DATA_EXTENSION));
CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
- // nocommit -- long can't work right since it's signed
+ // TODO -- long can't work right since it's signed
datOut.writeLong(minValue);
// write a default value to recognize docs without a value for that field
final long defaultValue = ++maxValue - minValue;
datOut.writeLong(defaultValue);
- PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts.bitsRequired(maxValue-minValue));
-
- final int limit = maxDocID + 1;
- for (int i = 0; i < minDocID; i++) {
- w.add(defaultValue);
+ PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts
+ .bitsRequired(maxValue - minValue));
+ final int firstDoc = defaultValues.nextSetBit(0);
+ assert firstDoc >= 0; // we have at least one value!
+ for (int i = 0; i < firstDoc; i++) {
+ w.add(defaultValue); // fill with defaults until first bit set
}
- for (int i = minDocID; i < limit; i++) {
+ lastDocId++;
+ for (int i = firstDoc; i < lastDocId;) {
w.add(docToValue[i] - minValue);
+ final int nextValue = defaultValues.nextSetBit(i);
+ for (i++; i < nextValue; i++) {
+ w.add(defaultValue); // fill all gaps
+ }
}
- for (int i = limit; i < docCount; i++) {
+ for (int i = lastDocId; i < docCount; i++) {
w.add(defaultValue);
}
w.finish();
@@ -128,19 +133,18 @@ class PackedIntsImpl {
protected void setNextAttribute(ValuesAttribute attr) {
intsRef = attr.ints();
}
-
+
@Override
public void add(int docID, ValuesAttribute attr) throws IOException {
final LongsRef ref;
- if((ref = attr.ints()) != null) {
+ if ((ref = attr.ints()) != null) {
add(docID, ref.get());
}
}
@Override
public void files(Collection files) throws IOException {
- files.add(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
}
}
@@ -153,7 +157,7 @@ class PackedIntsImpl {
protected IntsReader(Directory dir, String id) throws IOException {
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
- IndexFileNames.CSF_DATA_EXTENSION));
+ Writer.DATA_EXTENSION));
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
}
@@ -176,6 +180,7 @@ class PackedIntsImpl {
minValue = dataIn.readLong();
defaultValue = dataIn.readLong();
values = PackedInts.getReader(dataIn);
+ missingValues.longValue = minValue + defaultValue;
}
@Override
@@ -183,9 +188,7 @@ class PackedIntsImpl {
// TODO -- can we somehow avoid 2X method calls
// on each get? must push minValue down, and make
// PackedInts implement Ints.Source
- final long val = values.get(docID);
- // docs not having a value for that field must return a default value
- return val == defaultValue ? 0 : minValue + val;
+ return minValue + values.get(docID);
}
public long ramBytesUsed() {
@@ -193,6 +196,31 @@ class PackedIntsImpl {
return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+ values.getBitsPerValue() * values.size();
}
+
+ @Override
+ public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ final MissingValues missing = getMissing();
+ return new SourceEnum(attrSource, type(), this, values.size()) {
+ private final LongsRef ref = attr.ints();
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= numDocs)
+ return pos = NO_MORE_DOCS;
+ while (source.getInt(target) == missing.longValue) {
+ if (++target >= numDocs) {
+ return pos = NO_MORE_DOCS;
+ }
+ }
+ ref.ints[ref.offset] = source.getInt(target);
+ return pos = target;
+ }
+ };
+ }
+
+ @Override
+ public Values type() {
+ return Values.PACKED_INTS;
+ }
}
@Override
@@ -205,7 +233,7 @@ class PackedIntsImpl {
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new IntsEnumImpl(source, (IndexInput) datIn.clone());
}
-
+
@Override
public Values type() {
return Values.PACKED_INTS;
@@ -243,10 +271,17 @@ class PackedIntsImpl {
@Override
public int advance(int target) throws IOException {
- if (target >= maxDoc)
+ if (target >= maxDoc) {
return pos = NO_MORE_DOCS;
- final long val = ints.advance(target);
- ref.ints[0] = val == defaultValue? 0:minValue + val;
+ }
+ long val = ints.advance(target);
+ while (val == defaultValue) {
+ if (++target >= maxDoc) {
+ return pos = NO_MORE_DOCS;
+ }
+ val = ints.advance(target);
+ }
+ ref.ints[0] = minValue + val;
ref.offset = 0; // can we skip this?
return pos = target;
}
@@ -258,7 +293,10 @@ class PackedIntsImpl {
@Override
public int nextDoc() throws IOException {
- return advance(pos+1);
+ if (pos >= maxDoc) {
+ return pos = NO_MORE_DOCS;
+ }
+ return advance(pos + 1);
}
}
}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java
index d7d613c0510..e33c0cb9b1b 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Values.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Values.java
@@ -30,7 +30,6 @@ public enum Values {
* precision is fixed across the segment, and
* determined by the min/max values in the field. */
PACKED_INTS,
- PACKED_INTS_FIXED,
SIMPLE_FLOAT_4BYTE,
SIMPLE_FLOAT_8BYTE,
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
index 6cd1e0294c7..b69217bcbbe 100644
--- a/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
@@ -6,7 +6,6 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
-import org.apache.lucene.util.SetOnce;
public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribute {
private Values type;
@@ -45,7 +44,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut
floats = null;
break;
case PACKED_INTS:
- case PACKED_INTS_FIXED:
ints = new LongsRef(new long[1], 0, 1);
bytes = null;
floats = null;
@@ -84,7 +82,6 @@ public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribut
other.bytes.copy(bytes);
break;
case PACKED_INTS:
- case PACKED_INTS_FIXED:
other.ints.copy(ints);
break;
case SIMPLE_FLOAT_4BYTE:
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
index eed33457380..54bc8bf2c7e 100644
--- a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
+++ b/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
@@ -21,14 +21,16 @@ import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
-public abstract class ValuesEnum extends DocIdSetIterator{
+public abstract class ValuesEnum extends DocIdSetIterator {
private AttributeSource source;
protected final ValuesAttribute attr;
-
protected ValuesEnum(Values enumType) {
- this(null, enumType);
+ this(null, enumType);
}
protected ValuesEnum(AttributeSource source, Values enumType) {
@@ -39,6 +41,22 @@ public abstract class ValuesEnum extends DocIdSetIterator{
attr.setType(enumType);
}
+ public Values type() {
+ return attr.type();
+ }
+
+ public BytesRef bytes() {
+ return attr.bytes();
+ }
+
+ public FloatsRef getFloat() {
+ return attr.floats();
+ }
+
+ public LongsRef getInt() {
+ return attr.ints();
+ }
+
public AttributeSource attributes() {
if (source == null)
source = new AttributeSource();
@@ -59,4 +77,28 @@ public abstract class ValuesEnum extends DocIdSetIterator{
public abstract void close() throws IOException;
+ public static ValuesEnum emptyEnum(Values type) {
+ return new ValuesEnum(type) {
+ @Override
+ public int nextDoc() throws IOException {
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public int docID() {
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public void close() throws IOException {
+
+ }
+ };
+ }
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
index beb0c14704c..2dfa5bdc8a1 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -50,16 +50,16 @@ class VarDerefBytesImpl {
static final String CODEC_NAME = "VarDerefBytes";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
-
-
- private static class AddressParallelArray extends ParallelArrayBase {
+ private static class AddressParallelArray extends
+ ParallelArrayBase {
final int[] address;
-
+
AddressParallelArray(int size, AtomicLong bytesUsed) {
super(size, bytesUsed);
- address = new int[size];
+ address = new int[size];
}
+
@Override
protected int bytesPerEntry() {
return RamUsageEstimator.NUM_BYTES_INT + super.bytesPerEntry();
@@ -69,46 +69,50 @@ class VarDerefBytesImpl {
protected void copyTo(AddressParallelArray toArray, int numToCopy) {
super.copyTo(toArray, numToCopy);
System.arraycopy(address, 0, toArray.address, 0, size);
-
+
}
@Override
public AddressParallelArray newInstance(int size) {
return new AddressParallelArray(size, bytesUsed);
}
-
- }
+ }
static class Writer extends BytesWriterBase {
private int[] docToAddress;
private int address = 1;
-
- private final ParallelBytesStartArray array = new ParallelBytesStartArray(new AddressParallelArray(0, bytesUsed));
- private final BytesRefHash hash = new BytesRefHash(pool, 16, array) ;
- public Writer(Directory dir, String id) throws IOException {
+ private final ParallelBytesStartArray array = new ParallelBytesStartArray(
+ new AddressParallelArray(0, bytesUsed));
+ private final BytesRefHash hash = new BytesRefHash(pool, 16, array);
+
+ public Writer(Directory dir, String id) throws IOException {
this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
new AtomicLong());
}
- public Writer(Directory dir, String id, Allocator allocator, AtomicLong bytesUsed) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed);
+
+ public Writer(Directory dir, String id, Allocator allocator,
+ AtomicLong bytesUsed) throws IOException {
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
+ new ByteBlockPool(allocator), bytesUsed);
docToAddress = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
}
@Override
synchronized public void add(int docID, BytesRef bytes) throws IOException {
- if(bytes.length == 0)
+ if (bytes.length == 0)
return; // default
- if(datOut == null)
+ if (datOut == null)
initDataOut();
final int e = hash.add(bytes);
if (docID >= docToAddress.length) {
final int oldSize = docToAddress.length;
- docToAddress = ArrayUtil.grow(docToAddress, 1+docID);
- bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * (docToAddress.length - oldSize));
+ docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
+ * (docToAddress.length - oldSize));
}
final int docAddress;
if (e >= 0) {
@@ -117,12 +121,13 @@ class VarDerefBytesImpl {
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
address += bytes.length;
} else {
- docAddress = array.array.address[(-e)-1];
+ docAddress = array.array.address[(-e) - 1];
}
docToAddress[docID] = docAddress;
}
-
- private static int writePrefixLength(DataOutput datOut, BytesRef bytes) throws IOException{
+
+ private static int writePrefixLength(DataOutput datOut, BytesRef bytes)
+ throws IOException {
if (bytes.length < 128) {
datOut.writeByte((byte) bytes.length);
return 1;
@@ -132,7 +137,7 @@ class VarDerefBytesImpl {
return 2;
}
}
-
+
public long ramBytesUsed() {
return bytesUsed.get();
}
@@ -141,25 +146,26 @@ class VarDerefBytesImpl {
// some last docs that we didn't see
@Override
synchronized public void finish(int docCount) throws IOException {
- if(datOut == null)
+ if (datOut == null)
return;
initIndexOut();
- idxOut.writeInt(address-1);
+ idxOut.writeInt(address - 1);
// write index
// TODO(simonw): -- allow forcing fixed array (not -1)
// TODO(simonw): check the address calculation / make it more intuitive
- final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1));
+ final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
+ PackedInts.bitsRequired(address - 1));
final int limit;
if (docCount > docToAddress.length) {
limit = docToAddress.length;
} else {
limit = docCount;
}
- for(int i=0;i= docCount)
+ if (target >= docCount) {
return pos = NO_MORE_DOCS;
- final int ord = (int) docToOrdIndex.get(target) - 1;
- if (ord == -1) {
- bytesRef.length = 0;
- bytesRef.offset = 0;
- return pos = target;
}
- final long offset = ordToOffsetIndex.get(ord);
+ int ord;
+ while((ord =(int) docToOrdIndex.get(target)) == 0) {
+ if(++target >= docCount) {
+ return pos = NO_MORE_DOCS;
+ }
+ }
+ final long offset = ordToOffsetIndex.get(--ord);
final long nextOffset;
if (ord == valueCount - 1) {
nextOffset = totBytes;
@@ -306,6 +311,9 @@ class VarSortedBytesImpl {
@Override
public int nextDoc() throws IOException {
+ if (pos >= docCount) {
+ return pos = NO_MORE_DOCS;
+ }
return advance(pos + 1);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
index 04fd5939d34..0f3f6dff28f 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
@@ -41,15 +41,15 @@ class VarStraightBytesImpl {
static final int VERSION_CURRENT = VERSION_START;
static class Writer extends BytesWriterBase {
- private int address;
+ private long address;
// start at -1 if the first added value is > 0
private int lastDocID = -1;
- private int[] docToAddress;
+ private long[] docToAddress;
public Writer(Directory dir, String id, AtomicLong bytesUsed)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, bytesUsed);
- docToAddress = new int[1];
+ docToAddress = new long[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
}
@@ -89,11 +89,8 @@ class VarStraightBytesImpl {
return;
}
initIndexOut();
- // write all lengths to index
- // write index
fill(docCount);
- idxOut.writeVInt(address);
- // TODO(simonw): allow not -1
+ idxOut.writeVLong(address);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(address));
for (int i = 0; i < docCount; i++) {
@@ -125,20 +122,17 @@ class VarStraightBytesImpl {
}
private class Source extends BytesBaseSource {
- private final BytesRef bytesRef = new BytesRef();
private final PackedInts.Reader addresses;
public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
- super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVInt()); // TODO
- // should
- // be
- // long
+ super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong());
addresses = PackedInts.getReader(idxIn);
+ missingValues.bytesValue = new BytesRef(0); // empty
}
@Override
- public BytesRef getBytes(int docID) {
- final int address = (int) addresses.get(docID);
+ public BytesRef getBytes(int docID, BytesRef bytesRef) {
+ final long address = addresses.get(docID);
final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address)
: (int) (addresses.get(1 + docID) - address);
return data.fill(bytesRef, address, length);
@@ -148,14 +142,24 @@ class VarStraightBytesImpl {
public int getValueCount() {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public Values type() {
+ return Values.BYTES_VAR_STRAIGHT;
+ }
+
+ @Override
+ protected int maxDoc() {
+ return addresses.size();
+ }
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
- return new VarStrainghtBytesEnum(source, cloneData(), cloneIndex());
+ return new VarStraightBytesEnum(source, cloneData(), cloneIndex());
}
- private class VarStrainghtBytesEnum extends ValuesEnum {
+ private class VarStraightBytesEnum extends ValuesEnum {
private final PackedInts.Reader addresses;
private final IndexInput datIn;
private final IndexInput idxIn;
@@ -164,7 +168,7 @@ class VarStraightBytesImpl {
private final BytesRef ref;
private int pos = -1;
- protected VarStrainghtBytesEnum(AttributeSource source, IndexInput datIn,
+ protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
super(source, Values.BYTES_VAR_STRAIGHT);
totBytes = idxIn.readVInt();
@@ -185,13 +189,10 @@ class VarStraightBytesImpl {
@Override
public int advance(final int target) throws IOException {
if (target >= maxDoc) {
- ref.length = 0;
- ref.offset = 0;
return pos = NO_MORE_DOCS;
}
final long addr = addresses.get(target);
- if (addr == totBytes) {
- // nocommit is that a valid default value
+ if (addr == totBytes) { // empty values at the end
ref.length = 0;
ref.offset = 0;
return pos = target;
diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java
index b73b8ab4113..04471b25427 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Writer.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java
@@ -25,7 +25,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
public abstract class Writer extends DocValuesConsumer {
-
+
public static final String INDEX_EXTENSION = "idx";
public static final String DATA_EXTENSION = "dat";
@@ -63,23 +63,31 @@ public abstract class Writer extends DocValuesConsumer {
int docID = state.docBase;
final Bits bits = state.bits;
final int docCount = state.docCount;
- for (int i = 0; i < docCount; i++) {
- if (bits == null || !bits.get(i)) {
- if (valEnum.advance(i) == ValuesEnum.NO_MORE_DOCS)
- break;
- add(docID++);
+ int currentDocId;
+ if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) {
+ for (int i = 0; i < docCount; i++) {
+ if (bits == null || !bits.get(i)) {
+ if (currentDocId < i) {
+ if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) {
+ break; // advance can jump over default values
+ }
+ }
+ if (currentDocId == i) { // we are on the doc to merge
+ add(docID);
+ }
+ ++docID;
+ }
}
}
} finally {
valEnum.close();
}
}
-
- public static Writer create(Values v, String id,
- Directory directory, Comparator comp) throws IOException {
+
+ public static Writer create(Values v, String id, Directory directory,
+ Comparator comp) throws IOException {
switch (v) {
case PACKED_INTS:
- case PACKED_INTS_FIXED:
return Ints.getWriter(directory, id, true);
case SIMPLE_FLOAT_4BYTE:
return Floats.getWriter(directory, id, 4);
diff --git a/lucene/src/java/org/apache/lucene/util/PagedBytes.java b/lucene/src/java/org/apache/lucene/util/PagedBytes.java
index 9d42cdd94f7..d09ef809ab7 100644
--- a/lucene/src/java/org/apache/lucene/util/PagedBytes.java
+++ b/lucene/src/java/org/apache/lucene/util/PagedBytes.java
@@ -90,7 +90,7 @@ public final class PagedBytes {
}
return b;
}
-
+
/** Reads length as 1 or 2 byte vInt prefix, starting @ start */
public BytesRef fillUsingLengthPrefix(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
@@ -145,6 +145,49 @@ public final class PagedBytes {
}
return start;
}
+
+ /**
+ * Reads length as 1 or 2 byte vInt prefix, starting @ start and fill the
+ * given {@link BytesRef} with the byte slice starting after the length
+ * prefix.
+ * @lucene.internal
+ **/
+ public BytesRef fillUsingLengthPrefix4(BytesRef b, long start) {
+ final int index = (int) (start >> blockBits);
+ int offset = (int) (start & blockMask);
+ final byte[] block = blocks[index];
+ final int length;
+ if ((block[offset] & 128) == 0) {
+ length = block[offset];
+ offset = offset+1;
+ } else {
+ length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff);
+ offset = offset+2;
+ assert length > 0;
+ }
+ assert length >= 0: "length=" + length;
+ b.length = length;
+ if (blockSize - offset >= length) {
+ // Within block
+ b.offset = offset;
+ b.bytes = blocks[index];
+ } else {
+ // Split
+ byte[] buffer = threadBuffers.get();
+ if (buffer == null) {
+ buffer = new byte[length];
+ threadBuffers.set(buffer);
+ } else if (buffer.length < length) {
+ buffer = ArrayUtil.grow(buffer, length);
+ threadBuffers.set(buffer);
+ }
+ b.bytes = buffer;
+ b.offset = 0;
+ System.arraycopy(blocks[index], offset, buffer, 0, blockSize-offset);
+ System.arraycopy(blocks[1+index], 0, buffer, blockSize-offset, length-(blockSize-offset));
+ }
+ return b;
+ }
/** @lucene.internal */
public byte[][] getBlocks() {
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
index 8086871678c..2b2015cf5f1 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
@@ -117,14 +117,15 @@ public class TestDocValues extends LuceneTestCase {
s = getSource(r);
ss = null;
}
-
for (int i = 0; i < 100; i++) {
final int idx = 2 * i;
- assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx));
- assertEquals("doc " + idx, values[idx], s.getBytes(idx).utf8ToString());
+ assertNotNull("doc " + idx + "; value=" + values[idx], s.getBytes(idx,
+ bytesRef));
+ assertEquals("doc " + idx, values[idx], s.getBytes(idx, bytesRef)
+ .utf8ToString());
if (ss != null) {
- assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx))
- .utf8ToString());
+ assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx),
+ bytesRef).utf8ToString());
DocValues.SortedSource.LookupResult result = ss
.getByValue(new BytesRef(values[idx]));
assertTrue(result.found);
@@ -141,7 +142,8 @@ public class TestDocValues extends LuceneTestCase {
SortedSource.LookupResult result = ss.getByValue(bytesValue);
if (result.found) {
assert result.ord > 0;
- assertTrue(bytesValue.bytesEquals(ss.getByOrd(result.ord)));
+ assertTrue(bytesValue
+ .bytesEquals(ss.getByOrd(result.ord, bytesRef)));
int count = 0;
for (int k = 0; k < 100; k++) {
if (bytesValue.utf8ToString().equals(values[2 * k])) {
@@ -153,18 +155,18 @@ public class TestDocValues extends LuceneTestCase {
} else {
assert result.ord >= 0;
if (result.ord == 0) {
- final BytesRef firstRef = ss.getByOrd(1);
+ final BytesRef firstRef = ss.getByOrd(1, bytesRef);
// random string was before our first
assertTrue(firstRef.compareTo(bytesValue) > 0);
} else if (result.ord == numValues) {
- final BytesRef lastRef = ss.getByOrd(numValues);
+ final BytesRef lastRef = ss.getByOrd(numValues, bytesRef);
// random string was after our last
assertTrue(lastRef.compareTo(bytesValue) < 0);
} else {
// random string fell between two of our values
- final BytesRef before = (BytesRef) ss.getByOrd(result.ord)
- .clone();
- final BytesRef after = ss.getByOrd(result.ord + 1);
+ final BytesRef before = (BytesRef) ss.getByOrd(result.ord,
+ bytesRef).clone();
+ final BytesRef after = ss.getByOrd(result.ord + 1, bytesRef);
assertTrue(before.compareTo(bytesValue) < 0);
assertTrue(bytesValue.compareTo(after) < 0);
@@ -180,64 +182,65 @@ public class TestDocValues extends LuceneTestCase {
public void testInts() throws IOException {
long maxV = 1;
- final int NUM_VALUES = 1000;
+ final int NUM_VALUES = 777 + random.nextInt(777);
final long[] values = new long[NUM_VALUES];
for (int rx = 1; rx < 63; rx++, maxV *= 2) {
- for (int b = 0; b < 2; b++) {
- Directory dir = newDirectory();
- boolean useFixedArrays = b == 0;
- Writer w = Ints.getWriter(dir, "test", useFixedArrays);
- for (int i = 0; i < NUM_VALUES; i++) {
- final long v = random.nextLong() % (1 + maxV);
- values[i] = v;
- w.add(i, v);
- }
- final int additionalDocs = 1 + random.nextInt(9);
- w.finish(NUM_VALUES + additionalDocs);
-
- DocValues r = Ints.getValues(dir, "test", useFixedArrays);
- for (int iter = 0; iter < 2; iter++) {
- Source s = getSource(r);
- for (int i = 0; i < NUM_VALUES; i++) {
- final long v = s.getInt(i);
- assertEquals("index " + i + " b: " + b, values[i], v);
- }
- }
-
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = r.getEnum();
- ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
- LongsRef ints = attr.ints();
- for (int i = 0; i < NUM_VALUES; i++) {
- assertEquals(i, iEnum.nextDoc());
- assertEquals(values[i], ints.get());
- }
- for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
- assertEquals(i, iEnum.nextDoc());
- assertEquals("" + i, 0, ints.get());
- }
-
- iEnum.close();
- }
-
- for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = r.getEnum();
- ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
- LongsRef ints = attr.ints();
- for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
- assertEquals(i, iEnum.advance(i));
- assertEquals(values[i], ints.get());
- }
- for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
- assertEquals(i, iEnum.advance(i));
- assertEquals("" + i, 0, ints.get());
- }
-
- iEnum.close();
- }
- r.close();
- dir.close();
+ Directory dir = newDirectory();
+ Writer w = Ints.getWriter(dir, "test", false);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = random.nextLong() % (1 + maxV);
+ values[i] = v;
+ w.add(i, v);
}
+ final int additionalDocs = 1 + random.nextInt(9);
+ w.finish(NUM_VALUES + additionalDocs);
+
+ DocValues r = Ints.getValues(dir, "test", false);
+ for (int iter = 0; iter < 2; iter++) {
+ Source s = getSource(r);
+ for (int i = 0; i < NUM_VALUES; i++) {
+ final long v = s.getInt(i);
+ assertEquals("index " + i, values[i], v);
+ }
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ LongsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i++) {
+ assertEquals(i, iEnum.nextDoc());
+ assertEquals(values[i], ints.get());
+ }
+ if (iEnum.docID() < NUM_VALUES - 1) {
+ assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1));
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
+ }
+
+ iEnum.close();
+ }
+
+ for (int iter = 0; iter < 2; iter++) {
+ ValuesEnum iEnum = r.getEnum();
+ ValuesAttribute attr = iEnum.addAttribute(ValuesAttribute.class);
+ LongsRef ints = attr.ints();
+ for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
+ assertEquals(i, iEnum.advance(i));
+ assertEquals(values[i], ints.get());
+ }
+ if (iEnum.docID() < NUM_VALUES - 1) {
+ assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1));
+ }
+ for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
+ assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
+ }
+
+ iEnum.close();
+ }
+ r.close();
+ dir.close();
}
}
@@ -248,7 +251,7 @@ public class TestDocValues extends LuceneTestCase {
private void runTestFloats(int precision, double delta) throws IOException {
Directory dir = newDirectory();
Writer w = Floats.getWriter(dir, "test", precision);
- final int NUM_VALUES = 1000;
+ final int NUM_VALUES = 777 + random.nextInt(777);
final double[] values = new double[NUM_VALUES];
for (int i = 0; i < NUM_VALUES; i++) {
final double v = precision == 4 ? random.nextFloat() : random
@@ -269,29 +272,25 @@ public class TestDocValues extends LuceneTestCase {
for (int iter = 0; iter < 2; iter++) {
ValuesEnum fEnum = r.getEnum();
- ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
- FloatsRef floats = attr.floats();
+ FloatsRef floats = fEnum.getFloat();
for (int i = 0; i < NUM_VALUES; i++) {
assertEquals(i, fEnum.nextDoc());
assertEquals(values[i], floats.get(), delta);
}
for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
- assertEquals(i, fEnum.nextDoc());
- assertEquals(0.0, floats.get(), delta);
+ assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.nextDoc());
}
fEnum.close();
}
for (int iter = 0; iter < 2; iter++) {
ValuesEnum fEnum = r.getEnum();
- ValuesAttribute attr = fEnum.addAttribute(ValuesAttribute.class);
- FloatsRef floats = attr.floats();
+ FloatsRef floats = fEnum.getFloat();
for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
assertEquals(i, fEnum.advance(i));
assertEquals(values[i], floats.get(), delta);
}
for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
- assertEquals(i, fEnum.advance(i));
- assertEquals(0.0, floats.get(), delta);
+ assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.advance(i));
}
fEnum.close();
}
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
index fcac9640ef7..5ab0c649adf 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
@@ -43,6 +43,7 @@ import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
+import org.apache.lucene.index.values.DocValues.MissingValues;
import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
@@ -86,49 +87,40 @@ public class TestDocValuesIndexing extends LuceneTestCase {
* Tests complete indexing of {@link Values} including deletions, merging and
* sparse value fields on Compound-File
*/
- public void testCFSIndex() throws IOException {
- // without deletions
- IndexWriterConfig cfg = writerConfig(true);
- // primitives - no deletes
- runTestNumerics(cfg, false);
+ public void testIndexBytesNoDeletesCFS() throws IOException {
+ runTestIndexBytes(writerConfig(true), false);
+ }
- cfg = writerConfig(true);
- // bytes - no deletes
- runTestIndexBytes(cfg, false);
+ public void testIndexBytesDeletesCFS() throws IOException {
+ runTestIndexBytes(writerConfig(true), true);
+ }
- // with deletions
- cfg = writerConfig(true);
- // primitives
- runTestNumerics(cfg, true);
+ public void testIndexNumericsNoDeletesCFS() throws IOException {
+ runTestNumerics(writerConfig(true), false);
+ }
- cfg = writerConfig(true);
- // bytes
- runTestIndexBytes(cfg, true);
+ public void testIndexNumericsDeletesCFS() throws IOException {
+ runTestNumerics(writerConfig(true), true);
}
/**
* Tests complete indexing of {@link Values} including deletions, merging and
* sparse value fields on None-Compound-File
*/
- public void testIndex() throws IOException {
- //
- // without deletions
- IndexWriterConfig cfg = writerConfig(false);
- // primitives - no deletes
- runTestNumerics(cfg, false);
+ public void testIndexBytesNoDeletes() throws IOException {
+ runTestIndexBytes(writerConfig(false), false);
+ }
- cfg = writerConfig(false);
- // bytes - no deletes
- runTestIndexBytes(cfg, false);
+ public void testIndexBytesDeletes() throws IOException {
+ runTestIndexBytes(writerConfig(false), true);
+ }
- // with deletions
- cfg = writerConfig(false);
- // primitives
- runTestNumerics(cfg, true);
+ public void testIndexNumericsNoDeletes() throws IOException {
+ runTestNumerics(writerConfig(false), false);
+ }
- cfg = writerConfig(false);
- // bytes
- runTestIndexBytes(cfg, true);
+ public void testIndexNumericsDeletes() throws IOException {
+ runTestNumerics(writerConfig(false), true);
}
private IndexWriterConfig writerConfig(boolean useCompoundFile) {
@@ -150,7 +142,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
throws IOException {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
- final int numValues = 350;
+ final int numValues = 179 + random.nextInt(151);
final List numVariantList = new ArrayList(NUMERICS);
// run in random order to test if fill works correctly during merges
@@ -163,22 +155,24 @@ public class TestDocValuesIndexing extends LuceneTestCase {
final int numRemainingValues = (int) (numValues - deleted.cardinality());
final int base = r.numDocs() - numRemainingValues;
switch (val) {
- case PACKED_INTS:
- case PACKED_INTS_FIXED: {
+ case PACKED_INTS: {
DocValues intsReader = getDocValues(r, val.name());
assertNotNull(intsReader);
Source ints = getSource(intsReader);
+ MissingValues missing = ints.getMissing();
- ValuesEnum intsEnum = intsReader.getEnum();
- assertNotNull(intsEnum);
- LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
for (int i = 0; i < base; i++) {
- assertEquals("index " + i, 0, ints.getInt(i));
- assertEquals(val.name() + " base: " + base + " index: " + i, i,
- random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc());
- assertEquals(0, enumRef.get());
+ long value = ints.getInt(i);
+ assertEquals("index " + i, missing.longValue, value);
}
+
+ ValuesEnum intsEnum = getValuesEnum(intsReader);
+ assertTrue(intsEnum.advance(0) >= base);
+
+ intsEnum = getValuesEnum(intsReader);
+ LongsRef enumRef = intsEnum.getInt();
+
int expected = 0;
for (int i = base; i < r.numDocs(); i++, expected++) {
while (deleted.get(expected)) {
@@ -197,18 +191,18 @@ public class TestDocValuesIndexing extends LuceneTestCase {
DocValues floatReader = getDocValues(r, val.name());
assertNotNull(floatReader);
Source floats = getSource(floatReader);
- ValuesEnum floatEnum = floatReader.getEnum();
- assertNotNull(floatEnum);
- FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class)
- .floats();
+ MissingValues missing = floats.getMissing();
for (int i = 0; i < base; i++) {
- assertEquals(" floats failed for doc: " + i + " base: " + base, 0.0d,
- floats.getFloat(i), 0.0d);
- assertEquals(i, random.nextBoolean() ? floatEnum.advance(i)
- : floatEnum.nextDoc());
- assertEquals("index " + i, 0.0, enumRef.get(), 0.0);
+ double value = floats.getFloat(i);
+ assertEquals(" floats failed for doc: " + i + " base: " + base,
+ missing.doubleValue, value, 0.0d);
}
+ ValuesEnum floatEnum = getValuesEnum(floatReader);
+ assertTrue(floatEnum.advance(0) >= base);
+
+ floatEnum = getValuesEnum(floatReader);
+ FloatsRef enumRef = floatEnum.getFloat();
int expected = 0;
for (int i = base; i < r.numDocs(); i++, expected++) {
while (deleted.get(expected)) {
@@ -235,92 +229,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
d.close();
}
- private static EnumSet BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF,
- Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT,
- Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED,
- Values.BYTES_VAR_STRAIGHT);
-
- private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS,
- Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE,
- Values.SIMPLE_FLOAT_8BYTE);
-
- private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
- Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
- Index.NO };
-
- private OpenBitSet indexValues(IndexWriter w, int numValues, Values value,
- List valueVarList, boolean withDeletions, int multOfSeven)
- throws CorruptIndexException, IOException {
- final boolean isNumeric = NUMERICS.contains(value);
- OpenBitSet deleted = new OpenBitSet(numValues);
- Document doc = new Document();
- Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)];
- Fieldable field = random.nextBoolean() ? new ValuesField(value.name())
- : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random,
- 10), idx == Index.NO ? Store.YES : Store.NO, idx);
- doc.add(field);
-
- ValuesAttribute valuesAttribute = ValuesField.values(field);
- valuesAttribute.setType(value);
- final LongsRef intsRef = valuesAttribute.ints();
- final FloatsRef floatsRef = valuesAttribute.floats();
- final BytesRef bytesRef = valuesAttribute.bytes();
-
- final String idBase = value.name() + "_";
- final byte[] b = new byte[multOfSeven];
- if (bytesRef != null) {
- bytesRef.bytes = b;
- bytesRef.length = b.length;
- bytesRef.offset = 0;
- }
- byte upto = 0;
- for (int i = 0; i < numValues; i++) {
- if (isNumeric) {
- switch (value) {
- case PACKED_INTS:
- case PACKED_INTS_FIXED:
- intsRef.set(i);
- break;
- case SIMPLE_FLOAT_4BYTE:
- case SIMPLE_FLOAT_8BYTE:
- floatsRef.set(2.0f * i);
- break;
- default:
- fail("unexpected value " + value);
- }
- } else {
- for (int j = 0; j < b.length; j++) {
- b[j] = upto++;
- }
- }
- doc.removeFields("id");
- doc.add(new Field("id", idBase + i, Store.YES,
- Index.NOT_ANALYZED_NO_NORMS));
- w.addDocument(doc);
-
- if (i % 7 == 0) {
- if (withDeletions && random.nextBoolean()) {
- Values val = valueVarList.get(random.nextInt(1 + valueVarList
- .indexOf(value)));
- final int randInt = val == value ? random.nextInt(1 + i) : random
- .nextInt(numValues);
- w.deleteDocuments(new Term("id", val.name() + "_" + randInt));
- if (val == value) {
- deleted.set(randInt);
- }
- }
- w.commit();
-
- }
- }
- w.commit();
-
- // TODO test unoptimized with deletions
- if (withDeletions || random.nextBoolean())
- w.optimize();
- return deleted;
- }
-
public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
throws CorruptIndexException, LockObtainFailedException, IOException {
final Directory d = newDirectory();
@@ -343,30 +251,32 @@ public class TestDocValuesIndexing extends LuceneTestCase {
assertNotNull("field " + byteIndexValue.name()
+ " returned null reader - maybe merged failed", bytesReader);
Source bytes = getSource(bytesReader);
- ValuesEnum bytesEnum = bytesReader.getEnum();
- assertNotNull(bytesEnum);
- final ValuesAttribute attr = bytesEnum
- .addAttribute(ValuesAttribute.class);
byte upto = 0;
+
// test the filled up slots for correctness
+ MissingValues missing = bytes.getMissing();
for (int i = 0; i < base; i++) {
- final BytesRef br = bytes.getBytes(i);
+
+ BytesRef br = bytes.getBytes(i, new BytesRef());
String msg = " field: " + byteIndexValue.name() + " at index: " + i
+ " base: " + base + " numDocs:" + r.numDocs();
switch (byteIndexValue) {
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_STRAIGHT:
- assertEquals(i, bytesEnum.advance(i));
// fixed straight returns bytesref with zero bytes all of fixed
// length
- assertNotNull("expected none null - " + msg, br);
- if (br.length != 0) {
- assertEquals("expected zero bytes of length " + bytesSize + " - "
- + msg, bytesSize, br.length);
- for (int j = 0; j < br.length; j++) {
- assertEquals("Byte at index " + j + " doesn't match - " + msg, 0,
- br.bytes[br.offset + j]);
+ if (missing.bytesValue != null) {
+ assertNotNull("expected none null - " + msg, br);
+ if (br.length != 0) {
+ assertEquals("expected zero bytes of length " + bytesSize + " - "
+ + msg, bytesSize, br.length);
+ for (int j = 0; j < br.length; j++) {
+ assertEquals("Byte at index " + j + " doesn't match - " + msg,
+ 0, br.bytes[br.offset + j]);
+ }
}
+ } else {
+ assertNull("expected null - " + msg + " " + br, br);
}
break;
case BYTES_VAR_SORTED:
@@ -374,16 +284,18 @@ public class TestDocValuesIndexing extends LuceneTestCase {
case BYTES_VAR_DEREF:
case BYTES_FIXED_DEREF:
default:
- assertNotNull("expected none null - " + msg, br);
- if (br.length != 0) {
- bytes.getBytes(i);
- }
- assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0,
- br.length);
+ assertNull("expected null - " + msg + " " + br, br);
+ // make sure we advance at least until base
+ ValuesEnum bytesEnum = getValuesEnum(bytesReader);
+ final int advancedTo = bytesEnum.advance(0);
+ assertTrue(byteIndexValue.name() + " advanced failed base:" + base
+ + " advancedTo: " + advancedTo, base <= advancedTo);
+
}
}
- final BytesRef enumRef = attr.bytes();
+ ValuesEnum bytesEnum = getValuesEnum(bytesReader);
+ final BytesRef enumRef = bytesEnum.bytes();
// test the actual doc values added in this iteration
assertEquals(base + numRemainingValues, r.numDocs());
int v = 0;
@@ -395,14 +307,20 @@ public class TestDocValuesIndexing extends LuceneTestCase {
upto += bytesSize;
}
- BytesRef br = bytes.getBytes(i);
- if (bytesEnum.docID() != i)
+ BytesRef br = bytes.getBytes(i, new BytesRef());
+ if (bytesEnum.docID() != i) {
assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum
.advance(i));
+ }
for (int j = 0; j < br.length; j++, upto++) {
assertEquals(
"EnumRef Byte at index " + j + " doesn't match - " + msg, upto,
enumRef.bytes[enumRef.offset + j]);
+ if (!(br.bytes.length > br.offset + j))
+ br = bytes.getBytes(i, new BytesRef());
+ assertTrue("BytesRef index exceeded [" + msg + "] offset: "
+ + br.offset + " length: " + br.length + " index: "
+ + (br.offset + j), br.bytes.length > br.offset + j);
assertEquals("SourceRef Byte at index " + j + " doesn't match - "
+ msg, upto, br.bytes[br.offset + j]);
}
@@ -442,8 +360,113 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
private Source getSource(DocValues values) throws IOException {
- // getSource uses cache internally
- return random.nextBoolean() ? values.load() : values.getSource();
+ Source source;
+ if (random.nextInt(10) == 0) {
+ source = values.load();
+ } else {
+ // getSource uses cache internally
+ source = values.getSource();
+ }
+ assertNotNull(source);
+ return source;
+ }
+
+ private ValuesEnum getValuesEnum(DocValues values) throws IOException {
+ ValuesEnum valuesEnum;
+ if (!(values instanceof MultiDocValues) && random.nextInt(10) == 0) {
+ // TODO not supported by MultiDocValues yet!
+ valuesEnum = getSource(values).getEnum();
+ } else {
+ valuesEnum = values.getEnum();
+
+ }
+ assertNotNull(valuesEnum);
+ return valuesEnum;
+ }
+
+ private static EnumSet BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF,
+ Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT,
+ Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED,
+ Values.BYTES_VAR_STRAIGHT);
+
+ private static EnumSet NUMERICS = EnumSet.of(Values.PACKED_INTS,
+ Values.SIMPLE_FLOAT_4BYTE, Values.SIMPLE_FLOAT_8BYTE);
+
+ private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
+ Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
+ Index.NO };
+
+ private OpenBitSet indexValues(IndexWriter w, int numValues, Values value,
+ List valueVarList, boolean withDeletions, int multOfSeven)
+ throws CorruptIndexException, IOException {
+ final boolean isNumeric = NUMERICS.contains(value);
+ OpenBitSet deleted = new OpenBitSet(numValues);
+ Document doc = new Document();
+ Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)];
+ Fieldable field = random.nextBoolean() ? new ValuesField(value.name())
+ : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random,
+ 10), idx == Index.NO ? Store.YES : Store.NO, idx);
+ doc.add(field);
+
+ ValuesAttribute valuesAttribute = ValuesField.values(field);
+ valuesAttribute.setType(value);
+ final LongsRef intsRef = valuesAttribute.ints();
+ final FloatsRef floatsRef = valuesAttribute.floats();
+ final BytesRef bytesRef = valuesAttribute.bytes();
+
+ final String idBase = value.name() + "_";
+ final byte[] b = new byte[multOfSeven];
+ if (bytesRef != null) {
+ bytesRef.bytes = b;
+ bytesRef.length = b.length;
+ bytesRef.offset = 0;
+ }
+ byte upto = 0;
+ for (int i = 0; i < numValues; i++) {
+ if (isNumeric) {
+ switch (value) {
+ case PACKED_INTS:
+ intsRef.set(i);
+ break;
+ case SIMPLE_FLOAT_4BYTE:
+ case SIMPLE_FLOAT_8BYTE:
+ floatsRef.set(2.0f * i);
+ break;
+ default:
+ fail("unexpected value " + value);
+ }
+ } else {
+ for (int j = 0; j < b.length; j++) {
+ b[j] = upto++;
+ }
+ }
+ doc.removeFields("id");
+ doc.add(new Field("id", idBase + i, Store.YES,
+ Index.NOT_ANALYZED_NO_NORMS));
+ w.addDocument(doc);
+
+ if (i % 7 == 0) {
+ if (withDeletions && random.nextBoolean()) {
+ Values val = valueVarList.get(random.nextInt(1 + valueVarList
+ .indexOf(value)));
+ final int randInt = val == value ? random.nextInt(1 + i) : random
+ .nextInt(numValues);
+ w.deleteDocuments(new Term("id", val.name() + "_" + randInt));
+ if (val == value) {
+ deleted.set(randInt);
+ }
+ }
+ if (random.nextInt(10) == 0) {
+ w.commit();
+ }
+ }
+ }
+ w.commit();
+
+ // TODO test unoptimized with deletions
+ if (withDeletions || random.nextBoolean())
+ w.optimize();
+ return deleted;
}
}
From d936615b0afc5f6c1a9ac2a8d60f97a08bc117d9 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 23 Nov 2010 22:45:54 +0000
Subject: [PATCH 016/116] added missing ASL headers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1038368 13f79535-47bb-0310-9956-ffa450edef68
---
.../apache/lucene/index/values/Floats.java | 17 +++++++++++++-
.../org/apache/lucene/index/values/Ints.java | 23 ++++++++++++++++---
2 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index dcf984b0774..38afe7da467 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -1,5 +1,20 @@
package org.apache.lucene.index.values;
-
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java
index 9b4e585c64d..d3f0e691c1c 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Ints.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java
@@ -1,24 +1,41 @@
package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
import java.io.IOException;
import org.apache.lucene.index.values.PackedIntsImpl.IntsReader;
import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter;
import org.apache.lucene.store.Directory;
+
//TODO - add bulk copy where possible
public class Ints {
private Ints() {
}
-
public static Writer getWriter(Directory dir, String id, boolean useFixedArray)
throws IOException {
- //TODO - implement fixed?!
+ // TODO - implement fixed?!
return new IntsWriter(dir, id);
}
- public static DocValues getValues(Directory dir, String id, boolean useFixedArray) throws IOException {
+ public static DocValues getValues(Directory dir, String id,
+ boolean useFixedArray) throws IOException {
return new IntsReader(dir, id);
}
}
From 399c93850c5a80cf2f6ddd2491bc9656daa8119a Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Tue, 30 Nov 2010 14:45:45 +0000
Subject: [PATCH 017/116] LUCENE-2186: added tracking of used bytes during
indexing
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1040544 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/index/DocFieldProcessor.java | 1 +
.../apache/lucene/index/DocumentsWriter.java | 4 +-
.../apache/lucene/index/SegmentMerger.java | 3 +-
.../lucene/index/SegmentWriteState.java | 6 +-
.../lucene/index/codecs/FieldsConsumer.java | 2 +-
.../codecs/docvalues/DocValuesCodec.java | 3 +-
.../codecs/docvalues/DocValuesConsumer.java | 15 +-
.../org/apache/lucene/index/values/Bytes.java | 25 ++--
.../apache/lucene/index/values/DocValues.java | 12 +-
.../index/values/FixedDerefBytesImpl.java | 18 ++-
.../index/values/FixedSortedBytesImpl.java | 19 +--
.../index/values/FixedStraightBytesImpl.java | 4 +-
.../apache/lucene/index/values/Floats.java | 29 ++--
.../org/apache/lucene/index/values/Ints.java | 5 +-
.../lucene/index/values/MultiDocValues.java | 12 +-
.../lucene/index/values/PackedIntsImpl.java | 24 ++--
.../apache/lucene/index/values/Values.java | 4 +-
.../index/values/VarDerefBytesImpl.java | 70 +++++----
.../index/values/VarSortedBytesImpl.java | 22 +--
.../index/values/VarStraightBytesImpl.java | 4 +-
.../apache/lucene/index/values/Writer.java | 25 ++--
.../org/apache/lucene/util/BytesRefHash.java | 135 +++++++++---------
.../org/apache/lucene/util/FloatsRef.java | 24 +++-
.../java/org/apache/lucene/util/LongsRef.java | 34 +++--
.../org/apache/lucene/util/PagedBytes.java | 14 +-
.../org/apache/lucene/index/TestCodecs.java | 3 +-
.../lucene/index/values/TestDocValues.java | 14 +-
.../index/values/TestDocValuesIndexing.java | 73 +++++++++-
28 files changed, 369 insertions(+), 235 deletions(-)
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index 1eeeacb1176..4b37b0481c0 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -64,6 +64,7 @@ final class DocFieldProcessor extends DocConsumer {
}
valuesConsumer = fieldsConsumer.addValuesField(fieldInfo);
docValues.put(name, valuesConsumer);
+
}
return valuesConsumer;
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
index 175ab09bc68..296c57af867 100644
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -604,14 +604,14 @@ final class DocumentsWriter {
initSegmentName(onlyDocStore);
final SegmentCodecs info = SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs);
flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos,
- docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info);
+ docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info, bytesUsed);
}
SegmentWriteState segWriteState() {
final SegmentCodecs info = SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs);
return new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos,
docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(),
- info);
+ info, bytesUsed);
}
/** Returns the SegmentCodecs used to flush the last segment */
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index 05ded0aa5ba..d66de3d41b1 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader.FieldOption;
@@ -366,7 +367,7 @@ final class SegmentMerger {
}
}
- segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, null, docCount, 0, termIndexInterval, codecInfo);
+ segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, null, docCount, 0, termIndexInterval, codecInfo, new AtomicLong(0));
return docCount;
}
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
index 427e6ba70b5..30d8db64c03 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.PrintStream;
import java.util.Collection;
import java.util.HashSet;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.store.Directory;
@@ -35,6 +36,7 @@ public class SegmentWriteState {
public final int numDocs;
public int numDocsInStore;
public final Collection flushedFiles;
+ public final AtomicLong bytesUsed;
final SegmentCodecs segmentCodecs;
public final String codecId;
@@ -62,7 +64,7 @@ public class SegmentWriteState {
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
String docStoreSegmentName, int numDocs,
- int numDocsInStore, int termIndexInterval, SegmentCodecs segmentCodecs) {
+ int numDocsInStore, int termIndexInterval, SegmentCodecs segmentCodecs, AtomicLong bytesUsed) {
this.infoStream = infoStream;
this.directory = directory;
this.segmentName = segmentName;
@@ -74,6 +76,7 @@ public class SegmentWriteState {
this.segmentCodecs = segmentCodecs;
flushedFiles = new HashSet();
codecId = "";
+ this.bytesUsed = bytesUsed;
}
/**
@@ -91,5 +94,6 @@ public class SegmentWriteState {
segmentCodecs = state.segmentCodecs;
flushedFiles = state.flushedFiles;
this.codecId = codecId;
+ bytesUsed = state.bytesUsed;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
index e4e29b79bd1..27d1a87d0c1 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
@@ -44,7 +44,7 @@ public abstract class FieldsConsumer implements Closeable {
public abstract TermsConsumer addField(FieldInfo field) throws IOException;
/** Adds a new DocValuesField */
- public /*abstract*/ DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+ public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
throw new UnsupportedOperationException("docvalues are not supported");
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
index 2a4a880b790..43264f67b84 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
@@ -22,6 +22,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldsEnum;
@@ -107,7 +108,7 @@ public class DocValuesCodec extends Codec {
+ field.number),
// TODO can we have a compound file per segment and codec for
// docvalues?
- state.directory, field, comparator);
+ state.directory, field, comparator, state.bytesUsed);
info.add(field.number);
return consumer;
}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
index 5f9cd9702b9..2a6a7c7a88f 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesConsumer.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index.codecs.docvalues;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
@@ -35,6 +36,16 @@ import org.apache.lucene.util.BytesRef;
// TODO this might need to go in the codec package since is a direct relative to
// TermsConsumer
public abstract class DocValuesConsumer {
+
+ protected AtomicLong bytesUsed = new AtomicLong(0);
+
+ protected DocValuesConsumer(AtomicLong bytesUsed) {
+ this.bytesUsed = bytesUsed;
+ }
+
+ public final long bytesUsed() {
+ return this.bytesUsed.get();
+ }
public abstract void add(int docID, ValuesAttribute attr) throws IOException;
@@ -89,8 +100,8 @@ public abstract class DocValuesConsumer {
}
public static DocValuesConsumer create(String id,
- Directory directory, FieldInfo field, Comparator comp)
+ Directory directory, FieldInfo field, Comparator comp, AtomicLong bytesUsed)
throws IOException {
- return Writer.create(field.getDocValues(), id, directory, comp);
+ return Writer.create(field.getDocValues(), id, directory, comp, bytesUsed);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index f9eeff57093..89cd3441c51 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -24,7 +24,7 @@ import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.values.DocValues.MissingValues;
+import org.apache.lucene.index.values.DocValues.MissingValue;
import org.apache.lucene.index.values.DocValues.SortedSource;
import org.apache.lucene.index.values.DocValues.Source;
import org.apache.lucene.index.values.DocValues.SourceEnum;
@@ -64,7 +64,7 @@ public final class Bytes {
// TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
public static Writer getWriter(Directory dir, String id, Mode mode,
- Comparator comp, boolean fixedSize) throws IOException {
+ Comparator comp, boolean fixedSize, AtomicLong bytesUsed) throws IOException {
if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
@@ -74,17 +74,17 @@ public final class Bytes {
if (mode == Mode.STRAIGHT) {
return new FixedStraightBytesImpl.Writer(dir, id);
} else if (mode == Mode.DEREF) {
- return new FixedDerefBytesImpl.Writer(dir, id);
+ return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed);
} else if (mode == Mode.SORTED) {
- return new FixedSortedBytesImpl.Writer(dir, id, comp);
+ return new FixedSortedBytesImpl.Writer(dir, id, comp, bytesUsed);
}
} else {
if (mode == Mode.STRAIGHT) {
- return new VarStraightBytesImpl.Writer(dir, id);
+ return new VarStraightBytesImpl.Writer(dir, id, bytesUsed);
} else if (mode == Mode.DEREF) {
- return new VarDerefBytesImpl.Writer(dir, id);
+ return new VarDerefBytesImpl.Writer(dir, id, bytesUsed);
} else if (mode == Mode.SORTED) {
- return new VarSortedBytesImpl.Writer(dir, id, comp);
+ return new VarSortedBytesImpl.Writer(dir, id, comp, bytesUsed);
}
}
@@ -162,7 +162,7 @@ public final class Bytes {
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- final MissingValues missing = getMissing();
+ final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, maxDoc()) {
final BytesRef bytesRef = attr.bytes();
@@ -248,7 +248,7 @@ public final class Bytes {
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- final MissingValues missing = getMissing();
+ final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, maxDoc()) {
final BytesRef bytesRef = attr.bytes();
@@ -275,20 +275,19 @@ public final class Bytes {
protected IndexOutput idxOut;
protected IndexOutput datOut;
protected BytesRef bytesRef;
- private String codecName;
- private int version;
+ private final String codecName;
+ private final int version;
protected final ByteBlockPool pool;
- protected final AtomicLong bytesUsed;
protected BytesWriterBase(Directory dir, String id, String codecName,
int version, boolean initIndex, boolean initData, ByteBlockPool pool,
AtomicLong bytesUsed) throws IOException {
+ super(bytesUsed);
this.dir = dir;
this.id = id;
this.codecName = codecName;
this.version = version;
this.pool = pool;
- this.bytesUsed = bytesUsed;
if (initData)
initDataOut();
if (initIndex)
diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
index a0d84ff1d79..d734d5e7b0b 100644
--- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
@@ -70,7 +70,7 @@ public abstract class DocValues implements Closeable {
* used since it can handle all precisions.
*/
public static abstract class Source {
- protected final MissingValues missingValues = new MissingValues();
+ protected final MissingValue missingValue = new MissingValue();
public long getInt(int docID) {
throw new UnsupportedOperationException("ints are not supported");
@@ -96,8 +96,8 @@ public abstract class DocValues implements Closeable {
return getEnum(new AttributeSource());
}
- public MissingValues getMissing() {
- return missingValues;
+ public MissingValue getMissing() {
+ return missingValue;
}
public abstract Values type();
@@ -105,8 +105,6 @@ public abstract class DocValues implements Closeable {
public abstract ValuesEnum getEnum(AttributeSource attrSource)
throws IOException;
- public abstract long ramBytesUsed();
-
}
abstract static class SourceEnum extends ValuesEnum {
@@ -171,12 +169,12 @@ public abstract class DocValues implements Closeable {
public abstract LookupResult getByValue(BytesRef value, BytesRef tmpRef);
}
- public final static class MissingValues {
+ public final static class MissingValue {
public long longValue;
public double doubleValue;
public BytesRef bytesValue;
- public final void copy(MissingValues values) {
+ public final void copy(MissingValue values) {
longValue = values.longValue;
doubleValue = values.doubleValue;
bytesValue = values.bytesValue;
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
index 06a322b9972..f5df15dd7d6 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
@@ -34,6 +34,7 @@ import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
@@ -48,11 +49,12 @@ class FixedDerefBytesImpl {
static class Writer extends BytesWriterBase {
private int size = -1;
private int[] docToID;
- private final BytesRefHash hash = new BytesRefHash(pool);
+ private final BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY,
+ new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
- public Writer(Directory dir, String id) throws IOException {
+ public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException {
this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
- new AtomicLong());
+ bytesUsed);
}
public Writer(Directory dir, String id, Allocator allocator,
@@ -60,7 +62,7 @@ class FixedDerefBytesImpl {
super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
new ByteBlockPool(allocator), bytesUsed);
docToID = new int[1];
- bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); // TODO BytesRefHash uses bytes too!
}
@Override
@@ -85,7 +87,7 @@ class FixedDerefBytesImpl {
}
if (docID >= docToID.length) {
- int size = docToID.length;
+ final int size = docToID.length;
docToID = ArrayUtil.grow(docToID, 1 + docID);
bytesUsed.addAndGet((docToID.length - size)
* RamUsageEstimator.NUM_BYTES_INT);
@@ -114,9 +116,11 @@ class FixedDerefBytesImpl {
w.add(0);
}
w.finish();
- hash.clear();
-
+ hash.close();
super.finish(docCount);
+ bytesUsed.addAndGet((-docToID.length)
+ * RamUsageEstimator.NUM_BYTES_INT);
+ docToID = null;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
index e826a709ee9..f19ac893f49 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
@@ -37,6 +37,7 @@ import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
@@ -53,12 +54,13 @@ class FixedSortedBytesImpl {
private int[] docToEntry;
private final Comparator comp;
- private final BytesRefHash hash = new BytesRefHash(pool);
+ private final BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY,
+ new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
- public Writer(Directory dir, String id, Comparator comp)
+ public Writer(Directory dir, String id, Comparator comp, AtomicLong bytesUsed)
throws IOException {
this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
- new AtomicLong());
+ bytesUsed);
}
public Writer(Directory dir, String id, Comparator comp,
@@ -148,6 +150,7 @@ class FixedSortedBytesImpl {
bytesUsed.addAndGet((-docToEntry.length)
* RamUsageEstimator.NUM_BYTES_INT);
docToEntry = null;
+ hash.close();
}
}
@@ -199,16 +202,6 @@ class FixedSortedBytesImpl {
return binarySearch(bytes, tmpRef, 0, numValue - 1);
}
- public long ramBytesUsed() {
- // TODO(simonw): move ram calcultation to PackedInts?
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
- + size
- * numValue
- + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index
- .getBitsPerValue()
- * index.size());
- }
-
@Override
public int getValueCount() {
return numValue;
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
index 1ee7b6e996a..ef760835b64 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
@@ -81,7 +81,7 @@ class FixedStraightBytesImpl {
oneRecord = new byte[size];
}
fill(state.docBase);
- // nocommit should we add a transfer to API to each reader?
+ // TODO should we add a transfer to API to each reader?
datOut.copyBytes(reader.cloneData(), size * maxDocs);
lastDocID += maxDocs-1;
} else
@@ -139,7 +139,7 @@ class FixedStraightBytesImpl {
public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size*maxDoc);
this.size = size;
- this.missingValues.bytesValue = new BytesRef(size);
+ this.missingValue.bytesValue = new BytesRef(size);
this.maxDoc = maxDoc;
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index 38afe7da467..f53345c9c3d 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -20,6 +20,7 @@ import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import java.nio.FloatBuffer;
import java.util.Collection;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.Directory;
@@ -44,16 +45,16 @@ public class Floats {
private static final long LONG_DEFAULT = Double
.doubleToRawLongBits(Double.NEGATIVE_INFINITY);
- public static Writer getWriter(Directory dir, String id, int precisionBytes)
+ public static Writer getWriter(Directory dir, String id, int precisionBytes, AtomicLong bytesUsed)
throws IOException {
if (precisionBytes != 4 && precisionBytes != 8) {
throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
+ precisionBytes);
}
if (precisionBytes == 4) {
- return new Float4Writer(dir, id);
+ return new Float4Writer(dir, id, bytesUsed);
} else {
- return new Float8Writer(dir, id);
+ return new Float8Writer(dir, id, bytesUsed);
}
}
@@ -63,7 +64,6 @@ public class Floats {
}
abstract static class FloatsWriter extends Writer {
-
private final Directory dir;
private final String id;
private FloatsRef floatsRef;
@@ -71,8 +71,9 @@ public class Floats {
protected IndexOutput datOut;
private final byte precision;
- protected FloatsWriter(Directory dir, String id, int precision)
+ protected FloatsWriter(Directory dir, String id, int precision, AtomicLong bytesUsed)
throws IOException {
+ super(bytesUsed);
this.dir = dir;
this.id = id;
this.precision = (byte) precision;
@@ -113,7 +114,7 @@ public class Floats {
protected void merge(MergeState state) throws IOException {
if (state.bits == null && state.reader instanceof FloatsReader) {
// no deletes - bulk copy
- // nocommit - should be do bulks with deletes too?
+ // TODO: should we do bulks with deletes too?
final FloatsReader reader = (FloatsReader) state.reader;
assert reader.precisionBytes == (int) precision;
if (reader.maxDoc == 0)
@@ -140,8 +141,8 @@ public class Floats {
// Writes 4 bytes (float) per value
static class Float4Writer extends FloatsWriter {
- protected Float4Writer(Directory dir, String id) throws IOException {
- super(dir, id, 4);
+ protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException {
+ super(dir, id, 4, bytesUsed);
}
@Override
@@ -184,8 +185,8 @@ public class Floats {
// Writes 8 bytes (double) per value
static class Float8Writer extends FloatsWriter {
- protected Float8Writer(Directory dir, String id) throws IOException {
- super(dir, id, 8);
+ protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException {
+ super(dir, id, 8, bytesUsed);
}
@Override
@@ -280,7 +281,7 @@ public class Floats {
Source4(ByteBuffer buffer) {
values = buffer.asFloatBuffer();
- missingValues.doubleValue = Float.NEGATIVE_INFINITY;
+ missingValue.doubleValue = Float.NEGATIVE_INFINITY;
}
@Override
@@ -295,7 +296,7 @@ public class Floats {
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- final MissingValues missing = getMissing();
+ final MissingValue missing = getMissing();
return new SourceEnum(attrSource, Values.SIMPLE_FLOAT_4BYTE, this, maxDoc) {
private final FloatsRef ref = attr.floats();
@Override
@@ -324,7 +325,7 @@ public class Floats {
Source8(ByteBuffer buffer) {
values = buffer.asDoubleBuffer();
- missingValues.doubleValue = Double.NEGATIVE_INFINITY;
+ missingValue.doubleValue = Double.NEGATIVE_INFINITY;
}
@@ -340,7 +341,7 @@ public class Floats {
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- final MissingValues missing = getMissing();
+ final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, maxDoc) {
private final FloatsRef ref = attr.floats();
@Override
diff --git a/lucene/src/java/org/apache/lucene/index/values/Ints.java b/lucene/src/java/org/apache/lucene/index/values/Ints.java
index d3f0e691c1c..7955d7c7753 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Ints.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Ints.java
@@ -17,6 +17,7 @@ package org.apache.lucene.index.values;
*/
import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.values.PackedIntsImpl.IntsReader;
import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter;
@@ -28,10 +29,10 @@ public class Ints {
private Ints() {
}
- public static Writer getWriter(Directory dir, String id, boolean useFixedArray)
+ public static Writer getWriter(Directory dir, String id, boolean useFixedArray, AtomicLong bytesUsed)
throws IOException {
// TODO - implement fixed?!
- return new IntsWriter(dir, id);
+ return new IntsWriter(dir, id, bytesUsed);
}
public static DocValues getValues(Directory dir, String id,
diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
index d1780936f5c..0e81c25d725 100644
--- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
@@ -192,7 +192,7 @@ public class MultiDocValues extends DocValues {
assert docValuesIdx[idx] != null;
try {
current = docValuesIdx[idx].docValues.getSource();
- missingValues.copy(current.getMissing());
+ missingValue.copy(current.getMissing());
} catch (IOException e) {
throw new RuntimeException("load failed", e); // TODO how should we
// handle this
@@ -215,10 +215,6 @@ public class MultiDocValues extends DocValues {
return current.getBytes(doc, bytesRef);
}
- public long ramBytesUsed() {
- return current.ramBytesUsed();
- }
-
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
throw new UnsupportedOperationException(); // TODO
@@ -240,18 +236,18 @@ public class MultiDocValues extends DocValues {
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
- return this.missingValues.bytesValue;
+ return this.missingValue.bytesValue;
}
@Override
public double getFloat(int docID) {
- return missingValues.doubleValue;
+ return missingValue.doubleValue;
}
@Override
public long getInt(int docID) {
- return missingValues.longValue;
+ return missingValue.longValue;
}
public long ramBytesUsed() {
diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
index 91f56bdd660..61f19e7cb5a 100644
--- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
@@ -18,9 +18,9 @@ package org.apache.lucene.index.values;
*/
import java.io.IOException;
import java.util.Collection;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.values.DocValues.MissingValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -50,13 +50,15 @@ class PackedIntsImpl {
private boolean started;
private final Directory dir;
private final String id;
- private OpenBitSet defaultValues = new OpenBitSet(1);
+ private final OpenBitSet defaultValues = new OpenBitSet(1);
private int lastDocId = -1;
- protected IntsWriter(Directory dir, String id) throws IOException {
+ protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed) throws IOException {
+ super(bytesUsed);
this.dir = dir;
this.id = id;
docToValue = new long[1];
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the bitset needs memory too
}
@Override
@@ -76,9 +78,10 @@ class PackedIntsImpl {
lastDocId = docID;
if (docID >= docToValue.length) {
+ final long len = docToValue.length ;
docToValue = ArrayUtil.grow(docToValue, 1 + docID);
defaultValues.ensureCapacity(docToValue.length);
-
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG * ((docToValue.length) - len));
}
docToValue[docID] = v;
}
@@ -115,13 +118,10 @@ class PackedIntsImpl {
w.add(defaultValue);
}
w.finish();
-
datOut.close();
- }
-
- public long ramBytesUsed() {
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToValue.length
- * RamUsageEstimator.NUM_BYTES_LONG;
+ bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG * docToValue.length ));
+ docToValue = null;
+
}
@Override
@@ -180,7 +180,7 @@ class PackedIntsImpl {
minValue = dataIn.readLong();
defaultValue = dataIn.readLong();
values = PackedInts.getReader(dataIn);
- missingValues.longValue = minValue + defaultValue;
+ missingValue.longValue = minValue + defaultValue;
}
@Override
@@ -199,7 +199,7 @@ class PackedIntsImpl {
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- final MissingValues missing = getMissing();
+ final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, values.size()) {
private final LongsRef ref = attr.ints();
@Override
diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Values.java
index e33c0cb9b1b..56921dd36c2 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Values.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Values.java
@@ -22,8 +22,8 @@ package org.apache.lucene.index.values;
* values into RAM, exposing a random access API, when
* loaded.
*
- * NOTE: This feature is experimental and the
- * API is free to change in non-backwards-compatible ways. */
+ * @lucene.experimental
+ */
public enum Values {
/** Integral value is stored as packed ints. The bit
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
index 2dfa5bdc8a1..b0e89ce92cd 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -37,8 +37,7 @@ import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
-import org.apache.lucene.util.BytesRefHash.ParallelArrayBase;
-import org.apache.lucene.util.BytesRefHash.ParallelBytesStartArray;
+import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores variable-length byte[] by deref, ie when two docs
@@ -51,30 +50,47 @@ class VarDerefBytesImpl {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
- private static class AddressParallelArray extends
- ParallelArrayBase {
- final int[] address;
+ private static final class AddressByteStartArray extends
+ TrackingDirectBytesStartArray {
+ int[] address;
- AddressParallelArray(int size, AtomicLong bytesUsed) {
+ AddressByteStartArray(int size, AtomicLong bytesUsed) {
super(size, bytesUsed);
- address = new int[size];
}
@Override
- protected int bytesPerEntry() {
- return RamUsageEstimator.NUM_BYTES_INT + super.bytesPerEntry();
+ public AtomicLong bytesUsed() {
+ return bytesUsed;
}
@Override
- protected void copyTo(AddressParallelArray toArray, int numToCopy) {
- super.copyTo(toArray, numToCopy);
- System.arraycopy(address, 0, toArray.address, 0, size);
-
+ public int[] clear() {
+ if (address != null) {
+ bytesUsed.addAndGet(-address.length * RamUsageEstimator.NUM_BYTES_INT);
+ address = null;
+ }
+ return super.clear();
}
@Override
- public AddressParallelArray newInstance(int size) {
- return new AddressParallelArray(size, bytesUsed);
+ public int[] grow() {
+ assert address != null;
+ final int oldSize = address.length;
+ final int[] retVal = super.grow();
+ address = ArrayUtil.grow(address, retVal.length);
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
+ * (address.length - oldSize));
+ return retVal;
+ }
+
+ @Override
+ public int[] init() {
+ if (address == null) {
+ address = new int[ArrayUtil.oversize(initSize,
+ RamUsageEstimator.NUM_BYTES_INT)];
+ bytesUsed.addAndGet((address.length) * RamUsageEstimator.NUM_BYTES_INT);
+ }
+ return super.init();
}
}
@@ -83,13 +99,14 @@ class VarDerefBytesImpl {
private int[] docToAddress;
private int address = 1;
- private final ParallelBytesStartArray array = new ParallelBytesStartArray(
- new AddressParallelArray(0, bytesUsed));
+ private final AddressByteStartArray array = new AddressByteStartArray(1,
+ bytesUsed);
private final BytesRefHash hash = new BytesRefHash(pool, 16, array);
- public Writer(Directory dir, String id) throws IOException {
+ public Writer(Directory dir, String id, AtomicLong bytesUsed)
+ throws IOException {
this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
- new AtomicLong());
+ bytesUsed);
}
public Writer(Directory dir, String id, Allocator allocator,
@@ -116,12 +133,12 @@ class VarDerefBytesImpl {
}
final int docAddress;
if (e >= 0) {
- docAddress = array.array.address[e] = address;
+ docAddress = array.address[e] = address;
address += writePrefixLength(datOut, bytes);
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
address += bytes.length;
} else {
- docAddress = array.array.address[(-e) - 1];
+ docAddress = array.address[(-e) - 1];
}
docToAddress[docID] = docAddress;
}
@@ -138,10 +155,6 @@ class VarDerefBytesImpl {
}
}
- public long ramBytesUsed() {
- return bytesUsed.get();
- }
-
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
@@ -169,8 +182,11 @@ class VarDerefBytesImpl {
w.add(0);
}
w.finish();
- hash.clear(true);
+ hash.close();
super.finish(docCount);
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
+ * (-docToAddress.length));
+ docToAddress = null;
}
}
@@ -202,7 +218,7 @@ class VarDerefBytesImpl {
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
long address = index.get(docID);
- return address == 0 ? null : data.fillUsingLengthPrefix4(bytesRef,
+ return address == 0 ? null : data.fillSliceWithPrefix(bytesRef,
--address);
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
index 4504ee4f431..0c22fd8ae54 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
@@ -36,6 +36,7 @@ import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores variable-length byte[] by deref, ie when two docs
@@ -52,12 +53,12 @@ class VarSortedBytesImpl {
private int[] docToEntry;
private final Comparator comp;
- private final BytesRefHash hash = new BytesRefHash(pool);
+ private final BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY,
+ new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
- public Writer(Directory dir, String id, Comparator comp)
+ public Writer(Directory dir, String id, Comparator comp, AtomicLong bytesUsed)
throws IOException {
- this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
- new AtomicLong());
+ this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE), bytesUsed);
}
public Writer(Directory dir, String id, Comparator comp,
@@ -147,6 +148,7 @@ class VarSortedBytesImpl {
super.finish(docCount);
bytesUsed.addAndGet((-docToEntry.length)
* RamUsageEstimator.NUM_BYTES_INT);
+ hash.close();
}
}
@@ -195,18 +197,6 @@ class VarSortedBytesImpl {
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
}
- public long ramBytesUsed() {
- // TODO(simonw): move ram usage to PackedInts?
- return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
- + totBytes
- + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex
- .getBitsPerValue()
- * docToOrdIndex.getBitsPerValue())
- + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + ordToOffsetIndex
- .getBitsPerValue()
- * ordToOffsetIndex.getBitsPerValue());
- }
-
@Override
public int getValueCount() {
return valueCount;
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
index 0f3f6dff28f..ccff45e1c5b 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
@@ -62,7 +62,7 @@ class VarStraightBytesImpl {
if (docID >= docToAddress.length) {
int oldSize = docToAddress.length;
docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
- bytesUsed.addAndGet(-(docToAddress.length - oldSize)
+ bytesUsed.addAndGet((docToAddress.length - oldSize)
* RamUsageEstimator.NUM_BYTES_INT);
}
for (int i = lastDocID + 1; i < docID; i++) {
@@ -127,7 +127,7 @@ class VarStraightBytesImpl {
public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong());
addresses = PackedInts.getReader(idxIn);
- missingValues.bytesValue = new BytesRef(0); // empty
+ missingValue.bytesValue = new BytesRef(0); // empty
}
@Override
diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java
index 04471b25427..82dc03b6f2b 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Writer.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java
@@ -18,6 +18,7 @@ package org.apache.lucene.index.values;
*/
import java.io.IOException;
import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
import org.apache.lucene.store.Directory;
@@ -26,6 +27,10 @@ import org.apache.lucene.util.BytesRef;
public abstract class Writer extends DocValuesConsumer {
+ protected Writer(AtomicLong bytesUsed) {
+ super(bytesUsed);
+ }
+
public static final String INDEX_EXTENSION = "idx";
public static final String DATA_EXTENSION = "dat";
@@ -85,26 +90,26 @@ public abstract class Writer extends DocValuesConsumer {
}
public static Writer create(Values v, String id, Directory directory,
- Comparator comp) throws IOException {
+ Comparator comp, AtomicLong bytesUsed) throws IOException {
switch (v) {
case PACKED_INTS:
- return Ints.getWriter(directory, id, true);
+ return Ints.getWriter(directory, id, true, bytesUsed);
case SIMPLE_FLOAT_4BYTE:
- return Floats.getWriter(directory, id, 4);
+ return Floats.getWriter(directory, id, 4, bytesUsed);
case SIMPLE_FLOAT_8BYTE:
- return Floats.getWriter(directory, id, 8);
+ return Floats.getWriter(directory, id, 8, bytesUsed);
case BYTES_FIXED_STRAIGHT:
- return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true);
+ return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true, bytesUsed);
case BYTES_FIXED_DEREF:
- return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true);
+ return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true, bytesUsed);
case BYTES_FIXED_SORTED:
- return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true);
+ return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true, bytesUsed);
case BYTES_VAR_STRAIGHT:
- return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false);
+ return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false, bytesUsed);
case BYTES_VAR_DEREF:
- return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false);
+ return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false, bytesUsed);
case BYTES_VAR_SORTED:
- return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false);
+ return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false, bytesUsed);
default:
throw new IllegalArgumentException("Unknown Values: " + v);
}
diff --git a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
index 5a4336f2d32..c5b180cfa44 100644
--- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
@@ -227,8 +227,9 @@ public final class BytesRefHash {
public void clear(boolean resetPool) {
lastCount = count;
count = 0;
- if (resetPool)
+ if (resetPool) {
pool.reset();
+ }
bytesStart = bytesStartArray.clear();
if (lastCount != -1 && shrink(lastCount)) {
// shrink clears the hash entries
@@ -240,6 +241,16 @@ public final class BytesRefHash {
public void clear() {
clear(true);
}
+
+ /**
+ * Closes the BytesRefHash and releases all internally used memory
+ */
+ public void close() {
+ clear(true);
+ ords = null;
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
+ * -hashSize);
+ }
/**
* Adds a new {@link BytesRef}
@@ -453,8 +464,14 @@ public final class BytesRefHash {
* effect.
*/
public void reinit() {
- if (bytesStart == null)
+ if (bytesStart == null) {
bytesStart = bytesStartArray.init();
+ }
+
+ if (ords == null) {
+ ords = new int[hashSize];
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * hashSize);
+ }
}
/**
@@ -515,17 +532,62 @@ public final class BytesRefHash {
*/
public abstract AtomicLong bytesUsed();
}
-
- public static class DirectBytesStartArray extends BytesStartArray {
-
+
+ /**
+ * A direct {@link BytesStartArray} that tracks all memory allocation using an {@link AtomicLong} instance.
+ */
+ public static class TrackingDirectBytesStartArray extends BytesStartArray {
protected final int initSize;
private int[] bytesStart;
- private final AtomicLong bytesUsed = new AtomicLong(0);
+ protected final AtomicLong bytesUsed;
+
+ public TrackingDirectBytesStartArray(int initSize, AtomicLong bytesUsed) {
+ this.initSize = initSize;
+ this.bytesUsed = bytesUsed;
+ }
+ @Override
+ public int[] clear() {
+ if (bytesStart != null) {
+ bytesUsed.addAndGet(-bytesStart.length * RamUsageEstimator.NUM_BYTES_INT);
+ }
+ return bytesStart = null;
+ }
+
+ @Override
+ public int[] grow() {
+ assert bytesStart != null;
+ final int oldSize = bytesStart.length;
+ bytesStart = ArrayUtil.grow(bytesStart, bytesStart.length + 1);
+ bytesUsed.addAndGet((bytesStart.length - oldSize) * RamUsageEstimator.NUM_BYTES_INT);
+ return bytesStart;
+ }
+
+ @Override
+ public int[] init() {
+ bytesStart = new int[ArrayUtil.oversize(initSize,
+ RamUsageEstimator.NUM_BYTES_INT)];
+ bytesUsed.addAndGet((bytesStart.length) * RamUsageEstimator.NUM_BYTES_INT);
+ return bytesStart;
+ }
+
+ @Override
+ public AtomicLong bytesUsed() {
+ return bytesUsed;
+ }
+ }
+
+ public static class DirectBytesStartArray extends BytesStartArray {
+ protected final int initSize;
+ private int[] bytesStart;
+ private final AtomicLong bytesUsed;
+
public DirectBytesStartArray(int initSize) {
+ this.bytesUsed = new AtomicLong(0);
this.initSize = initSize;
}
+
@Override
public int[] clear() {
return bytesStart = null;
@@ -547,66 +609,5 @@ public final class BytesRefHash {
public AtomicLong bytesUsed() {
return bytesUsed;
}
-
- }
-
- public static class ParallelBytesStartArray> extends BytesStartArray {
- private final T prototype;
- public T array;
-
- public ParallelBytesStartArray(T template) {
- this.prototype = template;
- }
- @Override
- public int[] init() {
- if(array == null) {
- array = prototype.newInstance(2);
- }
- return array.textStart;
- }
-
- @Override
- public int[] grow() {
- array = array.grow();
- return array.textStart;
- }
-
- @Override
- public int[] clear() {
- if(array != null) {
- array.deref();
- array = null;
- }
- return null;
- }
-
- @Override
- public AtomicLong bytesUsed() {
- return array.bytesUsed();
- }
-
- }
-
- public abstract static class ParallelArrayBase> extends ParallelArray {
- final int[] textStart;
-
- protected ParallelArrayBase(int size, AtomicLong bytesUsed) {
- super(size, bytesUsed);
- textStart = new int[size];
- }
-
- @Override
- protected int bytesPerEntry() {
- return RamUsageEstimator.NUM_BYTES_INT;
- }
-
- @Override
- protected void copyTo(T toArray, int numToCopy) {
- System.arraycopy(textStart, 0, toArray.textStart, 0, size);
- }
-
- @Override
- public abstract T newInstance(int size);
-
}
}
diff --git a/lucene/src/java/org/apache/lucene/util/FloatsRef.java b/lucene/src/java/org/apache/lucene/util/FloatsRef.java
index 9dd107e323d..67066748395 100644
--- a/lucene/src/java/org/apache/lucene/util/FloatsRef.java
+++ b/lucene/src/java/org/apache/lucene/util/FloatsRef.java
@@ -1,9 +1,27 @@
-/**
- *
- */
package org.apache.lucene.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Represents float[], as a slice (offset + length) into an existing float[].
+ *
+ * @lucene.internal
+ */
public final class FloatsRef implements Cloneable{
public double[] floats;
public int offset;
diff --git a/lucene/src/java/org/apache/lucene/util/LongsRef.java b/lucene/src/java/org/apache/lucene/util/LongsRef.java
index c5dee1a15b1..2a9bb2e385a 100644
--- a/lucene/src/java/org/apache/lucene/util/LongsRef.java
+++ b/lucene/src/java/org/apache/lucene/util/LongsRef.java
@@ -1,9 +1,27 @@
-/**
- *
- */
package org.apache.lucene.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Represents long[], as a slice (offset + length) into an existing long[].
+ *
+ * @lucene.internal
+ */
public final class LongsRef implements Cloneable {
public long[] ints;
public int offset;
@@ -30,11 +48,11 @@ public final class LongsRef implements Cloneable {
public Object clone() {
return new LongsRef(this);
}
-
+
public void set(long value) {
ints[offset] = value;
}
-
+
public long get() {
return ints[offset];
}
@@ -44,13 +62,13 @@ public final class LongsRef implements Cloneable {
final int prime = 31;
int result = 0;
final int end = offset + length;
- for(int i = offset; i < end; i++) {
+ for (int i = offset; i < end; i++) {
long value = ints[i];
result = prime * result + (int) (value ^ (value >>> 32));
}
return result;
}
-
+
@Override
public boolean equals(Object other) {
return this.intsEquals((LongsRef) other);
@@ -61,7 +79,7 @@ public final class LongsRef implements Cloneable {
int otherUpto = other.offset;
final long[] otherInts = other.ints;
final int end = offset + length;
- for(int upto=offset;uptostart, the
+ * length is read as 1 or 2 byte vInt prefix. Iff the slice spans across a
+ * block border this method will allocate sufficient resources and copy the
+ * paged data.
+ *
+ * Slices spanning more than one block are not supported.
+ *
+ *
* @lucene.internal
**/
- public BytesRef fillUsingLengthPrefix4(BytesRef b, long start) {
+ public BytesRef fillSliceWithPrefix(BytesRef b, long start) {
final int index = (int) (start >> blockBits);
int offset = (int) (start & blockMask);
final byte[] block = blocks[index];
diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
index 769ea0e9bee..38c80c30bec 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@@ -607,7 +608,7 @@ public class TestCodecs extends LuceneTestCase {
final int termIndexInterval = this.nextInt(13, 27);
final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, CodecProvider.getDefault());
- final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, null, 10000, 10000, termIndexInterval, codecInfo);
+ final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, null, 10000, 10000, termIndexInterval, codecInfo, new AtomicLong());
final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
Arrays.sort(fields);
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
index 2b2015cf5f1..02a49ccd847 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.values.DocValues.SortedSource;
import org.apache.lucene.index.values.DocValues.Source;
@@ -59,7 +60,8 @@ public class TestDocValues extends LuceneTestCase {
.getUTF8SortedAsUnicodeComparator() : null;
Directory dir = newDirectory();
- Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize);
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize, trackBytes);
int maxDoc = 220;
final String[] values = new String[maxDoc];
final int lenMin, lenMax;
@@ -83,6 +85,7 @@ public class TestDocValues extends LuceneTestCase {
w.add(2 * i, bytesRef);
}
w.finish(maxDoc);
+ assertEquals(0, trackBytes.get());
DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc);
for (int iter = 0; iter < 2; iter++) {
@@ -186,7 +189,8 @@ public class TestDocValues extends LuceneTestCase {
final long[] values = new long[NUM_VALUES];
for (int rx = 1; rx < 63; rx++, maxV *= 2) {
Directory dir = newDirectory();
- Writer w = Ints.getWriter(dir, "test", false);
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Ints.getWriter(dir, "test", false, trackBytes);
for (int i = 0; i < NUM_VALUES; i++) {
final long v = random.nextLong() % (1 + maxV);
values[i] = v;
@@ -194,6 +198,8 @@ public class TestDocValues extends LuceneTestCase {
}
final int additionalDocs = 1 + random.nextInt(9);
w.finish(NUM_VALUES + additionalDocs);
+ assertEquals(0, trackBytes.get());
+
DocValues r = Ints.getValues(dir, "test", false);
for (int iter = 0; iter < 2; iter++) {
@@ -250,7 +256,8 @@ public class TestDocValues extends LuceneTestCase {
private void runTestFloats(int precision, double delta) throws IOException {
Directory dir = newDirectory();
- Writer w = Floats.getWriter(dir, "test", precision);
+ final AtomicLong trackBytes = new AtomicLong(0);
+ Writer w = Floats.getWriter(dir, "test", precision, trackBytes);
final int NUM_VALUES = 777 + random.nextInt(777);;
final double[] values = new double[NUM_VALUES];
for (int i = 0; i < NUM_VALUES; i++) {
@@ -261,6 +268,7 @@ public class TestDocValues extends LuceneTestCase {
}
final int additionalValues = 1 + random.nextInt(10);
w.finish(NUM_VALUES + additionalValues);
+ assertEquals(0, trackBytes.get());
DocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues);
for (int iter = 0; iter < 2; iter++) {
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
index 5ab0c649adf..9cca81fbec1 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
@@ -43,8 +43,13 @@ import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
-import org.apache.lucene.index.values.DocValues.MissingValues;
+import org.apache.lucene.index.values.DocValues.MissingValue;
import org.apache.lucene.index.values.DocValues.Source;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
@@ -62,8 +67,27 @@ import org.junit.BeforeClass;
*
*/
public class TestDocValuesIndexing extends LuceneTestCase {
- // TODO Add a test for addIndexes
- // TODO add test for unoptimized case with deletes
+ /*
+ * TODO:
+ * Roadmap to land on trunk
+ * - Cut over to a direct API on ValuesEnum vs. ValuesAttribute
+ * - Add documentation for:
+ * - Source and ValuesEnum
+ * - DocValues
+ * - ValuesField
+ * - ValuesAttribute
+ * - Values
+ * - Add @lucene.experimental to all necessary classes
+ * - Try to make ValuesField more lightweight -> AttributeSource
+ * - add test for unoptimized case with deletes
+ * - add a test for addIndexes
+ * - split up existing test cases and give them meaningful names
+ * - use consistent naming throughout DocValues
+ * - Values -> DocValueType
+ * - PackedIntsImpl -> Ints
+ * - run RAT
+ * - add tests for FieldComparator FloatIndexValuesComparator vs. FloatValuesComparator etc.
+ */
private static DocValuesCodec docValuesCodec;
private static CodecProvider provider;
@@ -82,6 +106,43 @@ public class TestDocValuesIndexing extends LuceneTestCase {
public static void afterClassLuceneTestCaseJ4() {
LuceneTestCase.afterClassLuceneTestCaseJ4();
}
+
+ /*
+ * Simple test case to show how to use the API
+ */
+ public void testDocValuesSimple() throws CorruptIndexException, IOException, ParseException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, writerConfig(false));
+ for (int i = 0; i < 5; i++) {
+ Document doc = new Document();
+ ValuesField valuesField = new ValuesField("docId");
+ valuesField.setInt(i);
+ doc.add(valuesField);
+ doc.add(new Field("docId", "" + i, Store.NO, Index.ANALYZED));
+ writer.addDocument(doc);
+ }
+ writer.commit();
+ writer.optimize(true);
+
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir, null, true, 1, provider);
+ assertTrue(reader.isOptimized());
+
+ IndexSearcher searcher = new IndexSearcher(reader);
+ QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "docId", new MockAnalyzer());
+ TopDocs search = searcher.search(parser.parse("0 OR 1 OR 2 OR 3 OR 4"), 10);
+ assertEquals(5, search.totalHits);
+ ScoreDoc[] scoreDocs = search.scoreDocs;
+ DocValues docValues = MultiFields.getDocValues(reader, "docId");
+ Source source = docValues.getSource();
+ for (int i = 0; i < scoreDocs.length; i++) {
+ assertEquals(i, scoreDocs[i].doc);
+ assertEquals(i, source.getInt(scoreDocs[i].doc));
+ }
+ reader.close();
+ dir.close();
+ }
/**
* Tests complete indexing of {@link Values} including deletions, merging and
@@ -160,7 +221,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
assertNotNull(intsReader);
Source ints = getSource(intsReader);
- MissingValues missing = ints.getMissing();
+ MissingValue missing = ints.getMissing();
for (int i = 0; i < base; i++) {
long value = ints.getInt(i);
@@ -191,7 +252,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
DocValues floatReader = getDocValues(r, val.name());
assertNotNull(floatReader);
Source floats = getSource(floatReader);
- MissingValues missing = floats.getMissing();
+ MissingValue missing = floats.getMissing();
for (int i = 0; i < base; i++) {
double value = floats.getFloat(i);
@@ -254,7 +315,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
byte upto = 0;
// test the filled up slots for correctness
- MissingValues missing = bytes.getMissing();
+ MissingValue missing = bytes.getMissing();
for (int i = 0; i < base; i++) {
BytesRef br = bytes.getBytes(i, new BytesRef());
From 964eccc435084f0c5a8aa6ff516bc12cf9398930 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Mon, 6 Dec 2010 00:57:55 +0000
Subject: [PATCH 018/116] fixed naming inconsistency
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1042503 13f79535-47bb-0310-9956-ffa450edef68
---
.../benchmark/byTask/feeds/DocMaker.java | 14 +++---
.../apache/lucene/document/ValuesField.java | 20 ++++-----
.../org/apache/lucene/index/FieldInfo.java | 8 ++--
.../org/apache/lucene/index/FieldInfos.java | 20 ++++-----
.../org/apache/lucene/index/FieldsEnum.java | 2 +-
.../lucene/index/FilterIndexReader.java | 2 +-
.../org/apache/lucene/index/MultiFields.java | 4 +-
.../apache/lucene/index/MultiFieldsEnum.java | 4 +-
.../apache/lucene/index/SegmentMerger.java | 6 +--
.../apache/lucene/index/SegmentReader.java | 2 +-
.../docvalues/DocValuesProducerBase.java | 4 +-
.../org/apache/lucene/index/values/Bytes.java | 6 +--
.../apache/lucene/index/values/DocValues.java | 16 +++----
.../{ValuesEnum.java => DocValuesEnum.java} | 16 +++----
.../index/values/FixedDerefBytesImpl.java | 18 ++++----
.../index/values/FixedSortedBytesImpl.java | 10 ++---
.../index/values/FixedStraightBytesImpl.java | 16 +++----
.../apache/lucene/index/values/Floats.java | 36 +++++++--------
.../lucene/index/values/MultiDocValues.java | 28 ++++++------
.../lucene/index/values/PackedIntsImpl.java | 18 ++++----
.../index/values/PerDocFieldValues.java | 8 ++--
.../lucene/index/values/SourceCache.java | 1 +
.../index/values/{Values.java => Type.java} | 2 +-
.../index/values/VarDerefBytesImpl.java | 12 ++---
.../index/values/VarSortedBytesImpl.java | 14 +++---
.../index/values/VarStraightBytesImpl.java | 14 +++---
.../apache/lucene/index/values/Writer.java | 10 ++---
.../lucene/index/values/TestDocValues.java | 22 ++++-----
.../index/values/TestDocValuesIndexing.java | 45 +++++++++----------
29 files changed, 188 insertions(+), 190 deletions(-)
rename lucene/src/java/org/apache/lucene/index/values/{ValuesEnum.java => DocValuesEnum.java} (89%)
rename lucene/src/java/org/apache/lucene/index/values/{Values.java => Type.java} (98%)
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
index b28300713ed..493745b1ac4 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
@@ -33,7 +33,7 @@ import org.apache.lucene.document.ValuesField;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
/**
* Creates {@link Document} objects. Uses a {@link ContentSource} to generate
@@ -160,13 +160,13 @@ public class DocMaker {
private long lastPrintedNumUniqueBytes = 0;
private int printNum = 0;
- private Map fieldVauleMap;
+ private Map fieldVauleMap;
// create a doc
// use only part of the body, modify it to keep the rest (or use all if size==0).
// reset the docdata properties so they are not added more than once.
private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
- Values valueType;
+ Type valueType;
final DocState ds = getDocState();
final Document doc = reuseFields ? ds.doc : new Document();
doc.getFields().clear();
@@ -252,7 +252,7 @@ public class DocMaker {
}
private void trySetIndexValues(Field field) {
- final Values valueType;
+ final Type valueType;
if((valueType = fieldVauleMap.get(field.name())) != null)
ValuesField.set(field, valueType);
}
@@ -385,18 +385,18 @@ public class DocMaker {
resetLeftovers();
}
- private static final Map parseValueFields(String fields) {
+ private static final Map parseValueFields(String fields) {
if(fields == null)
return Collections.emptyMap();
String[] split = fields.split(";");
- Map result = new HashMap();
+ Map result = new HashMap();
for (String tuple : split) {
final String[] nameValue = tuple.split("=");
if (nameValue.length != 2) {
throw new IllegalArgumentException("illegal doc.stored.values format: "
+ fields + " expected fieldname=ValuesType;...;...;");
}
- result.put(nameValue[0].trim(), Values.valueOf(nameValue[1]));
+ result.put(nameValue[0].trim(), Type.valueOf(nameValue[1]));
}
return result;
}
diff --git a/lucene/src/java/org/apache/lucene/document/ValuesField.java b/lucene/src/java/org/apache/lucene/document/ValuesField.java
index 5acb6f272c7..105ac86a355 100644
--- a/lucene/src/java/org/apache/lucene/document/ValuesField.java
+++ b/lucene/src/java/org/apache/lucene/document/ValuesField.java
@@ -24,7 +24,7 @@ import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.values.PerDocFieldValues;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
import org.apache.lucene.util.BytesRef;
/**
@@ -36,7 +36,7 @@ public class ValuesField extends AbstractField implements PerDocFieldValues {
protected BytesRef bytes;
protected double doubleValue;
protected long longValue;
- protected Values type;
+ protected Type type;
protected Comparator bytesComparator;
public ValuesField(String name) {
@@ -49,26 +49,26 @@ public class ValuesField extends AbstractField implements PerDocFieldValues {
}
public void setInt(long value) {
- type = Values.PACKED_INTS;
+ type = Type.PACKED_INTS;
longValue = value;
}
public void setFloat(float value) {
- type = Values.SIMPLE_FLOAT_4BYTE;
+ type = Type.SIMPLE_FLOAT_4BYTE;
doubleValue = value;
}
public void setFloat(double value) {
- type = Values.SIMPLE_FLOAT_8BYTE;
+ type = Type.SIMPLE_FLOAT_8BYTE;
doubleValue = value;
}
- public void setBytes(BytesRef value, Values type) {
+ public void setBytes(BytesRef value, Type type) {
setBytes(value, type, null);
}
- public void setBytes(BytesRef value, Values type, Comparator comp) {
+ public void setBytes(BytesRef value, Type type, Comparator comp) {
this.type = type;
if (bytes == null) {
this.bytes = new BytesRef();
@@ -97,11 +97,11 @@ public class ValuesField extends AbstractField implements PerDocFieldValues {
this.bytesComparator = comp;
}
- public void setType(Values type) {
+ public void setType(Type type) {
this.type = type;
}
- public Values type() {
+ public Type type() {
return type;
}
@@ -122,7 +122,7 @@ public class ValuesField extends AbstractField implements PerDocFieldValues {
return field;
}
- public static T set(T field, Values type) {
+ public static T set(T field, Type type) {
if (field instanceof ValuesField)
return field;
final ValuesField valField = new ValuesField();
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
index e2fbf14f9d3..96ace5f1f1b 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java
@@ -1,6 +1,6 @@
package org.apache.lucene.index;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,7 +24,7 @@ public final class FieldInfo {
public String name;
public boolean isIndexed;
public int number;
- Values docValues;
+ Type docValues;
// true if term vector for this field should be stored
@@ -94,7 +94,7 @@ public final class FieldInfo {
}
}
- void setDocValues(Values v) {
+ void setDocValues(Type v) {
if (docValues != null) {
if (docValues != v) {
throw new IllegalArgumentException("indexValues is already set to " + docValues + "; cannot change to " + v);
@@ -108,7 +108,7 @@ public final class FieldInfo {
return docValues != null;
}
- public Values getDocValues() {
+ public Type getDocValues() {
return docValues;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
index a6baae51324..ea5d528ca78 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
@@ -19,7 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -384,31 +384,31 @@ public final class FieldInfos {
fi.docValues = null;
break;
case 1:
- fi.docValues = Values.PACKED_INTS;
+ fi.docValues = Type.PACKED_INTS;
break;
case 2:
- fi.docValues = Values.SIMPLE_FLOAT_4BYTE;
+ fi.docValues = Type.SIMPLE_FLOAT_4BYTE;
break;
case 3:
- fi.docValues = Values.SIMPLE_FLOAT_8BYTE;
+ fi.docValues = Type.SIMPLE_FLOAT_8BYTE;
break;
case 4:
- fi.docValues = Values.BYTES_FIXED_STRAIGHT;
+ fi.docValues = Type.BYTES_FIXED_STRAIGHT;
break;
case 5:
- fi.docValues = Values.BYTES_FIXED_DEREF;
+ fi.docValues = Type.BYTES_FIXED_DEREF;
break;
case 6:
- fi.docValues = Values.BYTES_FIXED_SORTED;
+ fi.docValues = Type.BYTES_FIXED_SORTED;
break;
case 7:
- fi.docValues = Values.BYTES_VAR_STRAIGHT;
+ fi.docValues = Type.BYTES_VAR_STRAIGHT;
break;
case 8:
- fi.docValues = Values.BYTES_VAR_DEREF;
+ fi.docValues = Type.BYTES_VAR_DEREF;
break;
case 9:
- fi.docValues = Values.BYTES_VAR_SORTED;
+ fi.docValues = Type.BYTES_VAR_SORTED;
break;
default:
throw new IllegalStateException("unhandled indexValues type " + b);
diff --git a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java
index e3112ca8b5b..290cd107cfb 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldsEnum.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldsEnum.java
@@ -20,7 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.values.DocValues;
-import org.apache.lucene.index.values.ValuesEnum;
+import org.apache.lucene.index.values.DocValuesEnum;
import org.apache.lucene.util.AttributeSource;
/** Enumerates indexed fields. You must first call {@link
diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
index 9cde6140ae1..3393a71fdfb 100644
--- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
+++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
@@ -20,7 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.values.DocValues;
-import org.apache.lucene.index.values.ValuesEnum;
+import org.apache.lucene.index.values.DocValuesEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java
index 9137d6077ff..a871ce87f7a 100644
--- a/lucene/src/java/org/apache/lucene/index/MultiFields.java
+++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java
@@ -25,7 +25,7 @@ import java.util.ArrayList;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.MultiDocValues;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather; // for javadocs
@@ -293,7 +293,7 @@ public final class MultiFields extends Fields {
// create & add to docValues:
final List docValuesIndex = new ArrayList();
int docsUpto = 0;
- Values type = null;
+ Type type = null;
// Gather all sub-readers that share this field
for(int i=0;i docValuesIndex = new ArrayList();
int docsUpto = 0;
- Values type = null;
+ Type type = null;
final int numEnums = enumWithSlices.length;
for (int i = 0; i < numEnums; i++) {
FieldsEnumWithSlice withSlice = enumWithSlices[i];
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index 48f516b903c..5535bbe7725 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.index.codecs.FieldsConsumer;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -274,8 +274,8 @@ final class SegmentMerger {
fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
!reader.hasNorms(fi.name), fi.storePayloads,
fi.omitTermFreqAndPositions);
- final Values fiIndexValues = fi.docValues;
- final Values mergedDocValues = merged.docValues;
+ final Type fiIndexValues = fi.docValues;
+ final Type mergedDocValues = merged.docValues;
if (mergedDocValues == null) {
merged.setDocValues(fiIndexValues);
} else if (mergedDocValues != fiIndexValues) {
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java
index 47446854af3..10974b45fcf 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java
@@ -45,7 +45,7 @@ import org.apache.lucene.index.values.Bytes;
import org.apache.lucene.index.values.Ints;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.Floats;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
index 08307d4dd97..4cf1d5a916b 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesProducerBase.java
@@ -27,7 +27,7 @@ import org.apache.lucene.index.values.Bytes;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.Floats;
import org.apache.lucene.index.values.Ints;
-import org.apache.lucene.index.values.Values;
+import org.apache.lucene.index.values.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IntsRef;
@@ -65,7 +65,7 @@ public abstract class DocValuesProducerBase extends FieldsProducer{
}
protected DocValues loadDocValues(int docCount, Directory dir, String id,
- Values v) throws IOException {
+ Type v) throws IOException {
switch (v) {
case PACKED_INTS:
return Ints.getValues(dir, id, false);
diff --git a/lucene/src/java/org/apache/lucene/index/values/Bytes.java b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
index e54e575f4c0..5dabb6cdd41 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Bytes.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Bytes.java
@@ -162,7 +162,7 @@ public final class Bytes {
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, maxDoc()) {
@Override
@@ -246,7 +246,7 @@ public final class Bytes {
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, maxDoc()) {
@@ -340,7 +340,7 @@ public final class Bytes {
}
@Override
- protected void setNextEnum(ValuesEnum valuesEnum) {
+ protected void setNextEnum(DocValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytes();
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/DocValues.java b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
index 39d21ac4398..4de684ce526 100644
--- a/lucene/src/java/org/apache/lucene/index/values/DocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValues.java
@@ -31,11 +31,11 @@ public abstract class DocValues implements Closeable {
public static final DocValues[] EMPTY_ARRAY = new DocValues[0];
private SourceCache cache = new SourceCache.DirectSourceCache();
- public ValuesEnum getEnum() throws IOException {
+ public DocValuesEnum getEnum() throws IOException {
return getEnum(null);
}
- public abstract ValuesEnum getEnum(AttributeSource attrSource)
+ public abstract DocValuesEnum getEnum(AttributeSource attrSource)
throws IOException;
public abstract Source load() throws IOException;
@@ -54,7 +54,7 @@ public abstract class DocValues implements Closeable {
throw new UnsupportedOperationException();
}
- public abstract Values type();
+ public abstract Type type();
public void close() throws IOException {
this.cache.close(this);
@@ -95,7 +95,7 @@ public abstract class DocValues implements Closeable {
throw new UnsupportedOperationException();
}
- public ValuesEnum getEnum() throws IOException {
+ public DocValuesEnum getEnum() throws IOException {
return getEnum(null);
}
@@ -103,19 +103,19 @@ public abstract class DocValues implements Closeable {
return missingValue;
}
- public abstract Values type();
+ public abstract Type type();
- public abstract ValuesEnum getEnum(AttributeSource attrSource)
+ public abstract DocValuesEnum getEnum(AttributeSource attrSource)
throws IOException;
}
- abstract static class SourceEnum extends ValuesEnum {
+ abstract static class SourceEnum extends DocValuesEnum {
protected final Source source;
protected final int numDocs;
protected int pos = -1;
- SourceEnum(AttributeSource attrs, Values type, Source source, int numDocs) {
+ SourceEnum(AttributeSource attrs, Type type, Source source, int numDocs) {
super(attrs, type);
this.source = source;
this.numDocs = numDocs;
diff --git a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java
similarity index 89%
rename from lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
rename to lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java
index 8008929dabc..ecdeec8cf42 100644
--- a/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
+++ b/lucene/src/java/org/apache/lucene/index/values/DocValuesEnum.java
@@ -28,18 +28,18 @@ import org.apache.lucene.util.LongsRef;
*
* @lucene.experimental
*/
-public abstract class ValuesEnum extends DocIdSetIterator {
+public abstract class DocValuesEnum extends DocIdSetIterator {
private AttributeSource source;
- private Values enumType;
+ private Type enumType;
protected BytesRef bytesRef;
protected FloatsRef floatsRef;
protected LongsRef intsRef;
- protected ValuesEnum(Values enumType) {
+ protected DocValuesEnum(Type enumType) {
this(null, enumType);
}
- protected ValuesEnum(AttributeSource source, Values enumType) {
+ protected DocValuesEnum(AttributeSource source, Type enumType) {
this.source = source;
this.enumType = enumType;
switch (enumType) {
@@ -61,7 +61,7 @@ public abstract class ValuesEnum extends DocIdSetIterator {
}
}
- public Values type() {
+ public Type type() {
return enumType;
}
@@ -77,7 +77,7 @@ public abstract class ValuesEnum extends DocIdSetIterator {
return intsRef;
}
- protected void copyReferences(ValuesEnum valuesEnum) {
+ protected void copyReferences(DocValuesEnum valuesEnum) {
intsRef = valuesEnum.intsRef;
floatsRef = valuesEnum.floatsRef;
bytesRef = valuesEnum.bytesRef;
@@ -103,8 +103,8 @@ public abstract class ValuesEnum extends DocIdSetIterator {
public abstract void close() throws IOException;
- public static ValuesEnum emptyEnum(Values type) {
- return new ValuesEnum(type) {
+ public static DocValuesEnum emptyEnum(Type type) {
+ return new DocValuesEnum(type) {
@Override
public int nextDoc() throws IOException {
return NO_MORE_DOCS;
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
index c987ca37816..e662de58565 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
@@ -172,8 +172,8 @@ class FixedDerefBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_FIXED_DEREF;
+ public Type type() {
+ return Type.BYTES_FIXED_DEREF;
}
@Override
@@ -183,12 +183,12 @@ class FixedDerefBytesImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
return new DerefBytesEnum(source, cloneData(), cloneIndex(),
size);
}
- static class DerefBytesEnum extends ValuesEnum {
+ static class DerefBytesEnum extends DocValuesEnum {
protected final IndexInput datIn;
private final PackedInts.ReaderIterator idx;
protected final long fp;
@@ -198,11 +198,11 @@ class FixedDerefBytesImpl {
public DerefBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn, int size) throws IOException {
- this(source, datIn, idxIn, size, Values.BYTES_FIXED_DEREF);
+ this(source, datIn, idxIn, size, Type.BYTES_FIXED_DEREF);
}
protected DerefBytesEnum(AttributeSource source, IndexInput datIn,
- IndexInput idxIn, int size, Values enumType)
+ IndexInput idxIn, int size, Type enumType)
throws IOException {
super(source, enumType);
this.datIn = datIn;
@@ -217,7 +217,7 @@ class FixedDerefBytesImpl {
}
- protected void copyReferences(ValuesEnum valuesEnum) {
+ protected void copyReferences(DocValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytesRef;
if(bytesRef.bytes.length < size) {
bytesRef.grow(size);
@@ -270,8 +270,8 @@ class FixedDerefBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_FIXED_DEREF;
+ public Type type() {
+ return Type.BYTES_FIXED_DEREF;
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
index adf1c2167c2..583d93b3d7b 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
@@ -215,8 +215,8 @@ class FixedSortedBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_FIXED_SORTED;
+ public Type type() {
+ return Type.BYTES_FIXED_SORTED;
}
@Override
@@ -226,15 +226,15 @@ class FixedSortedBytesImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
// do unsorted
return new DerefBytesEnum(source, cloneData(), cloneIndex(),
size);
}
@Override
- public Values type() {
- return Values.BYTES_FIXED_SORTED;
+ public Type type() {
+ return Type.BYTES_FIXED_SORTED;
}
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
index 9f8047304ca..77d51a6f431 100644
--- a/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
@@ -156,8 +156,8 @@ class FixedStraightBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_FIXED_STRAIGHT;
+ public Type type() {
+ return Type.BYTES_FIXED_STRAIGHT;
}
@Override
@@ -167,11 +167,11 @@ class FixedStraightBytesImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
}
- private static final class FixedStraightBytesEnum extends ValuesEnum {
+ private static final class FixedStraightBytesEnum extends DocValuesEnum {
private final IndexInput datIn;
private final int size;
private final int maxDoc;
@@ -179,7 +179,7 @@ class FixedStraightBytesImpl {
private final long fp;
public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn, int size, int maxDoc) throws IOException{
- super(source, Values.BYTES_FIXED_STRAIGHT);
+ super(source, Type.BYTES_FIXED_STRAIGHT);
this.datIn = datIn;
this.size = size;
this.maxDoc = maxDoc;
@@ -189,7 +189,7 @@ class FixedStraightBytesImpl {
fp = datIn.getFilePointer();
}
- protected void copyReferences(ValuesEnum valuesEnum) {
+ protected void copyReferences(DocValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytesRef;
if(bytesRef.bytes.length < size) {
bytesRef.grow(size);
@@ -228,8 +228,8 @@ class FixedStraightBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_FIXED_STRAIGHT;
+ public Type type() {
+ return Type.BYTES_FIXED_STRAIGHT;
}
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Floats.java b/lucene/src/java/org/apache/lucene/index/values/Floats.java
index d2d3fe6a500..0bf082ab98b 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Floats.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Floats.java
@@ -102,7 +102,7 @@ public class Floats {
}
@Override
- protected void setNextEnum(ValuesEnum valuesEnum) {
+ protected void setNextEnum(DocValuesEnum valuesEnum) {
floatsRef = valuesEnum.getFloat();
}
@@ -288,9 +288,9 @@ public class Floats {
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
final MissingValue missing = getMissing();
- return new SourceEnum(attrSource, Values.SIMPLE_FLOAT_4BYTE, this, maxDoc) {
+ return new SourceEnum(attrSource, Type.SIMPLE_FLOAT_4BYTE, this, maxDoc) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs)
@@ -307,8 +307,8 @@ public class Floats {
}
@Override
- public Values type() {
- return Values.SIMPLE_FLOAT_4BYTE;
+ public Type type() {
+ return Type.SIMPLE_FLOAT_4BYTE;
}
}
@@ -327,7 +327,7 @@ public class Floats {
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, maxDoc) {
@Override
@@ -346,8 +346,8 @@ public class Floats {
}
@Override
- public Values type() {
- return Values.SIMPLE_FLOAT_8BYTE;
+ public Type type() {
+ return Type.SIMPLE_FLOAT_8BYTE;
}
}
@@ -358,7 +358,7 @@ public class Floats {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
IndexInput indexInput = (IndexInput) datIn.clone();
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
// skip precision:
@@ -368,9 +368,9 @@ public class Floats {
}
@Override
- public Values type() {
- return precisionBytes == 4 ? Values.SIMPLE_FLOAT_4BYTE
- : Values.SIMPLE_FLOAT_8BYTE;
+ public Type type() {
+ return precisionBytes == 4 ? Type.SIMPLE_FLOAT_4BYTE
+ : Type.SIMPLE_FLOAT_8BYTE;
}
}
@@ -378,7 +378,7 @@ public class Floats {
Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc)
throws IOException {
- super(source, dataIn, 4, maxDoc, Values.SIMPLE_FLOAT_4BYTE);
+ super(source, dataIn, 4, maxDoc, Type.SIMPLE_FLOAT_4BYTE);
}
@Override
@@ -414,7 +414,7 @@ public class Floats {
Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc)
throws IOException {
- super(source, dataIn, 8, maxDoc, Values.SIMPLE_FLOAT_8BYTE);
+ super(source, dataIn, 8, maxDoc, Type.SIMPLE_FLOAT_8BYTE);
}
@Override
@@ -447,7 +447,7 @@ public class Floats {
}
}
- static abstract class FloatsEnumImpl extends ValuesEnum {
+ static abstract class FloatsEnumImpl extends DocValuesEnum {
protected final IndexInput dataIn;
protected int pos = -1;
protected final int precision;
@@ -455,9 +455,9 @@ public class Floats {
protected final long fp;
FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision,
- int maxDoc, Values type) throws IOException {
- super(source, precision == 4 ? Values.SIMPLE_FLOAT_4BYTE
- : Values.SIMPLE_FLOAT_8BYTE);
+ int maxDoc, Type type) throws IOException {
+ super(source, precision == 4 ? Type.SIMPLE_FLOAT_4BYTE
+ : Type.SIMPLE_FLOAT_8BYTE);
this.dataIn = dataIn;
this.precision = precision;
this.maxDoc = maxDoc;
diff --git a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
index dfd7478a106..f0b6e7dbfe3 100644
--- a/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/MultiDocValues.java
@@ -54,7 +54,7 @@ public class MultiDocValues extends DocValues {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
return new MultiValuesEnum(docValuesIdx, starts);
}
@@ -81,13 +81,13 @@ public class MultiDocValues extends DocValues {
final int maxDoc;
final Source emptySoruce;
- public DummyDocValues(int maxDoc, Values type) {
+ public DummyDocValues(int maxDoc, Type type) {
this.maxDoc = maxDoc;
this.emptySoruce = new EmptySource(type);
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return emptySoruce.getEnum(attrSource);
}
@@ -97,7 +97,7 @@ public class MultiDocValues extends DocValues {
}
@Override
- public Values type() {
+ public Type type() {
return emptySoruce.type();
}
@@ -107,13 +107,13 @@ public class MultiDocValues extends DocValues {
}
- private static class MultiValuesEnum extends ValuesEnum {
+ private static class MultiValuesEnum extends DocValuesEnum {
private DocValuesIndex[] docValuesIdx;
private final int maxDoc;
private int currentStart;
private int currentMax;
private int currentDoc = -1;
- private ValuesEnum currentEnum;
+ private DocValuesEnum currentEnum;
private final int[] starts;
public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts)
@@ -222,21 +222,21 @@ public class MultiDocValues extends DocValues {
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
throw new UnsupportedOperationException(); // TODO
}
@Override
- public Values type() {
+ public Type type() {
return docValuesIdx[0].docValues.type();
}
}
private static class EmptySource extends Source {
- private final Values type;
+ private final Type type;
- public EmptySource(Values type) {
+ public EmptySource(Type type) {
this.type = type;
}
@@ -257,18 +257,18 @@ public class MultiDocValues extends DocValues {
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
- return ValuesEnum.emptyEnum(type);
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ return DocValuesEnum.emptyEnum(type);
}
@Override
- public Values type() {
+ public Type type() {
return type;
}
}
@Override
- public Values type() {
+ public Type type() {
return this.docValuesIdx[0].docValues.type();
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
index 9842352c571..8466bb2db7f 100644
--- a/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
@@ -133,7 +133,7 @@ class PackedIntsImpl {
}
@Override
- protected void setNextEnum(ValuesEnum valuesEnum) {
+ protected void setNextEnum(DocValuesEnum valuesEnum) {
intsRef = valuesEnum.getInt();
}
@@ -192,7 +192,7 @@ class PackedIntsImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource attrSource) throws IOException {
final MissingValue missing = getMissing();
return new SourceEnum(attrSource, type(), this, values.size()) {
@Override
@@ -211,8 +211,8 @@ class PackedIntsImpl {
}
@Override
- public Values type() {
- return Values.PACKED_INTS;
+ public Type type() {
+ return Type.PACKED_INTS;
}
}
@@ -223,18 +223,18 @@ class PackedIntsImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
return new IntsEnumImpl(source, (IndexInput) datIn.clone());
}
@Override
- public Values type() {
- return Values.PACKED_INTS;
+ public Type type() {
+ return Type.PACKED_INTS;
}
}
- private static final class IntsEnumImpl extends ValuesEnum {
+ private static final class IntsEnumImpl extends DocValuesEnum {
private final PackedInts.ReaderIterator ints;
private long minValue;
private final IndexInput dataIn;
@@ -244,7 +244,7 @@ class PackedIntsImpl {
private IntsEnumImpl(AttributeSource source, IndexInput dataIn)
throws IOException {
- super(source, Values.PACKED_INTS);
+ super(source, Type.PACKED_INTS);
intsRef.offset = 0;
this.dataIn = dataIn;
dataIn.seek(CodecUtil.headerLength(CODEC_NAME));
diff --git a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java
index e94b7d5505f..f60fcd4e0e5 100644
--- a/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java
+++ b/lucene/src/java/org/apache/lucene/index/values/PerDocFieldValues.java
@@ -32,9 +32,9 @@ public interface PerDocFieldValues {
public void setFloat(double value);
- public void setBytes(BytesRef value, Values type);
+ public void setBytes(BytesRef value, Type type);
- public void setBytes(BytesRef value, Values type, Comparator comp);
+ public void setBytes(BytesRef value, Type type, Comparator comp);
public BytesRef getBytes();
@@ -46,8 +46,8 @@ public interface PerDocFieldValues {
public void setBytesComparator(Comparator comp);
- public void setType(Values type);
+ public void setType(Type type);
- public Values type();
+ public Type type();
}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java
index e2f4c63c9aa..07e33633b44 100644
--- a/lucene/src/java/org/apache/lucene/index/values/SourceCache.java
+++ b/lucene/src/java/org/apache/lucene/index/values/SourceCache.java
@@ -26,6 +26,7 @@ import org.apache.lucene.util.BytesRef;
/**
* Per {@link DocValues} {@link Source} cache.
+ * @lucene.experimental
*/
public abstract class SourceCache {
public abstract Source load(DocValues values) throws IOException;
diff --git a/lucene/src/java/org/apache/lucene/index/values/Values.java b/lucene/src/java/org/apache/lucene/index/values/Type.java
similarity index 98%
rename from lucene/src/java/org/apache/lucene/index/values/Values.java
rename to lucene/src/java/org/apache/lucene/index/values/Type.java
index 31adfd2a51a..717741abab8 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Values.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Type.java
@@ -24,7 +24,7 @@ package org.apache.lucene.index.values;
*
* @lucene.experimental
*/
-public enum Values {
+public enum Type {
/** Integral value is stored as packed ints. The bit
* precision is fixed across the segment, and
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
index 985b960c5a2..c28e7e57759 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
@@ -231,8 +231,8 @@ class VarDerefBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_VAR_DEREF;
+ public Type type() {
+ return Type.BYTES_VAR_DEREF;
}
@Override
@@ -242,7 +242,7 @@ class VarDerefBytesImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
return new VarDerefBytesEnum(source, cloneData(), cloneIndex());
}
@@ -250,7 +250,7 @@ class VarDerefBytesImpl {
public VarDerefBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
- super(source, datIn, idxIn, -1, Values.BYTES_VAR_DEREF);
+ super(source, datIn, idxIn, -1, Type.BYTES_VAR_DEREF);
}
@Override
@@ -273,8 +273,8 @@ class VarDerefBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_VAR_DEREF;
+ public Type type() {
+ return Type.BYTES_VAR_DEREF;
}
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
index 53dddf31c73..ab023a1f817 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
@@ -220,8 +220,8 @@ class VarSortedBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_VAR_SORTED;
+ public Type type() {
+ return Type.BYTES_VAR_SORTED;
}
@Override
@@ -231,11 +231,11 @@ class VarSortedBytesImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
return new VarSortedBytesEnum(source, cloneData(), cloneIndex());
}
- private static class VarSortedBytesEnum extends ValuesEnum {
+ private static class VarSortedBytesEnum extends DocValuesEnum {
private PackedInts.Reader docToOrdIndex;
private PackedInts.Reader ordToOffsetIndex;
private IndexInput idxIn;
@@ -248,7 +248,7 @@ class VarSortedBytesImpl {
protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
- super(source, Values.BYTES_VAR_SORTED);
+ super(source, Type.BYTES_VAR_SORTED);
totBytes = idxIn.readLong();
// keep that in memory to prevent lots of disk seeks
docToOrdIndex = PackedInts.getReader(idxIn);
@@ -309,8 +309,8 @@ class VarSortedBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_VAR_SORTED;
+ public Type type() {
+ return Type.BYTES_VAR_SORTED;
}
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
index c259fe2c242..18f0272a268 100644
--- a/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
+++ b/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
@@ -147,8 +147,8 @@ class VarStraightBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_VAR_STRAIGHT;
+ public Type type() {
+ return Type.BYTES_VAR_STRAIGHT;
}
@Override
@@ -158,11 +158,11 @@ class VarStraightBytesImpl {
}
@Override
- public ValuesEnum getEnum(AttributeSource source) throws IOException {
+ public DocValuesEnum getEnum(AttributeSource source) throws IOException {
return new VarStraightBytesEnum(source, cloneData(), cloneIndex());
}
- private class VarStraightBytesEnum extends ValuesEnum {
+ private class VarStraightBytesEnum extends DocValuesEnum {
private final PackedInts.Reader addresses;
private final IndexInput datIn;
private final IndexInput idxIn;
@@ -172,7 +172,7 @@ class VarStraightBytesImpl {
protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
- super(source, Values.BYTES_VAR_STRAIGHT);
+ super(source, Type.BYTES_VAR_STRAIGHT);
totBytes = idxIn.readVInt();
fp = datIn.getFilePointer();
addresses = PackedInts.getReader(idxIn);
@@ -220,8 +220,8 @@ class VarStraightBytesImpl {
}
@Override
- public Values type() {
- return Values.BYTES_VAR_STRAIGHT;
+ public Type type() {
+ return Type.BYTES_VAR_STRAIGHT;
}
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/values/Writer.java b/lucene/src/java/org/apache/lucene/index/values/Writer.java
index 376c09291ff..c254e0ae3d5 100644
--- a/lucene/src/java/org/apache/lucene/index/values/Writer.java
+++ b/lucene/src/java/org/apache/lucene/index/values/Writer.java
@@ -55,7 +55,7 @@ public abstract class Writer extends DocValuesConsumer {
/** Records the specfied value for the docID */
protected abstract void add(int docID) throws IOException;
- protected abstract void setNextEnum(ValuesEnum valuesEnum);
+ protected abstract void setNextEnum(DocValuesEnum valuesEnum);
/** Finish writing, close any files */
public abstract void finish(int docCount) throws IOException;
@@ -63,7 +63,7 @@ public abstract class Writer extends DocValuesConsumer {
// enables bulk copies in subclasses per MergeState
@Override
protected void merge(MergeState state) throws IOException {
- final ValuesEnum valEnum = state.reader.getEnum();
+ final DocValuesEnum valEnum = state.reader.getEnum();
assert valEnum != null;
try {
setNextEnum(valEnum);
@@ -71,11 +71,11 @@ public abstract class Writer extends DocValuesConsumer {
final Bits bits = state.bits;
final int docCount = state.docCount;
int currentDocId;
- if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) {
+ if ((currentDocId = valEnum.advance(0)) != DocValuesEnum.NO_MORE_DOCS) {
for (int i = 0; i < docCount; i++) {
if (bits == null || !bits.get(i)) {
if (currentDocId < i) {
- if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) {
+ if ((currentDocId = valEnum.advance(i)) == DocValuesEnum.NO_MORE_DOCS) {
break; // advance can jump over default values
}
}
@@ -91,7 +91,7 @@ public abstract class Writer extends DocValuesConsumer {
}
}
- public static Writer create(Values v, String id, Directory directory,
+ public static Writer create(Type v, String id, Directory directory,
Comparator comp, AtomicLong bytesUsed) throws IOException {
switch (v) {
case PACKED_INTS:
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
index 94b247e3c36..694ed68d1a4 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
@@ -89,7 +89,7 @@ public class TestDocValues extends LuceneTestCase {
DocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc);
for (int iter = 0; iter < 2; iter++) {
- ValuesEnum bytesEnum = r.getEnum();
+ DocValuesEnum bytesEnum = r.getEnum();
assertNotNull("enum is null", bytesEnum);
BytesRef ref = bytesEnum.bytes();
@@ -100,8 +100,8 @@ public class TestDocValues extends LuceneTestCase {
assertEquals("doc: " + idx + " lenLeft: " + values[idx].length()
+ " lenRight: " + utf8String.length(), values[idx], utf8String);
}
- assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
- assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1));
+ assertEquals(DocValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
+ assertEquals(DocValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1));
bytesEnum.close();
}
@@ -207,7 +207,7 @@ public class TestDocValues extends LuceneTestCase {
}
for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = r.getEnum();
+ DocValuesEnum iEnum = r.getEnum();
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES; i++) {
assertEquals(i, iEnum.nextDoc());
@@ -217,14 +217,14 @@ public class TestDocValues extends LuceneTestCase {
assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1));
}
for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
- assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
+ assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
}
iEnum.close();
}
for (int iter = 0; iter < 2; iter++) {
- ValuesEnum iEnum = r.getEnum();
+ DocValuesEnum iEnum = r.getEnum();
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
assertEquals(i, iEnum.advance(i));
@@ -234,7 +234,7 @@ public class TestDocValues extends LuceneTestCase {
assertEquals(NUM_VALUES - 1, iEnum.advance(NUM_VALUES - 1));
}
for (int i = NUM_VALUES; i < NUM_VALUES + additionalDocs; i++) {
- assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
+ assertEquals(DocValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
}
iEnum.close();
@@ -273,26 +273,26 @@ public class TestDocValues extends LuceneTestCase {
}
for (int iter = 0; iter < 2; iter++) {
- ValuesEnum fEnum = r.getEnum();
+ DocValuesEnum fEnum = r.getEnum();
FloatsRef floats = fEnum.getFloat();
for (int i = 0; i < NUM_VALUES; i++) {
assertEquals(i, fEnum.nextDoc());
assertEquals(values[i], floats.get(), delta);
}
for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
- assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.nextDoc());
+ assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.nextDoc());
}
fEnum.close();
}
for (int iter = 0; iter < 2; iter++) {
- ValuesEnum fEnum = r.getEnum();
+ DocValuesEnum fEnum = r.getEnum();
FloatsRef floats = fEnum.getFloat();
for (int i = 0; i < NUM_VALUES; i += 1 + random.nextInt(25)) {
assertEquals(i, fEnum.advance(i));
assertEquals(values[i], floats.get(), delta);
}
for (int i = NUM_VALUES; i < NUM_VALUES + additionalValues; i++) {
- assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.advance(i));
+ assertEquals(DocValuesEnum.NO_MORE_DOCS, fEnum.advance(i));
}
fEnum.close();
}
diff --git a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
index 5600b005bac..334414b0d8a 100644
--- a/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
+++ b/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
@@ -80,9 +80,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
* - add test for unoptimized case with deletes
* - add a test for addIndexes
* - split up existing testcases and give them meaningfull names
- * - use consistent naming throughout DocValues
- * - Values -> DocValueType
- * - PackedIntsImpl -> Ints
* - run RAT
* - add tests for FieldComparator FloatIndexValuesComparator vs. FloatValuesComparator etc.
*/
@@ -139,7 +136,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
/**
- * Tests complete indexing of {@link Values} including deletions, merging and
+ * Tests complete indexing of {@link Type} including deletions, merging and
* sparse value fields on Compound-File
*/
public void testIndexBytesNoDeletesCFS() throws IOException {
@@ -159,7 +156,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
/**
- * Tests complete indexing of {@link Values} including deletions, merging and
+ * Tests complete indexing of {@link Type} including deletions, merging and
* sparse value fields on None-Compound-File
*/
public void testIndexBytesNoDeletes() throws IOException {
@@ -198,11 +195,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
final int numValues = 179 + random.nextInt(151);
- final List<Values> numVariantList = new ArrayList<Values>(NUMERICS);
+ final List<Type> numVariantList = new ArrayList<Type>(NUMERICS);
// run in random order to test if fill works correctly during merges
Collections.shuffle(numVariantList, random);
- for (Values val : numVariantList) {
+ for (Type val : numVariantList) {
OpenBitSet deleted = indexValues(w, numValues, val, numVariantList,
withDeletions, 7);
List closeables = new ArrayList