diff --git a/build.xml b/build.xml
index 8d9326d962e..4b23cc49e3e 100644
--- a/build.xml
+++ b/build.xml
@@ -124,6 +124,7 @@
+
diff --git a/dev-tools/eclipse/dot.classpath b/dev-tools/eclipse/dot.classpath
index 18b83d8f37f..809ab6c5952 100644
--- a/dev-tools/eclipse/dot.classpath
+++ b/dev-tools/eclipse/dot.classpath
@@ -102,7 +102,7 @@
-
+
@@ -112,14 +112,22 @@
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dev-tools/maven/lucene/contrib/demo/pom.xml.template b/dev-tools/maven/lucene/contrib/demo/pom.xml.template
index f73258be1f7..3ef308c0f5e 100644
--- a/dev-tools/maven/lucene/contrib/demo/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/demo/pom.xml.template
@@ -70,8 +70,8 @@
         <version>${project.version}</version>
       </dependency>
       <dependency>
-        <groupId>javax.servlet</groupId>
-        <artifactId>servlet-api</artifactId>
+        <groupId>org.eclipse.jetty.orbit</groupId>
+        <artifactId>javax.servlet</artifactId>
         <scope>provided</scope>
diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template
index a250a9763cd..342c8bb7765 100644
--- a/dev-tools/maven/pom.xml.template
+++ b/dev-tools/maven/pom.xml.template
@@ -42,8 +42,7 @@
     <base.specification.version>4.0.0</base.specification.version>
     <maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
     <java.compat.version>1.6</java.compat.version>
-    <jetty.version>6.1.26</jetty.version>
-    <patched.jetty.version>6.1.26-patched-JETTY-1340</patched.jetty.version>
+    <jetty.version>8.1.2.v20120308</jetty.version>
     <slf4j.version>1.6.1</slf4j.version>
     <tika.version>1.0</tika.version>
@@ -296,14 +295,24 @@
         <version>2.2</version>
       </dependency>
       <dependency>
-        <groupId>org.mortbay.jetty</groupId>
-        <artifactId>jetty</artifactId>
-        <version>${patched.jetty.version}</version>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-server</artifactId>
+        <version>${jetty.version}</version>
       </dependency>
       <dependency>
-        <groupId>org.mortbay.jetty</groupId>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-servlet</artifactId>
+        <version>${jetty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
         <artifactId>jetty-util</artifactId>
-        <version>${patched.jetty.version}</version>
+        <version>${jetty.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.jetty</groupId>
+        <artifactId>jetty-webapp</artifactId>
+        <version>${jetty.version}</version>
       </dependency>
       <dependency>
         <groupId>org.slf4j</groupId>
@@ -331,9 +340,9 @@
         <version>${slf4j.version}</version>
       </dependency>
       <dependency>
-        <groupId>javax.servlet</groupId>
-        <artifactId>servlet-api</artifactId>
-        <version>2.4</version>
+        <groupId>org.eclipse.jetty.orbit</groupId>
+        <artifactId>javax.servlet</artifactId>
+        <version>3.0.0.v201112011016</version>
       </dependency>
       <dependency>
         <groupId>com.spatial4j</groupId>
@@ -495,7 +504,7 @@
             <groupId>org.mortbay.jetty</groupId>
-            <artifactId>maven-jetty-plugin</artifactId>
+            <artifactId>jetty-maven-plugin</artifactId>
             <version>${jetty.version}</version>
@@ -636,7 +645,7 @@
                 <artifactId>solr-commons-csv</artifactId>
                 <version>${project.version}</version>
                 <packaging>jar</packaging>
-                <file>solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar</file>
+                <file>solr/lib/apache-solr-commons-csv-1.0-SNAPSHOT-r966014.jar</file>
@@ -653,34 +662,6 @@
                 <file>solr/lib/apache-solr-noggit-r1211150.jar</file>
               </configuration>
             </execution>
-            <execution>
-              <id>install-jetty</id>
-              <phase>install</phase>
-              <goals>
-                <goal>install-file</goal>
-              </goals>
-              <configuration>
-                <groupId>org.mortbay.jetty</groupId>
-                <artifactId>jetty</artifactId>
-                <version>${patched.jetty.version}</version>
-                <packaging>jar</packaging>
-                <file>solr/example/lib/jetty-${patched.jetty.version}.jar</file>
-              </configuration>
-            </execution>
-            <execution>
-              <id>install-jetty-util</id>
-              <phase>install</phase>
-              <goals>
-                <goal>install-file</goal>
-              </goals>
-              <configuration>
-                <groupId>org.mortbay.jetty</groupId>
-                <artifactId>jetty-util</artifactId>
-                <version>${patched.jetty.version}</version>
-                <packaging>jar</packaging>
-                <file>solr/example/lib/jetty-util-${patched.jetty.version}.jar</file>
-              </configuration>
-            </execution>
             <execution>
               <id>install-jsonic</id>
               <phase>install</phase>
diff --git a/dev-tools/maven/solr/contrib/clustering/pom.xml.template b/dev-tools/maven/solr/contrib/clustering/pom.xml.template
index 277966ad333..bcd51e9d8b0 100644
--- a/dev-tools/maven/solr/contrib/clustering/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/clustering/pom.xml.template
@@ -94,12 +94,17 @@
     <dependency>
-      <groupId>org.mortbay.jetty</groupId>
-      <artifactId>jetty</artifactId>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.mortbay.jetty</groupId>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-servlet</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-util</artifactId>
       <scope>test</scope>
     </dependency>
diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template
index e6abe764e86..f0e0369f6de 100644
--- a/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template
@@ -73,12 +73,17 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.mortbay.jetty</groupId>
-      <artifactId>jetty</artifactId>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.mortbay.jetty</groupId>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-servlet</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-util</artifactId>
       <scope>test</scope>
     </dependency>
diff --git a/dev-tools/maven/solr/core/pom.xml.template b/dev-tools/maven/solr/core/pom.xml.template
index 9d786e21577..97840259d61 100644
--- a/dev-tools/maven/solr/core/pom.xml.template
+++ b/dev-tools/maven/solr/core/pom.xml.template
@@ -172,15 +172,20 @@
       <artifactId>guava</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.mortbay.jetty</groupId>
-      <artifactId>jetty</artifactId>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
       <optional>true</optional>
     </dependency>
     <dependency>
-      <groupId>org.mortbay.jetty</groupId>
+      <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-util</artifactId>
       <optional>true</optional>
     </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-webapp</artifactId>
+      <optional>true</optional>
+    </dependency>
     <dependency>
       <groupId>org.codehaus.woodstox</groupId>
       <artifactId>wstx-asl</artifactId>
@@ -193,8 +198,8 @@
-      <groupId>javax.servlet</groupId>
-      <artifactId>servlet-api</artifactId>
+      <groupId>org.eclipse.jetty.orbit</groupId>
+      <artifactId>javax.servlet</artifactId>
diff --git a/dev-tools/maven/solr/webapp/pom.xml.template b/dev-tools/maven/solr/webapp/pom.xml.template
index 105ef63ac52..f6c82f643c9 100644
--- a/dev-tools/maven/solr/webapp/pom.xml.template
+++ b/dev-tools/maven/solr/webapp/pom.xml.template
@@ -58,8 +58,8 @@
         <version>${project.version}</version>
       </dependency>
       <dependency>
-        <groupId>javax.servlet</groupId>
-        <artifactId>servlet-api</artifactId>
+        <groupId>org.eclipse.jetty.orbit</groupId>
+        <artifactId>javax.servlet</artifactId>
         <scope>provided</scope>
@@ -98,9 +98,9 @@
-
+
         <groupId>org.mortbay.jetty</groupId>
-        <artifactId>maven-jetty-plugin</artifactId>
+        <artifactId>jetty-maven-plugin</artifactId>
         <scanIntervalSeconds>10</scanIntervalSeconds>
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 87678750af4..33dd4e7d2e8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -410,6 +410,10 @@ API Changes
method maybeReopen has been deprecated in favor of maybeRefresh().
(Shai Erera, Mike McCandless, Simon Willnauer)
+* LUCENE-3859: AtomicReader.hasNorms(field) is deprecated; instead you
+ can inspect the FieldInfo yourself to see if norms are present, which
+ also allows you to get the type. (Robert Muir)
+
New features
* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
@@ -919,7 +923,13 @@ Bug fixes
from the delegate DocIdSet.iterator(), which is allowed to return
null by DocIdSet specification when no documents match.
(Shay Banon via Uwe Schindler)
-
+
+* LUCENE-3821: SloppyPhraseScorer missed documents that ExactPhraseScorer finds.
+  When a phrase query had repeating terms (e.g. "yes ho yes"), the sloppy
+  query missed documents that the exact query matched. Fixed, except for
+  repeating multiterms (e.g. "yes ho yes|no").
+  (Robert Muir, Doron Cohen)
+
Optimizations
* LUCENE-3653: Improve concurrency in VirtualMethod and AttributeSource by
@@ -932,6 +942,9 @@ Documentation
Build
+* LUCENE-3857: exceptions from other threads in beforeclass/etc do not fail
+ the test (Dawid Weiss)
+
* LUCENE-3847: LuceneTestCase will now check for modifications of System
properties before and after each test (and suite). If changes are detected,
the test will fail. A rule can be used to reset system properties to
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index ffb174afc27..51b18cfbe18 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -170,7 +170,7 @@
-
+
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt
index a48d2fc82c2..20b8042e9cb 100644
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@@ -72,6 +72,8 @@ New Features
start/endOffset, if offsets are indexed. (Alan Woodward via Mike
McCandless)
+ * LUCENE-3802: Support for grouped faceting. (Martijn van Groningen)
+
API Changes
* LUCENE-2606: Changed RegexCapabilities interface to fix thread
@@ -242,6 +244,10 @@ Bug Fixes
that take stopwords and stem exclusion tables also initialize
the default stem overrides (e.g. kind/kinder, fiets). (Robert Muir)
+ * LUCENE-3831: avoid NPE if the SpanQuery has a null field (e.g. a
+   SpanOrQuery with no clauses added). (Alan Woodward via Mike
+   McCandless)
+
Documentation
* LUCENE-3599: Javadocs for DistanceUtils.haversine() were incorrectly
diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
index d5f8222f653..36a3f9d5d8e 100644
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
@@ -43,8 +43,8 @@ class MemoryIndexNormDocValues extends DocValues {
}
@Override
- public Type type() {
- return source.type();
+ public Type getType() {
+ return source.getType();
}
@Override
diff --git a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
index d3ccd61bc6e..8ae25e0b5b4 100644
--- a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
+++ b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
@@ -21,6 +21,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
@@ -40,11 +41,16 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
@@ -225,4 +231,28 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
reader.close();
}
+
+ // LUCENE-3831
+ public void testNullPointerException() throws IOException {
+ RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
+    SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<RegexpQuery>(regex);
+
+ MemoryIndex mindex = new MemoryIndex();
+ mindex.addField("field", new MockAnalyzer(random).tokenStream("field", new StringReader("hello there")));
+
+ // This throws an NPE
+ assertEquals(0, mindex.search(wrappedquery), 0.00001f);
+ }
+
+ // LUCENE-3831
+ public void testPassesIfWrapped() throws IOException {
+ RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
+    SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));
+
+ MemoryIndex mindex = new MemoryIndex();
+ mindex.addField("field", new MockAnalyzer(random).tokenStream("field", new StringReader("hello there")));
+
+ // This passes though
+ assertEquals(0, mindex.search(wrappedquery), 0.00001f);
+ }
}
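The two tests above capture LUCENE-3831 from both sides: searching the MemoryIndex with a bare SpanMultiTermQueryWrapper used to trip the NPE, while the SpanOrQuery-wrapped form already passed. As a hedged sketch (reusing the mindex and regex setup from the tests, not part of the patch itself), the workaround form is:

    // Workaround sketch for LUCENE-3831: wrap the multi-term span query
    // in a SpanOrQuery before searching the MemoryIndex.
    SpanQuery safe = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));
    float score = mindex.search(safe); // completes without an NPE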
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
index d2e42c63a18..72be59cca46 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
@@ -197,6 +197,7 @@ public class BlockTermsReader extends FieldsProducer {
@Override
public Terms terms(String field) throws IOException {
+ assert field != null;
return fields.get(field);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
index f773e1e72ee..00a0c2a696a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@@ -211,6 +211,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
@Override
public Terms terms(String field) throws IOException {
+ assert field != null;
return fields.get(field);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
index 8e960da2adf..98f130ae825 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.Set;
+import java.util.ServiceLoader; // javadocs
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriterConfig; // javadocs
@@ -26,7 +27,15 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.util.NamedSPILoader;
/**
- * Encodes/decodes an inverted index segment
+ * Encodes/decodes an inverted index segment.
+ *
+ * Note, when extending this class, the name ({@link #getName}) is
+ * written into the index. In order for the segment to be read, the
+ * name must resolve to your implementation via {@link #forName(String)}.
+ * This method uses Java's
+ * {@link ServiceLoader Service Provider Interface} to resolve codec names.
+ *
+ * @see ServiceLoader
*/
public abstract class Codec implements NamedSPILoader.NamedSPI {
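As a minimal sketch of the SPI lookup this javadoc describes (assuming "Lucene40", the default codec name shipped in this release):

    // Resolve a codec by the name that gets written into the index.
    Codec codec = Codec.forName("Lucene40");
    assert codec.getName().equals("Lucene40");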
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
new file mode 100644
index 00000000000..e83ab2a48db
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
@@ -0,0 +1,513 @@
+package org.apache.lucene.codecs;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.Map;
+
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+/**
+ * @lucene.experimental
+ * @lucene.internal
+ */
+public abstract class DocValuesArraySource extends Source {
+
+  private static final Map<Type, DocValuesArraySource> TEMPLATES;
+
+ static {
+    EnumMap<Type, DocValuesArraySource> templates = new EnumMap<Type, DocValuesArraySource>(
+        Type.class);
+ templates.put(Type.FIXED_INTS_16, new ShortValues());
+ templates.put(Type.FIXED_INTS_32, new IntValues());
+ templates.put(Type.FIXED_INTS_64, new LongValues());
+ templates.put(Type.FIXED_INTS_8, new ByteValues());
+ templates.put(Type.FLOAT_32, new FloatValues());
+ templates.put(Type.FLOAT_64, new DoubleValues());
+ TEMPLATES = Collections.unmodifiableMap(templates);
+ }
+
+ public static DocValuesArraySource forType(Type type) {
+ return TEMPLATES.get(type);
+ }
+
+ protected final int bytesPerValue;
+
+ DocValuesArraySource(int bytesPerValue, Type type) {
+ super(type);
+ this.bytesPerValue = bytesPerValue;
+ }
+
+ @Override
+ public abstract BytesRef getBytes(int docID, BytesRef ref);
+
+
+ public abstract DocValuesArraySource newFromInput(IndexInput input, int numDocs)
+ throws IOException;
+
+ public abstract DocValuesArraySource newFromArray(Object array);
+
+ @Override
+ public final boolean hasArray() {
+ return true;
+ }
+
+ public void toBytes(long value, BytesRef bytesRef) {
+ copyLong(bytesRef, value);
+ }
+
+ public void toBytes(double value, BytesRef bytesRef) {
+ copyLong(bytesRef, Double.doubleToRawLongBits(value));
+ }
+
+ final static class ByteValues extends DocValuesArraySource {
+ private final byte[] values;
+
+ ByteValues() {
+ super(1, Type.FIXED_INTS_8);
+ values = new byte[0];
+ }
+ private ByteValues(byte[] array) {
+ super(1, Type.FIXED_INTS_8);
+ values = array;
+ }
+
+ private ByteValues(IndexInput input, int numDocs) throws IOException {
+ super(1, Type.FIXED_INTS_8);
+ values = new byte[numDocs];
+ input.readBytes(values, 0, values.length, false);
+ }
+
+ @Override
+ public byte[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
+ throws IOException {
+ return new ByteValues(input, numDocs);
+ }
+
+ @Override
+ public DocValuesArraySource newFromArray(Object array) {
+ assert array instanceof byte[];
+ return new ByteValues((byte[]) array);
+ }
+
+ public void toBytes(long value, BytesRef bytesRef) {
+ if (bytesRef.bytes.length == 0) {
+ bytesRef.bytes = new byte[1];
+ }
+ bytesRef.bytes[0] = (byte) (0xFFL & value);
+ bytesRef.offset = 0;
+ bytesRef.length = 1;
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ toBytes(getInt(docID), ref);
+ return ref;
+ }
+
+ };
+
+ final static class ShortValues extends DocValuesArraySource {
+ private final short[] values;
+
+ ShortValues() {
+ super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
+ values = new short[0];
+ }
+
+ private ShortValues(short[] array) {
+ super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
+ values = array;
+ }
+
+ private ShortValues(IndexInput input, int numDocs) throws IOException {
+ super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
+ values = new short[numDocs];
+ for (int i = 0; i < values.length; i++) {
+ values[i] = input.readShort();
+ }
+ }
+
+ @Override
+ public short[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
+ throws IOException {
+ return new ShortValues(input, numDocs);
+ }
+
+ public void toBytes(long value, BytesRef bytesRef) {
+ copyShort(bytesRef, (short) (0xFFFFL & value));
+ }
+
+ @Override
+ public DocValuesArraySource newFromArray(Object array) {
+ assert array instanceof short[];
+ return new ShortValues((short[]) array);
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ toBytes(getInt(docID), ref);
+ return ref;
+ }
+
+ };
+
+ final static class IntValues extends DocValuesArraySource {
+ private final int[] values;
+
+ IntValues() {
+ super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
+ values = new int[0];
+ }
+
+ private IntValues(IndexInput input, int numDocs) throws IOException {
+ super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
+ values = new int[numDocs];
+ for (int i = 0; i < values.length; i++) {
+ values[i] = input.readInt();
+ }
+ }
+
+ private IntValues(int[] array) {
+ super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
+ values = array;
+ }
+
+ @Override
+ public int[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return 0xFFFFFFFF & values[docID];
+ }
+
+ @Override
+ public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
+ throws IOException {
+ return new IntValues(input, numDocs);
+ }
+
+ public void toBytes(long value, BytesRef bytesRef) {
+ copyInt(bytesRef, (int) (0xFFFFFFFF & value));
+ }
+
+ @Override
+ public DocValuesArraySource newFromArray(Object array) {
+ assert array instanceof int[];
+ return new IntValues((int[]) array);
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ toBytes(getInt(docID), ref);
+ return ref;
+ }
+
+ };
+
+ final static class LongValues extends DocValuesArraySource {
+ private final long[] values;
+
+ LongValues() {
+ super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
+ values = new long[0];
+ }
+
+ private LongValues(IndexInput input, int numDocs) throws IOException {
+ super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
+ values = new long[numDocs];
+ for (int i = 0; i < values.length; i++) {
+ values[i] = input.readLong();
+ }
+ }
+
+ private LongValues(long[] array) {
+ super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
+ values = array;
+ }
+
+ @Override
+ public long[] getArray() {
+ return values;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
+ throws IOException {
+ return new LongValues(input, numDocs);
+ }
+
+ @Override
+ public DocValuesArraySource newFromArray(Object array) {
+ assert array instanceof long[];
+ return new LongValues((long[])array);
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ toBytes(getInt(docID), ref);
+ return ref;
+ }
+
+ };
+
+ final static class FloatValues extends DocValuesArraySource {
+ private final float[] values;
+
+ FloatValues() {
+ super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
+ values = new float[0];
+ }
+
+ private FloatValues(IndexInput input, int numDocs) throws IOException {
+ super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
+ values = new float[numDocs];
+ /*
+ * we always read BIG_ENDIAN here since the writer serialized plain bytes
+ * we can simply read the ints / longs back in using readInt / readLong
+ */
+ for (int i = 0; i < values.length; i++) {
+ values[i] = Float.intBitsToFloat(input.readInt());
+ }
+ }
+
+ private FloatValues(float[] array) {
+ super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
+ values = array;
+ }
+
+ @Override
+ public float[] getArray() {
+ return values;
+ }
+
+ @Override
+ public double getFloat(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ public void toBytes(double value, BytesRef bytesRef) {
+ copyInt(bytesRef, Float.floatToRawIntBits((float)value));
+
+ }
+
+ @Override
+ public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
+ throws IOException {
+ return new FloatValues(input, numDocs);
+ }
+
+ @Override
+ public DocValuesArraySource newFromArray(Object array) {
+ assert array instanceof float[];
+ return new FloatValues((float[]) array);
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ toBytes(getFloat(docID), ref);
+ return ref;
+ }
+ };
+
+ final static class DoubleValues extends DocValuesArraySource {
+ private final double[] values;
+
+ DoubleValues() {
+ super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
+ values = new double[0];
+ }
+
+ private DoubleValues(IndexInput input, int numDocs) throws IOException {
+ super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
+ values = new double[numDocs];
+ /*
+ * we always read BIG_ENDIAN here since the writer serialized plain bytes
+ * we can simply read the ints / longs back in using readInt / readLong
+ */
+ for (int i = 0; i < values.length; i++) {
+ values[i] = Double.longBitsToDouble(input.readLong());
+ }
+ }
+
+ private DoubleValues(double[] array) {
+ super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
+ values = array;
+ }
+
+ @Override
+ public double[] getArray() {
+ return values;
+ }
+
+ @Override
+ public double getFloat(int docID) {
+ assert docID >= 0 && docID < values.length;
+ return values[docID];
+ }
+
+ @Override
+ public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
+ throws IOException {
+ return new DoubleValues(input, numDocs);
+ }
+
+ @Override
+ public DocValuesArraySource newFromArray(Object array) {
+ assert array instanceof double[];
+ return new DoubleValues((double[]) array);
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ toBytes(getFloat(docID), ref);
+ return ref;
+ }
+
+ };
+
+ /**
+ * Copies the given long value and encodes it as 8 byte Big-Endian.
+ *
+ * NOTE: this method resets the offset to 0, length to 8 and resizes the
+ * reference array if needed.
+ */
+ public static void copyLong(BytesRef ref, long value) {
+ if (ref.bytes.length < 8) {
+ ref.bytes = new byte[8];
+ }
+ copyInternal(ref, (int) (value >> 32), ref.offset = 0);
+ copyInternal(ref, (int) value, 4);
+ ref.length = 8;
+ }
+
+ /**
+ * Copies the given int value and encodes it as 4 byte Big-Endian.
+ *
+ * NOTE: this method resets the offset to 0, length to 4 and resizes the
+ * reference array if needed.
+ */
+ public static void copyInt(BytesRef ref, int value) {
+ if (ref.bytes.length < 4) {
+ ref.bytes = new byte[4];
+ }
+ copyInternal(ref, value, ref.offset = 0);
+ ref.length = 4;
+
+ }
+
+ /**
+ * Copies the given short value and encodes it as a 2 byte Big-Endian.
+ *
+ * NOTE: this method resets the offset to 0, length to 2 and resizes the
+ * reference array if needed.
+ */
+ public static void copyShort(BytesRef ref, short value) {
+ if (ref.bytes.length < 2) {
+ ref.bytes = new byte[2];
+ }
+ ref.offset = 0;
+ ref.bytes[ref.offset] = (byte) (value >> 8);
+ ref.bytes[ref.offset + 1] = (byte) (value);
+ ref.length = 2;
+ }
+
+ private static void copyInternal(BytesRef ref, int value, int startOffset) {
+ ref.bytes[startOffset] = (byte) (value >> 24);
+ ref.bytes[startOffset + 1] = (byte) (value >> 16);
+ ref.bytes[startOffset + 2] = (byte) (value >> 8);
+ ref.bytes[startOffset + 3] = (byte) (value);
+ }
+
+ /**
+ * Converts 2 consecutive bytes from the current offset to a short. Bytes are
+ * interpreted as Big-Endian (most significant bit first)
+ *
+ * NOTE: this method does NOT check the bounds of the referenced array.
+ */
+ public static short asShort(BytesRef b) {
+ return (short) (0xFFFF & ((b.bytes[b.offset] & 0xFF) << 8) | (b.bytes[b.offset + 1] & 0xFF));
+ }
+
+ /**
+ * Converts 4 consecutive bytes from the current offset to an int. Bytes are
+ * interpreted as Big-Endian (most significant bit first)
+ *
+ * NOTE: this method does NOT check the bounds of the referenced array.
+ */
+ public static int asInt(BytesRef b) {
+ return asIntInternal(b, b.offset);
+ }
+
+ /**
+ * Converts 8 consecutive bytes from the current offset to a long. Bytes are
+ * interpreted as Big-Endian (most significant bit first)
+ *
+ * NOTE: this method does NOT check the bounds of the referenced array.
+ */
+ public static long asLong(BytesRef b) {
+ return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
+ b.offset + 4) & 0xFFFFFFFFL);
+ }
+
+ private static int asIntInternal(BytesRef b, int pos) {
+ return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
+ | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
+ }
+
+
+}
\ No newline at end of file
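A short sketch of how the big-endian helpers above round-trip (the int and short variants behave the same way); the constant is arbitrary:

    // copyLong writes 8 bytes MSB-first; asLong reads them back.
    BytesRef scratch = new BytesRef();
    DocValuesArraySource.copyLong(scratch, 0x0123456789ABCDEFL);
    assert scratch.length == 8 && scratch.bytes[0] == (byte) 0x01;
    assert DocValuesArraySource.asLong(scratch) == 0x0123456789ABCDEFL;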
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index e59ffbb481a..5b621395512 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -22,6 +22,7 @@ import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.Bits;
@@ -40,6 +41,7 @@ public abstract class DocValuesConsumer {
protected final BytesRef spare = new BytesRef();
+ protected abstract Type getType();
/**
* Adds the given {@link IndexableField} instance to this
* {@link DocValuesConsumer}
@@ -110,7 +112,7 @@ public abstract class DocValuesConsumer {
final Source source = reader.getDirectSource();
assert source != null;
int docID = docBase;
- final DocValues.Type type = reader.type();
+ final Type type = getType();
final Field scratchField;
switch(type) {
case VAR_INTS:
@@ -160,7 +162,7 @@ public abstract class DocValuesConsumer {
*/
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc)
throws IOException {
- switch(source.type()) {
+ switch(getType()) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PerDocProducerBase.java
similarity index 67%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesReaderBase.java
rename to lucene/core/src/java/org/apache/lucene/codecs/PerDocProducerBase.java
index 71fb02f0387..d2bfb46b9f6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesReaderBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PerDocProducerBase.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.lucene40.values;
+package org.apache.lucene.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,10 +24,6 @@ import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;
-import org.apache.lucene.codecs.PerDocProducer;
-import org.apache.lucene.codecs.lucene40.values.Bytes;
-import org.apache.lucene.codecs.lucene40.values.Floats;
-import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.DocValues;
@@ -40,7 +36,7 @@ import org.apache.lucene.util.BytesRef;
* Abstract base class for PerDocProducer implementations
* @lucene.experimental
*/
-public abstract class DocValuesReaderBase extends PerDocProducer {
+public abstract class PerDocProducerBase extends PerDocProducer {
protected abstract void closeInternal(Collection<? extends Closeable> closeables) throws IOException;
protected abstract Map<String, DocValues> docValues();
@@ -70,9 +66,7 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
for (FieldInfo fieldInfo : fieldInfos) {
if (canLoad(fieldInfo)) {
final String field = fieldInfo.name;
- // TODO can we have a compound file per segment and codec for
- // docvalues?
- final String id = DocValuesWriterBase.docValuesId(segment,
+ final String id = docValuesId(segment,
fieldInfo.number);
values.put(field,
loadDocValues(docCount, dir, id, getDocValuesType(fieldInfo), context));
@@ -97,7 +91,11 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
}
protected boolean anyDocValuesFields(FieldInfos infos) {
- return infos.anyDocValuesFields();
+ return infos.hasDocValues();
+ }
+
+ public static String docValuesId(String segmentsName, int fieldId) {
+ return segmentsName + "_" + fieldId;
}
/**
@@ -119,33 +117,6 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
* @throws IllegalArgumentException
* if the given {@link Type} is not supported
*/
- protected DocValues loadDocValues(int docCount, Directory dir, String id,
- DocValues.Type type, IOContext context) throws IOException {
- switch (type) {
- case FIXED_INTS_16:
- case FIXED_INTS_32:
- case FIXED_INTS_64:
- case FIXED_INTS_8:
- case VAR_INTS:
- return Ints.getValues(dir, id, docCount, type, context);
- case FLOAT_32:
- return Floats.getValues(dir, id, docCount, context, type);
- case FLOAT_64:
- return Floats.getValues(dir, id, docCount, context, type);
- case BYTES_FIXED_STRAIGHT:
- return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
- case BYTES_FIXED_DEREF:
- return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
- case BYTES_FIXED_SORTED:
- return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
- case BYTES_VAR_STRAIGHT:
- return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
- case BYTES_VAR_DEREF:
- return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
- case BYTES_VAR_SORTED:
- return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
- default:
- throw new IllegalStateException("unrecognized index values mode " + type);
- }
- }
+ protected abstract DocValues loadDocValues(int docCount, Directory dir, String id,
+ DocValues.Type type, IOContext context) throws IOException;
}
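The docValuesId helper hoisted into this base class joins the segment name and field number with an underscore; a small illustration (the values are made up):

    // Per-field DocValues file id for field number 2 of segment "_0".
    String id = PerDocProducerBase.docValuesId("_0", 2);
    assert id.equals("_0_2");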
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java
index 8cacae53379..fcad39cf482 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java
@@ -107,7 +107,7 @@ class Lucene3xFieldInfosReader extends FieldInfosReader {
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
- omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.BYTES_VAR_STRAIGHT : null);
+ omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.FIXED_INTS_8 : null);
}
if (input.getFilePointer() != input.length()) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java
index 8d0ec06125f..045173d5e00 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java
@@ -76,7 +76,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
try {
long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now)
for (FieldInfo fi : fields) {
- if (fi.normsPresent()) {
+ if (fi.hasNorms()) {
String fileName = getNormFilename(segmentName, normGen, fi.number);
Directory d = hasSeparateNorms(normGen, fi.number) ? separateNormsDir : dir;
@@ -235,7 +235,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
}
@Override
- public Type type() {
+ public Type getType() {
return Type.FIXED_INTS_8;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java
index 67521f64e62..aea22072bb7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java
@@ -24,19 +24,24 @@ import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;
-import org.apache.lucene.codecs.lucene40.values.DocValuesReaderBase;
+import org.apache.lucene.codecs.PerDocProducerBase;
+import org.apache.lucene.codecs.lucene40.values.Bytes;
+import org.apache.lucene.codecs.lucene40.values.Floats;
+import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;
/**
* Default PerDocProducer implementation that uses compound file.
* @lucene.experimental
*/
-public class Lucene40DocValuesProducer extends DocValuesReaderBase {
+public class Lucene40DocValuesProducer extends PerDocProducerBase {
protected final TreeMap<String, DocValues> docValues;
private final Directory cfs;
/**
@@ -71,4 +76,35 @@ public class Lucene40DocValuesProducer extends DocValuesReaderBase {
IOUtils.close(closeables);
}
}
+
+ @Override
+ protected DocValues loadDocValues(int docCount, Directory dir, String id,
+ Type type, IOContext context) throws IOException {
+ switch (type) {
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ return Ints.getValues(dir, id, docCount, type, context);
+ case FLOAT_32:
+ return Floats.getValues(dir, id, docCount, context, type);
+ case FLOAT_64:
+ return Floats.getValues(dir, id, docCount, context, type);
+ case BYTES_FIXED_STRAIGHT:
+ return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
+ case BYTES_FIXED_DEREF:
+ return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
+ case BYTES_FIXED_SORTED:
+ return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
+ case BYTES_VAR_STRAIGHT:
+ return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
+ case BYTES_VAR_DEREF:
+ return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
+ case BYTES_VAR_SORTED:
+ return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
+ default:
+ throw new IllegalStateException("unrecognized index values mode " + type);
+ }
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
index 8bf729eb41c..a085a4840d0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
@@ -62,7 +62,7 @@ public class Lucene40NormsFormat extends NormsFormat {
@Override
protected boolean canLoad(FieldInfo info) {
- return info.normsPresent();
+ return info.hasNorms();
}
@Override
@@ -92,7 +92,7 @@ public class Lucene40NormsFormat extends NormsFormat {
@Override
protected boolean canMerge(FieldInfo info) {
- return info.normsPresent();
+ return info.hasNorms();
}
@Override
@@ -104,7 +104,7 @@ public class Lucene40NormsFormat extends NormsFormat {
final String normsFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION);
FieldInfos fieldInfos = segmentInfo.getFieldInfos();
for (FieldInfo fieldInfo : fieldInfos) {
- if (fieldInfo.normsPresent()) {
+ if (fieldInfo.hasNorms()) {
final String normsEntriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
files.add(normsFileName);
files.add(normsEntriesFileName);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
index 06bf58e5e40..4ab1bd9e4b9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
@@ -23,7 +23,6 @@ import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@@ -64,7 +63,7 @@ import org.apache.lucene.util.packed.PackedInts;
*
* @lucene.experimental
*/
-final class Bytes {
+public final class Bytes {
static final String DV_SEGMENT_SUFFIX = "dv";
@@ -242,8 +241,8 @@ final class Bytes {
private final IOContext context;
protected BytesWriterBase(Directory dir, String id, String codecName,
- int version, Counter bytesUsed, IOContext context) throws IOException {
- super(bytesUsed);
+ int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
+ super(bytesUsed, type);
this.id = id;
this.dir = dir;
this.codecName = codecName;
@@ -292,25 +291,11 @@ final class Bytes {
}
return idxOut;
}
- /**
- * Must be called only with increasing docIDs. It's OK for some docIDs to be
- * skipped; they will be filled with 0 bytes.
- */
- protected
- abstract void add(int docID, BytesRef bytes) throws IOException;
+
@Override
public abstract void finish(int docCount) throws IOException;
- @Override
- protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
- add(docID, source.getBytes(sourceDoc, bytesRef));
- }
-
- @Override
- public void add(int docID, IndexableField docValue) throws IOException {
- add(docID, docValue.binaryValue());
- }
}
/**
@@ -378,7 +363,7 @@ final class Bytes {
}
@Override
- public Type type() {
+ public Type getType() {
return type;
}
@@ -393,22 +378,22 @@ final class Bytes {
protected long maxBytes = 0;
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
- int codecVersion, Counter bytesUsed, IOContext context)
+ int codecVersion, Counter bytesUsed, IOContext context, Type type)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
- ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false);
+ ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
- int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
+ int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
- ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam);
+      ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam, type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
- Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
- super(dir, id, codecName, codecVersion, bytesUsed, context);
+ Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
+ super(dir, id, codecName, codecVersion, bytesUsed, context, type);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
@@ -430,7 +415,9 @@ final class Bytes {
}
@Override
- protected void add(int docID, BytesRef bytes) throws IOException {
+ public void add(int docID, IndexableField value) throws IOException {
+ BytesRef bytes = value.binaryValue();
+ assert bytes != null;
if (bytes.length == 0) { // default value - skip it
return;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/BytesRefUtils.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/BytesRefUtils.java
deleted file mode 100644
index 4d4e7eae957..00000000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/BytesRefUtils.java
+++ /dev/null
@@ -1,120 +0,0 @@
-package org.apache.lucene.codecs.lucene40.values;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to You under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-import org.apache.lucene.util.BytesRef;
-
-/**
- * Package private BytesRefUtils - can move this into the o.a.l.utils package if
- * needed.
- *
- * @lucene.internal
- */
-final class BytesRefUtils {
-
- private BytesRefUtils() {
- }
-
- /**
- * Copies the given long value and encodes it as 8 byte Big-Endian.
- *
- * NOTE: this method resets the offset to 0, length to 8 and resizes the
- * reference array if needed.
- */
- public static void copyLong(BytesRef ref, long value) {
- if (ref.bytes.length < 8) {
- ref.bytes = new byte[8];
- }
- copyInternal(ref, (int) (value >> 32), ref.offset = 0);
- copyInternal(ref, (int) value, 4);
- ref.length = 8;
- }
-
- /**
- * Copies the given int value and encodes it as 4 byte Big-Endian.
- *
- * NOTE: this method resets the offset to 0, length to 4 and resizes the
- * reference array if needed.
- */
- public static void copyInt(BytesRef ref, int value) {
- if (ref.bytes.length < 4) {
- ref.bytes = new byte[4];
- }
- copyInternal(ref, value, ref.offset = 0);
- ref.length = 4;
- }
-
- /**
- * Copies the given short value and encodes it as a 2 byte Big-Endian.
- *
- * NOTE: this method resets the offset to 0, length to 2 and resizes the
- * reference array if needed.
- */
- public static void copyShort(BytesRef ref, short value) {
- if (ref.bytes.length < 2) {
- ref.bytes = new byte[2];
- }
- ref.bytes[ref.offset] = (byte) (value >> 8);
- ref.bytes[ref.offset + 1] = (byte) (value);
- ref.length = 2;
- }
-
- private static void copyInternal(BytesRef ref, int value, int startOffset) {
- ref.bytes[startOffset] = (byte) (value >> 24);
- ref.bytes[startOffset + 1] = (byte) (value >> 16);
- ref.bytes[startOffset + 2] = (byte) (value >> 8);
- ref.bytes[startOffset + 3] = (byte) (value);
- }
-
- /**
- * Converts 2 consecutive bytes from the current offset to a short. Bytes are
- * interpreted as Big-Endian (most significant bit first)
- *
- * NOTE: this method does NOT check the bounds of the referenced array.
- */
- public static short asShort(BytesRef b) {
- return (short) (0xFFFF & ((b.bytes[b.offset] & 0xFF) << 8) | (b.bytes[b.offset + 1] & 0xFF));
- }
-
- /**
- * Converts 4 consecutive bytes from the current offset to an int. Bytes are
- * interpreted as Big-Endian (most significant bit first)
- *
- * NOTE: this method does NOT check the bounds of the referenced array.
- */
- public static int asInt(BytesRef b) {
- return asIntInternal(b, b.offset);
- }
-
- /**
- * Converts 8 consecutive bytes from the current offset to a long. Bytes are
- * interpreted as Big-Endian (most significant bit first)
- *
- * NOTE: this method does NOT check the bounds of the referenced array.
- */
- public static long asLong(BytesRef b) {
- return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
- b.offset + 4) & 0xFFFFFFFFL);
- }
-
- private static int asIntInternal(BytesRef b, int pos) {
- return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
- | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
- }
-
-}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesArray.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesArray.java
deleted file mode 100644
index 668f094f2cb..00000000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesArray.java
+++ /dev/null
@@ -1,306 +0,0 @@
-package org.apache.lucene.codecs.lucene40.values;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.EnumMap;
-import java.util.Map;
-
-import org.apache.lucene.index.DocValues.Source;
-import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.RamUsageEstimator;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to You under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * @lucene.experimental
- */
-abstract class DocValuesArray extends Source {
-
-  static final Map<Type, DocValuesArray> TEMPLATES;
-
- static {
-    EnumMap<Type, DocValuesArray> templates = new EnumMap<Type, DocValuesArray>(
-        Type.class);
- templates.put(Type.FIXED_INTS_16, new ShortValues());
- templates.put(Type.FIXED_INTS_32, new IntValues());
- templates.put(Type.FIXED_INTS_64, new LongValues());
- templates.put(Type.FIXED_INTS_8, new ByteValues());
- templates.put(Type.FLOAT_32, new FloatValues());
- templates.put(Type.FLOAT_64, new DoubleValues());
- TEMPLATES = Collections.unmodifiableMap(templates);
- }
-
- protected final int bytesPerValue;
-
- DocValuesArray(int bytesPerValue, Type type) {
- super(type);
- this.bytesPerValue = bytesPerValue;
- }
-
- public abstract DocValuesArray newFromInput(IndexInput input, int numDocs)
- throws IOException;
-
- @Override
- public final boolean hasArray() {
- return true;
- }
-
- void toBytes(long value, BytesRef bytesRef) {
- BytesRefUtils.copyLong(bytesRef, value);
- }
-
- void toBytes(double value, BytesRef bytesRef) {
- BytesRefUtils.copyLong(bytesRef, Double.doubleToRawLongBits(value));
- }
-
- final static class ByteValues extends DocValuesArray {
- private final byte[] values;
-
- ByteValues() {
- super(1, Type.FIXED_INTS_8);
- values = new byte[0];
- }
-
- private ByteValues(IndexInput input, int numDocs) throws IOException {
- super(1, Type.FIXED_INTS_8);
- values = new byte[numDocs];
- input.readBytes(values, 0, values.length, false);
- }
-
- @Override
- public byte[] getArray() {
- return values;
- }
-
- @Override
- public long getInt(int docID) {
- assert docID >= 0 && docID < values.length;
- return values[docID];
- }
-
- @Override
- public DocValuesArray newFromInput(IndexInput input, int numDocs)
- throws IOException {
- return new ByteValues(input, numDocs);
- }
-
- void toBytes(long value, BytesRef bytesRef) {
- bytesRef.bytes[0] = (byte) (0xFFL & value);
- }
-
- };
-
- final static class ShortValues extends DocValuesArray {
- private final short[] values;
-
- ShortValues() {
- super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
- values = new short[0];
- }
-
- private ShortValues(IndexInput input, int numDocs) throws IOException {
- super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
- values = new short[numDocs];
- for (int i = 0; i < values.length; i++) {
- values[i] = input.readShort();
- }
- }
-
- @Override
- public short[] getArray() {
- return values;
- }
-
- @Override
- public long getInt(int docID) {
- assert docID >= 0 && docID < values.length;
- return values[docID];
- }
-
- @Override
- public DocValuesArray newFromInput(IndexInput input, int numDocs)
- throws IOException {
- return new ShortValues(input, numDocs);
- }
-
- void toBytes(long value, BytesRef bytesRef) {
- BytesRefUtils.copyShort(bytesRef, (short) (0xFFFFL & value));
- }
-
- };
-
- final static class IntValues extends DocValuesArray {
- private final int[] values;
-
- IntValues() {
- super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
- values = new int[0];
- }
-
- private IntValues(IndexInput input, int numDocs) throws IOException {
- super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
- values = new int[numDocs];
- for (int i = 0; i < values.length; i++) {
- values[i] = input.readInt();
- }
- }
-
- @Override
- public int[] getArray() {
- return values;
- }
-
- @Override
- public long getInt(int docID) {
- assert docID >= 0 && docID < values.length;
- return 0xFFFFFFFF & values[docID];
- }
-
- @Override
- public DocValuesArray newFromInput(IndexInput input, int numDocs)
- throws IOException {
- return new IntValues(input, numDocs);
- }
-
- void toBytes(long value, BytesRef bytesRef) {
- BytesRefUtils.copyInt(bytesRef, (int) (0xFFFFFFFF & value));
- }
-
- };
-
- final static class LongValues extends DocValuesArray {
- private final long[] values;
-
- LongValues() {
- super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
- values = new long[0];
- }
-
- private LongValues(IndexInput input, int numDocs) throws IOException {
- super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
- values = new long[numDocs];
- for (int i = 0; i < values.length; i++) {
- values[i] = input.readLong();
- }
- }
-
- @Override
- public long[] getArray() {
- return values;
- }
-
- @Override
- public long getInt(int docID) {
- assert docID >= 0 && docID < values.length;
- return values[docID];
- }
-
- @Override
- public DocValuesArray newFromInput(IndexInput input, int numDocs)
- throws IOException {
- return new LongValues(input, numDocs);
- }
-
- };
-
- final static class FloatValues extends DocValuesArray {
- private final float[] values;
-
- FloatValues() {
- super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
- values = new float[0];
- }
-
- private FloatValues(IndexInput input, int numDocs) throws IOException {
- super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
- values = new float[numDocs];
- /*
- * we always read BIG_ENDIAN here since the writer serialized plain bytes
- * we can simply read the ints / longs back in using readInt / readLong
- */
- for (int i = 0; i < values.length; i++) {
- values[i] = Float.intBitsToFloat(input.readInt());
- }
- }
-
- @Override
- public float[] getArray() {
- return values;
- }
-
- @Override
- public double getFloat(int docID) {
- assert docID >= 0 && docID < values.length;
- return values[docID];
- }
-
- @Override
- void toBytes(double value, BytesRef bytesRef) {
- BytesRefUtils.copyInt(bytesRef, Float.floatToRawIntBits((float)value));
-
- }
-
- @Override
- public DocValuesArray newFromInput(IndexInput input, int numDocs)
- throws IOException {
- return new FloatValues(input, numDocs);
- }
- };
-
- final static class DoubleValues extends DocValuesArray {
- private final double[] values;
-
- DoubleValues() {
- super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
- values = new double[0];
- }
-
- private DoubleValues(IndexInput input, int numDocs) throws IOException {
- super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
- values = new double[numDocs];
- /*
- * we always read BIG_ENDIAN here since the writer serialized plain bytes
- * we can simply read the ints / longs back in using readInt / readLong
- */
- for (int i = 0; i < values.length; i++) {
- values[i] = Double.longBitsToDouble(input.readLong());
- }
- }
-
- @Override
- public double[] getArray() {
- return values;
- }
-
- @Override
- public double getFloat(int docID) {
- assert docID >= 0 && docID < values.length;
- return values[docID];
- }
-
- @Override
- public DocValuesArray newFromInput(IndexInput input, int numDocs)
- throws IOException {
- return new DoubleValues(input, numDocs);
- }
-
- };
-
-}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
index 7b8483f2c37..aeeb23083e5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.lucene40.values.Writer;
import org.apache.lucene.index.FieldInfo;
@@ -81,14 +82,10 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
@Override
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
return Writer.create(valueType,
- docValuesId(segmentName, field.number),
+ PerDocProducerBase.docValuesId(segmentName, field.number),
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
}
- public static String docValuesId(String segmentsName, int fieldId) {
- return segmentsName + "_" + fieldId;
- }
-
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
index 72efc15a084..7c745b93b0c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
@@ -46,7 +46,7 @@ class FixedDerefBytesImpl {
public static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
}
@Override
@@ -84,7 +84,7 @@ class FixedDerefBytesImpl {
@Override
public Source getDirectSource()
throws IOException {
- return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type());
+ return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType());
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
index 7e12c9c97cf..2ab1700421b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
@@ -58,7 +58,7 @@ class FixedSortedBytesImpl {
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
this.comp = comp;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
index 1b0c2e7a896..4ea4b46e257 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
@@ -22,10 +22,12 @@ import java.io.IOException;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesReaderBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesSourceBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesWriterBase;
+import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -52,6 +54,7 @@ class FixedStraightBytesImpl {
static final int VERSION_CURRENT = VERSION_START;
static abstract class FixedBytesWriterBase extends BytesWriterBase {
+ protected final DocValuesField bytesSpareField = new DocValuesField("", new BytesRef(), Type.BYTES_FIXED_STRAIGHT);
protected int lastDocID = -1;
// start at -1 if the first added value is > 0
protected int size = -1;
@@ -60,13 +63,20 @@ class FixedStraightBytesImpl {
protected FixedBytesWriterBase(Directory dir, String id, String codecName,
int version, Counter bytesUsed, IOContext context) throws IOException {
- super(dir, id, codecName, version, bytesUsed, context);
+ this(dir, id, codecName, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
+ }
+
+ protected FixedBytesWriterBase(Directory dir, String id, String codecName,
+ int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
+ super(dir, id, codecName, version, bytesUsed, context, type);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
pool.nextBuffer();
}
@Override
- protected void add(int docID, BytesRef bytes) throws IOException {
+ public void add(int docID, IndexableField value) throws IOException {
+ final BytesRef bytes = value.binaryValue();
+ assert bytes != null;
assert lastDocID < docID;
if (size == -1) {
@@ -277,7 +287,7 @@ class FixedStraightBytesImpl {
@Override
public Source getDirectSource() throws IOException {
- return new DirectFixedStraightSource(cloneData(), size, type());
+ return new DirectFixedStraightSource(cloneData(), size, getType());
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
index 8a2d073c9c8..4d3e0548e7d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene40.values;
*/
import java.io.IOException;
+import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@@ -39,7 +40,7 @@ import org.apache.lucene.util.IOUtils;
*
* @lucene.experimental
*/
-class Floats {
+public class Floats {
protected static final String CODEC_NAME = "Floats";
protected static final int VERSION_START = 0;
@@ -69,31 +70,28 @@ class Floats {
final static class FloatsWriter extends FixedStraightBytesImpl.Writer {
private final int size;
- private final DocValuesArray template;
+ private final DocValuesArraySource template;
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, Type type) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
size = typeToSize(type);
this.bytesRef = new BytesRef(size);
bytesRef.length = size;
- template = DocValuesArray.TEMPLATES.get(type);
+ template = DocValuesArraySource.forType(type);
assert template != null;
}
- protected void add(int docID, double v) throws IOException {
- template.toBytes(v, bytesRef);
- add(docID, bytesRef);
- }
-
- @Override
- public void add(int docID, IndexableField docValue) throws IOException {
- add(docID, docValue.numericValue().doubleValue());
- }
-
@Override
protected boolean tryBulkMerge(DocValues docValues) {
// only bulk merge if value type is the same otherwise size differs
- return super.tryBulkMerge(docValues) && docValues.type() == template.type();
+ return super.tryBulkMerge(docValues) && docValues.getType() == template.getType();
+ }
+
+ @Override
+ public void add(int docID, IndexableField value) throws IOException {
+ template.toBytes(value.numericValue().doubleValue(), bytesRef);
+ bytesSpareField.setBytesValue(bytesRef);
+ super.add(docID, bytesSpareField);
}
@Override
@@ -104,11 +102,11 @@ class Floats {
}
final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader {
- final DocValuesArray arrayTemplate;
+ final DocValuesArraySource arrayTemplate;
FloatsReader(Directory dir, String id, int maxDoc, IOContext context, Type type)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
- arrayTemplate = DocValuesArray.TEMPLATES.get(type);
+ arrayTemplate = DocValuesArraySource.forType(type);
assert size == 4 || size == 8: "wrong size=" + size + " type=" + type + " id=" + id;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
index a9ba6c3e069..22875ad5a73 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40.values;
import java.io.IOException;
+import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@@ -36,7 +37,7 @@ import org.apache.lucene.util.IOUtils;
*
* @lucene.experimental
*/
-final class Ints {
+public final class Ints {
protected static final String CODEC_NAME = "Ints";
protected static final int VERSION_START = 0;
protected static final int VERSION_CURRENT = VERSION_START;
@@ -88,7 +89,7 @@ final class Ints {
static class IntsWriter extends FixedStraightBytesImpl.Writer {
- private final DocValuesArray template;
+ private final DocValuesArraySource template;
public IntsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, Type valueType) throws IOException {
@@ -101,17 +102,7 @@ final class Ints {
size = typeToSize(valueType);
this.bytesRef = new BytesRef(size);
bytesRef.length = size;
- template = DocValuesArray.TEMPLATES.get(valueType);
- }
-
- protected void add(int docID, long v) throws IOException {
- template.toBytes(v, bytesRef);
- add(docID, bytesRef);
- }
-
- @Override
- public void add(int docID, IndexableField docValue) throws IOException {
- add(docID, docValue.numericValue().longValue());
+ template = DocValuesArraySource.forType(valueType);
}
@Override
@@ -120,21 +111,28 @@ final class Ints {
template.toBytes(value, bytesRef);
}
+ @Override
+ public void add(int docID, IndexableField value) throws IOException {
+ template.toBytes(value.numericValue().longValue(), bytesRef);
+ bytesSpareField.setBytesValue(bytesRef);
+ super.add(docID, bytesSpareField);
+ }
+
@Override
protected boolean tryBulkMerge(DocValues docValues) {
// only bulk merge if value type is the same otherwise size differs
- return super.tryBulkMerge(docValues) && docValues.type() == template.type();
+ return super.tryBulkMerge(docValues) && docValues.getType() == template.getType();
}
}
final static class IntsReader extends FixedStraightBytesImpl.FixedStraightReader {
- private final DocValuesArray arrayTemplate;
+ private final DocValuesArraySource arrayTemplate;
IntsReader(Directory dir, String id, int maxDoc, IOContext context, Type type)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc,
context, type);
- arrayTemplate = DocValuesArray.TEMPLATES.get(type);
+ arrayTemplate = DocValuesArraySource.forType(type);
assert arrayTemplate != null;
assert type == sizeToType(size);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
index eb2acf70b05..ffa46d799bf 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
@@ -18,9 +18,8 @@ package org.apache.lucene.codecs.lucene40.values;
*/
import java.io.IOException;
-import org.apache.lucene.codecs.lucene40.values.DocValuesArray.LongValues;
+import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.lucene40.values.FixedStraightBytesImpl.FixedBytesWriterBase;
-import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
@@ -59,27 +58,10 @@ class PackedIntValues {
protected PackedIntsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.VAR_INTS);
bytesRef = new BytesRef(8);
}
-
- protected void add(int docID, long v) throws IOException {
- assert lastDocId < docID;
- if (!started) {
- started = true;
- minValue = maxValue = v;
- } else {
- if (v < minValue) {
- minValue = v;
- } else if (v > maxValue) {
- maxValue = v;
- }
- }
- lastDocId = docID;
- BytesRefUtils.copyLong(bytesRef, v);
- add(docID, bytesRef);
- }
-
+
@Override
public void finish(int docCount) throws IOException {
boolean success = false;
@@ -112,13 +94,6 @@ class PackedIntValues {
}
}
- @Override
- protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
- assert docID > lastDocId : "docID: " + docID
- + " must be greater than the last added doc id: " + lastDocId;
- add(docID, source.getInt(sourceDoc));
- }
-
private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
datOut.writeLong(minValue);
@@ -149,10 +124,25 @@ class PackedIntValues {
}
w.finish();
}
-
+
@Override
public void add(int docID, IndexableField docValue) throws IOException {
- add(docID, docValue.numericValue().longValue());
+ final long v = docValue.numericValue().longValue();
+ assert lastDocId < docID;
+ if (!started) {
+ started = true;
+ minValue = maxValue = v;
+ } else {
+ if (v < minValue) {
+ minValue = v;
+ } else if (v > maxValue) {
+ maxValue = v;
+ }
+ }
+ lastDocId = docID;
+ DocValuesArraySource.copyLong(bytesRef, v);
+ bytesSpareField.setBytesValue(bytesRef);
+ super.add(docID, bytesSpareField);
}
}
@@ -164,7 +154,7 @@ class PackedIntValues {
private final IndexInput datIn;
private final byte type;
private final int numDocs;
- private final LongValues values;
+ private final DocValuesArraySource values;
protected PackedIntsReader(Directory dir, String id, int numDocs,
IOContext context) throws IOException {
@@ -176,7 +166,7 @@ class PackedIntValues {
try {
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
type = datIn.readByte();
- values = type == FIXED_64 ? new LongValues() : null;
+ values = type == FIXED_64 ? DocValuesArraySource.forType(Type.FIXED_INTS_64) : null;
success = true;
} finally {
if (!success) {
@@ -220,7 +210,7 @@ class PackedIntValues {
@Override
- public Type type() {
+ public Type getType() {
return Type.VAR_INTS;
}
@@ -247,7 +237,7 @@ class PackedIntValues {
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
ref.grow(8);
- BytesRefUtils.copyLong(ref, getInt(docID));
+ DocValuesArraySource.copyLong(ref, getInt(docID));
return ref;
}
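
PackedIntsWriter now does its min/max bookkeeping inside add(int, IndexableField) (see the hunk above), so finish() can size the packed encoding to the observed range; writePackedInts stores minValue in its header and the values as offsets from it. A JDK-only sketch of the idea; the bit-width computation is a plausible choice, not the exact PackedInts logic:

// Track the value range while adding docs, then size the packed encoding
// to the delta (value - minValue), as writePackedInts does with its header.
public class RangeTracker {
  private boolean started;
  private long minValue, maxValue;

  void add(long v) {
    if (!started) { started = true; minValue = maxValue = v; }
    else if (v < minValue) { minValue = v; }
    else if (v > maxValue) { maxValue = v; }
  }

  int bitsPerValue() {
    long delta = maxValue - minValue;  // values stored as offsets from minValue
    return delta == 0 ? 1 : 64 - Long.numberOfLeadingZeros(delta);
  }

  public static void main(String[] args) {
    RangeTracker t = new RangeTracker();
    for (long v : new long[] {7, 3, 12}) t.add(v);
    System.out.println("min=" + t.minValue + " bits=" + t.bitsPerValue()); // min=3 bits=4
  }
}
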
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
index 19a7bd71bb6..43bff795af4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
@@ -57,7 +57,7 @@ class VarDerefBytesImpl {
static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
size = 0;
}
@@ -105,7 +105,7 @@ class VarDerefBytesImpl {
@Override
public Source getDirectSource()
throws IOException {
- return new DirectVarDerefSource(cloneData(), cloneIndex(), type());
+ return new DirectVarDerefSource(cloneData(), cloneIndex(), getType());
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
index 0229199f0c8..9a8e87dcaf6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
@@ -59,7 +59,7 @@ final class VarSortedBytesImpl {
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
this.comp = comp;
size = 0;
}
@@ -166,7 +166,7 @@ final class VarSortedBytesImpl {
@Override
public Source getDirectSource() throws IOException {
- return new DirectSortedSource(cloneData(), cloneIndex(), comparator, type());
+ return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType());
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
index 2902801c259..cfb9d78cfb6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
@@ -26,6 +26,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -63,7 +64,7 @@ class VarStraightBytesImpl {
private boolean merge = false;
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
- super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
+ super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
docToAddress = new long[1];
pool.nextBuffer(); // init
@@ -84,7 +85,9 @@ class VarStraightBytesImpl {
}
@Override
- protected void add(int docID, BytesRef bytes) throws IOException {
+ public void add(int docID, IndexableField value) throws IOException {
+ final BytesRef bytes = value.binaryValue();
+ assert bytes != null;
assert !merge;
if (bytes.length == 0) {
return; // default
@@ -245,7 +248,7 @@ class VarStraightBytesImpl {
@Override
public Source getDirectSource()
throws IOException {
- return new DirectVarStraightSource(cloneData(), cloneIndex(), type());
+ return new DirectVarStraightSource(cloneData(), cloneIndex(), getType());
}
}
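
A recurring pattern across these hunks: writers that used to take a raw BytesRef now implement add(int, IndexableField), and the numeric writers reuse a single spare field (bytesSpareField.setBytesValue(bytesRef) followed by super.add(docID, bytesSpareField)) rather than allocating per value. A JDK-only sketch of that reuse pattern, with plain types standing in for the Lucene classes:

import java.nio.ByteBuffer;

// One mutable holder reused across add() calls, mirroring
// template.toBytes(...) into bytesRef followed by
// bytesSpareField.setBytesValue(bytesRef); super.add(docID, bytesSpareField).
public class SpareHolderPattern {
  private static final ByteBuffer spare = ByteBuffer.allocate(8);

  static void add(int docID, long value) {
    spare.clear();
    spare.putLong(value);           // fill the reusable buffer
    write(docID, spare.array());    // delegate, as super.add(...) does
  }

  static void write(int docID, byte[] bytes) {
    System.out.println("doc " + docID + ": " + bytes.length + " bytes");
  }

  public static void main(String[] args) {
    add(0, 42L);
    add(1, -7L);
  }
}
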
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
index 13529d3d487..77c317ca816 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
@@ -40,6 +40,7 @@ import org.apache.lucene.util.Counter;
*/
abstract class Writer extends DocValuesConsumer {
protected final Counter bytesUsed;
+ protected Type type;
/**
* Creates a new {@link Writer}.
@@ -49,9 +50,19 @@ abstract class Writer extends DocValuesConsumer {
* internally allocated memory. All tracked bytes must be released
* once {@link #finish(int)} has been called.
*/
- protected Writer(Counter bytesUsed) {
+ protected Writer(Counter bytesUsed, Type type) {
this.bytesUsed = bytesUsed;
+ this.type = type;
}
+
+ @Override
+ protected Type getType() {
+ return type;
+ }
+
/**
* Factory method to create a {@link Writer} instance for a given type. This
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
index 7794e6dfc3c..89d8bb359f1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@@ -98,8 +98,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
return "PostingsFormat(name=" + getName() + " doPackFST= " + doPackFST + ")";
}
- private static final boolean VERBOSE = false;
-
private final static class TermsWriter extends TermsConsumer {
private final IndexOutput out;
private final FieldInfo field;
@@ -123,10 +121,13 @@ public class MemoryPostingsFormat extends PostingsFormat {
// NOTE: not private so we don't pay access check at runtime:
int docCount;
RAMOutputStream buffer = new RAMOutputStream();
+
+ int lastOffsetLength;
+ int lastOffset;
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
- if (VERBOSE) System.out.println(" startDoc docID=" + docID + " freq=" + termDocFreq);
+ //System.out.println(" startDoc docID=" + docID + " freq=" + termDocFreq);
final int delta = docID - lastDocID;
assert docID == 0 || delta > 0;
lastDocID = docID;
@@ -143,20 +144,23 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
lastPos = 0;
+ lastOffset = 0;
}
@Override
public void addPosition(int pos, BytesRef payload, int startOffset, int endOffset) throws IOException {
assert payload == null || field.storePayloads;
- if (VERBOSE) System.out.println(" addPos pos=" + pos + " payload=" + payload);
+ //System.out.println(" addPos pos=" + pos + " payload=" + payload);
final int delta = pos - lastPos;
assert delta >= 0;
lastPos = pos;
+ int payloadLen = 0;
+
if (field.storePayloads) {
- final int payloadLen = payload == null ? 0 : payload.length;
+ payloadLen = payload == null ? 0 : payload.length;
if (payloadLen != lastPayloadLen) {
lastPayloadLen = payloadLen;
buffer.writeVInt((delta<<1)|1);
@@ -164,13 +168,28 @@ public class MemoryPostingsFormat extends PostingsFormat {
} else {
buffer.writeVInt(delta<<1);
}
-
- if (payloadLen > 0) {
- buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
- }
} else {
buffer.writeVInt(delta);
}
+
+ if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+ // don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
+ // and the numbers aren't that much smaller anyways.
+ int offsetDelta = startOffset - lastOffset;
+ int offsetLength = endOffset - startOffset;
+ if (offsetLength != lastOffsetLength) {
+ buffer.writeVInt(offsetDelta << 1 | 1);
+ buffer.writeVInt(offsetLength);
+ } else {
+ buffer.writeVInt(offsetDelta << 1);
+ }
+ lastOffset = startOffset;
+ lastOffsetLength = offsetLength;
+ }
+
+ if (payloadLen > 0) {
+ buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
+ }
}
@Override
@@ -182,6 +201,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
lastDocID = 0;
docCount = 0;
lastPayloadLen = 0;
+ // force first offset to write its length
+ lastOffsetLength = -1;
return this;
}
}
@@ -190,7 +211,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public PostingsConsumer startTerm(BytesRef text) {
- if (VERBOSE) System.out.println(" startTerm term=" + text.utf8ToString());
+ //System.out.println(" startTerm term=" + text.utf8ToString());
return postingsWriter.reset();
}
@@ -224,12 +245,12 @@ public class MemoryPostingsFormat extends PostingsFormat {
spare.bytes = finalBuffer;
spare.length = totalBytes;
- if (VERBOSE) {
- System.out.println(" finishTerm term=" + text.utf8ToString() + " " + totalBytes + " bytes totalTF=" + stats.totalTermFreq);
- for(int i=0;i<totalBytes;i++) {
accum += code >>> 1;
- if (VERBOSE) System.out.println(" docID=" + accum + " code=" + code);
+ //System.out.println(" docID=" + accum + " code=" + code);
if ((code & 1) != 0) {
freq = 1;
} else {
@@ -352,8 +368,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
assert freq > 0;
}
- if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
- // Skip positions
+ if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ // Skip positions/payloads
for(int posUpto=0;posUpto<freq;posUpto++) {
+ boolean hasOffsets = field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ if (needsOffsets && !hasOffsets) {
+ return null; // not available
}
if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
@@ -665,14 +724,14 @@ public class MemoryPostingsFormat extends PostingsFormat {
decodeMetaData();
FSTDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse == null || !(reuse instanceof FSTDocsAndPositionsEnum)) {
- docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads);
+ docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads, hasOffsets);
} else {
docsAndPositionsEnum = (FSTDocsAndPositionsEnum) reuse;
- if (!docsAndPositionsEnum.canReuse(field.storePayloads)) {
- docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads);
+ if (!docsAndPositionsEnum.canReuse(field.storePayloads, hasOffsets)) {
+ docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads, hasOffsets);
}
}
- if (VERBOSE) System.out.println("D&P reset this=" + this);
+ //System.out.println("D&P reset this=" + this);
return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
}
@@ -683,14 +742,14 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public BytesRef next() throws IOException {
- if (VERBOSE) System.out.println("te.next");
+ //System.out.println("te.next");
current = fstEnum.next();
if (current == null) {
- if (VERBOSE) System.out.println(" END");
+ //System.out.println(" END");
return null;
}
didDecode = false;
- if (VERBOSE) System.out.println(" term=" + field.name + ":" + current.input.utf8ToString());
+ //System.out.println(" term=" + field.name + ":" + current.input.utf8ToString());
return current.input;
}
@@ -794,9 +853,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
break;
}
final TermsReader termsReader = new TermsReader(state.fieldInfos, in, termCount);
- if (VERBOSE) {
- System.out.println("load field=" + termsReader.field.name);
- }
+ // System.out.println("load field=" + termsReader.field.name);
fields.put(termsReader.field.name, termsReader);
}
} finally {
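
The new offset encoding in addPosition writes a vInt of (offsetDelta << 1 | lengthChanged), followed by the new token length only when the low bit is set, so runs of equal-length tokens cost one vInt each; lastOffsetLength starts at -1 in reset() to force the first length out. A self-contained sketch of the encode/decode; the vInt helpers are simplified stand-ins for RAMOutputStream/IndexInput:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

// Encode/decode the offset scheme from the addPosition hunk above.
public class OffsetCodec {
  static void writeVInt(ByteArrayOutputStream out, int i) {
    while ((i & ~0x7F) != 0) { out.write((i & 0x7F) | 0x80); i >>>= 7; }
    out.write(i);
  }

  static int readVInt(ByteArrayInputStream in) {
    int b = in.read(), i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = in.read();
      i |= (b & 0x7F) << shift;
    }
    return i;
  }

  public static void main(String[] args) {
    int[][] offsets = { {0, 4}, {5, 9}, {10, 17} };  // (start, end) pairs
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int lastOffset = 0, lastLength = -1;             // -1 forces the first length
    for (int[] o : offsets) {
      int delta = o[0] - lastOffset, length = o[1] - o[0];
      if (length != lastLength) {
        writeVInt(out, delta << 1 | 1);              // low bit: length changed
        writeVInt(out, length);
      } else {
        writeVInt(out, delta << 1);
      }
      lastOffset = o[0]; lastLength = length;
    }
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    lastOffset = 0; lastLength = -1;
    for (int i = 0; i < offsets.length; i++) {
      int code = readVInt(in);
      if ((code & 1) != 0) lastLength = readVInt(in);
      lastOffset += code >>> 1;
      System.out.println("start=" + lastOffset + " end=" + (lastOffset + lastLength));
    }
  }
}
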
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
index f8e37ef9852..e4436835af2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
+import java.util.ServiceLoader; // javadocs
import java.util.Set;
import java.util.TreeMap;
@@ -47,7 +48,14 @@ import org.apache.lucene.util.IOUtils;
/**
* Enables per field format support.
- *
+ * <p>
+ * Note, when extending this class, the name ({@link #getName}) is
+ * written into the index. In order for the field to be read, the
+ * name must resolve to your implementation via {@link #forName(String)}.
+ * This method uses Java's
+ * {@link ServiceLoader Service Provider Interface} to resolve format names.
+ * <p>
+ * @see ServiceLoader
* @lucene.experimental
*/
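
The expanded javadoc leans on Java's SPI: a format's getName() is persisted in the index, and forName(String) must map it back to a provider at read time. A minimal ServiceLoader sketch of that resolution; NamedFormat is a hypothetical stand-in interface, and the real forName implementation may differ (e.g. caching the lookup):

import java.util.ServiceLoader;

// Hypothetical name-based provider resolution; implementations would be
// listed in META-INF/services/NamedFormat.
interface NamedFormat {
  String getName();
}

class FormatResolver {
  static NamedFormat forName(String name) {
    for (NamedFormat format : ServiceLoader.load(NamedFormat.class)) {
      if (format.getName().equals(name)) {
        return format;
      }
    }
    throw new IllegalArgumentException("no format registered for name: " + name);
  }
}
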
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java
index 5a8472b9e48..b28b40e4581 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
+import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.lucene40.values.DocValuesWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@@ -58,7 +59,7 @@ public class SepDocValuesConsumer extends DocValuesWriterBase {
private static void files(Directory dir, FieldInfos fieldInfos, String segmentName, Set<String> files) {
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasDocValues()) {
- String filename = docValuesId(segmentName, fieldInfo.number);
+ String filename = PerDocProducerBase.docValuesId(segmentName, fieldInfo.number);
switch (fieldInfo.getDocValuesType()) {
case BYTES_FIXED_DEREF:
case BYTES_VAR_DEREF:
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesProducer.java
index 6cfb2f6b620..0406c26aa0f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesProducer.java
@@ -22,16 +22,22 @@ import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;
-import org.apache.lucene.codecs.lucene40.values.DocValuesReaderBase;
+import org.apache.lucene.codecs.PerDocProducerBase;
+import org.apache.lucene.codecs.lucene40.values.Bytes;
+import org.apache.lucene.codecs.lucene40.values.Floats;
+import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;
/**
* Implementation of PerDocProducer that uses separate files.
* @lucene.experimental
*/
-public class SepDocValuesProducer extends DocValuesReaderBase {
+public class SepDocValuesProducer extends PerDocProducerBase {
private final TreeMap<String, DocValues> docValues;
/**
@@ -51,4 +57,35 @@ public class SepDocValuesProducer extends DocValuesReaderBase {
protected void closeInternal(Collection<? extends Closeable> closeables) throws IOException {
IOUtils.close(closeables);
}
+
+ @Override
+ protected DocValues loadDocValues(int docCount, Directory dir, String id,
+ Type type, IOContext context) throws IOException {
+ switch (type) {
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ return Ints.getValues(dir, id, docCount, type, context);
+ case FLOAT_32:
+ return Floats.getValues(dir, id, docCount, context, type);
+ case FLOAT_64:
+ return Floats.getValues(dir, id, docCount, context, type);
+ case BYTES_FIXED_STRAIGHT:
+ return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
+ case BYTES_FIXED_DEREF:
+ return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
+ case BYTES_FIXED_SORTED:
+ return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
+ case BYTES_VAR_STRAIGHT:
+ return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
+ case BYTES_VAR_DEREF:
+ return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
+ case BYTES_VAR_SORTED:
+ return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
+ default:
+ throw new IllegalStateException("unrecognized index values mode " + type);
+ }
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java
index 39b53e4f2ef..23defd7b9bc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java
@@ -26,7 +26,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfosFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;
/**
* plain text index format.
@@ -41,7 +40,7 @@ public final class SimpleTextCodec extends Codec {
private final FieldInfosFormat fieldInfosFormat = new SimpleTextFieldInfosFormat();
private final TermVectorsFormat vectorsFormat = new SimpleTextTermVectorsFormat();
// TODO: need a plain-text impl
- private final DocValuesFormat docValues = new Lucene40DocValuesFormat();
+ private final DocValuesFormat docValues = new SimpleTextDocValuesFormat();
// TODO: need a plain-text impl (using the above)
private final NormsFormat normsFormat = new SimpleTextNormsFormat();
private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java
new file mode 100644
index 00000000000..477e87b7142
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java
@@ -0,0 +1,288 @@
+package org.apache.lucene.codecs.simpletext;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesArraySource;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * @lucene.experimental
+ */
+public class SimpleTextDocValuesConsumer extends DocValuesConsumer {
+
+ static final BytesRef ZERO_DOUBLE = new BytesRef(Double.toString(0d));
+ static final BytesRef ZERO_INT = new BytesRef(Integer.toString(0));
+ static final BytesRef HEADER = new BytesRef("SimpleTextDocValues");
+
+ static final BytesRef END = new BytesRef("END");
+ static final BytesRef VALUE_SIZE = new BytesRef("valuesize ");
+ static final BytesRef DOC = new BytesRef(" doc ");
+ static final BytesRef VALUE = new BytesRef(" value ");
+ protected BytesRef scratch = new BytesRef();
+ protected int maxDocId = -1;
+ protected final String segment;
+ protected final Directory dir;
+ protected final IOContext ctx;
+ protected final Type type;
+ protected final BytesRefHash hash;
+ private int[] ords;
+ private int fixedSize = Integer.MIN_VALUE;
+ private BytesRef zeroBytes;
+ private final String segmentSuffix;
+
+
+ public SimpleTextDocValuesConsumer(String segment, Directory dir,
+ IOContext ctx, Type type, String segmentSuffix) {
+ this.ctx = ctx;
+ this.dir = dir;
+ this.segment = segment;
+ this.type = type;
+ hash = new BytesRefHash();
+ ords = new int[0];
+ this.segmentSuffix = segmentSuffix;
+ }
+
+ @Override
+ public void add(int docID, IndexableField value) throws IOException {
+ assert docID >= 0;
+ int ord = -1;
+ int vSize = -1;
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ vSize = value.binaryValue().length;
+ ord = hash.add(value.binaryValue());
+ break;
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ vSize = -1;
+ // binaryValue() must be non-null for bytes types; let an NPE surface instead of swallowing it
+ ord = hash.add(value.binaryValue());
+ break;
+ case FIXED_INTS_16:
+ vSize = 2;
+ scratch.grow(2);
+ DocValuesArraySource.copyShort(scratch, value.numericValue().shortValue());
+ ord = hash.add(scratch);
+ break;
+ case FIXED_INTS_32:
+ vSize = 4;
+ scratch.grow(4);
+ DocValuesArraySource.copyInt(scratch, value.numericValue().intValue());
+ ord = hash.add(scratch);
+ break;
+ case FIXED_INTS_8:
+ vSize = 1;
+ scratch.grow(1);
+ scratch.bytes[scratch.offset] = value.numericValue().byteValue();
+ scratch.length = 1;
+ ord = hash.add(scratch);
+ break;
+ case FIXED_INTS_64:
+ vSize = 8;
+ // intentional fall-through: FIXED_INTS_64 shares the 8-byte long encoding below
+ case VAR_INTS:
+ scratch.grow(8);
+ DocValuesArraySource.copyLong(scratch, value.numericValue().longValue());
+ ord = hash.add(scratch);
+ break;
+ case FLOAT_32:
+ vSize = 4;
+ scratch.grow(4);
+ DocValuesArraySource.copyInt(scratch,
+ Float.floatToRawIntBits(value.numericValue().floatValue()));
+ ord = hash.add(scratch);
+ break;
+ case FLOAT_64:
+ vSize = 8;
+ scratch.grow(8);
+ DocValuesArraySource.copyLong(scratch,
+ Double.doubleToRawLongBits(value.numericValue().doubleValue()));
+ ord = hash.add(scratch);
+ break;
+
+ }
+
+ if (fixedSize == Integer.MIN_VALUE) {
+ assert maxDocId == -1;
+ fixedSize = vSize;
+ } else {
+ if (fixedSize != vSize) {
+ throw new IllegalArgumentException("value size must be " + fixedSize + " but was: " + vSize);
+ }
+ }
+ maxDocId = Math.max(docID, maxDocId);
+ ords = grow(ords, docID);
+
+ ords[docID] = (ord < 0 ? (-ord)-1 : ord) + 1;
+ }
+
+ protected BytesRef getHeader() {
+ return HEADER;
+ }
+
+ private int[] grow(int[] array, int upto) {
+ if (array.length <= upto) {
+ return ArrayUtil.grow(array, 1 + upto);
+ }
+ return array;
+ }
+
+ private void prepareFlush(int docCount) {
+ assert ords != null;
+ ords = grow(ords, docCount);
+ }
+
+ @Override
+ public void finish(int docCount) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "",
+ segmentSuffix);
+ IndexOutput output = dir.createOutput(fileName, ctx);
+ boolean success = false;
+ BytesRef spare = new BytesRef();
+ try {
+ SimpleTextUtil.write(output, getHeader());
+ SimpleTextUtil.writeNewline(output);
+ SimpleTextUtil.write(output, VALUE_SIZE);
+ SimpleTextUtil.write(output, Integer.toString(this.fixedSize), scratch);
+ SimpleTextUtil.writeNewline(output);
+ prepareFlush(docCount);
+ for (int i = 0; i < docCount; i++) {
+ SimpleTextUtil.write(output, DOC);
+ SimpleTextUtil.write(output, Integer.toString(i), scratch);
+ SimpleTextUtil.writeNewline(output);
+ SimpleTextUtil.write(output, VALUE);
+ writeDoc(output, i, spare);
+ SimpleTextUtil.writeNewline(output);
+ }
+ SimpleTextUtil.write(output, END);
+ SimpleTextUtil.writeNewline(output);
+ success = true;
+ } finally {
+ hash.close();
+ if (success) {
+ IOUtils.close(output);
+ } else {
+ IOUtils.closeWhileHandlingException(output);
+ }
+ }
+ }
+
+ protected void writeDoc(IndexOutput output, int docId, BytesRef spare) throws IOException {
+ int ord = ords[docId] - 1;
+ if (ord != -1) {
+ assert ord >= 0;
+ hash.get(ord, spare);
+
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ SimpleTextUtil.write(output, spare);
+ break;
+ case FIXED_INTS_16:
+ SimpleTextUtil.write(output,
+ Short.toString(DocValuesArraySource.asShort(spare)), scratch);
+ break;
+ case FIXED_INTS_32:
+ SimpleTextUtil.write(output,
+ Integer.toString(DocValuesArraySource.asInt(spare)), scratch);
+ break;
+ case VAR_INTS:
+ case FIXED_INTS_64:
+ SimpleTextUtil.write(output,
+ Long.toString(DocValuesArraySource.asLong(spare)), scratch);
+ break;
+ case FIXED_INTS_8:
+ assert spare.length == 1 : spare.length;
+ SimpleTextUtil.write(output,
+ Integer.toString(spare.bytes[spare.offset]), scratch);
+ break;
+ case FLOAT_32:
+ float valueFloat = Float.intBitsToFloat(DocValuesArraySource.asInt(spare));
+ SimpleTextUtil.write(output, Float.toString(valueFloat), scratch);
+ break;
+ case FLOAT_64:
+ double valueDouble = Double.longBitsToDouble(DocValuesArraySource
+ .asLong(spare));
+ SimpleTextUtil.write(output, Double.toString(valueDouble), scratch);
+ break;
+ default:
+ throw new IllegalArgumentException("unsupported type: " + type);
+ }
+ } else {
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ if(zeroBytes == null) {
+ assert fixedSize > 0;
+ zeroBytes = new BytesRef(new byte[fixedSize]);
+ }
+ SimpleTextUtil.write(output, zeroBytes);
+ break;
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ scratch.length = 0;
+ SimpleTextUtil.write(output, scratch);
+ break;
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ SimpleTextUtil.write(output, ZERO_INT);
+ break;
+ case FLOAT_32:
+ case FLOAT_64:
+ SimpleTextUtil.write(output, ZERO_DOUBLE);
+ break;
+ default:
+ throw new IllegalArgumentException("unsupported type: " + type);
+ }
+ }
+
+ }
+
+ @Override
+ protected Type getType() {
+ return type;
+ }
+
+}
\ No newline at end of file
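
For reference, the consumer above writes one plain-text file per field. Assuming FIXED_INTS_32 values and the constants defined at the top of the class (exact whitespace approximate), a two-document field comes out roughly as:

SimpleTextDocValues
valuesize 4
  doc 0
  value 3
  doc 1
  value 17
END

Documents with no value get the zero encoding from the writeDoc else-branch: ZERO_INT/ZERO_DOUBLE for numerics, an all-zero block for fixed-size bytes, or an empty value for variable-size bytes.
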
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
new file mode 100644
index 00000000000..b33186783a4
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
@@ -0,0 +1,53 @@
+package org.apache.lucene.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.PerDocConsumer;
+import org.apache.lucene.codecs.PerDocProducer;
+import org.apache.lucene.index.PerDocWriteState;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.util.BytesRef;
+/**
+ * @lucene.experimental
+ */
+public class SimpleTextDocValuesFormat extends DocValuesFormat {
+ private static final String DOC_VALUES_SEG_SUFFIX = "dv";
+ @Override
+ public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+ return new SimpleTextPerDocConsumer(state, DOC_VALUES_SEG_SUFFIX);
+ }
+
+ @Override
+ public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
+ return new SimpleTextPerDocProducer(state, BytesRef.getUTF8SortedAsUnicodeComparator(), DOC_VALUES_SEG_SUFFIX);
+ }
+
+ static String docValuesId(String segmentsName, int fieldId) {
+ return segmentsName + "_" + fieldId;
+ }
+
+ @Override
+ public void files(SegmentInfo info, Set<String> files)
+ throws IOException {
+ SimpleTextPerDocConsumer.files(info, files, DOC_VALUES_SEG_SUFFIX);
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java
deleted file mode 100644
index 086e770f6f2..00000000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java
+++ /dev/null
@@ -1,294 +0,0 @@
-package org.apache.lucene.codecs.simpletext;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.Set;
-
-import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.codecs.PerDocConsumer;
-import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Writes plain-text norms
- *
* FOR RECREATIONAL USE ONLY
+ *
* @lucene.experimental
*/
public class SimpleTextNormsFormat extends NormsFormat {
+ private static final String NORMS_SEG_SUFFIX = "len";
@Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
- return new SimpleTextNormsConsumer(state.directory, state.segmentName, state.context);
+ return new SimpleTextNormsPerDocConsumer(state, NORMS_SEG_SUFFIX);
}
-
+
@Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
- return new SimpleTextNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context);
+ return new SimpleTextNormsPerDocProducer(state,
+ BytesRef.getUTF8SortedAsUnicodeComparator(), NORMS_SEG_SUFFIX);
}
-
+
@Override
public void files(SegmentInfo info, Set files) throws IOException {
- SimpleTextNormsConsumer.files(info, files);
- }
+ SimpleTextNormsPerDocConsumer.files(info, files);
+ }
+
+ public static class SimpleTextNormsPerDocProducer extends
+ SimpleTextPerDocProducer {
+
+ public SimpleTextNormsPerDocProducer(SegmentReadState state,
+ Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
+ super(state, comp, segmentSuffix);
+ }
+
+ @Override
+ protected boolean canLoad(FieldInfo info) {
+ return info.hasNorms();
+ }
+
+ @Override
+ protected Type getDocValuesType(FieldInfo info) {
+ return info.getNormType();
+ }
+
+ @Override
+ protected boolean anyDocValuesFields(FieldInfos infos) {
+ return infos.hasNorms();
+ }
+
+ }
+
+ public static class SimpleTextNormsPerDocConsumer extends
+ SimpleTextPerDocConsumer {
+
+ public SimpleTextNormsPerDocConsumer(PerDocWriteState state,
+ String segmentSuffix) throws IOException {
+ super(state, segmentSuffix);
+ }
+
+ @Override
+ protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info)
+ throws IOException {
+ return reader.normValues(info.name);
+ }
+
+ @Override
+ protected boolean canMerge(FieldInfo info) {
+ return info.hasNorms();
+ }
+
+ @Override
+ protected Type getDocValuesType(FieldInfo info) {
+ return info.getNormType();
+ }
+
+ @Override
+ public void abort() {
+ Set<String> files = new HashSet<String>();
+ filesInternal(state.fieldInfos, state.segmentName, files, segmentSuffix);
+ IOUtils.deleteFilesIgnoringExceptions(state.directory,
+ files.toArray(new String[0]));
+ }
+
+ public static void files(SegmentInfo segmentInfo, Set<String> files)
+ throws IOException {
+ filesInternal(segmentInfo.getFieldInfos(), segmentInfo.name, files,
+ NORMS_SEG_SUFFIX);
+ }
+
+ public static void filesInternal(FieldInfos fieldInfos, String segmentName,
+ Set<String> files, String segmentSuffix) {
+ for (FieldInfo fieldInfo : fieldInfos) {
+ if (fieldInfo.hasNorms()) {
+ String id = docValuesId(segmentName, fieldInfo.number);
+ files.add(IndexFileNames.segmentFileName(id, "",
+ segmentSuffix));
+ }
+ }
+ }
+ }
}
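
With norms routed through the same per-doc machinery, each field that has norms gets its own file: docValuesId(segmentName, fieldNumber) plus the "len" suffix, per filesInternal above. A JDK-only sketch of the resulting names; the exact file-name shape is assumed from segmentFileName with an empty segment suffix:

import java.util.ArrayList;
import java.util.List;

// Norms file per field: "<segment>_<fieldNumber>.len" (shape assumed).
public class NormsFileNames {
  static String docValuesId(String segmentName, int fieldNumber) {
    return segmentName + "_" + fieldNumber;
  }

  public static void main(String[] args) {
    List<String> files = new ArrayList<String>();
    for (int field : new int[] {0, 2, 5}) {   // fields that have norms
      files.add(docValuesId("_0", field) + ".len");
    }
    System.out.println(files); // [_0_0.len, _0_2.len, _0_5.len]
  }
}
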
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java
deleted file mode 100644
index 126770b1c8b..00000000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java
+++ /dev/null
@@ -1,175 +0,0 @@
-package org.apache.lucene.codecs.simpletext;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.DOC;
-import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.END;
-import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.FIELD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORM;
-import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORMS_EXTENSION;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.codecs.PerDocProducer;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocValues.Source;
-import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.StringHelper;
-
-/**
- * Reads plain-text norms
- *
- * FOR RECREATIONAL USE ONLY
- * @lucene.experimental
- */
-public class SimpleTextNormsProducer extends PerDocProducer {
-
- Map<String,NormsDocValues> norms = new HashMap<String,NormsDocValues>();
-
- public SimpleTextNormsProducer(Directory directory, SegmentInfo si, FieldInfos fields, IOContext context) throws IOException {
- if (fields.hasNorms()) {
- readNorms(directory.openInput(IndexFileNames.segmentFileName(si.name, "", NORMS_EXTENSION), context), si.docCount);
- }
- }
-
- // we read in all the norms up front into a hashmap
- private void readNorms(IndexInput in, int maxDoc) throws IOException {
- BytesRef scratch = new BytesRef();
- boolean success = false;
- try {
- SimpleTextUtil.readLine(in, scratch);
- while (!scratch.equals(END)) {
- assert StringHelper.startsWith(scratch, FIELD);
- final String fieldName = readString(FIELD.length, scratch);
- byte bytes[] = new byte[maxDoc];
- for (int i = 0; i < bytes.length; i++) {
- SimpleTextUtil.readLine(in, scratch);
- assert StringHelper.startsWith(scratch, DOC);
- SimpleTextUtil.readLine(in, scratch);
- assert StringHelper.startsWith(scratch, NORM);
- bytes[i] = scratch.bytes[scratch.offset + NORM.length];
- }
- norms.put(fieldName, new NormsDocValues(new Norm(bytes)));
- SimpleTextUtil.readLine(in, scratch);
- assert StringHelper.startsWith(scratch, FIELD) || scratch.equals(END);
- }
- success = true;
- } finally {
- if (success) {
- IOUtils.close(in);
- } else {
- IOUtils.closeWhileHandlingException(in);
- }
- }
- }
-
- @Override
- public void close() throws IOException {
- norms = null;
- }
-
- static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- FieldInfos fieldInfos = info.getFieldInfos();
- for (FieldInfo fieldInfo : fieldInfos) {
- if (fieldInfo.normsPresent()) {
- files.add(IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION));
- break;
- }
- }
- }
-
- private String readString(int offset, BytesRef scratch) {
- return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
- }
-
- @Override
- public DocValues docValues(String field) throws IOException {
- return norms.get(field);
- }
-
- private class NormsDocValues extends DocValues {
- private final Source source;
- public NormsDocValues(Source source) {
- this.source = source;
- }
-
- @Override
- public Source load() throws IOException {
- return source;
- }
-
- @Override
- public Source getDirectSource() throws IOException {
- return getSource();
- }
-
- @Override
- public Type type() {
- return Type.FIXED_INTS_8;
- }
-
- @Override
- public int getValueSize() {
- return 1;
- }
- }
-
- static final class Norm extends Source {
- protected Norm(byte[] bytes) {
- super(Type.FIXED_INTS_8);
- this.bytes = bytes;
- }
- final byte bytes[];
-
- @Override
- public BytesRef getBytes(int docID, BytesRef ref) {
- ref.bytes = bytes;
- ref.offset = docID;
- ref.length = 1;
- return ref;
- }
-
- @Override
- public long getInt(int docID) {
- return bytes[docID];
- }
-
- @Override
- public boolean hasArray() {
- return true;
- }
-
- @Override
- public Object getArray() {
- return bytes;
- }
-
- }
-}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocConsumer.java
new file mode 100644
index 00000000000..d3dfd4485bb
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocConsumer.java
@@ -0,0 +1,94 @@
+package org.apache.lucene.codecs.simpletext;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.PerDocConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PerDocWriteState;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * @lucene.experimental
+ */
+class SimpleTextPerDocConsumer extends PerDocConsumer {
+
+ protected final PerDocWriteState state;
+ protected final String segmentSuffix;
+ public SimpleTextPerDocConsumer(PerDocWriteState state, String segmentSuffix)
+ throws IOException {
+ this.state = state;
+ this.segmentSuffix = segmentSuffix;
+ }
+
+ @Override
+ public void close() throws IOException {
+
+ }
+
+ @Override
+ public DocValuesConsumer addValuesField(Type type, FieldInfo field)
+ throws IOException {
+ return new SimpleTextDocValuesConsumer(SimpleTextDocValuesFormat.docValuesId(state.segmentName,
+ field.number), state.directory, state.context, type, segmentSuffix);
+ }
+
+ @Override
+ public void abort() {
+ Set<String> files = new HashSet<String>();
+ files(state.directory, state.fieldInfos, state.segmentName, files, segmentSuffix);
+ IOUtils.deleteFilesIgnoringExceptions(state.directory,
+ files.toArray(new String[0]));
+ }
+
+ static void files(SegmentInfo info, Set<String> files, String segmentSuffix) throws IOException {
+ files(info.dir, info.getFieldInfos(), info.name, files, segmentSuffix);
+ }
+
+ static String docValuesId(String segmentsName, int fieldId) {
+ return segmentsName + "_" + fieldId;
+ }
+
+ @SuppressWarnings("fallthrough")
+ private static void files(Directory dir, FieldInfos fieldInfos,
+ String segmentName, Set<String> files, String segmentSuffix) {
+ for (FieldInfo fieldInfo : fieldInfos) {
+ if (fieldInfo.hasDocValues()) {
+ String filename = docValuesId(segmentName, fieldInfo.number);
+ files.add(IndexFileNames.segmentFileName(filename, "",
+ segmentSuffix));
+ try {
+ assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
+ segmentSuffix));
+ } catch (IOException e) {
+ // don't throw checked exception - dir is only used in assert
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
new file mode 100644
index 00000000000..59e49c615a4
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
@@ -0,0 +1,431 @@
+package org.apache.lucene.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.HEADER;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE_SIZE;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.lucene.codecs.DocValuesArraySource;
+import org.apache.lucene.codecs.PerDocProducerBase;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.SortedSource;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.packed.PackedInts.Reader;
+
+/**
+ * @lucene.experimental
+ */
+public class SimpleTextPerDocProducer extends PerDocProducerBase {
+ protected final TreeMap<String, DocValues> docValues;
+ private Comparator<BytesRef> comp;
+ private final String segmentSuffix;
+
+ /**
+ * Creates a new {@link SimpleTextPerDocProducer} instance and loads all
+ * {@link DocValues} instances for this segment and codec.
+ */
+ public SimpleTextPerDocProducer(SegmentReadState state,
+ Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
+ this.comp = comp;
+ this.segmentSuffix = segmentSuffix;
+ if (anyDocValuesFields(state.fieldInfos)) {
+ docValues = load(state.fieldInfos, state.segmentInfo.name,
+ state.segmentInfo.docCount, state.dir, state.context);
+ } else {
+ docValues = new TreeMap<String, DocValues>();
+ }
+ }
+
+ @Override
+ protected Map<String, DocValues> docValues() {
+ return docValues;
+ }
+
+ protected DocValues loadDocValues(int docCount, Directory dir, String id,
+ DocValues.Type type, IOContext context) throws IOException {
+ return new SimpleTextDocValues(dir, context, type, id, docCount, comp, segmentSuffix);
+ }
+
+ @Override
+ protected void closeInternal(Collection<? extends Closeable> closeables)
+ throws IOException {
+ IOUtils.close(closeables);
+ }
+
+ private static class SimpleTextDocValues extends DocValues {
+
+ private int docCount;
+
+ @Override
+ public void close() throws IOException {
+ try {
+ super.close();
+ } finally {
+ IOUtils.close(input);
+ }
+ }
+
+ private Type type;
+ private Comparator<BytesRef> comp;
+ private int valueSize;
+ private final IndexInput input;
+
+ public SimpleTextDocValues(Directory dir, IOContext ctx, Type type,
+ String id, int docCount, Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
+ this.type = type;
+ this.docCount = docCount;
+ this.comp = comp;
+ final String fileName = IndexFileNames.segmentFileName(id, "", segmentSuffix);
+ boolean success = false;
+ IndexInput in = null;
+ try {
+ in = dir.openInput(fileName, ctx);
+ valueSize = readHeader(in);
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
+ input = in;
+
+ }
+
+ @Override
+ public Source load() throws IOException {
+ boolean success = false;
+ IndexInput in = (IndexInput) input.clone();
+ try {
+ Source source = null;
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ source = read(in, new ValueReader(type, docCount, comp));
+ break;
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case VAR_INTS:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case FLOAT_32:
+ case FLOAT_64:
+ source = read(in, new ValueReader(type, docCount, null));
+ break;
+ default:
+ throw new IllegalArgumentException("unknown type: " + type);
+ }
+ assert source != null;
+ success = true;
+ return source;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(in);
+ } else {
+ IOUtils.close(in);
+ }
+ }
+ }
+
+ private int readHeader(IndexInput in) throws IOException {
+ BytesRef scratch = new BytesRef();
+ SimpleTextUtil.readLine(in, scratch);
+ assert StringHelper.startsWith(scratch, HEADER);
+ SimpleTextUtil.readLine(in, scratch);
+ assert StringHelper.startsWith(scratch, VALUE_SIZE);
+ return Integer.parseInt(readString(scratch.offset + VALUE_SIZE.length,
+ scratch));
+ }
+
+ private Source read(IndexInput in, ValueReader reader) throws IOException {
+ BytesRef scratch = new BytesRef();
+ for (int i = 0; i < docCount; i++) {
+ SimpleTextUtil.readLine(in, scratch);
+
+ assert StringHelper.startsWith(scratch, DOC) : scratch.utf8ToString();
+ SimpleTextUtil.readLine(in, scratch);
+ assert StringHelper.startsWith(scratch, VALUE);
+ reader.fromString(i, scratch, scratch.offset + VALUE.length);
+ }
+ SimpleTextUtil.readLine(in, scratch);
+ assert scratch.equals(END);
+ return reader.getSource();
+ }
+
+ @Override
+ public Source getDirectSource() throws IOException {
+ return this.getSource();
+ }
+
+ @Override
+ public int getValueSize() {
+ return valueSize;
+ }
+
+ @Override
+ public Type getType() {
+ return type;
+ }
+
+ }
+
+ public static String readString(int offset, BytesRef scratch) {
+ return new String(scratch.bytes, scratch.offset + offset, scratch.length
+ - offset, IOUtils.CHARSET_UTF_8);
+ }
+
+ private static final class ValueReader {
+ private final Type type;
+ private byte[] bytes;
+ private short[] shorts;
+ private int[] ints;
+ private long[] longs;
+ private float[] floats;
+ private double[] doubles;
+ private Source source;
+ private BytesRefHash hash;
+ private BytesRef scratch;
+
+ public ValueReader(Type type, int maxDocs, Comparator<BytesRef> comp) {
+ super();
+ this.type = type;
+ Source docValuesArray = null;
+ switch (type) {
+ case FIXED_INTS_16:
+ shorts = new short[maxDocs];
+ docValuesArray = DocValuesArraySource.forType(type)
+ .newFromArray(shorts);
+ break;
+ case FIXED_INTS_32:
+ ints = new int[maxDocs];
+ docValuesArray = DocValuesArraySource.forType(type).newFromArray(ints);
+ break;
+ case FIXED_INTS_64:
+ longs = new long[maxDocs];
+ docValuesArray = DocValuesArraySource.forType(type)
+ .newFromArray(longs);
+ break;
+ case VAR_INTS:
+ longs = new long[maxDocs];
+ docValuesArray = new VarIntsArraySource(type, longs);
+ break;
+ case FIXED_INTS_8:
+ bytes = new byte[maxDocs];
+ docValuesArray = DocValuesArraySource.forType(type).newFromArray(bytes);
+ break;
+ case FLOAT_32:
+ floats = new float[maxDocs];
+ docValuesArray = DocValuesArraySource.forType(type)
+ .newFromArray(floats);
+ break;
+ case FLOAT_64:
+ doubles = new double[maxDocs];
+ docValuesArray = DocValuesArraySource.forType(type).newFromArray(
+ doubles);
+ break;
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ assert comp != null;
+ hash = new BytesRefHash();
+ BytesSource bytesSource = new BytesSource(type, comp, maxDocs, hash);
+ ints = bytesSource.docIdToEntry;
+ source = bytesSource;
+ scratch = new BytesRef();
+ break;
+
+ }
+ if (docValuesArray != null) {
+ assert source == null;
+ this.source = docValuesArray;
+ }
+ }
+
+ public void fromString(int ord, BytesRef ref, int offset) {
+ switch (type) {
+ case FIXED_INTS_16:
+ assert shorts != null;
+ shorts[ord] = Short.parseShort(readString(offset, ref));
+ break;
+ case FIXED_INTS_32:
+ assert ints != null;
+ ints[ord] = Integer.parseInt(readString(offset, ref));
+ break;
+ case FIXED_INTS_64:
+ case VAR_INTS:
+ assert longs != null;
+ longs[ord] = Long.parseLong(readString(offset, ref));
+ break;
+ case FIXED_INTS_8:
+ assert bytes != null;
+ bytes[ord] = (byte) Integer.parseInt(readString(offset, ref));
+ break;
+ case FLOAT_32:
+ assert floats != null;
+ floats[ord] = Float.parseFloat(readString(offset, ref));
+ break;
+ case FLOAT_64:
+ assert doubles != null;
+ doubles[ord] = Double.parseDouble(readString(offset, ref));
+ break;
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ scratch.bytes = ref.bytes;
+ scratch.length = ref.length - offset;
+ scratch.offset = ref.offset + offset;
+ int key = hash.add(scratch);
+ ints[ord] = key < 0 ? (-key) - 1 : key;
+ break;
+ }
+ }
+
+ public Source getSource() {
+ if (source instanceof BytesSource) {
+ ((BytesSource) source).maybeSort();
+ }
+ return source;
+ }
+ }
+
+ private static final class BytesSource extends SortedSource {
+
+ private final BytesRefHash hash;
+ int[] docIdToEntry;
+ int[] sortedEntries;
+ int[] adresses;
+ private final boolean isSorted;
+
+ protected BytesSource(Type type, Comparator<BytesRef> comp, int maxDoc,
+ BytesRefHash hash) {
+ super(type, comp);
+ docIdToEntry = new int[maxDoc];
+ this.hash = hash;
+ isSorted = type == Type.BYTES_FIXED_SORTED
+ || type == Type.BYTES_VAR_SORTED;
+ }
+
+ void maybeSort() {
+ if (isSorted) {
+ adresses = new int[hash.size()];
+ sortedEntries = hash.sort(getComparator());
+ for (int i = 0; i < adresses.length; i++) {
+ int entry = sortedEntries[i];
+ adresses[entry] = i;
+ }
+ }
+
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ if (isSorted) {
+ return hash.get(sortedEntries[ord(docID)], ref);
+ } else {
+ return hash.get(docIdToEntry[docID], ref);
+ }
+ }
+
+ @Override
+ public SortedSource asSortedSource() {
+ if (isSorted) {
+ return this;
+ }
+ return null;
+ }
+
+ @Override
+ public int ord(int docID) {
+ assert isSorted;
+ try {
+ return adresses[docIdToEntry[docID]];
+ } catch (Exception e) {
+ // defensive: treat a missing or out-of-bounds entry as ord 0
+ return 0;
+ }
+ }
+
+ @Override
+ public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+ assert isSorted;
+ return hash.get(sortedEntries[ord], bytesRef);
+ }
+
+ @Override
+ public Reader getDocToOrd() {
+ return null;
+ }
+
+ @Override
+ public int getValueCount() {
+ return hash.size();
+ }
+
+ }
+
+ private static class VarIntsArraySource extends Source {
+
+ private final long[] array;
+
+ protected VarIntsArraySource(Type type, long[] array) {
+ super(type);
+ this.array = array;
+ }
+
+ @Override
+ public long getInt(int docID) {
+ return array[docID];
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ DocValuesArraySource.copyLong(ref, getInt(docID));
+ return ref;
+ }
+
+ }
+
+}
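For readers of this patch: a hedged sketch of the line-oriented file shape that readHeader() and read() above expect. Only the constant names (HEADER, VALUE_SIZE, DOC, VALUE, END) appear in this diff, so the literal marker text below is an assumption:

// Assumed on-disk layout consumed by SimpleTextDocValues:
//
//   <HEADER line>         checked once at the top of the file
//   <VALUE_SIZE line> 4   parsed into valueSize; -1 for variable-size types
//   <DOC line> 0          repeated docCount times, each followed by ...
//   <VALUE line> 42       ... one value, decoded by ValueReader.fromString
//   <END line>            trailing sentinel verified after the last document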
diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
index a3840a13c96..fd4428c52fc 100644
--- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
@@ -60,12 +60,17 @@ public abstract class AtomicReader extends IndexReader {
return readerContext;
}
- /** Returns true if there are norms stored for this field. */
- public boolean hasNorms(String field) throws IOException {
- // backward compatible implementation.
- // SegmentReader has an efficient implementation.
+ /**
+ * Returns true if there are norms stored for this field.
+ * @deprecated (4.0) use {@link #getFieldInfos()} and check {@link FieldInfo#hasNorms()}
+ * for the field instead.
+ */
+ @Deprecated
+ public final boolean hasNorms(String field) throws IOException {
ensureOpen();
- return normValues(field) != null;
+ // note: using normValues(field) != null would potentially cause i/o
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ return fi != null && fi.hasNorms();
}
/**
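The deprecated method now delegates to field metadata instead of opening norms. A minimal sketch of the same idiom for callers migrating off hasNorms(String); the class and method names are illustrative:

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo;

final class HasNormsExample {
  // Pure metadata lookup, so no norms are loaded as a side effect.
  static boolean hasNorms(AtomicReader reader, String field) {
    FieldInfo fi = reader.getFieldInfos().fieldInfo(field);
    return fi != null && fi.hasNorms();
  }
}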
diff --git a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
index c9bb2001455..d97e0b6b453 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
@@ -53,6 +53,14 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
private final int numDocs;
private final boolean hasDeletions;
+ /**
+ * Constructs a {@code BaseCompositeReader} on the given subReaders.
+ * @param subReaders the wrapped sub-readers. This array is returned by
+ * {@link #getSequentialSubReaders} and used to resolve the correct
+ * subreader for docID-based methods. Please note: This array is not
+ * cloned and not protected for modification, the subclass is responsible
+ * to do this.
+ */
protected BaseCompositeReader(R[] subReaders) throws IOException {
this.subReaders = subReaders;
starts = new int[subReaders.length + 1]; // build starts array
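Since the array is neither cloned nor guarded, a subclass that cannot trust its caller should copy defensively, as MultiReader does later in this patch. A hypothetical subclass sketch (class name and ownership policy are assumptions):

import java.io.IOException;
import org.apache.lucene.index.BaseCompositeReader;
import org.apache.lucene.index.IndexReader;

// Hypothetical subclass: cloning up front shields the starts/numDocs
// bookkeeping from later mutation of the caller's array.
class ClonedCompositeReader extends BaseCompositeReader<IndexReader> {
  ClonedCompositeReader(IndexReader[] subReaders) throws IOException {
    super(subReaders.clone());
  }

  @Override
  protected void doClose() throws IOException {
    // nothing owned in this sketch; a real subclass closes what it owns
  }
}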
diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
index ad55e08c053..78f80f11fa7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
@@ -210,7 +210,7 @@ class BufferedDeletesStream {
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
- final IndexWriter.ReadersAndLiveDocs rld = readerPool.get(info, true);
+ final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
int delCount = 0;
final boolean segAllDeletes;
@@ -224,11 +224,12 @@ class BufferedDeletesStream {
// Don't delete by Term here; DocumentsWriterPerThread
// already did that on flush:
delCount += applyQueryDeletes(packet.queriesIterable(), rld, reader);
- final int fullDelCount = rld.info.getDelCount() + rld.pendingDeleteCount;
+ final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
assert fullDelCount <= rld.info.docCount;
segAllDeletes = fullDelCount == rld.info.docCount;
} finally {
- readerPool.release(reader, false);
+ rld.release(reader);
+ readerPool.release(rld);
}
anyNewDeletes |= delCount > 0;
@@ -262,18 +263,19 @@ class BufferedDeletesStream {
if (coalescedDeletes != null) {
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
- final IndexWriter.ReadersAndLiveDocs rld = readerPool.get(info, true);
+ final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
int delCount = 0;
final boolean segAllDeletes;
try {
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), rld, reader);
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), rld, reader);
- final int fullDelCount = rld.info.getDelCount() + rld.pendingDeleteCount;
+ final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
assert fullDelCount <= rld.info.docCount;
segAllDeletes = fullDelCount == rld.info.docCount;
- } finally {
- readerPool.release(reader, false);
+ } finally {
+ rld.release(reader);
+ readerPool.release(rld);
}
anyNewDeletes |= delCount > 0;
@@ -353,7 +355,7 @@ class BufferedDeletesStream {
}
// Delete by Term
- private synchronized long applyTermDeletes(Iterable<Term> termsIter, IndexWriter.ReadersAndLiveDocs rld, SegmentReader reader) throws IOException {
+ private synchronized long applyTermDeletes(Iterable<Term> termsIter, ReadersAndLiveDocs rld, SegmentReader reader) throws IOException {
long delCount = 0;
Fields fields = reader.fields();
if (fields == null) {
@@ -394,7 +396,7 @@ class BufferedDeletesStream {
// System.out.println(" term=" + term);
if (termsEnum.seekExact(term.bytes(), false)) {
- DocsEnum docsEnum = termsEnum.docs(rld.liveDocs, docs, false);
+ DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, false);
//System.out.println("BDS: got docsEnum=" + docsEnum);
if (docsEnum != null) {
@@ -434,7 +436,7 @@ class BufferedDeletesStream {
}
// Delete by query
- private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, IndexWriter.ReadersAndLiveDocs rld, final SegmentReader reader) throws IOException {
+ private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, ReadersAndLiveDocs rld, final SegmentReader reader) throws IOException {
long delCount = 0;
final AtomicReaderContext readerContext = reader.getTopReaderContext();
boolean any = false;
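The hunks above replace the old readerPool.release(reader, false) with a two-step release. A sketch of the acquire/release discipline, using only names visible in this patch (this fragment is not compilable outside IndexWriter's package):

// create=true takes a pool reference that the caller must give back:
final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
try {
  // ... apply term/query deletes against reader ...
} finally {
  rld.release(reader);     // return the SegmentReader to its ReadersAndLiveDocs
  readerPool.release(rld); // return the reference taken by get(info, true)
}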
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 2c7cd03f655..0aa837fb75d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -651,28 +651,17 @@ public class CheckIndex {
if (infoStream != null) {
infoStream.print(" test: field norms.........");
}
- DocValues dv;
for (FieldInfo info : fieldInfos) {
- if (reader.hasNorms(info.name)) {
- dv = reader.normValues(info.name);
- assert dv != null;
- if (dv.getSource().hasArray()) {
- Object array = dv.getSource().getArray();
- if (Array.getLength(array) != reader.maxDoc()) {
- throw new RuntimeException("norms for field: " + info.name + " are of the wrong size");
- }
- }
- if (!info.isIndexed || info.omitNorms) {
- throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
- }
+ if (info.hasNorms()) {
+ assert reader.hasNorms(info.name); // deprecated path
+ DocValues dv = reader.normValues(info.name);
+ checkDocValues(dv, info.name, info.getNormType(), reader.maxDoc());
++status.totFields;
} else {
+ assert !reader.hasNorms(info.name); // deprecated path
if (reader.normValues(info.name) != null) {
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
- if (info.normsPresent()) {
- throw new RuntimeException("field: " + info.name + " should have norms but omits them!");
- }
}
}
@@ -1171,6 +1160,92 @@ public class CheckIndex {
return status;
}
+ /** Helper method to verify values (either docvalues or norms), also checking
+ * type and size against fieldinfos/segmentinfo
+ */
+ private void checkDocValues(DocValues docValues, String fieldName, DocValues.Type expectedType, int expectedDocs) throws IOException {
+ if (docValues == null) {
+ throw new RuntimeException("field: " + fieldName + " omits docvalues but should have them!");
+ }
+ DocValues.Type type = docValues.getType();
+ if (type != expectedType) {
+ throw new RuntimeException("field: " + fieldName + " has type: " + type + " but fieldInfos says:" + expectedType);
+ }
+ final Source values = docValues.getDirectSource();
+ int size = docValues.getValueSize();
+ for (int i = 0; i < expectedDocs; i++) {
+ switch (type) {
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_STRAIGHT:
+ BytesRef bytes = new BytesRef();
+ values.getBytes(i, bytes);
+ if (size != -1 && size != bytes.length) {
+ throw new RuntimeException("field: " + fieldName + " returned wrongly sized bytes, was: " + bytes.length + " should be: " + size);
+ }
+ break;
+ case FLOAT_32:
+ assert size == 4;
+ values.getFloat(i);
+ break;
+ case FLOAT_64:
+ assert size == 8;
+ values.getFloat(i);
+ break;
+ case VAR_INTS:
+ assert size == -1;
+ values.getInt(i);
+ break;
+ case FIXED_INTS_16:
+ assert size == 2;
+ values.getInt(i);
+ break;
+ case FIXED_INTS_32:
+ assert size == 4;
+ values.getInt(i);
+ break;
+ case FIXED_INTS_64:
+ assert size == 8;
+ values.getInt(i);
+ break;
+ case FIXED_INTS_8:
+ assert size == 1;
+ values.getInt(i);
+ break;
+ default:
+ throw new IllegalArgumentException("Field: " + fieldName
+ + " - no such DocValues type: " + type);
+ }
+ }
+ if (type == DocValues.Type.BYTES_FIXED_SORTED || type == DocValues.Type.BYTES_VAR_SORTED) {
+ // check sorted bytes
+ SortedSource sortedValues = values.asSortedSource();
+ Comparator<BytesRef> comparator = sortedValues.getComparator();
+ int lastOrd = -1;
+ BytesRef lastBytes = new BytesRef();
+ for (int i = 0; i < expectedDocs; i++) {
+ int ord = sortedValues.ord(i);
+ if (ord < 0 || ord > expectedDocs) {
+ throw new RuntimeException("field: " + fieldName + " ord is out of bounds: " + ord);
+ }
+ BytesRef bytes = new BytesRef();
+ sortedValues.getByOrd(ord, bytes);
+ if (lastOrd != -1) {
+ int ordComp = Integer.signum(new Integer(ord).compareTo(new Integer(lastOrd)));
+ int bytesComp = Integer.signum(comparator.compare(bytes, lastBytes));
+ if (ordComp != bytesComp) {
+ throw new RuntimeException("field: " + fieldName + " ord comparison is wrong: " + ordComp + " comparator claims: " + bytesComp);
+ }
+ }
+ lastOrd = ord;
+ lastBytes = bytes;
+ }
+ }
+ }
+
private Status.DocValuesStatus testDocValues(SegmentInfo info,
SegmentReader reader) {
final Status.DocValuesStatus status = new Status.DocValuesStatus();
@@ -1183,87 +1258,7 @@ public class CheckIndex {
if (fieldInfo.hasDocValues()) {
status.totalValueFields++;
final DocValues docValues = reader.docValues(fieldInfo.name);
- if (docValues == null) {
- throw new RuntimeException("field: " + fieldInfo.name + " omits docvalues but should have them!");
- }
- DocValues.Type type = docValues.type();
- if (type != fieldInfo.getDocValuesType()) {
- throw new RuntimeException("field: " + fieldInfo.name + " has type: " + type + " but fieldInfos says:" + fieldInfo.getDocValuesType());
- }
- final Source values = docValues.getDirectSource();
- final int maxDoc = reader.maxDoc();
- int size = docValues.getValueSize();
- for (int i = 0; i < maxDoc; i++) {
- switch (fieldInfo.getDocValuesType()) {
- case BYTES_FIXED_SORTED:
- case BYTES_VAR_SORTED:
- case BYTES_FIXED_DEREF:
- case BYTES_FIXED_STRAIGHT:
- case BYTES_VAR_DEREF:
- case BYTES_VAR_STRAIGHT:
- BytesRef bytes = new BytesRef();
- values.getBytes(i, bytes);
- if (size != -1 && size != bytes.length) {
- throw new RuntimeException("field: " + fieldInfo.name + " returned wrongly sized bytes, was: " + bytes.length + " should be: " + size);
- }
- break;
- case FLOAT_32:
- assert size == 4;
- values.getFloat(i);
- break;
- case FLOAT_64:
- assert size == 8;
- values.getFloat(i);
- break;
- case VAR_INTS:
- assert size == -1;
- values.getInt(i);
- break;
- case FIXED_INTS_16:
- assert size == 2;
- values.getInt(i);
- break;
- case FIXED_INTS_32:
- assert size == 4;
- values.getInt(i);
- break;
- case FIXED_INTS_64:
- assert size == 8;
- values.getInt(i);
- break;
- case FIXED_INTS_8:
- assert size == 1;
- values.getInt(i);
- break;
- default:
- throw new IllegalArgumentException("Field: " + fieldInfo.name
- + " - no such DocValues type: " + fieldInfo.getDocValuesType());
- }
- }
- if (type == DocValues.Type.BYTES_FIXED_SORTED || type == DocValues.Type.BYTES_VAR_SORTED) {
- // check sorted bytes
- SortedSource sortedValues = values.asSortedSource();
- Comparator<BytesRef> comparator = sortedValues.getComparator();
- int lastOrd = -1;
- BytesRef lastBytes = new BytesRef();
- for (int i = 0; i < maxDoc; i++) {
- int ord = sortedValues.ord(i);
- if (ord < 0 || ord > maxDoc) {
- throw new RuntimeException("field: " + fieldInfo.name + " ord is out of bounds: " + ord);
- }
- BytesRef bytes = new BytesRef();
- sortedValues.getByOrd(ord, bytes);
- if (lastOrd != -1) {
- int ordComp = Integer.signum(new Integer(ord).compareTo(new Integer(lastOrd)));
- int bytesComp = Integer.signum(comparator.compare(bytes, lastBytes));
- if (ordComp != bytesComp) {
- throw new RuntimeException("field: " + fieldInfo.name + " ord comparison is wrong: " + ordComp + " comparator claims: " + bytesComp);
- }
- }
- lastOrd = ord;
- lastBytes = bytes;
- }
- }
+ checkDocValues(docValues, fieldInfo.name, fieldInfo.getDocValuesType(), reader.maxDoc());
} else {
if (reader.docValues(fieldInfo.name) != null) {
throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!");
diff --git a/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java
index 055557daa5c..47b48220882 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java
@@ -81,6 +81,9 @@ public abstract class CompositeReader extends IndexReader {
* If this method returns an empty array, that means this
* reader is a null reader (for example a MultiReader
* that has no sub readers).
+ * <p>Warning: Don't modify the returned array!
+ * Doing so will corrupt the internal structure of this
+ * {@code CompositeReader}.
*/
public abstract IndexReader[] getSequentialSubReaders();
diff --git a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
index 48ed5bf45b6..30785b0fcf2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
@@ -323,8 +323,17 @@ public abstract class DirectoryReader extends BaseCompositeReader
}
}
- protected DirectoryReader(Directory directory, AtomicReader[] readers) throws CorruptIndexException, IOException {
- super(readers);
+ /**
+ * Expert: Constructs a {@code DirectoryReader} on the given subReaders.
+ * @param segmentReaders the wrapped atomic index segment readers. This array is
+ * returned by {@link #getSequentialSubReaders} and used to resolve the correct
+ * subreader for docID-based methods. Please note: This array is not
+ * cloned and not protected for modification outside of this reader.
+ * Subclasses of {@code DirectoryReader} should take care to not allow
+ * modification of this internal array, e.g. {@link #doOpenIfChanged()}.
+ */
+ protected DirectoryReader(Directory directory, AtomicReader[] segmentReaders) throws CorruptIndexException, IOException {
+ super(segmentReaders);
this.directory = directory;
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
index 038c62f4de2..677facbf6e1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
@@ -216,6 +216,13 @@ public class DocTermOrds {
}
}
+ /**
+ * @return The number of terms in this field
+ */
+ public int numTerms() {
+ return numTermsInField;
+ }
+
/** Subclass can override this */
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValues.java b/lucene/core/src/java/org/apache/lucene/index/DocValues.java
index 23999f49e0c..7d77326012d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValues.java
@@ -90,7 +90,7 @@ public abstract class DocValues implements Closeable {
/**
* Returns the {@link Type} of this {@link DocValues} instance
*/
- public abstract Type type();
+ public abstract Type getType();
/**
* Closes this {@link DocValues} instance. This method should only be called
@@ -191,7 +191,7 @@ public abstract class DocValues implements Closeable {
*
* @return the {@link Type} of this source.
*/
- public Type type() {
+ public Type getType() {
return type;
}
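The type() to getType() rename is mechanical for callers. A hedged before/after sketch (the field name "price" is illustrative):

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;

final class GetTypeMigration {
  static long readLong(AtomicReader reader, int docID) throws IOException {
    DocValues dv = reader.docValues("price");
    // was: dv.type() == DocValues.Type.FIXED_INTS_64
    if (dv != null && dv.getType() == DocValues.Type.FIXED_INTS_64) {
      return dv.getSource().getInt(docID);
    }
    return 0L;
  }
}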
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
index 60ccf4a65f8..34f9587008d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
@@ -122,14 +122,23 @@ public final class FieldInfo {
}
}
+ /**
+ * @return true if this field has any docValues.
+ */
public boolean hasDocValues() {
return docValueType != null;
}
+ /**
+ * @return {@link DocValues.Type} of the docValues. This may be null if the field has no docvalues.
+ */
public DocValues.Type getDocValuesType() {
return docValueType;
}
+ /**
+ * @return {@link DocValues.Type} of the norm. This may be null if the field has no norms.
+ */
public DocValues.Type getNormType() {
return normType;
}
@@ -146,11 +155,17 @@ public final class FieldInfo {
}
}
+ /**
+ * @return true if norms are explicitly omitted for this field
+ */
public boolean omitNorms() {
return omitNorms;
}
- public boolean normsPresent() {
+ /**
+ * @return true if this field actually has any norms.
+ */
+ public boolean hasNorms() {
return isIndexed && !omitNorms && normType != null;
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
index 5fbc9511ae1..e2af36f4f4e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
@@ -178,7 +178,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return fis;
}
- /** Returns true if any fields do not positions */
+ /** Returns true if any fields have positions */
public boolean hasProx() {
if (isReadOnly()) {
return hasProx;
@@ -349,6 +349,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return fi;
}
+ /**
+ * Looks up the number of a field by name.
+ *
+ * @param fieldName field's name
+ * @return number of field, or -1 if it does not exist.
+ */
public int fieldNumber(String fieldName) {
FieldInfo fi = fieldInfo(fieldName);
return (fi != null) ? fi.number : -1;
@@ -384,11 +390,17 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return byNumber.values().iterator();
}
+ /**
+ * @return number of fields
+ */
public int size() {
assert byNumber.size() == byName.size();
return byNumber.size();
}
+ /**
+ * @return true if at least one field has any vectors
+ */
public boolean hasVectors() {
if (isReadOnly()) {
return hasVectors;
@@ -402,9 +414,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return false;
}
+ /**
+ * @return true if at least one field has any norms
+ */
public boolean hasNorms() {
for (FieldInfo fi : this) {
- if (fi.normsPresent()) {
+ if (fi.hasNorms()) {
return true;
}
}
@@ -441,7 +456,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return roFis;
}
- public boolean anyDocValuesFields() {
+ /**
+ * @return true if at least one field has docValues
+ */
+ public boolean hasDocValues() {
for (FieldInfo fi : this) {
if (fi.hasDocValues()) {
return true;
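Taken together, the renamed and newly documented accessors read like this; a small sketch (the field name "title" is illustrative):

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfos;

final class FieldInfosExample {
  static void describe(AtomicReader reader) {
    FieldInfos infos = reader.getFieldInfos();
    boolean anyNorms = infos.hasNorms();     // per field this is fi.hasNorms(), was normsPresent()
    boolean anyDV = infos.hasDocValues();    // was anyDocValuesFields()
    int number = infos.fieldNumber("title"); // -1 when the field does not exist
    System.out.println(anyNorms + " " + anyDV + " field#=" + number);
  }
}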
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
index 7cda4d35947..e1a302f48e1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
@@ -359,12 +359,6 @@ public class FilterAtomicReader extends AtomicReader {
return in.hasDeletions();
}
- @Override
- public boolean hasNorms(String field) throws IOException {
- ensureOpen();
- return in.hasNorms(field);
- }
-
@Override
protected void doClose() throws IOException {
in.close();
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java b/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
index 4b9e3f7c0e8..2f7ec499774 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
@@ -453,7 +453,7 @@ final class IndexFileDeleter {
assert Thread.holdsLock(writer);
if (infoStream.isEnabled("IFD")) {
- infoStream.message("IFD", "now checkpoint \"" + writer.segString(segmentInfos) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
+ infoStream.message("IFD", "now checkpoint \"" + writer.segString(writer.toLiveInfos(segmentInfos)) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
}
// Try again now to delete any previously un-deletable
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java b/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
index a27557ea57c..8c33c1996fc 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
@@ -41,25 +41,27 @@ public abstract class IndexReaderContext {
this.isTopLevel = parent==null;
}
+ /** Returns the {@link IndexReader} this context represents. */
public abstract IndexReader reader();
/**
* Returns the context's leaves if this context is a top-level context
* otherwise null. For convenience, if this is an
* {@link AtomicReaderContext} this returns itself as the only leaf.
- *
- * Note: this is convenience method since leaves can always be obtained by
+ * <p>Note: this is a convenience method since leaves can always be obtained by
* walking the context tree.
+ * <p>Warning: Don't modify the returned array!
+ * Doing so will corrupt the internal structure of this
+ * {@code IndexReaderContext}.
*/
public abstract AtomicReaderContext[] leaves();
/**
* Returns the context's children iff this context is a composite context
* otherwise null.
- *
- * Note: this method is a convenience method to prevent
- * instanceof checks and type-casts to
- * {@link CompositeReaderContext}.
+ * <p>Warning: Don't modify the returned array!
+ * Doing so will corrupt the internal structure of this
+ * {@code IndexReaderContext}.
*/
public abstract IndexReaderContext[] children();
}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 4a6ec709bb5..01079bd0887 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -33,7 +33,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -392,260 +391,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
return r;
}
- // This class inherits all sync from IW:
- class ReadersAndLiveDocs {
- // Not final because we replace (clone) when we need to
- // change it and it's been shared:
- public final SegmentInfo info;
-
- // Set once (null, and then maybe set, and never set again):
- private SegmentReader reader;
-
- // TODO: it's sometimes wasteful that we hold open two
- // separate SRs (one for merging one for
- // reading)... maybe just use a single SR? The gains of
- // not loading the terms index (for merging in the
- // non-NRT case) are far less now... and if the app has
- // any deletes it'll open real readers anyway.
-
- // Set once (null, and then maybe set, and never set again):
- private SegmentReader mergeReader;
-
- // Holds the current shared (readable and writable
- // liveDocs). This is null when there are no deleted
- // docs, and it's copy-on-write (cloned whenever we need
- // to change it but it's been shared to an external NRT
- // reader).
- public Bits liveDocs;
-
- // How many further deletions we've done against
- // liveDocs vs when we loaded it or last wrote it:
- public int pendingDeleteCount;
-
- // True if the current liveDocs is referenced by an
- // external NRT reader:
- public boolean shared;
-
- public ReadersAndLiveDocs(SegmentInfo info) {
- this.info = info;
- shared = true;
- }
-
- // Returns false if we are the only remaining refs of
- // this reader:
- public synchronized boolean anyOutsideRefs(SegmentReader sr) {
- int myRefCounts = 0;
- if (sr == reader) {
- myRefCounts++;
- }
- if (sr == mergeReader) {
- myRefCounts++;
- }
- final int rc = sr.getRefCount();
- assert rc >= myRefCounts;
- return rc > myRefCounts;
- }
-
- // Call only from assert!
- public synchronized boolean verifyDocCounts() {
- int count;
- if (liveDocs != null) {
- count = 0;
- for(int docID=0;docID<info.docCount;docID++) {
final Iterator<Map.Entry<SegmentInfo,ReadersAndLiveDocs>> it = readerMap.entrySet().iterator();
while(it.hasNext()) {
final ReadersAndLiveDocs rld = it.next().getValue();
- //System.out.println("pool.dropAll: seg=" + rld.info);
if (doSave && rld.writeLiveDocs(directory)) {
+ // Make sure we only write del docs for a live segment:
assert infoIsLive(rld.info);
// Must checkpoint w/ deleter, because we just
// created new _X_N.del file.
@@ -735,13 +472,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
assert readerMap.size() == 0;
}
- public synchronized void drop(SegmentInfo info) throws IOException {
- final ReadersAndLiveDocs rld = readerMap.remove(info);
- if (rld != null) {
- rld.dropReaders();
- }
- }
-
/**
* Commit live docs changes for the segment readers for
* the provided infos.
@@ -751,19 +481,23 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
public synchronized void commit(SegmentInfos infos) throws IOException {
for (SegmentInfo info : infos) {
final ReadersAndLiveDocs rld = readerMap.get(info);
- if (rld != null && rld.writeLiveDocs(directory)) {
- assert infoIsLive(info);
- // Must checkpoint w/ deleter, because we just
- // created created new _X_N.del file.
- deleter.checkpoint(segmentInfos, false);
+ if (rld != null) {
+ assert rld.info == info;
+ if (rld.writeLiveDocs(directory)) {
+ // Make sure we only write del docs for a live segment:
+ assert infoIsLive(info);
+ // Must checkpoint w/ deleter, because we just
+ // created created new _X_N.del file.
+ deleter.checkpoint(segmentInfos, false);
+ }
}
}
}
/**
* Obtain a ReadersAndLiveDocs instance from the
- * readerPool. If getReader is true, you must later call
- * {@link #release(SegmentReader)}.
+ * readerPool. If create is true, you must later call
+ * {@link #release(ReadersAndLiveDocs)}.
* @throws IOException
*/
public synchronized ReadersAndLiveDocs get(SegmentInfo info, boolean create) {
@@ -771,15 +505,22 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
assert info.dir == directory;
ReadersAndLiveDocs rld = readerMap.get(info);
- //System.out.println("rld.get seg=" + info + " poolReaders=" + poolReaders);
if (rld == null) {
- //System.out.println(" new rld");
if (!create) {
return null;
}
- rld = new ReadersAndLiveDocs(info);
+ rld = new ReadersAndLiveDocs(IndexWriter.this, info);
+ // Steal initial reference:
readerMap.put(info, rld);
+ } else {
+ assert rld.info == info: "rld.info=" + rld.info + " info=" + info + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info);
}
+
+ if (create) {
+ // Return ref to caller:
+ rld.incRef();
+ }
+
return rld;
}
}
@@ -795,7 +536,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final ReadersAndLiveDocs rld = readerPool.get(info, false);
if (rld != null) {
- delCount += rld.pendingDeleteCount;
+ delCount += rld.getPendingDeleteCount();
}
return delCount;
}
@@ -1116,7 +857,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
finishMerges(waitForMerges);
stopMerges = true;
}
-
mergeScheduler.close();
if (infoStream.isEnabled("IW")) {
@@ -1160,8 +900,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
}
-
-
/** Returns the Directory used by this index. */
public Directory getDirectory() {
@@ -2020,6 +1758,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
notifyAll();
}
+ // Don't bother saving any changes in our segmentInfos
+ readerPool.dropAll(false);
+
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
@@ -2038,9 +1779,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// them:
deleter.checkpoint(segmentInfos, false);
deleter.refresh();
-
- // Don't bother saving any changes in our segmentInfos
- readerPool.dropAll(false);
}
lastCommitChangeCount = changeCount;
@@ -3023,16 +2761,18 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final int docCount = info.docCount;
final Bits prevLiveDocs = merge.readerLiveDocs.get(i);
final Bits currentLiveDocs;
- ReadersAndLiveDocs rld = readerPool.get(info, false);
- // We enrolled in mergeInit:
- assert rld != null;
- currentLiveDocs = rld.liveDocs;
+ final ReadersAndLiveDocs rld = readerPool.get(info, false);
+ // We hold a ref so it should still be in the pool:
+ assert rld != null: "seg=" + info.name;
+ currentLiveDocs = rld.getLiveDocs();
if (prevLiveDocs != null) {
// If we had deletions on starting the merge we must
// still have deletions now:
assert currentLiveDocs != null;
+ assert prevLiveDocs.length() == docCount;
+ assert currentLiveDocs.length() == docCount;
// There were deletes on this segment when the merge
// started. The merge has collapsed away those
@@ -3066,9 +2806,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
} else {
- docUpto += info.docCount - info.getDelCount() - rld.pendingDeleteCount;
+ docUpto += info.docCount - info.getDelCount() - rld.getPendingDeleteCount();
}
} else if (currentLiveDocs != null) {
+ assert currentLiveDocs.length() == docCount;
// This segment had no deletes before but now it
// does:
for(int j=0; j<docCount; j++) {
public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
final StringBuilder buffer = new StringBuilder();
- for(final SegmentInfo s : infos) {
+ for(final SegmentInfo info : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
- buffer.append(segString(s));
+ buffer.append(segString(info));
}
return buffer.toString();
}
@@ -3819,6 +3574,24 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
return true;
}
+ // For infoStream output
+ synchronized SegmentInfos toLiveInfos(SegmentInfos sis) {
+ final SegmentInfos newSIS = new SegmentInfos();
+ final Map<SegmentInfo,SegmentInfo> liveSIS = new HashMap<SegmentInfo,SegmentInfo>();
+ for(SegmentInfo info : segmentInfos) {
+ liveSIS.put(info, info);
+ }
+ for(SegmentInfo info : sis) {
+ SegmentInfo liveInfo = liveSIS.get(info);
+ if (liveInfo != null) {
+ info = liveInfo;
+ }
+ newSIS.add(info);
+ }
+
+ return newSIS;
+ }
+
/** Walk through all files referenced by the current
* segmentInfos and ask the Directory to sync each file,
* if it wasn't already. If that succeeds, then we
@@ -3853,7 +3626,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "startCommit index=" + segString(toSync) + " changeCount=" + changeCount);
+ infoStream.message("IW", "startCommit index=" + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount);
}
assert filesExist(toSync);
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexableField.java b/lucene/core/src/java/org/apache/lucene/index/IndexableField.java
index 267de2e140d..fac46e01592 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexableField.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexableField.java
@@ -54,7 +54,7 @@ public interface IndexableField {
/** Non-null if this field has a Reader value */
public Reader readerValue();
- /** Non-null if this field hasa numeric value */
+ /** Non-null if this field has a numeric value */
public Number numericValue();
/**
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
index 0e41f8dceef..75a61cfa5db 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -144,7 +144,7 @@ public class MultiDocValues extends DocValues {
}
final DocValues d = puller.pull(r, field);
if (d != null) {
- TypePromoter incoming = TypePromoter.create(d.type(), d.getValueSize());
+ TypePromoter incoming = TypePromoter.create(d.getType(), d.getValueSize());
promotedType[0] = promotedType[0].promote(incoming);
} else if (puller.stopLoadingOnNull(r, field)){
promotedType[0] = TypePromoter.getIdentityPromoter(); // set to identity to return null
@@ -203,8 +203,8 @@ public class MultiDocValues extends DocValues {
}
@Override
- public Type type() {
- return emptySource.type();
+ public Type getType() {
+ return emptySource.getType();
}
@Override
@@ -230,8 +230,8 @@ public class MultiDocValues extends DocValues {
}
@Override
- public Type type() {
- return emptyFixedSource.type();
+ public Type getType() {
+ return emptyFixedSource.getType();
}
@Override
@@ -519,7 +519,7 @@ public class MultiDocValues extends DocValues {
@Override
public SortedSource asSortedSource() {
- if (type() == Type.BYTES_FIXED_SORTED || type() == Type.BYTES_VAR_SORTED) {
+ if (getType() == Type.BYTES_FIXED_SORTED || getType() == Type.BYTES_VAR_SORTED) {
}
return super.asSortedSource();
@@ -586,7 +586,7 @@ public class MultiDocValues extends DocValues {
}
@Override
- public Type type() {
+ public Type getType() {
return type;
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiReader.java b/lucene/core/src/java/org/apache/lucene/index/MultiReader.java
index a936b807e52..39627e87c50 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiReader.java
@@ -51,7 +51,7 @@ public class MultiReader extends BaseCompositeReader<IndexReader> {
/**
* <p>Construct a MultiReader aggregating the named set of (sub)readers.
- * @param subReaders set of (sub)readers
+ * @param subReaders set of (sub)readers; this array will be cloned.
* @param closeSubReaders indicates whether the subreaders should be closed
* when this MultiReader is closed
*/
diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
index 4120026d703..545eead433a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
@@ -263,13 +263,6 @@ public final class ParallelAtomicReader extends AtomicReader {
return fields;
}
- @Override
- public boolean hasNorms(String field) throws IOException {
- ensureOpen();
- AtomicReader reader = fieldToReader.get(field);
- return reader==null ? false : reader.hasNorms(field);
- }
-
@Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
new file mode 100644
index 00000000000..e0f4d2f57f4
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
@@ -0,0 +1,303 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.MutableBits;
+
+// Used by IndexWriter to hold open SegmentReaders (for
+// searching or merging), plus pending deletes,
+// for a given segment
+class ReadersAndLiveDocs {
+ // Not final because we replace (clone) when we need to
+ // change it and it's been shared:
+ public final SegmentInfo info;
+
+ // Tracks how many consumers are using this instance:
+ private final AtomicInteger refCount = new AtomicInteger(1);
+
+ private final IndexWriter writer;
+
+ // Set once (null, and then maybe set, and never set again):
+ private SegmentReader reader;
+
+ // TODO: it's sometimes wasteful that we hold open two
+ // separate SRs (one for merging one for
+ // reading)... maybe just use a single SR? The gains of
+ // not loading the terms index (for merging in the
+ // non-NRT case) are far less now... and if the app has
+ // any deletes it'll open real readers anyway.
+
+ // Set once (null, and then maybe set, and never set again):
+ private SegmentReader mergeReader;
+
+ // Holds the current shared (readable and writable
+ // liveDocs). This is null when there are no deleted
+ // docs, and it's copy-on-write (cloned whenever we need
+ // to change it but it's been shared to an external NRT
+ // reader).
+ private Bits liveDocs;
+
+ // How many further deletions we've done against
+ // liveDocs vs when we loaded it or last wrote it:
+ private int pendingDeleteCount;
+
+ // True if the current liveDocs is referenced by an
+ // external NRT reader:
+ private boolean shared;
+
+ public ReadersAndLiveDocs(IndexWriter writer, SegmentInfo info) {
+ this.info = info;
+ this.writer = writer;
+ shared = true;
+ }
+
+ public void incRef() {
+ final int rc = refCount.incrementAndGet();
+ assert rc > 1;
+ }
+
+ public void decRef() {
+ final int rc = refCount.decrementAndGet();
+ assert rc >= 0;
+ }
+
+ public int refCount() {
+ final int rc = refCount.get();
+ assert rc >= 0;
+ return rc;
+ }
+
+ public synchronized int getPendingDeleteCount() {
+ return pendingDeleteCount;
+ }
+
+ // Call only from assert!
+ public synchronized boolean verifyDocCounts() {
+ int count;
+ if (liveDocs != null) {
+ count = 0;
+ for(int docID=0;docID<info.docCount;docID++) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
- if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
- readers.add(reader);
- infosUpto++;
- } else {
- reader.close();
- segmentInfos.remove(infosUpto);
+ final ReadersAndLiveDocs rld = writer.readerPool.get(info, true);
+ try {
+ final SegmentReader reader = rld.getReadOnlyClone(IOContext.READ);
+ if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
+ // Steal the ref:
+ readers.add(reader);
+ infosUpto++;
+ } else {
+ reader.close();
+ segmentInfos.remove(infosUpto);
+ }
+ } finally {
+ writer.readerPool.release(rld);
}
success = true;
} catch(IOException ex) {
prior = ex;
} finally {
- if (!success)
+ if (!success) {
IOUtils.closeWhileHandlingException(prior, readers);
+ }
}
}
return new StandardDirectoryReader(dir, readers.toArray(new SegmentReader[readers.size()]),
@@ -219,12 +225,12 @@ final class StandardDirectoryReader extends DirectoryReader {
}
@Override
- protected final DirectoryReader doOpenIfChanged() throws CorruptIndexException, IOException {
+ protected DirectoryReader doOpenIfChanged() throws CorruptIndexException, IOException {
return doOpenIfChanged(null);
}
@Override
- protected final DirectoryReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
+ protected DirectoryReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
ensureOpen();
// If we were obtained by writer.getReader(), re-ask the
@@ -237,7 +243,7 @@ final class StandardDirectoryReader extends DirectoryReader {
}
@Override
- protected final DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
ensureOpen();
if (writer == this.writer && applyAllDeletes == this.applyAllDeletes) {
return doOpenFromWriter(null);
@@ -246,7 +252,7 @@ final class StandardDirectoryReader extends DirectoryReader {
}
}
- private final DirectoryReader doOpenFromWriter(IndexCommit commit) throws CorruptIndexException, IOException {
+ private DirectoryReader doOpenFromWriter(IndexCommit commit) throws CorruptIndexException, IOException {
if (commit != null) {
throw new IllegalArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
index a6860018a5b..5013f2f67d6 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
@@ -1640,7 +1640,7 @@ public abstract class FieldComparator<T> {
// This means segment has doc values, but they are
// not able to provide a sorted source; consider
// this a hard error:
- throw new IllegalStateException("DocValues exist for field \"" + field + "\", but not as a sorted source: type=" + dv.getSource().type() + " reader=" + context.reader());
+ throw new IllegalStateException("DocValues exist for field \"" + field + "\", but not as a sorted source: type=" + dv.getSource().getType() + " reader=" + context.reader());
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index fa19fad45ba..73d416adf8a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -584,8 +584,9 @@ public class IndexSearcher {
Weight weight = query.createWeight(this);
float v = weight.getValueForNormalization();
float norm = getSimilarity().queryNorm(v);
- if (Float.isInfinite(norm) || Float.isNaN(norm))
+ if (Float.isInfinite(norm) || Float.isNaN(norm)) {
norm = 1.0f;
+ }
weight.normalize(norm, 1.0f);
return weight;
}
@@ -812,6 +813,8 @@ public class IndexSearcher {
final int docCount;
final long sumTotalTermFreq;
final long sumDocFreq;
+
+ assert field != null;
Terms terms = MultiFields.getTerms(reader, field);
if (terms == null) {
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index d829bf32595..2dbc77e27cb 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -22,7 +22,6 @@ import java.util.*;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
@@ -238,7 +237,7 @@ public class MultiPhraseQuery extends Query {
docFreq = termsEnum.docFreq();
}
- postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
+ postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
}
// sort by increasing docFreq order
@@ -314,9 +313,21 @@ public class MultiPhraseQuery extends Query {
}
buffer.append("\"");
+ int k = 0;
Iterator<Term[]> i = termArrays.iterator();
+ int lastPos = -1;
+ boolean first = true;
while (i.hasNext()) {
Term[] terms = i.next();
+ int position = positions.get(k);
+ if (first) {
+ first = false;
+ } else {
+ buffer.append(" ");
+ for (int j=1; j<(position-lastPos); j++) {
+ buffer.append("? ");
+ }
+ }
if (terms.length > 1) {
buffer.append("(");
for (int j = 0; j < terms.length; j++) {
@@ -328,8 +339,8 @@ public class MultiPhraseQuery extends Query {
} else {
buffer.append(terms[0].text());
}
- if (i.hasNext())
- buffer.append(" ");
+ lastPos = position;
+ ++k;
}
buffer.append("\"");
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
index 004d857c0f6..b2d4afe3814 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
@@ -31,12 +31,15 @@ final class PhrasePositions {
final int ord; // unique across all PhrasePositions instances
final DocsAndPositionsEnum postings; // stream of docs & positions
PhrasePositions next; // used to make lists
- PhrasePositions nextRepeating; // link to next repeating pp: standing for same term in different query offsets
+ int rptGroup = -1; // >=0 indicates that this is a repeating PP
+ int rptInd; // index in the rptGroup
+ final Term[] terms; // for repetitions initialization
- PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
+ PhrasePositions(DocsAndPositionsEnum postings, int o, int ord, Term[] terms) {
this.postings = postings;
offset = o;
this.ord = ord;
+ this.terms = terms;
}
final boolean next() throws IOException { // increments to next doc
@@ -78,8 +81,8 @@ final class PhrasePositions {
@Override
public String toString() {
String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
- if (nextRepeating!=null) {
- s += " rpt[ "+nextRepeating+" ]";
+ if (rptGroup >=0 ) {
+ s += " rpt:"+rptGroup+",i"+rptInd;
}
return s;
}
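For orientation, a brief note on the new bookkeeping (the example phrase is illustrative):

// For a sloppy phrase such as "a b a c a", the three PhrasePositions standing
// for the repeating term "a" would share one rptGroup id and carry rptInd
// 0, 1 and 2; non-repeating terms keep rptGroup == -1. This replaces the old
// per-term nextRepeating linked list removed above.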
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index 30faaba84c3..2f2a45c9635 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
@@ -137,23 +138,46 @@ public class PhraseQuery extends Query {
final DocsAndPositionsEnum postings;
final int docFreq;
final int position;
- final Term term;
+ final Term[] terms;
+ final int nTerms; // for faster comparisons
- public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
+ public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
- this.term = term;
+ nTerms = terms==null ? 0 : terms.length;
+ if (nTerms>0) {
+ if (terms.length==1) {
+ this.terms = terms;
+ } else {
+ Term[] terms2 = new Term[terms.length];
+ System.arraycopy(terms, 0, terms2, 0, terms.length);
+ Arrays.sort(terms2);
+ this.terms = terms2;
+ }
+ } else {
+ this.terms = null;
+ }
}
public int compareTo(PostingsAndFreq other) {
- if (docFreq == other.docFreq) {
- if (position == other.position) {
- return term.compareTo(other.term);
- }
+ if (docFreq != other.docFreq) {
+ return docFreq - other.docFreq;
+ }
+ if (position != other.position) {
return position - other.position;
}
- return docFreq - other.docFreq;
+ if (nTerms != other.nTerms) {
+ return nTerms - other.nTerms;
+ }
+ if (nTerms == 0) {
+ return 0;
+ }
+ for (int i=0; i<nTerms; i++) {
+ int res = terms[i].compareTo(other.terms[i]);
+ if (res!=0) {
+ return res;
+ }
+ }
+ return 0;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
if (postings.length > 0) {
- min = new PhrasePositions(postings[0].postings, postings[0].position, 0);
+ min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
max = min;
max.doc = -1;
for (int i = 1; i < postings.length; i++) {
- PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
+ PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
max.next = pp;
max = pp;
max.doc = -1;
diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
index dbd5ca84d41..2f2f9ed59a5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -19,22 +19,38 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.OpenBitSet;
final class SloppyPhraseScorer extends PhraseScorer {
- private int slop;
- private boolean checkedRepeats; // flag to only check in first candidate doc in case there are no repeats
- private boolean hasRepeats; // flag indicating that there are repeats (already checked in first candidate doc)
- private PhraseQueue pq; // for advancing min position
- private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset
+
+ private final int slop;
+ private final int numPostings;
+ private final PhraseQueue pq; // for advancing min position
+
+ private int end; // current largest phrase position
+
+ private boolean hasRpts; // flag indicating that there are repetitions (as checked in first candidate doc)
+ private boolean checkedRpts; // flag to only check for repetitions in first candidate doc
+ private boolean hasMultiTermRpts; //
+ private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset
+ private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SloppySimScorer docScorer) {
super(weight, postings, docScorer);
this.slop = slop;
+ this.numPostings = postings==null ? 0 : postings.length;
+ pq = new PhraseQueue(postings.length);
}
-
+
/**
* Score a candidate doc for all slop-valid position-combinations (matches)
* encountered while traversing/hopping the PhrasePositions.
@@ -55,31 +71,27 @@ final class SloppyPhraseScorer extends PhraseScorer {
*/
@Override
protected float phraseFreq() throws IOException {
- int end = initPhrasePositions();
- //printPositions(System.err, "INIT DONE:");
- if (end==Integer.MIN_VALUE) {
+ if (!initPhrasePositions()) {
return 0.0f;
}
-
float freq = 0.0f;
PhrasePositions pp = pq.pop();
int matchLength = end - pp.position;
- int next = pq.size()>0 ? pq.top().position : pp.position;
- //printQueue(System.err, pp, "Bef Loop: next="+next+" mlen="+end+"-"+pp.position+"="+matchLength);
- while (pp.nextPosition() && (end=advanceRepeats(pp, end)) != Integer.MIN_VALUE) {
- if (pp.position > next) {
- //printQueue(System.err, pp, "A: >next="+next+" matchLength="+matchLength);
+ int next = pq.top().position;
+ while (advancePP(pp)) {
+ if (hasRpts && !advanceRpts(pp)) {
+ break; // pps exhausted
+ }
+ if (pp.position > next) { // done minimizing current match-length
if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match
}
pq.add(pp);
pp = pq.pop();
- next = pq.size()>0 ? pq.top().position : pp.position;
+ next = pq.top().position;
matchLength = end - pp.position;
- //printQueue(System.err, pp, "B: >next="+next+" matchLength="+matchLength);
} else {
int matchLength2 = end - pp.position;
- //printQueue(System.err, pp, "C: mlen2<mlen: matchLength="+matchLength+" matchLength2="+matchLength2);
if (matchLength2 < matchLength) {
matchLength = matchLength2;
}
}
}
if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match
}
return freq;
}
- private int advanceRepeats(PhrasePositions pp, int end) throws IOException {
- int repeatsEnd = end;
- if (pp.position > repeatsEnd) {
- repeatsEnd = pp.position;
+ /** advance a PhrasePosition and update 'end', return false if exhausted */
+ private boolean advancePP(PhrasePositions pp) throws IOException {
+ if (!pp.nextPosition()) {
+ return false;
}
- if (!hasRepeats) {
- return repeatsEnd;
+ if (pp.position > end) {
+ end = pp.position;
}
+ return true;
+ }
+
+ /** pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser
+ * of the two colliding pps. Note that there can only be one collision, as by the initialization
+ * there were no collisions before pp was advanced. */
+ private boolean advanceRpts(PhrasePositions pp) throws IOException {
+ if (pp.rptGroup < 0) {
+ return true; // not a repeater
+ }
+ PhrasePositions[] rg = rptGroups[pp.rptGroup];
+ OpenBitSet bits = new OpenBitSet(rg.length); // for re-queuing after collisions are resolved
+ int k0 = pp.rptInd;
+ int k;
+ while((k=collide(pp)) >= 0) {
+ pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
+ if (!advancePP(pp)) {
+ return false; // exhausted
+ }
+ if (k != k0) { // careful: mark only those currently in the queue
+ bits.set(k); // mark that pp2 needs to be re-queued
+ }
+ }
+ // collisions resolved, now re-queue
+ // empty (partially) the queue until seeing all pps advanced for resolving collisions
+ int n = 0;
+ while (bits.cardinality() > 0) {
+ PhrasePositions pp2 = pq.pop();
+ rptStack[n++] = pp2;
+ if (pp2.rptGroup >= 0 && bits.get(pp2.rptInd)) {
+ bits.clear(pp2.rptInd);
+ }
+ }
+ // add back to queue
+ for (int i=n-1; i>=0; i--) {
+ pq.add(rptStack[i]);
+ }
+ return true;
+ }
+
+ /** compare two pps, but only by position and offset */
+ private PhrasePositions lesser(PhrasePositions pp, PhrasePositions pp2) {
+ if (pp.position < pp2.position ||
+ (pp.position == pp2.position && pp.offset < pp2.offset)) {
+ return pp;
+ }
+ return pp2;
+ }
+
+ /** index of a pp2 colliding with pp, or -1 if none */
+ private int collide(PhrasePositions pp) {
int tpPos = tpPos(pp);
- for (PhrasePositions pp2=pp.nextRepeating; pp2!=null; pp2=pp2.nextRepeating) {
- while (tpPos(pp2) <= tpPos) {
- if (!pp2.nextPosition()) {
- return Integer.MIN_VALUE;
- }
- }
- tpPos = tpPos(pp2);
- if (pp2.position > repeatsEnd) {
- repeatsEnd = pp2.position;
- }
- // "dirty" trick: with holes, given a pp, its repeating pp2 might have smaller position.
- // so in order to have the right "start" in matchLength computation we fake pp.position.
- // this relies on pp.nextPosition() not using pp.position.
- if (pp2.position < pp.position) {
- pp.position = pp2.position;
+ PhrasePositions[] rg = rptGroups[pp.rptGroup];
+ for (int i=0; i<rg.length; i++) {
+ PhrasePositions pp2 = rg[i];
+ if (pp2 != pp && tpPos(pp2) == tpPos) {
+ return pp2.rptInd;
+ }
+ }
+ return -1;
+ }
/**
* Initialize PhrasePositions in place.
* A one time initialization for this scorer:
- * <ul>
- * <li>Detect groups of repeating pps: those with same tpPos (tpPos==position in the doc) but different offsets in query.
- * <li>For each such group:
- * <ul>
- * <li>form an inner linked list of the repeating ones.
- * <li>propagate all group members but first so that they land on different tpPos().
- * </ul>
- * <li>Mark whether there are repetitions at all, so that scoring queries with no repetitions has no overhead due to this computation.
- * <li>Insert to pq only non repeating PPs, or PPs that are the first in a repeating group.
- * </ul>
+ * <ul>
+ * <li>Check if there are repetitions
+ * <li>If there are, find groups of repetitions.
+ * </ul>
* Examples:
* <ol>
* <li>no repetitions: <b>"ho my"~2</b>
@@ -145,118 +186,305 @@ final class SloppyPhraseScorer extends PhraseScorer {
* <li>repetitions: <b>"ho my my"~2</b>
* <li>repetitions: <b>"my ho my"~2</b>
* </ol>
*
- * @return end (max position), or Integer.MIN_VALUE if any term ran out (i.e. done)
+ * @return false if PPs are exhausted (and so current doc will not be a match)
*/
- private int initPhrasePositions() throws IOException {
- int end = Integer.MIN_VALUE;
-
- // no repeats at all (most common case is also the simplest one)
- if (checkedRepeats && !hasRepeats) {
- // build queue from list
- pq.clear();
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
- pp.firstPosition();
- if (pp.position > end) {
- end = pp.position;
- }
- pq.add(pp); // build pq from list
- }
- return end;
+ private boolean initPhrasePositions() throws IOException {
+ end = Integer.MIN_VALUE;
+ if (!checkedRpts) {
+ return initFirstTime();
}
-
- //printPositions(System.err, "Init: 1: Bef position");
-
- // position the pp's
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
- pp.firstPosition();
+ if (!hasRpts) {
+ initSimple();
+ return true; // PPs available
}
-
- //printPositions(System.err, "Init: 2: Aft position");
-
- // one time initialization for this scorer (done only for the first candidate doc)
- if (!checkedRepeats) {
- checkedRepeats = true;
- ArrayList<PhrasePositions> ppsA = new ArrayList<PhrasePositions>();
- PhrasePositions dummyPP = new PhrasePositions(null, -1, -1);
- // check for repeats
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
- if (pp.nextRepeating != null) {
- continue; // a repetition of an earlier pp
- }
- ppsA.add(pp);
- int tpPos = tpPos(pp);
- for (PhrasePositions prevB=pp, pp2=pp.next; pp2!= min; pp2=pp2.next) {
- if (
- pp2.nextRepeating != null // already detected as a repetition of an earlier pp
- || pp.offset == pp2.offset // not a repetition: the two PPs are originally in same offset in the query!
- || tpPos(pp2) != tpPos) { // not a repetition
- continue;
- }
- // a repetition
- hasRepeats = true;
- prevB.nextRepeating = pp2; // add pp2 to the repeats linked list
- pp2.nextRepeating = dummyPP; // allows not to handle the last pp in a sub-list
- prevB = pp2;
- }
- }
- if (hasRepeats) {
- // clean dummy markers
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
- if (pp.nextRepeating == dummyPP) {
- pp.nextRepeating = null;
- }
- }
- }
- nrPps = ppsA.toArray(new PhrasePositions[0]);
- pq = new PhraseQueue(nrPps.length);
- }
-
- //printPositions(System.err, "Init: 3: Aft check-repeats");
-
- // with repeats must advance some repeating pp's so they all start with differing tp's
- if (hasRepeats) {
- for (PhrasePositions pp: nrPps) {
- if ((end=advanceRepeats(pp, end)) == Integer.MIN_VALUE) {
- return Integer.MIN_VALUE; // ran out of a term -- done (no valid matches in current doc)
- }
- }
- }
-
- //printPositions(System.err, "Init: 4: Aft advance-repeats");
-
- // build queue from non repeating pps
+ return initComplex();
+ }
+
+ /** no repeats: simplest case, and most common. It is important to keep this piece of the code simple and efficient */
+ private void initSimple() throws IOException {
+ //System.err.println("initSimple: doc: "+min.doc);
pq.clear();
- for (PhrasePositions pp: nrPps) {
+ // position pps and build queue from list
+ for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ pp.firstPosition();
if (pp.position > end) {
end = pp.position;
}
pq.add(pp);
}
-
- return end;
}
+ /** with repeats: not so simple. */
+ private boolean initComplex() throws IOException {
+ //System.err.println("initComplex: doc: "+min.doc);
+ placeFirstPositions();
+ if (!advanceRepeatGroups()) {
+ return false; // PPs exhausted
+ }
+ fillQueue();
+ return true; // PPs available
+ }
+
+ /** move all PPs to their first position */
+ private void placeFirstPositions() throws IOException {
+ for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ pp.firstPosition();
+ }
+ }
+
+ /** Fill the queue (all pps are already placed) */
+ private void fillQueue() {
+ pq.clear();
+ for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ if (pp.position > end) {
+ end = pp.position;
+ }
+ pq.add(pp);
+ }
+ }
+
+ /** At initialization (each doc), each repetition group is sorted by (query) offset.
+ * This provides the start condition: no collisions.
+ * <p>Case 1: no multi-term repeats<br>
+ * It is sufficient to advance each pp in the group by one less than its group index.
+ * So the lesser pp is not advanced, the 2nd one is advanced once, the 3rd one twice, etc.
+ * <p>Case 2: multi-term repeats<br>
+ *
+ * @return false if PPs are exhausted.
+ */
+ private boolean advanceRepeatGroups() throws IOException {
+ for (PhrasePositions[] rg: rptGroups) {
+ if (hasMultiTermRpts) {
+ // more involved, some may not collide
+ int incr;
+ for (int i=0; i<rg.length; i+=incr) {
+ incr = 1;
+ PhrasePositions pp = rg[i];
+ int k;
+ while ((k=collide(pp)) >= 0) {
+ PhrasePositions pp2 = lesser(pp, rg[k]);
+ if (!advancePP(pp2)) { // at initialization always advance pp with higher offset
+ return false; // exhausted
+ }
+ if (pp2.rptInd < i) { // should not happen?
+ incr = 0;
+ break;
+ }
+ }
+ }
+ } else {
+ // simpler, we know exactly how much to advance
+ for (int j=1; j<rg.length; j++) {
+ for (int k=0; k<j; k++) {
+ if (!rg[j].nextPosition()) {
+ return false; // PPs exhausted
+ }
+ }
+ }
+ }
+ }
+ return true; // PPs available
+ }
+
+ /** initialize with checking for repeats. <p>
+ * If there are repetitions, check if multi-term postings (MTP) are involved.
+ * Without MTP, once PPs are placed in the first candidate doc, repeats (and groups) are visible.
+ * With MTP, a more complex check is needed, up-front, as there may be "hidden collisions".
+ * For example P1 has {A,B}, P2 has {B,C}, and the first doc is: "A C B". At start, P1 would point
+ * to "A", P2 to "C", and it would not be identified that P1 and P2 are repetitions of each other.
+ * The more complex initialization has two parts:
+ * (1) identification of repetition groups.
+ * (2) advancing repeat groups at the start of the doc.
+ * For (1), a possible solution is to just create a single repetition group,
+ * made of all repeating pps. But this would slow down the check for collisions,
+ * as all pps would need to be checked. Instead, we compute "connected regions"
+ * on the bipartite graph of postings and terms.
+ */
+ private boolean initFirstTime() throws IOException {
+ //System.err.println("initFirstTime: doc: "+min.doc);
+ checkedRpts = true;
+ placeFirstPositions();
+
+ LinkedHashMap<Term,Integer> rptTerms = repeatingTerms();
+ hasRpts = !rptTerms.isEmpty();
+
+ if (hasRpts) {
+ rptStack = new PhrasePositions[numPostings]; // needed with repetitions
+ ArrayList<ArrayList<PhrasePositions>> rgs = gatherRptGroups(rptTerms);
+ sortRptGroups(rgs);
+ if (!advanceRepeatGroups()) {
+ return false; // PPs exhausted
+ }
+ }
+
+ fillQueue();
+ return true; // PPs available
+ }
+
+ /** sort each repetition group by (query) offset.
+ * Done only once (at first doc) and allows faster initialization for each following doc. */
+ private void sortRptGroups(ArrayList<ArrayList<PhrasePositions>> rgs) {
+ rptGroups = new PhrasePositions[rgs.size()][];
+ Comparator<PhrasePositions> cmprtr = new Comparator<PhrasePositions>() {
+ public int compare(PhrasePositions pp1, PhrasePositions pp2) {
+ return pp1.offset - pp2.offset;
+ }
+ };
+ for (int i=0; i<rptGroups.length; i++) {
+ PhrasePositions[] rg = rgs.get(i).toArray(new PhrasePositions[0]);
+ Arrays.sort(rg, cmprtr);
+ rptGroups[i] = rg;
+ for (int j=0; j<rg.length; j++) {
+ rg[j].rptInd = j; // we use this index for efficient re-queuing
+ }
+ }
+ }
+
+ /** Detect repetition groups. Done once - for first doc */
+ private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term,Integer> rptTerms) throws IOException {
+ PhrasePositions[] rpp = repeatingPPs(rptTerms);
+ ArrayList<ArrayList<PhrasePositions>> res = new ArrayList<ArrayList<PhrasePositions>>();
+ if (!hasMultiTermRpts) {
+ // simpler - no multi-terms - can base on positions in first doc
+ for (int i=0; i<rpp.length; i++) {
+ PhrasePositions pp = rpp[i];
+ if (pp.rptGroup >=0) continue; // already marked as a repetition
+ int tpPos = tpPos(pp);
+ for (int j=i+1; j<rpp.length; j++) {
+ PhrasePositions pp2 = rpp[j];
+ if (
+ pp2.rptGroup >=0 // already marked as a repetition
+ || pp2.offset == pp.offset // not a repetition: two PPs are originally in same offset in the query!
+ || tpPos(pp2) != tpPos) { // not a repetition
+ continue;
+ }
+ // a repetition
+ int g = pp.rptGroup;
+ if (g < 0) {
+ g = res.size();
+ pp.rptGroup = g;
+ ArrayList<PhrasePositions> rl = new ArrayList<PhrasePositions>(2);
+ rl.add(pp);
+ res.add(rl);
+ }
+ pp2.rptGroup = g;
+ res.get(g).add(pp2);
+ }
+ }
+ } else {
+ // more involved - has multi-terms
+ ArrayList<HashSet<PhrasePositions>> tmp = new ArrayList<HashSet<PhrasePositions>>();
+ ArrayList<OpenBitSet> bb = ppTermsBitSets(rpp, rptTerms);
+ unionTermGroups(bb);
+ HashMap<Term,Integer> tg = termGroups(rptTerms, bb);
+ HashSet<Integer> distinctGroupIDs = new HashSet<Integer>(tg.values());
+ for (int i=0; i<distinctGroupIDs.size(); i++) {
+ tmp.add(new HashSet<PhrasePositions>());
+ }
+ }
+ for (PhrasePositions pp : rpp) {
+ for (Term t: pp.terms) {
+ if (rptTerms.containsKey(t)) {
+ int g = tg.get(t);
+ tmp.get(g).add(pp);
+ assert pp.rptGroup==-1 || pp.rptGroup==g;
+ pp.rptGroup = g;
+ }
+ }
+ }
+ for (HashSet hs : tmp) {
+ res.add(new ArrayList<PhrasePositions>(hs));
+ }
+ }
+ return res;
+ }
+
/** Actual position in doc of a PhrasePosition, relies on that position = tpPos - offset */
private final int tpPos(PhrasePositions pp) {
return pp.position + pp.offset;
}
-
-// private void printPositions(PrintStream ps, String title) {
-// ps.println();
-// ps.println("---- "+title);
-// int k = 0;
-// if (nrPps!=null) {
-// for (PhrasePositions pp: nrPps) {
-// ps.println(" " + k++ + " " + pp);
-// }
-// } else {
-// for (PhrasePositions pp=min; 0==k || pp!=min; pp = pp.next) {
-// ps.println(" " + k++ + " " + pp);
-// }
-// }
-// }
+ /** find repeating terms and assign them ordinal values */
+ private LinkedHashMap<Term,Integer> repeatingTerms() {
+ LinkedHashMap<Term,Integer> tord = new LinkedHashMap<Term,Integer>();
+ HashMap<Term,Integer> tcnt = new HashMap<Term,Integer>();
+ for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ for (Term t : pp.terms) {
+ Integer cnt0 = tcnt.get(t);
+ Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
+ tcnt.put(t, cnt);
+ if (cnt==2) {
+ tord.put(t,tord.size());
+ }
+ }
+ }
+ return tord;
+ }
+
+ /** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */
+ private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
+ ArrayList rp = new ArrayList();
+ for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ for (Term t : pp.terms) {
+ if (rptTerms.containsKey(t)) {
+ rp.add(pp);
+ hasMultiTermRpts |= (pp.terms.length > 1);
+ break;
+ }
+ }
+ }
+ return rp.toArray(new PhrasePositions[0]);
+ }
+
+ /** bit-sets - for each repeating pp, for each of its repeating terms, the term ordinal value is set */
+ private ArrayList<OpenBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
+ ArrayList<OpenBitSet> bb = new ArrayList<OpenBitSet>(rpp.length);
+ for (PhrasePositions pp : rpp) {
+ OpenBitSet b = new OpenBitSet(tord.size());
+ Integer ord;
+ for (Term t: pp.terms) {
+ if ((ord=tord.get(t))!=null) {
+ b.set(ord);
+ }
+ }
+ bb.add(b);
+ }
+ return bb;
+ }
+
+ /** union (term group) bit-sets until they are disjoint (O(n^2)), and each group has different terms */
+ private void unionTermGroups(ArrayList<OpenBitSet> bb) {
+ int incr;
+ for (int i=0; i<bb.size()-1; i+=incr) {
+ incr = 1;
+ int j = i+1;
+ while (j<bb.size()) {
+ if (bb.get(i).intersects(bb.get(j))) {
+ bb.get(i).union(bb.get(j));
+ bb.remove(j);
+ incr = 0;
+ } else {
+ ++j;
+ }
+ }
+ }
+ }
+
+ /** map each term to the single group that contains it */
+ private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<OpenBitSet> bb) throws IOException {
+ HashMap<Term,Integer> tg = new HashMap<Term,Integer>();
+ Term[] t = tord.keySet().toArray(new Term[0]);
+ for (int i=0; i<bb.size(); i++) { // i is the group no.
+ OpenBitSet bits = bb.get(i);
+ for (int ord=bits.nextSetBit(0); ord>=0; ord=bits.nextSetBit(ord+1)) {
+ tg.put(t[ord],i);
+ }
+ }
+ return tg;
+ }
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
* Dumps an {@link FST} to a GraphViz's <code>dot</code> language description
* for visualization. Example of use:
*
- *