LUCENE-3795: updating to trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3795_lsp_spatial_module@1300232 13f79535-47bb-0310-9956-ffa450edef68
Ryan McKinley 2012-03-13 16:43:38 +00:00
commit d5b39f875f
491 changed files with 17359 additions and 40345 deletions

View File

@ -124,6 +124,7 @@
<fileset dir="modules" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
</subant>
<delete dir="dist" failonerror="false" />
</sequential>
</target>

View File

@ -102,7 +102,7 @@
<classpathentry kind="lib" path="modules/benchmark/lib/commons-compress-1.2.jar"/>
<classpathentry kind="lib" path="modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>
<classpathentry kind="lib" path="solr/lib/apache-solr-noggit-r1211150.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar"/>
<classpathentry kind="lib" path="solr/lib/apache-solr-commons-csv-1.0-SNAPSHOT-r966014.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-fileupload-1.2.1.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-httpclient-3.1.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-io-2.1.jar"/>
@ -112,14 +112,22 @@
<classpathentry kind="lib" path="solr/lib/jcl-over-slf4j-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/junit-4.10.jar"/>
<classpathentry kind="lib" path="solr/lib/log4j-over-slf4j-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/servlet-api-2.4.jar"/>
<classpathentry kind="lib" path="solr/lib/slf4j-api-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/slf4j-jdk14-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/wstx-asl-3.2.7.jar"/>
<classpathentry kind="lib" path="solr/lib/zookeeper-3.3.4.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-6.1.26-patched-JETTY-1340.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-util-6.1.26-patched-JETTY-1340.jar"/>
<classpathentry kind="lib" path="solr/example/lib/servlet-api-2.5-20081211.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-continuation-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-deploy-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-http-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-io-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-jmx-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-security-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-server-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-servlet-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-util-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-webapp-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-xml-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/servlet-api-3.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.5.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-core-asl-1.5.2.jar"/>

View File

@ -70,8 +70,8 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>

View File

@ -42,8 +42,7 @@
<base.specification.version>4.0.0</base.specification.version>
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
<java.compat.version>1.6</java.compat.version>
<jetty.version>6.1.26</jetty.version>
<patched.jetty.version>6.1.26-patched-JETTY-1340</patched.jetty.version>
<jetty.version>8.1.2.v20120308</jetty.version>
<slf4j.version>1.6.1</slf4j.version>
<tika.version>1.0</tika.version>
</properties>
@ -296,14 +295,24 @@
<version>2.2</version>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<version>${patched.jetty.version}</version>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>${patched.jetty.version}</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@ -331,9 +340,9 @@
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.4</version>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<version>3.0.0.v201112011016</version>
</dependency>
<dependency>
<groupId>com.spatial4j</groupId>
@ -495,7 +504,7 @@
</plugin>
<plugin>
<groupId>org.mortbay.jetty</groupId>
<artifactId>maven-jetty-plugin</artifactId>
<artifactId>jetty-maven-plugin</artifactId>
<version>${jetty.version}</version>
</plugin>
<plugin>
@ -636,7 +645,7 @@
<artifactId>solr-commons-csv</artifactId>
<version>${project.version}</version>
<packaging>jar</packaging>
<file>solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar</file>
<file>solr/lib/apache-solr-commons-csv-1.0-SNAPSHOT-r966014.jar</file>
</configuration>
</execution>
<execution>
@ -653,34 +662,6 @@
<file>solr/lib/apache-solr-noggit-r1211150.jar</file>
</configuration>
</execution>
<execution>
<id>install-jetty</id>
<phase>install</phase>
<goals>
<goal>install-file</goal>
</goals>
<configuration>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<version>${patched.jetty.version}</version>
<packaging>jar</packaging>
<file>solr/example/lib/jetty-${patched.jetty.version}.jar</file>
</configuration>
</execution>
<execution>
<id>install-jetty-util</id>
<phase>install</phase>
<goals>
<goal>install-file</goal>
</goals>
<configuration>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>${patched.jetty.version}</version>
<packaging>jar</packaging>
<file>solr/example/lib/jetty-util-${patched.jetty.version}.jar</file>
</configuration>
</execution>
<execution>
<id>install-jsonic</id>
<phase>install</phase>

View File

@ -94,12 +94,17 @@
</exclusions>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<scope>test</scope>
</dependency>

View File

@ -73,12 +73,17 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<scope>test</scope>
</dependency>

View File

@ -172,15 +172,20 @@
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<optional>true</optional> <!-- Only used for tests and one command-line utility: JettySolrRunner -->
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<optional>true</optional> <!-- Only used for tests and one command-line utility: JettySolrRunner -->
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
<optional>true</optional> <!-- Only used for tests and one command-line utility: JettySolrRunner -->
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>wstx-asl</artifactId>
@ -193,8 +198,8 @@
</exclusions>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<!-- compile scope; solr-core is a jar not a war -->
</dependency>
<dependency>

View File

@ -58,8 +58,8 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
@ -98,9 +98,9 @@
</configuration>
</plugin>
<plugin>
<!-- http://docs.codehaus.org/display/JETTY/Maven+Jetty+Plugin -->
<!-- http://wiki.eclipse.org/Jetty/Feature/Jetty_Maven_Plugin -->
<groupId>org.mortbay.jetty</groupId>
<artifactId>maven-jetty-plugin</artifactId>
<artifactId>jetty-maven-plugin</artifactId>
<configuration>
<scanIntervalSeconds>10</scanIntervalSeconds>
<webAppConfig>

View File

@ -410,6 +410,10 @@ API Changes
method maybeReopen has been deprecated in favor of maybeRefresh().
(Shai Erera, Mike McCandless, Simon Willnauer)
* LUCENE-3859: AtomicReader.hasNorms(field) is deprecated; instead, you
can inspect the FieldInfo yourself to see if norms are present, which
also allows you to get the type. (Robert Muir)
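A minimal sketch of the replacement pattern (not part of this commit; the reader variable and the "body" field name are placeholders, and the accessors are assumed from the trunk API of the time):
// Illustrative only: instead of the deprecated AtomicReader.hasNorms(field),
// look the field up in the reader's FieldInfos and ask the FieldInfo directly.
FieldInfo info = reader.getFieldInfos().fieldInfo("body");
if (info != null && info.hasNorms()) {
  // norms are present; the same FieldInfo also exposes the norms type
}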
New features
* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
@ -919,7 +923,13 @@ Bug fixes
from the delegate DocIdSet.iterator(), which is allowed to return
null by DocIdSet specification when no documents match.
(Shay Banon via Uwe Schindler)
* LUCENE-3821: SloppyPhraseScorer missed documents that ExactPhraseScorer finds.
When a phrase query had repeating terms (e.g. "yes ho yes"),
the sloppy query missed documents that the exact query matched.
Fixed, except for repeating multiterms (e.g. "yes ho yes|no").
(Robert Muir, Doron Cohen)
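For context, the affected case is a sloppy PhraseQuery containing a repeated term; a hypothetical construction (the field name "f" is a placeholder, not from this commit):
PhraseQuery pq = new PhraseQuery();   // "yes ho yes"
pq.add(new Term("f", "yes"));
pq.add(new Term("f", "ho"));
pq.add(new Term("f", "yes"));
pq.setSlop(2);                        // sloppy matching previously missed some documents here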
Optimizations
* LUCENE-3653: Improve concurrency in VirtualMethod and AttributeSource by
@ -932,6 +942,9 @@ Documentation
Build
* LUCENE-3857: exceptions from other threads in beforeclass/etc do not fail
the test (Dawid Weiss)
* LUCENE-3847: LuceneTestCase will now check for modifications of System
properties before and after each test (and suite). If changes are detected,
the test will fail. A rule can be used to reset system properties to

View File

@ -170,7 +170,7 @@
<property name="junit.output.dir.backwards" location="${build.dir.backwards}/test"/>
<property name="junit.reports" location="${build.dir}/test/reports"/>
<property name="junit.reports.backwards" location="${build.dir.backwards}/test/reports"/>
<property name="junit.excludes" value=""/>
<property name="junit.excludes" value="**/Abstract*"/>
<condition property="junit.details.formatter"
value="org.apache.tools.ant.taskdefs.optional.junit.BriefJUnitResultFormatter"
else="org.apache.lucene.util.LuceneJUnitResultFormatter">

View File

@ -72,6 +72,8 @@ New Features
start/endOffset, if offsets are indexed. (Alan Woodward via Mike
McCandless)
* LUCENE-3802: Support for grouped faceting. (Martijn van Groningen)
API Changes
* LUCENE-2606: Changed RegexCapabilities interface to fix thread
@ -242,6 +244,10 @@ Bug Fixes
that take stopwords and stem exclusion tables also initialize
the default stem overrides (e.g. kind/kinder, fiets). (Robert Muir)
* LUCENE-3831: avoid NPE if the SpanQuery has a null field (eg a
SpanOrQuery with no clauses added). (Alan Woodward via Mike
McCandless).
Documentation
* LUCENE-3599: Javadocs for DistanceUtils.haversine() were incorrectly

View File

@ -43,8 +43,8 @@ class MemoryIndexNormDocValues extends DocValues {
}
@Override
public Type type() {
return source.type();
public Type getType() {
return source.getType();
}
@Override

View File

@ -21,6 +21,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
@ -40,11 +41,16 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
@ -225,4 +231,28 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
reader.close();
}
// LUCENE-3831
public void testNullPointerException() throws IOException {
RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<RegexpQuery>(regex);
MemoryIndex mindex = new MemoryIndex();
mindex.addField("field", new MockAnalyzer(random).tokenStream("field", new StringReader("hello there")));
// This throws an NPE
assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
// LUCENE-3831
public void testPassesIfWrapped() throws IOException {
RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));
MemoryIndex mindex = new MemoryIndex();
mindex.addField("field", new MockAnalyzer(random).tokenStream("field", new StringReader("hello there")));
// This passes though
assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
}

View File

@ -197,6 +197,7 @@ public class BlockTermsReader extends FieldsProducer {
@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}

View File

@ -211,6 +211,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.Set;
import java.util.ServiceLoader; // javadocs
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriterConfig; // javadocs
@ -26,7 +27,15 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.util.NamedSPILoader;
/**
* Encodes/decodes an inverted index segment
* Encodes/decodes an inverted index segment.
* <p>
* Note, when extending this class, the name ({@link #getName}) is
* written into the index. In order for the segment to be read, the
* name must resolve to your implementation via {@link #forName(String)}.
* This method uses Java's
* {@link ServiceLoader Service Provider Interface} to resolve codec names.
* <p>
* @see ServiceLoader
*/
public abstract class Codec implements NamedSPILoader.NamedSPI {
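A hedged usage sketch of the SPI lookup described above (not part of this commit; the analyzer variable is assumed, and a custom codec would additionally need a META-INF/services/org.apache.lucene.codecs.Codec entry so that forName can resolve its name):
Codec codec = Codec.forName("Lucene40");                    // name resolved via the ServiceLoader SPI
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
iwc.setCodec(codec);                                        // this codec's name is written into new segments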

View File

@ -0,0 +1,513 @@
package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
* @lucene.experimental
* @lucene.internal
*/
public abstract class DocValuesArraySource extends Source {
private static final Map<Type, DocValuesArraySource> TEMPLATES;
static {
EnumMap<Type, DocValuesArraySource> templates = new EnumMap<Type, DocValuesArraySource>(
Type.class);
templates.put(Type.FIXED_INTS_16, new ShortValues());
templates.put(Type.FIXED_INTS_32, new IntValues());
templates.put(Type.FIXED_INTS_64, new LongValues());
templates.put(Type.FIXED_INTS_8, new ByteValues());
templates.put(Type.FLOAT_32, new FloatValues());
templates.put(Type.FLOAT_64, new DoubleValues());
TEMPLATES = Collections.unmodifiableMap(templates);
}
public static DocValuesArraySource forType(Type type) {
return TEMPLATES.get(type);
}
protected final int bytesPerValue;
DocValuesArraySource(int bytesPerValue, Type type) {
super(type);
this.bytesPerValue = bytesPerValue;
}
@Override
public abstract BytesRef getBytes(int docID, BytesRef ref);
public abstract DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException;
public abstract DocValuesArraySource newFromArray(Object array);
@Override
public final boolean hasArray() {
return true;
}
public void toBytes(long value, BytesRef bytesRef) {
copyLong(bytesRef, value);
}
public void toBytes(double value, BytesRef bytesRef) {
copyLong(bytesRef, Double.doubleToRawLongBits(value));
}
final static class ByteValues extends DocValuesArraySource {
private final byte[] values;
ByteValues() {
super(1, Type.FIXED_INTS_8);
values = new byte[0];
}
private ByteValues(byte[] array) {
super(1, Type.FIXED_INTS_8);
values = array;
}
private ByteValues(IndexInput input, int numDocs) throws IOException {
super(1, Type.FIXED_INTS_8);
values = new byte[numDocs];
input.readBytes(values, 0, values.length, false);
}
@Override
public byte[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ByteValues(input, numDocs);
}
@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof byte[];
return new ByteValues((byte[]) array);
}
public void toBytes(long value, BytesRef bytesRef) {
if (bytesRef.bytes.length == 0) {
bytesRef.bytes = new byte[1];
}
bytesRef.bytes[0] = (byte) (0xFFL & value);
bytesRef.offset = 0;
bytesRef.length = 1;
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}
};
final static class ShortValues extends DocValuesArraySource {
private final short[] values;
ShortValues() {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = new short[0];
}
private ShortValues(short[] array) {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = array;
}
private ShortValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = new short[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readShort();
}
}
@Override
public short[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ShortValues(input, numDocs);
}
public void toBytes(long value, BytesRef bytesRef) {
copyShort(bytesRef, (short) (0xFFFFL & value));
}
@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof short[];
return new ShortValues((short[]) array);
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}
};
final static class IntValues extends DocValuesArraySource {
private final int[] values;
IntValues() {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = new int[0];
}
private IntValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = new int[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readInt();
}
}
private IntValues(int[] array) {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = array;
}
@Override
public int[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return 0xFFFFFFFF & values[docID];
}
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new IntValues(input, numDocs);
}
public void toBytes(long value, BytesRef bytesRef) {
copyInt(bytesRef, (int) (0xFFFFFFFF & value));
}
@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof int[];
return new IntValues((int[]) array);
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}
};
final static class LongValues extends DocValuesArraySource {
private final long[] values;
LongValues() {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = new long[0];
}
private LongValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = new long[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readLong();
}
}
private LongValues(long[] array) {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = array;
}
@Override
public long[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new LongValues(input, numDocs);
}
@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof long[];
return new LongValues((long[])array);
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}
};
final static class FloatValues extends DocValuesArraySource {
private final float[] values;
FloatValues() {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = new float[0];
}
private FloatValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = new float[numDocs];
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Float.intBitsToFloat(input.readInt());
}
}
private FloatValues(float[] array) {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = array;
}
@Override
public float[] getArray() {
return values;
}
@Override
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public void toBytes(double value, BytesRef bytesRef) {
copyInt(bytesRef, Float.floatToRawIntBits((float)value));
}
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new FloatValues(input, numDocs);
}
@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof float[];
return new FloatValues((float[]) array);
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getFloat(docID), ref);
return ref;
}
};
final static class DoubleValues extends DocValuesArraySource {
private final double[] values;
DoubleValues() {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = new double[0];
}
private DoubleValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = new double[numDocs];
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Double.longBitsToDouble(input.readLong());
}
}
private DoubleValues(double[] array) {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = array;
}
@Override
public double[] getArray() {
return values;
}
@Override
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new DoubleValues(input, numDocs);
}
@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof double[];
return new DoubleValues((double[]) array);
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getFloat(docID), ref);
return ref;
}
};
/**
* Copies the given long value and encodes it as 8 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 8 and resizes the
* reference array if needed.
*/
public static void copyLong(BytesRef ref, long value) {
if (ref.bytes.length < 8) {
ref.bytes = new byte[8];
}
copyInternal(ref, (int) (value >> 32), ref.offset = 0);
copyInternal(ref, (int) value, 4);
ref.length = 8;
}
/**
* Copies the given int value and encodes it as 4 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 4 and resizes the
* reference array if needed.
*/
public static void copyInt(BytesRef ref, int value) {
if (ref.bytes.length < 4) {
ref.bytes = new byte[4];
}
copyInternal(ref, value, ref.offset = 0);
ref.length = 4;
}
/**
* Copies the given short value and encodes it as a 2 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 2 and resizes the
* reference array if needed.
*/
public static void copyShort(BytesRef ref, short value) {
if (ref.bytes.length < 2) {
ref.bytes = new byte[2];
}
ref.offset = 0;
ref.bytes[ref.offset] = (byte) (value >> 8);
ref.bytes[ref.offset + 1] = (byte) (value);
ref.length = 2;
}
private static void copyInternal(BytesRef ref, int value, int startOffset) {
ref.bytes[startOffset] = (byte) (value >> 24);
ref.bytes[startOffset + 1] = (byte) (value >> 16);
ref.bytes[startOffset + 2] = (byte) (value >> 8);
ref.bytes[startOffset + 3] = (byte) (value);
}
/**
* Converts 2 consecutive bytes from the current offset to a short. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static short asShort(BytesRef b) {
return (short) (0xFFFF & ((b.bytes[b.offset] & 0xFF) << 8) | (b.bytes[b.offset + 1] & 0xFF));
}
/**
* Converts 4 consecutive bytes from the current offset to an int. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static int asInt(BytesRef b) {
return asIntInternal(b, b.offset);
}
/**
* Converts 8 consecutive bytes from the current offset to a long. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static long asLong(BytesRef b) {
return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
b.offset + 4) & 0xFFFFFFFFL);
}
private static int asIntInternal(BytesRef b, int pos) {
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
}
}
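A hedged usage sketch of the helpers above (not part of this commit): a big-endian round-trip through copyLong/asLong and a template lookup via forType.
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.util.BytesRef;
public class DocValuesArraySourceDemo {
  public static void main(String[] args) {
    BytesRef scratch = new BytesRef();
    DocValuesArraySource.copyLong(scratch, 42L);               // encodes 8 bytes, big-endian
    System.out.println(DocValuesArraySource.asLong(scratch));  // prints 42
    // Look up the shared template for a value type and wrap an existing array.
    DocValuesArraySource longs = DocValuesArraySource.forType(Type.FIXED_INTS_64);
    Source source = longs.newFromArray(new long[] { 1L, 2L, 3L });
    System.out.println(source.getInt(1));                      // prints 2
  }
}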

View File

@ -22,6 +22,7 @@ import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.Bits;
@ -40,6 +41,7 @@ public abstract class DocValuesConsumer {
protected final BytesRef spare = new BytesRef();
protected abstract Type getType();
/**
* Adds the given {@link IndexableField} instance to this
* {@link DocValuesConsumer}
@ -110,7 +112,7 @@ public abstract class DocValuesConsumer {
final Source source = reader.getDirectSource();
assert source != null;
int docID = docBase;
final DocValues.Type type = reader.type();
final Type type = getType();
final Field scratchField;
switch(type) {
case VAR_INTS:
@ -160,7 +162,7 @@ public abstract class DocValuesConsumer {
*/
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc)
throws IOException {
switch(source.type()) {
switch(getType()) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:

View File

@ -1,4 +1,4 @@
package org.apache.lucene.codecs.lucene40.values;
package org.apache.lucene.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -24,10 +24,6 @@ import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.codecs.lucene40.values.Bytes;
import org.apache.lucene.codecs.lucene40.values.Floats;
import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.DocValues;
@ -40,7 +36,7 @@ import org.apache.lucene.util.BytesRef;
* Abstract base class for PerDocProducer implementations
* @lucene.experimental
*/
public abstract class DocValuesReaderBase extends PerDocProducer {
public abstract class PerDocProducerBase extends PerDocProducer {
protected abstract void closeInternal(Collection<? extends Closeable> closeables) throws IOException;
protected abstract Map<String, DocValues> docValues();
@ -70,9 +66,7 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
for (FieldInfo fieldInfo : fieldInfos) {
if (canLoad(fieldInfo)) {
final String field = fieldInfo.name;
// TODO can we have a compound file per segment and codec for
// docvalues?
final String id = DocValuesWriterBase.docValuesId(segment,
final String id = docValuesId(segment,
fieldInfo.number);
values.put(field,
loadDocValues(docCount, dir, id, getDocValuesType(fieldInfo), context));
@ -97,7 +91,11 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
}
protected boolean anyDocValuesFields(FieldInfos infos) {
return infos.anyDocValuesFields();
return infos.hasDocValues();
}
public static String docValuesId(String segmentsName, int fieldId) {
return segmentsName + "_" + fieldId;
}
/**
@ -119,33 +117,6 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
* @throws IllegalArgumentException
* if the given {@link Type} is not supported
*/
protected DocValues loadDocValues(int docCount, Directory dir, String id,
DocValues.Type type, IOContext context) throws IOException {
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
case BYTES_FIXED_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
case BYTES_VAR_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
case BYTES_VAR_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
case BYTES_VAR_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
default:
throw new IllegalStateException("unrecognized index values mode " + type);
}
}
protected abstract DocValues loadDocValues(int docCount, Directory dir, String id,
DocValues.Type type, IOContext context) throws IOException;
}

View File

@ -107,7 +107,7 @@ class Lucene3xFieldInfosReader extends FieldInfosReader {
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.BYTES_VAR_STRAIGHT : null);
omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.FIXED_INTS_8 : null);
}
if (input.getFilePointer() != input.length()) {

View File

@ -76,7 +76,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
try {
long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now)
for (FieldInfo fi : fields) {
if (fi.normsPresent()) {
if (fi.hasNorms()) {
String fileName = getNormFilename(segmentName, normGen, fi.number);
Directory d = hasSeparateNorms(normGen, fi.number) ? separateNormsDir : dir;
@ -235,7 +235,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
}
@Override
public Type type() {
public Type getType() {
return Type.FIXED_INTS_8;
}

View File

@ -24,19 +24,24 @@ import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.lucene40.values.DocValuesReaderBase;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.lucene40.values.Bytes;
import org.apache.lucene.codecs.lucene40.values.Floats;
import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;
/**
* Default PerDocProducer implementation that uses compound file.
* @lucene.experimental
*/
public class Lucene40DocValuesProducer extends DocValuesReaderBase {
public class Lucene40DocValuesProducer extends PerDocProducerBase {
protected final TreeMap<String,DocValues> docValues;
private final Directory cfs;
/**
@ -71,4 +76,35 @@ public class Lucene40DocValuesProducer extends DocValuesReaderBase {
IOUtils.close(closeables);
}
}
@Override
protected DocValues loadDocValues(int docCount, Directory dir, String id,
Type type, IOContext context) throws IOException {
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
case BYTES_FIXED_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
case BYTES_VAR_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
case BYTES_VAR_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
case BYTES_VAR_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
default:
throw new IllegalStateException("unrecognized index values mode " + type);
}
}
}

View File

@ -62,7 +62,7 @@ public class Lucene40NormsFormat extends NormsFormat {
@Override
protected boolean canLoad(FieldInfo info) {
return info.normsPresent();
return info.hasNorms();
}
@Override
@ -92,7 +92,7 @@ public class Lucene40NormsFormat extends NormsFormat {
@Override
protected boolean canMerge(FieldInfo info) {
return info.normsPresent();
return info.hasNorms();
}
@Override
@ -104,7 +104,7 @@ public class Lucene40NormsFormat extends NormsFormat {
final String normsFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION);
FieldInfos fieldInfos = segmentInfo.getFieldInfos();
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.normsPresent()) {
if (fieldInfo.hasNorms()) {
final String normsEntriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
files.add(normsFileName);
files.add(normsEntriesFileName);

View File

@ -23,7 +23,6 @@ import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@ -64,7 +63,7 @@ import org.apache.lucene.util.packed.PackedInts;
*
* @lucene.experimental
*/
final class Bytes {
public final class Bytes {
static final String DV_SEGMENT_SUFFIX = "dv";
@ -242,8 +241,8 @@ final class Bytes {
private final IOContext context;
protected BytesWriterBase(Directory dir, String id, String codecName,
int version, Counter bytesUsed, IOContext context) throws IOException {
super(bytesUsed);
int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
super(bytesUsed, type);
this.id = id;
this.dir = dir;
this.codecName = codecName;
@ -292,25 +291,11 @@ final class Bytes {
}
return idxOut;
}
/**
* Must be called only with increasing docIDs. It's OK for some docIDs to be
* skipped; they will be filled with 0 bytes.
*/
protected
abstract void add(int docID, BytesRef bytes) throws IOException;
@Override
public abstract void finish(int docCount) throws IOException;
@Override
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
add(docID, source.getBytes(sourceDoc, bytesRef));
}
@Override
public void add(int docID, IndexableField docValue) throws IOException {
add(docID, docValue.binaryValue());
}
}
/**
@ -378,7 +363,7 @@ final class Bytes {
}
@Override
public Type type() {
public Type getType() {
return type;
}
@ -393,22 +378,22 @@ final class Bytes {
protected long maxBytes = 0;
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
int codecVersion, Counter bytesUsed, IOContext context)
int codecVersion, Counter bytesUsed, IOContext context, Type type)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false);
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam);
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
}
protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, codecName, codecVersion, bytesUsed, context);
Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
super(dir, id, codecName, codecVersion, bytesUsed, context, type);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
@ -430,7 +415,9 @@ final class Bytes {
}
@Override
protected void add(int docID, BytesRef bytes) throws IOException {
public void add(int docID, IndexableField value) throws IOException {
BytesRef bytes = value.binaryValue();
assert bytes != null;
if (bytes.length == 0) { // default value - skip it
return;
}

View File

@ -1,120 +0,0 @@
package org.apache.lucene.codecs.lucene40.values;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import org.apache.lucene.util.BytesRef;
/**
* Package private BytesRefUtils - can move this into the o.a.l.utils package if
* needed.
*
* @lucene.internal
*/
final class BytesRefUtils {
private BytesRefUtils() {
}
/**
* Copies the given long value and encodes it as 8 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 8 and resizes the
* reference array if needed.
*/
public static void copyLong(BytesRef ref, long value) {
if (ref.bytes.length < 8) {
ref.bytes = new byte[8];
}
copyInternal(ref, (int) (value >> 32), ref.offset = 0);
copyInternal(ref, (int) value, 4);
ref.length = 8;
}
/**
* Copies the given int value and encodes it as 4 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 4 and resizes the
* reference array if needed.
*/
public static void copyInt(BytesRef ref, int value) {
if (ref.bytes.length < 4) {
ref.bytes = new byte[4];
}
copyInternal(ref, value, ref.offset = 0);
ref.length = 4;
}
/**
* Copies the given short value and encodes it as a 2 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 2 and resizes the
* reference array if needed.
*/
public static void copyShort(BytesRef ref, short value) {
if (ref.bytes.length < 2) {
ref.bytes = new byte[2];
}
ref.bytes[ref.offset] = (byte) (value >> 8);
ref.bytes[ref.offset + 1] = (byte) (value);
ref.length = 2;
}
private static void copyInternal(BytesRef ref, int value, int startOffset) {
ref.bytes[startOffset] = (byte) (value >> 24);
ref.bytes[startOffset + 1] = (byte) (value >> 16);
ref.bytes[startOffset + 2] = (byte) (value >> 8);
ref.bytes[startOffset + 3] = (byte) (value);
}
/**
* Converts 2 consecutive bytes from the current offset to a short. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static short asShort(BytesRef b) {
return (short) (0xFFFF & ((b.bytes[b.offset] & 0xFF) << 8) | (b.bytes[b.offset + 1] & 0xFF));
}
/**
* Converts 4 consecutive bytes from the current offset to an int. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static int asInt(BytesRef b) {
return asIntInternal(b, b.offset);
}
/**
* Converts 8 consecutive bytes from the current offset to a long. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static long asLong(BytesRef b) {
return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
b.offset + 4) & 0xFFFFFFFFL);
}
private static int asIntInternal(BytesRef b, int pos) {
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
}
}

View File

@ -1,306 +0,0 @@
package org.apache.lucene.codecs.lucene40.values;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
* @lucene.experimental
*/
abstract class DocValuesArray extends Source {
static final Map<Type, DocValuesArray> TEMPLATES;
static {
EnumMap<Type, DocValuesArray> templates = new EnumMap<Type, DocValuesArray>(
Type.class);
templates.put(Type.FIXED_INTS_16, new ShortValues());
templates.put(Type.FIXED_INTS_32, new IntValues());
templates.put(Type.FIXED_INTS_64, new LongValues());
templates.put(Type.FIXED_INTS_8, new ByteValues());
templates.put(Type.FLOAT_32, new FloatValues());
templates.put(Type.FLOAT_64, new DoubleValues());
TEMPLATES = Collections.unmodifiableMap(templates);
}
protected final int bytesPerValue;
DocValuesArray(int bytesPerValue, Type type) {
super(type);
this.bytesPerValue = bytesPerValue;
}
public abstract DocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException;
@Override
public final boolean hasArray() {
return true;
}
void toBytes(long value, BytesRef bytesRef) {
BytesRefUtils.copyLong(bytesRef, value);
}
void toBytes(double value, BytesRef bytesRef) {
BytesRefUtils.copyLong(bytesRef, Double.doubleToRawLongBits(value));
}
final static class ByteValues extends DocValuesArray {
private final byte[] values;
ByteValues() {
super(1, Type.FIXED_INTS_8);
values = new byte[0];
}
private ByteValues(IndexInput input, int numDocs) throws IOException {
super(1, Type.FIXED_INTS_8);
values = new byte[numDocs];
input.readBytes(values, 0, values.length, false);
}
@Override
public byte[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ByteValues(input, numDocs);
}
void toBytes(long value, BytesRef bytesRef) {
bytesRef.bytes[0] = (byte) (0xFFL & value);
}
};
final static class ShortValues extends DocValuesArray {
private final short[] values;
ShortValues() {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = new short[0];
}
private ShortValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = new short[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readShort();
}
}
@Override
public short[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ShortValues(input, numDocs);
}
void toBytes(long value, BytesRef bytesRef) {
BytesRefUtils.copyShort(bytesRef, (short) (0xFFFFL & value));
}
};
final static class IntValues extends DocValuesArray {
private final int[] values;
IntValues() {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = new int[0];
}
private IntValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = new int[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readInt();
}
}
@Override
public int[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return 0xFFFFFFFF & values[docID];
}
@Override
public DocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new IntValues(input, numDocs);
}
void toBytes(long value, BytesRef bytesRef) {
BytesRefUtils.copyInt(bytesRef, (int) (0xFFFFFFFF & value));
}
};
final static class LongValues extends DocValuesArray {
private final long[] values;
LongValues() {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = new long[0];
}
private LongValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = new long[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readLong();
}
}
@Override
public long[] getArray() {
return values;
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new LongValues(input, numDocs);
}
};
final static class FloatValues extends DocValuesArray {
private final float[] values;
FloatValues() {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = new float[0];
}
private FloatValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = new float[numDocs];
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Float.intBitsToFloat(input.readInt());
}
}
@Override
public float[] getArray() {
return values;
}
@Override
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
void toBytes(double value, BytesRef bytesRef) {
BytesRefUtils.copyInt(bytesRef, Float.floatToRawIntBits((float)value));
}
@Override
public DocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new FloatValues(input, numDocs);
}
};
final static class DoubleValues extends DocValuesArray {
private final double[] values;
DoubleValues() {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = new double[0];
}
private DoubleValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = new double[numDocs];
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Double.longBitsToDouble(input.readLong());
}
}
@Override
public double[] getArray() {
return values;
}
@Override
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public DocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new DoubleValues(input, numDocs);
}
};
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.lucene40.values.Writer;
import org.apache.lucene.index.FieldInfo;
@ -81,14 +82,10 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
@Override
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
return Writer.create(valueType,
docValuesId(segmentName, field.number),
PerDocProducerBase.docValuesId(segmentName, field.number),
getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
}
public static String docValuesId(String segmentsName, int fieldId) {
return segmentsName + "_" + fieldId;
}
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();

View File

@ -46,7 +46,7 @@ class FixedDerefBytesImpl {
public static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
}
@Override
@ -84,7 +84,7 @@ class FixedDerefBytesImpl {
@Override
public Source getDirectSource()
throws IOException {
return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type());
return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType());
}
@Override

View File

@ -58,7 +58,7 @@ class FixedSortedBytesImpl {
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
this.comp = comp;
}

View File

@ -22,10 +22,12 @@ import java.io.IOException;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesReaderBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesSourceBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesWriterBase;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -52,6 +54,7 @@ class FixedStraightBytesImpl {
static final int VERSION_CURRENT = VERSION_START;
static abstract class FixedBytesWriterBase extends BytesWriterBase {
protected final DocValuesField bytesSpareField = new DocValuesField("", new BytesRef(), Type.BYTES_FIXED_STRAIGHT);
protected int lastDocID = -1;
// start at -1 if the first added value is > 0
protected int size = -1;
@ -60,13 +63,20 @@ class FixedStraightBytesImpl {
protected FixedBytesWriterBase(Directory dir, String id, String codecName,
int version, Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, codecName, version, bytesUsed, context);
this(dir, id, codecName, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
}
protected FixedBytesWriterBase(Directory dir, String id, String codecName,
int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
super(dir, id, codecName, version, bytesUsed, context, type);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
pool.nextBuffer();
}
@Override
protected void add(int docID, BytesRef bytes) throws IOException {
public void add(int docID, IndexableField value) throws IOException {
final BytesRef bytes = value.binaryValue();
assert bytes != null;
assert lastDocID < docID;
if (size == -1) {
@ -277,7 +287,7 @@ class FixedStraightBytesImpl {
@Override
public Source getDirectSource() throws IOException {
return new DirectFixedStraightSource(cloneData(), size, type());
return new DirectFixedStraightSource(cloneData(), size, getType());
}
@Override

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene40.values;
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@ -39,7 +40,7 @@ import org.apache.lucene.util.IOUtils;
*
* @lucene.experimental
*/
class Floats {
public class Floats {
protected static final String CODEC_NAME = "Floats";
protected static final int VERSION_START = 0;
@ -69,31 +70,28 @@ class Floats {
final static class FloatsWriter extends FixedStraightBytesImpl.Writer {
private final int size;
private final DocValuesArray template;
private final DocValuesArraySource template;
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, Type type) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
size = typeToSize(type);
this.bytesRef = new BytesRef(size);
bytesRef.length = size;
template = DocValuesArray.TEMPLATES.get(type);
template = DocValuesArraySource.forType(type);
assert template != null;
}
protected void add(int docID, double v) throws IOException {
template.toBytes(v, bytesRef);
add(docID, bytesRef);
}
@Override
public void add(int docID, IndexableField docValue) throws IOException {
add(docID, docValue.numericValue().doubleValue());
}
@Override
protected boolean tryBulkMerge(DocValues docValues) {
// only bulk merge if value type is the same otherwise size differs
return super.tryBulkMerge(docValues) && docValues.type() == template.type();
return super.tryBulkMerge(docValues) && docValues.getType() == template.getType();
}
@Override
public void add(int docID, IndexableField value) throws IOException {
template.toBytes(value.numericValue().doubleValue(), bytesRef);
bytesSpareField.setBytesValue(bytesRef);
super.add(docID, bytesSpareField);
}
@Override
@ -104,11 +102,11 @@ class Floats {
}
final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader {
final DocValuesArray arrayTemplate;
final DocValuesArraySource arrayTemplate;
FloatsReader(Directory dir, String id, int maxDoc, IOContext context, Type type)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
arrayTemplate = DocValuesArray.TEMPLATES.get(type);
arrayTemplate = DocValuesArraySource.forType(type);
assert size == 4 || size == 8: "wrong size=" + size + " type=" + type + " id=" + id;
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40.values;
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@ -36,7 +37,7 @@ import org.apache.lucene.util.IOUtils;
*
* @lucene.experimental
*/
final class Ints {
public final class Ints {
protected static final String CODEC_NAME = "Ints";
protected static final int VERSION_START = 0;
protected static final int VERSION_CURRENT = VERSION_START;
@ -88,7 +89,7 @@ final class Ints {
static class IntsWriter extends FixedStraightBytesImpl.Writer {
private final DocValuesArray template;
private final DocValuesArraySource template;
public IntsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, Type valueType) throws IOException {
@ -101,17 +102,7 @@ final class Ints {
size = typeToSize(valueType);
this.bytesRef = new BytesRef(size);
bytesRef.length = size;
template = DocValuesArray.TEMPLATES.get(valueType);
}
protected void add(int docID, long v) throws IOException {
template.toBytes(v, bytesRef);
add(docID, bytesRef);
}
@Override
public void add(int docID, IndexableField docValue) throws IOException {
add(docID, docValue.numericValue().longValue());
template = DocValuesArraySource.forType(valueType);
}
@Override
@ -120,21 +111,28 @@ final class Ints {
template.toBytes(value, bytesRef);
}
@Override
public void add(int docID, IndexableField value) throws IOException {
template.toBytes(value.numericValue().longValue(), bytesRef);
bytesSpareField.setBytesValue(bytesRef);
super.add(docID, bytesSpareField);
}
@Override
protected boolean tryBulkMerge(DocValues docValues) {
// only bulk merge if value type is the same otherwise size differs
return super.tryBulkMerge(docValues) && docValues.type() == template.type();
return super.tryBulkMerge(docValues) && docValues.getType() == template.getType();
}
}
final static class IntsReader extends FixedStraightBytesImpl.FixedStraightReader {
private final DocValuesArray arrayTemplate;
private final DocValuesArraySource arrayTemplate;
IntsReader(Directory dir, String id, int maxDoc, IOContext context, Type type)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc,
context, type);
arrayTemplate = DocValuesArray.TEMPLATES.get(type);
arrayTemplate = DocValuesArraySource.forType(type);
assert arrayTemplate != null;
assert type == sizeToType(size);
}

View File

@ -18,9 +18,8 @@ package org.apache.lucene.codecs.lucene40.values;
*/
import java.io.IOException;
import org.apache.lucene.codecs.lucene40.values.DocValuesArray.LongValues;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.lucene40.values.FixedStraightBytesImpl.FixedBytesWriterBase;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
@ -59,27 +58,10 @@ class PackedIntValues {
protected PackedIntsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.VAR_INTS);
bytesRef = new BytesRef(8);
}
protected void add(int docID, long v) throws IOException {
assert lastDocId < docID;
if (!started) {
started = true;
minValue = maxValue = v;
} else {
if (v < minValue) {
minValue = v;
} else if (v > maxValue) {
maxValue = v;
}
}
lastDocId = docID;
BytesRefUtils.copyLong(bytesRef, v);
add(docID, bytesRef);
}
@Override
public void finish(int docCount) throws IOException {
boolean success = false;
@ -112,13 +94,6 @@ class PackedIntValues {
}
}
@Override
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
add(docID, source.getInt(sourceDoc));
}
private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
datOut.writeLong(minValue);
@ -149,10 +124,25 @@ class PackedIntValues {
}
w.finish();
}
@Override
public void add(int docID, IndexableField docValue) throws IOException {
add(docID, docValue.numericValue().longValue());
final long v = docValue.numericValue().longValue();
assert lastDocId < docID;
if (!started) {
started = true;
minValue = maxValue = v;
} else {
if (v < minValue) {
minValue = v;
} else if (v > maxValue) {
maxValue = v;
}
}
lastDocId = docID;
DocValuesArraySource.copyLong(bytesRef, v);
bytesSpareField.setBytesValue(bytesRef);
super.add(docID, bytesSpareField);
}
}
@ -164,7 +154,7 @@ class PackedIntValues {
private final IndexInput datIn;
private final byte type;
private final int numDocs;
private final LongValues values;
private final DocValuesArraySource values;
protected PackedIntsReader(Directory dir, String id, int numDocs,
IOContext context) throws IOException {
@ -176,7 +166,7 @@ class PackedIntValues {
try {
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
type = datIn.readByte();
values = type == FIXED_64 ? new LongValues() : null;
values = type == FIXED_64 ? DocValuesArraySource.forType(Type.FIXED_INTS_64) : null;
success = true;
} finally {
if (!success) {
@ -220,7 +210,7 @@ class PackedIntValues {
@Override
public Type type() {
public Type getType() {
return Type.VAR_INTS;
}
@ -247,7 +237,7 @@ class PackedIntValues {
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
ref.grow(8);
BytesRefUtils.copyLong(ref, getInt(docID));
DocValuesArraySource.copyLong(ref, getInt(docID));
return ref;
}

View File

@ -57,7 +57,7 @@ class VarDerefBytesImpl {
static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
size = 0;
}
@ -105,7 +105,7 @@ class VarDerefBytesImpl {
@Override
public Source getDirectSource()
throws IOException {
return new DirectVarDerefSource(cloneData(), cloneIndex(), type());
return new DirectVarDerefSource(cloneData(), cloneIndex(), getType());
}
}

View File

@ -59,7 +59,7 @@ final class VarSortedBytesImpl {
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
this.comp = comp;
size = 0;
}
@ -166,7 +166,7 @@ final class VarSortedBytesImpl {
@Override
public Source getDirectSource() throws IOException {
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, type());
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType());
}
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -63,7 +64,7 @@ class VarStraightBytesImpl {
private boolean merge = false;
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
docToAddress = new long[1];
pool.nextBuffer(); // init
@ -84,7 +85,9 @@ class VarStraightBytesImpl {
}
@Override
protected void add(int docID, BytesRef bytes) throws IOException {
public void add(int docID, IndexableField value) throws IOException {
final BytesRef bytes = value.binaryValue();
assert bytes != null;
assert !merge;
if (bytes.length == 0) {
return; // default
@ -245,7 +248,7 @@ class VarStraightBytesImpl {
@Override
public Source getDirectSource()
throws IOException {
return new DirectVarStraightSource(cloneData(), cloneIndex(), type());
return new DirectVarStraightSource(cloneData(), cloneIndex(), getType());
}
}

View File

@ -40,6 +40,7 @@ import org.apache.lucene.util.Counter;
*/
abstract class Writer extends DocValuesConsumer {
protected final Counter bytesUsed;
protected Type type;
/**
* Creates a new {@link Writer}.
@ -49,9 +50,19 @@ abstract class Writer extends DocValuesConsumer {
* internally allocated memory. All tracked bytes must be released
* once {@link #finish(int)} has been called.
*/
protected Writer(Counter bytesUsed) {
protected Writer(Counter bytesUsed, Type type) {
this.bytesUsed = bytesUsed;
this.type = type;
}
@Override
protected Type getType() {
return type;
}
/**
* Factory method to create a {@link Writer} instance for a given type. This

View File

@ -98,8 +98,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
return "PostingsFormat(name=" + getName() + " doPackFST= " + doPackFST + ")";
}
private static final boolean VERBOSE = false;
private final static class TermsWriter extends TermsConsumer {
private final IndexOutput out;
private final FieldInfo field;
@ -123,10 +121,13 @@ public class MemoryPostingsFormat extends PostingsFormat {
// NOTE: not private so we don't pay access check at runtime:
int docCount;
RAMOutputStream buffer = new RAMOutputStream();
int lastOffsetLength;
int lastOffset;
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
if (VERBOSE) System.out.println(" startDoc docID=" + docID + " freq=" + termDocFreq);
//System.out.println(" startDoc docID=" + docID + " freq=" + termDocFreq);
final int delta = docID - lastDocID;
assert docID == 0 || delta > 0;
lastDocID = docID;
@ -143,20 +144,23 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
lastPos = 0;
lastOffset = 0;
}
@Override
public void addPosition(int pos, BytesRef payload, int startOffset, int endOffset) throws IOException {
assert payload == null || field.storePayloads;
if (VERBOSE) System.out.println(" addPos pos=" + pos + " payload=" + payload);
//System.out.println(" addPos pos=" + pos + " payload=" + payload);
final int delta = pos - lastPos;
assert delta >= 0;
lastPos = pos;
int payloadLen = 0;
if (field.storePayloads) {
final int payloadLen = payload == null ? 0 : payload.length;
payloadLen = payload == null ? 0 : payload.length;
if (payloadLen != lastPayloadLen) {
lastPayloadLen = payloadLen;
buffer.writeVInt((delta<<1)|1);
@ -164,13 +168,28 @@ public class MemoryPostingsFormat extends PostingsFormat {
} else {
buffer.writeVInt(delta<<1);
}
if (payloadLen > 0) {
buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
}
} else {
buffer.writeVInt(delta);
}
if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
// don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
// and the numbers aren't that much smaller anyway.
int offsetDelta = startOffset - lastOffset;
int offsetLength = endOffset - startOffset;
if (offsetLength != lastOffsetLength) {
buffer.writeVInt(offsetDelta << 1 | 1);
buffer.writeVInt(offsetLength);
} else {
buffer.writeVInt(offsetDelta << 1);
}
lastOffset = startOffset;
lastOffsetLength = offsetLength;
}
if (payloadLen > 0) {
buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
}
}
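
The block above encodes offsets the same way positions and payloads are encoded: a vInt delta from the previous start offset, with the low bit flagging that a new offset length follows. The standalone sketch below is illustrative only, not part of the patch; the class name is hypothetical and writeVInt is reimplemented locally so the sketch runs on its own.

import java.io.ByteArrayOutputStream;

public class OffsetEncodingSketch {
  private int lastOffset;        // start offset of the previous position in this doc
  private int lastOffsetLength;  // last written offset length; -1 forces the first write
  private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();

  OffsetEncodingSketch() {
    lastOffsetLength = -1; // first position always writes its length
  }

  void addOffsets(int startOffset, int endOffset) {
    int offsetDelta = startOffset - lastOffset;   // delta from previous start offset
    int offsetLength = endOffset - startOffset;
    if (offsetLength != lastOffsetLength) {
      writeVInt(offsetDelta << 1 | 1);            // low bit set: a new length follows
      writeVInt(offsetLength);
    } else {
      writeVInt(offsetDelta << 1);                // low bit clear: length unchanged
    }
    lastOffset = startOffset;
    lastOffsetLength = offsetLength;
  }

  private void writeVInt(int i) {                 // same 7-bit vInt wire format as DataOutput.writeVInt
    while ((i & ~0x7F) != 0) {
      buffer.write((i & 0x7F) | 0x80);
      i >>>= 7;
    }
    buffer.write(i);
  }

  public static void main(String[] args) {
    OffsetEncodingSketch s = new OffsetEncodingSketch();
    s.addOffsets(0, 5);   // new length: two vInts written
    s.addOffsets(6, 11);  // same length: single vInt written
    System.out.println("encoded bytes: " + s.buffer.size()); // 3 bytes total
  }
}
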
@Override
@ -182,6 +201,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
lastDocID = 0;
docCount = 0;
lastPayloadLen = 0;
// force first offset to write its length
lastOffsetLength = -1;
return this;
}
}
@ -190,7 +211,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public PostingsConsumer startTerm(BytesRef text) {
if (VERBOSE) System.out.println(" startTerm term=" + text.utf8ToString());
//System.out.println(" startTerm term=" + text.utf8ToString());
return postingsWriter.reset();
}
@ -224,12 +245,12 @@ public class MemoryPostingsFormat extends PostingsFormat {
spare.bytes = finalBuffer;
spare.length = totalBytes;
if (VERBOSE) {
System.out.println(" finishTerm term=" + text.utf8ToString() + " " + totalBytes + " bytes totalTF=" + stats.totalTermFreq);
for(int i=0;i<totalBytes;i++) {
System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF));
}
}
//System.out.println(" finishTerm term=" + text.utf8ToString() + " " + totalBytes + " bytes totalTF=" + stats.totalTermFreq);
//for(int i=0;i<totalBytes;i++) {
// System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF));
//}
builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
termCount++;
}
@ -249,7 +270,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4));
}
fst.save(out);
if (VERBOSE) System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
//System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
}
}
@ -270,10 +291,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
return new FieldsConsumer() {
@Override
public TermsConsumer addField(FieldInfo field) {
if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
throw new UnsupportedOperationException("this codec cannot index offsets");
}
if (VERBOSE) System.out.println("\naddField field=" + field.name);
//System.out.println("\naddField field=" + field.name);
return new TermsWriter(out, field, doPackFST);
}
@ -331,11 +349,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public int nextDoc() {
while(true) {
if (VERBOSE) System.out.println(" nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
//System.out.println(" nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
if (docUpto == numDocs) {
if (VERBOSE) {
System.out.println(" END");
}
// System.out.println(" END");
return docID = NO_MORE_DOCS;
}
docUpto++;
@ -344,7 +360,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
} else {
final int code = in.readVInt();
accum += code >>> 1;
if (VERBOSE) System.out.println(" docID=" + accum + " code=" + code);
//System.out.println(" docID=" + accum + " code=" + code);
if ((code & 1) != 0) {
freq = 1;
} else {
@ -352,8 +368,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
assert freq > 0;
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
// Skip positions
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// Skip positions/payloads
for(int posUpto=0;posUpto<freq;posUpto++) {
if (!storePayloads) {
in.readVInt();
@ -365,11 +381,26 @@ public class MemoryPostingsFormat extends PostingsFormat {
in.skipBytes(payloadLen);
}
}
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
// Skip positions/offsets/payloads
for(int posUpto=0;posUpto<freq;posUpto++) {
int posCode = in.readVInt();
if (storePayloads && ((posCode & 1) != 0)) {
payloadLen = in.readVInt();
}
if ((in.readVInt() & 1) != 0) {
// new offset length
in.readVInt();
}
if (storePayloads) {
in.skipBytes(payloadLen);
}
}
}
}
if (liveDocs == null || liveDocs.get(accum)) {
if (VERBOSE) System.out.println(" return docID=" + accum + " freq=" + freq);
//System.out.println(" return docID=" + accum + " freq=" + freq);
return (docID = accum);
}
}
@ -413,26 +444,30 @@ public class MemoryPostingsFormat extends PostingsFormat {
private int posPending;
private int payloadLength;
private boolean payloadRetrieved;
final boolean storeOffsets;
int offsetLength;
int startOffset;
private int pos;
private final BytesRef payload = new BytesRef();
public FSTDocsAndPositionsEnum(boolean storePayloads) {
public FSTDocsAndPositionsEnum(boolean storePayloads, boolean storeOffsets) {
this.storePayloads = storePayloads;
this.storeOffsets = storeOffsets;
}
public boolean canReuse(boolean storePayloads) {
return storePayloads == this.storePayloads;
public boolean canReuse(boolean storePayloads, boolean storeOffsets) {
return storePayloads == this.storePayloads && storeOffsets == this.storeOffsets;
}
public FSTDocsAndPositionsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
assert numDocs > 0;
if (VERBOSE) {
System.out.println("D&P reset bytes this=" + this);
for(int i=bufferIn.offset;i<bufferIn.length;i++) {
System.out.println(" " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
}
}
// System.out.println("D&P reset bytes this=" + this);
// for(int i=bufferIn.offset;i<bufferIn.length;i++) {
// System.out.println(" " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
// }
if (buffer.length < bufferIn.length - bufferIn.offset) {
buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
}
@ -447,6 +482,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
this.numDocs = numDocs;
posPending = 0;
payloadRetrieved = false;
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
offsetLength = 0;
return this;
}
@ -456,9 +493,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
nextPosition();
}
while(true) {
if (VERBOSE) System.out.println(" nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
//System.out.println(" nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
if (docUpto == numDocs) {
if (VERBOSE) System.out.println(" END");
//System.out.println(" END");
return docID = NO_MORE_DOCS;
}
docUpto++;
@ -474,8 +511,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
if (liveDocs == null || liveDocs.get(accum)) {
pos = 0;
startOffset = storeOffsets ? 0 : -1;
posPending = freq;
if (VERBOSE) System.out.println(" return docID=" + accum + " freq=" + freq);
//System.out.println(" return docID=" + accum + " freq=" + freq);
return (docID = accum);
}
@ -487,8 +525,18 @@ public class MemoryPostingsFormat extends PostingsFormat {
final int skipCode = in.readVInt();
if ((skipCode & 1) != 0) {
payloadLength = in.readVInt();
if (VERBOSE) System.out.println(" new payloadLen=" + payloadLength);
//System.out.println(" new payloadLen=" + payloadLength);
}
}
if (storeOffsets) {
if ((in.readVInt() & 1) != 0) {
// new offset length
offsetLength = in.readVInt();
}
}
if (storePayloads) {
in.skipBytes(payloadLength);
}
}
@ -497,7 +545,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public int nextPosition() {
if (VERBOSE) System.out.println(" nextPos storePayloads=" + storePayloads + " this=" + this);
//System.out.println(" nextPos storePayloads=" + storePayloads + " this=" + this);
assert posPending > 0;
posPending--;
if (!storePayloads) {
@ -511,6 +559,18 @@ public class MemoryPostingsFormat extends PostingsFormat {
//} else {
//System.out.println(" same payloadLen=" + payloadLength);
}
}
if (storeOffsets) {
int offsetCode = in.readVInt();
if ((offsetCode & 1) != 0) {
// new offset length
offsetLength = in.readVInt();
}
startOffset += offsetCode >>> 1;
}
if (storePayloads) {
payload.offset = in.getPosition();
in.skipBytes(payloadLength);
payload.length = payloadLength;
@ -520,18 +580,18 @@ public class MemoryPostingsFormat extends PostingsFormat {
payloadRetrieved = false;
}
if (VERBOSE) System.out.println(" pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
//System.out.println(" pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
return pos;
}
@Override
public int startOffset() {
return -1;
return startOffset;
}
@Override
public int endOffset() {
return -1;
return startOffset + offsetLength;
}
@Override
@ -594,14 +654,14 @@ public class MemoryPostingsFormat extends PostingsFormat {
totalTermFreq = -1;
}
current.output.offset = buffer.getPosition();
if (VERBOSE) System.out.println(" df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
//System.out.println(" df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
didDecode = true;
}
}
@Override
public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
if (VERBOSE) System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
//System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
current = fstEnum.seekExact(text);
didDecode = false;
return current != null;
@ -609,25 +669,24 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
if (VERBOSE) System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
//System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
current = fstEnum.seekCeil(text);
if (current == null) {
return SeekStatus.END;
} else {
if (VERBOSE) {
System.out.println(" got term=" + current.input.utf8ToString());
for(int i=0;i<current.output.length;i++) {
System.out.println(" " + Integer.toHexString(current.output.bytes[i]&0xFF));
}
}
// System.out.println(" got term=" + current.input.utf8ToString());
// for(int i=0;i<current.output.length;i++) {
// System.out.println(" " + Integer.toHexString(current.output.bytes[i]&0xFF));
// }
didDecode = false;
if (text.equals(current.input)) {
if (VERBOSE) System.out.println(" found!");
//System.out.println(" found!");
return SeekStatus.FOUND;
} else {
if (VERBOSE) System.out.println(" not found: " + current.input.utf8ToString());
//System.out.println(" not found: " + current.input.utf8ToString());
return SeekStatus.NOT_FOUND;
}
}
@ -654,9 +713,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
if (needsOffsets) {
// Not until we can index offsets...
return null;
boolean hasOffsets = field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (needsOffsets && !hasOffsets) {
return null; // not available
}
if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
@ -665,14 +724,14 @@ public class MemoryPostingsFormat extends PostingsFormat {
decodeMetaData();
FSTDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse == null || !(reuse instanceof FSTDocsAndPositionsEnum)) {
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads);
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads, hasOffsets);
} else {
docsAndPositionsEnum = (FSTDocsAndPositionsEnum) reuse;
if (!docsAndPositionsEnum.canReuse(field.storePayloads)) {
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads);
if (!docsAndPositionsEnum.canReuse(field.storePayloads, hasOffsets)) {
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads, hasOffsets);
}
}
if (VERBOSE) System.out.println("D&P reset this=" + this);
//System.out.println("D&P reset this=" + this);
return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
}
@ -683,14 +742,14 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public BytesRef next() throws IOException {
if (VERBOSE) System.out.println("te.next");
//System.out.println("te.next");
current = fstEnum.next();
if (current == null) {
if (VERBOSE) System.out.println(" END");
//System.out.println(" END");
return null;
}
didDecode = false;
if (VERBOSE) System.out.println(" term=" + field.name + ":" + current.input.utf8ToString());
//System.out.println(" term=" + field.name + ":" + current.input.utf8ToString());
return current.input;
}
@ -794,9 +853,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
break;
}
final TermsReader termsReader = new TermsReader(state.fieldInfos, in, termCount);
if (VERBOSE) {
System.out.println("load field=" + termsReader.field.name);
}
// System.out.println("load field=" + termsReader.field.name);
fields.put(termsReader.field.name, termsReader);
}
} finally {

View File

@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.ServiceLoader; // javadocs
import java.util.Set;
import java.util.TreeMap;
@ -47,7 +48,14 @@ import org.apache.lucene.util.IOUtils;
/**
* Enables per field format support.
*
* <p>
Note that when extending this class, the name ({@link #getName}) is
* written into the index. In order for the field to be read, the
* name must resolve to your implementation via {@link #forName(String)}.
* This method uses Java's
* {@link ServiceLoader Service Provider Interface} to resolve format names.
* <p>
* @see ServiceLoader
* @lucene.experimental
*/
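
The note above describes SPI-based name resolution. A minimal sketch of how a format name round-trips through that mechanism is shown below; it assumes the standard PostingsFormat.forName/getName API referenced in the javadoc, and the META-INF/services provider path is the conventional ServiceLoader location, not something introduced by this commit.

import org.apache.lucene.codecs.PostingsFormat;

public class FormatLookupSketch {
  public static void main(String[] args) {
    // The name written into the index must resolve back to a registered implementation:
    PostingsFormat fmt = PostingsFormat.forName("Memory");
    System.out.println("resolved format: " + fmt.getName());

    // A custom format becomes resolvable by shipping a provider file, e.g.
    //   META-INF/services/org.apache.lucene.codecs.PostingsFormat
    // listing the fully qualified class name, so ServiceLoader can discover it.
  }
}
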

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.lucene40.values.DocValuesWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -58,7 +59,7 @@ public class SepDocValuesConsumer extends DocValuesWriterBase {
private static void files(Directory dir,FieldInfos fieldInfos, String segmentName, Set<String> files) {
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasDocValues()) {
String filename = docValuesId(segmentName, fieldInfo.number);
String filename = PerDocProducerBase.docValuesId(segmentName, fieldInfo.number);
switch (fieldInfo.getDocValuesType()) {
case BYTES_FIXED_DEREF:
case BYTES_VAR_DEREF:

View File

@ -22,16 +22,22 @@ import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.lucene40.values.DocValuesReaderBase;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.lucene40.values.Bytes;
import org.apache.lucene.codecs.lucene40.values.Floats;
import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;
/**
* Implementation of PerDocProducer that uses separate files.
* @lucene.experimental
*/
public class SepDocValuesProducer extends DocValuesReaderBase {
public class SepDocValuesProducer extends PerDocProducerBase {
private final TreeMap<String, DocValues> docValues;
/**
@ -51,4 +57,35 @@ public class SepDocValuesProducer extends DocValuesReaderBase {
protected void closeInternal(Collection<? extends Closeable> closeables) throws IOException {
IOUtils.close(closeables);
}
@Override
protected DocValues loadDocValues(int docCount, Directory dir, String id,
Type type, IOContext context) throws IOException {
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
case BYTES_FIXED_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
case BYTES_VAR_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
case BYTES_VAR_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
case BYTES_VAR_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
default:
throw new IllegalStateException("unrecognized index values mode " + type);
}
}
}

View File

@ -26,7 +26,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfosFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;
/**
* plain text index format.
@ -41,7 +40,7 @@ public final class SimpleTextCodec extends Codec {
private final FieldInfosFormat fieldInfosFormat = new SimpleTextFieldInfosFormat();
private final TermVectorsFormat vectorsFormat = new SimpleTextTermVectorsFormat();
// TODO: need a plain-text impl
private final DocValuesFormat docValues = new Lucene40DocValuesFormat();
private final DocValuesFormat docValues = new SimpleTextDocValuesFormat();
// TODO: need a plain-text impl (using the above)
private final NormsFormat normsFormat = new SimpleTextNormsFormat();
private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();

View File

@ -0,0 +1,288 @@
package org.apache.lucene.codecs.simpletext;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.IOUtils;
/**
* @lucene.experimental
*/
public class SimpleTextDocValuesConsumer extends DocValuesConsumer {
static final BytesRef ZERO_DOUBLE = new BytesRef(Double.toString(0d));
static final BytesRef ZERO_INT = new BytesRef(Integer.toString(0));
static final BytesRef HEADER = new BytesRef("SimpleTextDocValues");
static final BytesRef END = new BytesRef("END");
static final BytesRef VALUE_SIZE = new BytesRef("valuesize ");
static final BytesRef DOC = new BytesRef(" doc ");
static final BytesRef VALUE = new BytesRef(" value ");
protected BytesRef scratch = new BytesRef();
protected int maxDocId = -1;
protected final String segment;
protected final Directory dir;
protected final IOContext ctx;
protected final Type type;
protected final BytesRefHash hash;
private int[] ords;
private int fixedSize = Integer.MIN_VALUE;
private BytesRef zeroBytes;
private final String segmentSuffix;
public SimpleTextDocValuesConsumer(String segment, Directory dir,
IOContext ctx, Type type, String segmentSuffix) {
this.ctx = ctx;
this.dir = dir;
this.segment = segment;
this.type = type;
hash = new BytesRefHash();
ords = new int[0];
this.segmentSuffix = segmentSuffix;
}
@Override
public void add(int docID, IndexableField value) throws IOException {
assert docID >= 0;
int ord = -1;
int vSize = -1;
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
vSize = value.binaryValue().length;
ord = hash.add(value.binaryValue());
break;
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
vSize = -1;
final BytesRef bytes = value.binaryValue();
assert bytes != null : "binary value must be set for " + type;
ord = hash.add(bytes);
break;
case FIXED_INTS_16:
vSize = 2;
scratch.grow(2);
DocValuesArraySource.copyShort(scratch, value.numericValue().shortValue());
ord = hash.add(scratch);
break;
case FIXED_INTS_32:
vSize = 4;
scratch.grow(4);
DocValuesArraySource.copyInt(scratch, value.numericValue().intValue());
ord = hash.add(scratch);
break;
case FIXED_INTS_8:
vSize = 1;
scratch.grow(1);
scratch.bytes[scratch.offset] = value.numericValue().byteValue();
scratch.length = 1;
ord = hash.add(scratch);
break;
case FIXED_INTS_64:
vSize = 8;
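// intentional fall-through: FIXED_INTS_64 shares the long encoding with VAR_INTS below (VAR_INTS keeps vSize = -1)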
case VAR_INTS:
scratch.grow(8);
DocValuesArraySource.copyLong(scratch, value.numericValue().longValue());
ord = hash.add(scratch);
break;
case FLOAT_32:
vSize = 4;
scratch.grow(4);
DocValuesArraySource.copyInt(scratch,
Float.floatToRawIntBits(value.numericValue().floatValue()));
ord = hash.add(scratch);
break;
case FLOAT_64:
vSize = 8;
scratch.grow(8);
DocValuesArraySource.copyLong(scratch,
Double.doubleToRawLongBits(value.numericValue().doubleValue()));
ord = hash.add(scratch);
break;
}
if (fixedSize == Integer.MIN_VALUE) {
assert maxDocId == -1;
fixedSize = vSize;
} else {
if (fixedSize != vSize) {
throw new IllegalArgumentException("value size must be " + fixedSize + " but was: " + vSize);
}
}
maxDocId = Math.max(docID, maxDocId);
ords = grow(ords, docID);
ords[docID] = (ord < 0 ? (-ord)-1 : ord) + 1;
}
protected BytesRef getHeader() {
return HEADER;
}
private int[] grow(int[] array, int upto) {
if (array.length <= upto) {
return ArrayUtil.grow(array, 1 + upto);
}
return array;
}
private void prepareFlush(int docCount) {
assert ords != null;
ords = grow(ords, docCount);
}
@Override
public void finish(int docCount) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "",
segmentSuffix);
IndexOutput output = dir.createOutput(fileName, ctx);
boolean success = false;
BytesRef spare = new BytesRef();
try {
SimpleTextUtil.write(output, getHeader());
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, VALUE_SIZE);
SimpleTextUtil.write(output, Integer.toString(this.fixedSize), scratch);
SimpleTextUtil.writeNewline(output);
prepareFlush(docCount);
for (int i = 0; i < docCount; i++) {
SimpleTextUtil.write(output, DOC);
SimpleTextUtil.write(output, Integer.toString(i), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, VALUE);
writeDoc(output, i, spare);
SimpleTextUtil.writeNewline(output);
}
SimpleTextUtil.write(output, END);
SimpleTextUtil.writeNewline(output);
success = true;
} finally {
hash.close();
if (success) {
IOUtils.close(output);
} else {
IOUtils.closeWhileHandlingException(output);
}
}
}
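
For illustration only (not text from the patch), finish() above writes a layout along these lines for two FIXED_INTS_64 values, 42 and 7; the exact leading whitespace comes from the DOC and VALUE constants:

SimpleTextDocValues
valuesize 8
  doc 0
  value 42
  doc 1
  value 7
END
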
protected void writeDoc(IndexOutput output, int docId, BytesRef spare) throws IOException {
int ord = ords[docId] - 1;
if (ord != -1) {
assert ord >= 0;
hash.get(ord, spare);
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
SimpleTextUtil.write(output, spare);
break;
case FIXED_INTS_16:
SimpleTextUtil.write(output,
Short.toString(DocValuesArraySource.asShort(spare)), scratch);
break;
case FIXED_INTS_32:
SimpleTextUtil.write(output,
Integer.toString(DocValuesArraySource.asInt(spare)), scratch);
break;
case VAR_INTS:
case FIXED_INTS_64:
SimpleTextUtil.write(output,
Long.toString(DocValuesArraySource.asLong(spare)), scratch);
break;
case FIXED_INTS_8:
assert spare.length == 1 : spare.length;
SimpleTextUtil.write(output,
Integer.toString(spare.bytes[spare.offset]), scratch);
break;
case FLOAT_32:
float valueFloat = Float.intBitsToFloat(DocValuesArraySource.asInt(spare));
SimpleTextUtil.write(output, Float.toString(valueFloat), scratch);
break;
case FLOAT_64:
double valueDouble = Double.longBitsToDouble(DocValuesArraySource
.asLong(spare));
SimpleTextUtil.write(output, Double.toString(valueDouble), scratch);
break;
default:
throw new IllegalArgumentException("unsupported type: " + type);
}
} else {
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
if(zeroBytes == null) {
assert fixedSize > 0;
zeroBytes = new BytesRef(new byte[fixedSize]);
}
SimpleTextUtil.write(output, zeroBytes);
break;
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
scratch.length = 0;
SimpleTextUtil.write(output, scratch);
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
SimpleTextUtil.write(output, ZERO_INT);
break;
case FLOAT_32:
case FLOAT_64:
SimpleTextUtil.write(output, ZERO_DOUBLE);
break;
default:
throw new IllegalArgumentException("unsupported type: " + type);
}
}
}
@Override
protected Type getType() {
return type;
}
}

View File

@ -0,0 +1,53 @@
package org.apache.lucene.codecs.simpletext;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.util.BytesRef;
/**
* @lucene.experimental
*/
public class SimpleTextDocValuesFormat extends DocValuesFormat {
private static final String DOC_VALUES_SEG_SUFFIX = "dv";
@Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new SimpleTextPerDocConsumer(state, DOC_VALUES_SEG_SUFFIX);
}
@Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new SimpleTextPerDocProducer(state, BytesRef.getUTF8SortedAsUnicodeComparator(), DOC_VALUES_SEG_SUFFIX);
}
static String docValuesId(String segmentsName, int fieldId) {
return segmentsName + "_" + fieldId;
}
@Override
public void files(SegmentInfo info, Set<String> files)
throws IOException {
SimpleTextPerDocConsumer.files(info, files, DOC_VALUES_SEG_SUFFIX);
}
}

View File

@ -1,294 +0,0 @@
package org.apache.lucene.codecs.simpletext;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* Writes plain-text norms
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
*
* @lucene.experimental
*/
public class SimpleTextNormsConsumer extends PerDocConsumer {
/** Extension of norms file */
static final String NORMS_EXTENSION = "len";
final static BytesRef END = new BytesRef("END");
final static BytesRef FIELD = new BytesRef("field ");
final static BytesRef DOC = new BytesRef(" doc ");
final static BytesRef NORM = new BytesRef(" norm ");
private NormsWriter writer;
private final Directory directory;
private final String segment;
private final IOContext context;
public SimpleTextNormsConsumer(Directory directory, String segment,
IOContext context) throws IOException {
this.directory = directory;
this.segment = segment;
this.context = context;
}
@Override
public void close() throws IOException {
if (writer != null) {
boolean success = false;
try {
writer.finish();
success = true;
} finally {
if (success) {
IOUtils.close(writer);
} else {
IOUtils.closeWhileHandlingException(writer);
}
}
}
}
@Override
protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info)
throws IOException {
return reader.normValues(info.name);
}
@Override
protected boolean canMerge(FieldInfo info) {
return info.normsPresent();
}
@Override
protected Type getDocValuesType(FieldInfo info) {
return info.getNormType();
}
@Override
public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo)
throws IOException {
if (type != Type.FIXED_INTS_8) {
throw new UnsupportedOperationException("Codec only supports single byte norm values. Type given: " + type);
}
return new SimpleTextNormsDocValuesConsumer(fieldInfo);
}
@Override
public void abort() {
if (writer != null) {
try {
writer.abort();
} catch (IOException e) {
}
}
}
private class SimpleTextNormsDocValuesConsumer extends DocValuesConsumer {
// Holds all docID/norm pairs we've seen
int[] docIDs = new int[1];
byte[] norms = new byte[1];
int upto;
private final FieldInfo fi;
public SimpleTextNormsDocValuesConsumer(FieldInfo fieldInfo) {
fi = fieldInfo;
}
@Override
public void add(int docID, IndexableField docValue) throws IOException {
add(docID, docValue.numericValue().longValue());
}
public void add(int docID, long value) {
if (docIDs.length <= upto) {
assert docIDs.length == upto;
docIDs = ArrayUtil.grow(docIDs, 1 + upto);
}
if (norms.length <= upto) {
assert norms.length == upto;
norms = ArrayUtil.grow(norms, 1 + upto);
}
norms[upto] = (byte) value;
docIDs[upto] = docID;
upto++;
}
@Override
public void finish(int docCount) throws IOException {
final NormsWriter normsWriter = getNormsWriter();
boolean success = false;
try {
int uptoDoc = 0;
normsWriter.setNumTotalDocs(docCount);
if (upto > 0) {
normsWriter.startField(fi);
int docID = 0;
for (; docID < docCount; docID++) {
if (uptoDoc < upto && docIDs[uptoDoc] == docID) {
normsWriter.writeNorm(norms[uptoDoc]);
uptoDoc++;
} else {
normsWriter.writeNorm((byte) 0);
}
}
// we should have consumed every norm
assert uptoDoc == upto;
} else {
// Fill entire field with default norm:
normsWriter.startField(fi);
for (; upto < docCount; upto++)
normsWriter.writeNorm((byte) 0);
}
success = true;
} finally {
if (!success) {
normsWriter.abort();
}
}
}
}
public NormsWriter getNormsWriter() throws IOException {
if (writer == null) {
writer = new NormsWriter(directory, segment, context);
}
return writer;
}
private static class NormsWriter implements Closeable{
private final IndexOutput output;
private int numTotalDocs = 0;
private int docid = 0;
private final BytesRef scratch = new BytesRef();
public NormsWriter(Directory directory, String segment, IOContext context)
throws IOException {
final String normsFileName = IndexFileNames.segmentFileName(segment, "",
NORMS_EXTENSION);
output = directory.createOutput(normsFileName, context);
}
public void startField(FieldInfo info) throws IOException {
assert info.omitNorms == false;
docid = 0;
write(FIELD);
write(info.name);
newLine();
}
public void writeNorm(byte norm) throws IOException {
write(DOC);
write(Integer.toString(docid));
newLine();
write(NORM);
write(norm);
newLine();
docid++;
}
public void finish(int numDocs) throws IOException {
if (docid != numDocs) {
throw new RuntimeException(
"mergeNorms produced an invalid result: docCount is " + numDocs
+ " but only saw " + docid + " file=" + output.toString()
+ "; now aborting this merge to prevent index corruption");
}
write(END);
newLine();
}
private void write(String s) throws IOException {
SimpleTextUtil.write(output, s, scratch);
}
private void write(BytesRef bytes) throws IOException {
SimpleTextUtil.write(output, bytes);
}
private void write(byte b) throws IOException {
scratch.grow(1);
scratch.bytes[scratch.offset] = b;
scratch.length = 1;
SimpleTextUtil.write(output, scratch);
}
private void newLine() throws IOException {
SimpleTextUtil.writeNewline(output);
}
public void setNumTotalDocs(int numTotalDocs) {
assert this.numTotalDocs == 0 || numTotalDocs == this.numTotalDocs;
this.numTotalDocs = numTotalDocs;
}
public void abort() throws IOException {
close();
}
public void finish() throws IOException {
finish(numTotalDocs);
}
@Override
public void close() throws IOException {
output.close();
}
}
public static void files(SegmentInfo info, Set<String> files) throws IOException {
FieldInfos fieldInfos = info.getFieldInfos();
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.normsPresent()) {
files.add(IndexFileNames.segmentFileName(info.name, "",
NORMS_EXTENSION));
break;
}
}
}
}

View File

@ -18,35 +18,123 @@ package org.apache.lucene.codecs.simpletext;
*/
import java.io.IOException;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* plain-text norms format
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
*
* @lucene.experimental
*/
public class SimpleTextNormsFormat extends NormsFormat {
private static final String NORMS_SEG_SUFFIX = "len";
@Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new SimpleTextNormsConsumer(state.directory, state.segmentName, state.context);
return new SimpleTextNormsPerDocConsumer(state, NORMS_SEG_SUFFIX);
}
@Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new SimpleTextNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context);
return new SimpleTextNormsPerDocProducer(state,
BytesRef.getUTF8SortedAsUnicodeComparator(), NORMS_SEG_SUFFIX);
}
@Override
public void files(SegmentInfo info, Set<String> files) throws IOException {
SimpleTextNormsConsumer.files(info, files);
}
SimpleTextNormsPerDocConsumer.files(info, files);
}
public static class SimpleTextNormsPerDocProducer extends
SimpleTextPerDocProducer {
public SimpleTextNormsPerDocProducer(SegmentReadState state,
Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
super(state, comp, segmentSuffix);
}
@Override
protected boolean canLoad(FieldInfo info) {
return info.hasNorms();
}
@Override
protected Type getDocValuesType(FieldInfo info) {
return info.getNormType();
}
@Override
protected boolean anyDocValuesFields(FieldInfos infos) {
return infos.hasNorms();
}
}
public static class SimpleTextNormsPerDocConsumer extends
SimpleTextPerDocConsumer {
public SimpleTextNormsPerDocConsumer(PerDocWriteState state,
String segmentSuffix) throws IOException {
super(state, segmentSuffix);
}
@Override
protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info)
throws IOException {
return reader.normValues(info.name);
}
@Override
protected boolean canMerge(FieldInfo info) {
return info.hasNorms();
}
@Override
protected Type getDocValuesType(FieldInfo info) {
return info.getNormType();
}
@Override
public void abort() {
Set<String> files = new HashSet<String>();
filesInternal(state.fieldInfos, state.segmentName, files, segmentSuffix);
IOUtils.deleteFilesIgnoringExceptions(state.directory,
files.toArray(new String[0]));
}
public static void files(SegmentInfo segmentInfo, Set<String> files)
throws IOException {
filesInternal(segmentInfo.getFieldInfos(), segmentInfo.name, files,
NORMS_SEG_SUFFIX);
}
public static void filesInternal(FieldInfos fieldInfos, String segmentName,
Set<String> files, String segmentSuffix) {
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasNorms()) {
String id = docValuesId(segmentName, fieldInfo.number);
files.add(IndexFileNames.segmentFileName(id, "",
segmentSuffix));
}
}
}
}
}

View File

@ -1,175 +0,0 @@
package org.apache.lucene.codecs.simpletext;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.DOC;
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORM;
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORMS_EXTENSION;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
/**
* Reads plain-text norms
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
* @lucene.experimental
*/
public class SimpleTextNormsProducer extends PerDocProducer {
Map<String,NormsDocValues> norms = new HashMap<String,NormsDocValues>();
public SimpleTextNormsProducer(Directory directory, SegmentInfo si, FieldInfos fields, IOContext context) throws IOException {
if (fields.hasNorms()) {
readNorms(directory.openInput(IndexFileNames.segmentFileName(si.name, "", NORMS_EXTENSION), context), si.docCount);
}
}
// we read in all the norms up front into a hashmap
private void readNorms(IndexInput in, int maxDoc) throws IOException {
BytesRef scratch = new BytesRef();
boolean success = false;
try {
SimpleTextUtil.readLine(in, scratch);
while (!scratch.equals(END)) {
assert StringHelper.startsWith(scratch, FIELD);
final String fieldName = readString(FIELD.length, scratch);
byte bytes[] = new byte[maxDoc];
for (int i = 0; i < bytes.length; i++) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, DOC);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, NORM);
bytes[i] = scratch.bytes[scratch.offset + NORM.length];
}
norms.put(fieldName, new NormsDocValues(new Norm(bytes)));
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, FIELD) || scratch.equals(END);
}
success = true;
} finally {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
}
@Override
public void close() throws IOException {
norms = null;
}
static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
FieldInfos fieldInfos = info.getFieldInfos();
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.normsPresent()) {
files.add(IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION));
break;
}
}
}
private String readString(int offset, BytesRef scratch) {
return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
}
@Override
public DocValues docValues(String field) throws IOException {
return norms.get(field);
}
private class NormsDocValues extends DocValues {
private final Source source;
public NormsDocValues(Source source) {
this.source = source;
}
@Override
public Source load() throws IOException {
return source;
}
@Override
public Source getDirectSource() throws IOException {
return getSource();
}
@Override
public Type type() {
return Type.FIXED_INTS_8;
}
@Override
public int getValueSize() {
return 1;
}
}
static final class Norm extends Source {
protected Norm(byte[] bytes) {
super(Type.FIXED_INTS_8);
this.bytes = bytes;
}
final byte bytes[];
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
ref.bytes = bytes;
ref.offset = docID;
ref.length = 1;
return ref;
}
@Override
public long getInt(int docID) {
return bytes[docID];
}
@Override
public boolean hasArray() {
return true;
}
@Override
public Object getArray() {
return bytes;
}
}
}

View File

@ -0,0 +1,94 @@
package org.apache.lucene.codecs.simpletext;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
/**
* @lucene.experimental
*/
class SimpleTextPerDocConsumer extends PerDocConsumer {
protected final PerDocWriteState state;
protected final String segmentSuffix;
public SimpleTextPerDocConsumer(PerDocWriteState state, String segmentSuffix)
throws IOException {
this.state = state;
this.segmentSuffix = segmentSuffix;
}
@Override
public void close() throws IOException {
}
@Override
public DocValuesConsumer addValuesField(Type type, FieldInfo field)
throws IOException {
return new SimpleTextDocValuesConsumer(SimpleTextDocValuesFormat.docValuesId(state.segmentName,
field.number), state.directory, state.context, type, segmentSuffix);
}
@Override
public void abort() {
Set<String> files = new HashSet<String>();
files(state.directory, state.fieldInfos, state.segmentName, files, segmentSuffix);
IOUtils.deleteFilesIgnoringExceptions(state.directory,
files.toArray(new String[0]));
}
static void files(SegmentInfo info, Set<String> files, String segmentSuffix) throws IOException {
files(info.dir, info.getFieldInfos(), info.name, files, segmentSuffix);
}
static String docValuesId(String segmentsName, int fieldId) {
return segmentsName + "_" + fieldId;
}
@SuppressWarnings("fallthrough")
private static void files(Directory dir, FieldInfos fieldInfos,
String segmentName, Set<String> files, String segmentSuffix) {
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasDocValues()) {
String filename = docValuesId(segmentName, fieldInfo.number);
files.add(IndexFileNames.segmentFileName(filename, "",
segmentSuffix));
try {
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
segmentSuffix));
} catch (IOException e) {
// don't throw checked exception - dir is only used in assert
throw new RuntimeException(e);
}
}
}
}
}
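As a small illustration of the per-field file naming that files()/abort() above rely on: docValuesId() joins the segment name and field number with an underscore, and that id is then passed through IndexFileNames.segmentFileName with the segment suffix. The helper below is a stand-in for that Lucene method (its exact joining rule is an assumption here), and the sample segment name, field number, and "dv" suffix are made-up values.
public class DocValuesFileNameSketch {
  // Mirrors SimpleTextPerDocConsumer.docValuesId(...)
  static String docValuesId(String segmentName, int fieldNumber) {
    return segmentName + "_" + fieldNumber;
  }
  // Stand-in for IndexFileNames.segmentFileName(name, "", suffix); assumed to
  // append the suffix with a '.' when it is non-empty.
  static String segmentFileName(String name, String suffix) {
    return suffix.isEmpty() ? name : name + "." + suffix;
  }
  public static void main(String[] args) {
    String id = docValuesId("_3", 7);                 // "_3_7"
    System.out.println(segmentFileName(id, "dv"));    // e.g. "_3_7.dv"
  }
}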

View File

@ -0,0 +1,431 @@
package org.apache.lucene.codecs.simpletext;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.DOC;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.HEADER;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE_SIZE;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.PackedInts.Reader;
/**
* @lucene.experimental
*/
public class SimpleTextPerDocProducer extends PerDocProducerBase {
protected final TreeMap<String, DocValues> docValues;
private Comparator<BytesRef> comp;
private final String segmentSuffix;
/**
* Creates a new {@link SimpleTextPerDocProducer} instance and loads all
* {@link DocValues} instances for this segment and codec.
*/
public SimpleTextPerDocProducer(SegmentReadState state,
Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
this.comp = comp;
this.segmentSuffix = segmentSuffix;
if (anyDocValuesFields(state.fieldInfos)) {
docValues = load(state.fieldInfos, state.segmentInfo.name,
state.segmentInfo.docCount, state.dir, state.context);
} else {
docValues = new TreeMap<String, DocValues>();
}
}
@Override
protected Map<String, DocValues> docValues() {
return docValues;
}
protected DocValues loadDocValues(int docCount, Directory dir, String id,
DocValues.Type type, IOContext context) throws IOException {
return new SimpleTextDocValues(dir, context, type, id, docCount, comp, segmentSuffix);
}
@Override
protected void closeInternal(Collection<? extends Closeable> closeables)
throws IOException {
IOUtils.close(closeables);
}
private static class SimpleTextDocValues extends DocValues {
private int docCount;
@Override
public void close() throws IOException {
try {
super.close();
} finally {
IOUtils.close(input);
}
}
private Type type;
private Comparator<BytesRef> comp;
private int valueSize;
private final IndexInput input;
public SimpleTextDocValues(Directory dir, IOContext ctx, Type type,
String id, int docCount, Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
this.type = type;
this.docCount = docCount;
this.comp = comp;
final String fileName = IndexFileNames.segmentFileName(id, "", segmentSuffix);
boolean success = false;
IndexInput in = null;
try {
in = dir.openInput(fileName, ctx);
valueSize = readHeader(in);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(in);
}
}
input = in;
}
@Override
public Source load() throws IOException {
boolean success = false;
IndexInput in = (IndexInput) input.clone();
try {
Source source = null;
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
source = read(in, new ValueReader(type, docCount, comp));
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case VAR_INTS:
case FIXED_INTS_64:
case FIXED_INTS_8:
case FLOAT_32:
case FLOAT_64:
source = read(in, new ValueReader(type, docCount, null));
break;
default:
throw new IllegalArgumentException("unknown type: " + type);
}
assert source != null;
success = true;
return source;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(in);
} else {
IOUtils.close(in);
}
}
}
private int readHeader(IndexInput in) throws IOException {
BytesRef scratch = new BytesRef();
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, HEADER);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, VALUE_SIZE);
return Integer.parseInt(readString(scratch.offset + VALUE_SIZE.length,
scratch));
}
private Source read(IndexInput in, ValueReader reader) throws IOException {
BytesRef scratch = new BytesRef();
for (int i = 0; i < docCount; i++) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, DOC) : scratch.utf8ToString();
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, VALUE);
reader.fromString(i, scratch, scratch.offset + VALUE.length);
}
SimpleTextUtil.readLine(in, scratch);
assert scratch.equals(END);
return reader.getSource();
}
@Override
public Source getDirectSource() throws IOException {
return this.getSource();
}
@Override
public int getValueSize() {
return valueSize;
}
@Override
public Type getType() {
return type;
}
}
public static String readString(int offset, BytesRef scratch) {
return new String(scratch.bytes, scratch.offset + offset, scratch.length
- offset, IOUtils.CHARSET_UTF_8);
}
private static final class ValueReader {
private final Type type;
private byte[] bytes;
private short[] shorts;
private int[] ints;
private long[] longs;
private float[] floats;
private double[] doubles;
private Source source;
private BytesRefHash hash;
private BytesRef scratch;
public ValueReader(Type type, int maxDocs, Comparator<BytesRef> comp) {
super();
this.type = type;
Source docValuesArray = null;
switch (type) {
case FIXED_INTS_16:
shorts = new short[maxDocs];
docValuesArray = DocValuesArraySource.forType(type)
.newFromArray(shorts);
break;
case FIXED_INTS_32:
ints = new int[maxDocs];
docValuesArray = DocValuesArraySource.forType(type).newFromArray(ints);
break;
case FIXED_INTS_64:
longs = new long[maxDocs];
docValuesArray = DocValuesArraySource.forType(type)
.newFromArray(longs);
break;
case VAR_INTS:
longs = new long[maxDocs];
docValuesArray = new VarIntsArraySource(type, longs);
break;
case FIXED_INTS_8:
bytes = new byte[maxDocs];
docValuesArray = DocValuesArraySource.forType(type).newFromArray(bytes);
break;
case FLOAT_32:
floats = new float[maxDocs];
docValuesArray = DocValuesArraySource.forType(type)
.newFromArray(floats);
break;
case FLOAT_64:
doubles = new double[maxDocs];
docValuesArray = DocValuesArraySource.forType(type).newFromArray(
doubles);
break;
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
assert comp != null;
hash = new BytesRefHash();
BytesSource bytesSource = new BytesSource(type, comp, maxDocs, hash);
ints = bytesSource.docIdToEntry;
source = bytesSource;
scratch = new BytesRef();
break;
}
if (docValuesArray != null) {
assert source == null;
this.source = docValuesArray;
}
}
public void fromString(int ord, BytesRef ref, int offset) {
switch (type) {
case FIXED_INTS_16:
assert shorts != null;
shorts[ord] = Short.parseShort(readString(offset, ref));
break;
case FIXED_INTS_32:
assert ints != null;
ints[ord] = Integer.parseInt(readString(offset, ref));
break;
case FIXED_INTS_64:
case VAR_INTS:
assert longs != null;
longs[ord] = Long.parseLong(readString(offset, ref));
break;
case FIXED_INTS_8:
assert bytes != null;
bytes[ord] = (byte) Integer.parseInt(readString(offset, ref));
break;
case FLOAT_32:
assert floats != null;
floats[ord] = Float.parseFloat(readString(offset, ref));
break;
case FLOAT_64:
assert doubles != null;
doubles[ord] = Double.parseDouble(readString(offset, ref));
break;
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
scratch.bytes = ref.bytes;
scratch.length = ref.length - offset;
scratch.offset = ref.offset + offset;
int key = hash.add(scratch);
ints[ord] = key < 0 ? (-key) - 1 : key;
break;
}
}
public Source getSource() {
if (source instanceof BytesSource) {
((BytesSource) source).maybeSort();
}
return source;
}
}
private static final class BytesSource extends SortedSource {
private final BytesRefHash hash;
int[] docIdToEntry;
int[] sortedEntries;
int[] adresses;
private final boolean isSorted;
protected BytesSource(Type type, Comparator<BytesRef> comp, int maxDoc,
BytesRefHash hash) {
super(type, comp);
docIdToEntry = new int[maxDoc];
this.hash = hash;
isSorted = type == Type.BYTES_FIXED_SORTED
|| type == Type.BYTES_VAR_SORTED;
}
void maybeSort() {
if (isSorted) {
adresses = new int[hash.size()];
sortedEntries = hash.sort(getComparator());
for (int i = 0; i < adresses.length; i++) {
int entry = sortedEntries[i];
adresses[entry] = i;
}
}
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
if (isSorted) {
return hash.get(sortedEntries[ord(docID)], ref);
} else {
return hash.get(docIdToEntry[docID], ref);
}
}
@Override
public SortedSource asSortedSource() {
if (isSorted) {
return this;
}
return null;
}
@Override
public int ord(int docID) {
assert isSorted;
try {
return adresses[docIdToEntry[docID]];
} catch (Exception e) {
return 0;
}
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
assert isSorted;
return hash.get(sortedEntries[ord], bytesRef);
}
@Override
public Reader getDocToOrd() {
return null;
}
@Override
public int getValueCount() {
return hash.size();
}
}
private static class VarIntsArraySource extends Source {
private final long[] array;
protected VarIntsArraySource(Type type, long[] array) {
super(type);
this.array = array;
}
@Override
public long getInt(int docID) {
return array[docID];
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
DocValuesArraySource.copyLong(ref, getInt(docID));
return ref;
}
}
}
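To make the per-type parsing in ValueReader.fromString() above concrete, here is a tiny self-contained sketch decoding one FIXED_INTS_32-style line the same way: take the bytes after the value prefix, decode them as UTF-8, and Integer.parseInt() the result. The "  value " prefix is an assumed placeholder for the real VALUE constant.
import java.nio.charset.StandardCharsets;

public class ValueLineSketch {
  // Mirrors readString(offset, ref) + Integer.parseInt(...) for FIXED_INTS_32.
  static int parseIntValue(byte[] line, String prefix) {
    String text = new String(line, prefix.length(),
        line.length - prefix.length(), StandardCharsets.UTF_8);
    return Integer.parseInt(text);
  }
  public static void main(String[] args) {
    byte[] line = "  value 42".getBytes(StandardCharsets.UTF_8);
    System.out.println(parseIntValue(line, "  value "));  // 42
  }
}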

View File

@ -60,12 +60,17 @@ public abstract class AtomicReader extends IndexReader {
return readerContext;
}
/** Returns true if there are norms stored for this field. */
public boolean hasNorms(String field) throws IOException {
// backward compatible implementation.
// SegmentReader has an efficient implementation.
/**
* Returns true if there are norms stored for this field.
* @deprecated (4.0) use {@link #getFieldInfos()} and check {@link FieldInfo#hasNorms()}
* for the field instead.
*/
@Deprecated
public final boolean hasNorms(String field) throws IOException {
ensureOpen();
return normValues(field) != null;
// note: using normValues(field) != null would potentially cause i/o
FieldInfo fi = getFieldInfos().fieldInfo(field);
return fi != null && fi.hasNorms();
}
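A short sketch of the replacement the @deprecated note points at: instead of the deprecated hasNorms(field) (whose new body is shown above), consult the FieldInfo directly. The field/reader names are placeholders; this assumes the AtomicReader/FieldInfos API as it stands on this branch.
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo;

public class HasNormsSketch {
  // Equivalent of the deprecated reader.hasNorms(field), without any I/O.
  static boolean hasNorms(AtomicReader reader, String field) {
    FieldInfo fi = reader.getFieldInfos().fieldInfo(field);
    return fi != null && fi.hasNorms();
  }
}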
/**

View File

@ -53,6 +53,14 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
private final int numDocs;
private final boolean hasDeletions;
/**
* Constructs a {@code BaseCompositeReader} on the given subReaders.
* @param subReaders the wrapped sub-readers. This array is returned by
* {@link #getSequentialSubReaders} and used to resolve the correct
* subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
* cloned and not protected for modification, the subclass is responsible
* to do this.
*/
protected BaseCompositeReader(R[] subReaders) throws IOException {
this.subReaders = subReaders;
starts = new int[subReaders.length + 1]; // build starts array

View File

@ -210,7 +210,7 @@ class BufferedDeletesStream {
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
final IndexWriter.ReadersAndLiveDocs rld = readerPool.get(info, true);
final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
int delCount = 0;
final boolean segAllDeletes;
@ -224,11 +224,12 @@ class BufferedDeletesStream {
// Don't delete by Term here; DocumentsWriterPerThread
// already did that on flush:
delCount += applyQueryDeletes(packet.queriesIterable(), rld, reader);
final int fullDelCount = rld.info.getDelCount() + rld.pendingDeleteCount;
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
assert fullDelCount <= rld.info.docCount;
segAllDeletes = fullDelCount == rld.info.docCount;
} finally {
readerPool.release(reader, false);
rld.release(reader);
readerPool.release(rld);
}
anyNewDeletes |= delCount > 0;
@ -262,18 +263,19 @@ class BufferedDeletesStream {
if (coalescedDeletes != null) {
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
final IndexWriter.ReadersAndLiveDocs rld = readerPool.get(info, true);
final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
int delCount = 0;
final boolean segAllDeletes;
try {
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), rld, reader);
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), rld, reader);
final int fullDelCount = rld.info.getDelCount() + rld.pendingDeleteCount;
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
assert fullDelCount <= rld.info.docCount;
segAllDeletes = fullDelCount == rld.info.docCount;
} finally {
readerPool.release(reader, false);
} finally {
rld.release(reader);
readerPool.release(rld);
}
anyNewDeletes |= delCount > 0;
@ -353,7 +355,7 @@ class BufferedDeletesStream {
}
// Delete by Term
private synchronized long applyTermDeletes(Iterable<Term> termsIter, IndexWriter.ReadersAndLiveDocs rld, SegmentReader reader) throws IOException {
private synchronized long applyTermDeletes(Iterable<Term> termsIter, ReadersAndLiveDocs rld, SegmentReader reader) throws IOException {
long delCount = 0;
Fields fields = reader.fields();
if (fields == null) {
@ -394,7 +396,7 @@ class BufferedDeletesStream {
// System.out.println(" term=" + term);
if (termsEnum.seekExact(term.bytes(), false)) {
DocsEnum docsEnum = termsEnum.docs(rld.liveDocs, docs, false);
DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, false);
//System.out.println("BDS: got docsEnum=" + docsEnum);
if (docsEnum != null) {
@ -434,7 +436,7 @@ class BufferedDeletesStream {
}
// Delete by query
private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, IndexWriter.ReadersAndLiveDocs rld, final SegmentReader reader) throws IOException {
private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, ReadersAndLiveDocs rld, final SegmentReader reader) throws IOException {
long delCount = 0;
final AtomicReaderContext readerContext = reader.getTopReaderContext();
boolean any = false;

View File

@ -651,28 +651,17 @@ public class CheckIndex {
if (infoStream != null) {
infoStream.print(" test: field norms.........");
}
DocValues dv;
for (FieldInfo info : fieldInfos) {
if (reader.hasNorms(info.name)) {
dv = reader.normValues(info.name);
assert dv != null;
if (dv.getSource().hasArray()) {
Object array = dv.getSource().getArray();
if (Array.getLength(array) != reader.maxDoc()) {
throw new RuntimeException("norms for field: " + info.name + " are of the wrong size");
}
}
if (!info.isIndexed || info.omitNorms) {
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
if (info.hasNorms()) {
assert reader.hasNorms(info.name); // deprecated path
DocValues dv = reader.normValues(info.name);
checkDocValues(dv, info.name, info.getNormType(), reader.maxDoc());
++status.totFields;
} else {
assert !reader.hasNorms(info.name); // deprecated path
if (reader.normValues(info.name) != null) {
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
if (info.normsPresent()) {
throw new RuntimeException("field: " + info.name + " should have norms but omits them!");
}
}
}
@ -1171,6 +1160,92 @@ public class CheckIndex {
return status;
}
/** Helper method to verify values (either docvalues or norms), also checking
* type and size against fieldinfos/segmentinfo
*/
private void checkDocValues(DocValues docValues, String fieldName, DocValues.Type expectedType, int expectedDocs) throws IOException {
if (docValues == null) {
throw new RuntimeException("field: " + fieldName + " omits docvalues but should have them!");
}
DocValues.Type type = docValues.getType();
if (type != expectedType) {
throw new RuntimeException("field: " + fieldName + " has type: " + type + " but fieldInfos says:" + expectedType);
}
final Source values = docValues.getDirectSource();
int size = docValues.getValueSize();
for (int i = 0; i < expectedDocs; i++) {
switch (type) {
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_STRAIGHT:
BytesRef bytes = new BytesRef();
values.getBytes(i, bytes);
if (size != -1 && size != bytes.length) {
throw new RuntimeException("field: " + fieldName + " returned wrongly sized bytes, was: " + bytes.length + " should be: " + size);
}
break;
case FLOAT_32:
assert size == 4;
values.getFloat(i);
break;
case FLOAT_64:
assert size == 8;
values.getFloat(i);
break;
case VAR_INTS:
assert size == -1;
values.getInt(i);
break;
case FIXED_INTS_16:
assert size == 2;
values.getInt(i);
break;
case FIXED_INTS_32:
assert size == 4;
values.getInt(i);
break;
case FIXED_INTS_64:
assert size == 8;
values.getInt(i);
break;
case FIXED_INTS_8:
assert size == 1;
values.getInt(i);
break;
default:
throw new IllegalArgumentException("Field: " + fieldName
+ " - no such DocValues type: " + type);
}
}
if (type == DocValues.Type.BYTES_FIXED_SORTED || type == DocValues.Type.BYTES_VAR_SORTED) {
// check sorted bytes
SortedSource sortedValues = values.asSortedSource();
Comparator<BytesRef> comparator = sortedValues.getComparator();
int lastOrd = -1;
BytesRef lastBytes = new BytesRef();
for (int i = 0; i < expectedDocs; i++) {
int ord = sortedValues.ord(i);
if (ord < 0 || ord > expectedDocs) {
throw new RuntimeException("field: " + fieldName + " ord is out of bounds: " + ord);
}
BytesRef bytes = new BytesRef();
sortedValues.getByOrd(ord, bytes);
if (lastOrd != -1) {
int ordComp = Integer.signum(new Integer(ord).compareTo(new Integer(lastOrd)));
int bytesComp = Integer.signum(comparator.compare(bytes, lastBytes));
if (ordComp != bytesComp) {
throw new RuntimeException("field: " + fieldName + " ord comparison is wrong: " + ordComp + " comparator claims: " + bytesComp);
}
}
lastOrd = ord;
lastBytes = bytes;
}
}
}
private Status.DocValuesStatus testDocValues(SegmentInfo info,
SegmentReader reader) {
final Status.DocValuesStatus status = new Status.DocValuesStatus();
@ -1183,87 +1258,7 @@ public class CheckIndex {
if (fieldInfo.hasDocValues()) {
status.totalValueFields++;
final DocValues docValues = reader.docValues(fieldInfo.name);
if (docValues == null) {
throw new RuntimeException("field: " + fieldInfo.name + " omits docvalues but should have them!");
}
DocValues.Type type = docValues.type();
if (type != fieldInfo.getDocValuesType()) {
throw new RuntimeException("field: " + fieldInfo.name + " has type: " + type + " but fieldInfos says:" + fieldInfo.getDocValuesType());
}
final Source values = docValues.getDirectSource();
final int maxDoc = reader.maxDoc();
int size = docValues.getValueSize();
for (int i = 0; i < maxDoc; i++) {
switch (fieldInfo.getDocValuesType()) {
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_STRAIGHT:
BytesRef bytes = new BytesRef();
values.getBytes(i, bytes);
if (size != -1 && size != bytes.length) {
throw new RuntimeException("field: " + fieldInfo.name + " returned wrongly sized bytes, was: " + bytes.length + " should be: " + size);
}
break;
case FLOAT_32:
assert size == 4;
values.getFloat(i);
break;
case FLOAT_64:
assert size == 8;
values.getFloat(i);
break;
case VAR_INTS:
assert size == -1;
values.getInt(i);
break;
case FIXED_INTS_16:
assert size == 2;
values.getInt(i);
break;
case FIXED_INTS_32:
assert size == 4;
values.getInt(i);
break;
case FIXED_INTS_64:
assert size == 8;
values.getInt(i);
break;
case FIXED_INTS_8:
assert size == 1;
values.getInt(i);
break;
default:
throw new IllegalArgumentException("Field: " + fieldInfo.name
+ " - no such DocValues type: " + fieldInfo.getDocValuesType());
}
}
if (type == DocValues.Type.BYTES_FIXED_SORTED || type == DocValues.Type.BYTES_VAR_SORTED) {
// check sorted bytes
SortedSource sortedValues = values.asSortedSource();
Comparator<BytesRef> comparator = sortedValues.getComparator();
int lastOrd = -1;
BytesRef lastBytes = new BytesRef();
for (int i = 0; i < maxDoc; i++) {
int ord = sortedValues.ord(i);
if (ord < 0 || ord > maxDoc) {
throw new RuntimeException("field: " + fieldInfo.name + " ord is out of bounds: " + ord);
}
BytesRef bytes = new BytesRef();
sortedValues.getByOrd(ord, bytes);
if (lastOrd != -1) {
int ordComp = Integer.signum(new Integer(ord).compareTo(new Integer(lastOrd)));
int bytesComp = Integer.signum(comparator.compare(bytes, lastBytes));
if (ordComp != bytesComp) {
throw new RuntimeException("field: " + fieldInfo.name + " ord comparison is wrong: " + ordComp + " comparator claims: " + bytesComp);
}
}
lastOrd = ord;
lastBytes = bytes;
}
}
checkDocValues(docValues, fieldInfo.name, fieldInfo.getDocValuesType(), reader.maxDoc());
} else {
if (reader.docValues(fieldInfo.name) != null) {
throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!");

View File

@ -81,6 +81,9 @@ public abstract class CompositeReader extends IndexReader {
* If this method returns an empty array, that means this
* reader is a null reader (for example a MultiReader
* that has no sub readers).
* <p><b>Warning:</b> Don't modify the returned array!
* Doing so will corrupt the internal structure of this
* {@code CompositeReader}.
*/
public abstract IndexReader[] getSequentialSubReaders();
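Given the new warning, callers that want to reorder or filter sub-readers should work on a copy rather than on the returned array itself; a minimal sketch (assuming this branch's CompositeReader API):
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.IndexReader;

public class SubReadersCopySketch {
  // Never mutate the array returned by getSequentialSubReaders();
  // clone it first if it needs to be rearranged locally.
  static IndexReader[] copyOfSubReaders(CompositeReader reader) {
    return reader.getSequentialSubReaders().clone();
  }
}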

View File

@ -323,8 +323,17 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
}
}
protected DirectoryReader(Directory directory, AtomicReader[] readers) throws CorruptIndexException, IOException {
super(readers);
/**
* Expert: Constructs a {@code DirectoryReader} on the given subReaders.
* @param segmentReaders the wrapped atomic index segment readers. This array is
* returned by {@link #getSequentialSubReaders} and used to resolve the correct
* subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
* cloned and not protected for modification outside of this reader.
* Subclasses of {@code DirectoryReader} should take care to not allow
* modification of this internal array, e.g. {@link #doOpenIfChanged()}.
*/
protected DirectoryReader(Directory directory, AtomicReader[] segmentReaders) throws CorruptIndexException, IOException {
super(segmentReaders);
this.directory = directory;
}

View File

@ -216,6 +216,13 @@ public class DocTermOrds {
}
}
/**
* @return The number of terms in this field
*/
public int numTerms() {
return numTermsInField;
}
/** Subclass can override this */
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
}

View File

@ -90,7 +90,7 @@ public abstract class DocValues implements Closeable {
/**
* Returns the {@link Type} of this {@link DocValues} instance
*/
public abstract Type type();
public abstract Type getType();
/**
* Closes this {@link DocValues} instance. This method should only be called
@ -191,7 +191,7 @@ public abstract class DocValues implements Closeable {
*
* @return the {@link Type} of this source.
*/
public Type type() {
public Type getType() {
return type;
}
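Since this hunk renames type() to getType() on both DocValues and Source, call sites now look like the following sketch (the field name and reader are placeholders; API as on this branch):
import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;

public class GetTypeSketch {
  // After this patch: DocValues.getType() / Source.getType(), not type().
  static DocValues.Type normTypeOf(AtomicReader reader, String field) throws IOException {
    DocValues dv = reader.normValues(field);
    return dv == null ? null : dv.getType();
  }
}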

View File

@ -122,14 +122,23 @@ public final class FieldInfo {
}
}
/**
* @return true if this field has any docValues.
*/
public boolean hasDocValues() {
return docValueType != null;
}
/**
   * @return {@link DocValues.Type} of the docValues. This may be null if the field has no docvalues.
*/
public DocValues.Type getDocValuesType() {
return docValueType;
}
/**
   * @return {@link DocValues.Type} of the norm. This may be null if the field has no norms.
*/
public DocValues.Type getNormType() {
return normType;
}
@ -146,11 +155,17 @@ public final class FieldInfo {
}
}
/**
* @return true if norms are explicitly omitted for this field
*/
public boolean omitNorms() {
return omitNorms;
}
public boolean normsPresent() {
/**
* @return true if this field actually has any norms.
*/
public boolean hasNorms() {
return isIndexed && !omitNorms && normType != null;
}
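The new FieldInfo accessors in this hunk compose naturally; a small sketch of inspecting one field (the FieldInfo would come from FieldInfos, e.g. via fieldInfo(name)):
import org.apache.lucene.index.FieldInfo;

public class FieldInfoSketch {
  static String describe(FieldInfo info) {
    StringBuilder sb = new StringBuilder(info.name);
    if (info.hasDocValues()) {                  // docValueType != null
      sb.append(" docValues=").append(info.getDocValuesType());
    }
    if (info.omitNorms()) {
      sb.append(" (norms omitted)");
    } else if (info.hasNorms()) {               // indexed, not omitted, normType != null
      sb.append(" norms=").append(info.getNormType());
    }
    return sb.toString();
  }
}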

View File

@ -178,7 +178,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return fis;
}
/** Returns true if any fields do not positions */
/** Returns true if any fields have positions */
public boolean hasProx() {
if (isReadOnly()) {
return hasProx;
@ -349,6 +349,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return fi;
}
/**
* lookup the number of a field by name.
*
* @param fieldName field's name
* @return number of field, or -1 if it does not exist.
*/
public int fieldNumber(String fieldName) {
FieldInfo fi = fieldInfo(fieldName);
return (fi != null) ? fi.number : -1;
@ -384,11 +390,17 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return byNumber.values().iterator();
}
/**
* @return number of fields
*/
public int size() {
assert byNumber.size() == byName.size();
return byNumber.size();
}
/**
* @return true if at least one field has any vectors
*/
public boolean hasVectors() {
if (isReadOnly()) {
return hasVectors;
@ -402,9 +414,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return false;
}
/**
* @return true if at least one field has any norms
*/
public boolean hasNorms() {
for (FieldInfo fi : this) {
if (fi.normsPresent()) {
if (fi.hasNorms()) {
return true;
}
}
@ -441,7 +456,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
return roFis;
}
public boolean anyDocValuesFields() {
/**
* @return true if at least one field has docValues
*/
public boolean hasDocValues() {
for (FieldInfo fi : this) {
if (fi.hasDocValues()) {
return true;

View File

@ -359,12 +359,6 @@ public class FilterAtomicReader extends AtomicReader {
return in.hasDeletions();
}
@Override
public boolean hasNorms(String field) throws IOException {
ensureOpen();
return in.hasNorms(field);
}
@Override
protected void doClose() throws IOException {
in.close();

View File

@ -453,7 +453,7 @@ final class IndexFileDeleter {
assert Thread.holdsLock(writer);
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "now checkpoint \"" + writer.segString(segmentInfos) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
infoStream.message("IFD", "now checkpoint \"" + writer.segString(writer.toLiveInfos(segmentInfos)) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
}
// Try again now to delete any previously un-deletable

View File

@ -41,25 +41,27 @@ public abstract class IndexReaderContext {
this.isTopLevel = parent==null;
}
/** Returns the {@link IndexReader}, this context represents. */
public abstract IndexReader reader();
/**
   * Returns the context's leaves if this context is a top-level context,
   * otherwise <code>null</code>. For convenience, if this is an
   * {@link AtomicReaderContext} this returns itself as the only leaf.
* <p>
   * Note: this is a convenience method since leaves can always be obtained by
   * <p>Note: this is a convenience method since leaves can always be obtained by
* walking the context tree.
* <p><b>Warning:</b> Don't modify the returned array!
* Doing so will corrupt the internal structure of this
* {@code IndexReaderContext}.
*/
public abstract AtomicReaderContext[] leaves();
/**
* Returns the context's children iff this context is a composite context
* otherwise <code>null</code>.
* <p>
* Note: this method is a convenience method to prevent
* <code>instanceof</code> checks and type-casts to
* {@link CompositeReaderContext}.
* <p><b>Warning:</b> Don't modify the returned array!
* Doing so will corrupt the internal structure of this
* {@code IndexReaderContext}.
*/
public abstract IndexReaderContext[] children();
}

View File

@ -33,7 +33,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@ -392,260 +391,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
return r;
}
// This class inherits all sync from IW:
class ReadersAndLiveDocs {
// Not final because we replace (clone) when we need to
// change it and it's been shared:
public final SegmentInfo info;
// Set once (null, and then maybe set, and never set again):
private SegmentReader reader;
// TODO: it's sometimes wasteful that we hold open two
// separate SRs (one for merging one for
// reading)... maybe just use a single SR? The gains of
// not loading the terms index (for merging in the
// non-NRT case) are far less now... and if the app has
// any deletes it'll open real readers anyway.
// Set once (null, and then maybe set, and never set again):
private SegmentReader mergeReader;
// Holds the current shared (readable and writable
// liveDocs). This is null when there are no deleted
// docs, and it's copy-on-write (cloned whenever we need
// to change it but it's been shared to an external NRT
// reader).
public Bits liveDocs;
// How many further deletions we've done against
// liveDocs vs when we loaded it or last wrote it:
public int pendingDeleteCount;
// True if the current liveDocs is referenced by an
// external NRT reader:
public boolean shared;
public ReadersAndLiveDocs(SegmentInfo info) {
this.info = info;
shared = true;
}
// Returns false if we are the only remaining refs of
// this reader:
public synchronized boolean anyOutsideRefs(SegmentReader sr) {
int myRefCounts = 0;
if (sr == reader) {
myRefCounts++;
}
if (sr == mergeReader) {
myRefCounts++;
}
final int rc = sr.getRefCount();
assert rc >= myRefCounts;
return rc > myRefCounts;
}
// Call only from assert!
public synchronized boolean verifyDocCounts() {
int count;
if (liveDocs != null) {
count = 0;
for(int docID=0;docID<info.docCount;docID++) {
if (liveDocs.get(docID)) {
count++;
}
}
} else {
count = info.docCount;
}
      assert info.docCount - info.getDelCount() - pendingDeleteCount == count: "info.docCount=" + info.docCount + " info.getDelCount()=" + info.getDelCount() + " pendingDeleteCount=" + pendingDeleteCount + " count=" + count;
return true;
}
// Returns true if any reader remains
public synchronized boolean removeReader(SegmentReader sr, boolean drop) throws IOException {
if (sr == reader) {
//System.out.println(" non-merge reader");
reader.decRef();
reader = null;
}
if (sr == mergeReader) {
//System.out.println(" merge reader");
mergeReader.decRef();
mergeReader = null;
if (drop && reader != null) {
//System.out.println(" also release normal reader rc=" + rld.reader.getRefCount());
reader.decRef();
reader = null;
}
}
return reader != null || mergeReader != null;
}
// Get reader for searching/deleting
public synchronized SegmentReader getReader(IOContext context) throws IOException {
//System.out.println(" livedocs=" + rld.liveDocs);
if (reader == null) {
reader = new SegmentReader(info, config.getReaderTermsIndexDivisor(), context);
if (liveDocs == null) {
liveDocs = reader.getLiveDocs();
}
//System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool");
}
// Ref for caller
reader.incRef();
return reader;
}
// Get reader for merging (does not load the terms
// index):
public synchronized SegmentReader getMergeReader(IOContext context) throws IOException {
//System.out.println(" livedocs=" + rld.liveDocs);
if (mergeReader == null) {
if (reader != null) {
// Just use the already opened non-merge reader
// for merging. In the NRT case this saves us
// pointless double-open:
//System.out.println("PROMOTE non-merge reader seg=" + rld.info);
reader.incRef();
mergeReader = reader;
} else {
mergeReader = new SegmentReader(info, -1, context);
if (liveDocs == null) {
liveDocs = mergeReader.getLiveDocs();
}
}
}
// Ref for caller
mergeReader.incRef();
return mergeReader;
}
public synchronized boolean delete(int docID) {
assert liveDocs != null;
assert docID >= 0 && docID < liveDocs.length() : "out of bounds: docid=" + docID + ",liveDocsLength=" + liveDocs.length();
assert !shared;
final boolean didDelete = liveDocs.get(docID);
if (didDelete) {
((MutableBits) liveDocs).clear(docID);
pendingDeleteCount++;
//System.out.println(" new del seg=" + info + " docID=" + docID + " pendingDelCount=" + pendingDeleteCount + " totDelCount=" + (info.docCount-liveDocs.count()));
}
return didDelete;
}
public synchronized void dropReaders() throws IOException {
if (reader != null) {
//System.out.println(" pool.drop info=" + info + " rc=" + reader.getRefCount());
reader.decRef();
reader = null;
}
if (mergeReader != null) {
//System.out.println(" pool.drop info=" + info + " merge rc=" + mergeReader.getRefCount());
mergeReader.decRef();
mergeReader = null;
}
}
/**
* Returns a ref to a clone. NOTE: this clone is not
* enrolled in the pool, so you should simply close()
* it when you're done (ie, do not call release()).
*/
public synchronized SegmentReader getReadOnlyClone(IOContext context) throws IOException {
if (reader == null) {
getReader(context).decRef();
assert reader != null;
}
shared = true;
if (liveDocs != null) {
return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.docCount - info.getDelCount() - pendingDeleteCount);
} else {
reader.incRef();
return reader;
}
}
public synchronized void initWritableLiveDocs() throws IOException {
assert Thread.holdsLock(IndexWriter.this);
assert info.docCount > 0;
//System.out.println("initWritableLivedocs seg=" + info + " liveDocs=" + liveDocs + " shared=" + shared);
if (shared) {
// Copy on write: this means we've cloned a
// SegmentReader sharing the current liveDocs
// instance; must now make a private clone so we can
// change it:
LiveDocsFormat liveDocsFormat = info.getCodec().liveDocsFormat();
if (liveDocs == null) {
//System.out.println("create BV seg=" + info);
liveDocs = liveDocsFormat.newLiveDocs(info.docCount);
} else {
liveDocs = liveDocsFormat.newLiveDocs(liveDocs);
}
shared = false;
} else {
assert liveDocs != null;
}
}
public synchronized Bits getReadOnlyLiveDocs() {
//System.out.println("getROLiveDocs seg=" + info);
assert Thread.holdsLock(IndexWriter.this);
shared = true;
//if (liveDocs != null) {
//System.out.println(" liveCount=" + liveDocs.count());
//}
return liveDocs;
}
// Commit live docs to the directory (writes new
// _X_N.del files); returns true if it wrote the file
// and false if there were no new deletes to write:
public synchronized boolean writeLiveDocs(Directory dir) throws IOException {
//System.out.println("rld.writeLiveDocs seg=" + info + " pendingDelCount=" + pendingDeleteCount);
if (pendingDeleteCount != 0) {
// We have new deletes
assert liveDocs.length() == info.docCount;
// Save in case we need to rollback on failure:
final SegmentInfo sav = (SegmentInfo) info.clone();
info.advanceDelGen();
info.setDelCount(info.getDelCount() + pendingDeleteCount);
// We can write directly to the actual name (vs to a
// .tmp & renaming it) because the file is not live
// until segments file is written:
boolean success = false;
try {
info.getCodec().liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, dir, info, IOContext.DEFAULT);
success = true;
} finally {
if (!success) {
info.reset(sav);
}
}
pendingDeleteCount = 0;
return true;
} else {
return false;
}
}
@Override
public String toString() {
return "SegmentLiveDocs(seg=" + info + " pendingDeleteCount=" + pendingDeleteCount + " shared=" + shared + ")";
}
}
/** Holds shared SegmentReader instances. IndexWriter uses
* SegmentReaders for 1) applying deletes, 2) doing
* merges, 3) handing out a real-time reader. This pool
@ -665,44 +410,36 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
return true;
}
/**
* Release the segment reader (i.e. decRef it and close if there
* are no more references). If drop is true then we
* remove this entry from the pool.
* @param sr
* @throws IOException
*/
public synchronized void release(SegmentReader sr, boolean drop) throws IOException {
// Drop caller's ref; for an external reader (not
// pooled), this decRef will close it
//System.out.println("pool.release seg=" + sr.getSegmentInfo() + " rc=" + sr.getRefCount() + " drop=" + drop);
sr.decRef();
public synchronized void drop(SegmentInfo info) throws IOException {
final ReadersAndLiveDocs rld = readerMap.get(info);
if (rld != null) {
assert info == rld.info;
readerMap.remove(info);
rld.dropReaders();
}
}
final ReadersAndLiveDocs rld = readerMap.get(sr.getSegmentInfo());
public synchronized void release(ReadersAndLiveDocs rld) throws IOException {
if (rld != null && (drop || (!poolReaders && !rld.anyOutsideRefs(sr)))) {
// Matches incRef in get:
rld.decRef();
// Discard (don't save) changes when we are dropping
// the reader; this is used only on the sub-readers
// after a successful merge. If deletes had
// accumulated on those sub-readers while the merge
// is running, by now we have carried forward those
// deletes onto the newly merged segment, so we can
// discard them on the sub-readers:
// Pool still holds a ref:
assert rld.refCount() >= 1;
if (!drop) {
if (rld.writeLiveDocs(directory)) {
assert infoIsLive(sr.getSegmentInfo());
// Must checkpoint w/ deleter, because we just
// created created new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
}
if (!poolReaders && rld.refCount() == 1) {
// This is the last ref to this RLD, and we're not
// pooling, so remove it:
if (rld.writeLiveDocs(directory)) {
// Make sure we only write del docs for a live segment:
assert infoIsLive(rld.info);
// Must checkpoint w/ deleter, because we just
// created created new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
}
if (!rld.removeReader(sr, drop)) {
//System.out.println("DROP seg=" + rld.info + " " + readerMap.size() + " in pool");
readerMap.remove(sr.getSegmentInfo());
}
rld.dropReaders();
readerMap.remove(rld.info);
}
}
@ -712,8 +449,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final Iterator<Map.Entry<SegmentInfo,ReadersAndLiveDocs>> it = readerMap.entrySet().iterator();
while(it.hasNext()) {
final ReadersAndLiveDocs rld = it.next().getValue();
//System.out.println("pool.dropAll: seg=" + rld.info);
if (doSave && rld.writeLiveDocs(directory)) {
// Make sure we only write del docs for a live segment:
assert infoIsLive(rld.info);
// Must checkpoint w/ deleter, because we just
          // created a new _X_N.del file.
@ -735,13 +472,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
assert readerMap.size() == 0;
}
public synchronized void drop(SegmentInfo info) throws IOException {
final ReadersAndLiveDocs rld = readerMap.remove(info);
if (rld != null) {
rld.dropReaders();
}
}
/**
* Commit live docs changes for the segment readers for
* the provided infos.
@ -751,19 +481,23 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
public synchronized void commit(SegmentInfos infos) throws IOException {
for (SegmentInfo info : infos) {
final ReadersAndLiveDocs rld = readerMap.get(info);
if (rld != null && rld.writeLiveDocs(directory)) {
assert infoIsLive(info);
// Must checkpoint w/ deleter, because we just
          // created a new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
if (rld != null) {
assert rld.info == info;
if (rld.writeLiveDocs(directory)) {
// Make sure we only write del docs for a live segment:
assert infoIsLive(info);
// Must checkpoint w/ deleter, because we just
            // created a new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
}
}
}
}
/**
* Obtain a ReadersAndLiveDocs instance from the
* readerPool. If getReader is true, you must later call
* {@link #release(SegmentReader)}.
* readerPool. If create is true, you must later call
* {@link #release(ReadersAndLiveDocs)}.
* @throws IOException
*/
public synchronized ReadersAndLiveDocs get(SegmentInfo info, boolean create) {
@ -771,15 +505,22 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
assert info.dir == directory;
ReadersAndLiveDocs rld = readerMap.get(info);
//System.out.println("rld.get seg=" + info + " poolReaders=" + poolReaders);
if (rld == null) {
//System.out.println(" new rld");
if (!create) {
return null;
}
rld = new ReadersAndLiveDocs(info);
rld = new ReadersAndLiveDocs(IndexWriter.this, info);
// Steal initial reference:
readerMap.put(info, rld);
} else {
assert rld.info == info: "rld.info=" + rld.info + " info=" + info + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info);
}
if (create) {
// Return ref to caller:
rld.incRef();
}
return rld;
}
}
@ -795,7 +536,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final ReadersAndLiveDocs rld = readerPool.get(info, false);
if (rld != null) {
delCount += rld.pendingDeleteCount;
delCount += rld.getPendingDeleteCount();
}
return delCount;
}
@ -1116,7 +857,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
finishMerges(waitForMerges);
stopMerges = true;
}
mergeScheduler.close();
if (infoStream.isEnabled("IW")) {
@ -1160,8 +900,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
}
/** Returns the Directory used by this index. */
public Directory getDirectory() {
@ -2020,6 +1758,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
notifyAll();
}
// Don't bother saving any changes in our segmentInfos
readerPool.dropAll(false);
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
@ -2038,9 +1779,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// them:
deleter.checkpoint(segmentInfos, false);
deleter.refresh();
// Don't bother saving any changes in our segmentInfos
readerPool.dropAll(false);
}
lastCommitChangeCount = changeCount;
@ -3023,16 +2761,18 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final int docCount = info.docCount;
final Bits prevLiveDocs = merge.readerLiveDocs.get(i);
final Bits currentLiveDocs;
ReadersAndLiveDocs rld = readerPool.get(info, false);
// We enrolled in mergeInit:
assert rld != null;
currentLiveDocs = rld.liveDocs;
final ReadersAndLiveDocs rld = readerPool.get(info, false);
// We hold a ref so it should still be in the pool:
assert rld != null: "seg=" + info.name;
currentLiveDocs = rld.getLiveDocs();
if (prevLiveDocs != null) {
// If we had deletions on starting the merge we must
// still have deletions now:
assert currentLiveDocs != null;
assert prevLiveDocs.length() == docCount;
assert currentLiveDocs.length() == docCount;
// There were deletes on this segment when the merge
// started. The merge has collapsed away those
@ -3066,9 +2806,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
} else {
docUpto += info.docCount - info.getDelCount() - rld.pendingDeleteCount;
docUpto += info.docCount - info.getDelCount() - rld.getPendingDeleteCount();
}
} else if (currentLiveDocs != null) {
assert currentLiveDocs.length() == docCount;
// This segment had no deletes before but now it
// does:
for(int j=0; j<docCount; j++) {
@ -3087,11 +2828,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
assert docUpto == merge.info.docCount;
if (infoStream.isEnabled("IW")) {
if (mergedDeletes == null) {
infoStream.message("IW", "no new deletes since merge started");
} else {
infoStream.message("IW", mergedDeletes.pendingDeleteCount + " new deletes since merge started");
infoStream.message("IW", mergedDeletes.getPendingDeleteCount() + " new deletes since merge started");
}
}
@ -3136,7 +2879,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final ReadersAndLiveDocs mergedDeletes = merge.info.docCount == 0 ? null : commitMergedDeletes(merge);
assert mergedDeletes == null || mergedDeletes.pendingDeleteCount != 0;
assert mergedDeletes == null || mergedDeletes.getPendingDeleteCount() != 0;
// If the doc store we are using has been closed and
// is in now compound format (but wasn't when we
@ -3148,7 +2891,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
final boolean allDeleted = merge.segments.size() == 0 ||
merge.info.docCount == 0 ||
(mergedDeletes != null &&
mergedDeletes.pendingDeleteCount == merge.info.docCount);
mergedDeletes.getPendingDeleteCount() == merge.info.docCount);
if (infoStream.isEnabled("IW")) {
if (allDeleted) {
@ -3165,15 +2908,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
assert merge.info.docCount != 0 || keepFullyDeletedSegments || dropSegment;
segmentInfos.applyMergeChanges(merge, dropSegment);
if (dropSegment) {
readerPool.drop(merge.info);
deleter.deleteNewFiles(merge.info.files());
assert !segmentInfos.contains(merge.info);
} else {
if (mergedDeletes != null && !poolReaders) {
mergedDeletes.writeLiveDocs(directory);
readerPool.drop(merge.info);
if (mergedDeletes != null) {
if (dropSegment) {
mergedDeletes.dropChanges();
}
readerPool.release(mergedDeletes);
if (dropSegment) {
readerPool.drop(mergedDeletes.info);
}
}
@ -3289,7 +3031,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
infoStream.message("IW", "merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
}
}
//System.out.println(Thread.currentThread().getName() + ": merge end");
}
/** Hook that's called when the specified merge is complete. */
@ -3524,9 +3265,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
boolean drop = !suppressExceptions;
for (int i = 0; i < numSegments; i++) {
if (merge.readers.get(i) != null) {
final SegmentReader sr = merge.readers.get(i);
if (sr != null) {
try {
readerPool.release(merge.readers.get(i), drop);
final ReadersAndLiveDocs rld = readerPool.get(sr.getSegmentInfo(), false);
// We still hold a ref so it should not have been removed:
assert rld != null;
if (drop) {
rld.dropChanges();
}
rld.release(sr);
readerPool.release(rld);
if (drop) {
readerPool.drop(rld.info);
}
} catch (Throwable t) {
if (th == null) {
th = t;
@ -3589,17 +3341,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// Carefully pull the most recent live docs:
final Bits liveDocs;
final int delCount;
synchronized(this) {
// Must sync to ensure BufferedDeletesStream
// cannot change liveDocs/pendingDeleteCount while
// we pull a copy:
liveDocs = rld.getReadOnlyLiveDocs();
delCount = rld.getPendingDeleteCount() + info.getDelCount();
assert rld.verifyDocCounts();
if (infoStream.isEnabled("IW")) {
if (rld.pendingDeleteCount != 0) {
infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.pendingDeleteCount);
if (rld.getPendingDeleteCount() != 0) {
infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.getPendingDeleteCount());
} else if (info.getDelCount() != 0) {
infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount());
} else {
@ -3609,8 +3364,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
merge.readerLiveDocs.add(liveDocs);
merge.readers.add(reader);
final int delCount = rld.pendingDeleteCount + info.getDelCount();
assert delCount <= info.docCount;
assert delCount <= info.docCount: "delCount=" + delCount + " info.docCount=" + info.docCount + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
if (delCount < info.docCount) {
merger.add(reader, liveDocs);
}
@ -3708,7 +3462,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
mergedSegmentWarmer.warm(sr);
} finally {
synchronized(this) {
readerPool.release(sr, false);
rld.release(sr);
readerPool.release(rld);
}
}
}
@ -3762,11 +3517,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
/** @lucene.internal */
public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
final StringBuilder buffer = new StringBuilder();
for(final SegmentInfo s : infos) {
for(final SegmentInfo info : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
buffer.append(segString(s));
buffer.append(segString(info));
}
return buffer.toString();
}
@ -3819,6 +3574,24 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
return true;
}
// For infoStream output
synchronized SegmentInfos toLiveInfos(SegmentInfos sis) {
final SegmentInfos newSIS = new SegmentInfos();
final Map<SegmentInfo,SegmentInfo> liveSIS = new HashMap<SegmentInfo,SegmentInfo>();
for(SegmentInfo info : segmentInfos) {
liveSIS.put(info, info);
}
for(SegmentInfo info : sis) {
SegmentInfo liveInfo = liveSIS.get(info);
if (liveInfo != null) {
info = liveInfo;
}
newSIS.add(info);
}
return newSIS;
}
/** Walk through all files referenced by the current
* segmentInfos and ask the Directory to sync each file,
* if it wasn't already. If that succeeds, then we
@ -3853,7 +3626,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "startCommit index=" + segString(toSync) + " changeCount=" + changeCount);
infoStream.message("IW", "startCommit index=" + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount);
}
assert filesExist(toSync);
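The pool contract this refactoring settles on, and which the BufferedDeletesStream hunk above already follows, is: readerPool.get(info, true) hands back a ref-counted ReadersAndLiveDocs, the SegmentReader taken from it is returned via rld.release(reader), and the rld itself via readerPool.release(rld). Below is a self-contained mock of just that ref-count discipline; the Mock* types are stand-ins, not the package-private ReaderPool/ReadersAndLiveDocs/SegmentReader classes.
import java.util.concurrent.atomic.AtomicInteger;

public class PoolDisciplineSketch {
  static class MockReader {
    final AtomicInteger refs = new AtomicInteger(1);      // owner's ref
    void incRef() { refs.incrementAndGet(); }
    void decRef() { refs.decrementAndGet(); }
  }
  static class MockRld {                                  // ~ ReadersAndLiveDocs
    final AtomicInteger refs = new AtomicInteger(1);      // the pool's ref
    final MockReader reader = new MockReader();
    void incRef() { refs.incrementAndGet(); }
    void decRef() { refs.decrementAndGet(); }
    MockReader getReader() { reader.incRef(); return reader; }  // ref for caller
    void release(MockReader r) { r.decRef(); }                  // rld.release(reader)
  }
  static class MockPool {                                 // ~ IndexWriter.ReaderPool
    MockRld get() { MockRld rld = new MockRld(); rld.incRef(); return rld; }
    void release(MockRld rld) { rld.decRef(); }
  }
  public static void main(String[] args) {
    MockPool readerPool = new MockPool();
    MockRld rld = readerPool.get();        // must later call readerPool.release(rld)
    MockReader reader = rld.getReader();
    try {
      // apply deletes / run the merge against 'reader' here
    } finally {
      rld.release(reader);                 // give back the reader ref
      readerPool.release(rld);             // give back the rld ref
    }
    System.out.println(rld.refs.get() + " / " + reader.refs.get());  // 1 / 1
  }
}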

View File

@ -54,7 +54,7 @@ public interface IndexableField {
/** Non-null if this field has a Reader value */
public Reader readerValue();
/** Non-null if this field hasa numeric value */
/** Non-null if this field has a numeric value */
public Number numericValue();
/**

View File

@ -144,7 +144,7 @@ public class MultiDocValues extends DocValues {
}
final DocValues d = puller.pull(r, field);
if (d != null) {
TypePromoter incoming = TypePromoter.create(d.type(), d.getValueSize());
TypePromoter incoming = TypePromoter.create(d.getType(), d.getValueSize());
promotedType[0] = promotedType[0].promote(incoming);
} else if (puller.stopLoadingOnNull(r, field)){
promotedType[0] = TypePromoter.getIdentityPromoter(); // set to identity to return null
@ -203,8 +203,8 @@ public class MultiDocValues extends DocValues {
}
@Override
public Type type() {
return emptySource.type();
public Type getType() {
return emptySource.getType();
}
@Override
@ -230,8 +230,8 @@ public class MultiDocValues extends DocValues {
}
@Override
public Type type() {
return emptyFixedSource.type();
public Type getType() {
return emptyFixedSource.getType();
}
@Override
@ -519,7 +519,7 @@ public class MultiDocValues extends DocValues {
@Override
public SortedSource asSortedSource() {
if (type() == Type.BYTES_FIXED_SORTED || type() == Type.BYTES_VAR_SORTED) {
if (getType() == Type.BYTES_FIXED_SORTED || getType() == Type.BYTES_VAR_SORTED) {
}
return super.asSortedSource();
@ -586,7 +586,7 @@ public class MultiDocValues extends DocValues {
}
@Override
public Type type() {
public Type getType() {
return type;
}

View File

@ -51,7 +51,7 @@ public class MultiReader extends BaseCompositeReader<IndexReader> {
/**
* <p>Construct a MultiReader aggregating the named set of (sub)readers.
* @param subReaders set of (sub)readers
* @param subReaders set of (sub)readers; this array will be cloned.
* @param closeSubReaders indicates whether the subreaders should be closed
* when this MultiReader is closed
*/

View File

@ -263,13 +263,6 @@ public final class ParallelAtomicReader extends AtomicReader {
return fields;
}
@Override
public boolean hasNorms(String field) throws IOException {
ensureOpen();
AtomicReader reader = fieldToReader.get(field);
return reader==null ? false : reader.hasNorms(field);
}
@Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;

View File

@ -0,0 +1,303 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.MutableBits;
// Used by IndexWriter to hold open SegmentReaders (for
// searching or merging), plus pending deletes,
// for a given segment
class ReadersAndLiveDocs {
// Not final because we replace (clone) when we need to
// change it and it's been shared:
public final SegmentInfo info;
// Tracks how many consumers are using this instance:
private final AtomicInteger refCount = new AtomicInteger(1);
private final IndexWriter writer;
// Set once (null, and then maybe set, and never set again):
private SegmentReader reader;
// TODO: it's sometimes wasteful that we hold open two
// separate SRs (one for merging one for
// reading)... maybe just use a single SR? The gains of
// not loading the terms index (for merging in the
// non-NRT case) are far less now... and if the app has
// any deletes it'll open real readers anyway.
// Set once (null, and then maybe set, and never set again):
private SegmentReader mergeReader;
// Holds the current shared (readable and writable
// liveDocs). This is null when there are no deleted
// docs, and it's copy-on-write (cloned whenever we need
// to change it but it's been shared to an external NRT
// reader).
private Bits liveDocs;
// How many further deletions we've done against
// liveDocs vs when we loaded it or last wrote it:
private int pendingDeleteCount;
// True if the current liveDocs is referenced by an
// external NRT reader:
private boolean shared;
public ReadersAndLiveDocs(IndexWriter writer, SegmentInfo info) {
this.info = info;
this.writer = writer;
shared = true;
}
public void incRef() {
final int rc = refCount.incrementAndGet();
assert rc > 1;
}
public void decRef() {
final int rc = refCount.decrementAndGet();
assert rc >= 0;
}
public int refCount() {
final int rc = refCount.get();
assert rc >= 0;
return rc;
}
public synchronized int getPendingDeleteCount() {
return pendingDeleteCount;
}
// Call only from assert!
public synchronized boolean verifyDocCounts() {
int count;
if (liveDocs != null) {
count = 0;
for(int docID=0;docID<info.docCount;docID++) {
if (liveDocs.get(docID)) {
count++;
}
}
} else {
count = info.docCount;
}
assert info.docCount - info.getDelCount() - pendingDeleteCount == count: "info.docCount=" + info.docCount + " info.getDelCount()=" + info.getDelCount() + " pendingDeleteCount=" + pendingDeleteCount + " count=" + count;
return true;
}
// Get reader for searching/deleting
public synchronized SegmentReader getReader(IOContext context) throws IOException {
//System.out.println(" livedocs=" + rld.liveDocs);
if (reader == null) {
// We steal returned ref:
reader = new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), context);
if (liveDocs == null) {
liveDocs = reader.getLiveDocs();
}
//System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool");
//System.out.println(Thread.currentThread().getName() + ": getReader seg=" + info.name);
}
// Ref for caller
reader.incRef();
return reader;
}
// Get reader for merging (does not load the terms
// index):
public synchronized SegmentReader getMergeReader(IOContext context) throws IOException {
//System.out.println(" livedocs=" + rld.liveDocs);
if (mergeReader == null) {
if (reader != null) {
// Just use the already opened non-merge reader
// for merging. In the NRT case this saves us
// pointless double-open:
//System.out.println("PROMOTE non-merge reader seg=" + rld.info);
// Ref for us:
reader.incRef();
mergeReader = reader;
//System.out.println(Thread.currentThread().getName() + ": getMergeReader share seg=" + info.name);
} else {
//System.out.println(Thread.currentThread().getName() + ": getMergeReader seg=" + info.name);
// We steal returned ref:
mergeReader = new SegmentReader(info, -1, context);
if (liveDocs == null) {
liveDocs = mergeReader.getLiveDocs();
}
}
}
// Ref for caller
mergeReader.incRef();
return mergeReader;
}
public synchronized void release(SegmentReader sr) throws IOException {
assert info == sr.getSegmentInfo();
sr.decRef();
}
public synchronized boolean delete(int docID) {
assert liveDocs != null;
assert Thread.holdsLock(writer);
assert docID >= 0 && docID < liveDocs.length() : "out of bounds: docid=" + docID + " liveDocsLength=" + liveDocs.length() + " seg=" + info.name + " docCount=" + info.docCount;
assert !shared;
final boolean didDelete = liveDocs.get(docID);
if (didDelete) {
((MutableBits) liveDocs).clear(docID);
pendingDeleteCount++;
//System.out.println(" new del seg=" + info + " docID=" + docID + " pendingDelCount=" + pendingDeleteCount + " totDelCount=" + (info.docCount-liveDocs.count()));
}
return didDelete;
}
// NOTE: removes callers ref
public synchronized void dropReaders() throws IOException {
if (reader != null) {
//System.out.println(" pool.drop info=" + info + " rc=" + reader.getRefCount());
reader.decRef();
reader = null;
}
if (mergeReader != null) {
//System.out.println(" pool.drop info=" + info + " merge rc=" + mergeReader.getRefCount());
mergeReader.decRef();
mergeReader = null;
}
decRef();
}
/**
* Returns a ref to a clone. NOTE: this clone is not
* enrolled in the pool, so you should simply close()
* it when you're done (ie, do not call release()).
*/
public synchronized SegmentReader getReadOnlyClone(IOContext context) throws IOException {
if (reader == null) {
getReader(context).decRef();
assert reader != null;
}
shared = true;
if (liveDocs != null) {
return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.docCount - info.getDelCount() - pendingDeleteCount);
} else {
assert reader.getLiveDocs() == liveDocs;
reader.incRef();
return reader;
}
}
public synchronized void initWritableLiveDocs() throws IOException {
assert Thread.holdsLock(writer);
assert info.docCount > 0;
//System.out.println("initWritableLivedocs seg=" + info + " liveDocs=" + liveDocs + " shared=" + shared);
if (shared) {
// Copy on write: this means we've cloned a
// SegmentReader sharing the current liveDocs
// instance; must now make a private clone so we can
// change it:
LiveDocsFormat liveDocsFormat = info.getCodec().liveDocsFormat();
if (liveDocs == null) {
//System.out.println("create BV seg=" + info);
liveDocs = liveDocsFormat.newLiveDocs(info.docCount);
} else {
liveDocs = liveDocsFormat.newLiveDocs(liveDocs);
}
shared = false;
} else {
assert liveDocs != null;
}
}
public synchronized Bits getLiveDocs() {
assert Thread.holdsLock(writer);
return liveDocs;
}
public synchronized Bits getReadOnlyLiveDocs() {
//System.out.println("getROLiveDocs seg=" + info);
assert Thread.holdsLock(writer);
shared = true;
//if (liveDocs != null) {
//System.out.println(" liveCount=" + liveDocs.count());
//}
return liveDocs;
}
public synchronized void dropChanges() {
// Discard (don't save) changes when we are dropping
// the reader; this is used only on the sub-readers
// after a successful merge. If deletes had
// accumulated on those sub-readers while the merge
// is running, by now we have carried forward those
// deletes onto the newly merged segment, so we can
// discard them on the sub-readers:
pendingDeleteCount = 0;
}
// Commit live docs to the directory (writes new
// _X_N.del files); returns true if it wrote the file
// and false if there were no new deletes to write:
public synchronized boolean writeLiveDocs(Directory dir) throws IOException {
//System.out.println("rld.writeLiveDocs seg=" + info + " pendingDelCount=" + pendingDeleteCount);
if (pendingDeleteCount != 0) {
// We have new deletes
assert liveDocs.length() == info.docCount;
// Save in case we need to rollback on failure:
final SegmentInfo sav = (SegmentInfo) info.clone();
info.advanceDelGen();
info.setDelCount(info.getDelCount() + pendingDeleteCount);
// We can write directly to the actual name (vs to a
// .tmp & renaming it) because the file is not live
// until segments file is written:
boolean success = false;
try {
info.getCodec().liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, dir, info, IOContext.DEFAULT);
success = true;
} finally {
if (!success) {
info.reset(sav);
}
}
pendingDeleteCount = 0;
return true;
} else {
return false;
}
}
@Override
public String toString() {
return "ReadersAndLiveDocs(seg=" + info + " pendingDeleteCount=" + pendingDeleteCount + " shared=" + shared + ")";
}
}
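Aside: a minimal, self-contained sketch (not from this patch) of the copy-on-write liveDocs scheme that ReadersAndLiveDocs implements above. It uses java.util.BitSet in place of Lucene's Bits/MutableBits, and the class and method names below are invented for illustration only; the real class additionally asserts writer locking and ref counting that this sketch omits.

import java.util.BitSet;

// Illustrative copy-on-write live-docs holder: readers share the current bits,
// and the first mutation after sharing forces a private clone.
class CowLiveDocs {
  private BitSet liveDocs;       // null means "no deletions yet"
  private boolean shared = true; // true once an external reader holds the current instance
  private final int maxDoc;

  CowLiveDocs(int maxDoc) { this.maxDoc = maxDoc; }

  // Hand the current bits to an external reader; from now on they must not be mutated in place.
  synchronized BitSet getReadOnlyLiveDocs() {
    shared = true;
    return liveDocs;
  }

  // Make sure we own a private, mutable copy before changing anything.
  private synchronized void initWritableLiveDocs() {
    if (shared) {
      if (liveDocs == null) {
        liveDocs = new BitSet(maxDoc);
        liveDocs.set(0, maxDoc);              // all docs start out live
      } else {
        liveDocs = (BitSet) liveDocs.clone(); // copy on write
      }
      shared = false;
    }
  }

  // Returns true if the doc was live and is now deleted.
  synchronized boolean delete(int docID) {
    initWritableLiveDocs();
    boolean wasLive = liveDocs.get(docID);
    if (wasLive) {
      liveDocs.clear(docID);
    }
    return wasLive;
  }

  public static void main(String[] args) {
    CowLiveDocs ld = new CowLiveDocs(4);
    BitSet snapshot = ld.getReadOnlyLiveDocs(); // null: nothing deleted yet
    ld.delete(2);                               // forces a private clone before mutating
    System.out.println("snapshot=" + snapshot + " current=" + ld.getReadOnlyLiveDocs());
  }
}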

View File

@ -173,7 +173,7 @@ final class SegmentMerger {
// returns an updated typepromoter (tracking type and size) given a previous one,
// and a newly encountered docvalues
private TypePromoter mergeDocValuesType(TypePromoter previous, DocValues docValues) {
TypePromoter incoming = TypePromoter.create(docValues.type(), docValues.getValueSize());
TypePromoter incoming = TypePromoter.create(docValues.getType(), docValues.getValueSize());
if (previous == null) {
previous = TypePromoter.getIdentityPromoter();
}
@ -210,7 +210,7 @@ final class SegmentMerger {
TypePromoter previous = docValuesTypes.get(merged);
docValuesTypes.put(merged, mergeDocValuesType(previous, reader.docValues(fi.name)));
}
if (fi.normsPresent()) {
if (fi.hasNorms()) {
TypePromoter previous = normValuesTypes.get(merged);
normValuesTypes.put(merged, mergeDocValuesType(previous, reader.normValues(fi.name)));
}

View File

@ -150,13 +150,6 @@ public final class SegmentReader extends AtomicReader {
return si.docCount;
}
@Override
public boolean hasNorms(String field) {
ensureOpen();
FieldInfo fi = core.fieldInfos.fieldInfo(field);
return fi.normsPresent();
}
/** @lucene.internal */
public TermVectorsReader getTermVectorsReader() {
ensureOpen();

View File

@ -91,21 +91,27 @@ final class StandardDirectoryReader extends DirectoryReader {
try {
final SegmentInfo info = infos.info(i);
assert info.dir == dir;
final IndexWriter.ReadersAndLiveDocs rld = writer.readerPool.get(info, true);
final SegmentReader reader = rld.getReadOnlyClone(IOContext.READ);
if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
readers.add(reader);
infosUpto++;
} else {
reader.close();
segmentInfos.remove(infosUpto);
final ReadersAndLiveDocs rld = writer.readerPool.get(info, true);
try {
final SegmentReader reader = rld.getReadOnlyClone(IOContext.READ);
if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
// Steal the ref:
readers.add(reader);
infosUpto++;
} else {
reader.close();
segmentInfos.remove(infosUpto);
}
} finally {
writer.readerPool.release(rld);
}
success = true;
} catch(IOException ex) {
prior = ex;
} finally {
if (!success)
if (!success) {
IOUtils.closeWhileHandlingException(prior, readers);
}
}
}
return new StandardDirectoryReader(dir, readers.toArray(new SegmentReader[readers.size()]),
@ -219,12 +225,12 @@ final class StandardDirectoryReader extends DirectoryReader {
}
@Override
protected final DirectoryReader doOpenIfChanged() throws CorruptIndexException, IOException {
protected DirectoryReader doOpenIfChanged() throws CorruptIndexException, IOException {
return doOpenIfChanged(null);
}
@Override
protected final DirectoryReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
protected DirectoryReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
ensureOpen();
// If we were obtained by writer.getReader(), re-ask the
@ -237,7 +243,7 @@ final class StandardDirectoryReader extends DirectoryReader {
}
@Override
protected final DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
ensureOpen();
if (writer == this.writer && applyAllDeletes == this.applyAllDeletes) {
return doOpenFromWriter(null);
@ -246,7 +252,7 @@ final class StandardDirectoryReader extends DirectoryReader {
}
}
private final DirectoryReader doOpenFromWriter(IndexCommit commit) throws CorruptIndexException, IOException {
private DirectoryReader doOpenFromWriter(IndexCommit commit) throws CorruptIndexException, IOException {
if (commit != null) {
throw new IllegalArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
}

View File

@ -1640,7 +1640,7 @@ public abstract class FieldComparator<T> {
// This means segment has doc values, but they are
// not able to provide a sorted source; consider
// this a hard error:
throw new IllegalStateException("DocValues exist for field \"" + field + "\", but not as a sorted source: type=" + dv.getSource().type() + " reader=" + context.reader());
throw new IllegalStateException("DocValues exist for field \"" + field + "\", but not as a sorted source: type=" + dv.getSource().getType() + " reader=" + context.reader());
}
}

View File

@ -584,8 +584,9 @@ public class IndexSearcher {
Weight weight = query.createWeight(this);
float v = weight.getValueForNormalization();
float norm = getSimilarity().queryNorm(v);
if (Float.isInfinite(norm) || Float.isNaN(norm))
if (Float.isInfinite(norm) || Float.isNaN(norm)) {
norm = 1.0f;
}
weight.normalize(norm, 1.0f);
return weight;
}
@ -812,6 +813,8 @@ public class IndexSearcher {
final int docCount;
final long sumTotalTermFreq;
final long sumDocFreq;
assert field != null;
Terms terms = MultiFields.getTerms(reader, field);
if (terms == null) {

View File

@ -22,7 +22,6 @@ import java.util.*;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
@ -238,7 +237,7 @@ public class MultiPhraseQuery extends Query {
docFreq = termsEnum.docFreq();
}
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
}
// sort by increasing docFreq order
@ -314,9 +313,21 @@ public class MultiPhraseQuery extends Query {
}
buffer.append("\"");
int k = 0;
Iterator<Term[]> i = termArrays.iterator();
int lastPos = -1;
boolean first = true;
while (i.hasNext()) {
Term[] terms = i.next();
int position = positions.get(k);
if (first) {
first = false;
} else {
buffer.append(" ");
for (int j=1; j<(position-lastPos); j++) {
buffer.append("? ");
}
}
if (terms.length > 1) {
buffer.append("(");
for (int j = 0; j < terms.length; j++) {
@ -328,8 +339,8 @@ public class MultiPhraseQuery extends Query {
} else {
buffer.append(terms[0].text());
}
if (i.hasNext())
buffer.append(" ");
lastPos = position;
++k;
}
buffer.append("\"");

View File

@ -31,12 +31,15 @@ final class PhrasePositions {
final int ord; // unique across all PhrasePositions instances
final DocsAndPositionsEnum postings; // stream of docs & positions
PhrasePositions next; // used to make lists
PhrasePositions nextRepeating; // link to next repeating pp: standing for same term in different query offsets
int rptGroup = -1; // >=0 indicates that this is a repeating PP
int rptInd; // index in the rptGroup
final Term[] terms; // for repetitions initialization
PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
PhrasePositions(DocsAndPositionsEnum postings, int o, int ord, Term[] terms) {
this.postings = postings;
offset = o;
this.ord = ord;
this.terms = terms;
}
final boolean next() throws IOException { // increments to next doc
@ -78,8 +81,8 @@ final class PhrasePositions {
@Override
public String toString() {
String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
if (nextRepeating!=null) {
s += " rpt[ "+nextRepeating+" ]";
if (rptGroup >=0 ) {
s += " rpt:"+rptGroup+",i"+rptInd;
}
return s;
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
@ -137,23 +138,46 @@ public class PhraseQuery extends Query {
final DocsAndPositionsEnum postings;
final int docFreq;
final int position;
final Term term;
final Term[] terms;
final int nTerms; // for faster comparisons
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
this.term = term;
nTerms = terms==null ? 0 : terms.length;
if (nTerms>0) {
if (terms.length==1) {
this.terms = terms;
} else {
Term[] terms2 = new Term[terms.length];
System.arraycopy(terms, 0, terms2, 0, terms.length);
Arrays.sort(terms2);
this.terms = terms2;
}
} else {
this.terms = null;
}
}
public int compareTo(PostingsAndFreq other) {
if (docFreq == other.docFreq) {
if (position == other.position) {
return term.compareTo(other.term);
}
if (docFreq != other.docFreq) {
return docFreq - other.docFreq;
}
if (position != other.position) {
return position - other.position;
}
return docFreq - other.docFreq;
if (nTerms != other.nTerms) {
return nTerms - other.nTerms;
}
if (nTerms == 0) {
return 0;
}
for (int i=0; i<terms.length; i++) {
int res = terms[i].compareTo(other.terms[i]);
if (res!=0) return res;
}
return 0;
}
@Override
@ -162,7 +186,9 @@ public class PhraseQuery extends Query {
int result = 1;
result = prime * result + docFreq;
result = prime * result + position;
result = prime * result + ((term == null) ? 0 : term.hashCode());
for (int i=0; i<nTerms; i++) {
result = prime * result + terms[i].hashCode();
}
return result;
}
@ -174,10 +200,8 @@ public class PhraseQuery extends Query {
PostingsAndFreq other = (PostingsAndFreq) obj;
if (docFreq != other.docFreq) return false;
if (position != other.position) return false;
if (term == null) {
if (other.term != null) return false;
} else if (!term.equals(other.term)) return false;
return true;
if (terms == null) return other.terms == null;
return Arrays.equals(terms, other.terms);
}
}
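Aside: the new PostingsAndFreq constructor sorts the incoming terms so that compareTo/equals/hashCode do not depend on the order in which equivalent terms were supplied. A tiny generic sketch of that sort-on-construction pattern (all names invented for illustration, not Lucene API):

import java.util.Arrays;

// Illustrative only: sorting on construction makes equality order-insensitive.
class TermSet {
  private final String[] terms;

  TermSet(String... terms) {
    String[] copy = terms.clone();
    Arrays.sort(copy);          // canonical order, as PostingsAndFreq does with its Term[]
    this.terms = copy;
  }

  @Override public boolean equals(Object o) {
    return o instanceof TermSet && Arrays.equals(terms, ((TermSet) o).terms);
  }

  @Override public int hashCode() {
    return Arrays.hashCode(terms);
  }

  public static void main(String[] args) {
    System.out.println(new TermSet("a", "b").equals(new TermSet("b", "a"))); // true
  }
}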

View File

@ -49,11 +49,11 @@ abstract class PhraseScorer extends Scorer {
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
if (postings.length > 0) {
min = new PhrasePositions(postings[0].postings, postings[0].position, 0);
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
max = min;
max.doc = -1;
for (int i = 1; i < postings.length; i++) {
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
max.next = pp;
max = pp;
max.doc = -1;

View File

@ -19,22 +19,38 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.OpenBitSet;
final class SloppyPhraseScorer extends PhraseScorer {
private int slop;
private boolean checkedRepeats; // flag to only check in first candidate doc in case there are no repeats
private boolean hasRepeats; // flag indicating that there are repeats (already checked in first candidate doc)
private PhraseQueue pq; // for advancing min position
private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset
private final int slop;
private final int numPostings;
private final PhraseQueue pq; // for advancing min position
private int end; // current largest phrase position
private boolean hasRpts; // flag indicating that there are repetitions (as checked in first candidate doc)
private boolean checkedRpts; // flag to only check for repetitions in first candidate doc
private boolean hasMultiTermRpts; // flag indicating that some of the repeating pps are multi-term postings
private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SloppySimScorer docScorer) {
super(weight, postings, docScorer);
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
}
/**
* Score a candidate doc for all slop-valid position-combinations (matches)
* encountered while traversing/hopping the PhrasePositions.
@ -55,31 +71,27 @@ final class SloppyPhraseScorer extends PhraseScorer {
*/
@Override
protected float phraseFreq() throws IOException {
int end = initPhrasePositions();
//printPositions(System.err, "INIT DONE:");
if (end==Integer.MIN_VALUE) {
if (!initPhrasePositions()) {
return 0.0f;
}
float freq = 0.0f;
PhrasePositions pp = pq.pop();
int matchLength = end - pp.position;
int next = pq.size()>0 ? pq.top().position : pp.position;
//printQueue(System.err, pp, "Bef Loop: next="+next+" mlen="+end+"-"+pp.position+"="+matchLength);
while (pp.nextPosition() && (end=advanceRepeats(pp, end)) != Integer.MIN_VALUE) {
if (pp.position > next) {
//printQueue(System.err, pp, "A: >next="+next+" matchLength="+matchLength);
int next = pq.top().position;
while (advancePP(pp)) {
if (hasRpts && !advanceRpts(pp)) {
break; // pps exhausted
}
if (pp.position > next) { // done minimizing current match-length
if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match
}
pq.add(pp);
pp = pq.pop();
next = pq.size()>0 ? pq.top().position : pp.position;
next = pq.top().position;
matchLength = end - pp.position;
//printQueue(System.err, pp, "B: >next="+next+" matchLength="+matchLength);
} else {
int matchLength2 = end - pp.position;
//printQueue(System.err, pp, "C: mlen2<mlen: next="+next+" matchLength="+matchLength+" matchLength2="+matchLength2);
if (matchLength2 < matchLength) {
matchLength = matchLength2;
}
@ -91,53 +103,82 @@ final class SloppyPhraseScorer extends PhraseScorer {
return freq;
}
/**
* Advance repeating pps of an input (non-repeating) pp.
* Return a modified 'end' in case pp or its repeats exceeds original 'end'.
* "Dirty" trick: when there are repeats, modifies pp's position to that of
* least repeater of pp (needed when due to holes repeaters' positions are "back").
*/
private int advanceRepeats(PhrasePositions pp, int end) throws IOException {
int repeatsEnd = end;
if (pp.position > repeatsEnd) {
repeatsEnd = pp.position;
/** advance a PhrasePosition and update 'end', return false if exhausted */
private boolean advancePP(PhrasePositions pp) throws IOException {
if (!pp.nextPosition()) {
return false;
}
if (!hasRepeats) {
return repeatsEnd;
if (pp.position > end) {
end = pp.position;
}
return true;
}
/** pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser
* of the two colliding pps. Note that there can only be one collision, as by the initialization
* there were no collisions before pp was advanced. */
private boolean advanceRpts(PhrasePositions pp) throws IOException {
if (pp.rptGroup < 0) {
return true; // not a repeater
}
PhrasePositions[] rg = rptGroups[pp.rptGroup];
OpenBitSet bits = new OpenBitSet(rg.length); // for re-queuing after collisions are resolved
int k0 = pp.rptInd;
int k;
while((k=collide(pp)) >= 0) {
pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
if (!advancePP(pp)) {
return false; // exhausted
}
if (k != k0) { // careful: mark only those currently in the queue
bits.set(k); // mark that pp2 need to be re-queued
}
}
// collisions resolved, now re-queue
// empty (partially) the queue until seeing all pps advanced for resolving collisions
int n = 0;
while (bits.cardinality() > 0) {
PhrasePositions pp2 = pq.pop();
rptStack[n++] = pp2;
if (pp2.rptGroup >= 0 && bits.get(pp2.rptInd)) {
bits.clear(pp2.rptInd);
}
}
// add back to queue
for (int i=n-1; i>=0; i--) {
pq.add(rptStack[i]);
}
return true;
}
/** compare two pps, but only by position and offset */
private PhrasePositions lesser(PhrasePositions pp, PhrasePositions pp2) {
if (pp.position < pp2.position ||
(pp.position == pp2.position && pp.offset < pp2.offset)) {
return pp;
}
return pp2;
}
/** index of a pp2 colliding with pp, or -1 if none */
private int collide(PhrasePositions pp) {
int tpPos = tpPos(pp);
for (PhrasePositions pp2=pp.nextRepeating; pp2!=null; pp2=pp2.nextRepeating) {
while (tpPos(pp2) <= tpPos) {
if (!pp2.nextPosition()) {
return Integer.MIN_VALUE;
}
}
tpPos = tpPos(pp2);
if (pp2.position > repeatsEnd) {
repeatsEnd = pp2.position;
}
// "dirty" trick: with holes, given a pp, its repeating pp2 might have smaller position.
// so in order to have the right "start" in matchLength computation we fake pp.position.
// this relies on pp.nextPosition() not using pp.position.
if (pp2.position < pp.position) {
pp.position = pp2.position;
PhrasePositions[] rg = rptGroups[pp.rptGroup];
for (int i=0; i<rg.length; i++) {
PhrasePositions pp2 = rg[i];
if (pp2 != pp && tpPos(pp2) == tpPos) {
return pp2.rptInd;
}
}
return repeatsEnd;
return -1;
}
/**
* Initialize PhrasePositions in place.
* There is a one time initialization for this scorer (taking place at the first doc that matches all terms):
* A one time initialization for this scorer (on first doc matching all terms):
* <ul>
* <li>Detect groups of repeating pps: those with same tpPos (tpPos==position in the doc) but different offsets in query.
* <li>For each such group:
* <ul>
* <li>form an inner linked list of the repeating ones.
* <li>propagate all group members but first so that they land on different tpPos().
* </ul>
* <li>Mark whether there are repetitions at all, so that scoring queries with no repetitions has no overhead due to this computation.
* <li>Insert to pq only non repeating PPs, or PPs that are the first in a repeating group.
* <li>Check if there are repetitions
* <li>If there are, find groups of repetitions.
* </ul>
* Examples:
* <ol>
@ -145,118 +186,305 @@ final class SloppyPhraseScorer extends PhraseScorer {
* <li>repetitions: <b>"ho my my"~2</b>
* <li>repetitions: <b>"my ho my"~2</b>
* </ol>
* @return end (max position), or Integer.MIN_VALUE if any term ran out (i.e. done)
* @return false if PPs are exhausted (and so current doc will not be a match)
*/
private int initPhrasePositions() throws IOException {
int end = Integer.MIN_VALUE;
// no repeats at all (most common case is also the simplest one)
if (checkedRepeats && !hasRepeats) {
// build queue from list
pq.clear();
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
pp.firstPosition();
if (pp.position > end) {
end = pp.position;
}
pq.add(pp); // build pq from list
}
return end;
private boolean initPhrasePositions() throws IOException {
end = Integer.MIN_VALUE;
if (!checkedRpts) {
return initFirstTime();
}
//printPositions(System.err, "Init: 1: Bef position");
// position the pp's
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
pp.firstPosition();
if (!hasRpts) {
initSimple();
return true; // PPs available
}
//printPositions(System.err, "Init: 2: Aft position");
// one time initialization for this scorer (done only for the first candidate doc)
if (!checkedRepeats) {
checkedRepeats = true;
ArrayList<PhrasePositions> ppsA = new ArrayList<PhrasePositions>();
PhrasePositions dummyPP = new PhrasePositions(null, -1, -1);
// check for repeats
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
if (pp.nextRepeating != null) {
continue; // a repetition of an earlier pp
}
ppsA.add(pp);
int tpPos = tpPos(pp);
for (PhrasePositions prevB=pp, pp2=pp.next; pp2!= min; pp2=pp2.next) {
if (
pp2.nextRepeating != null // already detected as a repetition of an earlier pp
|| pp.offset == pp2.offset // not a repetition: the two PPs are originally in same offset in the query!
|| tpPos(pp2) != tpPos) { // not a repetition
continue;
}
// a repetition
hasRepeats = true;
prevB.nextRepeating = pp2; // add pp2 to the repeats linked list
pp2.nextRepeating = dummyPP; // allows not to handle the last pp in a sub-list
prevB = pp2;
}
}
if (hasRepeats) {
// clean dummy markers
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
if (pp.nextRepeating == dummyPP) {
pp.nextRepeating = null;
}
}
}
nrPps = ppsA.toArray(new PhrasePositions[0]);
pq = new PhraseQueue(nrPps.length);
}
//printPositions(System.err, "Init: 3: Aft check-repeats");
// with repeats must advance some repeating pp's so they all start with differing tp's
if (hasRepeats) {
for (PhrasePositions pp: nrPps) {
if ((end=advanceRepeats(pp, end)) == Integer.MIN_VALUE) {
return Integer.MIN_VALUE; // ran out of a term -- done (no valid matches in current doc)
}
}
}
//printPositions(System.err, "Init: 4: Aft advance-repeats");
// build queue from non repeating pps
return initComplex();
}
/** no repeats: simplest case, and most common. It is important to keep this piece of the code simple and efficient */
private void initSimple() throws IOException {
//System.err.println("initSimple: doc: "+min.doc);
pq.clear();
for (PhrasePositions pp: nrPps) {
// position pps and build queue from list
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
pp.firstPosition();
if (pp.position > end) {
end = pp.position;
}
pq.add(pp);
}
return end;
}
/** with repeats: not so simple. */
private boolean initComplex() throws IOException {
//System.err.println("initComplex: doc: "+min.doc);
placeFirstPositions();
if (!advanceRepeatGroups()) {
return false; // PPs exhausted
}
fillQueue();
return true; // PPs available
}
/** move all PPs to their first position */
private void placeFirstPositions() throws IOException {
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
pp.firstPosition();
}
}
/** Fill the queue (all pps are already placed) */
private void fillQueue() {
pq.clear();
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
if (pp.position > end) {
end = pp.position;
}
pq.add(pp);
}
}
/** At initialization (each doc), each repetition group is sorted by (query) offset.
* This provides the start condition: no collisions.
* <p>Case 1: no multi-term repeats<br>
* It is sufficient to advance each pp in the group by one less than its group index.
* So the lesser pp is not advanced, the 2nd one advances once, the 3rd one advances twice, etc.
* <p>Case 2: multi-term repeats<br>
* More involved: with multi-term postings some pps in the group may not collide at the start,
* so collisions are detected and resolved one by one (see the multi-term branch below).
* @return false if PPs are exhausted.
*/
private boolean advanceRepeatGroups() throws IOException {
for (PhrasePositions[] rg: rptGroups) {
if (hasMultiTermRpts) {
// more involved, some may not collide
int incr;
for (int i=0; i<rg.length; i+=incr) {
incr = 1;
PhrasePositions pp = rg[i];
int k;
while((k=collide(pp)) >= 0) {
PhrasePositions pp2 = lesser(pp, rg[k]);
if (!advancePP(pp2)) { // at initialization always advance pp with higher offset
return false; // exhausted
}
if (pp2.rptInd < i) { // should not happen?
incr = 0;
break;
}
}
}
} else {
// simpler, we know exactly how much to advance
for (int j=1; j<rg.length; j++) {
for (int k=0; k<j; k++) {
if (!rg[j].nextPosition()) {
return false; // PPs exhausted
}
}
}
}
}
return true; // PPs available
}
/** initialize with checking for repeats. Heavy work, but done only for the first candidate doc.<p>
* If there are repetitions, check if multi-term postings (MTP) are involved.<p>
* Without MTP, once PPs are placed in the first candidate doc, repeats (and groups) are visible.<br>
* With MTP, a more complex check is needed, up-front, as there may be "hidden collisions".<br>
* For example P1 has {A,B}, P2 has {B,C}, and the first doc is: "A C B". At start, P1 would point
* to "A", P2 to "C", and it would not be identified that P1 and P2 are repetitions of each other.<p>
* The more complex initialization has two parts:<br>
* (1) identification of repetition groups.<br>
* (2) advancing repeat groups at the start of the doc.<br>
* For (1), a possible solution is to just create a single repetition group,
* made of all repeating pps. But this would slow down the check for collisions,
* as all pps would need to be checked. Instead, we compute "connected regions"
* on the bipartite graph of postings and terms.
*/
private boolean initFirstTime() throws IOException {
//System.err.println("initFirstTime: doc: "+min.doc);
checkedRpts = true;
placeFirstPositions();
LinkedHashMap<Term,Integer> rptTerms = repeatingTerms();
hasRpts = !rptTerms.isEmpty();
if (hasRpts) {
rptStack = new PhrasePositions[numPostings]; // needed with repetitions
ArrayList<ArrayList<PhrasePositions>> rgs = gatherRptGroups(rptTerms);
sortRptGroups(rgs);
if (!advanceRepeatGroups()) {
return false; // PPs exhausted
}
}
fillQueue();
return true; // PPs available
}
/** sort each repetition group by (query) offset.
* Done only once (at first doc) and allows faster initialization for each doc. */
private void sortRptGroups(ArrayList<ArrayList<PhrasePositions>> rgs) {
rptGroups = new PhrasePositions[rgs.size()][];
Comparator<PhrasePositions> cmprtr = new Comparator<PhrasePositions>() {
public int compare(PhrasePositions pp1, PhrasePositions pp2) {
return pp1.offset - pp2.offset;
}
};
for (int i=0; i<rptGroups.length; i++) {
PhrasePositions[] rg = rgs.get(i).toArray(new PhrasePositions[0]);
Arrays.sort(rg, cmprtr);
rptGroups[i] = rg;
for (int j=0; j<rg.length; j++) {
rg[j].rptInd = j; // we use this index for efficient re-queuing
}
}
}
/** Detect repetition groups. Done once - for first doc */
private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term,Integer> rptTerms) throws IOException {
PhrasePositions[] rpp = repeatingPPs(rptTerms);
ArrayList<ArrayList<PhrasePositions>> res = new ArrayList<ArrayList<PhrasePositions>>();
if (!hasMultiTermRpts) {
// simpler - no multi-terms - can base on positions in first doc
for (int i=0; i<rpp.length; i++) {
PhrasePositions pp = rpp[i];
if (pp.rptGroup >=0) continue; // already marked as a repetition
int tpPos = tpPos(pp);
for (int j=i+1; j<rpp.length; j++) {
PhrasePositions pp2 = rpp[j];
if (
pp2.rptGroup >=0 // already marked as a repetition
|| pp2.offset == pp.offset // not a repetition: two PPs are originally in same offset in the query!
|| tpPos(pp2) != tpPos) { // not a repetition
continue;
}
// a repetition
int g = pp.rptGroup;
if (g < 0) {
g = res.size();
pp.rptGroup = g;
ArrayList<PhrasePositions> rl = new ArrayList<PhrasePositions>(2);
rl.add(pp);
res.add(rl);
}
pp2.rptGroup = g;
res.get(g).add(pp2);
}
}
} else {
// more involved - has multi-terms
ArrayList<HashSet<PhrasePositions>> tmp = new ArrayList<HashSet<PhrasePositions>>();
ArrayList<OpenBitSet> bb = ppTermsBitSets(rpp, rptTerms);
unionTermGroups(bb);
HashMap<Term,Integer> tg = termGroups(rptTerms, bb);
HashSet<Integer> distinctGroupIDs = new HashSet<Integer>(tg.values());
for (int i=0; i<distinctGroupIDs.size(); i++) {
tmp.add(new HashSet<PhrasePositions>());
}
for (PhrasePositions pp : rpp) {
for (Term t: pp.terms) {
if (rptTerms.containsKey(t)) {
int g = tg.get(t);
tmp.get(g).add(pp);
assert pp.rptGroup==-1 || pp.rptGroup==g;
pp.rptGroup = g;
}
}
}
for (HashSet<PhrasePositions> hs : tmp) {
res.add(new ArrayList<PhrasePositions>(hs));
}
}
return res;
}
/** Actual position in doc of a PhrasePosition (relies on the invariant position = tpPos - offset) */
private final int tpPos(PhrasePositions pp) {
return pp.position + pp.offset;
}
// private void printPositions(PrintStream ps, String title) {
// ps.println();
// ps.println("---- "+title);
// int k = 0;
// if (nrPps!=null) {
// for (PhrasePositions pp: nrPps) {
// ps.println(" " + k++ + " " + pp);
// }
// } else {
// for (PhrasePositions pp=min; 0==k || pp!=min; pp = pp.next) {
// ps.println(" " + k++ + " " + pp);
// }
// }
// }
/** find repeating terms and assign them ordinal values */
private LinkedHashMap<Term,Integer> repeatingTerms() {
LinkedHashMap<Term,Integer> tord = new LinkedHashMap<Term,Integer>();
HashMap<Term,Integer> tcnt = new HashMap<Term,Integer>();
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
for (Term t : pp.terms) {
Integer cnt0 = tcnt.get(t);
Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
tcnt.put(t, cnt);
if (cnt==2) {
tord.put(t,tord.size());
}
}
}
return tord;
}
/** find repeating pps, and for each one that has multi-terms, update this.hasMultiTermRpts */
private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
ArrayList<PhrasePositions> rp = new ArrayList<PhrasePositions>();
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
for (Term t : pp.terms) {
if (rptTerms.containsKey(t)) {
rp.add(pp);
hasMultiTermRpts |= (pp.terms.length > 1);
break;
}
}
}
return rp.toArray(new PhrasePositions[0]);
}
/** bit-sets - for each repeating pp, for each of its repeating terms, the term's ordinal value is set */
private ArrayList<OpenBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
ArrayList<OpenBitSet> bb = new ArrayList<OpenBitSet>(rpp.length);
for (PhrasePositions pp : rpp) {
OpenBitSet b = new OpenBitSet(tord.size());
Integer ord;
for (Term t: pp.terms) {
if ((ord=tord.get(t))!=null) {
b.set(ord);
}
}
bb.add(b);
}
return bb;
}
/** union (term group) bit-sets until they are disjoint (O(n^2)), and each group has different terms */
private void unionTermGroups(ArrayList<OpenBitSet> bb) {
int incr;
for (int i=0; i<bb.size()-1; i+=incr) {
incr = 1;
int j = i+1;
while (j<bb.size()) {
if (bb.get(i).intersects(bb.get(j))) {
bb.get(i).union(bb.get(j));
bb.remove(j);
incr = 0;
} else {
++j;
}
}
}
}
/** map each term to the single group that contains it */
private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<OpenBitSet> bb) throws IOException {
HashMap<Term,Integer> tg = new HashMap<Term,Integer>();
Term[] t = tord.keySet().toArray(new Term[0]);
for (int i=0; i<bb.size(); i++) { // i is the group no.
DocIdSetIterator bits = bb.get(i).iterator();
int ord;
while ((ord=bits.nextDoc())!=NO_MORE_DOCS) {
tg.put(t[ord],i);
}
}
return tg;
}
// private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
// //if (min.doc != ?) return;
// ps.println();
// ps.println("---- "+title);
// ps.println("EXT: "+ext);
@ -266,7 +494,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
// ps.println(" " + 0 + " " + t[0]);
// for (int i=1; i<t.length; i++) {
// t[i] = pq.pop();
// assert t[i-1].position <= t[i].position : "PQ is out of order: "+(i-1)+"::"+t[i-1]+" "+i+"::"+t[i];
// assert t[i-1].position <= t[i].position;
// ps.println(" " + i + " " + t[i]);
// }
// // add them back
@ -275,4 +503,5 @@ final class SloppyPhraseScorer extends PhraseScorer {
// }
// }
// }
}
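Aside: the "connected regions" idea described in initFirstTime() above can be sketched standalone. The snippet below loosely mirrors repeatingTerms/ppTermsBitSets/unionTermGroups/termGroups, using java.util.BitSet and plain strings instead of OpenBitSet and Term; all names are invented for illustration and the grouping logic is a simplified reading of the code above, not a drop-in replacement.

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative only: group the terms of multi-term postings that repeat across postings
// ("connected regions" on the bipartite graph of postings and terms).
public class RepeatGroupsSketch {
  public static void main(String[] args) {
    // P1={A,B} and P2={B,C} share B; P3={A} and P4={C} tie A and C into the same region.
    List<String[]> postings = new ArrayList<String[]>();
    postings.add(new String[] {"A", "B"});
    postings.add(new String[] {"B", "C"});
    postings.add(new String[] {"A"});
    postings.add(new String[] {"C"});

    // 1. assign an ordinal to each repeating term (a term occurring in more than one posting)
    Map<String, Integer> count = new HashMap<String, Integer>();
    LinkedHashMap<String, Integer> ord = new LinkedHashMap<String, Integer>();
    for (String[] terms : postings) {
      for (String t : terms) {
        int c = count.containsKey(t) ? count.get(t) + 1 : 1;
        count.put(t, c);
        if (c == 2) ord.put(t, ord.size());
      }
    }

    // 2. one bit set per repeating posting, over the repeating-term ordinals
    List<BitSet> bb = new ArrayList<BitSet>();
    for (String[] terms : postings) {
      BitSet b = new BitSet(ord.size());
      for (String t : terms) {
        if (ord.containsKey(t)) b.set(ord.get(t));
      }
      if (!b.isEmpty()) bb.add(b);
    }

    // 3. union intersecting bit sets until they are pairwise disjoint
    for (int i = 0; i < bb.size() - 1; i++) {
      int j = i + 1;
      while (j < bb.size()) {
        if (bb.get(i).intersects(bb.get(j))) {
          bb.get(i).or(bb.get(j));
          bb.remove(j);
          j = i + 1;        // rescan after a merge
        } else {
          j++;
        }
      }
    }

    // 4. each remaining bit set is one repetition group of terms
    String[] byOrd = ord.keySet().toArray(new String[0]);
    for (int g = 0; g < bb.size(); g++) {
      StringBuilder sb = new StringBuilder("group " + g + ":");
      for (int o = bb.get(g).nextSetBit(0); o >= 0; o = bb.get(g).nextSetBit(o + 1)) {
        sb.append(' ').append(byOrd[o]);
      }
      System.out.println(sb);   // expected: a single group containing A, B and C
    }
  }
}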

View File

@ -57,9 +57,12 @@ public class SpanWeight extends Weight {
termContexts.put(term, state);
i++;
}
stats = similarity.computeWeight(query.getBoost(),
searcher.collectionStatistics(query.getField()),
termStats);
final String field = query.getField();
if (field != null) {
stats = similarity.computeWeight(query.getBoost(),
searcher.collectionStatistics(query.getField()),
termStats);
}
}
@Override
@ -67,18 +70,24 @@ public class SpanWeight extends Weight {
@Override
public float getValueForNormalization() throws IOException {
return stats.getValueForNormalization();
return stats == null ? 1.0f : stats.getValueForNormalization();
}
@Override
public void normalize(float queryNorm, float topLevelBoost) {
stats.normalize(queryNorm, topLevelBoost);
if (stats != null) {
stats.normalize(queryNorm, topLevelBoost);
}
}
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
if (stats == null) {
return null;
} else {
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
}
}
@Override

View File

@ -67,7 +67,7 @@ public class ChecksumIndexInput extends IndexInput {
@Override
public void seek(long pos) {
throw new RuntimeException("not allowed");
throw new UnsupportedOperationException();
}
@Override

View File

@ -68,7 +68,7 @@ public class ChecksumIndexOutput extends IndexOutput {
@Override
public void seek(long pos) {
throw new RuntimeException("not allowed");
throw new UnsupportedOperationException();
}
/**

View File

@ -153,7 +153,7 @@ final class CompoundFileWriter implements Closeable{
*/
public void close() throws IOException {
if (closed) {
throw new IllegalStateException("already closed");
return;
}
IOException priorException = null;
IndexOutput entryTableOut = null;
@ -192,7 +192,7 @@ final class CompoundFileWriter implements Closeable{
private final void ensureOpen() {
if (closed) {
throw new IllegalStateException("CFS Directory is already closed");
throw new AlreadyClosedException("CFS Directory is already closed");
}
}
@ -260,7 +260,7 @@ final class CompoundFileWriter implements Closeable{
} else {
entry.dir = this.directory;
if (directory.fileExists(name)) {
throw new IOException("File already exists");
throw new IllegalArgumentException("File " + name + " already exists");
}
out = new DirectCFSIndexOutput(directory.createOutput(name, context), entry,
true);

View File

@ -171,6 +171,7 @@ class NativeFSLock extends Lock {
throw new IOException("Cannot create directory: " +
lockDir.getAbsolutePath());
} else if (!lockDir.isDirectory()) {
// TODO: NoSuchDirectoryException instead?
throw new IOException("Found regular file where directory expected: " +
lockDir.getAbsolutePath());
}

View File

@ -121,6 +121,7 @@ class SimpleFSLock extends Lock {
throw new IOException("Cannot create directory: " +
lockDir.getAbsolutePath());
} else if (!lockDir.isDirectory()) {
// TODO: NoSuchDirectoryException instead?
throw new IOException("Found regular file where directory expected: " +
lockDir.getAbsolutePath());
}

View File

@ -532,10 +532,10 @@ public final class Util {
* Dumps an {@link FST} to a GraphViz's <code>dot</code> language description
* for visualization. Example of use:
*
* <pre>
* PrintStream ps = new PrintStream(&quot;out.dot&quot;);
* fst.toDot(ps);
* ps.close();
* <pre class="prettyprint">
* PrintWriter pw = new PrintWriter(&quot;out.dot&quot;);
* Util.toDot(fst, pw, true, true);
* pw.close();
* </pre>
*
* and then, from command line:

View File

@ -188,7 +188,7 @@ public class TestDocValues extends LuceneTestCase {
DocValues r = Ints.getValues(dir, "test", 2, Type.VAR_INTS, newIOContext(random));
Source source = getSource(r);
assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
expectedTypes[i], source.type());
expectedTypes[i], source.getType());
assertEquals(minMax[i][0], source.getInt(0));
assertEquals(minMax[i][1], source.getInt(1));
@ -368,7 +368,7 @@ public class TestDocValues extends LuceneTestCase {
DocValues r = Ints.getValues(dir, "test", NUM_VALUES + additionalDocs, type, newIOContext(random));
for (int iter = 0; iter < 2; iter++) {
Source s = getSource(r);
assertEquals(type, s.type());
assertEquals(type, s.getType());
for (int i = 0; i < NUM_VALUES; i++) {
final long v = s.getInt(i);
assertEquals("index " + i, values[i], v);

View File

@ -682,7 +682,7 @@ public class TestCompoundFile extends LuceneTestCase
try {
newDir.copy(csw, "d1", "d1", newIOContext(random));
fail("file does already exist");
} catch (IOException e) {
} catch (IllegalArgumentException e) {
//
}
out.close();

View File

@ -83,7 +83,7 @@ public class TestCustomNorms extends LuceneTestCase {
assertNotNull(normValues);
Source source = normValues.getSource();
assertTrue(source.hasArray());
assertEquals(Type.FLOAT_32, normValues.type());
assertEquals(Type.FLOAT_32, normValues.getType());
float[] norms = (float[]) source.getArray();
for (int i = 0; i < open.maxDoc(); i++) {
Document document = open.document(i);

View File

@ -148,8 +148,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
Directory target = newDirectory();
IndexWriter w = new IndexWriter(target, writerConfig(random.nextBoolean()));
IndexReader r_1 = IndexReader.open(w_1, true);
IndexReader r_2 = IndexReader.open(w_2, true);
DirectoryReader r_1 = DirectoryReader.open(w_1, true);
DirectoryReader r_2 = DirectoryReader.open(w_2, true);
if (random.nextBoolean()) {
w.addIndexes(d_1, d_2);
} else {
@ -163,7 +163,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
// check values
IndexReader merged = IndexReader.open(w, true);
DirectoryReader merged = DirectoryReader.open(w, true);
Source source_1 = getSource(getDocValues(r_1, first.name()));
Source source_2 = getSource(getDocValues(r_2, second.name()));
Source source_1_merged = getSource(getDocValues(merged, first.name()));
@ -260,7 +260,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
FixedBitSet deleted = indexValues(w, numValues, val, numVariantList,
withDeletions, 7);
List<Closeable> closeables = new ArrayList<Closeable>();
IndexReader r = IndexReader.open(w, true);
DirectoryReader r = DirectoryReader.open(w, true);
final int numRemainingValues = numValues - deleted.cardinality();
final int base = r.numDocs() - numRemainingValues;
// for FIXED_INTS_8 we use value mod 128 - to enable testing in
@ -338,7 +338,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
final int bytesSize = 1 + atLeast(50);
FixedBitSet deleted = indexValues(w, numValues, byteIndexValue,
byteVariantList, withDeletions, bytesSize);
final IndexReader r = IndexReader.open(w, withDeletions);
final DirectoryReader r = DirectoryReader.open(w, withDeletions);
assertEquals(0, r.numDeletedDocs());
final int numRemainingValues = numValues - deleted.cardinality();
final int base = r.numDocs() - numRemainingValues;
@ -422,13 +422,17 @@ public class TestDocValuesIndexing extends LuceneTestCase {
for (Type val : numVariantList) {
indexValues(w, numValues, val, numVariantList,
false, 7);
IndexReader r = IndexReader.open(w, true);
DirectoryReader r = DirectoryReader.open(w, true);
if (val == Type.VAR_INTS) {
DocValues docValues = getDocValues(r, val.name());
}
DocValues docValues = getDocValues(r, val.name());
assertNotNull(docValues);
// make sure we don't get a direct source since they don't support getArray()
if (val == Type.VAR_INTS) {
}
Source source = docValues.getSource();
switch (source.type()) {
switch (source.getType()) {
case FIXED_INTS_8:
{
assertTrue(source.hasArray());
@ -465,7 +469,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
}
break;
case VAR_INTS:
case VAR_INTS:
System.out.println(source.hasArray());
assertFalse(source.hasArray());
break;
case FLOAT_32:
@ -487,7 +492,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
break;
default:
fail("unexpected value " + source.type());
fail("unexpected value " + source.getType());
}
r.close();
}
@ -503,27 +508,28 @@ public class TestDocValuesIndexing extends LuceneTestCase {
final int numValues = 50 + atLeast(10);
// only single byte fixed straight supports getArray()
indexValues(w, numValues, Type.BYTES_FIXED_STRAIGHT, null, false, 1);
IndexReader r = IndexReader.open(w, true);
DirectoryReader r = DirectoryReader.open(w, true);
DocValues docValues = getDocValues(r, Type.BYTES_FIXED_STRAIGHT.name());
assertNotNull(docValues);
// make sure we don't get a direct source since they don't support
// getArray()
Source source = docValues.getSource();
switch (source.type()) {
switch (source.getType()) {
case BYTES_FIXED_STRAIGHT: {
BytesRef ref = new BytesRef();
assertTrue(source.hasArray());
byte[] values = (byte[]) source.getArray();
for (int i = 0; i < numValues; i++) {
source.getBytes(i, ref);
assertEquals(1, ref.length);
assertEquals(values[i], ref.bytes[ref.offset]);
if (source.hasArray()) {
byte[] values = (byte[]) source.getArray();
for (int i = 0; i < numValues; i++) {
source.getBytes(i, ref);
assertEquals(1, ref.length);
assertEquals(values[i], ref.bytes[ref.offset]);
}
}
}
break;
default:
fail("unexpected value " + source.type());
fail("unexpected value " + source.getType());
}
r.close();
w.close();
@ -543,7 +549,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
case 2:
return values.getDirectSource();
case 1:
if(values.type() == Type.BYTES_VAR_SORTED || values.type() == Type.BYTES_FIXED_SORTED) {
if(values.getType() == Type.BYTES_VAR_SORTED || values.getType() == Type.BYTES_FIXED_SORTED) {
return values.getSource().asSortedSource();
}
default:
@ -925,4 +931,4 @@ public class TestDocValuesIndexing extends LuceneTestCase {
r.close();
dir.close();
}
}
}

View File

@ -97,7 +97,7 @@ public class TestDocumentWriter extends LuceneTestCase {
// omitNorms is true
for (FieldInfo fi : reader.getFieldInfos()) {
if (fi.isIndexed) {
assertTrue(fi.omitNorms == !reader.hasNorms(fi.name));
assertTrue(fi.omitNorms == (reader.normValues(fi.name) == null));
}
}
reader.close();
@ -330,10 +330,10 @@ public class TestDocumentWriter extends LuceneTestCase {
SegmentReader reader = getOnlySegmentReader(IndexReader.open(dir));
FieldInfos fi = reader.getFieldInfos();
// f1
assertFalse("f1 should have no norms", reader.hasNorms("f1"));
assertFalse("f1 should have no norms", fi.fieldInfo("f1").hasNorms());
assertEquals("omitTermFreqAndPositions field bit should not be set for f1", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
// f2
assertTrue("f2 should have norms", reader.hasNorms("f2"));
assertTrue("f2 should have norms", fi.fieldInfo("f2").hasNorms());
assertEquals("omitTermFreqAndPositions field bit should be set for f2", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
reader.close();
}

View File

@ -562,7 +562,7 @@ public class TestDuelingCodecs extends LuceneTestCase {
public void assertDocValues(DocValues leftDocValues, DocValues rightDocValues) throws Exception {
assertNotNull(info, leftDocValues);
assertNotNull(info, rightDocValues);
assertEquals(info, leftDocValues.type(), rightDocValues.type());
assertEquals(info, leftDocValues.getType(), rightDocValues.getType());
assertEquals(info, leftDocValues.getValueSize(), rightDocValues.getValueSize());
assertDocValuesSource(leftDocValues.getDirectSource(), rightDocValues.getDirectSource());
assertDocValuesSource(leftDocValues.getSource(), rightDocValues.getSource());
@ -572,8 +572,8 @@ public class TestDuelingCodecs extends LuceneTestCase {
* checks source API
*/
public void assertDocValuesSource(DocValues.Source left, DocValues.Source right) throws Exception {
DocValues.Type leftType = left.type();
assertEquals(info, leftType, right.type());
DocValues.Type leftType = left.getType();
assertEquals(info, leftType, right.getType());
switch(leftType) {
case VAR_INTS:
case FIXED_INTS_8:

View File

@ -96,7 +96,7 @@ public class TestNorms extends LuceneTestCase {
assertNotNull(normValues);
Source source = normValues.getSource();
assertTrue(source.hasArray());
assertEquals(Type.FIXED_INTS_8, normValues.type());
assertEquals(Type.FIXED_INTS_8, normValues.getType());
byte[] norms = (byte[]) source.getArray();
for (int i = 0; i < open.maxDoc(); i++) {
Document document = open.document(i);
@ -128,9 +128,9 @@ public class TestNorms extends LuceneTestCase {
assertFalse(fieldInfo.omitNorms);
assertTrue(fieldInfo.isIndexed);
if (secondWriteNorm) {
assertTrue(fieldInfo.normsPresent());
assertTrue(fieldInfo.hasNorms());
} else {
assertFalse(fieldInfo.normsPresent());
assertFalse(fieldInfo.hasNorms());
}
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
@ -144,18 +144,18 @@ public class TestNorms extends LuceneTestCase {
FieldInfo fi = mergedReader.getFieldInfos().fieldInfo(byteTestField);
assertFalse(fi.omitNorms);
assertTrue(fi.isIndexed);
assertFalse(fi.normsPresent());
assertFalse(fi.hasNorms());
} else {
FieldInfo fi = mergedReader.getFieldInfos().fieldInfo(byteTestField);
assertFalse(fi.omitNorms);
assertTrue(fi.isIndexed);
assertTrue(fi.normsPresent());
assertTrue(fi.hasNorms());
DocValues normValues = mergedReader.normValues(byteTestField);
assertNotNull(normValues);
Source source = normValues.getSource();
assertTrue(source.hasArray());
assertEquals(Type.FIXED_INTS_8, normValues.type());
assertEquals(Type.FIXED_INTS_8, normValues.getType());
byte[] norms = (byte[]) source.getArray();
for (int i = 0; i < mergedReader.maxDoc(); i++) {
Document document = mergedReader.document(i);

View File

@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockPayloadAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -43,6 +44,8 @@ import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
// TODO: we really need to test indexing offsets, but then getting only docs / docs + freqs.
// not all codecs store prx separate...
public class TestPostingsOffsets extends LuceneTestCase {
IndexWriterConfig iwc;
@ -54,7 +57,11 @@ public class TestPostingsOffsets extends LuceneTestCase {
if (Codec.getDefault().getName().equals("Lucene40")) {
// pulsing etc are not implemented
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
if (random.nextBoolean()) {
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
} else {
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat()));
}
}
}
@ -126,7 +133,11 @@ public class TestPostingsOffsets extends LuceneTestCase {
iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
if (Codec.getDefault().getName().equals("Lucene40")) {
// pulsing etc are not implemented
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
if (random.nextBoolean()) {
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
} else {
iwc.setCodec(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat()));
}
}
iwc.setMergePolicy(newLogMergePolicy()); // will rely on docids a bit for skipping
RandomIndexWriter w = new RandomIndexWriter(random, dir, iwc);

View File

@ -179,9 +179,9 @@ public class TestSegmentReader extends LuceneTestCase {
for (int i=0; i<DocHelper.fields.length; i++) {
IndexableField f = DocHelper.fields[i];
if (f.fieldType().indexed()) {
assertEquals(reader.hasNorms(f.name()), !f.fieldType().omitNorms());
assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
if (!reader.hasNorms(f.name())) {
assertEquals(reader.normValues(f.name()) != null, !f.fieldType().omitNorms());
assertEquals(reader.normValues(f.name()) != null, !DocHelper.noNorms.containsKey(f.name()));
if (reader.normValues(f.name()) == null) {
// test for norms of null
DocValues norms = MultiDocValues.getNormDocValues(reader, f.name());
assertNull(norms);

View File

@ -349,7 +349,7 @@ public class TestTypePromotion extends LuceneTestCase {
DocValues docValues = children[0].reader().docValues("promote");
assertNotNull(docValues);
assertValues(TestType.Byte, dir, values);
assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.type());
assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.getType());
reader.close();
dir.close();
}

View File

@ -38,6 +38,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Ignore;
/**
* This class tests the MultiPhraseQuery class.
@ -156,6 +157,43 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
indexStore.close();
}
@Ignore //LUCENE-3821 fixes sloppy phrase scoring, except for this known problem
public void testMultiSloppyWithRepeats() throws IOException {
Directory indexStore = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
add("a b c d e f g h i k", writer);
IndexReader r = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(r);
MultiPhraseQuery q = new MultiPhraseQuery();
// this will fail if the scorer propagates [a] rather than [a,b]
q.add(new Term[] {new Term("body", "a"), new Term("body", "b")});
q.add(new Term[] {new Term("body", "a")});
q.setSlop(6);
assertEquals(1, searcher.search(q, 1).totalHits); // should match on "a b"
r.close();
indexStore.close();
}
public void testMultiExactWithRepeats() throws IOException {
Directory indexStore = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
add("a b c d e f g h i k", writer);
IndexReader r = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(r);
MultiPhraseQuery q = new MultiPhraseQuery();
q.add(new Term[] {new Term("body", "a"), new Term("body", "d")}, 0);
q.add(new Term[] {new Term("body", "a"), new Term("body", "f")}, 2);
assertEquals(1, searcher.search(q, 1).totalHits); // should match on "a b"
r.close();
indexStore.close();
}
private void add(String s, RandomIndexWriter writer) throws IOException {
Document doc = new Document();
doc.add(newField("body", s, TextField.TYPE_STORED));

View File

@ -169,4 +169,23 @@ public class TestSimpleSearchEquivalence extends SearchEquivalenceTestBase {
q2.add(new Term[] { t2, t3 }, 2);
assertSubsetOf(q1, q2);
}
/** "A B"~∞ = +A +B if A != B */
public void testSloppyPhraseVersusBooleanAnd() throws Exception {
Term t1 = randomTerm();
Term t2 = null;
// semantics differ from SpanNear: SloppyPhrase handles repeats,
// so we must ensure t1 != t2
do {
t2 = randomTerm();
} while (t1.equals(t2));
PhraseQuery q1 = new PhraseQuery();
q1.add(t1);
q1.add(t2);
q1.setSlop(Integer.MAX_VALUE);
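// with unbounded slop, the sloppy phrase should accept exactly the documents that contain both terms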
BooleanQuery q2 = new BooleanQuery();
q2.add(new TermQuery(t1), Occur.MUST);
q2.add(new TermQuery(t2), Occur.MUST);
assertSameSet(q1, q2);
}
}

View File

@ -21,12 +21,10 @@ import java.util.Random;
import org.apache.lucene.index.Term;
import org.apache.lucene.util._TestUtil;
import org.junit.Ignore;
/**
* random sloppy phrase query tests
*/
@Ignore("Put this back when we fix LUCENE-3821")
public class TestSloppyPhraseQuery2 extends SearchEquivalenceTestBase {
/** "A B"~N ⊆ "A B"~N+1 */
public void testIncreasingSloppiness() throws Exception {

View File

@ -0,0 +1,109 @@
package org.apache.lucene.search.spans;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.SearchEquivalenceTestBase;
import org.apache.lucene.search.TermQuery;
/**
* Basic equivalence tests for span queries
*/
public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
// TODO: we could go a little crazy for a lot of these,
// but these are just simple minimal cases in case something
// goes horribly wrong. Put more intense tests elsewhere.
/** SpanTermQuery(A) = TermQuery(A) */
public void testSpanTermVersusTerm() throws Exception {
Term t1 = randomTerm();
assertSameSet(new TermQuery(t1), new SpanTermQuery(t1));
}
/** SpanOrQuery(A, B) = (A B) */
public void testSpanOrVersusBoolean() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
BooleanQuery q1 = new BooleanQuery();
q1.add(new TermQuery(t1), Occur.SHOULD);
q1.add(new TermQuery(t2), Occur.SHOULD);
SpanOrQuery q2 = new SpanOrQuery(new SpanTermQuery(t1), new SpanTermQuery(t2));
assertSameSet(q1, q2);
}
/** SpanNotQuery(A, B) ⊆ SpanTermQuery(A) */
public void testSpanNotVersusSpanTerm() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
assertSubsetOf(new SpanNotQuery(new SpanTermQuery(t1), new SpanTermQuery(t2)), new SpanTermQuery(t1));
}
/** SpanFirstQuery(A, 10) ⊆ SpanTermQuery(A) */
public void testSpanFirstVersusSpanTerm() throws Exception {
Term t1 = randomTerm();
assertSubsetOf(new SpanFirstQuery(new SpanTermQuery(t1), 10), new SpanTermQuery(t1));
}
/** SpanNearQuery([A, B], 0, true) = "A B" */
public void testSpanNearVersusPhrase() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanNearQuery q1 = new SpanNearQuery(subquery, 0, true);
PhraseQuery q2 = new PhraseQuery();
q2.add(t1);
q2.add(t2);
assertSameSet(q1, q2);
}
/** SpanNearQuery([A, B], ∞, false) = +A +B */
public void testSpanNearVersusBooleanAnd() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanNearQuery q1 = new SpanNearQuery(subquery, Integer.MAX_VALUE, false);
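// unordered and with unbounded slop, the span query should match any document containing both terms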
BooleanQuery q2 = new BooleanQuery();
q2.add(new TermQuery(t1), Occur.MUST);
q2.add(new TermQuery(t2), Occur.MUST);
assertSameSet(q1, q2);
}
/** SpanNearQuery([A B], 0, false) ⊆ SpanNearQuery([A B], 1, false) */
public void testSpanNearVersusSloppySpanNear() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanNearQuery q1 = new SpanNearQuery(subquery, 0, false);
SpanNearQuery q2 = new SpanNearQuery(subquery, 1, false);
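// widening the slop can only admit additional matches, so the slop-0 results are a subset of the slop-1 results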
assertSubsetOf(q1, q2);
}
/** SpanNearQuery([A B], 3, true) ⊆ SpanNearQuery([A B], 3, false) */
public void testSpanNearInOrderVersusOutOfOrder() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanNearQuery q1 = new SpanNearQuery(subquery, 3, true);
SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
assertSubsetOf(q1, q2);
}
}

View File

@ -17,31 +17,31 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ReaderUtil;
import java.io.IOException;
public class TestSpans extends LuceneTestCase {
private IndexSearcher searcher;
private IndexReader reader;

Some files were not shown because too many files have changed in this diff.