mirror of https://github.com/apache/lucene.git
LUCENE-3795: updating to trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3795_lsp_spatial_module@1300232 13f79535-47bb-0310-9956-ffa450edef68
commit d5b39f875f
@@ -124,6 +124,7 @@
<fileset dir="modules" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
</subant>
<delete dir="dist" failonerror="false" />
</sequential>
</target>

@@ -102,7 +102,7 @@
<classpathentry kind="lib" path="modules/benchmark/lib/commons-compress-1.2.jar"/>
<classpathentry kind="lib" path="modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>
<classpathentry kind="lib" path="solr/lib/apache-solr-noggit-r1211150.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar"/>
<classpathentry kind="lib" path="solr/lib/apache-solr-commons-csv-1.0-SNAPSHOT-r966014.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-fileupload-1.2.1.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-httpclient-3.1.jar"/>
<classpathentry kind="lib" path="solr/lib/commons-io-2.1.jar"/>
@@ -112,14 +112,22 @@
<classpathentry kind="lib" path="solr/lib/jcl-over-slf4j-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/junit-4.10.jar"/>
<classpathentry kind="lib" path="solr/lib/log4j-over-slf4j-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/servlet-api-2.4.jar"/>
<classpathentry kind="lib" path="solr/lib/slf4j-api-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/slf4j-jdk14-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/lib/wstx-asl-3.2.7.jar"/>
<classpathentry kind="lib" path="solr/lib/zookeeper-3.3.4.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-6.1.26-patched-JETTY-1340.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-util-6.1.26-patched-JETTY-1340.jar"/>
<classpathentry kind="lib" path="solr/example/lib/servlet-api-2.5-20081211.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-continuation-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-deploy-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-http-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-io-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-jmx-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-security-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-server-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-servlet-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-util-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-webapp-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jetty-xml-8.1.2.v20120308.jar"/>
<classpathentry kind="lib" path="solr/example/lib/servlet-api-3.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.5.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-core-asl-1.5.2.jar"/>

@@ -70,8 +70,8 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>

@@ -42,8 +42,7 @@
<base.specification.version>4.0.0</base.specification.version>
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
<java.compat.version>1.6</java.compat.version>
<jetty.version>6.1.26</jetty.version>
<patched.jetty.version>6.1.26-patched-JETTY-1340</patched.jetty.version>
<jetty.version>8.1.2.v20120308</jetty.version>
<slf4j.version>1.6.1</slf4j.version>
<tika.version>1.0</tika.version>
</properties>
@@ -296,14 +295,24 @@
<version>2.2</version>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<version>${patched.jetty.version}</version>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>${patched.jetty.version}</version>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
<version>${jetty.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@@ -331,9 +340,9 @@
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.4</version>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<version>3.0.0.v201112011016</version>
</dependency>
<dependency>
<groupId>com.spatial4j</groupId>
@@ -495,7 +504,7 @@
</plugin>
<plugin>
<groupId>org.mortbay.jetty</groupId>
<artifactId>maven-jetty-plugin</artifactId>
<artifactId>jetty-maven-plugin</artifactId>
<version>${jetty.version}</version>
</plugin>
<plugin>
@@ -636,7 +645,7 @@
<artifactId>solr-commons-csv</artifactId>
<version>${project.version}</version>
<packaging>jar</packaging>
<file>solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar</file>
<file>solr/lib/apache-solr-commons-csv-1.0-SNAPSHOT-r966014.jar</file>
</configuration>
</execution>
<execution>
@@ -653,34 +662,6 @@
<file>solr/lib/apache-solr-noggit-r1211150.jar</file>
</configuration>
</execution>
<execution>
<id>install-jetty</id>
<phase>install</phase>
<goals>
<goal>install-file</goal>
</goals>
<configuration>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<version>${patched.jetty.version}</version>
<packaging>jar</packaging>
<file>solr/example/lib/jetty-${patched.jetty.version}.jar</file>
</configuration>
</execution>
<execution>
<id>install-jetty-util</id>
<phase>install</phase>
<goals>
<goal>install-file</goal>
</goals>
<configuration>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>${patched.jetty.version}</version>
<packaging>jar</packaging>
<file>solr/example/lib/jetty-util-${patched.jetty.version}.jar</file>
</configuration>
</execution>
<execution>
<id>install-jsonic</id>
<phase>install</phase>

@@ -94,12 +94,17 @@
</exclusions>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<scope>test</scope>
</dependency>

@@ -73,12 +73,17 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<scope>test</scope>
</dependency>

@@ -172,15 +172,20 @@
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<optional>true</optional> <!-- Only used for tests and one command-line utility: JettySolrRunner -->
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<optional>true</optional> <!-- Only used for tests and one command-line utility: JettySolrRunner -->
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
<optional>true</optional> <!-- Only used for tests and one command-line utility: JettySolrRunner -->
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>wstx-asl</artifactId>
@@ -193,8 +198,8 @@
</exclusions>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<!-- compile scope; solr-core is a jar not a war -->
</dependency>
<dependency>

@@ -58,8 +58,8 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
<artifactId>javax.servlet</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
@@ -98,9 +98,9 @@
</configuration>
</plugin>
<plugin>
<!-- http://docs.codehaus.org/display/JETTY/Maven+Jetty+Plugin -->
<!-- http://wiki.eclipse.org/Jetty/Feature/Jetty_Maven_Plugin -->
<groupId>org.mortbay.jetty</groupId>
<artifactId>maven-jetty-plugin</artifactId>
<artifactId>jetty-maven-plugin</artifactId>
<configuration>
<scanIntervalSeconds>10</scanIntervalSeconds>
<webAppConfig>

@@ -410,6 +410,10 @@ API Changes
  method maybeReopen has been deprecated in favor of maybeRefresh().
  (Shai Erera, Mike McCandless, Simon Willnauer)

* LUCENE-3859: AtomicReader.hasNorms(field) is deprecated, instead you
  can inspect the FieldInfo yourself to see if norms are present, which
  also allows you to get the type. (Robert Muir)

New features

* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
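
A minimal sketch of the renamed refresh call described in the maybeRefresh()
entry above, assuming a SearcherManager already wraps an IndexWriter (the
writer variable here is a placeholder, not part of this commit):

    SearcherManager mgr = new SearcherManager(writer, true, new SearcherFactory());
    mgr.maybeRefresh();                 // replaces the deprecated maybeReopen()
    IndexSearcher searcher = mgr.acquire();
    try {
      // run queries against the freshly refreshed view
    } finally {
      mgr.release(searcher);
    }
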
@@ -919,7 +923,13 @@ Bug fixes
  from the delegate DocIdSet.iterator(), which is allowed to return
  null by DocIdSet specification when no documents match.
  (Shay Banon via Uwe Schindler)

* LUCENE-3821: SloppyPhraseScorer missed documents that ExactPhraseScorer finds.
  When a phrase query had repeating terms (e.g. "yes ho yes"), the
  sloppy query missed documents that the exact query matched.
  Fixed, except for repeating multiterms (e.g. "yes ho yes|no").
  (Robert Muir, Doron Cohen)

Optimizations

* LUCENE-3653: Improve concurrency in VirtualMethod and AttributeSource by
@@ -932,6 +942,9 @@ Documentation

Build

* LUCENE-3857: exceptions from other threads in beforeclass/etc do not fail
  the test (Dawid Weiss)

* LUCENE-3847: LuceneTestCase will now check for modifications of System
  properties before and after each test (and suite). If changes are detected,
  the test will fail. A rule can be used to reset system properties to

@@ -170,7 +170,7 @@
<property name="junit.output.dir.backwards" location="${build.dir.backwards}/test"/>
<property name="junit.reports" location="${build.dir}/test/reports"/>
<property name="junit.reports.backwards" location="${build.dir.backwards}/test/reports"/>
<property name="junit.excludes" value=""/>
<property name="junit.excludes" value="**/Abstract*"/>
<condition property="junit.details.formatter"
value="org.apache.tools.ant.taskdefs.optional.junit.BriefJUnitResultFormatter"
else="org.apache.lucene.util.LuceneJUnitResultFormatter">

@@ -72,6 +72,8 @@ New Features
  start/endOffset, if offsets are indexed. (Alan Woodward via Mike
  McCandless)

* LUCENE-3802: Support for grouped faceting. (Martijn van Groningen)

API Changes

* LUCENE-2606: Changed RegexCapabilities interface to fix thread
@@ -242,6 +244,10 @@ Bug Fixes
  that take stopwords and stem exclusion tables also initialize
  the default stem overrides (e.g. kind/kinder, fiets). (Robert Muir)

* LUCENE-3831: avoid NPE if the SpanQuery has a null field (eg a
  SpanOrQuery with no clauses added). (Alan Woodward via Mike
  McCandless)

Documentation

* LUCENE-3599: Javadocs for DistanceUtils.haversine() were incorrectly

@@ -43,8 +43,8 @@ class MemoryIndexNormDocValues extends DocValues {
}

@Override
public Type type() {
return source.type();
public Type getType() {
return source.getType();
}

@Override

@@ -21,6 +21,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;

@@ -40,11 +41,16 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
@@ -225,4 +231,28 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
reader.close();
}

// LUCENE-3831
public void testNullPointerException() throws IOException {
RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<RegexpQuery>(regex);

MemoryIndex mindex = new MemoryIndex();
mindex.addField("field", new MockAnalyzer(random).tokenStream("field", new StringReader("hello there")));

// This throws an NPE
assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}

// LUCENE-3831
public void testPassesIfWrapped() throws IOException {
RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));

MemoryIndex mindex = new MemoryIndex();
mindex.addField("field", new MockAnalyzer(random).tokenStream("field", new StringReader("hello there")));

// This passes though
assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
}

@@ -197,6 +197,7 @@ public class BlockTermsReader extends FieldsProducer {

@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}

@@ -211,6 +211,7 @@ public class BlockTreeTermsReader extends FieldsProducer {

@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}

@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;

import java.io.IOException;
import java.util.Set;
import java.util.ServiceLoader; // javadocs

import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriterConfig; // javadocs
@@ -26,7 +27,15 @@ import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.util.NamedSPILoader;

/**
* Encodes/decodes an inverted index segment
* Encodes/decodes an inverted index segment.
* <p>
* Note, when extending this class, the name ({@link #getName}) is
* written into the index. In order for the segment to be read, the
* name must resolve to your implementation via {@link #forName(String)}.
* This method uses Java's
* {@link ServiceLoader Service Provider Interface} to resolve codec names.
* <p>
* @see ServiceLoader
*/
public abstract class Codec implements NamedSPILoader.NamedSPI {

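A minimal sketch of the SPI lookup the Codec javadoc above describes; the
codec name "Lucene40" is illustrative, and a custom implementation is only
found if it is registered under META-INF/services for Java's ServiceLoader:

    // Resolve the codec whose name was written into the index.
    Codec codec = Codec.forName("Lucene40");
    assert codec.getName().equals("Lucene40");
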
@@ -0,0 +1,513 @@
package org.apache.lucene.codecs;

import java.io.IOException;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

/**
* @lucene.experimental
* @lucene.internal
*/
public abstract class DocValuesArraySource extends Source {

private static final Map<Type, DocValuesArraySource> TEMPLATES;

static {
EnumMap<Type, DocValuesArraySource> templates = new EnumMap<Type, DocValuesArraySource>(
Type.class);
templates.put(Type.FIXED_INTS_16, new ShortValues());
templates.put(Type.FIXED_INTS_32, new IntValues());
templates.put(Type.FIXED_INTS_64, new LongValues());
templates.put(Type.FIXED_INTS_8, new ByteValues());
templates.put(Type.FLOAT_32, new FloatValues());
templates.put(Type.FLOAT_64, new DoubleValues());
TEMPLATES = Collections.unmodifiableMap(templates);
}

public static DocValuesArraySource forType(Type type) {
return TEMPLATES.get(type);
}

protected final int bytesPerValue;

DocValuesArraySource(int bytesPerValue, Type type) {
super(type);
this.bytesPerValue = bytesPerValue;
}

@Override
public abstract BytesRef getBytes(int docID, BytesRef ref);

public abstract DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException;

public abstract DocValuesArraySource newFromArray(Object array);

@Override
public final boolean hasArray() {
return true;
}

public void toBytes(long value, BytesRef bytesRef) {
copyLong(bytesRef, value);
}

public void toBytes(double value, BytesRef bytesRef) {
copyLong(bytesRef, Double.doubleToRawLongBits(value));
}

final static class ByteValues extends DocValuesArraySource {
private final byte[] values;

ByteValues() {
super(1, Type.FIXED_INTS_8);
values = new byte[0];
}
private ByteValues(byte[] array) {
super(1, Type.FIXED_INTS_8);
values = array;
}

private ByteValues(IndexInput input, int numDocs) throws IOException {
super(1, Type.FIXED_INTS_8);
values = new byte[numDocs];
input.readBytes(values, 0, values.length, false);
}

@Override
public byte[] getArray() {
return values;
}

@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}

@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ByteValues(input, numDocs);
}

@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof byte[];
return new ByteValues((byte[]) array);
}

public void toBytes(long value, BytesRef bytesRef) {
if (bytesRef.bytes.length == 0) {
bytesRef.bytes = new byte[1];
}
bytesRef.bytes[0] = (byte) (0xFFL & value);
bytesRef.offset = 0;
bytesRef.length = 1;
}

@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}

};

final static class ShortValues extends DocValuesArraySource {
private final short[] values;

ShortValues() {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = new short[0];
}

private ShortValues(short[] array) {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = array;
}

private ShortValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
values = new short[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readShort();
}
}

@Override
public short[] getArray() {
return values;
}

@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}

@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ShortValues(input, numDocs);
}

public void toBytes(long value, BytesRef bytesRef) {
copyShort(bytesRef, (short) (0xFFFFL & value));
}

@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof short[];
return new ShortValues((short[]) array);
}

@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}

};

final static class IntValues extends DocValuesArraySource {
private final int[] values;

IntValues() {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = new int[0];
}

private IntValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = new int[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readInt();
}
}

private IntValues(int[] array) {
super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
values = array;
}

@Override
public int[] getArray() {
return values;
}

@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return 0xFFFFFFFF & values[docID];
}

@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new IntValues(input, numDocs);
}

public void toBytes(long value, BytesRef bytesRef) {
copyInt(bytesRef, (int) (0xFFFFFFFF & value));
}

@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof int[];
return new IntValues((int[]) array);
}

@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}

};

final static class LongValues extends DocValuesArraySource {
private final long[] values;

LongValues() {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = new long[0];
}

private LongValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = new long[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = input.readLong();
}
}

private LongValues(long[] array) {
super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
values = array;
}

@Override
public long[] getArray() {
return values;
}

@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}

@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new LongValues(input, numDocs);
}

@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof long[];
return new LongValues((long[])array);
}

@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getInt(docID), ref);
return ref;
}

};

final static class FloatValues extends DocValuesArraySource {
private final float[] values;

FloatValues() {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = new float[0];
}

private FloatValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = new float[numDocs];
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Float.intBitsToFloat(input.readInt());
}
}

private FloatValues(float[] array) {
super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
values = array;
}

@Override
public float[] getArray() {
return values;
}

@Override
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}

@Override
public void toBytes(double value, BytesRef bytesRef) {
copyInt(bytesRef, Float.floatToRawIntBits((float)value));
}

@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new FloatValues(input, numDocs);
}

@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof float[];
return new FloatValues((float[]) array);
}

@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getFloat(docID), ref);
return ref;
}
};

final static class DoubleValues extends DocValuesArraySource {
private final double[] values;

DoubleValues() {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = new double[0];
}

private DoubleValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = new double[numDocs];
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Double.longBitsToDouble(input.readLong());
}
}

private DoubleValues(double[] array) {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
values = array;
}

@Override
public double[] getArray() {
return values;
}

@Override
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}

@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new DoubleValues(input, numDocs);
}

@Override
public DocValuesArraySource newFromArray(Object array) {
assert array instanceof double[];
return new DoubleValues((double[]) array);
}

@Override
public BytesRef getBytes(int docID, BytesRef ref) {
toBytes(getFloat(docID), ref);
return ref;
}

};

/**
* Copies the given long value and encodes it as 8 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 8 and resizes the
* reference array if needed.
*/
public static void copyLong(BytesRef ref, long value) {
if (ref.bytes.length < 8) {
ref.bytes = new byte[8];
}
copyInternal(ref, (int) (value >> 32), ref.offset = 0);
copyInternal(ref, (int) value, 4);
ref.length = 8;
}

/**
* Copies the given int value and encodes it as 4 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 4 and resizes the
* reference array if needed.
*/
public static void copyInt(BytesRef ref, int value) {
if (ref.bytes.length < 4) {
ref.bytes = new byte[4];
}
copyInternal(ref, value, ref.offset = 0);
ref.length = 4;
}

/**
* Copies the given short value and encodes it as a 2 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 2 and resizes the
* reference array if needed.
*/
public static void copyShort(BytesRef ref, short value) {
if (ref.bytes.length < 2) {
ref.bytes = new byte[2];
}
ref.offset = 0;
ref.bytes[ref.offset] = (byte) (value >> 8);
ref.bytes[ref.offset + 1] = (byte) (value);
ref.length = 2;
}

private static void copyInternal(BytesRef ref, int value, int startOffset) {
ref.bytes[startOffset] = (byte) (value >> 24);
ref.bytes[startOffset + 1] = (byte) (value >> 16);
ref.bytes[startOffset + 2] = (byte) (value >> 8);
ref.bytes[startOffset + 3] = (byte) (value);
}

/**
* Converts 2 consecutive bytes from the current offset to a short. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static short asShort(BytesRef b) {
return (short) (0xFFFF & ((b.bytes[b.offset] & 0xFF) << 8) | (b.bytes[b.offset + 1] & 0xFF));
}

/**
* Converts 4 consecutive bytes from the current offset to an int. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static int asInt(BytesRef b) {
return asIntInternal(b, b.offset);
}

/**
* Converts 8 consecutive bytes from the current offset to a long. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static long asLong(BytesRef b) {
return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
b.offset + 4) & 0xFFFFFFFFL);
}

private static int asIntInternal(BytesRef b, int pos) {
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
}

}

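A small usage sketch (not part of the commit) showing how the big-endian
helpers defined above round-trip values through a BytesRef:

    BytesRef scratch = new BytesRef();
    DocValuesArraySource.copyLong(scratch, 42L);   // sets offset=0, length=8, big-endian
    assert DocValuesArraySource.asLong(scratch) == 42L;
    DocValuesArraySource.copyShort(scratch, (short) 7);
    assert DocValuesArraySource.asShort(scratch) == 7;
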
@@ -22,6 +22,7 @@ import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.Bits;
@@ -40,6 +41,7 @@ public abstract class DocValuesConsumer {

protected final BytesRef spare = new BytesRef();

protected abstract Type getType();
/**
* Adds the given {@link IndexableField} instance to this
* {@link DocValuesConsumer}
@@ -110,7 +112,7 @@ public abstract class DocValuesConsumer {
final Source source = reader.getDirectSource();
assert source != null;
int docID = docBase;
final DocValues.Type type = reader.type();
final Type type = getType();
final Field scratchField;
switch(type) {
case VAR_INTS:
@@ -160,7 +162,7 @@ public abstract class DocValuesConsumer {
*/
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc)
throws IOException {
switch(source.type()) {
switch(getType()) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:

@@ -1,4 +1,4 @@
package org.apache.lucene.codecs.lucene40.values;
package org.apache.lucene.codecs;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,10 +24,6 @@ import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.codecs.lucene40.values.Bytes;
import org.apache.lucene.codecs.lucene40.values.Floats;
import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.DocValues;
@@ -40,7 +36,7 @@ import org.apache.lucene.util.BytesRef;
* Abstract base class for PerDocProducer implementations
* @lucene.experimental
*/
public abstract class DocValuesReaderBase extends PerDocProducer {
public abstract class PerDocProducerBase extends PerDocProducer {

protected abstract void closeInternal(Collection<? extends Closeable> closeables) throws IOException;
protected abstract Map<String, DocValues> docValues();
@@ -70,9 +66,7 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
for (FieldInfo fieldInfo : fieldInfos) {
if (canLoad(fieldInfo)) {
final String field = fieldInfo.name;
// TODO can we have a compound file per segment and codec for
// docvalues?
final String id = DocValuesWriterBase.docValuesId(segment,
final String id = docValuesId(segment,
fieldInfo.number);
values.put(field,
loadDocValues(docCount, dir, id, getDocValuesType(fieldInfo), context));
@@ -97,7 +91,11 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
}

protected boolean anyDocValuesFields(FieldInfos infos) {
return infos.anyDocValuesFields();
return infos.hasDocValues();
}

public static String docValuesId(String segmentsName, int fieldId) {
return segmentsName + "_" + fieldId;
}

/**
@@ -119,33 +117,6 @@ public abstract class DocValuesReaderBase extends PerDocProducer {
* @throws IllegalArgumentException
* if the given {@link Type} is not supported
*/
protected DocValues loadDocValues(int docCount, Directory dir, String id,
DocValues.Type type, IOContext context) throws IOException {
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
case BYTES_FIXED_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
case BYTES_VAR_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
case BYTES_VAR_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
case BYTES_VAR_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
default:
throw new IllegalStateException("unrecognized index values mode " + type);
}
}
protected abstract DocValues loadDocValues(int docCount, Directory dir, String id,
DocValues.Type type, IOContext context) throws IOException;
}

@@ -107,7 +107,7 @@ class Lucene3xFieldInfosReader extends FieldInfosReader {
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.BYTES_VAR_STRAIGHT : null);
omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.FIXED_INTS_8 : null);
}

if (input.getFilePointer() != input.length()) {

@@ -76,7 +76,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
try {
long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now)
for (FieldInfo fi : fields) {
if (fi.normsPresent()) {
if (fi.hasNorms()) {
String fileName = getNormFilename(segmentName, normGen, fi.number);
Directory d = hasSeparateNorms(normGen, fi.number) ? separateNormsDir : dir;

@@ -235,7 +235,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
}

@Override
public Type type() {
public Type getType() {
return Type.FIXED_INTS_8;
}

@@ -24,19 +24,24 @@ import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;

import org.apache.lucene.codecs.lucene40.values.DocValuesReaderBase;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.lucene40.values.Bytes;
import org.apache.lucene.codecs.lucene40.values.Floats;
import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;

/**
* Default PerDocProducer implementation that uses compound file.
* @lucene.experimental
*/
public class Lucene40DocValuesProducer extends DocValuesReaderBase {
public class Lucene40DocValuesProducer extends PerDocProducerBase {
protected final TreeMap<String,DocValues> docValues;
private final Directory cfs;
/**
@@ -71,4 +76,35 @@ public class Lucene40DocValuesProducer extends DocValuesReaderBase {
IOUtils.close(closeables);
}
}

@Override
protected DocValues loadDocValues(int docCount, Directory dir, String id,
Type type, IOContext context) throws IOException {
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
case BYTES_FIXED_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
case BYTES_VAR_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
case BYTES_VAR_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
case BYTES_VAR_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
default:
throw new IllegalStateException("unrecognized index values mode " + type);
}
}
}

@@ -62,7 +62,7 @@ public class Lucene40NormsFormat extends NormsFormat {

@Override
protected boolean canLoad(FieldInfo info) {
return info.normsPresent();
return info.hasNorms();
}

@Override
@@ -92,7 +92,7 @@ public class Lucene40NormsFormat extends NormsFormat {

@Override
protected boolean canMerge(FieldInfo info) {
return info.normsPresent();
return info.hasNorms();
}

@Override
@@ -104,7 +104,7 @@ public class Lucene40NormsFormat extends NormsFormat {
final String normsFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION);
FieldInfos fieldInfos = segmentInfo.getFieldInfos();
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.normsPresent()) {
if (fieldInfo.hasNorms()) {
final String normsEntriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
files.add(normsFileName);
files.add(normsEntriesFileName);

@@ -23,7 +23,6 @@ import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@@ -64,7 +63,7 @@ import org.apache.lucene.util.packed.PackedInts;
*
* @lucene.experimental
*/
final class Bytes {
public final class Bytes {

static final String DV_SEGMENT_SUFFIX = "dv";

@@ -242,8 +241,8 @@ final class Bytes {
private final IOContext context;

protected BytesWriterBase(Directory dir, String id, String codecName,
int version, Counter bytesUsed, IOContext context) throws IOException {
super(bytesUsed);
int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
super(bytesUsed, type);
this.id = id;
this.dir = dir;
this.codecName = codecName;
@@ -292,25 +291,11 @@ final class Bytes {
}
return idxOut;
}
/**
* Must be called only with increasing docIDs. It's OK for some docIDs to be
* skipped; they will be filled with 0 bytes.
*/
protected
abstract void add(int docID, BytesRef bytes) throws IOException;

@Override
public abstract void finish(int docCount) throws IOException;

@Override
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
add(docID, source.getBytes(sourceDoc, bytesRef));
}

@Override
public void add(int docID, IndexableField docValue) throws IOException {
add(docID, docValue.binaryValue());
}
}

/**
@@ -378,7 +363,7 @@ final class Bytes {
}

@Override
public Type type() {
public Type getType() {
return type;
}

@@ -393,22 +378,22 @@ final class Bytes {
protected long maxBytes = 0;

protected DerefBytesWriterBase(Directory dir, String id, String codecName,
int codecVersion, Counter bytesUsed, IOContext context)
int codecVersion, Counter bytesUsed, IOContext context, Type type)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false);
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
}

protected DerefBytesWriterBase(Directory dir, String id, String codecName,
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
throws IOException {
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam);
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
}

protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
super(dir, id, codecName, codecVersion, bytesUsed, context);
Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
super(dir, id, codecName, codecVersion, bytesUsed, context, type);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
@@ -430,7 +415,9 @@ final class Bytes {
}

@Override
protected void add(int docID, BytesRef bytes) throws IOException {
public void add(int docID, IndexableField value) throws IOException {
BytesRef bytes = value.binaryValue();
assert bytes != null;
if (bytes.length == 0) { // default value - skip it
return;
}

@@ -1,120 +0,0 @@
package org.apache.lucene.codecs.lucene40.values;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

import org.apache.lucene.util.BytesRef;

/**
* Package private BytesRefUtils - can move this into the o.a.l.utils package if
* needed.
*
* @lucene.internal
*/
final class BytesRefUtils {

private BytesRefUtils() {
}

/**
* Copies the given long value and encodes it as 8 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 8 and resizes the
* reference array if needed.
*/
public static void copyLong(BytesRef ref, long value) {
if (ref.bytes.length < 8) {
ref.bytes = new byte[8];
}
copyInternal(ref, (int) (value >> 32), ref.offset = 0);
copyInternal(ref, (int) value, 4);
ref.length = 8;
}

/**
* Copies the given int value and encodes it as 4 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 4 and resizes the
* reference array if needed.
*/
public static void copyInt(BytesRef ref, int value) {
if (ref.bytes.length < 4) {
ref.bytes = new byte[4];
}
copyInternal(ref, value, ref.offset = 0);
ref.length = 4;
}

/**
* Copies the given short value and encodes it as a 2 byte Big-Endian.
* <p>
* NOTE: this method resets the offset to 0, length to 2 and resizes the
* reference array if needed.
*/
public static void copyShort(BytesRef ref, short value) {
if (ref.bytes.length < 2) {
ref.bytes = new byte[2];
}
ref.bytes[ref.offset] = (byte) (value >> 8);
ref.bytes[ref.offset + 1] = (byte) (value);
ref.length = 2;
}

private static void copyInternal(BytesRef ref, int value, int startOffset) {
ref.bytes[startOffset] = (byte) (value >> 24);
ref.bytes[startOffset + 1] = (byte) (value >> 16);
ref.bytes[startOffset + 2] = (byte) (value >> 8);
ref.bytes[startOffset + 3] = (byte) (value);
}

/**
* Converts 2 consecutive bytes from the current offset to a short. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static short asShort(BytesRef b) {
return (short) (0xFFFF & ((b.bytes[b.offset] & 0xFF) << 8) | (b.bytes[b.offset + 1] & 0xFF));
}

/**
* Converts 4 consecutive bytes from the current offset to an int. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static int asInt(BytesRef b) {
return asIntInternal(b, b.offset);
}

/**
* Converts 8 consecutive bytes from the current offset to a long. Bytes are
* interpreted as Big-Endian (most significant bit first)
* <p>
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
*/
public static long asLong(BytesRef b) {
return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
b.offset + 4) & 0xFFFFFFFFL);
}

private static int asIntInternal(BytesRef b, int pos) {
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
}

}

@ -1,306 +0,0 @@
|
|||
package org.apache.lucene.codecs.lucene40.values;

import java.io.IOException;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

/**
 * @lucene.experimental
 */
abstract class DocValuesArray extends Source {

  static final Map<Type, DocValuesArray> TEMPLATES;

  static {
    EnumMap<Type, DocValuesArray> templates = new EnumMap<Type, DocValuesArray>(
        Type.class);
    templates.put(Type.FIXED_INTS_16, new ShortValues());
    templates.put(Type.FIXED_INTS_32, new IntValues());
    templates.put(Type.FIXED_INTS_64, new LongValues());
    templates.put(Type.FIXED_INTS_8, new ByteValues());
    templates.put(Type.FLOAT_32, new FloatValues());
    templates.put(Type.FLOAT_64, new DoubleValues());
    TEMPLATES = Collections.unmodifiableMap(templates);
  }

  protected final int bytesPerValue;

  DocValuesArray(int bytesPerValue, Type type) {
    super(type);
    this.bytesPerValue = bytesPerValue;
  }

  public abstract DocValuesArray newFromInput(IndexInput input, int numDocs)
      throws IOException;

  @Override
  public final boolean hasArray() {
    return true;
  }

  void toBytes(long value, BytesRef bytesRef) {
    BytesRefUtils.copyLong(bytesRef, value);
  }

  void toBytes(double value, BytesRef bytesRef) {
    BytesRefUtils.copyLong(bytesRef, Double.doubleToRawLongBits(value));
  }

  final static class ByteValues extends DocValuesArray {
    private final byte[] values;

    ByteValues() {
      super(1, Type.FIXED_INTS_8);
      values = new byte[0];
    }

    private ByteValues(IndexInput input, int numDocs) throws IOException {
      super(1, Type.FIXED_INTS_8);
      values = new byte[numDocs];
      input.readBytes(values, 0, values.length, false);
    }

    @Override
    public byte[] getArray() {
      return values;
    }

    @Override
    public long getInt(int docID) {
      assert docID >= 0 && docID < values.length;
      return values[docID];
    }

    @Override
    public DocValuesArray newFromInput(IndexInput input, int numDocs)
        throws IOException {
      return new ByteValues(input, numDocs);
    }

    void toBytes(long value, BytesRef bytesRef) {
      bytesRef.bytes[0] = (byte) (0xFFL & value);
    }

  };

  final static class ShortValues extends DocValuesArray {
    private final short[] values;

    ShortValues() {
      super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
      values = new short[0];
    }

    private ShortValues(IndexInput input, int numDocs) throws IOException {
      super(RamUsageEstimator.NUM_BYTES_SHORT, Type.FIXED_INTS_16);
      values = new short[numDocs];
      for (int i = 0; i < values.length; i++) {
        values[i] = input.readShort();
      }
    }

    @Override
    public short[] getArray() {
      return values;
    }

    @Override
    public long getInt(int docID) {
      assert docID >= 0 && docID < values.length;
      return values[docID];
    }

    @Override
    public DocValuesArray newFromInput(IndexInput input, int numDocs)
        throws IOException {
      return new ShortValues(input, numDocs);
    }

    void toBytes(long value, BytesRef bytesRef) {
      BytesRefUtils.copyShort(bytesRef, (short) (0xFFFFL & value));
    }

  };

  final static class IntValues extends DocValuesArray {
    private final int[] values;

    IntValues() {
      super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
      values = new int[0];
    }

    private IntValues(IndexInput input, int numDocs) throws IOException {
      super(RamUsageEstimator.NUM_BYTES_INT, Type.FIXED_INTS_32);
      values = new int[numDocs];
      for (int i = 0; i < values.length; i++) {
        values[i] = input.readInt();
      }
    }

    @Override
    public int[] getArray() {
      return values;
    }

    @Override
    public long getInt(int docID) {
      assert docID >= 0 && docID < values.length;
      return 0xFFFFFFFF & values[docID];
    }

    @Override
    public DocValuesArray newFromInput(IndexInput input, int numDocs)
        throws IOException {
      return new IntValues(input, numDocs);
    }

    void toBytes(long value, BytesRef bytesRef) {
      BytesRefUtils.copyInt(bytesRef, (int) (0xFFFFFFFF & value));
    }

  };

  final static class LongValues extends DocValuesArray {
    private final long[] values;

    LongValues() {
      super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
      values = new long[0];
    }

    private LongValues(IndexInput input, int numDocs) throws IOException {
      super(RamUsageEstimator.NUM_BYTES_LONG, Type.FIXED_INTS_64);
      values = new long[numDocs];
      for (int i = 0; i < values.length; i++) {
        values[i] = input.readLong();
      }
    }

    @Override
    public long[] getArray() {
      return values;
    }

    @Override
    public long getInt(int docID) {
      assert docID >= 0 && docID < values.length;
      return values[docID];
    }

    @Override
    public DocValuesArray newFromInput(IndexInput input, int numDocs)
        throws IOException {
      return new LongValues(input, numDocs);
    }

  };

  final static class FloatValues extends DocValuesArray {
    private final float[] values;

    FloatValues() {
      super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
      values = new float[0];
    }

    private FloatValues(IndexInput input, int numDocs) throws IOException {
      super(RamUsageEstimator.NUM_BYTES_FLOAT, Type.FLOAT_32);
      values = new float[numDocs];
      /*
       * we always read BIG_ENDIAN here since the writer serialized plain bytes
       * we can simply read the ints / longs back in using readInt / readLong
       */
      for (int i = 0; i < values.length; i++) {
        values[i] = Float.intBitsToFloat(input.readInt());
      }
    }

    @Override
    public float[] getArray() {
      return values;
    }

    @Override
    public double getFloat(int docID) {
      assert docID >= 0 && docID < values.length;
      return values[docID];
    }

    @Override
    void toBytes(double value, BytesRef bytesRef) {
      BytesRefUtils.copyInt(bytesRef, Float.floatToRawIntBits((float)value));

    }

    @Override
    public DocValuesArray newFromInput(IndexInput input, int numDocs)
        throws IOException {
      return new FloatValues(input, numDocs);
    }
  };

  final static class DoubleValues extends DocValuesArray {
    private final double[] values;

    DoubleValues() {
      super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
      values = new double[0];
    }

    private DoubleValues(IndexInput input, int numDocs) throws IOException {
      super(RamUsageEstimator.NUM_BYTES_DOUBLE, Type.FLOAT_64);
      values = new double[numDocs];
      /*
       * we always read BIG_ENDIAN here since the writer serialized plain bytes
       * we can simply read the ints / longs back in using readInt / readLong
       */
      for (int i = 0; i < values.length; i++) {
        values[i] = Double.longBitsToDouble(input.readLong());
      }
    }

    @Override
    public double[] getArray() {
      return values;
    }

    @Override
    public double getFloat(int docID) {
      assert docID >= 0 && docID < values.length;
      return values[docID];
    }

    @Override
    public DocValuesArray newFromInput(IndexInput input, int numDocs)
        throws IOException {
      return new DoubleValues(input, numDocs);
    }

  };

}

@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.lucene40.values.Writer;
import org.apache.lucene.index.FieldInfo;

@@ -81,14 +82,10 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
  @Override
  public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
    return Writer.create(valueType,
        docValuesId(segmentName, field.number),
        PerDocProducerBase.docValuesId(segmentName, field.number),
        getDirectory(), getComparator(), bytesUsed, context, fasterButMoreRam);
  }

  public static String docValuesId(String segmentsName, int fieldId) {
    return segmentsName + "_" + fieldId;
  }


  public Comparator<BytesRef> getComparator() throws IOException {
    return BytesRef.getUTF8SortedAsUnicodeComparator();

@@ -46,7 +46,7 @@ class FixedDerefBytesImpl {
  public static class Writer extends DerefBytesWriterBase {
    public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
        throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
    }

    @Override

@@ -84,7 +84,7 @@ class FixedDerefBytesImpl {
    @Override
    public Source getDirectSource()
        throws IOException {
      return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type());
      return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType());
    }

    @Override

@@ -58,7 +58,7 @@ class FixedSortedBytesImpl {

    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
        Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
      this.comp = comp;
    }


@@ -22,10 +22,12 @@ import java.io.IOException;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesReaderBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesSourceBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesWriterBase;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

@@ -52,6 +54,7 @@ class FixedStraightBytesImpl {
  static final int VERSION_CURRENT = VERSION_START;

  static abstract class FixedBytesWriterBase extends BytesWriterBase {
    protected final DocValuesField bytesSpareField = new DocValuesField("", new BytesRef(), Type.BYTES_FIXED_STRAIGHT);
    protected int lastDocID = -1;
    // start at -1 if the first added value is > 0
    protected int size = -1;

@@ -60,13 +63,20 @@ class FixedStraightBytesImpl {

    protected FixedBytesWriterBase(Directory dir, String id, String codecName,
        int version, Counter bytesUsed, IOContext context) throws IOException {
      super(dir, id, codecName, version, bytesUsed, context);
      this(dir, id, codecName, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
    }

    protected FixedBytesWriterBase(Directory dir, String id, String codecName,
        int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
      super(dir, id, codecName, version, bytesUsed, context, type);
      pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
      pool.nextBuffer();
    }

    @Override
    protected void add(int docID, BytesRef bytes) throws IOException {
    public void add(int docID, IndexableField value) throws IOException {
      final BytesRef bytes = value.binaryValue();
      assert bytes != null;
      assert lastDocID < docID;

      if (size == -1) {

@@ -277,7 +287,7 @@ class FixedStraightBytesImpl {

    @Override
    public Source getDirectSource() throws IOException {
      return new DirectFixedStraightSource(cloneData(), size, type());
      return new DirectFixedStraightSource(cloneData(), size, getType());
    }

    @Override

@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene40.values;
 */
import java.io.IOException;

import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;

@@ -39,7 +40,7 @@ import org.apache.lucene.util.IOUtils;
 *
 * @lucene.experimental
 */
class Floats {
public class Floats {

  protected static final String CODEC_NAME = "Floats";
  protected static final int VERSION_START = 0;

@@ -69,31 +70,28 @@ class Floats {
  final static class FloatsWriter extends FixedStraightBytesImpl.Writer {

    private final int size;
    private final DocValuesArray template;
    private final DocValuesArraySource template;
    public FloatsWriter(Directory dir, String id, Counter bytesUsed,
        IOContext context, Type type) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
      size = typeToSize(type);
      this.bytesRef = new BytesRef(size);
      bytesRef.length = size;
      template = DocValuesArray.TEMPLATES.get(type);
      template = DocValuesArraySource.forType(type);
      assert template != null;
    }

    protected void add(int docID, double v) throws IOException {
      template.toBytes(v, bytesRef);
      add(docID, bytesRef);
    }

    @Override
    public void add(int docID, IndexableField docValue) throws IOException {
      add(docID, docValue.numericValue().doubleValue());
    }

    @Override
    protected boolean tryBulkMerge(DocValues docValues) {
      // only bulk merge if value type is the same otherwise size differs
      return super.tryBulkMerge(docValues) && docValues.type() == template.type();
      return super.tryBulkMerge(docValues) && docValues.getType() == template.getType();
    }

    @Override
    public void add(int docID, IndexableField value) throws IOException {
      template.toBytes(value.numericValue().doubleValue(), bytesRef);
      bytesSpareField.setBytesValue(bytesRef);
      super.add(docID, bytesSpareField);
    }

    @Override

@@ -104,11 +102,11 @@ class Floats {
  }

  final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader {
    final DocValuesArray arrayTemplate;
    final DocValuesArraySource arrayTemplate;
    FloatsReader(Directory dir, String id, int maxDoc, IOContext context, Type type)
        throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
      arrayTemplate = DocValuesArray.TEMPLATES.get(type);
      arrayTemplate = DocValuesArraySource.forType(type);
      assert size == 4 || size == 8: "wrong size=" + size + " type=" + type + " id=" + id;
    }


@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40.values;

import java.io.IOException;

import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;

@@ -36,7 +37,7 @@ import org.apache.lucene.util.IOUtils;
 *
 * @lucene.experimental
 */
final class Ints {
public final class Ints {
  protected static final String CODEC_NAME = "Ints";
  protected static final int VERSION_START = 0;
  protected static final int VERSION_CURRENT = VERSION_START;

@@ -88,7 +89,7 @@ final class Ints {


  static class IntsWriter extends FixedStraightBytesImpl.Writer {
    private final DocValuesArray template;
    private final DocValuesArraySource template;

    public IntsWriter(Directory dir, String id, Counter bytesUsed,
        IOContext context, Type valueType) throws IOException {

@@ -101,17 +102,7 @@ final class Ints {
      size = typeToSize(valueType);
      this.bytesRef = new BytesRef(size);
      bytesRef.length = size;
      template = DocValuesArray.TEMPLATES.get(valueType);
    }

    protected void add(int docID, long v) throws IOException {
      template.toBytes(v, bytesRef);
      add(docID, bytesRef);
    }

    @Override
    public void add(int docID, IndexableField docValue) throws IOException {
      add(docID, docValue.numericValue().longValue());
      template = DocValuesArraySource.forType(valueType);
    }

    @Override

@@ -120,21 +111,28 @@ final class Ints {
      template.toBytes(value, bytesRef);
    }

    @Override
    public void add(int docID, IndexableField value) throws IOException {
      template.toBytes(value.numericValue().longValue(), bytesRef);
      bytesSpareField.setBytesValue(bytesRef);
      super.add(docID, bytesSpareField);
    }

    @Override
    protected boolean tryBulkMerge(DocValues docValues) {
      // only bulk merge if value type is the same otherwise size differs
      return super.tryBulkMerge(docValues) && docValues.type() == template.type();
      return super.tryBulkMerge(docValues) && docValues.getType() == template.getType();
    }
  }

  final static class IntsReader extends FixedStraightBytesImpl.FixedStraightReader {
    private final DocValuesArray arrayTemplate;
    private final DocValuesArraySource arrayTemplate;

    IntsReader(Directory dir, String id, int maxDoc, IOContext context, Type type)
        throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc,
          context, type);
      arrayTemplate = DocValuesArray.TEMPLATES.get(type);
      arrayTemplate = DocValuesArraySource.forType(type);
      assert arrayTemplate != null;
      assert type == sizeToType(size);
    }


@@ -18,9 +18,8 @@ package org.apache.lucene.codecs.lucene40.values;
 */
import java.io.IOException;

import org.apache.lucene.codecs.lucene40.values.DocValuesArray.LongValues;
import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.lucene40.values.FixedStraightBytesImpl.FixedBytesWriterBase;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;

@@ -59,27 +58,10 @@ class PackedIntValues {

    protected PackedIntsWriter(Directory dir, String id, Counter bytesUsed,
        IOContext context) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.VAR_INTS);
      bytesRef = new BytesRef(8);
    }

    protected void add(int docID, long v) throws IOException {
      assert lastDocId < docID;
      if (!started) {
        started = true;
        minValue = maxValue = v;
      } else {
        if (v < minValue) {
          minValue = v;
        } else if (v > maxValue) {
          maxValue = v;
        }
      }
      lastDocId = docID;
      BytesRefUtils.copyLong(bytesRef, v);
      add(docID, bytesRef);
    }


    @Override
    public void finish(int docCount) throws IOException {
      boolean success = false;

@@ -112,13 +94,6 @@ class PackedIntValues {
      }
    }

    @Override
    protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
      assert docID > lastDocId : "docID: " + docID
          + " must be greater than the last added doc id: " + lastDocId;
      add(docID, source.getInt(sourceDoc));
    }

    private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
      datOut.writeLong(minValue);


@@ -149,10 +124,25 @@ class PackedIntValues {
      }
      w.finish();
    }


    @Override
    public void add(int docID, IndexableField docValue) throws IOException {
      add(docID, docValue.numericValue().longValue());
      final long v = docValue.numericValue().longValue();
      assert lastDocId < docID;
      if (!started) {
        started = true;
        minValue = maxValue = v;
      } else {
        if (v < minValue) {
          minValue = v;
        } else if (v > maxValue) {
          maxValue = v;
        }
      }
      lastDocId = docID;
      DocValuesArraySource.copyLong(bytesRef, v);
      bytesSpareField.setBytesValue(bytesRef);
      super.add(docID, bytesSpareField);
    }
  }


@@ -164,7 +154,7 @@ class PackedIntValues {
    private final IndexInput datIn;
    private final byte type;
    private final int numDocs;
    private final LongValues values;
    private final DocValuesArraySource values;

    protected PackedIntsReader(Directory dir, String id, int numDocs,
        IOContext context) throws IOException {

@@ -176,7 +166,7 @@ class PackedIntValues {
    try {
      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
      type = datIn.readByte();
      values = type == FIXED_64 ? new LongValues() : null;
      values = type == FIXED_64 ? DocValuesArraySource.forType(Type.FIXED_INTS_64) : null;
      success = true;
    } finally {
      if (!success) {

@@ -220,7 +210,7 @@ class PackedIntValues {


    @Override
    public Type type() {
    public Type getType() {
      return Type.VAR_INTS;
    }

@@ -247,7 +237,7 @@ class PackedIntValues {
    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      ref.grow(8);
      BytesRefUtils.copyLong(ref, getInt(docID));
      DocValuesArraySource.copyLong(ref, getInt(docID));
      return ref;
    }


@@ -57,7 +57,7 @@ class VarDerefBytesImpl {
  static class Writer extends DerefBytesWriterBase {
    public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
        throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
      size = 0;
    }

@@ -105,7 +105,7 @@ class VarDerefBytesImpl {
    @Override
    public Source getDirectSource()
        throws IOException {
      return new DirectVarDerefSource(cloneData(), cloneIndex(), type());
      return new DirectVarDerefSource(cloneData(), cloneIndex(), getType());
    }
  }


@@ -59,7 +59,7 @@ final class VarSortedBytesImpl {

    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
        Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam);
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
      this.comp = comp;
      size = 0;
    }

@@ -166,7 +166,7 @@ final class VarSortedBytesImpl {

    @Override
    public Source getDirectSource() throws IOException {
      return new DirectSortedSource(cloneData(), cloneIndex(), comparator, type());
      return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType());
    }

  }


@@ -26,6 +26,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

@@ -63,7 +64,7 @@ class VarStraightBytesImpl {
    private boolean merge = false;
    public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
        throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
      pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
      docToAddress = new long[1];
      pool.nextBuffer(); // init

@@ -84,7 +85,9 @@ class VarStraightBytesImpl {
    }

    @Override
    protected void add(int docID, BytesRef bytes) throws IOException {
    public void add(int docID, IndexableField value) throws IOException {
      final BytesRef bytes = value.binaryValue();
      assert bytes != null;
      assert !merge;
      if (bytes.length == 0) {
        return; // default

@@ -245,7 +248,7 @@ class VarStraightBytesImpl {
    @Override
    public Source getDirectSource()
        throws IOException {
      return new DirectVarStraightSource(cloneData(), cloneIndex(), type());
      return new DirectVarStraightSource(cloneData(), cloneIndex(), getType());
    }
  }


@@ -40,6 +40,7 @@ import org.apache.lucene.util.Counter;
 */
abstract class Writer extends DocValuesConsumer {
  protected final Counter bytesUsed;
  protected Type type;

  /**
   * Creates a new {@link Writer}.

@@ -49,9 +50,19 @@ abstract class Writer extends DocValuesConsumer {
   * internally allocated memory. All tracked bytes must be released
   * once {@link #finish(int)} has been called.
   */
  protected Writer(Counter bytesUsed) {
  protected Writer(Counter bytesUsed, Type type) {
    this.bytesUsed = bytesUsed;
    this.type = type;
  }

  @Override
  protected Type getType() {
    return type;
  }

  /**
   * Factory method to create a {@link Writer} instance for a given type. This

@@ -98,8 +98,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
    return "PostingsFormat(name=" + getName() + " doPackFST= " + doPackFST + ")";
  }

  private static final boolean VERBOSE = false;

  private final static class TermsWriter extends TermsConsumer {
    private final IndexOutput out;
    private final FieldInfo field;

@@ -123,10 +121,13 @@ public class MemoryPostingsFormat extends PostingsFormat {
      // NOTE: not private so we don't pay access check at runtime:
      int docCount;
      RAMOutputStream buffer = new RAMOutputStream();

      int lastOffsetLength;
      int lastOffset;

      @Override
      public void startDoc(int docID, int termDocFreq) throws IOException {
        if (VERBOSE) System.out.println("    startDoc docID=" + docID + " freq=" + termDocFreq);
        //System.out.println("    startDoc docID=" + docID + " freq=" + termDocFreq);
        final int delta = docID - lastDocID;
        assert docID == 0 || delta > 0;
        lastDocID = docID;

@@ -143,20 +144,23 @@ public class MemoryPostingsFormat extends PostingsFormat {
        }

        lastPos = 0;
        lastOffset = 0;
      }

      @Override
      public void addPosition(int pos, BytesRef payload, int startOffset, int endOffset) throws IOException {
        assert payload == null || field.storePayloads;

        if (VERBOSE) System.out.println("      addPos pos=" + pos + " payload=" + payload);
        //System.out.println("      addPos pos=" + pos + " payload=" + payload);

        final int delta = pos - lastPos;
        assert delta >= 0;
        lastPos = pos;

        int payloadLen = 0;

        if (field.storePayloads) {
          final int payloadLen = payload == null ? 0 : payload.length;
          payloadLen = payload == null ? 0 : payload.length;
          if (payloadLen != lastPayloadLen) {
            lastPayloadLen = payloadLen;
            buffer.writeVInt((delta<<1)|1);

@@ -164,13 +168,28 @@ public class MemoryPostingsFormat extends PostingsFormat {
          } else {
            buffer.writeVInt(delta<<1);
          }

          if (payloadLen > 0) {
            buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
          }
        } else {
          buffer.writeVInt(delta);
        }

        if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
          // don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
          // and the numbers aren't that much smaller anyways.
          int offsetDelta = startOffset - lastOffset;
          int offsetLength = endOffset - startOffset;
          if (offsetLength != lastOffsetLength) {
            buffer.writeVInt(offsetDelta << 1 | 1);
            buffer.writeVInt(offsetLength);
          } else {
            buffer.writeVInt(offsetDelta << 1);
          }
          lastOffset = startOffset;
          lastOffsetLength = offsetLength;
        }
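        // offsets reuse the same vint trick as positions and payload lengths
        // above: the delta's low bit flags whether a new length follows, so
        // runs of unchanged lengths cost no extra bytes per position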

        if (payloadLen > 0) {
          buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
        }
      }

      @Override

@@ -182,6 +201,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
        lastDocID = 0;
        docCount = 0;
        lastPayloadLen = 0;
        // force first offset to write its length
        lastOffsetLength = -1;
        return this;
      }
    }

@@ -190,7 +211,7 @@ public class MemoryPostingsFormat extends PostingsFormat {

    @Override
    public PostingsConsumer startTerm(BytesRef text) {
      if (VERBOSE) System.out.println("  startTerm term=" + text.utf8ToString());
      //System.out.println("  startTerm term=" + text.utf8ToString());
      return postingsWriter.reset();
    }

@@ -224,12 +245,12 @@ public class MemoryPostingsFormat extends PostingsFormat {

      spare.bytes = finalBuffer;
      spare.length = totalBytes;
      if (VERBOSE) {
        System.out.println("  finishTerm term=" + text.utf8ToString() + " " + totalBytes + " bytes totalTF=" + stats.totalTermFreq);
        for(int i=0;i<totalBytes;i++) {
          System.out.println("    " + Integer.toHexString(finalBuffer[i]&0xFF));
        }
      }

      //System.out.println("  finishTerm term=" + text.utf8ToString() + " " + totalBytes + " bytes totalTF=" + stats.totalTermFreq);
      //for(int i=0;i<totalBytes;i++) {
      //  System.out.println("    " + Integer.toHexString(finalBuffer[i]&0xFF));
      //}

      builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
      termCount++;
    }

@@ -249,7 +270,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
        fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4));
      }
      fst.save(out);
      if (VERBOSE) System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
      //System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
    }
  }

@@ -270,10 +291,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
    return new FieldsConsumer() {
      @Override
      public TermsConsumer addField(FieldInfo field) {
        if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
          throw new UnsupportedOperationException("this codec cannot index offsets");
        }
        if (VERBOSE) System.out.println("\naddField field=" + field.name);
        //System.out.println("\naddField field=" + field.name);
        return new TermsWriter(out, field, doPackFST);
      }

@@ -331,11 +349,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
    @Override
    public int nextDoc() {
      while(true) {
        if (VERBOSE) System.out.println("  nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
        //System.out.println("  nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
        if (docUpto == numDocs) {
          if (VERBOSE) {
            System.out.println("    END");
          }
          // System.out.println("    END");
          return docID = NO_MORE_DOCS;
        }
        docUpto++;

@@ -344,7 +360,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
        } else {
          final int code = in.readVInt();
          accum += code >>> 1;
          if (VERBOSE) System.out.println("  docID=" + accum + " code=" + code);
          //System.out.println("  docID=" + accum + " code=" + code);
          if ((code & 1) != 0) {
            freq = 1;
          } else {

@@ -352,8 +368,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
            assert freq > 0;
          }

        if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
          // Skip positions
        if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
          // Skip positions/payloads
          for(int posUpto=0;posUpto<freq;posUpto++) {
            if (!storePayloads) {
              in.readVInt();

@@ -365,11 +381,26 @@ public class MemoryPostingsFormat extends PostingsFormat {
              in.skipBytes(payloadLen);
            }
          }
        } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
          // Skip positions/offsets/payloads
          for(int posUpto=0;posUpto<freq;posUpto++) {
            int posCode = in.readVInt();
            if (storePayloads && ((posCode & 1) != 0)) {
              payloadLen = in.readVInt();
            }
            if ((in.readVInt() & 1) != 0) {
              // new offset length
              in.readVInt();
            }
            if (storePayloads) {
              in.skipBytes(payloadLen);
            }
          }
        }
      }
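        // the skip paths above mirror addPosition exactly: the position
        // vint's low bit flags a new payload length, the offset vint's low
        // bit a new offset length, and any payload bytes trail each position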

        if (liveDocs == null || liveDocs.get(accum)) {
          if (VERBOSE) System.out.println("  return docID=" + accum + " freq=" + freq);
          //System.out.println("  return docID=" + accum + " freq=" + freq);
          return (docID = accum);
        }
      }

@@ -413,26 +444,30 @@ public class MemoryPostingsFormat extends PostingsFormat {
    private int posPending;
    private int payloadLength;
    private boolean payloadRetrieved;
    final boolean storeOffsets;
    int offsetLength;
    int startOffset;

    private int pos;
    private final BytesRef payload = new BytesRef();

    public FSTDocsAndPositionsEnum(boolean storePayloads) {
    public FSTDocsAndPositionsEnum(boolean storePayloads, boolean storeOffsets) {
      this.storePayloads = storePayloads;
      this.storeOffsets = storeOffsets;
    }

    public boolean canReuse(boolean storePayloads) {
      return storePayloads == this.storePayloads;
    public boolean canReuse(boolean storePayloads, boolean storeOffsets) {
      return storePayloads == this.storePayloads && storeOffsets == this.storeOffsets;
    }

    public FSTDocsAndPositionsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
      assert numDocs > 0;
      if (VERBOSE) {
        System.out.println("D&P reset bytes this=" + this);
        for(int i=bufferIn.offset;i<bufferIn.length;i++) {
          System.out.println("  " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
        }
      }

      // System.out.println("D&P reset bytes this=" + this);
      // for(int i=bufferIn.offset;i<bufferIn.length;i++) {
      //   System.out.println("  " + Integer.toHexString(bufferIn.bytes[i]&0xFF));
      // }

      if (buffer.length < bufferIn.length - bufferIn.offset) {
        buffer = ArrayUtil.grow(buffer, bufferIn.length - bufferIn.offset);
      }

@@ -447,6 +482,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
      this.numDocs = numDocs;
      posPending = 0;
      payloadRetrieved = false;
      startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
      offsetLength = 0;
      return this;
    }

@@ -456,9 +493,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
        nextPosition();
      }
      while(true) {
        if (VERBOSE) System.out.println("  nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
        //System.out.println("  nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
        if (docUpto == numDocs) {
          if (VERBOSE) System.out.println("    END");
          //System.out.println("    END");
          return docID = NO_MORE_DOCS;
        }
        docUpto++;

@@ -474,8 +511,9 @@ public class MemoryPostingsFormat extends PostingsFormat {

        if (liveDocs == null || liveDocs.get(accum)) {
          pos = 0;
          startOffset = storeOffsets ? 0 : -1;
          posPending = freq;
          if (VERBOSE) System.out.println("  return docID=" + accum + " freq=" + freq);
          //System.out.println("  return docID=" + accum + " freq=" + freq);
          return (docID = accum);
        }

@@ -487,8 +525,18 @@ public class MemoryPostingsFormat extends PostingsFormat {
          final int skipCode = in.readVInt();
          if ((skipCode & 1) != 0) {
            payloadLength = in.readVInt();
            if (VERBOSE) System.out.println("    new payloadLen=" + payloadLength);
            //System.out.println("    new payloadLen=" + payloadLength);
          }
        }

        if (storeOffsets) {
          if ((in.readVInt() & 1) != 0) {
            // new offset length
            offsetLength = in.readVInt();
          }
        }

        if (storePayloads) {
          in.skipBytes(payloadLength);
        }
      }

@@ -497,7 +545,7 @@ public class MemoryPostingsFormat extends PostingsFormat {

    @Override
    public int nextPosition() {
      if (VERBOSE) System.out.println("    nextPos storePayloads=" + storePayloads + " this=" + this);
      //System.out.println("    nextPos storePayloads=" + storePayloads + " this=" + this);
      assert posPending > 0;
      posPending--;
      if (!storePayloads) {

@@ -511,6 +559,18 @@ public class MemoryPostingsFormat extends PostingsFormat {
        //} else {
        //System.out.println("  same payloadLen=" + payloadLength);
        }
      }

      if (storeOffsets) {
        int offsetCode = in.readVInt();
        if ((offsetCode & 1) != 0) {
          // new offset length
          offsetLength = in.readVInt();
        }
        startOffset += offsetCode >>> 1;
      }

      if (storePayloads) {
        payload.offset = in.getPosition();
        in.skipBytes(payloadLength);
        payload.length = payloadLength;

@@ -520,18 +580,18 @@ public class MemoryPostingsFormat extends PostingsFormat {
        payloadRetrieved = false;
      }

      if (VERBOSE) System.out.println("      pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
      //System.out.println("      pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
      return pos;
    }

    @Override
    public int startOffset() {
      return -1;
      return startOffset;
    }

    @Override
    public int endOffset() {
      return -1;
      return startOffset + offsetLength;
    }

    @Override

@@ -594,14 +654,14 @@ public class MemoryPostingsFormat extends PostingsFormat {
          totalTermFreq = -1;
        }
        current.output.offset = buffer.getPosition();
        if (VERBOSE) System.out.println("  df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
        //System.out.println("  df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
        didDecode = true;
      }
    }

    @Override
    public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
      if (VERBOSE) System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
      //System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
      current = fstEnum.seekExact(text);
      didDecode = false;
      return current != null;

@@ -609,25 +669,24 @@ public class MemoryPostingsFormat extends PostingsFormat {

    @Override
    public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
      if (VERBOSE) System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
      //System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
      current = fstEnum.seekCeil(text);
      if (current == null) {
        return SeekStatus.END;
      } else {
        if (VERBOSE) {
          System.out.println("  got term=" + current.input.utf8ToString());
          for(int i=0;i<current.output.length;i++) {
            System.out.println("    " + Integer.toHexString(current.output.bytes[i]&0xFF));
          }
        }

        // System.out.println("  got term=" + current.input.utf8ToString());
        // for(int i=0;i<current.output.length;i++) {
        //   System.out.println("    " + Integer.toHexString(current.output.bytes[i]&0xFF));
        // }

        didDecode = false;

        if (text.equals(current.input)) {
          if (VERBOSE) System.out.println("  found!");
          //System.out.println("  found!");
          return SeekStatus.FOUND;
        } else {
          if (VERBOSE) System.out.println("  not found: " + current.input.utf8ToString());
          //System.out.println("  not found: " + current.input.utf8ToString());
          return SeekStatus.NOT_FOUND;
        }
      }

@@ -654,9 +713,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
    @Override
    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {

      if (needsOffsets) {
        // Not until we can index offsets...
        return null;
      boolean hasOffsets = field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      if (needsOffsets && !hasOffsets) {
        return null; // not available
      }

      if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {

@@ -665,14 +724,14 @@ public class MemoryPostingsFormat extends PostingsFormat {
      decodeMetaData();
      FSTDocsAndPositionsEnum docsAndPositionsEnum;
      if (reuse == null || !(reuse instanceof FSTDocsAndPositionsEnum)) {
        docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads);
        docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads, hasOffsets);
      } else {
        docsAndPositionsEnum = (FSTDocsAndPositionsEnum) reuse;
        if (!docsAndPositionsEnum.canReuse(field.storePayloads)) {
          docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads);
        if (!docsAndPositionsEnum.canReuse(field.storePayloads, hasOffsets)) {
          docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.storePayloads, hasOffsets);
        }
      }
      if (VERBOSE) System.out.println("D&P reset this=" + this);
      //System.out.println("D&P reset this=" + this);
      return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
    }

@@ -683,14 +742,14 @@ public class MemoryPostingsFormat extends PostingsFormat {

    @Override
    public BytesRef next() throws IOException {
      if (VERBOSE) System.out.println("te.next");
      //System.out.println("te.next");
      current = fstEnum.next();
      if (current == null) {
        if (VERBOSE) System.out.println("  END");
        //System.out.println("  END");
        return null;
      }
      didDecode = false;
      if (VERBOSE) System.out.println("  term=" + field.name + ":" + current.input.utf8ToString());
      //System.out.println("  term=" + field.name + ":" + current.input.utf8ToString());
      return current.input;
    }

@@ -794,9 +853,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
          break;
        }
        final TermsReader termsReader = new TermsReader(state.fieldInfos, in, termCount);
        if (VERBOSE) {
          System.out.println("load field=" + termsReader.field.name);
        }
        // System.out.println("load field=" + termsReader.field.name);
        fields.put(termsReader.field.name, termsReader);
      }
    } finally {

@@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.ServiceLoader; // javadocs
import java.util.Set;
import java.util.TreeMap;


@@ -47,7 +48,14 @@ import org.apache.lucene.util.IOUtils;

/**
 * Enables per field format support.
 *
 * <p>
 * Note, when extending this class, the name ({@link #getName}) is
 * written into the index. In order for the field to be read, the
 * name must resolve to your implementation via {@link #forName(String)}.
 * This method uses Java's
 * {@link ServiceLoader Service Provider Interface} to resolve format names.
 * <p>
 * @see ServiceLoader
 * @lucene.experimental
 */


@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.lucene40.values.DocValuesWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;

@@ -58,7 +59,7 @@ public class SepDocValuesConsumer extends DocValuesWriterBase {
  private static void files(Directory dir,FieldInfos fieldInfos, String segmentName, Set<String> files) {
    for (FieldInfo fieldInfo : fieldInfos) {
      if (fieldInfo.hasDocValues()) {
        String filename = docValuesId(segmentName, fieldInfo.number);
        String filename = PerDocProducerBase.docValuesId(segmentName, fieldInfo.number);
        switch (fieldInfo.getDocValuesType()) {
          case BYTES_FIXED_DEREF:
          case BYTES_VAR_DEREF:

@@ -22,16 +22,22 @@ import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;

import org.apache.lucene.codecs.lucene40.values.DocValuesReaderBase;
import org.apache.lucene.codecs.PerDocProducerBase;
import org.apache.lucene.codecs.lucene40.values.Bytes;
import org.apache.lucene.codecs.lucene40.values.Floats;
import org.apache.lucene.codecs.lucene40.values.Ints;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.IOUtils;

/**
 * Implementation of PerDocProducer that uses separate files.
 * @lucene.experimental
 */
public class SepDocValuesProducer extends DocValuesReaderBase {
public class SepDocValuesProducer extends PerDocProducerBase {
  private final TreeMap<String, DocValues> docValues;

  /**

@@ -51,4 +57,35 @@ public class SepDocValuesProducer extends DocValuesReaderBase {
  protected void closeInternal(Collection<? extends Closeable> closeables) throws IOException {
    IOUtils.close(closeables);
  }

  @Override
  protected DocValues loadDocValues(int docCount, Directory dir, String id,
      Type type, IOContext context) throws IOException {
    switch (type) {
    case FIXED_INTS_16:
    case FIXED_INTS_32:
    case FIXED_INTS_64:
    case FIXED_INTS_8:
    case VAR_INTS:
      return Ints.getValues(dir, id, docCount, type, context);
    case FLOAT_32:
      return Floats.getValues(dir, id, docCount, context, type);
    case FLOAT_64:
      return Floats.getValues(dir, id, docCount, context, type);
    case BYTES_FIXED_STRAIGHT:
      return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
    case BYTES_FIXED_DEREF:
      return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
    case BYTES_FIXED_SORTED:
      return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
    case BYTES_VAR_STRAIGHT:
      return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
    case BYTES_VAR_DEREF:
      return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
    case BYTES_VAR_SORTED:
      return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
    default:
      throw new IllegalStateException("unrecognized index values mode " + type);
    }
  }
}

@@ -26,7 +26,6 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfosFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;

/**
 * plain text index format.

@@ -41,7 +40,7 @@ public final class SimpleTextCodec extends Codec {
  private final FieldInfosFormat fieldInfosFormat = new SimpleTextFieldInfosFormat();
  private final TermVectorsFormat vectorsFormat = new SimpleTextTermVectorsFormat();
  // TODO: need a plain-text impl
  private final DocValuesFormat docValues = new Lucene40DocValuesFormat();
  private final DocValuesFormat docValues = new SimpleTextDocValuesFormat();
  // TODO: need a plain-text impl (using the above)
  private final NormsFormat normsFormat = new SimpleTextNormsFormat();
  private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();

@@ -0,0 +1,288 @@
package org.apache.lucene.codecs.simpletext;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
import java.io.IOException;

import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.IOUtils;

/**
 * @lucene.experimental
 */
public class SimpleTextDocValuesConsumer extends DocValuesConsumer {

  static final BytesRef ZERO_DOUBLE = new BytesRef(Double.toString(0d));
  static final BytesRef ZERO_INT = new BytesRef(Integer.toString(0));
  static final BytesRef HEADER = new BytesRef("SimpleTextDocValues");

  static final BytesRef END = new BytesRef("END");
  static final BytesRef VALUE_SIZE = new BytesRef("valuesize ");
  static final BytesRef DOC = new BytesRef("  doc ");
  static final BytesRef VALUE = new BytesRef("  value ");
  protected BytesRef scratch = new BytesRef();
  protected int maxDocId = -1;
  protected final String segment;
  protected final Directory dir;
  protected final IOContext ctx;
  protected final Type type;
  protected final BytesRefHash hash;
  private int[] ords;
  private int fixedSize = Integer.MIN_VALUE;
  private BytesRef zeroBytes;
  private final String segmentSuffix;


  public SimpleTextDocValuesConsumer(String segment, Directory dir,
      IOContext ctx, Type type, String segmentSuffix) {
    this.ctx = ctx;
    this.dir = dir;
    this.segment = segment;
    this.type = type;
    hash = new BytesRefHash();
    ords = new int[0];
    this.segmentSuffix = segmentSuffix;

  }

  @Override
  public void add(int docID, IndexableField value) throws IOException {
    assert docID >= 0;
    int ord = -1;
    int vSize = -1;
    switch (type) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:
    case BYTES_FIXED_STRAIGHT:
      vSize = value.binaryValue().length;
      ord = hash.add(value.binaryValue());
      break;
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT:
      vSize = -1;
      try {
        ord = hash.add(value.binaryValue());
      } catch (NullPointerException e) {
        System.err.println();
      }
      break;
    case FIXED_INTS_16:
      vSize = 2;
      scratch.grow(2);
      DocValuesArraySource.copyShort(scratch, value.numericValue().shortValue());
      ord = hash.add(scratch);
      break;
    case FIXED_INTS_32:
      vSize = 4;
      scratch.grow(4);
      DocValuesArraySource.copyInt(scratch, value.numericValue().intValue());
      ord = hash.add(scratch);
      break;
    case FIXED_INTS_8:
      vSize = 1;
      scratch.grow(1);
      scratch.bytes[scratch.offset] = value.numericValue().byteValue();
      scratch.length = 1;
      ord = hash.add(scratch);
      break;
    case FIXED_INTS_64:
      vSize = 8;
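      // falls through: FIXED_INTS_64 shares the 8-byte encoding with
      // VAR_INTS below (VAR_INTS itself leaves vSize at -1)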
    case VAR_INTS:
      scratch.grow(8);
      DocValuesArraySource.copyLong(scratch, value.numericValue().longValue());
      ord = hash.add(scratch);
      break;
    case FLOAT_32:
      vSize = 4;
      scratch.grow(4);
      DocValuesArraySource.copyInt(scratch,
          Float.floatToRawIntBits(value.numericValue().floatValue()));
      ord = hash.add(scratch);
      break;
    case FLOAT_64:
      vSize = 8;
      scratch.grow(8);
      DocValuesArraySource.copyLong(scratch,
          Double.doubleToRawLongBits(value.numericValue().doubleValue()));
      ord = hash.add(scratch);
      break;

    }

    if (fixedSize == Integer.MIN_VALUE) {
      assert maxDocId == -1;
      fixedSize = vSize;
    } else {
      if (fixedSize != vSize) {
        throw new IllegalArgumentException("value size must be " + fixedSize + " but was: " + vSize);
      }
    }
    maxDocId = Math.max(docID, maxDocId);
    ords = grow(ords, docID);

    ords[docID] = (ord < 0 ? (-ord)-1 : ord) + 1;
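    // BytesRefHash.add returns -(ord+1) for values it has already seen; the
    // extra +1 stored here reserves 0 as "no value", which writeDoc undoes
    // via ords[docId] - 1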
|
||||
}
|
||||
|
||||
protected BytesRef getHeader() {
|
||||
return HEADER;
|
||||
}
|
||||
|
||||
private int[] grow(int[] array, int upto) {
|
||||
if (array.length <= upto) {
|
||||
return ArrayUtil.grow(array, 1 + upto);
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
private void prepareFlush(int docCount) {
|
||||
assert ords != null;
|
||||
ords = grow(ords, docCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segment, "",
|
||||
segmentSuffix);
|
||||
IndexOutput output = dir.createOutput(fileName, ctx);
|
||||
boolean success = false;
|
||||
BytesRef spare = new BytesRef();
|
||||
try {
|
||||
SimpleTextUtil.write(output, getHeader());
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
SimpleTextUtil.write(output, VALUE_SIZE);
|
||||
SimpleTextUtil.write(output, Integer.toString(this.fixedSize), scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
prepareFlush(docCount);
|
||||
for (int i = 0; i < docCount; i++) {
|
||||
SimpleTextUtil.write(output, DOC);
|
||||
SimpleTextUtil.write(output, Integer.toString(i), scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
SimpleTextUtil.write(output, VALUE);
|
||||
writeDoc(output, i, spare);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
}
|
||||
SimpleTextUtil.write(output, END);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
success = true;
|
||||
} finally {
|
||||
hash.close();
|
||||
if (success) {
|
||||
IOUtils.close(output);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeDoc(IndexOutput output, int docId, BytesRef spare) throws IOException {
|
||||
int ord = ords[docId] - 1;
|
||||
if (ord != -1) {
|
||||
assert ord >= 0;
|
||||
hash.get(ord, spare);
|
||||
|
||||
switch (type) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
SimpleTextUtil.write(output, spare);
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
SimpleTextUtil.write(output,
|
||||
Short.toString(DocValuesArraySource.asShort(spare)), scratch);
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
SimpleTextUtil.write(output,
|
||||
Integer.toString(DocValuesArraySource.asInt(spare)), scratch);
|
||||
break;
|
||||
case VAR_INTS:
|
||||
case FIXED_INTS_64:
|
||||
SimpleTextUtil.write(output,
|
||||
Long.toString(DocValuesArraySource.asLong(spare)), scratch);
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
assert spare.length == 1 : spare.length;
|
||||
SimpleTextUtil.write(output,
|
||||
Integer.toString(spare.bytes[spare.offset]), scratch);
|
||||
break;
|
||||
case FLOAT_32:
|
||||
float valueFloat = Float.intBitsToFloat(DocValuesArraySource.asInt(spare));
|
||||
SimpleTextUtil.write(output, Float.toString(valueFloat), scratch);
|
||||
break;
|
||||
case FLOAT_64:
|
||||
double valueDouble = Double.longBitsToDouble(DocValuesArraySource
|
||||
.asLong(spare));
|
||||
SimpleTextUtil.write(output, Double.toString(valueDouble), scratch);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unsupported type: " + type);
|
||||
}
|
||||
} else {
|
||||
switch (type) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
if(zeroBytes == null) {
|
||||
assert fixedSize > 0;
|
||||
zeroBytes = new BytesRef(new byte[fixedSize]);
|
||||
}
|
||||
SimpleTextUtil.write(output, zeroBytes);
|
||||
break;
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
scratch.length = 0;
|
||||
SimpleTextUtil.write(output, scratch);
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
case VAR_INTS:
|
||||
SimpleTextUtil.write(output, ZERO_INT);
|
||||
break;
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
SimpleTextUtil.write(output, ZERO_DOUBLE);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unsupported type: " + type);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
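// Illustrative sketch (not part of this patch): how the consumer above encodes
// the return value of BytesRefHash.add(). add() returns the new entry id, or
// -(id)-1 when the value was already present; ords are then stored 1-based so
// that ords[docID] == 0 can stand for "no value seen for this document".
int raw = -5;                            // add() reported a duplicate of entry id 4
int id = raw < 0 ? (-raw) - 1 : raw;     // -> 4
int stored = id + 1;                     // -> 5, what ords[docID] holds
int decoded = stored - 1;                // -> 4, what writeDoc() looks up in the hash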
@@ -0,0 +1,53 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.PerDocConsumer;
|
||||
import org.apache.lucene.codecs.PerDocProducer;
|
||||
import org.apache.lucene.index.PerDocWriteState;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextDocValuesFormat extends DocValuesFormat {
|
||||
private static final String DOC_VALUES_SEG_SUFFIX = "dv";
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new SimpleTextPerDocConsumer(state, DOC_VALUES_SEG_SUFFIX);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
|
||||
return new SimpleTextPerDocProducer(state, BytesRef.getUTF8SortedAsUnicodeComparator(), DOC_VALUES_SEG_SUFFIX);
|
||||
}
|
||||
|
||||
static String docValuesId(String segmentsName, int fieldId) {
|
||||
return segmentsName + "_" + fieldId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(SegmentInfo info, Set<String> files)
|
||||
throws IOException {
|
||||
SimpleTextPerDocConsumer.files(info, files, DOC_VALUES_SEG_SUFFIX);
|
||||
}
|
||||
}
|
|
@@ -1,294 +0,0 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.PerDocConsumer;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Writes plain-text norms
|
||||
* <p>
|
||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextNormsConsumer extends PerDocConsumer {
|
||||
|
||||
/** Extension of norms file */
|
||||
static final String NORMS_EXTENSION = "len";
|
||||
final static BytesRef END = new BytesRef("END");
|
||||
final static BytesRef FIELD = new BytesRef("field ");
|
||||
final static BytesRef DOC = new BytesRef(" doc ");
|
||||
final static BytesRef NORM = new BytesRef(" norm ");
|
||||
|
||||
private NormsWriter writer;
|
||||
|
||||
private final Directory directory;
|
||||
|
||||
private final String segment;
|
||||
|
||||
private final IOContext context;
|
||||
|
||||
public SimpleTextNormsConsumer(Directory directory, String segment,
|
||||
IOContext context) throws IOException {
|
||||
this.directory = directory;
|
||||
this.segment = segment;
|
||||
this.context = context;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (writer != null) {
|
||||
boolean success = false;
|
||||
try {
|
||||
writer.finish();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(writer);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info)
|
||||
throws IOException {
|
||||
return reader.normValues(info.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean canMerge(FieldInfo info) {
|
||||
return info.normsPresent();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getDocValuesType(FieldInfo info) {
|
||||
return info.getNormType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo)
|
||||
throws IOException {
|
||||
if (type != Type.FIXED_INTS_8) {
|
||||
throw new UnsupportedOperationException("Codec only supports single byte norm values. Type given: " + type);
|
||||
}
|
||||
return new SimpleTextNormsDocValuesConsumer(fieldInfo);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void abort() {
|
||||
if (writer != null) {
|
||||
try {
|
||||
writer.abort();
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class SimpleTextNormsDocValuesConsumer extends DocValuesConsumer {
|
||||
// Holds all docID/norm pairs we've seen
|
||||
int[] docIDs = new int[1];
|
||||
byte[] norms = new byte[1];
|
||||
int upto;
|
||||
private final FieldInfo fi;
|
||||
|
||||
public SimpleTextNormsDocValuesConsumer(FieldInfo fieldInfo) {
|
||||
fi = fieldInfo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, IndexableField docValue) throws IOException {
|
||||
add(docID, docValue.numericValue().longValue());
|
||||
}
|
||||
|
||||
public void add(int docID, long value) {
|
||||
if (docIDs.length <= upto) {
|
||||
assert docIDs.length == upto;
|
||||
docIDs = ArrayUtil.grow(docIDs, 1 + upto);
|
||||
}
|
||||
if (norms.length <= upto) {
|
||||
assert norms.length == upto;
|
||||
norms = ArrayUtil.grow(norms, 1 + upto);
|
||||
}
|
||||
norms[upto] = (byte) value;
|
||||
|
||||
docIDs[upto] = docID;
|
||||
upto++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
final NormsWriter normsWriter = getNormsWriter();
|
||||
boolean success = false;
|
||||
try {
|
||||
int uptoDoc = 0;
|
||||
normsWriter.setNumTotalDocs(docCount);
|
||||
if (upto > 0) {
|
||||
normsWriter.startField(fi);
|
||||
int docID = 0;
|
||||
for (; docID < docCount; docID++) {
|
||||
if (uptoDoc < upto && docIDs[uptoDoc] == docID) {
|
||||
normsWriter.writeNorm(norms[uptoDoc]);
|
||||
uptoDoc++;
|
||||
} else {
|
||||
normsWriter.writeNorm((byte) 0);
|
||||
}
|
||||
}
|
||||
// we should have consumed every norm
|
||||
assert uptoDoc == upto;
|
||||
|
||||
} else {
|
||||
// Fill entire field with default norm:
|
||||
normsWriter.startField(fi);
|
||||
for (; upto < docCount; upto++)
|
||||
normsWriter.writeNorm((byte) 0);
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
normsWriter.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public NormsWriter getNormsWriter() throws IOException {
|
||||
if (writer == null) {
|
||||
writer = new NormsWriter(directory, segment, context);
|
||||
}
|
||||
return writer;
|
||||
}
|
||||
|
||||
private static class NormsWriter implements Closeable{
|
||||
|
||||
private final IndexOutput output;
|
||||
private int numTotalDocs = 0;
|
||||
private int docid = 0;
|
||||
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
|
||||
|
||||
public NormsWriter(Directory directory, String segment, IOContext context)
|
||||
throws IOException {
|
||||
final String normsFileName = IndexFileNames.segmentFileName(segment, "",
|
||||
NORMS_EXTENSION);
|
||||
output = directory.createOutput(normsFileName, context);
|
||||
|
||||
}
|
||||
|
||||
public void startField(FieldInfo info) throws IOException {
|
||||
assert info.omitNorms == false;
|
||||
docid = 0;
|
||||
write(FIELD);
|
||||
write(info.name);
|
||||
newLine();
|
||||
}
|
||||
|
||||
public void writeNorm(byte norm) throws IOException {
|
||||
write(DOC);
|
||||
write(Integer.toString(docid));
|
||||
newLine();
|
||||
|
||||
write(NORM);
|
||||
write(norm);
|
||||
newLine();
|
||||
docid++;
|
||||
}
|
||||
|
||||
public void finish(int numDocs) throws IOException {
|
||||
if (docid != numDocs) {
|
||||
throw new RuntimeException(
|
||||
"mergeNorms produced an invalid result: docCount is " + numDocs
|
||||
+ " but only saw " + docid + " file=" + output.toString()
|
||||
+ "; now aborting this merge to prevent index corruption");
|
||||
}
|
||||
write(END);
|
||||
newLine();
|
||||
}
|
||||
|
||||
private void write(String s) throws IOException {
|
||||
SimpleTextUtil.write(output, s, scratch);
|
||||
}
|
||||
|
||||
private void write(BytesRef bytes) throws IOException {
|
||||
SimpleTextUtil.write(output, bytes);
|
||||
}
|
||||
|
||||
private void write(byte b) throws IOException {
|
||||
scratch.grow(1);
|
||||
scratch.bytes[scratch.offset] = b;
|
||||
scratch.length = 1;
|
||||
SimpleTextUtil.write(output, scratch);
|
||||
}
|
||||
|
||||
private void newLine() throws IOException {
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
}
|
||||
|
||||
public void setNumTotalDocs(int numTotalDocs) {
|
||||
assert this.numTotalDocs == 0 || numTotalDocs == this.numTotalDocs;
|
||||
this.numTotalDocs = numTotalDocs;
|
||||
}
|
||||
|
||||
public void abort() throws IOException {
|
||||
close();
|
||||
}
|
||||
|
||||
public void finish() throws IOException {
|
||||
finish(numTotalDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static void files(SegmentInfo info, Set<String> files) throws IOException {
|
||||
FieldInfos fieldInfos = info.getFieldInfos();
|
||||
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (fieldInfo.normsPresent()) {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "",
|
||||
NORMS_EXTENSION));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
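// Illustrative sketch (not from this patch): the ".len" file emitted by the
// NormsWriter above is line-oriented plain text; note that the norm itself is
// written as one raw byte, not as a printable number. Approximate shape
// (field name and indentation are hypothetical):
String exampleLayout =
      "field body\n"
    + "  doc 0\n"
    + "  norm <raw byte>\n"
    + "  doc 1\n"
    + "  norm <raw byte>\n"
    + "END\n";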
@@ -18,35 +18,123 @@ package org.apache.lucene.codecs.simpletext;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PerDocConsumer;
|
||||
import org.apache.lucene.codecs.PerDocProducer;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.PerDocWriteState;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* plain-text norms format
|
||||
* <p>
|
||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextNormsFormat extends NormsFormat {
|
||||
private static final String NORMS_SEG_SUFFIX = "len";
|
||||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new SimpleTextNormsConsumer(state.directory, state.segmentName, state.context);
|
||||
return new SimpleTextNormsPerDocConsumer(state, NORMS_SEG_SUFFIX);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
|
||||
return new SimpleTextNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context);
|
||||
return new SimpleTextNormsPerDocProducer(state,
|
||||
BytesRef.getUTF8SortedAsUnicodeComparator(), NORMS_SEG_SUFFIX);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void files(SegmentInfo info, Set<String> files) throws IOException {
|
||||
SimpleTextNormsConsumer.files(info, files);
|
||||
}
|
||||
SimpleTextNormsPerDocConsumer.files(info, files);
|
||||
}
|
||||
|
||||
public static class SimpleTextNormsPerDocProducer extends
|
||||
SimpleTextPerDocProducer {
|
||||
|
||||
public SimpleTextNormsPerDocProducer(SegmentReadState state,
|
||||
Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
|
||||
super(state, comp, segmentSuffix);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean canLoad(FieldInfo info) {
|
||||
return info.hasNorms();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getDocValuesType(FieldInfo info) {
|
||||
return info.getNormType();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean anyDocValuesFields(FieldInfos infos) {
|
||||
return infos.hasNorms();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class SimpleTextNormsPerDocConsumer extends
|
||||
SimpleTextPerDocConsumer {
|
||||
|
||||
public SimpleTextNormsPerDocConsumer(PerDocWriteState state,
|
||||
String segmentSuffix) throws IOException {
|
||||
super(state, segmentSuffix);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info)
|
||||
throws IOException {
|
||||
return reader.normValues(info.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean canMerge(FieldInfo info) {
|
||||
return info.hasNorms();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getDocValuesType(FieldInfo info) {
|
||||
return info.getNormType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void abort() {
|
||||
Set<String> files = new HashSet<String>();
|
||||
filesInternal(state.fieldInfos, state.segmentName, files, segmentSuffix);
|
||||
IOUtils.deleteFilesIgnoringExceptions(state.directory,
|
||||
files.toArray(new String[0]));
|
||||
}
|
||||
|
||||
public static void files(SegmentInfo segmentInfo, Set<String> files)
|
||||
throws IOException {
|
||||
filesInternal(segmentInfo.getFieldInfos(), segmentInfo.name, files,
|
||||
NORMS_SEG_SUFFIX);
|
||||
}
|
||||
|
||||
public static void filesInternal(FieldInfos fieldInfos, String segmentName,
|
||||
Set<String> files, String segmentSuffix) {
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (fieldInfo.hasNorms()) {
|
||||
String id = docValuesId(segmentName, fieldInfo.number);
|
||||
files.add(IndexFileNames.segmentFileName(id, "",
|
||||
segmentSuffix));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -1,175 +0,0 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.DOC;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.END;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.FIELD;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORM;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextNormsConsumer.NORMS_EXTENSION;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.PerDocProducer;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* Reads plain-text norms
|
||||
* <p>
|
||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextNormsProducer extends PerDocProducer {
|
||||
|
||||
Map<String,NormsDocValues> norms = new HashMap<String,NormsDocValues>();
|
||||
|
||||
public SimpleTextNormsProducer(Directory directory, SegmentInfo si, FieldInfos fields, IOContext context) throws IOException {
|
||||
if (fields.hasNorms()) {
|
||||
readNorms(directory.openInput(IndexFileNames.segmentFileName(si.name, "", NORMS_EXTENSION), context), si.docCount);
|
||||
}
|
||||
}
|
||||
|
||||
// we read in all the norms up front into a hashmap
|
||||
private void readNorms(IndexInput in, int maxDoc) throws IOException {
|
||||
BytesRef scratch = new BytesRef();
|
||||
boolean success = false;
|
||||
try {
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
while (!scratch.equals(END)) {
|
||||
assert StringHelper.startsWith(scratch, FIELD);
|
||||
final String fieldName = readString(FIELD.length, scratch);
|
||||
byte bytes[] = new byte[maxDoc];
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch, DOC);
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch, NORM);
|
||||
bytes[i] = scratch.bytes[scratch.offset + NORM.length];
|
||||
}
|
||||
norms.put(fieldName, new NormsDocValues(new Norm(bytes)));
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch, FIELD) || scratch.equals(END);
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(in);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(in);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
norms = null;
|
||||
}
|
||||
|
||||
static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
|
||||
FieldInfos fieldInfos = info.getFieldInfos();
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (fieldInfo.normsPresent()) {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String readString(int offset, BytesRef scratch) {
|
||||
return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues docValues(String field) throws IOException {
|
||||
return norms.get(field);
|
||||
}
|
||||
|
||||
private class NormsDocValues extends DocValues {
|
||||
private final Source source;
|
||||
public NormsDocValues(Source source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
return source;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source getDirectSource() throws IOException {
|
||||
return getSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Type type() {
|
||||
return Type.FIXED_INTS_8;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueSize() {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Norm extends Source {
|
||||
protected Norm(byte[] bytes) {
|
||||
super(Type.FIXED_INTS_8);
|
||||
this.bytes = bytes;
|
||||
}
|
||||
final byte bytes[];
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef ref) {
|
||||
ref.bytes = bytes;
|
||||
ref.offset = docID;
|
||||
ref.length = 1;
|
||||
return ref;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
return bytes[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
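// Illustrative sketch (not from this patch): the Norm source above never
// copies -- getBytes() returns a one-byte view into the shared norms array,
// using the docID itself as the offset. Plain-Java equivalent of that trick:
byte[] norms = {7, 42, 3};   // hypothetical: one norm byte per document
int docID = 1;
// the returned ref is (bytes = norms, offset = docID, length = 1), i.e.:
byte norm = norms[docID];    // 42, the same byte the BytesRef view exposes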
@@ -0,0 +1,94 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.PerDocConsumer;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.PerDocWriteState;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class SimpleTextPerDocConsumer extends PerDocConsumer {
|
||||
|
||||
protected final PerDocWriteState state;
|
||||
protected final String segmentSuffix;
|
||||
public SimpleTextPerDocConsumer(PerDocWriteState state, String segmentSuffix)
|
||||
throws IOException {
|
||||
this.state = state;
|
||||
this.segmentSuffix = segmentSuffix;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer addValuesField(Type type, FieldInfo field)
|
||||
throws IOException {
|
||||
return new SimpleTextDocValuesConsumer(SimpleTextDocValuesFormat.docValuesId(state.segmentName,
|
||||
field.number), state.directory, state.context, type, segmentSuffix);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void abort() {
|
||||
Set<String> files = new HashSet<String>();
|
||||
files(state.directory, state.fieldInfos, state.segmentName, files, segmentSuffix);
|
||||
IOUtils.deleteFilesIgnoringExceptions(state.directory,
|
||||
files.toArray(new String[0]));
|
||||
}
|
||||
|
||||
|
||||
static void files(SegmentInfo info, Set<String> files, String segmentSuffix) throws IOException {
|
||||
files(info.dir, info.getFieldInfos(), info.name, files, segmentSuffix);
|
||||
}
|
||||
|
||||
static String docValuesId(String segmentsName, int fieldId) {
|
||||
return segmentsName + "_" + fieldId;
|
||||
}
|
||||
|
||||
@SuppressWarnings("fallthrough")
|
||||
private static void files(Directory dir, FieldInfos fieldInfos,
|
||||
String segmentName, Set<String> files, String segmentSuffix) {
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (fieldInfo.hasDocValues()) {
|
||||
String filename = docValuesId(segmentName, fieldInfo.number);
|
||||
files.add(IndexFileNames.segmentFileName(filename, "",
|
||||
segmentSuffix));
|
||||
try {
|
||||
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
|
||||
segmentSuffix));
|
||||
} catch (IOException e) {
|
||||
// don't throw checked exception - dir is only used in assert
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
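// Illustrative sketch (hypothetical segment/field numbers): how the consumer
// above derives per-field docvalues file names. docValuesId() joins segment
// name and field number; segmentFileName(id, "", suffix) then effectively
// appends the suffix as the extension, so field 2 of segment "_3" with
// suffix "dv" ends up as:
String id = "_3" + "_" + 2;            // "_3_2"
String fileName = id + "." + "dv";     // "_3_2.dv"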
@@ -0,0 +1,431 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.DOC;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.END;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.HEADER;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesConsumer.VALUE_SIZE;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesArraySource;
|
||||
import org.apache.lucene.codecs.PerDocProducerBase;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.SortedSource;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.packed.PackedInts.Reader;
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextPerDocProducer extends PerDocProducerBase {
|
||||
protected final TreeMap<String, DocValues> docValues;
|
||||
private Comparator<BytesRef> comp;
|
||||
private final String segmentSuffix;
|
||||
|
||||
/**
|
||||
* Creates a new {@link SimpleTextPerDocProducer} instance and loads all
|
||||
* {@link DocValues} instances for this segment and codec.
|
||||
*/
|
||||
public SimpleTextPerDocProducer(SegmentReadState state,
|
||||
Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
|
||||
this.comp = comp;
|
||||
this.segmentSuffix = segmentSuffix;
|
||||
if (anyDocValuesFields(state.fieldInfos)) {
|
||||
docValues = load(state.fieldInfos, state.segmentInfo.name,
|
||||
state.segmentInfo.docCount, state.dir, state.context);
|
||||
} else {
|
||||
docValues = new TreeMap<String, DocValues>();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, DocValues> docValues() {
|
||||
return docValues;
|
||||
}
|
||||
|
||||
protected DocValues loadDocValues(int docCount, Directory dir, String id,
|
||||
DocValues.Type type, IOContext context) throws IOException {
|
||||
return new SimpleTextDocValues(dir, context, type, id, docCount, comp, segmentSuffix);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void closeInternal(Collection<? extends Closeable> closeables)
|
||||
throws IOException {
|
||||
IOUtils.close(closeables);
|
||||
}
|
||||
|
||||
private static class SimpleTextDocValues extends DocValues {
|
||||
|
||||
private int docCount;
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
super.close();
|
||||
} finally {
|
||||
IOUtils.close(input);
|
||||
}
|
||||
}
|
||||
|
||||
private Type type;
|
||||
private Comparator<BytesRef> comp;
|
||||
private int valueSize;
|
||||
private final IndexInput input;
|
||||
|
||||
public SimpleTextDocValues(Directory dir, IOContext ctx, Type type,
|
||||
String id, int docCount, Comparator<BytesRef> comp, String segmentSuffix) throws IOException {
|
||||
this.type = type;
|
||||
this.docCount = docCount;
|
||||
this.comp = comp;
|
||||
final String fileName = IndexFileNames.segmentFileName(id, "", segmentSuffix);
|
||||
boolean success = false;
|
||||
IndexInput in = null;
|
||||
try {
|
||||
in = dir.openInput(fileName, ctx);
|
||||
valueSize = readHeader(in);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(in);
|
||||
}
|
||||
}
|
||||
input = in;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
boolean success = false;
|
||||
IndexInput in = (IndexInput) input.clone();
|
||||
try {
|
||||
Source source = null;
|
||||
switch (type) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
source = read(in, new ValueReader(type, docCount, comp));
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case VAR_INTS:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
source = read(in, new ValueReader(type, docCount, null));
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unknown type: " + type);
|
||||
}
|
||||
assert source != null;
|
||||
success = true;
|
||||
return source;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(in);
|
||||
} else {
|
||||
IOUtils.close(in);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int readHeader(IndexInput in) throws IOException {
|
||||
BytesRef scratch = new BytesRef();
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch, HEADER);
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch, VALUE_SIZE);
|
||||
return Integer.parseInt(readString(scratch.offset + VALUE_SIZE.length,
|
||||
scratch));
|
||||
}
|
||||
|
||||
private Source read(IndexInput in, ValueReader reader) throws IOException {
|
||||
BytesRef scratch = new BytesRef();
|
||||
for (int i = 0; i < docCount; i++) {
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
|
||||
assert StringHelper.startsWith(scratch, DOC) : scratch.utf8ToString();
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch, VALUE);
|
||||
reader.fromString(i, scratch, scratch.offset + VALUE.length);
|
||||
}
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert scratch.equals(END);
|
||||
return reader.getSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source getDirectSource() throws IOException {
|
||||
return this.getSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueSize() {
|
||||
return valueSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Type getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static String readString(int offset, BytesRef scratch) {
|
||||
return new String(scratch.bytes, scratch.offset + offset, scratch.length
|
||||
- offset, IOUtils.CHARSET_UTF_8);
|
||||
}
|
||||
|
||||
private static final class ValueReader {
|
||||
private final Type type;
|
||||
private byte[] bytes;
|
||||
private short[] shorts;
|
||||
private int[] ints;
|
||||
private long[] longs;
|
||||
private float[] floats;
|
||||
private double[] doubles;
|
||||
private Source source;
|
||||
private BytesRefHash hash;
|
||||
private BytesRef scratch;
|
||||
|
||||
public ValueReader(Type type, int maxDocs, Comparator<BytesRef> comp) {
|
||||
super();
|
||||
this.type = type;
|
||||
Source docValuesArray = null;
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
shorts = new short[maxDocs];
|
||||
docValuesArray = DocValuesArraySource.forType(type)
|
||||
.newFromArray(shorts);
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
ints = new int[maxDocs];
|
||||
docValuesArray = DocValuesArraySource.forType(type).newFromArray(ints);
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
longs = new long[maxDocs];
|
||||
docValuesArray = DocValuesArraySource.forType(type)
|
||||
.newFromArray(longs);
|
||||
break;
|
||||
case VAR_INTS:
|
||||
longs = new long[maxDocs];
|
||||
docValuesArray = new VarIntsArraySource(type, longs);
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
bytes = new byte[maxDocs];
|
||||
docValuesArray = DocValuesArraySource.forType(type).newFromArray(bytes);
|
||||
break;
|
||||
case FLOAT_32:
|
||||
floats = new float[maxDocs];
|
||||
docValuesArray = DocValuesArraySource.forType(type)
|
||||
.newFromArray(floats);
|
||||
break;
|
||||
case FLOAT_64:
|
||||
doubles = new double[maxDocs];
|
||||
docValuesArray = DocValuesArraySource.forType(type).newFromArray(
|
||||
doubles);
|
||||
break;
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
assert comp != null;
|
||||
hash = new BytesRefHash();
|
||||
BytesSource bytesSource = new BytesSource(type, comp, maxDocs, hash);
|
||||
ints = bytesSource.docIdToEntry;
|
||||
source = bytesSource;
|
||||
scratch = new BytesRef();
|
||||
break;
|
||||
|
||||
}
|
||||
if (docValuesArray != null) {
|
||||
assert source == null;
|
||||
this.source = docValuesArray;
|
||||
}
|
||||
}
|
||||
|
||||
public void fromString(int ord, BytesRef ref, int offset) {
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
assert shorts != null;
|
||||
shorts[ord] = Short.parseShort(readString(offset, ref));
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
assert ints != null;
|
||||
ints[ord] = Integer.parseInt(readString(offset, ref));
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
case VAR_INTS:
|
||||
assert longs != null;
|
||||
longs[ord] = Long.parseLong(readString(offset, ref));
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
assert bytes != null;
|
||||
bytes[ord] = (byte) Integer.parseInt(readString(offset, ref));
|
||||
break;
|
||||
case FLOAT_32:
|
||||
assert floats != null;
|
||||
floats[ord] = Float.parseFloat(readString(offset, ref));
|
||||
break;
|
||||
case FLOAT_64:
|
||||
assert doubles != null;
|
||||
doubles[ord] = Double.parseDouble(readString(offset, ref));
|
||||
break;
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
scratch.bytes = ref.bytes;
|
||||
scratch.length = ref.length - offset;
|
||||
scratch.offset = ref.offset + offset;
|
||||
int key = hash.add(scratch);
|
||||
ints[ord] = key < 0 ? (-key) - 1 : key;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
public Source getSource() {
|
||||
if (source instanceof BytesSource) {
|
||||
((BytesSource) source).maybeSort();
|
||||
}
|
||||
return source;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class BytesSource extends SortedSource {
|
||||
|
||||
private final BytesRefHash hash;
|
||||
int[] docIdToEntry;
|
||||
int[] sortedEntries;
|
||||
int[] addresses;
|
||||
private final boolean isSorted;
|
||||
|
||||
protected BytesSource(Type type, Comparator<BytesRef> comp, int maxDoc,
|
||||
BytesRefHash hash) {
|
||||
super(type, comp);
|
||||
docIdToEntry = new int[maxDoc];
|
||||
this.hash = hash;
|
||||
isSorted = type == Type.BYTES_FIXED_SORTED
|
||||
|| type == Type.BYTES_VAR_SORTED;
|
||||
}
|
||||
|
||||
void maybeSort() {
|
||||
if (isSorted) {
|
||||
addresses = new int[hash.size()];
|
||||
sortedEntries = hash.sort(getComparator());
|
||||
for (int i = 0; i < addresses.length; i++) {
|
||||
int entry = sortedEntries[i];
|
||||
addresses[entry] = i;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef ref) {
|
||||
if (isSorted) {
|
||||
return hash.get(sortedEntries[ord(docID)], ref);
|
||||
} else {
|
||||
return hash.get(docIdToEntry[docID], ref);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSource asSortedSource() {
|
||||
if (isSorted) {
|
||||
return this;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord(int docID) {
|
||||
assert isSorted;
|
||||
return addresses[docIdToEntry[docID]];
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
|
||||
assert isSorted;
|
||||
return hash.get(sortedEntries[ord], bytesRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Reader getDocToOrd() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return hash.size();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class VarIntsArraySource extends Source {
|
||||
|
||||
private final long[] array;
|
||||
|
||||
protected VarIntsArraySource(Type type, long[] array) {
|
||||
super(type);
|
||||
this.array = array;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
return array[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef ref) {
|
||||
DocValuesArraySource.copyLong(ref, getInt(docID));
|
||||
return ref;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
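// Worked sketch (not from this patch) of the maybeSort() inversion above:
// hash.sort() yields entry ids in value order (rank -> entry); the addresses
// array inverts that to (entry -> rank), so ord(docID) is two array lookups.
int[] sortedEntries = {2, 0, 1};   // hypothetical: rank 0 is held by entry 2
int[] addresses = new int[3];
for (int rank = 0; rank < sortedEntries.length; rank++) {
  addresses[sortedEntries[rank]] = rank;   // entry 2 -> 0, entry 0 -> 1, entry 1 -> 2
}
int[] docIdToEntry = {1, 2};
int ordOfDoc0 = addresses[docIdToEntry[0]];   // doc 0 uses entry 1 -> rank 2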
@@ -60,12 +60,17 @@ public abstract class AtomicReader extends IndexReader {
|
|||
return readerContext;
|
||||
}
|
||||
|
||||
/** Returns true if there are norms stored for this field. */
|
||||
public boolean hasNorms(String field) throws IOException {
|
||||
// backward compatible implementation.
|
||||
// SegmentReader has an efficient implementation.
|
||||
/**
|
||||
* Returns true if there are norms stored for this field.
|
||||
* @deprecated (4.0) use {@link #getFieldInfos()} and check {@link FieldInfo#hasNorms()}
|
||||
* for the field instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public final boolean hasNorms(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return normValues(field) != null;
|
||||
// note: using normValues(field) != null would potentially cause i/o
|
||||
FieldInfo fi = getFieldInfos().fieldInfo(field);
|
||||
return fi != null && fi.hasNorms();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
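// Migration sketch for callers of the deprecated hasNorms(String) above,
// using the replacement the @deprecated tag names ("reader" is assumed):
FieldInfo fi = reader.getFieldInfos().fieldInfo("body");   // "body" is illustrative
boolean hasNorms = fi != null && fi.hasNorms();            // no i/o, unlike normValues()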
@@ -53,6 +53,14 @@ public abstract class BaseCompositeReader<R extends IndexReader> extends Composi
|
|||
private final int numDocs;
|
||||
private final boolean hasDeletions;
|
||||
|
||||
/**
|
||||
* Constructs a {@code BaseCompositeReader} on the given subReaders.
|
||||
* @param subReaders the wrapped sub-readers. This array is returned by
|
||||
* {@link #getSequentialSubReaders} and used to resolve the correct
|
||||
* subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
|
||||
* cloned and not protected for modification, the subclass is responsible
|
||||
* to do this.
|
||||
*/
|
||||
protected BaseCompositeReader(R[] subReaders) throws IOException {
|
||||
this.subReaders = subReaders;
|
||||
starts = new int[subReaders.length + 1]; // build starts array
|
||||
|
|
|
@@ -210,7 +210,7 @@ class BufferedDeletesStream {
|
|||
|
||||
// Lock order: IW -> BD -> RP
|
||||
assert readerPool.infoIsLive(info);
|
||||
final IndexWriter.ReadersAndLiveDocs rld = readerPool.get(info, true);
|
||||
final ReadersAndLiveDocs rld = readerPool.get(info, true);
|
||||
final SegmentReader reader = rld.getReader(IOContext.READ);
|
||||
int delCount = 0;
|
||||
final boolean segAllDeletes;
|
||||
|
@@ -224,11 +224,12 @@ class BufferedDeletesStream {
|
|||
// Don't delete by Term here; DocumentsWriterPerThread
|
||||
// already did that on flush:
|
||||
delCount += applyQueryDeletes(packet.queriesIterable(), rld, reader);
|
||||
final int fullDelCount = rld.info.getDelCount() + rld.pendingDeleteCount;
|
||||
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
|
||||
assert fullDelCount <= rld.info.docCount;
|
||||
segAllDeletes = fullDelCount == rld.info.docCount;
|
||||
} finally {
|
||||
readerPool.release(reader, false);
|
||||
rld.release(reader);
|
||||
readerPool.release(rld);
|
||||
}
|
||||
anyNewDeletes |= delCount > 0;
|
||||
|
||||
|
@@ -262,18 +263,19 @@ class BufferedDeletesStream {
|
|||
if (coalescedDeletes != null) {
|
||||
// Lock order: IW -> BD -> RP
|
||||
assert readerPool.infoIsLive(info);
|
||||
final IndexWriter.ReadersAndLiveDocs rld = readerPool.get(info, true);
|
||||
final ReadersAndLiveDocs rld = readerPool.get(info, true);
|
||||
final SegmentReader reader = rld.getReader(IOContext.READ);
|
||||
int delCount = 0;
|
||||
final boolean segAllDeletes;
|
||||
try {
|
||||
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), rld, reader);
|
||||
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), rld, reader);
|
||||
final int fullDelCount = rld.info.getDelCount() + rld.pendingDeleteCount;
|
||||
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
|
||||
assert fullDelCount <= rld.info.docCount;
|
||||
segAllDeletes = fullDelCount == rld.info.docCount;
|
||||
} finally {
|
||||
readerPool.release(reader, false);
|
||||
} finally {
|
||||
rld.release(reader);
|
||||
readerPool.release(rld);
|
||||
}
|
||||
anyNewDeletes |= delCount > 0;
|
||||
|
||||
|
@@ -353,7 +355,7 @@ class BufferedDeletesStream {
|
|||
}
|
||||
|
||||
// Delete by Term
|
||||
private synchronized long applyTermDeletes(Iterable<Term> termsIter, IndexWriter.ReadersAndLiveDocs rld, SegmentReader reader) throws IOException {
|
||||
private synchronized long applyTermDeletes(Iterable<Term> termsIter, ReadersAndLiveDocs rld, SegmentReader reader) throws IOException {
|
||||
long delCount = 0;
|
||||
Fields fields = reader.fields();
|
||||
if (fields == null) {
|
||||
|
@@ -394,7 +396,7 @@ class BufferedDeletesStream {
|
|||
// System.out.println(" term=" + term);
|
||||
|
||||
if (termsEnum.seekExact(term.bytes(), false)) {
|
||||
DocsEnum docsEnum = termsEnum.docs(rld.liveDocs, docs, false);
|
||||
DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, false);
|
||||
//System.out.println("BDS: got docsEnum=" + docsEnum);
|
||||
|
||||
if (docsEnum != null) {
|
||||
|
@@ -434,7 +436,7 @@ class BufferedDeletesStream {
|
|||
}
|
||||
|
||||
// Delete by query
|
||||
private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, IndexWriter.ReadersAndLiveDocs rld, final SegmentReader reader) throws IOException {
|
||||
private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, ReadersAndLiveDocs rld, final SegmentReader reader) throws IOException {
|
||||
long delCount = 0;
|
||||
final AtomicReaderContext readerContext = reader.getTopReaderContext();
|
||||
boolean any = false;
|
||||
|
|
|
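// Sketch of the acquire/release discipline the BufferedDeletesStream hunks
// above converge on: the SegmentReader goes back to its ReadersAndLiveDocs,
// then the rld itself back to the pool, always in a finally block:
final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
try {
  // ... apply term and query deletes against reader ...
} finally {
  rld.release(reader);
  readerPool.release(rld);
}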
@@ -651,28 +651,17 @@ public class CheckIndex {
|
|||
if (infoStream != null) {
|
||||
infoStream.print(" test: field norms.........");
|
||||
}
|
||||
DocValues dv;
|
||||
for (FieldInfo info : fieldInfos) {
|
||||
if (reader.hasNorms(info.name)) {
|
||||
dv = reader.normValues(info.name);
|
||||
assert dv != null;
|
||||
if (dv.getSource().hasArray()) {
|
||||
Object array = dv.getSource().getArray();
|
||||
if (Array.getLength(array) != reader.maxDoc()) {
|
||||
throw new RuntimeException("norms for field: " + info.name + " are of the wrong size");
|
||||
}
|
||||
}
|
||||
if (!info.isIndexed || info.omitNorms) {
|
||||
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
|
||||
}
|
||||
if (info.hasNorms()) {
|
||||
assert reader.hasNorms(info.name); // deprecated path
|
||||
DocValues dv = reader.normValues(info.name);
|
||||
checkDocValues(dv, info.name, info.getNormType(), reader.maxDoc());
|
||||
++status.totFields;
|
||||
} else {
|
||||
assert !reader.hasNorms(info.name); // deprecated path
|
||||
if (reader.normValues(info.name) != null) {
|
||||
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
|
||||
}
|
||||
if (info.normsPresent()) {
|
||||
throw new RuntimeException("field: " + info.name + " should have norms but omits them!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1171,6 +1160,92 @@ public class CheckIndex {
|
|||
return status;
|
||||
}
|
||||
|
||||
/** Helper method to verify values (either docvalues or norms), also checking
|
||||
* type and size against fieldinfos/segmentinfo
|
||||
*/
|
||||
private void checkDocValues(DocValues docValues, String fieldName, DocValues.Type expectedType, int expectedDocs) throws IOException {
|
||||
if (docValues == null) {
|
||||
throw new RuntimeException("field: " + fieldName + " omits docvalues but should have them!");
|
||||
}
|
||||
DocValues.Type type = docValues.getType();
|
||||
if (type != expectedType) {
|
||||
throw new RuntimeException("field: " + fieldName + " has type: " + type + " but fieldInfos says:" + expectedType);
|
||||
}
|
||||
final Source values = docValues.getDirectSource();
|
||||
int size = docValues.getValueSize();
|
||||
for (int i = 0; i < expectedDocs; i++) {
|
||||
switch (type) {
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
BytesRef bytes = new BytesRef();
|
||||
values.getBytes(i, bytes);
|
||||
if (size != -1 && size != bytes.length) {
|
||||
throw new RuntimeException("field: " + fieldName + " returned wrongly sized bytes, was: " + bytes.length + " should be: " + size);
|
||||
}
|
||||
break;
|
||||
case FLOAT_32:
|
||||
assert size == 4;
|
||||
values.getFloat(i);
|
||||
break;
|
||||
case FLOAT_64:
|
||||
assert size == 8;
|
||||
values.getFloat(i);
|
||||
break;
|
||||
case VAR_INTS:
|
||||
assert size == -1;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
assert size == 2;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
assert size == 4;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
assert size == 8;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
assert size == 1;
|
||||
values.getInt(i);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Field: " + fieldName
|
||||
+ " - no such DocValues type: " + type);
|
||||
}
|
||||
}
|
||||
if (type == DocValues.Type.BYTES_FIXED_SORTED || type == DocValues.Type.BYTES_VAR_SORTED) {
|
||||
// check sorted bytes
|
||||
SortedSource sortedValues = values.asSortedSource();
|
||||
Comparator<BytesRef> comparator = sortedValues.getComparator();
|
||||
int lastOrd = -1;
|
||||
BytesRef lastBytes = new BytesRef();
|
||||
for (int i = 0; i < expectedDocs; i++) {
|
||||
int ord = sortedValues.ord(i);
|
||||
if (ord < 0 || ord > expectedDocs) {
|
||||
throw new RuntimeException("field: " + fieldName + " ord is out of bounds: " + ord);
|
||||
}
|
||||
BytesRef bytes = new BytesRef();
|
||||
sortedValues.getByOrd(ord, bytes);
|
||||
if (lastOrd != -1) {
|
||||
int ordComp = Integer.signum(Integer.valueOf(ord).compareTo(Integer.valueOf(lastOrd)));
|
||||
int bytesComp = Integer.signum(comparator.compare(bytes, lastBytes));
|
||||
if (ordComp != bytesComp) {
|
||||
throw new RuntimeException("field: " + fieldName + " ord comparison is wrong: " + ordComp + " comparator claims: " + bytesComp);
|
||||
}
|
||||
}
|
||||
lastOrd = ord;
|
||||
lastBytes = bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Status.DocValuesStatus testDocValues(SegmentInfo info,
|
||||
SegmentReader reader) {
|
||||
final Status.DocValuesStatus status = new Status.DocValuesStatus();
|
||||
|
@@ -1183,87 +1258,7 @@ public class CheckIndex {
|
|||
if (fieldInfo.hasDocValues()) {
|
||||
status.totalValueFields++;
|
||||
final DocValues docValues = reader.docValues(fieldInfo.name);
|
||||
if (docValues == null) {
|
||||
throw new RuntimeException("field: " + fieldInfo.name + " omits docvalues but should have them!");
|
||||
}
|
||||
DocValues.Type type = docValues.type();
|
||||
if (type != fieldInfo.getDocValuesType()) {
|
||||
throw new RuntimeException("field: " + fieldInfo.name + " has type: " + type + " but fieldInfos says:" + fieldInfo.getDocValuesType());
|
||||
}
|
||||
final Source values = docValues.getDirectSource();
|
||||
final int maxDoc = reader.maxDoc();
|
||||
int size = docValues.getValueSize();
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
switch (fieldInfo.getDocValuesType()) {
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
BytesRef bytes = new BytesRef();
|
||||
values.getBytes(i, bytes);
|
||||
if (size != -1 && size != bytes.length) {
|
||||
throw new RuntimeException("field: " + fieldInfo.name + " returned wrongly sized bytes, was: " + bytes.length + " should be: " + size);
|
||||
}
|
||||
break;
|
||||
case FLOAT_32:
|
||||
assert size == 4;
|
||||
values.getFloat(i);
|
||||
break;
|
||||
case FLOAT_64:
|
||||
assert size == 8;
|
||||
values.getFloat(i);
|
||||
break;
|
||||
case VAR_INTS:
|
||||
assert size == -1;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
assert size == 2;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
assert size == 4;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
assert size == 8;
|
||||
values.getInt(i);
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
assert size == 1;
|
||||
values.getInt(i);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Field: " + fieldInfo.name
|
||||
+ " - no such DocValues type: " + fieldInfo.getDocValuesType());
|
||||
}
|
||||
}
|
||||
if (type == DocValues.Type.BYTES_FIXED_SORTED || type == DocValues.Type.BYTES_VAR_SORTED) {
|
||||
// check sorted bytes
|
||||
SortedSource sortedValues = values.asSortedSource();
|
||||
Comparator<BytesRef> comparator = sortedValues.getComparator();
|
||||
int lastOrd = -1;
|
||||
BytesRef lastBytes = new BytesRef();
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
int ord = sortedValues.ord(i);
|
||||
if (ord < 0 || ord > maxDoc) {
|
||||
throw new RuntimeException("field: " + fieldInfo.name + " ord is out of bounds: " + ord);
|
||||
}
|
||||
BytesRef bytes = new BytesRef();
|
||||
sortedValues.getByOrd(ord, bytes);
|
||||
if (lastOrd != -1) {
|
||||
int ordComp = Integer.signum(new Integer(ord).compareTo(new Integer(lastOrd)));
|
||||
int bytesComp = Integer.signum(comparator.compare(bytes, lastBytes));
|
||||
if (ordComp != bytesComp) {
|
||||
throw new RuntimeException("field: " + fieldInfo.name + " ord comparison is wrong: " + ordComp + " comparator claims: " + bytesComp);
|
||||
}
|
||||
}
|
||||
lastOrd = ord;
|
||||
lastBytes = bytes;
|
||||
}
|
||||
}
|
||||
checkDocValues(docValues, fieldInfo.name, fieldInfo.getDocValuesType(), reader.maxDoc());
|
||||
} else {
|
||||
if (reader.docValues(fieldInfo.name) != null) {
|
||||
throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!");
|
||||
|
|
|
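// Worked sketch (not from this patch) of the order check in checkDocValues()
// above: for a sorted source, ord order and byte order must agree, so the
// signum of the ord comparison is matched against the comparator's verdict.
int lastOrd = 3, ord = 5;
int ordComp = Integer.signum(Integer.valueOf(ord).compareTo(Integer.valueOf(lastOrd)));   // 1
// comparator.compare(bytes, lastBytes) must then be positive as well,
// otherwise the index is reported as corrupt.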
@@ -81,6 +81,9 @@ public abstract class CompositeReader extends IndexReader {
|
|||
* If this method returns an empty array, that means this
|
||||
* reader is a null reader (for example a MultiReader
|
||||
* that has no sub readers).
|
||||
* <p><b>Warning:</b> Don't modify the returned array!
|
||||
* Doing so will corrupt the internal structure of this
|
||||
* {@code CompositeReader}.
|
||||
*/
|
||||
public abstract IndexReader[] getSequentialSubReaders();
|
||||
|
||||
|
|
|
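An illustrative sketch (mine, not part of this commit): since getSequentialSubReaders() exposes the reader's internal array, a caller that wants to reorder or filter sub-readers should copy it first. The compositeReader variable is an assumption.

  // Hypothetical caller: copy before reordering; mutating 'subs' directly
  // would corrupt the CompositeReader.
  IndexReader[] subs = compositeReader.getSequentialSubReaders();
  IndexReader[] copy = new IndexReader[subs.length];
  System.arraycopy(subs, 0, copy, 0, copy.length);
  // ... reorder or filter 'copy' freely; 'subs' stays untouched ...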
@@ -323,8 +323,17 @@ public abstract class DirectoryReader extends BaseCompositeReader<AtomicReader>
    }
  }

  protected DirectoryReader(Directory directory, AtomicReader[] readers) throws CorruptIndexException, IOException {
    super(readers);
  /**
   * Expert: Constructs a {@code DirectoryReader} on the given subReaders.
   * @param segmentReaders the wrapped atomic index segment readers. This array is
   * returned by {@link #getSequentialSubReaders} and used to resolve the correct
   * subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b>
   * cloned and not protected for modification outside of this reader.
   * Subclasses of {@code DirectoryReader} should take care to not allow
   * modification of this internal array, e.g. {@link #doOpenIfChanged()}.
   */
  protected DirectoryReader(Directory directory, AtomicReader[] segmentReaders) throws CorruptIndexException, IOException {
    super(segmentReaders);
    this.directory = directory;
  }

@@ -216,6 +216,13 @@ public class DocTermOrds {
    }
  }

  /**
   * @return The number of terms in this field
   */
  public int numTerms() {
    return numTermsInField;
  }

  /** Subclass can override this */
  protected void visitTerm(TermsEnum te, int termNum) throws IOException {
  }

@@ -90,7 +90,7 @@ public abstract class DocValues implements Closeable {
  /**
   * Returns the {@link Type} of this {@link DocValues} instance
   */
  public abstract Type type();
  public abstract Type getType();

  /**
   * Closes this {@link DocValues} instance. This method should only be called

@@ -191,7 +191,7 @@ public abstract class DocValues implements Closeable {
   *
   * @return the {@link Type} of this source.
   */
  public Type type() {
  public Type getType() {
    return type;
  }

@@ -122,14 +122,23 @@ public final class FieldInfo {
    }
  }

  /**
   * @return true if this field has any docValues.
   */
  public boolean hasDocValues() {
    return docValueType != null;
  }

  /**
   * @return {@link DocValues.Type} of the docValues. This may be null if the field has no docvalues.
   */
  public DocValues.Type getDocValuesType() {
    return docValueType;
  }

  /**
   * @return {@link DocValues.Type} of the norm. This may be null if the field has no norms.
   */
  public DocValues.Type getNormType() {
    return normType;
  }
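Illustration only (not from the patch): the new FieldInfo accessors make the usual "check, then fetch" pattern explicit. A minimal sketch, assuming a FieldInfo variable fi:

  // Hypothetical consumer of the new accessors:
  if (fi.hasDocValues()) {              // true when docValueType != null
    DocValues.Type dvType = fi.getDocValuesType();
    // ... pick a decoding path based on dvType ...
  }
  // getNormType() may likewise return null when the field has no norms.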
@@ -146,11 +155,17 @@ public final class FieldInfo {
    }
  }

  /**
   * @return true if norms are explicitly omitted for this field
   */
  public boolean omitNorms() {
    return omitNorms;
  }

  public boolean normsPresent() {
  /**
   * @return true if this field actually has any norms.
   */
  public boolean hasNorms() {
    return isIndexed && !omitNorms && normType != null;
  }

@@ -178,7 +178,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
    return fis;
  }

  /** Returns true if any fields do not positions */
  /** Returns true if any fields have positions */
  public boolean hasProx() {
    if (isReadOnly()) {
      return hasProx;

@@ -349,6 +349,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
    return fi;
  }

  /**
   * Looks up the number of a field by name.
   *
   * @param fieldName field's name
   * @return number of field, or -1 if it does not exist.
   */
  public int fieldNumber(String fieldName) {
    FieldInfo fi = fieldInfo(fieldName);
    return (fi != null) ? fi.number : -1;

@@ -384,11 +390,17 @@ public final class FieldInfos implements Iterable<FieldInfo> {
    return byNumber.values().iterator();
  }

  /**
   * @return number of fields
   */
  public int size() {
    assert byNumber.size() == byName.size();
    return byNumber.size();
  }

  /**
   * @return true if at least one field has any vectors
   */
  public boolean hasVectors() {
    if (isReadOnly()) {
      return hasVectors;

@@ -402,9 +414,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
    return false;
  }

  /**
   * @return true if at least one field has any norms
   */
  public boolean hasNorms() {
    for (FieldInfo fi : this) {
      if (fi.normsPresent()) {
      if (fi.hasNorms()) {
        return true;
      }
    }

@@ -441,7 +456,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
    return roFis;
  }

  public boolean anyDocValuesFields() {
  /**
   * @return true if at least one field has docValues
   */
  public boolean hasDocValues() {
    for (FieldInfo fi : this) {
      if (fi.hasDocValues()) {
        return true;

@@ -359,12 +359,6 @@ public class FilterAtomicReader extends AtomicReader {
    return in.hasDeletions();
  }

  @Override
  public boolean hasNorms(String field) throws IOException {
    ensureOpen();
    return in.hasNorms(field);
  }

  @Override
  protected void doClose() throws IOException {
    in.close();

@@ -453,7 +453,7 @@ final class IndexFileDeleter {
    assert Thread.holdsLock(writer);

    if (infoStream.isEnabled("IFD")) {
      infoStream.message("IFD", "now checkpoint \"" + writer.segString(segmentInfos) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
      infoStream.message("IFD", "now checkpoint \"" + writer.segString(writer.toLiveInfos(segmentInfos)) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
    }

    // Try again now to delete any previously un-deletable

@@ -41,25 +41,27 @@ public abstract class IndexReaderContext {
    this.isTopLevel = parent==null;
  }

  /** Returns the {@link IndexReader} this context represents. */
  public abstract IndexReader reader();

  /**
   * Returns the context's leaves if this context is a top-level context
   * otherwise <code>null</code>. For convenience, if this is an
   * {@link AtomicReaderContext} this returns itself as the only leaf.
   * <p>
   * Note: this is a convenience method since leaves can always be obtained by
   * <p>Note: this is a convenience method since leaves can always be obtained by
   * walking the context tree.
   * <p><b>Warning:</b> Don't modify the returned array!
   * Doing so will corrupt the internal structure of this
   * {@code IndexReaderContext}.
   */
  public abstract AtomicReaderContext[] leaves();

  /**
   * Returns the context's children iff this context is a composite context
   * otherwise <code>null</code>.
   * <p>
   * Note: this method is a convenience method to prevent
   * <code>instanceof</code> checks and type-casts to
   * {@link CompositeReaderContext}.
   * <p><b>Warning:</b> Don't modify the returned array!
   * Doing so will corrupt the internal structure of this
   * {@code IndexReaderContext}.
   */
  public abstract IndexReaderContext[] children();
}
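For illustration only (not from this commit): because leaves() is non-null only on the top-level context, code holding an arbitrary context can still gather leaves by recursing over children(). A minimal sketch; collectLeaves is a hypothetical helper and java.util.List is assumed available.

  // Hypothetical helper: collect leaves from any node of the context tree.
  static void collectLeaves(IndexReaderContext ctx, java.util.List<AtomicReaderContext> out) {
    if (ctx instanceof AtomicReaderContext) {
      out.add((AtomicReaderContext) ctx);
    } else {
      for (IndexReaderContext child : ctx.children()) {
        collectLeaves(child, out);
      }
    }
  }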
@@ -33,7 +33,6 @@ import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;

@@ -392,260 +391,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    return r;
  }

  // This class inherits all sync from IW:
  class ReadersAndLiveDocs {
    // Not final because we replace (clone) when we need to
    // change it and it's been shared:
    public final SegmentInfo info;

    // Set once (null, and then maybe set, and never set again):
    private SegmentReader reader;

    // TODO: it's sometimes wasteful that we hold open two
    // separate SRs (one for merging one for
    // reading)... maybe just use a single SR? The gains of
    // not loading the terms index (for merging in the
    // non-NRT case) are far less now... and if the app has
    // any deletes it'll open real readers anyway.

    // Set once (null, and then maybe set, and never set again):
    private SegmentReader mergeReader;

    // Holds the current shared (readable and writable
    // liveDocs). This is null when there are no deleted
    // docs, and it's copy-on-write (cloned whenever we need
    // to change it but it's been shared to an external NRT
    // reader).
    public Bits liveDocs;

    // How many further deletions we've done against
    // liveDocs vs when we loaded it or last wrote it:
    public int pendingDeleteCount;

    // True if the current liveDocs is referenced by an
    // external NRT reader:
    public boolean shared;

    public ReadersAndLiveDocs(SegmentInfo info) {
      this.info = info;
      shared = true;
    }

    // Returns false if we are the only remaining refs of
    // this reader:
    public synchronized boolean anyOutsideRefs(SegmentReader sr) {
      int myRefCounts = 0;
      if (sr == reader) {
        myRefCounts++;
      }
      if (sr == mergeReader) {
        myRefCounts++;
      }
      final int rc = sr.getRefCount();
      assert rc >= myRefCounts;
      return rc > myRefCounts;
    }

    // Call only from assert!
    public synchronized boolean verifyDocCounts() {
      int count;
      if (liveDocs != null) {
        count = 0;
        for(int docID=0;docID<info.docCount;docID++) {
          if (liveDocs.get(docID)) {
            count++;
          }
        }
      } else {
        count = info.docCount;
      }

      assert info.docCount - info.getDelCount() - pendingDeleteCount == count: "info.docCount=" + info.docCount + " info.getDelCount()=" + info.getDelCount() + " pendingDeleteCount=" + pendingDeleteCount + " count=" + count;
      return true;
    }

    // Returns true if any reader remains
    public synchronized boolean removeReader(SegmentReader sr, boolean drop) throws IOException {
      if (sr == reader) {
        //System.out.println("  non-merge reader");
        reader.decRef();
        reader = null;
      }

      if (sr == mergeReader) {
        //System.out.println("  merge reader");
        mergeReader.decRef();
        mergeReader = null;
        if (drop && reader != null) {
          //System.out.println("  also release normal reader rc=" + rld.reader.getRefCount());
          reader.decRef();
          reader = null;
        }
      }

      return reader != null || mergeReader != null;
    }

    // Get reader for searching/deleting
    public synchronized SegmentReader getReader(IOContext context) throws IOException {
      //System.out.println("  livedocs=" + rld.liveDocs);

      if (reader == null) {
        reader = new SegmentReader(info, config.getReaderTermsIndexDivisor(), context);
        if (liveDocs == null) {
          liveDocs = reader.getLiveDocs();
        }
        //System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool");
      }

      // Ref for caller
      reader.incRef();
      return reader;
    }

    // Get reader for merging (does not load the terms
    // index):
    public synchronized SegmentReader getMergeReader(IOContext context) throws IOException {
      //System.out.println("  livedocs=" + rld.liveDocs);

      if (mergeReader == null) {

        if (reader != null) {
          // Just use the already opened non-merge reader
          // for merging. In the NRT case this saves us
          // pointless double-open:
          //System.out.println("PROMOTE non-merge reader seg=" + rld.info);
          reader.incRef();
          mergeReader = reader;
        } else {
          mergeReader = new SegmentReader(info, -1, context);
          if (liveDocs == null) {
            liveDocs = mergeReader.getLiveDocs();
          }
        }
      }

      // Ref for caller
      mergeReader.incRef();
      return mergeReader;
    }

    public synchronized boolean delete(int docID) {
      assert liveDocs != null;
      assert docID >= 0 && docID < liveDocs.length() : "out of bounds: docid=" + docID + ",liveDocsLength=" + liveDocs.length();
      assert !shared;
      final boolean didDelete = liveDocs.get(docID);
      if (didDelete) {
        ((MutableBits) liveDocs).clear(docID);
        pendingDeleteCount++;
        //System.out.println("  new del seg=" + info + " docID=" + docID + " pendingDelCount=" + pendingDeleteCount + " totDelCount=" + (info.docCount-liveDocs.count()));
      }
      return didDelete;
    }

    public synchronized void dropReaders() throws IOException {
      if (reader != null) {
        //System.out.println("  pool.drop info=" + info + " rc=" + reader.getRefCount());
        reader.decRef();
        reader = null;
      }
      if (mergeReader != null) {
        //System.out.println("  pool.drop info=" + info + " merge rc=" + mergeReader.getRefCount());
        mergeReader.decRef();
        mergeReader = null;
      }
    }

    /**
     * Returns a ref to a clone. NOTE: this clone is not
     * enrolled in the pool, so you should simply close()
     * it when you're done (ie, do not call release()).
     */
    public synchronized SegmentReader getReadOnlyClone(IOContext context) throws IOException {
      if (reader == null) {
        getReader(context).decRef();
        assert reader != null;
      }
      shared = true;
      if (liveDocs != null) {
        return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.docCount - info.getDelCount() - pendingDeleteCount);
      } else {
        reader.incRef();
        return reader;
      }
    }

    public synchronized void initWritableLiveDocs() throws IOException {
      assert Thread.holdsLock(IndexWriter.this);
      assert info.docCount > 0;
      //System.out.println("initWritableLivedocs seg=" + info + " liveDocs=" + liveDocs + " shared=" + shared);
      if (shared) {
        // Copy on write: this means we've cloned a
        // SegmentReader sharing the current liveDocs
        // instance; must now make a private clone so we can
        // change it:
        LiveDocsFormat liveDocsFormat = info.getCodec().liveDocsFormat();
        if (liveDocs == null) {
          //System.out.println("create BV seg=" + info);
          liveDocs = liveDocsFormat.newLiveDocs(info.docCount);
        } else {
          liveDocs = liveDocsFormat.newLiveDocs(liveDocs);
        }
        shared = false;
      } else {
        assert liveDocs != null;
      }
    }

    public synchronized Bits getReadOnlyLiveDocs() {
      //System.out.println("getROLiveDocs seg=" + info);
      assert Thread.holdsLock(IndexWriter.this);
      shared = true;
      //if (liveDocs != null) {
      //System.out.println("  liveCount=" + liveDocs.count());
      //}
      return liveDocs;
    }

    // Commit live docs to the directory (writes new
    // _X_N.del files); returns true if it wrote the file
    // and false if there were no new deletes to write:
    public synchronized boolean writeLiveDocs(Directory dir) throws IOException {
      //System.out.println("rld.writeLiveDocs seg=" + info + " pendingDelCount=" + pendingDeleteCount);
      if (pendingDeleteCount != 0) {
        // We have new deletes
        assert liveDocs.length() == info.docCount;

        // Save in case we need to rollback on failure:
        final SegmentInfo sav = (SegmentInfo) info.clone();
        info.advanceDelGen();
        info.setDelCount(info.getDelCount() + pendingDeleteCount);

        // We can write directly to the actual name (vs to a
        // .tmp & renaming it) because the file is not live
        // until segments file is written:
        boolean success = false;
        try {
          info.getCodec().liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, dir, info, IOContext.DEFAULT);
          success = true;
        } finally {
          if (!success) {
            info.reset(sav);
          }
        }
        pendingDeleteCount = 0;
        return true;
      } else {
        return false;
      }
    }

    @Override
    public String toString() {
      return "SegmentLiveDocs(seg=" + info + " pendingDeleteCount=" + pendingDeleteCount + " shared=" + shared + ")";
    }
  }

  /** Holds shared SegmentReader instances. IndexWriter uses
   *  SegmentReaders for 1) applying deletes, 2) doing
   *  merges, 3) handing out a real-time reader. This pool

@@ -665,44 +410,36 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      return true;
    }

    /**
     * Release the segment reader (i.e. decRef it and close if there
     * are no more references). If drop is true then we
     * remove this entry from the pool.
     * @param sr
     * @throws IOException
     */
    public synchronized void release(SegmentReader sr, boolean drop) throws IOException {
      // Drop caller's ref; for an external reader (not
      // pooled), this decRef will close it
      //System.out.println("pool.release seg=" + sr.getSegmentInfo() + " rc=" + sr.getRefCount() + " drop=" + drop);
      sr.decRef();
    public synchronized void drop(SegmentInfo info) throws IOException {
      final ReadersAndLiveDocs rld = readerMap.get(info);
      if (rld != null) {
        assert info == rld.info;
        readerMap.remove(info);
        rld.dropReaders();
      }
    }

      final ReadersAndLiveDocs rld = readerMap.get(sr.getSegmentInfo());
    public synchronized void release(ReadersAndLiveDocs rld) throws IOException {

      if (rld != null && (drop || (!poolReaders && !rld.anyOutsideRefs(sr)))) {
      // Matches incRef in get:
      rld.decRef();

        // Discard (don't save) changes when we are dropping
        // the reader; this is used only on the sub-readers
        // after a successful merge. If deletes had
        // accumulated on those sub-readers while the merge
        // is running, by now we have carried forward those
        // deletes onto the newly merged segment, so we can
        // discard them on the sub-readers:
      // Pool still holds a ref:
      assert rld.refCount() >= 1;

        if (!drop) {
          if (rld.writeLiveDocs(directory)) {
            assert infoIsLive(sr.getSegmentInfo());
            // Must checkpoint w/ deleter, because we just
            // created new _X_N.del file.
            deleter.checkpoint(segmentInfos, false);
          }
      if (!poolReaders && rld.refCount() == 1) {
        // This is the last ref to this RLD, and we're not
        // pooling, so remove it:
        if (rld.writeLiveDocs(directory)) {
          // Make sure we only write del docs for a live segment:
          assert infoIsLive(rld.info);
          // Must checkpoint w/ deleter, because we just
          // created new _X_N.del file.
          deleter.checkpoint(segmentInfos, false);
        }

        if (!rld.removeReader(sr, drop)) {
          //System.out.println("DROP seg=" + rld.info + " " + readerMap.size() + " in pool");
          readerMap.remove(sr.getSegmentInfo());
        }
        rld.dropReaders();
        readerMap.remove(rld.info);
      }
    }

@@ -712,8 +449,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      final Iterator<Map.Entry<SegmentInfo,ReadersAndLiveDocs>> it = readerMap.entrySet().iterator();
      while(it.hasNext()) {
        final ReadersAndLiveDocs rld = it.next().getValue();
        //System.out.println("pool.dropAll: seg=" + rld.info);
        if (doSave && rld.writeLiveDocs(directory)) {
          // Make sure we only write del docs for a live segment:
          assert infoIsLive(rld.info);
          // Must checkpoint w/ deleter, because we just
          // created new _X_N.del file.

@@ -735,13 +472,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      assert readerMap.size() == 0;
    }

    public synchronized void drop(SegmentInfo info) throws IOException {
      final ReadersAndLiveDocs rld = readerMap.remove(info);
      if (rld != null) {
        rld.dropReaders();
      }
    }

    /**
     * Commit live docs changes for the segment readers for
     * the provided infos.

@@ -751,19 +481,23 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    public synchronized void commit(SegmentInfos infos) throws IOException {
      for (SegmentInfo info : infos) {
        final ReadersAndLiveDocs rld = readerMap.get(info);
        if (rld != null && rld.writeLiveDocs(directory)) {
          assert infoIsLive(info);
          // Must checkpoint w/ deleter, because we just
          // created new _X_N.del file.
          deleter.checkpoint(segmentInfos, false);
        if (rld != null) {
          assert rld.info == info;
          if (rld.writeLiveDocs(directory)) {
            // Make sure we only write del docs for a live segment:
            assert infoIsLive(info);
            // Must checkpoint w/ deleter, because we just
            // created new _X_N.del file.
            deleter.checkpoint(segmentInfos, false);
          }
        }
      }
    }

    /**
     * Obtain a ReadersAndLiveDocs instance from the
     * readerPool. If getReader is true, you must later call
     * {@link #release(SegmentReader)}.
     * readerPool. If create is true, you must later call
     * {@link #release(ReadersAndLiveDocs)}.
     * @throws IOException
     */
    public synchronized ReadersAndLiveDocs get(SegmentInfo info, boolean create) {

@@ -771,15 +505,22 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      assert info.dir == directory;

      ReadersAndLiveDocs rld = readerMap.get(info);
      //System.out.println("rld.get seg=" + info + " poolReaders=" + poolReaders);
      if (rld == null) {
        //System.out.println("  new rld");
        if (!create) {
          return null;
        }
        rld = new ReadersAndLiveDocs(info);
        rld = new ReadersAndLiveDocs(IndexWriter.this, info);
        // Steal initial reference:
        readerMap.put(info, rld);
      } else {
        assert rld.info == info: "rld.info=" + rld.info + " info=" + info + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info);
      }

      if (create) {
        // Return ref to caller:
        rld.incRef();
      }

      return rld;
    }
  }
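An editorial sketch (not part of the patch) of the contract the reworked pool implies: get(info, true) takes a ref on the ReadersAndLiveDocs that must be paired with release(rld), and readers checked out of the rld are released separately. The readerPool, segmentInfo, and context names stand in for real IndexWriter state.

  ReadersAndLiveDocs rld = readerPool.get(segmentInfo, true); // ref for the caller
  try {
    SegmentReader sr = rld.getReader(context); // second ref, on the reader itself
    try {
      // ... search or apply deletes against sr ...
    } finally {
      rld.release(sr);      // drop the reader ref
    }
  } finally {
    readerPool.release(rld); // drop the pool ref taken by get(..., true)
  }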
@@ -795,7 +536,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {

    final ReadersAndLiveDocs rld = readerPool.get(info, false);
    if (rld != null) {
      delCount += rld.pendingDeleteCount;
      delCount += rld.getPendingDeleteCount();
    }
    return delCount;
  }

@@ -1116,7 +857,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
        finishMerges(waitForMerges);
        stopMerges = true;
      }

      mergeScheduler.close();

      if (infoStream.isEnabled("IW")) {

@@ -1160,8 +900,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      }
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {

@@ -2020,6 +1758,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
        notifyAll();
      }

      // Don't bother saving any changes in our segmentInfos
      readerPool.dropAll(false);

      // Keep the same segmentInfos instance but replace all
      // of its SegmentInfo instances. This is so the next
      // attempt to commit using this instance of IndexWriter

@@ -2038,9 +1779,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      // them:
      deleter.checkpoint(segmentInfos, false);
      deleter.refresh();

      // Don't bother saving any changes in our segmentInfos
      readerPool.dropAll(false);
    }

    lastCommitChangeCount = changeCount;

@@ -3023,16 +2761,18 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      final int docCount = info.docCount;
      final Bits prevLiveDocs = merge.readerLiveDocs.get(i);
      final Bits currentLiveDocs;
      ReadersAndLiveDocs rld = readerPool.get(info, false);
      // We enrolled in mergeInit:
      assert rld != null;
      currentLiveDocs = rld.liveDocs;
      final ReadersAndLiveDocs rld = readerPool.get(info, false);
      // We hold a ref so it should still be in the pool:
      assert rld != null: "seg=" + info.name;
      currentLiveDocs = rld.getLiveDocs();

      if (prevLiveDocs != null) {

        // If we had deletions on starting the merge we must
        // still have deletions now:
        assert currentLiveDocs != null;
        assert prevLiveDocs.length() == docCount;
        assert currentLiveDocs.length() == docCount;

        // There were deletes on this segment when the merge
        // started. The merge has collapsed away those

@@ -3066,9 +2806,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
            }
          }
        } else {
          docUpto += info.docCount - info.getDelCount() - rld.pendingDeleteCount;
          docUpto += info.docCount - info.getDelCount() - rld.getPendingDeleteCount();
        }
      } else if (currentLiveDocs != null) {
        assert currentLiveDocs.length() == docCount;
        // This segment had no deletes before but now it
        // does:
        for(int j=0; j<docCount; j++) {

@@ -3087,11 +2828,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      }
    }

    assert docUpto == merge.info.docCount;

    if (infoStream.isEnabled("IW")) {
      if (mergedDeletes == null) {
        infoStream.message("IW", "no new deletes since merge started");
      } else {
        infoStream.message("IW", mergedDeletes.pendingDeleteCount + " new deletes since merge started");
        infoStream.message("IW", mergedDeletes.getPendingDeleteCount() + " new deletes since merge started");
      }
    }

@@ -3136,7 +2879,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {

    final ReadersAndLiveDocs mergedDeletes = merge.info.docCount == 0 ? null : commitMergedDeletes(merge);

    assert mergedDeletes == null || mergedDeletes.pendingDeleteCount != 0;
    assert mergedDeletes == null || mergedDeletes.getPendingDeleteCount() != 0;

    // If the doc store we are using has been closed and
    // is in now compound format (but wasn't when we

@@ -3148,7 +2891,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    final boolean allDeleted = merge.segments.size() == 0 ||
      merge.info.docCount == 0 ||
      (mergedDeletes != null &&
       mergedDeletes.pendingDeleteCount == merge.info.docCount);
       mergedDeletes.getPendingDeleteCount() == merge.info.docCount);

    if (infoStream.isEnabled("IW")) {
      if (allDeleted) {

@@ -3165,15 +2908,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    assert merge.info.docCount != 0 || keepFullyDeletedSegments || dropSegment;

    segmentInfos.applyMergeChanges(merge, dropSegment);

    if (dropSegment) {
      readerPool.drop(merge.info);
      deleter.deleteNewFiles(merge.info.files());
      assert !segmentInfos.contains(merge.info);
    } else {
      if (mergedDeletes != null && !poolReaders) {
        mergedDeletes.writeLiveDocs(directory);
        readerPool.drop(merge.info);

    if (mergedDeletes != null) {
      if (dropSegment) {
        mergedDeletes.dropChanges();
      }
      readerPool.release(mergedDeletes);
      if (dropSegment) {
        readerPool.drop(mergedDeletes.info);
      }
    }

@@ -3289,7 +3031,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
        infoStream.message("IW", "merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
      }
    }
    //System.out.println(Thread.currentThread().getName() + ": merge end");
  }

  /** Hook that's called when the specified merge is complete. */

@@ -3524,9 +3265,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    boolean drop = !suppressExceptions;

    for (int i = 0; i < numSegments; i++) {
      if (merge.readers.get(i) != null) {
      final SegmentReader sr = merge.readers.get(i);
      if (sr != null) {
        try {
          readerPool.release(merge.readers.get(i), drop);
          final ReadersAndLiveDocs rld = readerPool.get(sr.getSegmentInfo(), false);
          // We still hold a ref so it should not have been removed:
          assert rld != null;
          if (drop) {
            rld.dropChanges();
          }
          rld.release(sr);
          readerPool.release(rld);
          if (drop) {
            readerPool.drop(rld.info);
          }
        } catch (Throwable t) {
          if (th == null) {
            th = t;

@@ -3589,17 +3341,20 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {

      // Carefully pull the most recent live docs:
      final Bits liveDocs;
      final int delCount;

      synchronized(this) {
        // Must sync to ensure BufferedDeletesStream
        // cannot change liveDocs/pendingDeleteCount while
        // we pull a copy:
        liveDocs = rld.getReadOnlyLiveDocs();
        delCount = rld.getPendingDeleteCount() + info.getDelCount();

        assert rld.verifyDocCounts();

        if (infoStream.isEnabled("IW")) {
          if (rld.pendingDeleteCount != 0) {
            infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.pendingDeleteCount);
          if (rld.getPendingDeleteCount() != 0) {
            infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.getPendingDeleteCount());
          } else if (info.getDelCount() != 0) {
            infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount());
          } else {

@@ -3609,8 +3364,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      }
      merge.readerLiveDocs.add(liveDocs);
      merge.readers.add(reader);
      final int delCount = rld.pendingDeleteCount + info.getDelCount();
      assert delCount <= info.docCount;
      assert delCount <= info.docCount: "delCount=" + delCount + " info.docCount=" + info.docCount + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
      if (delCount < info.docCount) {
        merger.add(reader, liveDocs);
      }

@@ -3708,7 +3462,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
            mergedSegmentWarmer.warm(sr);
          } finally {
            synchronized(this) {
              readerPool.release(sr, false);
              rld.release(sr);
              readerPool.release(rld);
            }
          }
        }

@@ -3762,11 +3517,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
  /** @lucene.internal */
  public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
    final StringBuilder buffer = new StringBuilder();
    for(final SegmentInfo s : infos) {
    for(final SegmentInfo info : infos) {
      if (buffer.length() > 0) {
        buffer.append(' ');
      }
      buffer.append(segString(s));
      buffer.append(segString(info));
    }
    return buffer.toString();
  }

@@ -3819,6 +3574,24 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    return true;
  }

  // For infoStream output
  synchronized SegmentInfos toLiveInfos(SegmentInfos sis) {
    final SegmentInfos newSIS = new SegmentInfos();
    final Map<SegmentInfo,SegmentInfo> liveSIS = new HashMap<SegmentInfo,SegmentInfo>();
    for(SegmentInfo info : segmentInfos) {
      liveSIS.put(info, info);
    }
    for(SegmentInfo info : sis) {
      SegmentInfo liveInfo = liveSIS.get(info);
      if (liveInfo != null) {
        info = liveInfo;
      }
      newSIS.add(info);
    }

    return newSIS;
  }

  /** Walk through all files referenced by the current
   *  segmentInfos and ask the Directory to sync each file,
   *  if it wasn't already. If that succeeds, then we

@@ -3853,7 +3626,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    }

    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "startCommit index=" + segString(toSync) + " changeCount=" + changeCount);
      infoStream.message("IW", "startCommit index=" + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount);
    }

    assert filesExist(toSync);

@@ -54,7 +54,7 @@ public interface IndexableField {
  /** Non-null if this field has a Reader value */
  public Reader readerValue();

  /** Non-null if this field hasa numeric value */
  /** Non-null if this field has a numeric value */
  public Number numericValue();

  /**

@@ -144,7 +144,7 @@ public class MultiDocValues extends DocValues {
        }
        final DocValues d = puller.pull(r, field);
        if (d != null) {
          TypePromoter incoming = TypePromoter.create(d.type(), d.getValueSize());
          TypePromoter incoming = TypePromoter.create(d.getType(), d.getValueSize());
          promotedType[0] = promotedType[0].promote(incoming);
        } else if (puller.stopLoadingOnNull(r, field)){
          promotedType[0] = TypePromoter.getIdentityPromoter(); // set to identity to return null

@@ -203,8 +203,8 @@ public class MultiDocValues extends DocValues {
    }

    @Override
    public Type type() {
      return emptySource.type();
    public Type getType() {
      return emptySource.getType();
    }

    @Override

@@ -230,8 +230,8 @@ public class MultiDocValues extends DocValues {
    }

    @Override
    public Type type() {
      return emptyFixedSource.type();
    public Type getType() {
      return emptyFixedSource.getType();
    }

    @Override

@@ -519,7 +519,7 @@ public class MultiDocValues extends DocValues {

    @Override
    public SortedSource asSortedSource() {
      if (type() == Type.BYTES_FIXED_SORTED || type() == Type.BYTES_VAR_SORTED) {
      if (getType() == Type.BYTES_FIXED_SORTED || getType() == Type.BYTES_VAR_SORTED) {

      }
      return super.asSortedSource();

@@ -586,7 +586,7 @@ public class MultiDocValues extends DocValues {
    }

    @Override
    public Type type() {
    public Type getType() {
      return type;
    }
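Illustration only: the type() to getType() rename is mechanical at call sites; dv stands in for any DocValues instance.

  // before this change:
  //   DocValues.Type t = dv.type();
  // after this change (pure rename, same semantics):
  DocValues.Type t = dv.getType();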
@@ -51,7 +51,7 @@ public class MultiReader extends BaseCompositeReader<IndexReader> {

  /**
   * <p>Construct a MultiReader aggregating the named set of (sub)readers.
   * @param subReaders set of (sub)readers
   * @param subReaders set of (sub)readers; this array will be cloned.
   * @param closeSubReaders indicates whether the subreaders should be closed
   * when this MultiReader is closed
   */

@@ -263,13 +263,6 @@ public final class ParallelAtomicReader extends AtomicReader {
    return fields;
  }

  @Override
  public boolean hasNorms(String field) throws IOException {
    ensureOpen();
    AtomicReader reader = fieldToReader.get(field);
    return reader==null ? false : reader.hasNorms(field);
  }

  @Override
  protected synchronized void doClose() throws IOException {
    IOException ioe = null;

@@ -0,0 +1,303 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.MutableBits;

// Used by IndexWriter to hold open SegmentReaders (for
// searching or merging), plus pending deletes,
// for a given segment
class ReadersAndLiveDocs {
  // Not final because we replace (clone) when we need to
  // change it and it's been shared:
  public final SegmentInfo info;

  // Tracks how many consumers are using this instance:
  private final AtomicInteger refCount = new AtomicInteger(1);

  private final IndexWriter writer;

  // Set once (null, and then maybe set, and never set again):
  private SegmentReader reader;

  // TODO: it's sometimes wasteful that we hold open two
  // separate SRs (one for merging one for
  // reading)... maybe just use a single SR? The gains of
  // not loading the terms index (for merging in the
  // non-NRT case) are far less now... and if the app has
  // any deletes it'll open real readers anyway.

  // Set once (null, and then maybe set, and never set again):
  private SegmentReader mergeReader;

  // Holds the current shared (readable and writable
  // liveDocs). This is null when there are no deleted
  // docs, and it's copy-on-write (cloned whenever we need
  // to change it but it's been shared to an external NRT
  // reader).
  private Bits liveDocs;

  // How many further deletions we've done against
  // liveDocs vs when we loaded it or last wrote it:
  private int pendingDeleteCount;

  // True if the current liveDocs is referenced by an
  // external NRT reader:
  private boolean shared;

  public ReadersAndLiveDocs(IndexWriter writer, SegmentInfo info) {
    this.info = info;
    this.writer = writer;
    shared = true;
  }

  public void incRef() {
    final int rc = refCount.incrementAndGet();
    assert rc > 1;
  }

  public void decRef() {
    final int rc = refCount.decrementAndGet();
    assert rc >= 0;
  }

  public int refCount() {
    final int rc = refCount.get();
    assert rc >= 0;
    return rc;
  }

  public synchronized int getPendingDeleteCount() {
    return pendingDeleteCount;
  }

  // Call only from assert!
  public synchronized boolean verifyDocCounts() {
    int count;
    if (liveDocs != null) {
      count = 0;
      for(int docID=0;docID<info.docCount;docID++) {
        if (liveDocs.get(docID)) {
          count++;
        }
      }
    } else {
      count = info.docCount;
    }

    assert info.docCount - info.getDelCount() - pendingDeleteCount == count: "info.docCount=" + info.docCount + " info.getDelCount()=" + info.getDelCount() + " pendingDeleteCount=" + pendingDeleteCount + " count=" + count;
    return true;
  }

  // Get reader for searching/deleting
  public synchronized SegmentReader getReader(IOContext context) throws IOException {
    //System.out.println("  livedocs=" + rld.liveDocs);

    if (reader == null) {
      // We steal returned ref:
      reader = new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), context);
      if (liveDocs == null) {
        liveDocs = reader.getLiveDocs();
      }
      //System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool");
      //System.out.println(Thread.currentThread().getName() + ": getReader seg=" + info.name);
    }

    // Ref for caller
    reader.incRef();
    return reader;
  }

  // Get reader for merging (does not load the terms
  // index):
  public synchronized SegmentReader getMergeReader(IOContext context) throws IOException {
    //System.out.println("  livedocs=" + rld.liveDocs);

    if (mergeReader == null) {

      if (reader != null) {
        // Just use the already opened non-merge reader
        // for merging. In the NRT case this saves us
        // pointless double-open:
        //System.out.println("PROMOTE non-merge reader seg=" + rld.info);
        // Ref for us:
        reader.incRef();
        mergeReader = reader;
        //System.out.println(Thread.currentThread().getName() + ": getMergeReader share seg=" + info.name);
      } else {
        //System.out.println(Thread.currentThread().getName() + ": getMergeReader seg=" + info.name);
        // We steal returned ref:
        mergeReader = new SegmentReader(info, -1, context);
        if (liveDocs == null) {
          liveDocs = mergeReader.getLiveDocs();
        }
      }
    }

    // Ref for caller
    mergeReader.incRef();
    return mergeReader;
  }

  public synchronized void release(SegmentReader sr) throws IOException {
    assert info == sr.getSegmentInfo();
    sr.decRef();
  }

  public synchronized boolean delete(int docID) {
    assert liveDocs != null;
    assert Thread.holdsLock(writer);
    assert docID >= 0 && docID < liveDocs.length() : "out of bounds: docid=" + docID + " liveDocsLength=" + liveDocs.length() + " seg=" + info.name + " docCount=" + info.docCount;
    assert !shared;
    final boolean didDelete = liveDocs.get(docID);
    if (didDelete) {
      ((MutableBits) liveDocs).clear(docID);
      pendingDeleteCount++;
      //System.out.println("  new del seg=" + info + " docID=" + docID + " pendingDelCount=" + pendingDeleteCount + " totDelCount=" + (info.docCount-liveDocs.count()));
    }
    return didDelete;
  }

  // NOTE: removes caller's ref
  public synchronized void dropReaders() throws IOException {
    if (reader != null) {
      //System.out.println("  pool.drop info=" + info + " rc=" + reader.getRefCount());
      reader.decRef();
      reader = null;
    }
    if (mergeReader != null) {
      //System.out.println("  pool.drop info=" + info + " merge rc=" + mergeReader.getRefCount());
      mergeReader.decRef();
      mergeReader = null;
    }
    decRef();
  }

  /**
   * Returns a ref to a clone. NOTE: this clone is not
   * enrolled in the pool, so you should simply close()
   * it when you're done (ie, do not call release()).
   */
  public synchronized SegmentReader getReadOnlyClone(IOContext context) throws IOException {
    if (reader == null) {
      getReader(context).decRef();
      assert reader != null;
    }
    shared = true;
    if (liveDocs != null) {
      return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.docCount - info.getDelCount() - pendingDeleteCount);
    } else {
      assert reader.getLiveDocs() == liveDocs;
      reader.incRef();
      return reader;
    }
  }

  public synchronized void initWritableLiveDocs() throws IOException {
    assert Thread.holdsLock(writer);
    assert info.docCount > 0;
    //System.out.println("initWritableLivedocs seg=" + info + " liveDocs=" + liveDocs + " shared=" + shared);
    if (shared) {
      // Copy on write: this means we've cloned a
      // SegmentReader sharing the current liveDocs
      // instance; must now make a private clone so we can
      // change it:
      LiveDocsFormat liveDocsFormat = info.getCodec().liveDocsFormat();
      if (liveDocs == null) {
        //System.out.println("create BV seg=" + info);
        liveDocs = liveDocsFormat.newLiveDocs(info.docCount);
      } else {
        liveDocs = liveDocsFormat.newLiveDocs(liveDocs);
      }
      shared = false;
    } else {
      assert liveDocs != null;
    }
  }

  public synchronized Bits getLiveDocs() {
    assert Thread.holdsLock(writer);
    return liveDocs;
  }

  public synchronized Bits getReadOnlyLiveDocs() {
    //System.out.println("getROLiveDocs seg=" + info);
    assert Thread.holdsLock(writer);
    shared = true;
    //if (liveDocs != null) {
    //System.out.println("  liveCount=" + liveDocs.count());
    //}
    return liveDocs;
  }

  public synchronized void dropChanges() {
    // Discard (don't save) changes when we are dropping
    // the reader; this is used only on the sub-readers
    // after a successful merge. If deletes had
    // accumulated on those sub-readers while the merge
    // is running, by now we have carried forward those
    // deletes onto the newly merged segment, so we can
    // discard them on the sub-readers:
    pendingDeleteCount = 0;
  }

  // Commit live docs to the directory (writes new
  // _X_N.del files); returns true if it wrote the file
  // and false if there were no new deletes to write:
  public synchronized boolean writeLiveDocs(Directory dir) throws IOException {
    //System.out.println("rld.writeLiveDocs seg=" + info + " pendingDelCount=" + pendingDeleteCount);
    if (pendingDeleteCount != 0) {
      // We have new deletes
      assert liveDocs.length() == info.docCount;

      // Save in case we need to rollback on failure:
      final SegmentInfo sav = (SegmentInfo) info.clone();
      info.advanceDelGen();
      info.setDelCount(info.getDelCount() + pendingDeleteCount);

      // We can write directly to the actual name (vs to a
      // .tmp & renaming it) because the file is not live
      // until segments file is written:
      boolean success = false;
      try {
        info.getCodec().liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, dir, info, IOContext.DEFAULT);
        success = true;
      } finally {
        if (!success) {
          info.reset(sav);
        }
      }
      pendingDeleteCount = 0;
      return true;
    } else {
      return false;
    }
  }

  @Override
  public String toString() {
    return "ReadersAndLiveDocs(seg=" + info + " pendingDeleteCount=" + pendingDeleteCount + " shared=" + shared + ")";
  }
}
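An editorial sketch (mine, not in the commit) of the ref-count lifecycle the new class encodes: the constructor leaves the holder (the pool) with one ref, each consumer brackets use with incRef()/decRef(), and dropReaders() also surrenders the holder's ref.

  ReadersAndLiveDocs rld = new ReadersAndLiveDocs(writer, info); // refCount == 1 (pool's ref)
  rld.incRef();                                  // consumer checks it out: refCount == 2
  try {
    SegmentReader sr = rld.getReader(IOContext.READ); // the reader carries its own refs
    try {
      // ... use sr ...
    } finally {
      rld.release(sr);                           // decRef the SegmentReader
    }
  } finally {
    rld.decRef();                                // consumer done: refCount == 1
  }
  rld.dropReaders();                             // closes readers and drops the holder's ref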
@ -173,7 +173,7 @@ final class SegmentMerger {
|
|||
// returns an updated typepromoter (tracking type and size) given a previous one,
|
||||
// and a newly encountered docvalues
|
||||
private TypePromoter mergeDocValuesType(TypePromoter previous, DocValues docValues) {
|
||||
TypePromoter incoming = TypePromoter.create(docValues.type(), docValues.getValueSize());
|
||||
TypePromoter incoming = TypePromoter.create(docValues.getType(), docValues.getValueSize());
|
||||
if (previous == null) {
|
||||
previous = TypePromoter.getIdentityPromoter();
|
||||
}
|
||||
|
@ -210,7 +210,7 @@ final class SegmentMerger {
|
|||
TypePromoter previous = docValuesTypes.get(merged);
|
||||
docValuesTypes.put(merged, mergeDocValuesType(previous, reader.docValues(fi.name)));
|
||||
}
|
||||
if (fi.normsPresent()) {
|
||||
if (fi.hasNorms()) {
|
||||
TypePromoter previous = normValuesTypes.get(merged);
|
||||
normValuesTypes.put(merged, mergeDocValuesType(previous, reader.normValues(fi.name)));
|
||||
}
|
||||
|
|
|
@ -150,13 +150,6 @@ public final class SegmentReader extends AtomicReader {
|
|||
return si.docCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNorms(String field) {
|
||||
ensureOpen();
|
||||
FieldInfo fi = core.fieldInfos.fieldInfo(field);
|
||||
return fi.normsPresent();
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public TermVectorsReader getTermVectorsReader() {
|
||||
ensureOpen();
|
||||
|
|
|
@ -91,21 +91,27 @@ final class StandardDirectoryReader extends DirectoryReader {
|
|||
try {
|
||||
final SegmentInfo info = infos.info(i);
|
||||
assert info.dir == dir;
|
||||
final IndexWriter.ReadersAndLiveDocs rld = writer.readerPool.get(info, true);
|
||||
final SegmentReader reader = rld.getReadOnlyClone(IOContext.READ);
|
||||
if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
|
||||
readers.add(reader);
|
||||
infosUpto++;
|
||||
} else {
|
||||
reader.close();
|
||||
segmentInfos.remove(infosUpto);
|
||||
final ReadersAndLiveDocs rld = writer.readerPool.get(info, true);
|
||||
try {
|
||||
final SegmentReader reader = rld.getReadOnlyClone(IOContext.READ);
|
||||
if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
|
||||
// Steal the ref:
|
||||
readers.add(reader);
|
||||
infosUpto++;
|
||||
} else {
|
||||
reader.close();
|
||||
segmentInfos.remove(infosUpto);
|
||||
}
|
||||
} finally {
|
||||
writer.readerPool.release(rld);
|
||||
}
|
||||
success = true;
|
||||
} catch(IOException ex) {
|
||||
prior = ex;
|
||||
} finally {
|
||||
if (!success)
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(prior, readers);
|
||||
}
|
||||
}
|
||||
}
|
||||
return new StandardDirectoryReader(dir, readers.toArray(new SegmentReader[readers.size()]),
|
||||
|
@ -219,12 +225,12 @@ final class StandardDirectoryReader extends DirectoryReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected final DirectoryReader doOpenIfChanged() throws CorruptIndexException, IOException {
|
||||
protected DirectoryReader doOpenIfChanged() throws CorruptIndexException, IOException {
|
||||
return doOpenIfChanged(null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final DirectoryReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
|
||||
protected DirectoryReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
|
||||
// If we were obtained by writer.getReader(), re-ask the
|
||||
|
@ -237,7 +243,7 @@ final class StandardDirectoryReader extends DirectoryReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected final DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
|
||||
protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
if (writer == this.writer && applyAllDeletes == this.applyAllDeletes) {
|
||||
return doOpenFromWriter(null);
|
||||
|
@ -246,7 +252,7 @@ final class StandardDirectoryReader extends DirectoryReader {
|
|||
}
|
||||
}
|
||||
|
||||
private final DirectoryReader doOpenFromWriter(IndexCommit commit) throws CorruptIndexException, IOException {
|
||||
private DirectoryReader doOpenFromWriter(IndexCommit commit) throws CorruptIndexException, IOException {
|
||||
if (commit != null) {
|
||||
throw new IllegalArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
|
||||
}
|
||||
|
|
|
@ -1640,7 +1640,7 @@ public abstract class FieldComparator<T> {
|
|||
// This means segment has doc values, but they are
|
||||
// not able to provide a sorted source; consider
|
||||
// this a hard error:
|
||||
throw new IllegalStateException("DocValues exist for field \"" + field + "\", but not as a sorted source: type=" + dv.getSource().type() + " reader=" + context.reader());
|
||||
throw new IllegalStateException("DocValues exist for field \"" + field + "\", but not as a sorted source: type=" + dv.getSource().getType() + " reader=" + context.reader());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -584,8 +584,9 @@ public class IndexSearcher {
|
|||
Weight weight = query.createWeight(this);
|
||||
float v = weight.getValueForNormalization();
|
||||
float norm = getSimilarity().queryNorm(v);
|
||||
if (Float.isInfinite(norm) || Float.isNaN(norm))
|
||||
if (Float.isInfinite(norm) || Float.isNaN(norm)) {
|
||||
norm = 1.0f;
|
||||
}
|
||||
weight.normalize(norm, 1.0f);
|
||||
return weight;
|
||||
}
|
||||
|
@ -812,6 +813,8 @@ public class IndexSearcher {
|
|||
final int docCount;
|
||||
final long sumTotalTermFreq;
|
||||
final long sumDocFreq;
|
||||
|
||||
assert field != null;
|
||||
|
||||
Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms == null) {
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.*;
|
|||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
|
@ -238,7 +237,7 @@ public class MultiPhraseQuery extends Query {
|
|||
docFreq = termsEnum.docFreq();
|
||||
}
|
||||
|
||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
|
||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
|
||||
}
|
||||
|
||||
// sort by increasing docFreq order
|
||||
|
@ -314,9 +313,21 @@ public class MultiPhraseQuery extends Query {
|
|||
}
|
||||
|
||||
buffer.append("\"");
|
||||
int k = 0;
|
||||
Iterator<Term[]> i = termArrays.iterator();
|
||||
int lastPos = -1;
|
||||
boolean first = true;
|
||||
while (i.hasNext()) {
|
||||
Term[] terms = i.next();
|
||||
int position = positions.get(k);
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
buffer.append(" ");
|
||||
for (int j=1; j<(position-lastPos); j++) {
|
||||
buffer.append("? ");
|
||||
}
|
||||
}
|
||||
if (terms.length > 1) {
|
||||
buffer.append("(");
|
||||
for (int j = 0; j < terms.length; j++) {
|
||||
|
@ -328,8 +339,8 @@ public class MultiPhraseQuery extends Query {
|
|||
} else {
|
||||
buffer.append(terms[0].text());
|
||||
}
|
||||
if (i.hasNext())
|
||||
buffer.append(" ");
|
||||
lastPos = position;
|
||||
++k;
|
||||
}
|
||||
buffer.append("\"");
|
||||
|
||||
|
|
|
@ -31,12 +31,15 @@ final class PhrasePositions {
|
|||
final int ord; // unique across all PhrasePositions instances
|
||||
final DocsAndPositionsEnum postings; // stream of docs & positions
|
||||
PhrasePositions next; // used to make lists
|
||||
PhrasePositions nextRepeating; // link to next repeating pp: standing for same term in different query offsets
|
||||
int rptGroup = -1; // >=0 indicates that this is a repeating PP
|
||||
int rptInd; // index in the rptGroup
|
||||
final Term[] terms; // for repetitions initialization
|
||||
|
||||
PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
|
||||
PhrasePositions(DocsAndPositionsEnum postings, int o, int ord, Term[] terms) {
|
||||
this.postings = postings;
|
||||
offset = o;
|
||||
this.ord = ord;
|
||||
this.terms = terms;
|
||||
}
|
||||
|
||||
final boolean next() throws IOException { // increments to next doc
|
||||
|
@@ -78,8 +81,8 @@ final class PhrasePositions {
   @Override
   public String toString() {
     String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
-    if (nextRepeating!=null) {
-      s += " rpt[ "+nextRepeating+" ]";
+    if (rptGroup >=0 ) {
+      s += " rpt:"+rptGroup+",i"+rptInd;
     }
     return s;
   }
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Set;
 
 import org.apache.lucene.index.AtomicReaderContext;
@@ -137,23 +138,46 @@ public class PhraseQuery extends Query {
     final DocsAndPositionsEnum postings;
     final int docFreq;
     final int position;
-    final Term term;
+    final Term[] terms;
+    final int nTerms; // for faster comparisons
 
-    public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
+    public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) {
       this.postings = postings;
       this.docFreq = docFreq;
       this.position = position;
-      this.term = term;
+      nTerms = terms==null ? 0 : terms.length;
+      if (nTerms>0) {
+        if (terms.length==1) {
+          this.terms = terms;
+        } else {
+          Term[] terms2 = new Term[terms.length];
+          System.arraycopy(terms, 0, terms2, 0, terms.length);
+          Arrays.sort(terms2);
+          this.terms = terms2;
+        }
+      } else {
+        this.terms = null;
+      }
     }
 
     public int compareTo(PostingsAndFreq other) {
-      if (docFreq == other.docFreq) {
-        if (position == other.position) {
-          return term.compareTo(other.term);
-        }
-        return position - other.position;
-      }
-      return docFreq - other.docFreq;
+      if (docFreq != other.docFreq) {
+        return docFreq - other.docFreq;
+      }
+      if (position != other.position) {
+        return position - other.position;
+      }
+      if (nTerms != other.nTerms) {
+        return nTerms - other.nTerms;
+      }
+      if (nTerms == 0) {
+        return 0;
+      }
+      for (int i=0; i<terms.length; i++) {
+        int res = terms[i].compareTo(other.terms[i]);
+        if (res!=0) return res;
+      }
+      return 0;
     }
 
     @Override
@@ -162,7 +186,9 @@ public class PhraseQuery extends Query {
       int result = 1;
       result = prime * result + docFreq;
       result = prime * result + position;
-      result = prime * result + ((term == null) ? 0 : term.hashCode());
+      for (int i=0; i<nTerms; i++) {
+        result = prime * result + terms[i].hashCode();
+      }
       return result;
     }
@@ -174,10 +200,8 @@ public class PhraseQuery extends Query {
       PostingsAndFreq other = (PostingsAndFreq) obj;
       if (docFreq != other.docFreq) return false;
       if (position != other.position) return false;
-      if (term == null) {
-        if (other.term != null) return false;
-      } else if (!term.equals(other.term)) return false;
-      return true;
+      if (terms == null) return other.terms == null;
+      return Arrays.equals(terms, other.terms);
     }
   }
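The new tie-break order can be read straight off the hunk above: docFreq first (so the scorer is driven by the rarest postings), then query position, then term count, then the sorted terms lexicographically. A hypothetical standalone comparator, with String[] standing in for Term[] (not part of this commit):

import java.util.Arrays;
import java.util.Comparator;

class PF {
  int docFreq, position;
  String[] terms; // kept sorted, as the patched constructor guarantees
  PF(int docFreq, int position, String... terms) {
    this.docFreq = docFreq;
    this.position = position;
    this.terms = terms.clone();
    Arrays.sort(this.terms);
  }
}

class PFOrder implements Comparator<PF> {
  public int compare(PF a, PF b) {
    if (a.docFreq != b.docFreq) return a.docFreq - b.docFreq;     // rarest postings first
    if (a.position != b.position) return a.position - b.position; // then query position
    if (a.terms.length != b.terms.length) return a.terms.length - b.terms.length;
    for (int i = 0; i < a.terms.length; i++) {                    // then terms, lexicographically
      int res = a.terms[i].compareTo(b.terms[i]);
      if (res != 0) return res;
    }
    return 0;
  }
}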
@@ -49,11 +49,11 @@ abstract class PhraseScorer extends Scorer {
     // this allows to easily identify a matching (exact) phrase
     // when all PhrasePositions have exactly the same position.
     if (postings.length > 0) {
-      min = new PhrasePositions(postings[0].postings, postings[0].position, 0);
+      min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
       max = min;
       max.doc = -1;
       for (int i = 1; i < postings.length; i++) {
-        PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
+        PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
         max.next = pp;
         max = pp;
         max.doc = -1;
@@ -19,22 +19,38 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
 
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.OpenBitSet;
 
 final class SloppyPhraseScorer extends PhraseScorer {
-  private int slop;
-  private boolean checkedRepeats; // flag to only check in first candidate doc in case there are no repeats
-  private boolean hasRepeats;     // flag indicating that there are repeats (already checked in first candidate doc)
-  private PhraseQueue pq;         // for advancing min position
-  private PhrasePositions[] nrPps; // non repeating pps ordered by their query offset
+
+  private final int slop;
+  private final int numPostings;
+  private final PhraseQueue pq; // for advancing min position
+
+  private int end; // current largest phrase position
+
+  private boolean hasRpts; // flag indicating that there are repetitions (as checked in first candidate doc)
+  private boolean checkedRpts; // flag to only check for repetitions in first candidate doc
+  private boolean hasMultiTermRpts; //
+  private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset
+  private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
 
   SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
       int slop, Similarity.SloppySimScorer docScorer) {
     super(weight, postings, docScorer);
     this.slop = slop;
+    this.numPostings = postings==null ? 0 : postings.length;
+    pq = new PhraseQueue(postings.length);
   }
 
 
   /**
   * Score a candidate doc for all slop-valid position-combinations (matches)
   * encountered while traversing/hopping the PhrasePositions.
@@ -55,31 +71,27 @@ final class SloppyPhraseScorer extends PhraseScorer {
    */
   @Override
   protected float phraseFreq() throws IOException {
-    int end = initPhrasePositions();
-    //printPositions(System.err, "INIT DONE:");
-    if (end==Integer.MIN_VALUE) {
+    if (!initPhrasePositions()) {
       return 0.0f;
     }
 
     float freq = 0.0f;
     PhrasePositions pp = pq.pop();
     int matchLength = end - pp.position;
-    int next = pq.size()>0 ? pq.top().position : pp.position;
-    //printQueue(System.err, pp, "Bef Loop: next="+next+" mlen="+end+"-"+pp.position+"="+matchLength);
-    while (pp.nextPosition() && (end=advanceRepeats(pp, end)) != Integer.MIN_VALUE) {
-      if (pp.position > next) {
-        //printQueue(System.err, pp, "A: >next="+next+" matchLength="+matchLength);
+    int next = pq.top().position;
+    while (advancePP(pp)) {
+      if (hasRpts && !advanceRpts(pp)) {
+        break; // pps exhausted
+      }
+      if (pp.position > next) { // done minimizing current match-length
         if (matchLength <= slop) {
           freq += docScorer.computeSlopFactor(matchLength); // score match
         }
         pq.add(pp);
         pp = pq.pop();
-        next = pq.size()>0 ? pq.top().position : pp.position;
+        next = pq.top().position;
         matchLength = end - pp.position;
-        //printQueue(System.err, pp, "B: >next="+next+" matchLength="+matchLength);
       } else {
         int matchLength2 = end - pp.position;
-        //printQueue(System.err, pp, "C: mlen2<mlen: next="+next+" matchLength="+matchLength+" matchLength2="+matchLength2);
        if (matchLength2 < matchLength) {
          matchLength = matchLength2;
        }
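Here matchLength is the extra distance beyond an exact phrase; a candidate contributes only when matchLength <= slop, weighted by the similarity's slop factor. An illustrative standalone sketch, assuming DefaultSimilarity's sloppyFreq of 1/(distance+1) (hypothetical demo class, not from this commit):

public class SlopFactorDemo {
  // DefaultSimilarity-style slop factor: exact matches weigh the most.
  static float sloppyFreq(int distance) {
    return 1.0f / (distance + 1);
  }

  public static void main(String[] args) {
    // "quick fox"~2 against "quick brown lazy fox": matchLength = 2 <= slop, so it matches.
    System.out.println(sloppyFreq(0)); // 1.0      exact phrase
    System.out.println(sloppyFreq(2)); // 0.33...  two positions of slop consumed
  }
}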
@@ -91,53 +103,82 @@ final class SloppyPhraseScorer extends PhraseScorer {
     return freq;
   }
 
-  /**
-   * Advance repeating pps of an input (non-repeating) pp.
-   * Return a modified 'end' in case pp or its repeats exceeds original 'end'.
-   * "Dirty" trick: when there are repeats, modifies pp's position to that of
-   * least repeater of pp (needed when due to holes repeaters' positions are "back").
-   */
-  private int advanceRepeats(PhrasePositions pp, int end) throws IOException {
-    int repeatsEnd = end;
-    if (pp.position > repeatsEnd) {
-      repeatsEnd = pp.position;
+  /** advance a PhrasePosition and update 'end', return false if exhausted */
+  private boolean advancePP(PhrasePositions pp) throws IOException {
+    if (!pp.nextPosition()) {
+      return false;
+    }
-    if (!hasRepeats) {
-      return repeatsEnd;
+    if (pp.position > end) {
+      end = pp.position;
+    }
+    return true;
   }
 
+  /** pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser
+   * of the two colliding pps. Note that there can only be one collision, as by the initialization
+   * there were no collisions before pp was advanced. */
+  private boolean advanceRpts(PhrasePositions pp) throws IOException {
+    if (pp.rptGroup < 0) {
+      return true; // not a repeater
+    }
+    PhrasePositions[] rg = rptGroups[pp.rptGroup];
+    OpenBitSet bits = new OpenBitSet(rg.length); // for re-queuing after collisions are resolved
+    int k0 = pp.rptInd;
+    int k;
+    while((k=collide(pp)) >= 0) {
+      pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
+      if (!advancePP(pp)) {
+        return false; // exhausted
+      }
+      if (k != k0) { // careful: mark only those currently in the queue
+        bits.set(k); // mark that pp2 need to be re-queued
+      }
+    }
+    // collisions resolved, now re-queue
+    // empty (partially) the queue until seeing all pps advanced for resolving collisions
+    int n = 0;
+    while (bits.cardinality() > 0) {
+      PhrasePositions pp2 = pq.pop();
+      rptStack[n++] = pp2;
+      if (pp2.rptGroup >= 0 && bits.get(pp2.rptInd)) {
+        bits.clear(pp2.rptInd);
+      }
+    }
+    // add back to queue
+    for (int i=n-1; i>=0; i--) {
+      pq.add(rptStack[i]);
+    }
+    return true;
+  }
 
+  /** compare two pps, but only by position and offset */
+  private PhrasePositions lesser(PhrasePositions pp, PhrasePositions pp2) {
+    if (pp.position < pp2.position ||
+        (pp.position == pp2.position && pp.offset < pp2.offset)) {
+      return pp;
+    }
+    return pp2;
+  }
 
+  /** index of a pp2 colliding with pp, or -1 if none */
+  private int collide(PhrasePositions pp) {
     int tpPos = tpPos(pp);
-    for (PhrasePositions pp2=pp.nextRepeating; pp2!=null; pp2=pp2.nextRepeating) {
-      while (tpPos(pp2) <= tpPos) {
-        if (!pp2.nextPosition()) {
-          return Integer.MIN_VALUE;
-        }
-      }
-      tpPos = tpPos(pp2);
-      if (pp2.position > repeatsEnd) {
-        repeatsEnd = pp2.position;
-      }
-      // "dirty" trick: with holes, given a pp, its repeating pp2 might have smaller position.
-      // so in order to have the right "start" in matchLength computation we fake pp.position.
-      // this relies on pp.nextPosition() not using pp.position.
-      if (pp2.position < pp.position) {
-        pp.position = pp2.position;
-      }
-    }
+    PhrasePositions[] rg = rptGroups[pp.rptGroup];
+    for (int i=0; i<rg.length; i++) {
+      PhrasePositions pp2 = rg[i];
+      if (pp2 != pp && tpPos(pp2) == tpPos) {
+        return pp2.rptInd;
+      }
+    }
-    return repeatsEnd;
+    return -1;
   }
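A repeater "collides" when two PhrasePositions of the same repetition group land on the same term position in the document, where tpPos = position + offset (a PP stores position = document position minus its query offset). A toy illustration (hypothetical demo class, not Lucene code):

public class CollisionDemo {
  // Recovers the actual in-document term position of a PP.
  static int tpPos(int position, int offset) {
    return position + offset;
  }

  public static void main(String[] args) {
    // "my ho my"~2: the two "my" PPs sit at query offsets 0 and 2.
    int pp1 = tpPos(5, 0); // PP at offset 0 standing on doc position 5
    int pp2 = tpPos(3, 2); // PP at offset 2 also standing on doc position 5
    System.out.println(pp1 == pp2); // true: a collision, so the lesser PP must advance
  }
}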
@@ -145,118 +186,305 @@ final class SloppyPhraseScorer extends PhraseScorer {
    * <li>repetitions: <b>"ho my my"~2</b>
    * <li>repetitions: <b>"my ho my"~2</b>
    * </ol>
-   * @return end (max position), or Integer.MIN_VALUE if any term ran out (i.e. done)
+   * @return false if PPs are exhausted (and so current doc will not be a match)
    */
-  private int initPhrasePositions() throws IOException {
-    int end = Integer.MIN_VALUE;
-
-    // no repeats at all (most common case is also the simplest one)
-    if (checkedRepeats && !hasRepeats) {
-      // build queue from list
-      pq.clear();
-      for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
-        pp.firstPosition();
-        if (pp.position > end) {
-          end = pp.position;
-        }
-        pq.add(pp); // build pq from list
-      }
-      return end;
+  private boolean initPhrasePositions() throws IOException {
+    end = Integer.MIN_VALUE;
+    if (!checkedRpts) {
+      return initFirstTime();
+    }
 
-    //printPositions(System.err, "Init: 1: Bef position");
-
-    // position the pp's
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
-      pp.firstPosition();
+    if (!hasRpts) {
+      initSimple();
+      return true; // PPs available
+    }
 
-    //printPositions(System.err, "Init: 2: Aft position");
-
-    // one time initialization for this scorer (done only for the first candidate doc)
-    if (!checkedRepeats) {
-      checkedRepeats = true;
-      ArrayList<PhrasePositions> ppsA = new ArrayList<PhrasePositions>();
-      PhrasePositions dummyPP = new PhrasePositions(null, -1, -1);
-      // check for repeats
-      for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
-        if (pp.nextRepeating != null) {
-          continue; // a repetition of an earlier pp
-        }
-        ppsA.add(pp);
-        int tpPos = tpPos(pp);
-        for (PhrasePositions prevB=pp, pp2=pp.next; pp2!= min; pp2=pp2.next) {
-          if (
-              pp2.nextRepeating != null  // already detected as a repetition of an earlier pp
-              || pp.offset == pp2.offset // not a repetition: the two PPs are originally in same offset in the query!
-              || tpPos(pp2) != tpPos) {  // not a repetition
-            continue;
-          }
-          // a repetition
-          hasRepeats = true;
-          prevB.nextRepeating = pp2;  // add pp2 to the repeats linked list
-          pp2.nextRepeating = dummyPP; // allows not to handle the last pp in a sub-list
-          prevB = pp2;
-        }
-      }
-      if (hasRepeats) {
-        // clean dummy markers
-        for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
-          if (pp.nextRepeating == dummyPP) {
-            pp.nextRepeating = null;
-          }
-        }
-      }
-      nrPps = ppsA.toArray(new PhrasePositions[0]);
-      pq = new PhraseQueue(nrPps.length);
-    }
-
-    //printPositions(System.err, "Init: 3: Aft check-repeats");
-
-    // with repeats must advance some repeating pp's so they all start with differing tp's
-    if (hasRepeats) {
-      for (PhrasePositions pp: nrPps) {
-        if ((end=advanceRepeats(pp, end)) == Integer.MIN_VALUE) {
-          return Integer.MIN_VALUE; // ran out of a term -- done (no valid matches in current doc)
-        }
-      }
-    }
-
-    //printPositions(System.err, "Init: 4: Aft advance-repeats");
-
-    // build queue from non repeating pps
+    return initComplex();
   }
 
+  /** no repeats: simplest case, and most common. It is important to keep this piece of the code simple and efficient */
+  private void initSimple() throws IOException {
+    //System.err.println("initSimple: doc: "+min.doc);
+    pq.clear();
-    for (PhrasePositions pp: nrPps) {
+    // position pps and build queue from list
+    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+      pp.firstPosition();
      if (pp.position > end) {
        end = pp.position;
      }
      pq.add(pp);
    }
 
-    return end;
  }
 
+  /** with repeats: not so simple. */
+  private boolean initComplex() throws IOException {
+    //System.err.println("initComplex: doc: "+min.doc);
+    placeFirstPositions();
+    if (!advanceRepeatGroups()) {
+      return false; // PPs exhausted
+    }
+    fillQueue();
+    return true; // PPs available
+  }
 
+  /** move all PPs to their first position */
+  private void placeFirstPositions() throws IOException {
+    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+      pp.firstPosition();
+    }
+  }
 
+  /** Fill the queue (all pps are already placed) */
+  private void fillQueue() {
+    pq.clear();
+    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+      if (pp.position > end) {
+        end = pp.position;
+      }
+      pq.add(pp);
+    }
+  }
 
+  /** At initialization (each doc), each repetition group is sorted by (query) offset.
+   * This provides the start condition: no collisions.
+   * <p>Case 1: no multi-term repeats<br>
+   * It is sufficient to advance each pp in the group by one less than its group index.
+   * So lesser pp is not advanced, 2nd one advance once, 3rd one advanced twice, etc.
+   * <p>Case 2: multi-term repeats<br>
+   *
+   * @return false if PPs are exhausted.
+   */
+  private boolean advanceRepeatGroups() throws IOException {
+    for (PhrasePositions[] rg: rptGroups) {
+      if (hasMultiTermRpts) {
+        // more involved, some may not collide
+        int incr;
+        for (int i=0; i<rg.length; i+=incr) {
+          incr = 1;
+          PhrasePositions pp = rg[i];
+          int k;
+          while((k=collide(pp)) >= 0) {
+            PhrasePositions pp2 = lesser(pp, rg[k]);
+            if (!advancePP(pp2)) {  // at initialization always advance pp with higher offset
+              return false; // exhausted
+            }
+            if (pp2.rptInd < i) { // should not happen?
+              incr = 0;
+              break;
+            }
+          }
+        }
+      } else {
+        // simpler, we know exactly how much to advance
+        for (int j=1; j<rg.length; j++) {
+          for (int k=0; k<j; k++) {
+            if (!rg[j].nextPosition()) {
+              return false; // PPs exhausted
+            }
+          }
+        }
+      }
+    }
+    return true; // PPs available
+  }
 
+  /** initialize with checking for repeats. Heavy work, but done only for the first candidate doc.<p>
+   * If there are repetitions, check if multi-term postings (MTP) are involved.<p>
+   * Without MTP, once PPs are placed in the first candidate doc, repeats (and groups) are visible.<br>
+   * With MTP, a more complex check is needed, up-front, as there may be "hidden collisions".<br>
+   * For example P1 has {A,B}, P2 has {B,C}, and the first doc is: "A C B". At start, P1 would point
+   * to "A", P2 to "C", and it will not be identified that P1 and P2 are repetitions of each other.<p>
+   * The more complex initialization has two parts:<br>
+   * (1) identification of repetition groups.<br>
+   * (2) advancing repeat groups at the start of the doc.<br>
+   * For (1), a possible solution is to just create a single repetition group,
+   * made of all repeating pps. But this would slow down the check for collisions,
+   * as all pps would need to be checked. Instead, we compute "connected regions"
+   * on the bipartite graph of postings and terms.
+   */
+  private boolean initFirstTime() throws IOException {
+    //System.err.println("initFirstTime: doc: "+min.doc);
+    checkedRpts = true;
+    placeFirstPositions();
+
+    LinkedHashMap<Term,Integer> rptTerms = repeatingTerms();
+    hasRpts = !rptTerms.isEmpty();
+
+    if (hasRpts) {
+      rptStack = new PhrasePositions[numPostings]; // needed with repetitions
+      ArrayList<ArrayList<PhrasePositions>> rgs = gatherRptGroups(rptTerms);
+      sortRptGroups(rgs);
+      if (!advanceRepeatGroups()) {
+        return false; // PPs exhausted
+      }
+    }
+
+    fillQueue();
+    return true; // PPs available
+  }
 
+  /** sort each repetition group by (query) offset.
+   * Done only once (at first doc) and allows to initialize faster for each doc. */
+  private void sortRptGroups(ArrayList<ArrayList<PhrasePositions>> rgs) {
+    rptGroups = new PhrasePositions[rgs.size()][];
+    Comparator<PhrasePositions> cmprtr = new Comparator<PhrasePositions>() {
+      public int compare(PhrasePositions pp1, PhrasePositions pp2) {
+        return pp1.offset - pp2.offset;
+      }
+    };
+    for (int i=0; i<rptGroups.length; i++) {
+      PhrasePositions[] rg = rgs.get(i).toArray(new PhrasePositions[0]);
+      Arrays.sort(rg, cmprtr);
+      rptGroups[i] = rg;
+      for (int j=0; j<rg.length; j++) {
+        rg[j].rptInd = j; // we use this index for efficient re-queuing
+      }
+    }
+  }
 
+  /** Detect repetition groups. Done once - for first doc */
+  private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term,Integer> rptTerms) throws IOException {
+    PhrasePositions[] rpp = repeatingPPs(rptTerms);
+    ArrayList<ArrayList<PhrasePositions>> res = new ArrayList<ArrayList<PhrasePositions>>();
+    if (!hasMultiTermRpts) {
+      // simpler - no multi-terms - can base on positions in first doc
+      for (int i=0; i<rpp.length; i++) {
+        PhrasePositions pp = rpp[i];
+        if (pp.rptGroup >=0) continue; // already marked as a repetition
+        int tpPos = tpPos(pp);
+        for (int j=i+1; j<rpp.length; j++) {
+          PhrasePositions pp2 = rpp[j];
+          if (
+              pp2.rptGroup >=0        // already marked as a repetition
+              || pp2.offset == pp.offset // not a repetition: two PPs are originally in same offset in the query!
+              || tpPos(pp2) != tpPos) {  // not a repetition
+            continue;
+          }
+          // a repetition
+          int g = pp.rptGroup;
+          if (g < 0) {
+            g = res.size();
+            pp.rptGroup = g;
+            ArrayList<PhrasePositions> rl = new ArrayList<PhrasePositions>(2);
+            rl.add(pp);
+            res.add(rl);
+          }
+          pp2.rptGroup = g;
+          res.get(g).add(pp2);
+        }
+      }
+    } else {
+      // more involved - has multi-terms
+      ArrayList<HashSet<PhrasePositions>> tmp = new ArrayList<HashSet<PhrasePositions>>();
+      ArrayList<OpenBitSet> bb = ppTermsBitSets(rpp, rptTerms);
+      unionTermGroups(bb);
+      HashMap<Term,Integer> tg = termGroups(rptTerms, bb);
+      HashSet<Integer> distinctGroupIDs = new HashSet<Integer>(tg.values());
+      for (int i=0; i<distinctGroupIDs.size(); i++) {
+        tmp.add(new HashSet<PhrasePositions>());
+      }
+      for (PhrasePositions pp : rpp) {
+        for (Term t: pp.terms) {
+          if (rptTerms.containsKey(t)) {
+            int g = tg.get(t);
+            tmp.get(g).add(pp);
+            assert pp.rptGroup==-1 || pp.rptGroup==g;
+            pp.rptGroup = g;
+          }
+        }
+      }
+      for (HashSet<PhrasePositions> hs : tmp) {
+        res.add(new ArrayList<PhrasePositions>(hs));
+      }
+    }
+    return res;
+  }
 
+  /** Actual position in doc of a PhrasePosition (relies on the fact that position = tpPos - offset) */
+  private final int tpPos(PhrasePositions pp) {
+    return pp.position + pp.offset;
+  }
 
+//  private void printPositions(PrintStream ps, String title) {
+//    ps.println();
+//    ps.println("---- "+title);
+//    int k = 0;
+//    if (nrPps!=null) {
+//      for (PhrasePositions pp: nrPps) {
+//        ps.println("  " + k++ + "  " + pp);
+//      }
+//    } else {
+//      for (PhrasePositions pp=min; 0==k || pp!=min; pp = pp.next) {
+//        ps.println("  " + k++ + "  " + pp);
+//      }
+//    }
+//  }
 
+  /** find repeating terms and assign them ordinal values */
+  private LinkedHashMap<Term,Integer> repeatingTerms() {
+    LinkedHashMap<Term,Integer> tord = new LinkedHashMap<Term,Integer>();
+    HashMap<Term,Integer> tcnt = new HashMap<Term,Integer>();
+    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+      for (Term t : pp.terms) {
+        Integer cnt0 = tcnt.get(t);
+        Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
+        tcnt.put(t, cnt);
+        if (cnt==2) {
+          tord.put(t,tord.size());
+        }
+      }
+    }
+    return tord;
+  }
 
+  /** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */
+  private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
+    ArrayList<PhrasePositions> rp = new ArrayList<PhrasePositions>();
+    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+      for (Term t : pp.terms) {
+        if (rptTerms.containsKey(t)) {
+          rp.add(pp);
+          hasMultiTermRpts |= (pp.terms.length > 1);
+          break;
+        }
+      }
+    }
+    return rp.toArray(new PhrasePositions[0]);
+  }
 
+  /** bit-sets - for each repeating pp, for each of its repeating terms, the term ordinal values is set */
+  private ArrayList<OpenBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
+    ArrayList<OpenBitSet> bb = new ArrayList<OpenBitSet>(rpp.length);
+    for (PhrasePositions pp : rpp) {
+      OpenBitSet b = new OpenBitSet(tord.size());
+      Integer ord;
+      for (Term t: pp.terms) {
+        if ((ord=tord.get(t))!=null) {
+          b.set(ord);
+        }
+      }
+      bb.add(b);
+    }
+    return bb;
+  }
 
+  /** union (term group) bit-sets until they are disjoint (O(n^2)), so that each group has different terms */
+  private void unionTermGroups(ArrayList<OpenBitSet> bb) {
+    int incr;
+    for (int i=0; i<bb.size()-1; i+=incr) {
+      incr = 1;
+      int j = i+1;
+      while (j<bb.size()) {
+        if (bb.get(i).intersects(bb.get(j))) {
+          bb.get(i).union(bb.get(j));
+          bb.remove(j);
+          incr = 0;
+        } else {
+          ++j;
+        }
+      }
+    }
+  }
 
+  /** map each term to the single group that contains it */
+  private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<OpenBitSet> bb) throws IOException {
+    HashMap<Term,Integer> tg = new HashMap<Term,Integer>();
+    Term[] t = tord.keySet().toArray(new Term[0]);
+    for (int i=0; i<bb.size(); i++) { // i is the group no.
+      DocIdSetIterator bits = bb.get(i).iterator();
+      int ord;
+      while ((ord=bits.nextDoc())!=NO_MORE_DOCS) {
+        tg.put(t[ord],i);
+      }
+    }
+    return tg;
+  }
 
+//  private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
+//    //if (min.doc != ?) return;
+//    ps.println();
+//    ps.println("---- "+title);
+//    ps.println("EXT: "+ext);
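The "connected regions on the bipartite graph" step reduces to repeatedly unioning overlapping term-ordinal bit-sets until they are disjoint. A standalone sketch of that idea using the javadoc's own example, with java.util.BitSet standing in for Lucene's OpenBitSet (hypothetical demo class, not from this commit):

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class UnionGroupsDemo {
  // Each bitset holds the repeating-term ordinals of one multi-term posting;
  // any two sets that share a term merge into a single repetition group.
  static void unionUntilDisjoint(List<BitSet> bb) {
    int incr;
    for (int i = 0; i < bb.size() - 1; i += incr) {
      incr = 1;
      int j = i + 1;
      while (j < bb.size()) {
        if (bb.get(i).intersects(bb.get(j))) {
          bb.get(i).or(bb.get(j)); // union the two groups
          bb.remove(j);
          incr = 0;                // re-scan: the merged set may now overlap earlier ones
        } else {
          ++j;
        }
      }
    }
  }

  public static void main(String[] args) {
    // Term ordinals: A=0, B=1, C=2. P1={A,B} and P2={B,C} share B -> one group.
    List<BitSet> bb = new ArrayList<BitSet>();
    BitSet p1 = new BitSet(); p1.set(0); p1.set(1);
    BitSet p2 = new BitSet(); p2.set(1); p2.set(2);
    bb.add(p1); bb.add(p2);
    unionUntilDisjoint(bb);
    System.out.println(bb); // [{0, 1, 2}] -- a single connected region
  }
}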
@@ -266,7 +494,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
//    ps.println("  " + 0 + "  " + t[0]);
//    for (int i=1; i<t.length; i++) {
//      t[i] = pq.pop();
-//      assert t[i-1].position <= t[i].position : "PQ is out of order: "+(i-1)+"::"+t[i-1]+" "+i+"::"+t[i];
+//      assert t[i-1].position <= t[i].position;
//      ps.println("  " + i + "  " + t[i]);
//    }
//    // add them back
@@ -275,4 +503,5 @@ final class SloppyPhraseScorer extends PhraseScorer {
//      }
//    }
//  }
+
 }
@@ -57,9 +57,12 @@ public class SpanWeight extends Weight {
       termContexts.put(term, state);
       i++;
     }
-    stats = similarity.computeWeight(query.getBoost(),
-                                     searcher.collectionStatistics(query.getField()),
-                                     termStats);
+    final String field = query.getField();
+    if (field != null) {
+      stats = similarity.computeWeight(query.getBoost(),
+                                       searcher.collectionStatistics(query.getField()),
+                                       termStats);
+    }
   }
 
   @Override
@@ -67,18 +70,24 @@ public class SpanWeight extends Weight {
 
   @Override
   public float getValueForNormalization() throws IOException {
-    return stats.getValueForNormalization();
+    return stats == null ? 1.0f : stats.getValueForNormalization();
   }
 
   @Override
   public void normalize(float queryNorm, float topLevelBoost) {
-    stats.normalize(queryNorm, topLevelBoost);
+    if (stats != null) {
+      stats.normalize(queryNorm, topLevelBoost);
+    }
   }
 
   @Override
   public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
       boolean topScorer, Bits acceptDocs) throws IOException {
-    return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
+    if (stats == null) {
+      return null;
+    } else {
+      return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
+    }
   }
 
   @Override
@@ -67,7 +67,7 @@ public class ChecksumIndexInput extends IndexInput {
 
   @Override
   public void seek(long pos) {
-    throw new RuntimeException("not allowed");
+    throw new UnsupportedOperationException();
   }
 
   @Override
@@ -68,7 +68,7 @@ public class ChecksumIndexOutput extends IndexOutput {
 
   @Override
   public void seek(long pos) {
-    throw new RuntimeException("not allowed");
+    throw new UnsupportedOperationException();
   }
 
   /**
@@ -153,7 +153,7 @@ final class CompoundFileWriter implements Closeable{
    */
   public void close() throws IOException {
     if (closed) {
-      throw new IllegalStateException("already closed");
+      return;
     }
     IOException priorException = null;
     IndexOutput entryTableOut = null;
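This makes close() idempotent, which is what the java.io.Closeable contract asks for: closing an already-closed resource should have no effect rather than throw. A minimal sketch of the pattern (hypothetical class, not from this commit):

import java.io.Closeable;
import java.io.IOException;

class Resource implements Closeable {
  private boolean closed;

  @Override
  public void close() throws IOException {
    if (closed) {
      return; // no-op on a second close, per the Closeable contract
    }
    closed = true;
    // ... release underlying outputs here ...
  }
}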
@@ -192,7 +192,7 @@ final class CompoundFileWriter implements Closeable{
 
   private final void ensureOpen() {
     if (closed) {
-      throw new IllegalStateException("CFS Directory is already closed");
+      throw new AlreadyClosedException("CFS Directory is already closed");
     }
   }
 
@@ -260,7 +260,7 @@ final class CompoundFileWriter implements Closeable{
     } else {
       entry.dir = this.directory;
       if (directory.fileExists(name)) {
-        throw new IOException("File already exists");
+        throw new IllegalArgumentException("File " + name + " already exists");
       }
       out = new DirectCFSIndexOutput(directory.createOutput(name, context), entry,
           true);
@@ -171,6 +171,7 @@ class NativeFSLock extends Lock {
         throw new IOException("Cannot create directory: " +
             lockDir.getAbsolutePath());
     } else if (!lockDir.isDirectory()) {
+      // TODO: NoSuchDirectoryException instead?
       throw new IOException("Found regular file where directory expected: " +
           lockDir.getAbsolutePath());
     }
@@ -121,6 +121,7 @@ class SimpleFSLock extends Lock {
       throw new IOException("Cannot create directory: " +
           lockDir.getAbsolutePath());
     } else if (!lockDir.isDirectory()) {
+      // TODO: NoSuchDirectoryException instead?
       throw new IOException("Found regular file where directory expected: " +
           lockDir.getAbsolutePath());
     }
@@ -532,10 +532,10 @@ public final class Util {
    * Dumps an {@link FST} to a GraphViz's <code>dot</code> language description
    * for visualization. Example of use:
    *
-   * <pre>
-   *   PrintStream ps = new PrintStream("out.dot");
-   *   fst.toDot(ps);
-   *   ps.close();
+   * <pre class="prettyprint">
+   *   PrintWriter pw = new PrintWriter("out.dot");
+   *   Util.toDot(fst, pw, true, true);
+   *   pw.close();
    * </pre>
    *
    * and then, from command line:
@@ -188,7 +188,7 @@ public class TestDocValues extends LuceneTestCase {
       DocValues r = Ints.getValues(dir, "test", 2, Type.VAR_INTS, newIOContext(random));
       Source source = getSource(r);
       assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
-          expectedTypes[i], source.type());
+          expectedTypes[i], source.getType());
       assertEquals(minMax[i][0], source.getInt(0));
       assertEquals(minMax[i][1], source.getInt(1));
 
@@ -368,7 +368,7 @@ public class TestDocValues extends LuceneTestCase {
     DocValues r = Ints.getValues(dir, "test", NUM_VALUES + additionalDocs, type, newIOContext(random));
     for (int iter = 0; iter < 2; iter++) {
       Source s = getSource(r);
-      assertEquals(type, s.type());
+      assertEquals(type, s.getType());
       for (int i = 0; i < NUM_VALUES; i++) {
         final long v = s.getInt(i);
         assertEquals("index " + i, values[i], v);
@@ -682,7 +682,7 @@ public class TestCompoundFile extends LuceneTestCase
        try {
            newDir.copy(csw, "d1", "d1", newIOContext(random));
            fail("file does already exist");
-       } catch (IOException e) {
+       } catch (IllegalArgumentException e) {
            //
        }
        out.close();
@@ -83,7 +83,7 @@ public class TestCustomNorms extends LuceneTestCase {
     assertNotNull(normValues);
     Source source = normValues.getSource();
     assertTrue(source.hasArray());
-    assertEquals(Type.FLOAT_32, normValues.type());
+    assertEquals(Type.FLOAT_32, normValues.getType());
     float[] norms = (float[]) source.getArray();
     for (int i = 0; i < open.maxDoc(); i++) {
       Document document = open.document(i);
@@ -148,8 +148,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
 
     Directory target = newDirectory();
     IndexWriter w = new IndexWriter(target, writerConfig(random.nextBoolean()));
-    IndexReader r_1 = IndexReader.open(w_1, true);
-    IndexReader r_2 = IndexReader.open(w_2, true);
+    DirectoryReader r_1 = DirectoryReader.open(w_1, true);
+    DirectoryReader r_2 = DirectoryReader.open(w_2, true);
     if (random.nextBoolean()) {
       w.addIndexes(d_1, d_2);
     } else {
@@ -163,7 +163,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
 
     // check values
 
-    IndexReader merged = IndexReader.open(w, true);
+    DirectoryReader merged = DirectoryReader.open(w, true);
     Source source_1 = getSource(getDocValues(r_1, first.name()));
     Source source_2 = getSource(getDocValues(r_2, second.name()));
     Source source_1_merged = getSource(getDocValues(merged, first.name()));
@@ -260,7 +260,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     FixedBitSet deleted = indexValues(w, numValues, val, numVariantList,
         withDeletions, 7);
     List<Closeable> closeables = new ArrayList<Closeable>();
-    IndexReader r = IndexReader.open(w, true);
+    DirectoryReader r = DirectoryReader.open(w, true);
     final int numRemainingValues = numValues - deleted.cardinality();
     final int base = r.numDocs() - numRemainingValues;
     // for FIXED_INTS_8 we use value mod 128 - to enable testing in
@@ -338,7 +338,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     final int bytesSize = 1 + atLeast(50);
     FixedBitSet deleted = indexValues(w, numValues, byteIndexValue,
         byteVariantList, withDeletions, bytesSize);
-    final IndexReader r = IndexReader.open(w, withDeletions);
+    final DirectoryReader r = DirectoryReader.open(w, withDeletions);
     assertEquals(0, r.numDeletedDocs());
     final int numRemainingValues = numValues - deleted.cardinality();
     final int base = r.numDocs() - numRemainingValues;
@@ -422,13 +422,17 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     for (Type val : numVariantList) {
       indexValues(w, numValues, val, numVariantList,
           false, 7);
-      IndexReader r = IndexReader.open(w, true);
+      DirectoryReader r = DirectoryReader.open(w, true);
       if (val == Type.VAR_INTS) {
        DocValues docValues = getDocValues(r, val.name());
       }
       DocValues docValues = getDocValues(r, val.name());
       assertNotNull(docValues);
       // make sure we don't get a direct source since they don't support getArray()
       if (val == Type.VAR_INTS) {
       }
       Source source = docValues.getSource();
 
-      switch (source.type()) {
+      switch (source.getType()) {
       case FIXED_INTS_8:
       {
         assertTrue(source.hasArray());
@@ -465,7 +469,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
         }
       }
         break;
-      case VAR_INTS:
+      case VAR_INTS:
+        System.out.println(source.hasArray());
         assertFalse(source.hasArray());
         break;
       case FLOAT_32:
@@ -487,7 +492,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
         }
         break;
       default:
-        fail("unexpected value " + source.type());
+        fail("unexpected value " + source.getType());
       }
       r.close();
     }
@@ -503,27 +508,28 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     final int numValues = 50 + atLeast(10);
     // only single byte fixed straight supports getArray()
     indexValues(w, numValues, Type.BYTES_FIXED_STRAIGHT, null, false, 1);
-    IndexReader r = IndexReader.open(w, true);
+    DirectoryReader r = DirectoryReader.open(w, true);
     DocValues docValues = getDocValues(r, Type.BYTES_FIXED_STRAIGHT.name());
     assertNotNull(docValues);
     // make sure we don't get a direct source since they don't support
     // getArray()
     Source source = docValues.getSource();
 
-    switch (source.type()) {
+    switch (source.getType()) {
     case BYTES_FIXED_STRAIGHT: {
       BytesRef ref = new BytesRef();
-      assertTrue(source.hasArray());
-      byte[] values = (byte[]) source.getArray();
-      for (int i = 0; i < numValues; i++) {
-        source.getBytes(i, ref);
-        assertEquals(1, ref.length);
-        assertEquals(values[i], ref.bytes[ref.offset]);
+      if (source.hasArray()) {
+        byte[] values = (byte[]) source.getArray();
+        for (int i = 0; i < numValues; i++) {
+          source.getBytes(i, ref);
+          assertEquals(1, ref.length);
+          assertEquals(values[i], ref.bytes[ref.offset]);
+        }
       }
     }
       break;
     default:
-      fail("unexpected value " + source.type());
+      fail("unexpected value " + source.getType());
     }
     r.close();
     w.close();
@@ -543,7 +549,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     case 2:
       return values.getDirectSource();
     case 1:
-      if(values.type() == Type.BYTES_VAR_SORTED || values.type() == Type.BYTES_FIXED_SORTED) {
+      if(values.getType() == Type.BYTES_VAR_SORTED || values.getType() == Type.BYTES_FIXED_SORTED) {
        return values.getSource().asSortedSource();
      }
    default:
@@ -925,4 +931,4 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     r.close();
     dir.close();
   }
 }
 }
@@ -97,7 +97,7 @@ public class TestDocumentWriter extends LuceneTestCase {
     // omitNorms is true
     for (FieldInfo fi : reader.getFieldInfos()) {
       if (fi.isIndexed) {
-        assertTrue(fi.omitNorms == !reader.hasNorms(fi.name));
+        assertTrue(fi.omitNorms == (reader.normValues(fi.name) == null));
       }
     }
     reader.close();
@@ -330,10 +330,10 @@ public class TestDocumentWriter extends LuceneTestCase {
     SegmentReader reader = getOnlySegmentReader(IndexReader.open(dir));
     FieldInfos fi = reader.getFieldInfos();
     // f1
-    assertFalse("f1 should have no norms", reader.hasNorms("f1"));
+    assertFalse("f1 should have no norms", fi.fieldInfo("f1").hasNorms());
     assertEquals("omitTermFreqAndPositions field bit should not be set for f1", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
     // f2
-    assertTrue("f2 should have norms", reader.hasNorms("f2"));
+    assertTrue("f2 should have norms", fi.fieldInfo("f2").hasNorms());
     assertEquals("omitTermFreqAndPositions field bit should be set for f2", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
     reader.close();
   }
@@ -562,7 +562,7 @@ public class TestDuelingCodecs extends LuceneTestCase {
   public void assertDocValues(DocValues leftDocValues, DocValues rightDocValues) throws Exception {
     assertNotNull(info, leftDocValues);
     assertNotNull(info, rightDocValues);
-    assertEquals(info, leftDocValues.type(), rightDocValues.type());
+    assertEquals(info, leftDocValues.getType(), rightDocValues.getType());
     assertEquals(info, leftDocValues.getValueSize(), rightDocValues.getValueSize());
     assertDocValuesSource(leftDocValues.getDirectSource(), rightDocValues.getDirectSource());
     assertDocValuesSource(leftDocValues.getSource(), rightDocValues.getSource());
@@ -572,8 +572,8 @@ public class TestDuelingCodecs extends LuceneTestCase {
    * checks source API
    */
   public void assertDocValuesSource(DocValues.Source left, DocValues.Source right) throws Exception {
-    DocValues.Type leftType = left.type();
-    assertEquals(info, leftType, right.type());
+    DocValues.Type leftType = left.getType();
+    assertEquals(info, leftType, right.getType());
     switch(leftType) {
       case VAR_INTS:
       case FIXED_INTS_8:
@@ -96,7 +96,7 @@ public class TestNorms extends LuceneTestCase {
     assertNotNull(normValues);
     Source source = normValues.getSource();
     assertTrue(source.hasArray());
-    assertEquals(Type.FIXED_INTS_8, normValues.type());
+    assertEquals(Type.FIXED_INTS_8, normValues.getType());
     byte[] norms = (byte[]) source.getArray();
     for (int i = 0; i < open.maxDoc(); i++) {
       Document document = open.document(i);
@@ -128,9 +128,9 @@ public class TestNorms extends LuceneTestCase {
     assertFalse(fieldInfo.omitNorms);
     assertTrue(fieldInfo.isIndexed);
     if (secondWriteNorm) {
-      assertTrue(fieldInfo.normsPresent());
+      assertTrue(fieldInfo.hasNorms());
     } else {
-      assertFalse(fieldInfo.normsPresent());
+      assertFalse(fieldInfo.hasNorms());
     }
 
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
@@ -144,18 +144,18 @@ public class TestNorms extends LuceneTestCase {
       FieldInfo fi = mergedReader.getFieldInfos().fieldInfo(byteTestField);
       assertFalse(fi.omitNorms);
       assertTrue(fi.isIndexed);
-      assertFalse(fi.normsPresent());
+      assertFalse(fi.hasNorms());
     } else {
       FieldInfo fi = mergedReader.getFieldInfos().fieldInfo(byteTestField);
       assertFalse(fi.omitNorms);
       assertTrue(fi.isIndexed);
-      assertTrue(fi.normsPresent());
+      assertTrue(fi.hasNorms());
 
       DocValues normValues = mergedReader.normValues(byteTestField);
       assertNotNull(normValues);
       Source source = normValues.getSource();
       assertTrue(source.hasArray());
-      assertEquals(Type.FIXED_INTS_8, normValues.type());
+      assertEquals(Type.FIXED_INTS_8, normValues.getType());
       byte[] norms = (byte[]) source.getArray();
       for (int i = 0; i < mergedReader.maxDoc(); i++) {
         Document document = mergedReader.document(i);
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockPayloadAnalyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
+import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -43,6 +44,8 @@ import org.apache.lucene.util.English;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
+// TODO: we really need to test indexing offsets, but then getting only docs / docs + freqs.
+// not all codecs store prx separate...
 public class TestPostingsOffsets extends LuceneTestCase {
   IndexWriterConfig iwc;
 
@@ -54,7 +57,11 @@ public class TestPostingsOffsets extends LuceneTestCase {
 
     if (Codec.getDefault().getName().equals("Lucene40")) {
       // pulsing etc are not implemented
-      iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
+      if (random.nextBoolean()) {
+        iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
+      } else {
+        iwc.setCodec(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat()));
+      }
     }
   }
 
@@ -126,7 +133,11 @@ public class TestPostingsOffsets extends LuceneTestCase {
     iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     if (Codec.getDefault().getName().equals("Lucene40")) {
       // pulsing etc are not implemented
-      iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
+      if (random.nextBoolean()) {
+        iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
+      } else {
+        iwc.setCodec(_TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat()));
+      }
     }
     iwc.setMergePolicy(newLogMergePolicy()); // will rely on docids a bit for skipping
     RandomIndexWriter w = new RandomIndexWriter(random, dir, iwc);
|
@ -179,9 +179,9 @@ public class TestSegmentReader extends LuceneTestCase {
|
|||
for (int i=0; i<DocHelper.fields.length; i++) {
|
||||
IndexableField f = DocHelper.fields[i];
|
||||
if (f.fieldType().indexed()) {
|
||||
assertEquals(reader.hasNorms(f.name()), !f.fieldType().omitNorms());
|
||||
assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
|
||||
if (!reader.hasNorms(f.name())) {
|
||||
assertEquals(reader.normValues(f.name()) != null, !f.fieldType().omitNorms());
|
||||
assertEquals(reader.normValues(f.name()) != null, !DocHelper.noNorms.containsKey(f.name()));
|
||||
if (reader.normValues(f.name()) == null) {
|
||||
// test for norms of null
|
||||
DocValues norms = MultiDocValues.getNormDocValues(reader, f.name());
|
||||
assertNull(norms);
|
||||
|
|
|
@ -349,7 +349,7 @@ public class TestTypePromotion extends LuceneTestCase {
|
|||
DocValues docValues = children[0].reader().docValues("promote");
|
||||
assertNotNull(docValues);
|
||||
assertValues(TestType.Byte, dir, values);
|
||||
assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.type());
|
||||
assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.getType());
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* This class tests the MultiPhraseQuery class.
|
||||
|
@ -156,6 +157,43 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
indexStore.close();
|
||||
}
|
||||
|
||||
@Ignore //LUCENE-3821 fixes sloppy phrase scoring, except for this known problem
|
||||
public void testMultiSloppyWithRepeats() throws IOException {
|
||||
Directory indexStore = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
|
||||
add("a b c d e f g h i k", writer);
|
||||
IndexReader r = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
MultiPhraseQuery q = new MultiPhraseQuery();
|
||||
// this will fail, when the scorer would propagate [a] rather than [a,b],
|
||||
q.add(new Term[] {new Term("body", "a"), new Term("body", "b")});
|
||||
q.add(new Term[] {new Term("body", "a")});
|
||||
q.setSlop(6);
|
||||
assertEquals(1, searcher.search(q, 1).totalHits); // should match on "a b"
|
||||
|
||||
r.close();
|
||||
indexStore.close();
|
||||
}
|
||||
|
||||
public void testMultiExactWithRepeats() throws IOException {
|
||||
Directory indexStore = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
|
||||
add("a b c d e f g h i k", writer);
|
||||
IndexReader r = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
MultiPhraseQuery q = new MultiPhraseQuery();
|
||||
q.add(new Term[] {new Term("body", "a"), new Term("body", "d")}, 0);
|
||||
q.add(new Term[] {new Term("body", "a"), new Term("body", "f")}, 2);
|
||||
assertEquals(1, searcher.search(q, 1).totalHits); // should match on "a b"
|
||||
r.close();
|
||||
indexStore.close();
|
||||
}
|
||||
|
||||
private void add(String s, RandomIndexWriter writer) throws IOException {
|
||||
Document doc = new Document();
|
||||
doc.add(newField("body", s, TextField.TYPE_STORED));
|
||||
|
|
|
@ -169,4 +169,23 @@ public class TestSimpleSearchEquivalence extends SearchEquivalenceTestBase {
|
|||
q2.add(new Term[] { t2, t3 }, 2);
|
||||
assertSubsetOf(q1, q2);
|
||||
}
|
||||
|
||||
/** "A B"~∞ = +A +B if A != B */
|
||||
public void testSloppyPhraseVersusBooleanAnd() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = null;
|
||||
// semantics differ from SpanNear: SloppyPhrase handles repeats,
|
||||
// so we must ensure t1 != t2
|
||||
do {
|
||||
t2 = randomTerm();
|
||||
} while (t1.equals(t2));
|
||||
PhraseQuery q1 = new PhraseQuery();
|
||||
q1.add(t1);
|
||||
q1.add(t2);
|
||||
q1.setSlop(Integer.MAX_VALUE);
|
||||
BooleanQuery q2 = new BooleanQuery();
|
||||
q2.add(new TermQuery(t1), Occur.MUST);
|
||||
q2.add(new TermQuery(t2), Occur.MUST);
|
||||
assertSameSet(q1, q2);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,12 +21,10 @@ import java.util.Random;
|
|||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* random sloppy phrase query tests
|
||||
*/
|
||||
@Ignore("Put this back when we fix LUCENE-3821")
|
||||
public class TestSloppyPhraseQuery2 extends SearchEquivalenceTestBase {
|
||||
/** "A B"~N ⊆ "A B"~N+1 */
|
||||
public void testIncreasingSloppiness() throws Exception {
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
package org.apache.lucene.search.spans;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.SearchEquivalenceTestBase;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
||||
/**
|
||||
* Basic equivalence tests for span queries
|
||||
*/
|
||||
public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
||||
|
||||
// TODO: we could go a little crazy for a lot of these,
|
||||
// but these are just simple minimal cases in case something
|
||||
// goes horribly wrong. Put more intense tests elsewhere.
|
||||
|
||||
/** SpanTermQuery(A) = TermQuery(A) */
|
||||
public void testSpanTermVersusTerm() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
assertSameSet(new TermQuery(t1), new SpanTermQuery(t1));
|
||||
}
|
||||
|
||||
/** SpanOrQuery(A, B) = (A B) */
|
||||
public void testSpanOrVersusBoolean() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
BooleanQuery q1 = new BooleanQuery();
|
||||
q1.add(new TermQuery(t1), Occur.SHOULD);
|
||||
q1.add(new TermQuery(t2), Occur.SHOULD);
|
||||
SpanOrQuery q2 = new SpanOrQuery(new SpanTermQuery(t1), new SpanTermQuery(t2));
|
||||
assertSameSet(q1, q2);
|
||||
}
|
||||
|
||||
/** SpanNotQuery(A, B) ⊆ SpanTermQuery(A) */
|
||||
public void testSpanNotVersusSpanTerm() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
assertSubsetOf(new SpanNotQuery(new SpanTermQuery(t1), new SpanTermQuery(t2)), new SpanTermQuery(t1));
|
||||
}
|
||||
|
||||
/** SpanFirstQuery(A, 10) ⊆ SpanTermQuery(A) */
|
||||
public void testSpanFirstVersusSpanTerm() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
assertSubsetOf(new SpanFirstQuery(new SpanTermQuery(t1), 10), new SpanTermQuery(t1));
|
||||
}
|
||||
|
||||
/** SpanNearQuery([A, B], 0, true) = "A B" */
|
||||
public void testSpanNearVersusPhrase() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||
SpanNearQuery q1 = new SpanNearQuery(subquery, 0, true);
|
||||
PhraseQuery q2 = new PhraseQuery();
|
||||
q2.add(t1);
|
||||
q2.add(t2);
|
||||
assertSameSet(q1, q2);
|
||||
}
|
||||
|
||||
/** SpanNearQuery([A, B], ∞, false) = +A +B */
|
||||
public void testSpanNearVersusBooleanAnd() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||
SpanNearQuery q1 = new SpanNearQuery(subquery, Integer.MAX_VALUE, false);
|
||||
BooleanQuery q2 = new BooleanQuery();
|
||||
q2.add(new TermQuery(t1), Occur.MUST);
|
||||
q2.add(new TermQuery(t2), Occur.MUST);
|
||||
assertSameSet(q1, q2);
|
||||
}
|
||||
|
||||
/** SpanNearQuery([A B], 0, false) ⊆ SpanNearQuery([A B], 1, false) */
|
||||
public void testSpanNearVersusSloppySpanNear() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||
SpanNearQuery q1 = new SpanNearQuery(subquery, 0, false);
|
||||
SpanNearQuery q2 = new SpanNearQuery(subquery, 1, false);
|
||||
assertSubsetOf(q1, q2);
|
||||
}
|
||||
|
||||
/** SpanNearQuery([A B], 3, true) ⊆ SpanNearQuery([A B], 3, false) */
|
||||
public void testSpanNearInOrderVersusOutOfOrder() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||
SpanNearQuery q1 = new SpanNearQuery(subquery, 3, true);
|
||||
SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false);
|
||||
assertSubsetOf(q1, q2);
|
||||
}
|
||||
}
|
|
@ -17,31 +17,31 @@ package org.apache.lucene.search.spans;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.CheckHits;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.CheckHits;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class TestSpans extends LuceneTestCase {
|
||||
private IndexSearcher searcher;
|
||||
private IndexReader reader;
|
||||
|
|