mirror of https://github.com/apache/lucene.git

commit 43e40e8844

    merged with trunk

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1124321 13f79535-47bb-0310-9956-ffa450edef68
@@ -53,6 +53,8 @@
   <classpathentry kind="src" path="modules/analysis/stempel/src/test"/>
   <classpathentry kind="src" path="modules/benchmark/src/java"/>
   <classpathentry kind="src" path="modules/benchmark/src/test"/>
+  <classpathentry kind="src" path="modules/grouping/src/java"/>
+  <classpathentry kind="src" path="modules/grouping/src/test"/>
   <classpathentry kind="src" path="solr/src/java"/>
   <classpathentry kind="src" path="solr/src/webapp/src"/>
   <classpathentry kind="src" path="solr/src/common"/>
@@ -124,8 +126,8 @@
   <classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-glassfish-2.1.v20091210.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-jetty-6.1.26.jar"/>
   <classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-api-2.1-glassfish-2.1.v20091210.jar"/>
-  <classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.4.2.jar"/>
+  <classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.5.0.jar"/>
-  <classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.1.jar"/>
+  <classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.3.jar"/>
   <classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-core-asl-1.5.2.jar"/>
   <classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-mapper-asl-1.5.2.jar"/>
   <classpathentry kind="lib" path="solr/contrib/clustering/lib/mahout-collections-0.3.jar"/>

@@ -26,6 +26,7 @@
   <buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" />
   <buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
   <buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
+  <buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
   <buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
   <buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
   <buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />

@@ -26,6 +26,7 @@
   <module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
   <module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
   <module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
+  <module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
   <module filepath="$PROJECT_DIR$/solr/solr.iml" />
   <module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
   <module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />

@@ -71,6 +71,13 @@
   <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
   <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
 </configuration>
+<configuration default="false" name="grouping module" type="JUnit" factoryName="JUnit">
+  <module name="grouping" />
+  <option name="TEST_OBJECT" value="package" />
+  <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/grouping/build" />
+  <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
+  <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
+</configuration>
 <configuration default="false" name="highlighter contrib" type="JUnit" factoryName="JUnit">
   <module name="highlighter" />
   <option name="TEST_OBJECT" value="package" />
@@ -204,7 +211,7 @@
   <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
   <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
 </configuration>
-<list size="29">
+<list size="30">
   <item index="0" class="java.lang.String" itemvalue="JUnit.analysis-extras contrib" />
   <item index="1" class="java.lang.String" itemvalue="JUnit.ant contrib" />
   <item index="2" class="java.lang.String" itemvalue="JUnit.bdb contrib" />
@@ -215,25 +222,26 @@
   <item index="7" class="java.lang.String" itemvalue="JUnit.dataimporthandler contrib" />
   <item index="8" class="java.lang.String" itemvalue="JUnit.extraction contrib" />
   <item index="9" class="java.lang.String" itemvalue="JUnit.extras from dataimporthandler contrib" />
-  <item index="10" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
+  <item index="10" class="java.lang.String" itemvalue="JUnit.grouping module" />
-  <item index="11" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
+  <item index="11" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
-  <item index="12" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
+  <item index="12" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
-  <item index="13" class="java.lang.String" itemvalue="JUnit.lucene" />
+  <item index="13" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
-  <item index="14" class="java.lang.String" itemvalue="JUnit.lucli contrib" />
+  <item index="14" class="java.lang.String" itemvalue="JUnit.lucene" />
-  <item index="15" class="java.lang.String" itemvalue="JUnit.memory contrib" />
+  <item index="15" class="java.lang.String" itemvalue="JUnit.lucli contrib" />
-  <item index="16" class="java.lang.String" itemvalue="JUnit.misc contrib" />
+  <item index="16" class="java.lang.String" itemvalue="JUnit.memory contrib" />
-  <item index="17" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
+  <item index="17" class="java.lang.String" itemvalue="JUnit.misc contrib" />
-  <item index="18" class="java.lang.String" itemvalue="JUnit.queries contrib" />
+  <item index="18" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
-  <item index="19" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
+  <item index="19" class="java.lang.String" itemvalue="JUnit.queries contrib" />
-  <item index="20" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
+  <item index="20" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
-  <item index="21" class="java.lang.String" itemvalue="JUnit.solr" />
+  <item index="21" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
-  <item index="22" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
+  <item index="22" class="java.lang.String" itemvalue="JUnit.solr" />
-  <item index="23" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
+  <item index="23" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
-  <item index="24" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
+  <item index="24" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
-  <item index="25" class="java.lang.String" itemvalue="JUnit.swing contrib" />
+  <item index="25" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
-  <item index="26" class="java.lang.String" itemvalue="JUnit.uima contrib" />
+  <item index="26" class="java.lang.String" itemvalue="JUnit.swing contrib" />
-  <item index="27" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
+  <item index="27" class="java.lang.String" itemvalue="JUnit.uima contrib" />
-  <item index="28" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
+  <item index="28" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
+  <item index="29" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
 </list>
 </component>
 </project>

@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/build/classes/java" />
+    <output-test url="file://$MODULE_DIR$/build/classes/test" />
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
+      <excludeFolder url="file://$MODULE_DIR$/work" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
+    <orderEntry type="module" module-name="lucene" />
+  </component>
+</module>

@@ -0,0 +1,71 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+  -->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-parent</artifactId>
+    <version>@version@</version>
+    <relativePath>../../lucene/pom.xml</relativePath>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-grouping</artifactId>
+  <packaging>jar</packaging>
+  <name>Lucene Grouping</name>
+  <description>Lucene Grouping Module</description>
+  <properties>
+    <module-directory>modules/grouping</module-directory>
+    <build-directory>build</build-directory>
+  </properties>
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>lucene-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>lucene-test-framework</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <directory>${build-directory}</directory>
+    <outputDirectory>${build-directory}/classes/java</outputDirectory>
+    <testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
+    <sourceDirectory>src/java</sourceDirectory>
+    <testSourceDirectory>src/test</testSourceDirectory>
+    <testResources>
+      <testResource>
+        <directory>${project.build.testSourceDirectory}</directory>
+        <excludes>
+          <exclude>**/*.java</exclude>
+        </excludes>
+      </testResource>
+    </testResources>
+  </build>
+</project>

@@ -33,6 +33,7 @@
   <modules>
     <module>analysis</module>
     <module>benchmark</module>
+    <module>grouping</module>
   </modules>
   <build>
     <directory>build/lucene-modules-aggregator</directory>

@@ -24,6 +24,7 @@
     <groupId>org.apache</groupId>
     <artifactId>apache</artifactId>
     <version>8</version>
+    <relativePath/>
   </parent>
   <groupId>org.apache.lucene</groupId>
   <artifactId>lucene-solr-grandparent</artifactId>
@@ -105,14 +106,6 @@
     </license>
   </licenses>
   <repositories>
-    <repository>
-      <id>carrot2.org</id>
-      <name>Carrot2 Maven2 repository</name>
-      <url>http://download.carrot2.org/maven2/</url>
-      <snapshots>
-        <updatePolicy>never</updatePolicy>
-      </snapshots>
-    </repository>
     <repository>
       <id>apache.snapshots</id>
       <name>Apache Snapshot Repository</name>
@@ -305,7 +298,7 @@
     <dependency>
       <groupId>org.carrot2</groupId>
       <artifactId>carrot2-core</artifactId>
-      <version>3.4.2</version>
+      <version>3.5.0</version>
     </dependency>
     <dependency>
       <groupId>org.codehaus.woodstox</groupId>

@@ -162,11 +162,6 @@ Changes in Runtime Behavior
 * LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
   than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
 
-* LUCENE-1076: The default merge policy (TieredMergePolicy) is now
-  able to merge non-contiguous segments, which means docIDs no longer
-  necessarily say "in order".  If this is a problem then you can use
-  either of the LogMergePolicy impls.  (Mike McCandless)
-
 * LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked
   per IndexWriter session, which resulted in FieldInfos that had the FieldInfo
   properties from all previous segments combined. Field numbers are now tracked
@@ -416,6 +411,10 @@ New features
   it's able to handle multi-valued fields and does not hold the term
   bytes in RAM. (Mike McCandless)
 
+* LUCENE-1421, LUCENE-3102: added CachingCollector which allow you to cache
+  document IDs and scores encountered during the search, and "reply" them to
+  another Collector. (Mike McCandless, Shai Erera)
+
 Optimizations
 
 * LUCENE-2588: Don't store unnecessary suffixes when writing the terms
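
The CachingCollector entry above describes a two-pass search pattern. The sketch below is not part of this commit; it only illustrates, assuming the 3.2-era org.apache.lucene.search API, how a cached first pass might be replayed to a second Collector (the class and method names of the wrapper class here are illustrative):

    import org.apache.lucene.search.CachingCollector;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopScoreDocCollector;

    class CachingCollectorSketch {
      void searchTwice(IndexSearcher searcher, Query query, Collector secondPass) throws Exception {
        // First pass: collect top hits while caching doc IDs and scores, within a RAM budget (MB).
        TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, true);
        CachingCollector cache = CachingCollector.create(topDocs, true, 64.0);
        searcher.search(query, cache);
        // Second pass: replay the cached hits to another Collector instead of re-running the query.
        if (cache.isCached()) {
          cache.replay(secondPass);
        } else {
          searcher.search(query, secondPass); // the cache overflowed its RAM budget
        }
      }
    }
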
@@ -452,6 +451,9 @@ Bug fixes
   indexes, causing existing deletions to be applied on the incoming indexes as
   well. (Shai Erera, Mike McCandless)
 
+* LUCENE-3068: sloppy phrase query failed to match valid documents when multiple
+  query terms had same position in the query. (Doron Cohen)
+
 Test Cases
 
 * LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
@@ -476,9 +478,15 @@ Changes in backwards compatibility policy
   (Mike McCandless, Shai Erera)
 
 * LUCENE-3084: MergePolicy.OneMerge.segments was changed from
-  SegmentInfos to a List<SegmentInfo>; this is actually a minor change
-  because SegmentInfos itself extends Vector<SegmentInfo>. (Uwe
-  Schindler, Mike McCandless)
+  SegmentInfos to a List<SegmentInfo>. SegmentInfos itsself was changed
+  to no longer extend Vector<SegmentInfo> (to update code that is using
+  Vector-API, use the new asList() and asSet() methods returning unmodifiable
+  collections; modifying SegmentInfos is now only possible through
+  the explicitely declared methods). IndexWriter.segString() now takes
+  Iterable<SegmentInfo> instead of List<SegmentInfo>. A simple recompile
+  should fix this. MergePolicy and SegmentInfos are internal/experimental
+  APIs not covered by the strict backwards compatibility policy.
+  (Uwe Schindler, Mike McCandless)
 
 Changes in runtime behavior
 
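
The LUCENE-3084 entry above tells callers to move from the old Vector-style access to the new asList()/asSet() views. A minimal sketch of that migration follows; it is not part of this commit, and it assumes the 3.2-era SegmentInfos/SegmentInfo classes (including name being a public field on SegmentInfo):

    import java.util.List;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.index.SegmentInfos;

    class SegmentInfosMigrationSketch {
      String segmentNames(SegmentInfos infos) {
        // Before: SegmentInfos extended Vector<SegmentInfo>, so callers iterated it directly.
        // After: iterate the unmodifiable List view instead of using the Vector API.
        List<SegmentInfo> segments = infos.asList();
        StringBuilder sb = new StringBuilder();
        for (SegmentInfo info : segments) {
          if (sb.length() > 0) sb.append(' ');
          sb.append(info.name); // assumption: name is a public field in the 3.x SegmentInfo
        }
        return sb.toString();
      }
    }
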
@@ -492,6 +500,13 @@ Changes in runtime behavior
   returns NumericField instances. (Uwe Schindler, Ryan McKinley,
   Mike McCandless)
 
+* LUCENE-1076: Changed the default merge policy from
+  LogByteSizeMergePolicy to TieredMergePolicy, as of Version.LUCENE_32
+  (passed to IndexWriterConfig), which is able to merge non-contiguous
+  segments. This means docIDs no longer necessarily stay "in order"
+  during indexing. If this is a problem then you can use either of
+  the LogMergePolicy impls. (Mike McCandless)
+
 New features
 
 * LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader

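The LUCENE-1076 entry above changes the default merge policy. For reference only (not part of this commit, assuming the 3.2-era IndexWriterConfig API), a minimal sketch of keeping a LogMergePolicy when in-order docIDs matter to the application:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.Version;

    class InOrderMergePolicySketch {
      IndexWriter openWriter(Directory dir, Analyzer analyzer) throws Exception {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
        // TieredMergePolicy is now the default and may merge non-contiguous segments;
        // a LogMergePolicy keeps docIDs "in order" if that property matters.
        conf.setMergePolicy(new LogByteSizeMergePolicy());
        return new IndexWriter(dir, conf);
      }
    }
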
@@ -75,10 +75,36 @@ Bug Fixes
   caused a problem if you consumed a tokenstream, then reused it, added different
   attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
 
+* LUCENE-3113: Fixed some minor analysis bugs: double-reset() in ReusableAnalyzerBase
+  and ShingleAnalyzerWrapper, missing end() implementations in PrefixAwareTokenFilter
+  and PrefixAndSuffixAwareTokenFilter, invocations of incrementToken() after it
+  already returned false in CommonGramsQueryFilter, HyphenatedWordsFilter,
+  ShingleFilter, and SynonymsFilter. (Robert Muir, Steven Rowe, Uwe Schindler)
+
 New Features
 
 * LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
 
+* LUCENE-1421: create new grouping contrib module, enabling search
+  results to be grouped by a single-valued indexed field. This
+  module was factored out of Solr's grouping implementation, but
+  it cannot group by function queries nor arbitrary queries. (Mike
+  McCandless)
+
+* LUCENE-3098: add AllGroupsCollector, to collect all unique groups
+  (but in unspecified order). (Martijn van Groningen via Mike
+  McCandless)
+
+* LUCENE-3092: Added NRTCachingDirectory in contrib/misc, which
+  caches small segments in RAM. This is useful, in the near-real-time
+  case where the indexing rate is lowish but the reopen rate is
+  highish, to take load off the IO system. (Mike McCandless)
+
+Optimizations
+
+* LUCENE-3040: Switch all analysis consumers (highlighter, morelikethis, memory, ...)
+  over to reusableTokenStream(). (Robert Muir)
+
 ======================= Lucene 3.1.0 =======================
 
 Changes in backwards compatibility policy

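The LUCENE-3040 entry above is the migration that most of the source hunks below apply. For reference only (not part of this commit, assuming the 3.x analysis API), a typical reusableTokenStream() consumer looks roughly like this:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class ReusableTokenStreamSketch {
      int countTokens(Analyzer analyzer, String field, String text) throws IOException {
        // reusableTokenStream() lets the analyzer hand back a reused stream instead of
        // allocating a new one per call; unlike tokenStream(), it may throw IOException.
        TokenStream ts = analyzer.reusableTokenStream(field, new StringReader(text));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        int count = 0;
        try {
          ts.reset();                       // position the (possibly reused) stream at the start
          while (ts.incrementToken()) {
            if (termAtt.length() > 0) {     // termAtt holds the current token's text
              count++;
            }
          }
          ts.end();
        } finally {
          ts.close();
        }
        return count;
      }
    }
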
@@ -17,8 +17,6 @@ package org.apache.lucene.ant;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.ant.DocumentTestCase;
 import org.apache.lucene.ant.HtmlDocument;
 
@@ -27,7 +25,8 @@ public class HtmlDocumentTest extends DocumentTestCase
   HtmlDocument doc;
 
   @Override
-  public void setUp() throws IOException {
+  public void setUp() throws Exception {
+    super.setUp();
     doc = new HtmlDocument(getFile("test.html"));
   }
 
@@ -37,8 +36,9 @@ public class HtmlDocumentTest extends DocumentTestCase
   }
 
   @Override
-  public void tearDown() {
+  public void tearDown() throws Exception {
     doc = null;
+    super.tearDown();
   }
 }

@@ -17,8 +17,6 @@ package org.apache.lucene.ant;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.ant.DocumentTestCase;
 import org.apache.lucene.ant.TextDocument;
 
@@ -27,7 +25,8 @@ public class TextDocumentTest extends DocumentTestCase
   TextDocument doc;
 
   @Override
-  public void setUp() throws IOException {
+  public void setUp() throws Exception {
+    super.setUp();
     doc = new TextDocument(getFile("test.txt"));
   }
 
@@ -36,8 +35,9 @@ public class TextDocumentTest extends DocumentTestCase
   }
 
   @Override
-  public void tearDown() {
+  public void tearDown() throws Exception {
     doc = null;
+    super.tearDown();
   }
 }

@@ -78,7 +78,7 @@ public class Highlighter
   public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
     throws IOException, InvalidTokenOffsetsException
   {
-    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
+    TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
     return getBestFragment(tokenStream, text);
   }
 
@@ -130,7 +130,7 @@ public class Highlighter
     int maxNumFragments)
     throws IOException, InvalidTokenOffsetsException
   {
-    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
+    TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
     return getBestFragments(tokenStream, text, maxNumFragments);
   }
 
@@ -286,7 +286,11 @@ public class TokenSources {
   // convenience method
   public static TokenStream getTokenStream(String field, String contents,
       Analyzer analyzer) {
-    return analyzer.tokenStream(field, new StringReader(contents));
+    try {
+      return analyzer.reusableTokenStream(field, new StringReader(contents));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
   }
 
 }

@@ -532,7 +532,7 @@ public class InstantiatedIndexWriter implements Closeable {
       if (field.tokenStreamValue() != null) {
         tokenStream = field.tokenStreamValue();
       } else {
-        tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
+        tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
       }
 
       // reset the TokenStream to the first token

@@ -305,11 +305,12 @@ class LuceneMethods {
 
     int position = 0;
     // Tokenize field and add to postingTable
-    TokenStream stream = analyzer.tokenStream(fieldName, reader);
+    TokenStream stream = analyzer.reusableTokenStream(fieldName, reader);
     CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
 
     try {
+      stream.reset();
       while (stream.incrementToken()) {
         position += (posIncrAtt.getPositionIncrement() - 1);
         position++;
@@ -323,6 +324,7 @@ class LuceneMethods {
         }
         if (position > maxFieldLength) break;
       }
+      stream.end();
     } finally {
       stream.close();
     }

@@ -262,8 +262,12 @@ public class MemoryIndex {
     if (analyzer == null)
       throw new IllegalArgumentException("analyzer must not be null");
 
-    TokenStream stream = analyzer.tokenStream(fieldName,
-        new StringReader(text));
+    TokenStream stream;
+    try {
+      stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
 
     addField(fieldName, stream);
   }

@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Set;
 
 /**
@@ -135,7 +136,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
     if (last > 1 || !isOptimized(infos.info(0))) {
 
       spec = new MergeSpecification();
-      spec.add(new OneMerge(infos.range(0, last)));
+      spec.add(new OneMerge(infos.asList().subList(0, last)));
     }
   } else if (last > maxNumSegments) {
 
@@ -192,7 +193,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
       prev = backLink[i][prev];
       int mergeStart = i + prev;
       if((mergeEnd - mergeStart) > 1) {
-        spec.add(new OneMerge(infos.range(mergeStart, mergeEnd)));
+        spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
       } else {
         if(partialExpunge) {
           SegmentInfo info = infos.info(mergeStart);
@@ -208,7 +209,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
 
     if(partialExpunge && maxDelCount > 0) {
       // expunge deletes
-      spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1)));
+      spec.add(new OneMerge(Collections.singletonList(infos.info(expungeCandidate))));
     }
 
     return spec;
@@ -250,7 +251,10 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
     MergeSpecification spec = null;
 
     if(numLargeSegs < numSegs) {
-      SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs);
+      // hack to create a shallow sub-range as SegmentInfos instance,
+      // it does not clone all metadata, but LogMerge does not need it
+      final SegmentInfos smallSegments = new SegmentInfos();
+      smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
       spec = super.findMergesToExpungeDeletes(smallSegments);
     }
 
@@ -258,7 +262,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
     for(int i = 0; i < numLargeSegs; i++) {
       SegmentInfo info = infos.info(i);
       if(info.hasDeletions()) {
-        spec.add(new OneMerge(infos.range(i, i + 1)));
+        spec.add(new OneMerge(Collections.singletonList(infos.info(i))));
       }
     }
     return spec;
@@ -296,7 +300,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
     if(totalSmallSegSize < targetSegSize * 2) {
       MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge);
       if(spec == null) spec = new MergeSpecification(); // should not happen
-      spec.add(new OneMerge(infos.range(numLargeSegs, numSegs)));
+      spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
       return spec;
     } else {
       return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
@@ -311,11 +315,13 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
         if(size(info) < sizeThreshold) break;
         startSeg++;
       }
-      spec.add(new OneMerge(infos.range(startSeg, numSegs)));
+      spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
       return spec;
     } else {
-      // apply the log merge policy to small segments.
-      SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs);
+      // hack to create a shallow sub-range as SegmentInfos instance,
+      // it does not clone all metadata, but LogMerge does not need it
+      final SegmentInfos smallSegments = new SegmentInfos();
+      smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
       MergeSpecification spec = super.findMerges(smallSegments);
 
       if(_partialExpunge) {
@@ -342,7 +348,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
       }
     }
     if (maxDelCount > 0) {
-      return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1));
+      return new OneMerge(Collections.singletonList(infos.info(expungeCandidate)));
     }
     return null;
   }

@@ -0,0 +1,289 @@
+package org.apache.lucene.store;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter;       // javadocs
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.store.RAMDirectory;      // javadocs
+import org.apache.lucene.util.IOUtils;
+
+// TODO
+//   - let subclass dictate policy...?
+//   - rename to MergeCacheingDir?  NRTCachingDir
+
+/**
+ * Wraps a {@link RAMDirectory}
+ * around any provided delegate directory, to
+ * be used during NRT search.  Make sure you pull the merge
+ * scheduler using {@link #getMergeScheduler} and pass that to your
+ * {@link IndexWriter}; this class uses that to keep track of which
+ * merges are being done by which threads, to decide when to
+ * cache each written file.
+ *
+ * <p>This class is likely only useful in a near-real-time
+ * context, where indexing rate is lowish but reopen
+ * rate is highish, resulting in many tiny files being
+ * written.  This directory keeps such segments (as well as
+ * the segments produced by merging them, as long as they
+ * are small enough), in RAM.</p>
+ *
+ * <p>This is safe to use: when your app calls {IndexWriter#commit},
+ * all cached files will be flushed from the cached and sync'd.</p>
+ *
+ * <p><b>NOTE</b>: this class is somewhat sneaky in its
+ * approach for spying on merges to determine the size of a
+ * merge: it records which threads are running which merges
+ * by watching ConcurrentMergeScheduler's doMerge method.
+ * While this works correctly, likely future versions of
+ * this class will take a more general approach.
+ *
+ * <p>Here's a simple example usage:
+ *
+ * <pre>
+ *   Directory fsDir = FSDirectory.open(new File("/path/to/index"));
+ *   NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
+ *   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
+ *   conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
+ *   IndexWriter writer = new IndexWriter(cachedFSDir, conf);
+ * </pre>
+ *
+ * <p>This will cache all newly flushed segments, all merges
+ * whose expected segment size is <= 5 MB, unless the net
+ * cached bytes exceeds 60 MB at which point all writes will
+ * not be cached (until the net bytes falls below 60 MB).</p>
+ *
+ * @lucene.experimental
+ */
+
+public class NRTCachingDirectory extends Directory {
+
+  private final RAMDirectory cache = new RAMDirectory();
+
+  private final Directory delegate;
+
+  private final long maxMergeSizeBytes;
+  private final long maxCachedBytes;
+
+  private static final boolean VERBOSE = false;
+
+  /**
+   *  We will cache a newly created output if 1) it's a
+   *  flush or a merge and the estimated size of the merged segmnt is <=
+   *  maxMergeSizeMB, and 2) the total cached bytes is <=
+   *  maxCachedMB */
+  public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) {
+    this.delegate = delegate;
+    maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024);
+    maxCachedBytes = (long) (maxCachedMB*1024*1024);
+  }
+
+  @Override
+  public synchronized String[] listAll() throws IOException {
+    final Set<String> files = new HashSet<String>();
+    for(String f : cache.listAll()) {
+      files.add(f);
+    }
+    for(String f : delegate.listAll()) {
+      assert !files.contains(f);
+      files.add(f);
+    }
+    return files.toArray(new String[files.size()]);
+  }
+
+  /** Returns how many bytes are being used by the
+   *  RAMDirectory cache */
+  public long sizeInBytes()  {
+    return cache.sizeInBytes();
+  }
+
+  @Override
+  public synchronized boolean fileExists(String name) throws IOException {
+    return cache.fileExists(name) || delegate.fileExists(name);
+  }
+
+  @Override
+  public synchronized long fileModified(String name) throws IOException {
+    if (cache.fileExists(name)) {
+      return cache.fileModified(name);
+    } else {
+      return delegate.fileModified(name);
+    }
+  }
+
+  @Override
+  public synchronized void touchFile(String name) throws IOException {
+    if (cache.fileExists(name)) {
+      cache.touchFile(name);
+    } else {
+      delegate.touchFile(name);
+    }
+  }
+
+  @Override
+  public synchronized void deleteFile(String name) throws IOException {
+    // Delete from both, in case we are currently uncaching:
+    if (VERBOSE) {
+      System.out.println("nrtdir.deleteFile name=" + name);
+    }
+    cache.deleteFile(name);
+    delegate.deleteFile(name);
+  }
+
+  @Override
+  public synchronized long fileLength(String name) throws IOException {
+    if (cache.fileExists(name)) {
+      return cache.fileLength(name);
+    } else {
+      return delegate.fileLength(name);
+    }
+  }
+
+  public String[] listCachedFiles() {
+    return cache.listAll();
+  }
+
+  @Override
+  public IndexOutput createOutput(String name) throws IOException {
+    if (VERBOSE) {
+      System.out.println("nrtdir.createOutput name=" + name);
+    }
+    if (doCacheWrite(name)) {
+      if (VERBOSE) {
+        System.out.println("  to cache");
+      }
+      return cache.createOutput(name);
+    } else {
+      return delegate.createOutput(name);
+    }
+  }
+
+  @Override
+  public void sync(Collection<String> fileNames) throws IOException {
+    if (VERBOSE) {
+      System.out.println("nrtdir.sync files=" + fileNames);
+    }
+    for(String fileName : fileNames) {
+      unCache(fileName);
+    }
+    delegate.sync(fileNames);
+  }
+
+  @Override
+  public synchronized IndexInput openInput(String name) throws IOException {
+    if (VERBOSE) {
+      System.out.println("nrtdir.openInput name=" + name);
+    }
+    if (cache.fileExists(name)) {
+      if (VERBOSE) {
+        System.out.println("  from cache");
+      }
+      return cache.openInput(name);
+    } else {
+      return delegate.openInput(name);
+    }
+  }
+
+  @Override
+  public synchronized IndexInput openInput(String name, int bufferSize) throws IOException {
+    if (cache.fileExists(name)) {
+      return cache.openInput(name, bufferSize);
+    } else {
+      return delegate.openInput(name, bufferSize);
+    }
+  }
+
+  @Override
+  public Lock makeLock(String name) {
+    return delegate.makeLock(name);
+  }
+
+  @Override
+  public void clearLock(String name) throws IOException {
+    delegate.clearLock(name);
+  }
+
+  /** Close thius directory, which flushes any cached files
+   *  to the delegate and then closes the delegate. */
+  @Override
+  public void close() throws IOException {
+    for(String fileName : cache.listAll()) {
+      unCache(fileName);
+    }
+    cache.close();
+    delegate.close();
+  }
+
+  private final ConcurrentHashMap<Thread,MergePolicy.OneMerge> merges = new ConcurrentHashMap<Thread,MergePolicy.OneMerge>();
+
+  public MergeScheduler getMergeScheduler() {
+    return new ConcurrentMergeScheduler() {
+      @Override
+      protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
+        try {
+          merges.put(Thread.currentThread(), merge);
+          super.doMerge(merge);
+        } finally {
+          merges.remove(Thread.currentThread());
+        }
+      }
+    };
+  }
+
+  /** Subclass can override this to customize logic; return
+   *  true if this file should be written to the RAMDirectory. */
+  protected boolean doCacheWrite(String name) {
+    final MergePolicy.OneMerge merge = merges.get(Thread.currentThread());
+    //System.out.println(Thread.currentThread().getName() + ": CACHE check merge=" + merge + " size=" + (merge==null ? 0 : merge.estimatedMergeBytes));
+    return !name.equals(IndexFileNames.SEGMENTS_GEN) && (merge == null || merge.estimatedMergeBytes <= maxMergeSizeBytes) && cache.sizeInBytes() <= maxCachedBytes;
+  }
+
+  private void unCache(String fileName) throws IOException {
+    final IndexOutput out;
+    synchronized(this) {
+      if (!delegate.fileExists(fileName)) {
+        assert cache.fileExists(fileName);
+        out = delegate.createOutput(fileName);
+      } else {
+        out = null;
+      }
+    }
+
+    if (out != null) {
+      IndexInput in = null;
+      try {
+        in = cache.openInput(fileName);
+        in.copyBytes(out, in.length());
+      } finally {
+        IOUtils.closeSafely(in, out);
+      }
+      synchronized(this) {
+        cache.deleteFile(fileName);
+      }
+    }
+  }
+}

@@ -0,0 +1,114 @@
+package org.apache.lucene.store;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util._TestUtil;
+
+public class TestNRTCachingDirectory extends LuceneTestCase {
+
+  public void testNRTAndCommit() throws Exception {
+    Directory dir = newDirectory();
+    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    conf.setMergeScheduler(cachedDir.getMergeScheduler());
+    RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
+    w.w.setInfoStream(VERBOSE ? System.out : null);
+    final LineFileDocs docs = new LineFileDocs(random);
+    final int numDocs = _TestUtil.nextInt(random, 100, 400);
+
+    if (VERBOSE) {
+      System.out.println("TEST: numDocs=" + numDocs);
+    }
+
+    final List<BytesRef> ids = new ArrayList<BytesRef>();
+    IndexReader r = null;
+    for(int docCount=0;docCount<numDocs;docCount++) {
+      final Document doc = docs.nextDoc();
+      ids.add(new BytesRef(doc.get("docid")));
+      w.addDocument(doc);
+      if (random.nextInt(20) == 17) {
+        if (r == null) {
+          r = IndexReader.open(w.w, false);
+        } else {
+          final IndexReader r2 = r.reopen();
+          if (r2 != r) {
+            r.close();
+            r = r2;
+          }
+        }
+        assertEquals(1+docCount, r.numDocs());
+        final IndexSearcher s = new IndexSearcher(r);
+        // Just make sure search can run; we can't assert
+        // totHits since it could be 0
+        TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
+        // System.out.println("tot hits " + hits.totalHits);
+      }
+    }
+
+    if (r != null) {
+      r.close();
+    }
+
+    // Close should force cache to clear since all files are sync'd
+    w.close();
+
+    final String[] cachedFiles = cachedDir.listCachedFiles();
+    for(String file : cachedFiles) {
+      System.out.println("FAIL: cached file " + file + " remains after sync");
+    }
+    assertEquals(0, cachedFiles.length);
+
+    r = IndexReader.open(dir);
+    for(BytesRef id : ids) {
+      assertEquals(1, r.docFreq("docid", id));
+    }
+    r.close();
+    cachedDir.close();
+  }
+
+  // NOTE: not a test; just here to make sure the code frag
+  // in the javadocs is correct!
+  public void verifyCompiles() throws Exception {
+    Analyzer analyzer = null;
+
+    Directory fsDir = FSDirectory.open(new File("/path/to/index"));
+    NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0);
+    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
+    conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
+    IndexWriter writer = new IndexWriter(cachedFSDir, conf);
+  }
+}

@@ -186,7 +186,7 @@ public class FuzzyLikeThisQuery extends Query
   private void addTerms(IndexReader reader,FieldVals f) throws IOException
   {
     if(f.queryString==null) return;
-    TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
+    TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
     int corpusNumDocs=reader.numDocs();

@@ -881,7 +881,7 @@ public final class MoreLikeThis {
       throw new UnsupportedOperationException("To use MoreLikeThis without " +
           "term vectors, you must provide an Analyzer");
     }
-    TokenStream ts = analyzer.tokenStream(fieldName, r);
+    TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
     int tokenCount=0;
     // for every token
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

@@ -85,7 +85,7 @@ public final class SimilarityQueries
       Set<?> stop)
       throws IOException
   {
-    TokenStream ts = a.tokenStream( field, new StringReader( body));
+    TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
     BooleanQuery tmp = new BooleanQuery();

@@ -106,15 +106,16 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     }
 
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
+    TokenStream source;
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
 
     int countTokens = 0;
     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
       source.reset();
     } catch (IOException e1) {
       throw new RuntimeException(e1);
     }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     while (true) {
       try {
         if (!source.incrementToken()) break;
@@ -194,14 +195,15 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
   @Override
   protected Query getPrefixQuery(String field, String termStr) throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
+    TokenStream source;
     List<String> tlist = new ArrayList<String>();
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
      source.reset();
     } catch (IOException e1) {
       throw new RuntimeException(e1);
     }
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     while (true) {
       try {
         if (!source.incrementToken()) break;
@@ -247,12 +249,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
       throws ParseException {
     // get Analyzer from superclass and tokenize the term
-    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
+    TokenStream source = null;
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     String nextToken = null;
     boolean multipleTokens = false;
 
     try {
+      source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+      CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
       source.reset();
       if (source.incrementToken()) {
         nextToken = termAtt.toString();
@@ -292,7 +295,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     if (part1 != null) {
       // part1
       try {
-        source = getAnalyzer().tokenStream(field, new StringReader(part1));
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
         termAtt = source.addAttribute(CharTermAttribute.class);
         source.reset();
         multipleTokens = false;
@@ -318,11 +321,10 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     }
 
     if (part2 != null) {
-      // part2
-      source = getAnalyzer().tokenStream(field, new StringReader(part2));
-      termAtt = source.addAttribute(CharTermAttribute.class);
 
       try {
+        // part2
+        source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
+        termAtt = source.addAttribute(CharTermAttribute.class);
         source.reset();
         if (source.incrementToken()) {
           part2 = termAtt.toString();

@ -121,9 +121,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
|
||||||
String text = fieldNode.getTextAsString();
|
String text = fieldNode.getTextAsString();
|
||||||
String field = fieldNode.getFieldAsString();
|
String field = fieldNode.getFieldAsString();
|
||||||
|
|
||||||
TokenStream source = this.analyzer.tokenStream(field, new StringReader(
|
TokenStream source;
|
||||||
text));
|
|
||||||
try {
|
try {
|
||||||
|
source = this.analyzer.reusableTokenStream(field, new StringReader(text));
|
||||||
source.reset();
|
source.reset();
|
||||||
} catch (IOException e1) {
|
} catch (IOException e1) {
|
||||||
throw new RuntimeException(e1);
|
throw new RuntimeException(e1);
|
||||||
|
|
|
@ -631,8 +631,9 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void tearDown() {
|
public void tearDown() throws Exception {
|
||||||
BooleanQuery.setMaxClauseCount(originalMaxClauses);
|
BooleanQuery.setMaxClauseCount(originalMaxClauses);
|
||||||
|
super.tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
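The test fix above restores the call to super.tearDown(); LuceneTestCase relies on setUp/tearDown chaining for its own bookkeeping and checks. A minimal sketch of the corrected pattern, assuming a LuceneTestCase subclass (originalMaxClauses is the test's own field):

import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.util.LuceneTestCase;

public class MyParserTest extends LuceneTestCase {
  private int originalMaxClauses;

  @Override
  public void setUp() throws Exception {
    super.setUp();                                        // let the framework set up first
    originalMaxClauses = BooleanQuery.getMaxClauseCount();
  }

  @Override
  public void tearDown() throws Exception {
    BooleanQuery.setMaxClauseCount(originalMaxClauses);   // restore the static setting
    super.tearDown();                                     // then let the framework clean up/verify
  }
}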
@ -116,7 +116,7 @@ public final class SynExpand {
|
||||||
if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
|
if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
|
||||||
|
|
||||||
// [1] Parse query into separate words so that when we expand we can avoid dups
|
// [1] Parse query into separate words so that when we expand we can avoid dups
|
||||||
TokenStream ts = a.tokenStream( field, new StringReader( query));
|
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
|
|
|
@ -124,7 +124,7 @@ public class SynLookup {
|
||||||
List<String> top = new LinkedList<String>(); // needs to be separately listed..
|
List<String> top = new LinkedList<String>(); // needs to be separately listed..
|
||||||
|
|
||||||
// [1] Parse query into separate words so that when we expand we can avoid dups
|
// [1] Parse query into separate words so that when we expand we can avoid dups
|
||||||
TokenStream ts = a.tokenStream( field, new StringReader( query));
|
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
|
|
|
@ -76,10 +76,10 @@ public class LikeThisQueryBuilder implements QueryBuilder {
|
||||||
stopWordsSet=new HashSet<String>();
|
stopWordsSet=new HashSet<String>();
|
||||||
for (int i = 0; i < fields.length; i++)
|
for (int i = 0; i < fields.length; i++)
|
||||||
{
|
{
|
||||||
TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
|
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
|
||||||
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while(ts.incrementToken()) {
|
while(ts.incrementToken()) {
|
||||||
stopWordsSet.add(termAtt.toString());
|
stopWordsSet.add(termAtt.toString());
|
||||||
|
|
|
@ -56,7 +56,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
|
ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
|
||||||
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
|
TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
|
|
@ -57,11 +57,11 @@ public class TermsFilterBuilder implements FilterBuilder
|
||||||
TermsFilter tf = new TermsFilter();
|
TermsFilter tf = new TermsFilter();
|
||||||
String text = DOMUtils.getNonBlankTextOrFail(e);
|
String text = DOMUtils.getNonBlankTextOrFail(e);
|
||||||
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
|
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
|
||||||
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
|
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
|
||||||
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
Term term = null;
|
Term term = null;
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
|
|
@ -55,9 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
|
||||||
|
|
||||||
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
|
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
|
||||||
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
|
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
|
||||||
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
Term term = null;
|
Term term = null;
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
|
|
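The XML query-builder hunks move the reusableTokenStream call and the attribute setup inside the existing try block. The real builders consume TermToBytesRefAttribute as shown above; the following is a simplified, self-contained sketch that uses CharTermAttribute and a plain RuntimeException to show the same structure (names are illustrative, not the builders' API):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

final class AnalyzedTermsQueryExample {
  static BooleanQuery build(Analyzer analyzer, String fieldName, String text) {
    BooleanQuery bq = new BooleanQuery();
    try {
      // stream creation now sits inside the try, so analysis failures are reported uniformly
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        bq.add(new TermQuery(new Term(fieldName, termAtt.toString())), BooleanClause.Occur.SHOULD);
      }
    } catch (IOException ioe) {
      throw new RuntimeException("Error constructing terms from index: " + ioe);
    }
    return bq;
  }
}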
@ -733,8 +733,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
|
||||||
// case we have to roll back:
|
// case we have to roll back:
|
||||||
startCommit();
|
startCommit();
|
||||||
|
|
||||||
final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
|
final List<SegmentInfo> rollbackSegments = segmentInfos.createBackupSegmentInfos(false);
|
||||||
rollbackSegmentInfos.addAll(segmentInfos);
|
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
|
@ -766,8 +765,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
|
||||||
deleter.refresh();
|
deleter.refresh();
|
||||||
|
|
||||||
// Restore all SegmentInfos (in case we pruned some)
|
// Restore all SegmentInfos (in case we pruned some)
|
||||||
segmentInfos.clear();
|
segmentInfos.rollbackSegmentInfos(rollbackSegments);
|
||||||
segmentInfos.addAll(rollbackSegmentInfos);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -126,7 +126,6 @@ final class DocumentsWriter {
|
||||||
final DocumentsWriterPerThreadPool perThreadPool;
|
final DocumentsWriterPerThreadPool perThreadPool;
|
||||||
final FlushPolicy flushPolicy;
|
final FlushPolicy flushPolicy;
|
||||||
final DocumentsWriterFlushControl flushControl;
|
final DocumentsWriterFlushControl flushControl;
|
||||||
final Healthiness healthiness;
|
|
||||||
DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
|
DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
|
||||||
BufferedDeletesStream bufferedDeletesStream) throws IOException {
|
BufferedDeletesStream bufferedDeletesStream) throws IOException {
|
||||||
this.directory = directory;
|
this.directory = directory;
|
||||||
|
@ -142,10 +141,7 @@ final class DocumentsWriter {
|
||||||
flushPolicy = configuredPolicy;
|
flushPolicy = configuredPolicy;
|
||||||
}
|
}
|
||||||
flushPolicy.init(this);
|
flushPolicy.init(this);
|
||||||
|
flushControl = new DocumentsWriterFlushControl(this, config );
|
||||||
healthiness = new Healthiness();
|
|
||||||
final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
|
|
||||||
flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void deleteQueries(final Query... queries) throws IOException {
|
synchronized void deleteQueries(final Query... queries) throws IOException {
|
||||||
|
@ -283,31 +279,28 @@ final class DocumentsWriter {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
boolean maybeMerge = false;
|
boolean maybeMerge = false;
|
||||||
final boolean isUpdate = delTerm != null;
|
final boolean isUpdate = delTerm != null;
|
||||||
if (healthiness.anyStalledThreads()) {
|
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
|
||||||
|
// Help out flushing any queued DWPTs so we can un-stall:
|
||||||
// Help out flushing any pending DWPTs so we can un-stall:
|
|
||||||
if (infoStream != null) {
|
if (infoStream != null) {
|
||||||
message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)");
|
message("DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
|
||||||
}
|
}
|
||||||
|
do {
|
||||||
// Try pick up pending threads here if possible
|
// Try pick up pending threads here if possible
|
||||||
DocumentsWriterPerThread flushingDWPT;
|
DocumentsWriterPerThread flushingDWPT;
|
||||||
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
|
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
|
||||||
// Don't push the delete here since the update could fail!
|
// Don't push the delete here since the update could fail!
|
||||||
maybeMerge = doFlush(flushingDWPT);
|
maybeMerge |= doFlush(flushingDWPT);
|
||||||
if (!healthiness.anyStalledThreads()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (infoStream != null && healthiness.anyStalledThreads()) {
|
if (infoStream != null && flushControl.anyStalledThreads()) {
|
||||||
message("WARNING DocumentsWriter still has stalled threads; waiting");
|
message("WARNING DocumentsWriter has stalled threads; waiting");
|
||||||
}
|
}
|
||||||
|
|
||||||
healthiness.waitIfStalled(); // block if stalled
|
flushControl.waitIfStalled(); // block if stalled
|
||||||
|
} while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing
|
||||||
|
|
||||||
if (infoStream != null && healthiness.anyStalledThreads()) {
|
if (infoStream != null) {
|
||||||
message("WARNING DocumentsWriter done waiting");
|
message("continue indexing after helpling out flushing DocumentsWriter is healthy");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
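The update path above now loops while queued or blocked flushes exist, hijacking the indexing thread to flush pending DWPTs before it would stall. A schematic, hypothetical sketch of that control flow; the interfaces below are stand-ins modelling only the calls visible in the hunk, not the real Lucene classes:

final class HijackFlushLoopExample {
  interface DWPT { }                                 // stand-in for DocumentsWriterPerThread
  interface Flusher { boolean doFlush(DWPT dwpt); }  // returns true if a merge may be needed
  interface FlushControl {
    boolean anyStalledThreads();
    int numQueuedFlushes();
    DWPT nextPendingFlush();                         // null when nothing is pending
    void waitIfStalled();                            // blocks while indexing is stalled
  }

  // Mirrors the hunk: drain queued flushes on the indexing thread before stalling.
  static boolean helpFlush(FlushControl flushControl, Flusher flusher) {
    boolean maybeMerge = false;
    if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
      do {
        DWPT flushingDWPT;
        while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
          maybeMerge |= flusher.doFlush(flushingDWPT);   // help out with queued work
        }
        flushControl.waitIfStalled();                    // block only if still stalled
      } while (flushControl.numQueuedFlushes() != 0);    // queued DWPTs remain: keep helping
    }
    return maybeMerge;
  }
}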
@ -353,7 +346,6 @@ final class DocumentsWriter {
|
||||||
maybeMerge = true;
|
maybeMerge = true;
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
FlushTicket ticket = null;
|
FlushTicket ticket = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
assert currentFullFlushDelQueue == null
|
assert currentFullFlushDelQueue == null
|
||||||
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
|
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
|
||||||
|
@ -511,9 +503,7 @@ final class DocumentsWriter {
|
||||||
anythingFlushed |= doFlush(flushingDWPT);
|
anythingFlushed |= doFlush(flushingDWPT);
|
||||||
}
|
}
|
||||||
// If a concurrent flush is still in flight wait for it
|
// If a concurrent flush is still in flight wait for it
|
||||||
while (flushControl.anyFlushing()) {
|
|
||||||
flushControl.waitForFlush();
|
flushControl.waitForFlush();
|
||||||
}
|
|
||||||
if (!anythingFlushed) { // apply deletes if we did not flush any document
|
if (!anythingFlushed) { // apply deletes if we did not flush any document
|
||||||
synchronized (ticketQueue) {
|
synchronized (ticketQueue) {
|
||||||
ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));
|
ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));
|
||||||
|
|
|
@ -44,30 +44,32 @@ public final class DocumentsWriterFlushControl {
|
||||||
private long activeBytes = 0;
|
private long activeBytes = 0;
|
||||||
private long flushBytes = 0;
|
private long flushBytes = 0;
|
||||||
private volatile int numPending = 0;
|
private volatile int numPending = 0;
|
||||||
private volatile int numFlushing = 0;
|
|
||||||
final AtomicBoolean flushDeletes = new AtomicBoolean(false);
|
final AtomicBoolean flushDeletes = new AtomicBoolean(false);
|
||||||
private boolean fullFlush = false;
|
private boolean fullFlush = false;
|
||||||
private Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
|
private final Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
|
||||||
// only for safety reasons if a DWPT is close to the RAM limit
|
// only for safety reasons if a DWPT is close to the RAM limit
|
||||||
private Queue<DocumentsWriterPerThread> blockedFlushes = new LinkedList<DocumentsWriterPerThread>();
|
private final Queue<BlockedFlush> blockedFlushes = new LinkedList<BlockedFlush>();
|
||||||
|
|
||||||
|
|
||||||
|
double maxConfiguredRamBuffer = 0;
|
||||||
long peakActiveBytes = 0;// only with assert
|
long peakActiveBytes = 0;// only with assert
|
||||||
long peakFlushBytes = 0;// only with assert
|
long peakFlushBytes = 0;// only with assert
|
||||||
long peakNetBytes = 0;// only with assert
|
long peakNetBytes = 0;// only with assert
|
||||||
private final Healthiness healthiness;
|
long peakDelta = 0; // only with assert
|
||||||
|
final DocumentsWriterStallControl stallControl;
|
||||||
private final DocumentsWriterPerThreadPool perThreadPool;
|
private final DocumentsWriterPerThreadPool perThreadPool;
|
||||||
private final FlushPolicy flushPolicy;
|
private final FlushPolicy flushPolicy;
|
||||||
private boolean closed = false;
|
private boolean closed = false;
|
||||||
private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
|
private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
|
||||||
private final DocumentsWriter documentsWriter;
|
private final DocumentsWriter documentsWriter;
|
||||||
|
private final IndexWriterConfig config;
|
||||||
|
|
||||||
DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
|
DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
|
||||||
Healthiness healthiness, long hardMaxBytesPerDWPT) {
|
IndexWriterConfig config) {
|
||||||
this.healthiness = healthiness;
|
this.stallControl = new DocumentsWriterStallControl();
|
||||||
this.perThreadPool = documentsWriter.perThreadPool;
|
this.perThreadPool = documentsWriter.perThreadPool;
|
||||||
this.flushPolicy = documentsWriter.flushPolicy;
|
this.flushPolicy = documentsWriter.flushPolicy;
|
||||||
this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT;
|
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
|
||||||
|
this.config = config;
|
||||||
this.documentsWriter = documentsWriter;
|
this.documentsWriter = documentsWriter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,6 +85,24 @@ public final class DocumentsWriterFlushControl {
|
||||||
return flushBytes + activeBytes;
|
return flushBytes + activeBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long stallLimitBytes() {
|
||||||
|
final double maxRamMB = config.getRAMBufferSizeMB();
|
||||||
|
return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean assertMemory() {
|
||||||
|
final double maxRamMB = config.getRAMBufferSizeMB();
|
||||||
|
if (maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
|
||||||
|
// for this assert we must be tolerant to ram buffer changes!
|
||||||
|
maxConfiguredRamBuffer = Math.max(maxRamMB, maxConfiguredRamBuffer);
|
||||||
|
final long ram = flushBytes + activeBytes;
|
||||||
|
// take peakDelta into account - worst case is that all flushing, pending and blocked DWPT had maxMem and the last doc had the peakDelta
|
||||||
|
final long expected = (long)(2 * (maxConfiguredRamBuffer * 1024 * 1024)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
|
||||||
|
assert ram <= expected : "ram was " + ram + " expected: " + expected + " flush mem: " + flushBytes + " active: " + activeBytes ;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private void commitPerThreadBytes(ThreadState perThread) {
|
private void commitPerThreadBytes(ThreadState perThread) {
|
||||||
final long delta = perThread.perThread.bytesUsed()
|
final long delta = perThread.perThread.bytesUsed()
|
||||||
- perThread.bytesUsed;
|
- perThread.bytesUsed;
|
||||||
|
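The new stallLimitBytes() and assertMemory() helpers bound net RAM at roughly twice the configured buffer, plus one worst-case document delta per pending, flushing or blocked DWPT. A small worked sketch of those two computations; the numbers below are assumptions chosen only to make the arithmetic concrete:

public final class FlushRamBoundsExample {
  public static void main(String[] args) {
    final double maxRamMB = 16.0;   // assume IndexWriterConfig.getRAMBufferSizeMB() == 16
    final long stallLimitBytes = (long) (2 * (maxRamMB * 1024 * 1024));  // 33,554,432 bytes

    // assertMemory(): worst case, every pending/flushing/blocked DWPT sits at the buffer
    // limit and the last document added the peak per-document delta on top of it.
    final int numPending = 2, numFlushing = 1, numBlocked = 1;
    final long peakDelta = 512 * 1024;                                   // assumed peak delta
    final long expected = (long) (2 * (maxRamMB * 1024 * 1024))
        + (numPending + numFlushing + numBlocked) * peakDelta;           // 35,651,584 bytes
    System.out.println(stallLimitBytes + " " + expected);
  }
}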
@ -105,11 +125,14 @@ public final class DocumentsWriterFlushControl {
|
||||||
peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
|
peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
|
||||||
peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
|
peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
|
||||||
peakNetBytes = Math.max(peakNetBytes, netBytes());
|
peakNetBytes = Math.max(peakNetBytes, netBytes());
|
||||||
|
peakDelta = Math.max(peakDelta, delta);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
|
synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
|
||||||
boolean isUpdate) {
|
boolean isUpdate) {
|
||||||
|
try {
|
||||||
commitPerThreadBytes(perThread);
|
commitPerThreadBytes(perThread);
|
||||||
if (!perThread.flushPending) {
|
if (!perThread.flushPending) {
|
||||||
if (isUpdate) {
|
if (isUpdate) {
|
||||||
|
@ -121,37 +144,43 @@ public final class DocumentsWriterFlushControl {
|
||||||
// Safety check to prevent a single DWPT exceeding its RAM limit. This
|
// Safety check to prevent a single DWPT exceeding its RAM limit. This
|
||||||
// is super important since we can not address more than 2048 MB per DWPT
|
// is super important since we can not address more than 2048 MB per DWPT
|
||||||
setFlushPending(perThread);
|
setFlushPending(perThread);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
final DocumentsWriterPerThread flushingDWPT;
|
||||||
if (fullFlush) {
|
if (fullFlush) {
|
||||||
DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
|
if (perThread.flushPending) {
|
||||||
assert toBlock != null;
|
checkoutAndBlock(perThread);
|
||||||
blockedFlushes.add(toBlock);
|
flushingDWPT = nextPendingFlush();
|
||||||
|
} else {
|
||||||
|
flushingDWPT = null;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
flushingDWPT = tryCheckoutForFlush(perThread);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
|
|
||||||
healthiness.updateStalled(this);
|
|
||||||
return flushingDWPT;
|
return flushingDWPT;
|
||||||
|
} finally {
|
||||||
|
stallControl.updateStalled(this);
|
||||||
|
assert assertMemory();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
|
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
|
||||||
assert flushingWriters.containsKey(dwpt);
|
assert flushingWriters.containsKey(dwpt);
|
||||||
try {
|
try {
|
||||||
numFlushing--;
|
|
||||||
Long bytes = flushingWriters.remove(dwpt);
|
Long bytes = flushingWriters.remove(dwpt);
|
||||||
flushBytes -= bytes.longValue();
|
flushBytes -= bytes.longValue();
|
||||||
perThreadPool.recycle(dwpt);
|
perThreadPool.recycle(dwpt);
|
||||||
healthiness.updateStalled(this);
|
stallControl.updateStalled(this);
|
||||||
|
assert assertMemory();
|
||||||
} finally {
|
} finally {
|
||||||
notifyAll();
|
notifyAll();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized boolean anyFlushing() {
|
|
||||||
return numFlushing != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized void waitForFlush() {
|
public synchronized void waitForFlush() {
|
||||||
if (numFlushing != 0) {
|
while (flushingWriters.size() != 0) {
|
||||||
try {
|
try {
|
||||||
this.wait();
|
this.wait();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
@ -173,32 +202,51 @@ public final class DocumentsWriterFlushControl {
|
||||||
flushBytes += bytes;
|
flushBytes += bytes;
|
||||||
activeBytes -= bytes;
|
activeBytes -= bytes;
|
||||||
numPending++; // write access synced
|
numPending++; // write access synced
|
||||||
|
assert assertMemory();
|
||||||
} // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing
|
} // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void doOnAbort(ThreadState state) {
|
synchronized void doOnAbort(ThreadState state) {
|
||||||
|
try {
|
||||||
if (state.flushPending) {
|
if (state.flushPending) {
|
||||||
flushBytes -= state.bytesUsed;
|
flushBytes -= state.bytesUsed;
|
||||||
} else {
|
} else {
|
||||||
activeBytes -= state.bytesUsed;
|
activeBytes -= state.bytesUsed;
|
||||||
}
|
}
|
||||||
|
assert assertMemory();
|
||||||
// Take it out of the loop this DWPT is stale
|
// Take it out of the loop this DWPT is stale
|
||||||
perThreadPool.replaceForFlush(state, closed);
|
perThreadPool.replaceForFlush(state, closed);
|
||||||
healthiness.updateStalled(this);
|
} finally {
|
||||||
|
stallControl.updateStalled(this);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized DocumentsWriterPerThread tryCheckoutForFlush(
|
synchronized DocumentsWriterPerThread tryCheckoutForFlush(
|
||||||
ThreadState perThread) {
|
ThreadState perThread) {
|
||||||
if (fullFlush) {
|
return perThread.flushPending ? internalTryCheckOutForFlush(perThread) : null;
|
||||||
return null;
|
}
|
||||||
|
|
||||||
|
private void checkoutAndBlock(ThreadState perThread) {
|
||||||
|
perThread.lock();
|
||||||
|
try {
|
||||||
|
assert perThread.flushPending : "can not block non-pending threadstate";
|
||||||
|
assert fullFlush : "can not block if fullFlush == false";
|
||||||
|
final DocumentsWriterPerThread dwpt;
|
||||||
|
final long bytes = perThread.bytesUsed;
|
||||||
|
dwpt = perThreadPool.replaceForFlush(perThread, closed);
|
||||||
|
numPending--;
|
||||||
|
blockedFlushes.add(new BlockedFlush(dwpt, bytes));
|
||||||
|
} finally {
|
||||||
|
perThread.unlock();
|
||||||
}
|
}
|
||||||
return internalTryCheckOutForFlush(perThread);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private DocumentsWriterPerThread internalTryCheckOutForFlush(
|
private DocumentsWriterPerThread internalTryCheckOutForFlush(
|
||||||
ThreadState perThread) {
|
ThreadState perThread) {
|
||||||
if (perThread.flushPending) {
|
assert Thread.holdsLock(this);
|
||||||
|
assert perThread.flushPending;
|
||||||
|
try {
|
||||||
// We are pending so all memory is already moved to flushBytes
|
// We are pending so all memory is already moved to flushBytes
|
||||||
if (perThread.tryLock()) {
|
if (perThread.tryLock()) {
|
||||||
try {
|
try {
|
||||||
|
@ -212,15 +260,16 @@ public final class DocumentsWriterFlushControl {
|
||||||
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
|
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
|
||||||
flushingWriters.put(dwpt, Long.valueOf(bytes));
|
flushingWriters.put(dwpt, Long.valueOf(bytes));
|
||||||
numPending--; // write access synced
|
numPending--; // write access synced
|
||||||
numFlushing++;
|
|
||||||
return dwpt;
|
return dwpt;
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
perThread.unlock();
|
perThread.unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return null;
|
return null;
|
||||||
|
} finally {
|
||||||
|
stallControl.updateStalled(this);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -231,12 +280,13 @@ public final class DocumentsWriterFlushControl {
|
||||||
|
|
||||||
DocumentsWriterPerThread nextPendingFlush() {
|
DocumentsWriterPerThread nextPendingFlush() {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
DocumentsWriterPerThread poll = flushQueue.poll();
|
final DocumentsWriterPerThread poll;
|
||||||
if (poll != null) {
|
if ((poll = flushQueue.poll()) != null) {
|
||||||
|
stallControl.updateStalled(this);
|
||||||
return poll;
|
return poll;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (numPending > 0) {
|
if (numPending > 0 && !fullFlush) { // don't check if we are doing a full flush
|
||||||
final Iterator<ThreadState> allActiveThreads = perThreadPool
|
final Iterator<ThreadState> allActiveThreads = perThreadPool
|
||||||
.getActivePerThreadsIterator();
|
.getActivePerThreadsIterator();
|
||||||
while (allActiveThreads.hasNext() && numPending > 0) {
|
while (allActiveThreads.hasNext() && numPending > 0) {
|
||||||
|
@ -276,8 +326,8 @@ public final class DocumentsWriterFlushControl {
|
||||||
return documentsWriter.deleteQueue.numGlobalTermDeletes();
|
return documentsWriter.deleteQueue.numGlobalTermDeletes();
|
||||||
}
|
}
|
||||||
|
|
||||||
int numFlushingDWPT() {
|
synchronized int numFlushingDWPT() {
|
||||||
return numFlushing;
|
return flushingWriters.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean doApplyAllDeletes() {
|
public boolean doApplyAllDeletes() {
|
||||||
|
@ -289,7 +339,7 @@ public final class DocumentsWriterFlushControl {
|
||||||
}
|
}
|
||||||
|
|
||||||
int numActiveDWPT() {
|
int numActiveDWPT() {
|
||||||
return this.perThreadPool.getMaxThreadStates();
|
return this.perThreadPool.getActiveThreadState();
|
||||||
}
|
}
|
||||||
|
|
||||||
void markForFullFlush() {
|
void markForFullFlush() {
|
||||||
|
@ -331,11 +381,11 @@ public final class DocumentsWriterFlushControl {
|
||||||
if (!next.flushPending) {
|
if (!next.flushPending) {
|
||||||
setFlushPending(next);
|
setFlushPending(next);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
|
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
|
||||||
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
|
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
|
||||||
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
|
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
|
||||||
toFlush.add(flushingDWPT);
|
toFlush.add(flushingDWPT);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// get the new delete queue from DW
|
// get the new delete queue from DW
|
||||||
next.perThread.initialize();
|
next.perThread.initialize();
|
||||||
|
@ -345,31 +395,54 @@ public final class DocumentsWriterFlushControl {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
assert assertBlockedFlushes(flushingQueue);
|
/* make sure that any DWPT that was concurrently marked as
|
||||||
flushQueue.addAll(blockedFlushes);
|
* pending and moved to blocked is moved over to the flushQueue. There is
|
||||||
blockedFlushes.clear();
|
* a chance that this happens since we mark DWPTs for full flush without
|
||||||
|
* blocking indexing.*/
|
||||||
|
pruneBlockedQueue(flushingQueue);
|
||||||
|
assert assertBlockedFlushes(documentsWriter.deleteQueue);
|
||||||
flushQueue.addAll(toFlush);
|
flushQueue.addAll(toFlush);
|
||||||
|
stallControl.updateStalled(this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prunes the blockedQueue by removing all DWPTs that are associated with the given flush queue.
|
||||||
|
*/
|
||||||
|
private void pruneBlockedQueue(final DocumentsWriterDeleteQueue flushingQueue) {
|
||||||
|
Iterator<BlockedFlush> iterator = blockedFlushes.iterator();
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
BlockedFlush blockedFlush = iterator.next();
|
||||||
|
if (blockedFlush.dwpt.deleteQueue == flushingQueue) {
|
||||||
|
iterator.remove();
|
||||||
|
assert !flushingWriters.containsKey(blockedFlush.dwpt) : "DWPT is already flushing";
|
||||||
|
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
|
||||||
|
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||||
|
// don't decrement pending here - it's already done when the DWPT is blocked
|
||||||
|
flushQueue.add(blockedFlush.dwpt);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void finishFullFlush() {
|
synchronized void finishFullFlush() {
|
||||||
assert fullFlush;
|
assert fullFlush;
|
||||||
assert flushQueue.isEmpty();
|
assert flushQueue.isEmpty();
|
||||||
|
assert flushingWriters.isEmpty();
|
||||||
try {
|
try {
|
||||||
if (!blockedFlushes.isEmpty()) {
|
if (!blockedFlushes.isEmpty()) {
|
||||||
assert assertBlockedFlushes(documentsWriter.deleteQueue);
|
assert assertBlockedFlushes(documentsWriter.deleteQueue);
|
||||||
flushQueue.addAll(blockedFlushes);
|
pruneBlockedQueue(documentsWriter.deleteQueue);
|
||||||
blockedFlushes.clear();
|
assert blockedFlushes.isEmpty();
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
fullFlush = false;
|
fullFlush = false;
|
||||||
|
stallControl.updateStalled(this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
|
boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
|
||||||
Queue<DocumentsWriterPerThread> flushes = this.blockedFlushes;
|
for (BlockedFlush blockedFlush : blockedFlushes) {
|
||||||
for (DocumentsWriterPerThread documentsWriterPerThread : flushes) {
|
assert blockedFlush.dwpt.deleteQueue == flushingQueue;
|
||||||
assert documentsWriterPerThread.deleteQueue == flushingQueue;
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -379,18 +452,65 @@ public final class DocumentsWriterFlushControl {
|
||||||
for (DocumentsWriterPerThread dwpt : flushQueue) {
|
for (DocumentsWriterPerThread dwpt : flushQueue) {
|
||||||
doAfterFlush(dwpt);
|
doAfterFlush(dwpt);
|
||||||
}
|
}
|
||||||
for (DocumentsWriterPerThread dwpt : blockedFlushes) {
|
for (BlockedFlush blockedFlush : blockedFlushes) {
|
||||||
doAfterFlush(dwpt);
|
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||||
|
doAfterFlush(blockedFlush.dwpt);
|
||||||
}
|
}
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
fullFlush = false;
|
fullFlush = false;
|
||||||
flushQueue.clear();
|
flushQueue.clear();
|
||||||
blockedFlushes.clear();
|
blockedFlushes.clear();
|
||||||
|
stallControl.updateStalled(this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized boolean isFullFlush() {
|
/**
|
||||||
|
* Returns <code>true</code> if a full flush is currently running
|
||||||
|
*/
|
||||||
|
synchronized boolean isFullFlush() { // used by assert
|
||||||
return fullFlush;
|
return fullFlush;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of flushes that are already checked out but not yet
|
||||||
|
* actively flushing
|
||||||
|
*/
|
||||||
|
synchronized int numQueuedFlushes() {
|
||||||
|
return flushQueue.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of flushes that are checked out but not yet available
|
||||||
|
* for flushing. This only applies during a full flush if a DWPT needs
|
||||||
|
* flushing but must not be flushed until the full flush has finished.
|
||||||
|
*/
|
||||||
|
synchronized int numBlockedFlushes() {
|
||||||
|
return blockedFlushes.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class BlockedFlush {
|
||||||
|
final DocumentsWriterPerThread dwpt;
|
||||||
|
final long bytes;
|
||||||
|
BlockedFlush(DocumentsWriterPerThread dwpt, long bytes) {
|
||||||
|
super();
|
||||||
|
this.dwpt = dwpt;
|
||||||
|
this.bytes = bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method will block if too many DWPT are currently flushing and no
|
||||||
|
* checked out DWPT are available
|
||||||
|
*/
|
||||||
|
void waitIfStalled() {
|
||||||
|
stallControl.waitIfStalled();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns <code>true</code> iff stalled
|
||||||
|
*/
|
||||||
|
boolean anyStalledThreads() {
|
||||||
|
return stallControl.anyStalledThreads();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
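blockedFlushes now stores BlockedFlush entries (the DWPT plus its byte count) instead of bare DWPTs, and pruneBlockedQueue moves only the entries whose delete queue matches the flushing one. A compact sketch of that pruning step over hypothetical stand-in types reduced to what the hunk actually uses:

import java.util.Iterator;
import java.util.Queue;

final class BlockedFlushPruningExample {
  // Stand-ins for the package-private Lucene types referenced in the hunk.
  static final class Dwpt { final Object deleteQueue; Dwpt(Object q) { deleteQueue = q; } }
  static final class BlockedFlush {
    final Dwpt dwpt; final long bytes;
    BlockedFlush(Dwpt dwpt, long bytes) { this.dwpt = dwpt; this.bytes = bytes; }
  }

  // Moves every blocked flush that belongs to the given (old) delete queue into flushQueue.
  static void prune(Queue<BlockedFlush> blockedFlushes, Queue<Dwpt> flushQueue, Object flushingQueue) {
    Iterator<BlockedFlush> iterator = blockedFlushes.iterator();
    while (iterator.hasNext()) {
      BlockedFlush blockedFlush = iterator.next();
      if (blockedFlush.dwpt.deleteQueue == flushingQueue) {  // same generation as the full flush
        iterator.remove();
        flushQueue.add(blockedFlush.dwpt);                   // now eligible for normal flushing
      }
    }
  }
}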
@ -166,6 +166,13 @@ public abstract class DocumentsWriterPerThreadPool {
|
||||||
return perThreads.length;
|
return perThreads.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of active {@link ThreadState} instances.
|
||||||
|
*/
|
||||||
|
public int getActiveThreadState() {
|
||||||
|
return numThreadStatesActive;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a new {@link ThreadState} iff any new state is available otherwise
|
* Returns a new {@link ThreadState} iff any new state is available otherwise
|
||||||
* <code>null</code>.
|
* <code>null</code>.
|
||||||
|
|
|
@ -36,8 +36,7 @@ import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
|
||||||
* continue indexing.
|
* continue indexing.
|
||||||
*/
|
*/
|
||||||
//TODO: rename this to DocumentsWriterStallControl (or something like that)?
|
//TODO: rename this to DocumentsWriterStallControl (or something like that)?
|
||||||
final class Healthiness {
|
final class DocumentsWriterStallControl {
|
||||||
|
|
||||||
@SuppressWarnings("serial")
|
@SuppressWarnings("serial")
|
||||||
private static final class Sync extends AbstractQueuedSynchronizer {
|
private static final class Sync extends AbstractQueuedSynchronizer {
|
||||||
volatile boolean hasBlockedThreads = false; // only with assert
|
volatile boolean hasBlockedThreads = false; // only with assert
|
||||||
|
@ -96,13 +95,14 @@ final class Healthiness {
|
||||||
* <code>true</code> iff the number of flushing
|
* <code>true</code> iff the number of flushing
|
||||||
* {@link DocumentsWriterPerThread} is greater than the number of active
|
* {@link DocumentsWriterPerThread} is greater than the number of active
|
||||||
* {@link DocumentsWriterPerThread}. Otherwise it will reset the
|
* {@link DocumentsWriterPerThread}. Otherwise it will reset the
|
||||||
* {@link Healthiness} to healthy and release all threads waiting on
|
* {@link DocumentsWriterStallControl} to healthy and release all threads waiting on
|
||||||
* {@link #waitIfStalled()}
|
* {@link #waitIfStalled()}
|
||||||
*/
|
*/
|
||||||
void updateStalled(DocumentsWriterFlushControl flushControl) {
|
void updateStalled(DocumentsWriterFlushControl flushControl) {
|
||||||
do {
|
do {
|
||||||
// if we have more flushing DWPT than numActiveDWPT we stall!
|
// stall if the net RAM held by flushing / blocked DWPTs exceeds the stall limit!
|
||||||
while (flushControl.numActiveDWPT() < flushControl.numFlushingDWPT()) {
|
// don't stall if we have queued flushes - threads should be hijacked instead
|
||||||
|
while (flushControl.netBytes() > flushControl.stallLimitBytes()) {
|
||||||
if (sync.trySetStalled()) {
|
if (sync.trySetStalled()) {
|
||||||
assert wasStalled = true;
|
assert wasStalled = true;
|
||||||
return;
|
return;
|
||||||
|
@ -115,7 +115,7 @@ final class Healthiness {
|
||||||
sync.acquireShared(0);
|
sync.acquireShared(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean hasBlocked() {
|
boolean hasBlocked() { // for tests
|
||||||
return sync.hasBlockedThreads;
|
return sync.hasBlockedThreads;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
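The renamed DocumentsWriterStallControl stalls on memory rather than on a flushing-versus-active thread count: indexing threads block once the net bytes held by flushing and blocked DWPTs exceed stallLimitBytes(), roughly twice the RAM buffer. A tiny sketch of the new predicate next to the old one; FlushAccounting is a hypothetical stand-in exposing only the accounting used by updateStalled():

final class StallPredicatesExample {
  interface FlushAccounting {
    long netBytes();          // flushBytes + activeBytes
    long stallLimitBytes();   // ~2 * RAM buffer, or Long.MAX_VALUE if auto-flush is disabled
    int numActiveDWPT();
    int numFlushingDWPT();
  }

  static boolean shouldStallOld(FlushAccounting fc) {   // pre-patch criterion
    return fc.numActiveDWPT() < fc.numFlushingDWPT();
  }

  static boolean shouldStallNew(FlushAccounting fc) {   // post-patch criterion
    return fc.netBytes() > fc.stallLimitBytes();
  }
}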
@ -40,7 +40,13 @@ import java.util.Collection;
|
||||||
* refuses to run by default. Specify {@code -delete-prior-commits}
|
* refuses to run by default. Specify {@code -delete-prior-commits}
|
||||||
* to override this, allowing the tool to delete all but the last commit.
|
* to override this, allowing the tool to delete all but the last commit.
|
||||||
* From Java code this can be enabled by passing {@code true} to
|
* From Java code this can be enabled by passing {@code true} to
|
||||||
* {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
|
* {@link #IndexUpgrader(Directory,Version,PrintStream,boolean)}.
|
||||||
|
* <p><b>Warning:</b> This tool may reorder documents if the index was partially
|
||||||
|
* upgraded before execution (e.g., documents were added). If your application relies
|
||||||
|
* on "monotonicity" of doc IDs (which means that the order in which the documents
|
||||||
|
* were added to the index is preserved), do a full optimize instead.
|
||||||
|
* The {@link MergePolicy} set by {@link IndexWriterConfig} may also reorder
|
||||||
|
* documents.
|
||||||
*/
|
*/
|
||||||
public final class IndexUpgrader {
|
public final class IndexUpgrader {
|
||||||
|
|
||||||
|
@ -52,9 +58,11 @@ public final class IndexUpgrader {
|
||||||
System.err.println("reason, if the incoming index has more than one commit, the tool");
|
System.err.println("reason, if the incoming index has more than one commit, the tool");
|
||||||
System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
|
System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
|
||||||
System.err.println("this, allowing the tool to delete all but the last commit.");
|
System.err.println("this, allowing the tool to delete all but the last commit.");
|
||||||
|
System.err.println("WARNING: This tool may reorder document IDs!");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) throws IOException {
|
||||||
String dir = null;
|
String dir = null;
|
||||||
boolean deletePriorCommits = false;
|
boolean deletePriorCommits = false;
|
||||||
|
@ -74,7 +82,7 @@ public final class IndexUpgrader {
|
||||||
printUsage();
|
printUsage();
|
||||||
}
|
}
|
||||||
|
|
||||||
new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
|
new IndexUpgrader(FSDirectory.open(new File(dir)), Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Directory dir;
|
private final Directory dir;
|
||||||
|
@ -82,16 +90,22 @@ public final class IndexUpgrader {
|
||||||
private final IndexWriterConfig iwc;
|
private final IndexWriterConfig iwc;
|
||||||
private final boolean deletePriorCommits;
|
private final boolean deletePriorCommits;
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
/** Creates an index upgrader on the given directory, creating an {@link IndexWriter} that uses the given
|
||||||
public IndexUpgrader(Directory dir) {
|
* {@code matchVersion}. The tool refuses to upgrade indexes with multiple commit points. */
|
||||||
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
|
public IndexUpgrader(Directory dir, Version matchVersion) {
|
||||||
|
this(dir, new IndexWriterConfig(matchVersion, null), null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
/** Creates an index upgrader on the given directory, creating an {@link IndexWriter} that uses the given
|
||||||
public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
|
* {@code matchVersion}. You can also upgrade indexes with multiple commit points by removing
|
||||||
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
|
* all older ones. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
|
||||||
|
public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean deletePriorCommits) {
|
||||||
|
this(dir, new IndexWriterConfig(matchVersion, null), infoStream, deletePriorCommits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Creates an index upgrader on the given directory, creating an {@link IndexWriter} that uses the given
|
||||||
|
* config. You can also upgrade indexes with multiple commit points by removing
|
||||||
|
* all older ones. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
|
||||||
public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
|
public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
this.iwc = iwc;
|
this.iwc = iwc;
|
||||||
|
|
|
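With the new constructors the match version is explicit instead of being hard-wired to Version.LUCENE_CURRENT. A minimal usage sketch of the upgraded API; the index path is illustrative, and passing a non-null PrintStream simply enables logging as documented above:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public final class UpgradeExample {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("/path/to/index"));   // illustrative path
    // Upgrade in place; the last argument controls deletion of prior commits.
    new IndexUpgrader(dir, Version.LUCENE_32, System.out, false).upgrade();
    dir.close();
  }
}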
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -221,7 +222,7 @@ public class IndexWriter implements Closeable {
|
||||||
private volatile long changeCount; // increments every time a change is completed
|
private volatile long changeCount; // increments every time a change is completed
|
||||||
private long lastCommitChangeCount; // last changeCount that was committed
|
private long lastCommitChangeCount; // last changeCount that was committed
|
||||||
|
|
||||||
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
|
private List<SegmentInfo> rollbackSegments; // list of segmentInfo we will fallback to if the commit fails
|
||||||
|
|
||||||
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
|
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
|
||||||
volatile long pendingCommitChangeCount;
|
volatile long pendingCommitChangeCount;
|
||||||
|
@ -440,14 +441,14 @@ public class IndexWriter implements Closeable {
|
||||||
public synchronized boolean infoIsLive(SegmentInfo info) {
|
public synchronized boolean infoIsLive(SegmentInfo info) {
|
||||||
int idx = segmentInfos.indexOf(info);
|
int idx = segmentInfos.indexOf(info);
|
||||||
assert idx != -1: "info=" + info + " isn't in pool";
|
assert idx != -1: "info=" + info + " isn't in pool";
|
||||||
assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
|
assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized SegmentInfo mapToLive(SegmentInfo info) {
|
public synchronized SegmentInfo mapToLive(SegmentInfo info) {
|
||||||
int idx = segmentInfos.indexOf(info);
|
int idx = segmentInfos.indexOf(info);
|
||||||
if (idx != -1) {
|
if (idx != -1) {
|
||||||
info = segmentInfos.get(idx);
|
info = segmentInfos.info(idx);
|
||||||
}
|
}
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
@ -818,7 +819,7 @@ public class IndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
setRollbackSegmentInfos(segmentInfos);
|
rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
|
||||||
|
|
||||||
// start with previous field numbers, but new FieldInfos
|
// start with previous field numbers, but new FieldInfos
|
||||||
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
|
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
|
||||||
|
@ -862,10 +863,6 @@ public class IndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
|
|
||||||
rollbackSegmentInfos = (SegmentInfos) infos.clone();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the private {@link IndexWriterConfig}, cloned
|
* Returns the private {@link IndexWriterConfig}, cloned
|
||||||
* from the {@link IndexWriterConfig} passed to
|
* from the {@link IndexWriterConfig} passed to
|
||||||
|
@ -1126,8 +1123,7 @@ public class IndexWriter implements Closeable {
|
||||||
else
|
else
|
||||||
count = 0;
|
count = 0;
|
||||||
|
|
||||||
for (int i = 0; i < segmentInfos.size(); i++)
|
count += segmentInfos.totalDocCount();
|
||||||
count += segmentInfos.info(i).docCount;
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1144,8 +1140,7 @@ public class IndexWriter implements Closeable {
|
||||||
else
|
else
|
||||||
count = 0;
|
count = 0;
|
||||||
|
|
||||||
for (int i = 0; i < segmentInfos.size(); i++) {
|
for (final SegmentInfo info : segmentInfos) {
|
||||||
final SegmentInfo info = segmentInfos.info(i);
|
|
||||||
count += info.docCount - numDeletedDocs(info);
|
count += info.docCount - numDeletedDocs(info);
|
||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
|
@ -1159,9 +1154,11 @@ public class IndexWriter implements Closeable {
|
||||||
if (docWriter.anyDeletions()) {
|
if (docWriter.anyDeletions()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < segmentInfos.size(); i++)
|
for (final SegmentInfo info : segmentInfos) {
|
||||||
if (segmentInfos.info(i).hasDeletions())
|
if (info.hasDeletions()) {
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1554,7 +1551,8 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
resetMergeExceptions();
|
resetMergeExceptions();
|
||||||
segmentsToOptimize = new HashSet<SegmentInfo>(segmentInfos);
|
segmentsToOptimize.clear();
|
||||||
|
segmentsToOptimize.addAll(segmentInfos.asSet());
|
||||||
optimizeMaxNumSegments = maxNumSegments;
|
optimizeMaxNumSegments = maxNumSegments;
|
||||||
|
|
||||||
// Now mark all pending & running merges as optimize
|
// Now mark all pending & running merges as optimize
|
||||||
|
@ -1778,7 +1776,7 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
final MergePolicy.MergeSpecification spec;
|
final MergePolicy.MergeSpecification spec;
|
||||||
if (optimize) {
|
if (optimize) {
|
||||||
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
|
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));
|
||||||
|
|
||||||
if (spec != null) {
|
if (spec != null) {
|
||||||
final int numMerges = spec.merges.size();
|
final int numMerges = spec.merges.size();
|
||||||
|
@ -1889,8 +1887,7 @@ public class IndexWriter implements Closeable {
|
||||||
// attempt to commit using this instance of IndexWriter
|
// attempt to commit using this instance of IndexWriter
|
||||||
// will always write to a new generation ("write
|
// will always write to a new generation ("write
|
||||||
// once").
|
// once").
|
||||||
segmentInfos.clear();
|
segmentInfos.rollbackSegmentInfos(rollbackSegments);
|
||||||
segmentInfos.addAll(rollbackSegmentInfos);
|
|
||||||
|
|
||||||
docWriter.abort();
|
docWriter.abort();
|
||||||
|
|
||||||
|
@ -2555,7 +2552,7 @@ public class IndexWriter implements Closeable {
|
||||||
lastCommitChangeCount = pendingCommitChangeCount;
|
lastCommitChangeCount = pendingCommitChangeCount;
|
||||||
segmentInfos.updateGeneration(pendingCommit);
|
segmentInfos.updateGeneration(pendingCommit);
|
||||||
segmentInfos.setUserData(pendingCommit.getUserData());
|
segmentInfos.setUserData(pendingCommit.getUserData());
|
||||||
setRollbackSegmentInfos(pendingCommit);
|
rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
|
||||||
deleter.checkpoint(pendingCommit, true);
|
deleter.checkpoint(pendingCommit, true);
|
||||||
} finally {
|
} finally {
|
||||||
// Matches the incRef done in startCommit:
|
// Matches the incRef done in startCommit:
|
||||||
|
@ -2660,7 +2657,7 @@ public class IndexWriter implements Closeable {
|
||||||
final synchronized void applyAllDeletes() throws IOException {
|
final synchronized void applyAllDeletes() throws IOException {
|
||||||
flushDeletesCount.incrementAndGet();
|
flushDeletesCount.incrementAndGet();
|
||||||
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
|
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
|
||||||
.applyDeletes(readerPool, segmentInfos);
|
.applyDeletes(readerPool, segmentInfos.asList());
|
||||||
if (result.anyDeletes) {
|
if (result.anyDeletes) {
|
||||||
checkpoint();
|
checkpoint();
|
||||||
}
|
}
|
||||||
|
@ -2709,7 +2706,7 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
|
private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
|
||||||
for(SegmentInfo info : merge.segments) {
|
for(SegmentInfo info : merge.segments) {
|
||||||
if (segmentInfos.indexOf(info) == -1) {
|
if (!segmentInfos.contains(info)) {
|
||||||
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
|
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2847,38 +2844,12 @@ public class IndexWriter implements Closeable {
|
||||||
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
|
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
|
||||||
}
|
}
|
||||||
|
|
||||||
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
|
final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
|
||||||
int segIdx = 0;
|
segmentInfos.applyMergeChanges(merge, dropSegment);
|
||||||
int newSegIdx = 0;
|
|
||||||
boolean inserted = false;
|
|
||||||
final int curSegCount = segmentInfos.size();
|
|
||||||
while(segIdx < curSegCount) {
|
|
||||||
final SegmentInfo info = segmentInfos.info(segIdx++);
|
|
||||||
if (mergedAway.contains(info)) {
|
|
||||||
if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
|
|
||||||
segmentInfos.set(segIdx-1, merge.info);
|
|
||||||
inserted = true;
|
|
||||||
newSegIdx++;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
segmentInfos.set(newSegIdx++, info);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Either we found place to insert segment, or, we did
|
if (dropSegment) {
|
||||||
// not, but only because all segments we merged became
|
|
||||||
// deleted while we are merging, in which case it should
|
|
||||||
// be the case that the new segment is also all deleted:
|
|
||||||
if (!inserted) {
|
|
||||||
assert allDeleted;
|
|
||||||
if (keepFullyDeletedSegments) {
|
|
||||||
segmentInfos.add(0, merge.info);
|
|
||||||
} else {
|
|
||||||
readerPool.drop(merge.info);
|
readerPool.drop(merge.info);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
|
|
||||||
|
|
||||||
if (infoStream != null) {
|
if (infoStream != null) {
|
||||||
message("after commit: " + segString());
|
message("after commit: " + segString());
|
||||||
|
@ -3014,7 +2985,7 @@ public class IndexWriter implements Closeable {
|
||||||
if (mergingSegments.contains(info)) {
|
if (mergingSegments.contains(info)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (segmentInfos.indexOf(info) == -1) {
|
if (!segmentInfos.contains(info)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (info.dir != directory) {
|
if (info.dir != directory) {
|
||||||
|
@ -3462,7 +3433,7 @@ public class IndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// utility routines for tests
|
// utility routines for tests
|
||||||
SegmentInfo newestSegment() {
|
synchronized SegmentInfo newestSegment() {
|
||||||
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
|
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3472,19 +3443,18 @@ public class IndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @lucene.internal */
|
/** @lucene.internal */
|
||||||
public synchronized String segString(List<SegmentInfo> infos) throws IOException {
|
public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
|
||||||
StringBuilder buffer = new StringBuilder();
|
final StringBuilder buffer = new StringBuilder();
|
||||||
final int count = infos.size();
|
for(final SegmentInfo s : infos) {
|
||||||
for(int i = 0; i < count; i++) {
|
if (buffer.length() > 0) {
|
||||||
if (i > 0) {
|
|
||||||
buffer.append(' ');
|
buffer.append(' ');
|
||||||
}
|
}
|
||||||
buffer.append(segString(infos.get(i)));
|
buffer.append(segString(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
return buffer.toString();
|
return buffer.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @lucene.internal */
|
||||||
public synchronized String segString(SegmentInfo info) throws IOException {
|
public synchronized String segString(SegmentInfo info) throws IOException {
|
||||||
StringBuilder buffer = new StringBuilder();
|
StringBuilder buffer = new StringBuilder();
|
||||||
SegmentReader reader = readerPool.getIfExists(info);
|
SegmentReader reader = readerPool.getIfExists(info);
|
||||||
|
|
|
@ -133,10 +133,15 @@ public final class IndexWriterConfig implements Cloneable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new config with defaults that match the specified
|
* Creates a new config with defaults that match the specified
|
||||||
* {@link Version} as well as the default {@link Analyzer}. {@link Version} is
|
* {@link Version} as well as the default {@link
|
||||||
* a placeholder for future changes. The default settings are relevant to 3.1
|
* Analyzer}. If matchVersion is >= {@link
|
||||||
* and before. In the future, if different settings will apply to different
|
* Version#LUCENE_32}, {@link TieredMergePolicy} is used
|
||||||
* versions, they will be documented here.
|
* for merging; else {@link LogByteSizeMergePolicy}.
|
||||||
|
* Note that {@link TieredMergePolicy} is free to select
|
||||||
|
* non-contiguous merges, which means docIDs may not
|
||||||
|
* remain monotonic over time. If this is a problem you
|
||||||
|
* should switch to {@link LogByteSizeMergePolicy} or
|
||||||
|
* {@link LogDocMergePolicy}.
|
||||||
*/
|
*/
|
||||||
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
|
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
|
||||||
this.matchVersion = matchVersion;
|
this.matchVersion = matchVersion;
|
||||||
|
@ -154,7 +159,11 @@ public final class IndexWriterConfig implements Cloneable {
|
||||||
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
|
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
|
||||||
mergedSegmentWarmer = null;
|
mergedSegmentWarmer = null;
|
||||||
codecProvider = CodecProvider.getDefault();
|
codecProvider = CodecProvider.getDefault();
|
||||||
|
if (matchVersion.onOrAfter(Version.LUCENE_32)) {
|
||||||
mergePolicy = new TieredMergePolicy();
|
mergePolicy = new TieredMergePolicy();
|
||||||
|
} else {
|
||||||
|
mergePolicy = new LogByteSizeMergePolicy();
|
||||||
|
}
|
||||||
readerPooling = DEFAULT_READER_POOLING;
|
readerPooling = DEFAULT_READER_POOLING;
|
||||||
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
|
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
|
||||||
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
|
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
|
||||||
|
|
|
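The IndexWriterConfig hunk above switches the default merge policy on matchVersion: TieredMergePolicy for LUCENE_32 and later, LogByteSizeMergePolicy before that. A minimal sketch of how a caller could opt back into a log-based policy when monotonic docIDs matter; the setter and analyzer used here are assumptions based on the 3.x core API, not part of this diff:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.util.Version;

public class ConfigSketch {
  public static IndexWriterConfig newConfig() {
    // matchVersion >= LUCENE_32, so the constructor defaults to TieredMergePolicy.
    IndexWriterConfig conf = new IndexWriterConfig(
        Version.LUCENE_32, new StandardAnalyzer(Version.LUCENE_32));
    // TieredMergePolicy may pick non-contiguous merges; switch back explicitly
    // if the application depends on monotonic docIDs (setter assumed from 3.x).
    conf.setMergePolicy(new LogByteSizeMergePolicy());
    return conf;
  }
}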
@ -242,6 +242,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
private MergeSpecification findMergesForOptimizeSizeLimit(
|
private MergeSpecification findMergesForOptimizeSizeLimit(
|
||||||
SegmentInfos infos, int maxNumSegments, int last) throws IOException {
|
SegmentInfos infos, int maxNumSegments, int last) throws IOException {
|
||||||
MergeSpecification spec = new MergeSpecification();
|
MergeSpecification spec = new MergeSpecification();
|
||||||
|
final List<SegmentInfo> segments = infos.asList();
|
||||||
|
|
||||||
int start = last - 1;
|
int start = last - 1;
|
||||||
while (start >= 0) {
|
while (start >= 0) {
|
||||||
|
@ -254,12 +255,12 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
// unless there is only 1 which is optimized.
|
// unless there is only 1 which is optimized.
|
||||||
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
|
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
|
||||||
// there is more than 1 segment to the right of this one, or an unoptimized single segment.
|
// there is more than 1 segment to the right of this one, or an unoptimized single segment.
|
||||||
spec.add(new OneMerge(infos.range(start + 1, last)));
|
spec.add(new OneMerge(segments.subList(start + 1, last)));
|
||||||
}
|
}
|
||||||
last = start;
|
last = start;
|
||||||
} else if (last - start == mergeFactor) {
|
} else if (last - start == mergeFactor) {
|
||||||
// mergeFactor eligible segments were found, add them as a merge.
|
// mergeFactor eligible segments were found, add them as a merge.
|
||||||
spec.add(new OneMerge(infos.range(start, last)));
|
spec.add(new OneMerge(segments.subList(start, last)));
|
||||||
last = start;
|
last = start;
|
||||||
}
|
}
|
||||||
--start;
|
--start;
|
||||||
|
@ -267,7 +268,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
|
|
||||||
// Add any left-over segments, unless there is just 1 already optimized.
|
// Add any left-over segments, unless there is just 1 already optimized.
|
||||||
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
|
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
|
||||||
spec.add(new OneMerge(infos.range(start, last)));
|
spec.add(new OneMerge(segments.subList(start, last)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return spec.merges.size() == 0 ? null : spec;
|
return spec.merges.size() == 0 ? null : spec;
|
||||||
|
@ -280,11 +281,12 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
*/
|
*/
|
||||||
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
|
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
|
||||||
MergeSpecification spec = new MergeSpecification();
|
MergeSpecification spec = new MergeSpecification();
|
||||||
|
final List<SegmentInfo> segments = infos.asList();
|
||||||
|
|
||||||
// First, enroll all "full" merges (size
|
// First, enroll all "full" merges (size
|
||||||
// mergeFactor) to potentially be run concurrently:
|
// mergeFactor) to potentially be run concurrently:
|
||||||
while (last - maxNumSegments + 1 >= mergeFactor) {
|
while (last - maxNumSegments + 1 >= mergeFactor) {
|
||||||
spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
|
spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
|
||||||
last -= mergeFactor;
|
last -= mergeFactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -296,7 +298,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
// Since we must optimize down to 1 segment, the
|
// Since we must optimize down to 1 segment, the
|
||||||
// choice is simple:
|
// choice is simple:
|
||||||
if (last > 1 || !isOptimized(infos.info(0))) {
|
if (last > 1 || !isOptimized(infos.info(0))) {
|
||||||
spec.add(new OneMerge(infos.range(0, last)));
|
spec.add(new OneMerge(segments.subList(0, last)));
|
||||||
}
|
}
|
||||||
} else if (last > maxNumSegments) {
|
} else if (last > maxNumSegments) {
|
||||||
|
|
||||||
|
@ -325,7 +327,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
|
spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return spec.merges.size() == 0 ? null : spec;
|
return spec.merges.size() == 0 ? null : spec;
|
||||||
|
@ -412,7 +414,8 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
@Override
|
@Override
|
||||||
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
|
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
|
||||||
throws CorruptIndexException, IOException {
|
throws CorruptIndexException, IOException {
|
||||||
final int numSegments = segmentInfos.size();
|
final List<SegmentInfo> segments = segmentInfos.asList();
|
||||||
|
final int numSegments = segments.size();
|
||||||
|
|
||||||
if (verbose())
|
if (verbose())
|
||||||
message("findMergesToExpungeDeletes: " + numSegments + " segments");
|
message("findMergesToExpungeDeletes: " + numSegments + " segments");
|
||||||
|
@ -434,7 +437,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
// deletions, so force a merge now:
|
// deletions, so force a merge now:
|
||||||
if (verbose())
|
if (verbose())
|
||||||
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
|
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
|
||||||
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
|
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
|
||||||
firstSegmentWithDeletions = i;
|
firstSegmentWithDeletions = i;
|
||||||
}
|
}
|
||||||
} else if (firstSegmentWithDeletions != -1) {
|
} else if (firstSegmentWithDeletions != -1) {
|
||||||
|
@ -443,7 +446,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
// mergeFactor segments
|
// mergeFactor segments
|
||||||
if (verbose())
|
if (verbose())
|
||||||
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
|
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
|
||||||
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
|
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
|
||||||
firstSegmentWithDeletions = -1;
|
firstSegmentWithDeletions = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -451,7 +454,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
if (firstSegmentWithDeletions != -1) {
|
if (firstSegmentWithDeletions != -1) {
|
||||||
if (verbose())
|
if (verbose())
|
||||||
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
|
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
|
||||||
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
|
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return spec;
|
return spec;
|
||||||
|
|
|
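The LogMergePolicy hunks above swap the removed SegmentInfos.range(first, last) for List.subList(first, last) over infos.asList(). A plain-Java sketch (no Lucene types) of the half-open subList semantics these call sites rely on:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class SubListSketch {
  public static void main(String[] args) {
    List<String> segments = new ArrayList<String>(Arrays.asList("_0", "_1", "_2", "_3"));
    // Half-open range [1, 3), the same contract the removed range(first, last) had.
    List<String> toMerge = segments.subList(1, 3);
    System.out.println(toMerge); // prints [_1, _2]
    // subList is only a view of the backing list; the OneMerge constructor
    // (next hunk) copies it so later changes cannot affect a running merge.
  }
}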
@ -72,7 +72,7 @@ public abstract class MergePolicy implements java.io.Closeable {
|
||||||
long mergeGen; // used by IndexWriter
|
long mergeGen; // used by IndexWriter
|
||||||
boolean isExternal; // used by IndexWriter
|
boolean isExternal; // used by IndexWriter
|
||||||
int maxNumSegmentsOptimize; // used by IndexWriter
|
int maxNumSegmentsOptimize; // used by IndexWriter
|
||||||
long estimatedMergeBytes; // used by IndexWriter
|
public long estimatedMergeBytes; // used by IndexWriter
|
||||||
List<SegmentReader> readers; // used by IndexWriter
|
List<SegmentReader> readers; // used by IndexWriter
|
||||||
List<SegmentReader> readerClones; // used by IndexWriter
|
List<SegmentReader> readerClones; // used by IndexWriter
|
||||||
public final List<SegmentInfo> segments;
|
public final List<SegmentInfo> segments;
|
||||||
|
@ -84,7 +84,8 @@ public abstract class MergePolicy implements java.io.Closeable {
|
||||||
public OneMerge(List<SegmentInfo> segments) {
|
public OneMerge(List<SegmentInfo> segments) {
|
||||||
if (0 == segments.size())
|
if (0 == segments.size())
|
||||||
throw new RuntimeException("segments must include at least one segment");
|
throw new RuntimeException("segments must include at least one segment");
|
||||||
this.segments = segments;
|
// clone the list, as the incoming list may be based on the original SegmentInfos and may be modified
|
||||||
|
this.segments = new ArrayList<SegmentInfo>(segments);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for(SegmentInfo info : segments) {
|
for(SegmentInfo info : segments) {
|
||||||
count += info.docCount;
|
count += info.docCount;
|
||||||
|
|
|
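The OneMerge constructor above now copies the incoming segment list instead of holding a reference to it, since callers may pass a live subList view. A generic sketch of that defensive-copy pattern with a hypothetical class:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class OneMergeSketch {
  private final List<String> segments;

  public OneMergeSketch(List<String> segments) {
    if (segments.isEmpty()) {
      throw new IllegalArgumentException("segments must include at least one segment");
    }
    // Copy up front: the caller may hand us a subList view of a live list
    // that keeps changing while the merge runs.
    this.segments = Collections.unmodifiableList(new ArrayList<String>(segments));
  }

  public List<String> getSegments() {
    return segments;
  }
}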
@ -42,7 +42,7 @@ import org.apache.lucene.util.Constants;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class SegmentInfo {
|
public final class SegmentInfo implements Cloneable {
|
||||||
// TODO: remove with hasVector and hasProx
|
// TODO: remove with hasVector and hasProx
|
||||||
private static final int CHECK_FIELDINFO = -2;
|
private static final int CHECK_FIELDINFO = -2;
|
||||||
static final int NO = -1; // e.g. no norms; no deletes;
|
static final int NO = -1; // e.g. no norms; no deletes;
|
||||||
|
|
|
@ -20,13 +20,16 @@ package org.apache.lucene.index;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Vector;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
|
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
|
@ -45,7 +48,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class SegmentInfos extends Vector<SegmentInfo> {
|
public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The file format version, a negative number.
|
* The file format version, a negative number.
|
||||||
|
@ -85,6 +88,11 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
|
|
||||||
private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand
|
private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand
|
||||||
|
|
||||||
|
private List<SegmentInfo> segments = new ArrayList<SegmentInfo>();
|
||||||
|
private Set<SegmentInfo> segmentSet = new HashSet<SegmentInfo>();
|
||||||
|
private transient List<SegmentInfo> cachedUnmodifiableList;
|
||||||
|
private transient Set<SegmentInfo> cachedUnmodifiableSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If non-null, information about loading segments_N files
|
* If non-null, information about loading segments_N files
|
||||||
* will be printed here. @see #setInfoStream.
|
* will be printed here. @see #setInfoStream.
|
||||||
|
@ -107,8 +115,8 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final SegmentInfo info(int i) {
|
public SegmentInfo info(int i) {
|
||||||
return get(i);
|
return segments.get(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -237,7 +245,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
|
|
||||||
// Clear any previous segments:
|
// Clear any previous segments:
|
||||||
clear();
|
this.clear();
|
||||||
|
|
||||||
generation = generationFromSegmentsFileName(segmentFileName);
|
generation = generationFromSegmentsFileName(segmentFileName);
|
||||||
|
|
||||||
|
@ -252,7 +260,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
// Clear any segment infos we had loaded so we
|
// Clear any segment infos we had loaded so we
|
||||||
// have a clean slate on retry:
|
// have a clean slate on retry:
|
||||||
clear();
|
this.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -349,15 +357,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
|
|
||||||
/** Prunes any segment whose docs are all deleted. */
|
/** Prunes any segment whose docs are all deleted. */
|
||||||
public void pruneDeletedSegments() {
|
public void pruneDeletedSegments() {
|
||||||
int segIdx = 0;
|
for(final Iterator<SegmentInfo> it = segments.iterator(); it.hasNext();) {
|
||||||
while(segIdx < size()) {
|
final SegmentInfo info = it.next();
|
||||||
final SegmentInfo info = info(segIdx);
|
|
||||||
if (info.getDelCount() == info.docCount) {
|
if (info.getDelCount() == info.docCount) {
|
||||||
remove(segIdx);
|
it.remove();
|
||||||
} else {
|
segmentSet.remove(info);
|
||||||
segIdx++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
assert segmentSet.size() == segments.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -367,14 +374,23 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object clone() {
|
public Object clone() {
|
||||||
SegmentInfos sis = (SegmentInfos) super.clone();
|
try {
|
||||||
for(int i=0;i<sis.size();i++) {
|
final SegmentInfos sis = (SegmentInfos) super.clone();
|
||||||
final SegmentInfo info = sis.info(i);
|
// deep clone, first recreate all collections:
|
||||||
|
sis.segments = new ArrayList<SegmentInfo>(size());
|
||||||
|
sis.segmentSet = new HashSet<SegmentInfo>(size());
|
||||||
|
sis.cachedUnmodifiableList = null;
|
||||||
|
sis.cachedUnmodifiableSet = null;
|
||||||
|
for(final SegmentInfo info : this) {
|
||||||
assert info.getSegmentCodecs() != null;
|
assert info.getSegmentCodecs() != null;
|
||||||
sis.set(i, (SegmentInfo) info.clone());
|
// don't directly access segments, use the add method!
|
||||||
|
sis.add((SegmentInfo) info.clone());
|
||||||
}
|
}
|
||||||
sis.userData = new HashMap<String,String>(userData);
|
sis.userData = new HashMap<String,String>(userData);
|
||||||
return sis;
|
return sis;
|
||||||
|
} catch (CloneNotSupportedException e) {
|
||||||
|
throw new RuntimeException("should not happen", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
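The clone() hunk above turns SegmentInfos into a proper deep clone: super.clone() for the shallow copy, then fresh collections repopulated through add() so the list and set stay consistent. A generic sketch of the same pattern with a hypothetical class:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class Registry implements Cloneable {
  private List<String> items = new ArrayList<String>();
  private Set<String> itemSet = new HashSet<String>();

  public void add(String item) {
    items.add(item);
    itemSet.add(item);
  }

  @Override
  public Registry clone() {
    try {
      Registry copy = (Registry) super.clone();
      // Recreate the collections so the copy does not share state with the original.
      copy.items = new ArrayList<String>();
      copy.itemSet = new HashSet<String>();
      for (String item : items) {
        copy.add(item); // go through add() to keep list and set consistent
      }
      return copy;
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException("should not happen", e);
    }
  }
}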
@ -742,18 +758,6 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException;
|
protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a new SegmentInfos containing the SegmentInfo
|
|
||||||
* instances in the specified range first (inclusive) to
|
|
||||||
* last (exclusive), so total number of segments returned
|
|
||||||
* is last-first.
|
|
||||||
*/
|
|
||||||
public SegmentInfos range(int first, int last) {
|
|
||||||
SegmentInfos infos = new SegmentInfos(codecs);
|
|
||||||
infos.addAll(super.subList(first, last));
|
|
||||||
return infos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Carry over generation numbers from another SegmentInfos
|
// Carry over generation numbers from another SegmentInfos
|
||||||
void updateGeneration(SegmentInfos other) {
|
void updateGeneration(SegmentInfos other) {
|
||||||
lastGeneration = other.lastGeneration;
|
lastGeneration = other.lastGeneration;
|
||||||
|
@ -831,6 +835,10 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
// throw orig excp
|
// throw orig excp
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// we must sync here explicitly since during a commit
|
||||||
|
// IW will not sync the global field map.
|
||||||
|
dir.sync(Collections.singleton(name));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return version;
|
return version;
|
||||||
|
@ -956,7 +964,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public synchronized String toString(Directory directory) {
|
public String toString(Directory directory) {
|
||||||
StringBuilder buffer = new StringBuilder();
|
StringBuilder buffer = new StringBuilder();
|
||||||
buffer.append(getCurrentSegmentFileName()).append(": ");
|
buffer.append(getCurrentSegmentFileName()).append(": ");
|
||||||
final int count = size();
|
final int count = size();
|
||||||
|
@ -987,8 +995,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
* remain write once.
|
* remain write once.
|
||||||
*/
|
*/
|
||||||
void replace(SegmentInfos other) {
|
void replace(SegmentInfos other) {
|
||||||
clear();
|
rollbackSegmentInfos(other.asList());
|
||||||
addAll(other);
|
|
||||||
lastGeneration = other.lastGeneration;
|
lastGeneration = other.lastGeneration;
|
||||||
lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
|
lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
|
||||||
format = other.format;
|
format = other.format;
|
||||||
|
@ -1014,7 +1021,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
* Loads or returns the already loaded global field number map for this {@link SegmentInfos}.
|
* Loads or returns the already loaded global field number map for this {@link SegmentInfos}.
|
||||||
* If this {@link SegmentInfos} has no global field number map the returned instance is empty
|
* If this {@link SegmentInfos} has no global field number map the returned instance is empty
|
||||||
*/
|
*/
|
||||||
synchronized FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
|
FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
|
||||||
if (globalFieldNumberMap != null) {
|
if (globalFieldNumberMap != null) {
|
||||||
return globalFieldNumberMap;
|
return globalFieldNumberMap;
|
||||||
}
|
}
|
||||||
|
@ -1054,4 +1061,135 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
|
||||||
long getLastGlobalFieldMapVersion() {
|
long getLastGlobalFieldMapVersion() {
|
||||||
return lastGlobalFieldMapVersion;
|
return lastGlobalFieldMapVersion;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** applies all changes caused by committing a merge to this SegmentInfos */
|
||||||
|
void applyMergeChanges(MergePolicy.OneMerge merge, boolean dropSegment) {
|
||||||
|
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
|
||||||
|
boolean inserted = false;
|
||||||
|
int newSegIdx = 0;
|
||||||
|
for (int segIdx = 0, cnt = segments.size(); segIdx < cnt; segIdx++) {
|
||||||
|
assert segIdx >= newSegIdx;
|
||||||
|
final SegmentInfo info = segments.get(segIdx);
|
||||||
|
if (mergedAway.contains(info)) {
|
||||||
|
if (!inserted && !dropSegment) {
|
||||||
|
segments.set(segIdx, merge.info);
|
||||||
|
inserted = true;
|
||||||
|
newSegIdx++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
segments.set(newSegIdx, info);
|
||||||
|
newSegIdx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Either we found place to insert segment, or, we did
|
||||||
|
// not, but only because all segments we merged became
|
||||||
|
// deleted while we are merging, in which case it should
|
||||||
|
// be the case that the new segment is also all deleted,
|
||||||
|
// we insert it at the beginning if it should not be dropped:
|
||||||
|
if (!inserted && !dropSegment) {
|
||||||
|
segments.add(0, merge.info);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the rest of the segments in the list are duplicates, so don't remove them from the set, only from the list!
|
||||||
|
segments.subList(newSegIdx, segments.size()).clear();
|
||||||
|
|
||||||
|
// update the Set
|
||||||
|
if (!dropSegment) {
|
||||||
|
segmentSet.add(merge.info);
|
||||||
|
}
|
||||||
|
segmentSet.removeAll(mergedAway);
|
||||||
|
|
||||||
|
assert segmentSet.size() == segments.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
List<SegmentInfo> createBackupSegmentInfos(boolean cloneChildren) {
|
||||||
|
if (cloneChildren) {
|
||||||
|
final List<SegmentInfo> list = new ArrayList<SegmentInfo>(size());
|
||||||
|
for(final SegmentInfo info : this) {
|
||||||
|
assert info.getSegmentCodecs() != null;
|
||||||
|
list.add((SegmentInfo) info.clone());
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
} else {
|
||||||
|
return new ArrayList<SegmentInfo>(segments);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void rollbackSegmentInfos(List<SegmentInfo> infos) {
|
||||||
|
this.clear();
|
||||||
|
this.addAll(infos);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns an <b>unmodifiable</b> {@link Iterator} of contained segments in order. */
|
||||||
|
// @Override (comment out until Java 6)
|
||||||
|
public Iterator<SegmentInfo> iterator() {
|
||||||
|
return asList().iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns all contained segments as an <b>unmodifiable</b> {@link List} view. */
|
||||||
|
public List<SegmentInfo> asList() {
|
||||||
|
if (cachedUnmodifiableList == null) {
|
||||||
|
cachedUnmodifiableList = Collections.unmodifiableList(segments);
|
||||||
|
}
|
||||||
|
return cachedUnmodifiableList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns all contained segments as an <b>unmodifiable</b> {@link Set} view.
|
||||||
|
* The iterator is not sorted, use {@link List} view or {@link #iterator} to get all segments in order. */
|
||||||
|
public Set<SegmentInfo> asSet() {
|
||||||
|
if (cachedUnmodifiableSet == null) {
|
||||||
|
cachedUnmodifiableSet = Collections.unmodifiableSet(segmentSet);
|
||||||
|
}
|
||||||
|
return cachedUnmodifiableSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return segments.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void add(SegmentInfo si) {
|
||||||
|
if (segmentSet.contains(si)) {
|
||||||
|
throw new IllegalStateException("Cannot add the same segment two times to this SegmentInfos instance");
|
||||||
|
}
|
||||||
|
segments.add(si);
|
||||||
|
segmentSet.add(si);
|
||||||
|
assert segmentSet.size() == segments.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addAll(Iterable<SegmentInfo> sis) {
|
||||||
|
for (final SegmentInfo si : sis) {
|
||||||
|
this.add(si);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clear() {
|
||||||
|
segments.clear();
|
||||||
|
segmentSet.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove(SegmentInfo si) {
|
||||||
|
final int index = this.indexOf(si);
|
||||||
|
if (index >= 0) {
|
||||||
|
this.remove(index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove(int index) {
|
||||||
|
segmentSet.remove(segments.remove(index));
|
||||||
|
assert segmentSet.size() == segments.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean contains(SegmentInfo si) {
|
||||||
|
return segmentSet.contains(si);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int indexOf(SegmentInfo si) {
|
||||||
|
if (segmentSet.contains(si)) {
|
||||||
|
return segments.indexOf(si);
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
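The new methods above back SegmentInfos with a List plus a HashSet and hand out lazily cached unmodifiable views. A stripped-down sketch of that dual-structure design (hypothetical class, String standing in for SegmentInfo):

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class SegmentsView {
  private final List<String> segments = new ArrayList<String>();
  private final Set<String> segmentSet = new HashSet<String>();
  private List<String> cachedUnmodifiableList;
  private Set<String> cachedUnmodifiableSet;

  public void add(String si) {
    if (segmentSet.contains(si)) {
      throw new IllegalStateException("cannot add the same segment twice");
    }
    segments.add(si);
    segmentSet.add(si);
  }

  /** Ordered, read-only view; created once and handed out repeatedly. */
  public List<String> asList() {
    if (cachedUnmodifiableList == null) {
      cachedUnmodifiableList = Collections.unmodifiableList(segments);
    }
    return cachedUnmodifiableList;
  }

  /** Unordered, read-only view with O(1) contains(). */
  public Set<String> asSet() {
    if (cachedUnmodifiableSet == null) {
      cachedUnmodifiableSet = Collections.unmodifiableSet(segmentSet);
    }
    return cachedUnmodifiableSet;
  }
}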
@ -251,9 +251,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||||
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
|
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
|
||||||
|
|
||||||
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
|
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>(infos.asList());
|
||||||
infosSorted.addAll(infos);
|
|
||||||
|
|
||||||
Collections.sort(infosSorted, segmentByteSizeDescending);
|
Collections.sort(infosSorted, segmentByteSizeDescending);
|
||||||
|
|
||||||
// Compute total index bytes & print details about the index
|
// Compute total index bytes & print details about the index
|
||||||
|
|
|
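The TieredMergePolicy hunk above builds infosSorted with the ArrayList copy constructor over infos.asList() before sorting. A plain-Java sketch of the copy-then-sort idiom; encoding the size in the name is only for illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public class SortCopySketch {
  public static void main(String[] args) {
    List<String> infos = Arrays.asList("_2:300", "_0:100", "_1:200");
    // Copy first so sorting cannot disturb the original segment order.
    List<String> infosSorted = new ArrayList<String>(infos);
    Collections.sort(infosSorted, new Comparator<String>() {
      public int compare(String a, String b) {
        int sizeA = Integer.parseInt(a.substring(a.indexOf(':') + 1));
        int sizeB = Integer.parseInt(b.substring(b.indexOf(':') + 1));
        return sizeB - sizeA; // descending by "size", like segmentByteSizeDescending
      }
    });
    System.out.println(infosSorted); // prints [_2:300, _1:200, _0:100]
  }
}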
@ -40,6 +40,11 @@ import java.util.Set;
|
||||||
* w.optimize();
|
* w.optimize();
|
||||||
* w.close();
|
* w.close();
|
||||||
* </pre>
|
* </pre>
|
||||||
|
* <p><b>Warning:</b> This merge policy may reorder documents if the index was partially
|
||||||
|
* upgraded before calling optimize (e.g., documents were added). If your application relies
|
||||||
|
* on "monotonicity" of doc IDs (which means that the order in which the documents
|
||||||
|
* were added to the index is preserved), do a full optimize instead. Please note, the
|
||||||
|
* delegate {@code MergePolicy} may also reorder documents.
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
* @see IndexUpgrader
|
* @see IndexUpgrader
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -200,6 +200,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
private class FSTFieldWriter extends FieldWriter {
|
private class FSTFieldWriter extends FieldWriter {
|
||||||
private final Builder<Long> fstBuilder;
|
private final Builder<Long> fstBuilder;
|
||||||
private final PositiveIntOutputs fstOutputs;
|
private final PositiveIntOutputs fstOutputs;
|
||||||
|
private final long startTermsFilePointer;
|
||||||
|
|
||||||
final FieldInfo fieldInfo;
|
final FieldInfo fieldInfo;
|
||||||
int numIndexTerms;
|
int numIndexTerms;
|
||||||
|
@ -220,6 +221,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
|
|
||||||
// Always put empty string in
|
// Always put empty string in
|
||||||
fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
|
fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
|
||||||
|
startTermsFilePointer = termsFilePointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -239,6 +241,11 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
|
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
|
||||||
|
if (text.length == 0) {
|
||||||
|
// We already added empty string in ctor
|
||||||
|
assert termsFilePointer == startTermsFilePointer;
|
||||||
|
return;
|
||||||
|
}
|
||||||
final int lengthSave = text.length;
|
final int lengthSave = text.length;
|
||||||
text.length = indexedTermPrefixLength(lastTerm, text);
|
text.length = indexedTermPrefixLength(lastTerm, text);
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -0,0 +1,382 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Caches all docs, and optionally also scores, coming from
|
||||||
|
* a search, and is then able to replay them to another
|
||||||
|
* collector. You specify the max RAM this class may use.
|
||||||
|
* Once the collection is done, call {@link #isCached}. If
|
||||||
|
* this returns true, you can use {@link #replay} against a
|
||||||
|
* new collector. If it returns false, this means too much
|
||||||
|
* RAM was required and you must instead re-run the original
|
||||||
|
* search.
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: this class consumes 4 bytes (or 8 bytes, if
|
||||||
|
* scoring is cached) per collected document. If the result
|
||||||
|
* set is large this can easily be a very substantial amount
|
||||||
|
* of RAM!
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: this class caches at least 128 documents
|
||||||
|
* before checking RAM limits.
|
||||||
|
*
|
||||||
|
* <p>See the Lucene <tt>modules/grouping</tt> module for more
|
||||||
|
* details including a full code example.</p>
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public abstract class CachingCollector extends Collector {
|
||||||
|
|
||||||
|
// Max out at 512K arrays
|
||||||
|
private static final int MAX_ARRAY_SIZE = 512 * 1024;
|
||||||
|
private static final int INITIAL_ARRAY_SIZE = 128;
|
||||||
|
private final static int[] EMPTY_INT_ARRAY = new int[0];
|
||||||
|
|
||||||
|
private static class SegStart {
|
||||||
|
public final AtomicReaderContext readerContext;
|
||||||
|
public final int end;
|
||||||
|
|
||||||
|
public SegStart(AtomicReaderContext readerContext, int end) {
|
||||||
|
this.readerContext = readerContext;
|
||||||
|
this.end = end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class CachedScorer extends Scorer {
|
||||||
|
|
||||||
|
// NOTE: these members are package-private b/c that way accessing them from
|
||||||
|
// the outer class does not incur access check by the JVM. The same
|
||||||
|
// situation would be if they were defined in the outer class as private
|
||||||
|
// members.
|
||||||
|
int doc;
|
||||||
|
float score;
|
||||||
|
|
||||||
|
private CachedScorer() { super(null); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final float score() { return score; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int advance(int target) { throw new UnsupportedOperationException(); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int docID() { return doc; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final float freq() { throw new UnsupportedOperationException(); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int nextDoc() { throw new UnsupportedOperationException(); }
|
||||||
|
}
|
||||||
|
|
||||||
|
// A CachingCollector which caches scores
|
||||||
|
private static final class ScoreCachingCollector extends CachingCollector {
|
||||||
|
|
||||||
|
private final CachedScorer cachedScorer;
|
||||||
|
private final List<float[]> cachedScores;
|
||||||
|
|
||||||
|
private Scorer scorer;
|
||||||
|
private float[] curScores;
|
||||||
|
|
||||||
|
ScoreCachingCollector(Collector other, double maxRAMMB) {
|
||||||
|
super(other, maxRAMMB, true);
|
||||||
|
|
||||||
|
cachedScorer = new CachedScorer();
|
||||||
|
cachedScores = new ArrayList<float[]>();
|
||||||
|
curScores = new float[128];
|
||||||
|
cachedScores.add(curScores);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
|
||||||
|
if (curDocs == null) {
|
||||||
|
// Cache was too large
|
||||||
|
cachedScorer.score = scorer.score();
|
||||||
|
cachedScorer.doc = doc;
|
||||||
|
other.collect(doc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate a bigger array or abort caching
|
||||||
|
if (upto == curDocs.length) {
|
||||||
|
base += upto;
|
||||||
|
|
||||||
|
// Compute next array length - don't allocate too big arrays
|
||||||
|
int nextLength = 8*curDocs.length;
|
||||||
|
if (nextLength > MAX_ARRAY_SIZE) {
|
||||||
|
nextLength = MAX_ARRAY_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (base + nextLength > maxDocsToCache) {
|
||||||
|
// try to allocate a smaller array
|
||||||
|
nextLength = maxDocsToCache - base;
|
||||||
|
if (nextLength <= 0) {
|
||||||
|
// Too many docs to collect -- clear cache
|
||||||
|
curDocs = null;
|
||||||
|
curScores = null;
|
||||||
|
cachedSegs.clear();
|
||||||
|
cachedDocs.clear();
|
||||||
|
cachedScores.clear();
|
||||||
|
cachedScorer.score = scorer.score();
|
||||||
|
cachedScorer.doc = doc;
|
||||||
|
other.collect(doc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
curDocs = new int[nextLength];
|
||||||
|
cachedDocs.add(curDocs);
|
||||||
|
curScores = new float[nextLength];
|
||||||
|
cachedScores.add(curScores);
|
||||||
|
upto = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
curDocs[upto] = doc;
|
||||||
|
cachedScorer.score = curScores[upto] = scorer.score();
|
||||||
|
upto++;
|
||||||
|
cachedScorer.doc = doc;
|
||||||
|
other.collect(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void replay(Collector other) throws IOException {
|
||||||
|
replayInit(other);
|
||||||
|
|
||||||
|
int curUpto = 0;
|
||||||
|
int curBase = 0;
|
||||||
|
int chunkUpto = 0;
|
||||||
|
other.setScorer(cachedScorer);
|
||||||
|
curDocs = EMPTY_INT_ARRAY;
|
||||||
|
for (SegStart seg : cachedSegs) {
|
||||||
|
other.setNextReader(seg.readerContext);
|
||||||
|
while (curBase + curUpto < seg.end) {
|
||||||
|
if (curUpto == curDocs.length) {
|
||||||
|
curBase += curDocs.length;
|
||||||
|
curDocs = cachedDocs.get(chunkUpto);
|
||||||
|
curScores = cachedScores.get(chunkUpto);
|
||||||
|
chunkUpto++;
|
||||||
|
curUpto = 0;
|
||||||
|
}
|
||||||
|
cachedScorer.score = curScores[curUpto];
|
||||||
|
other.collect(curDocs[curUpto++]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
other.setScorer(cachedScorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
if (isCached()) {
|
||||||
|
return "CachingCollector (" + (base+upto) + " docs & scores cached)";
|
||||||
|
} else {
|
||||||
|
return "CachingCollector (cache was cleared)";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// A CachingCollector which does not cache scores
|
||||||
|
private static final class NoScoreCachingCollector extends CachingCollector {
|
||||||
|
|
||||||
|
NoScoreCachingCollector(Collector other, double maxRAMMB) {
|
||||||
|
super(other, maxRAMMB, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
|
||||||
|
if (curDocs == null) {
|
||||||
|
// Cache was too large
|
||||||
|
other.collect(doc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate a bigger array or abort caching
|
||||||
|
if (upto == curDocs.length) {
|
||||||
|
base += upto;
|
||||||
|
|
||||||
|
// Compute next array length - don't allocate too big arrays
|
||||||
|
int nextLength = 8*curDocs.length;
|
||||||
|
if (nextLength > MAX_ARRAY_SIZE) {
|
||||||
|
nextLength = MAX_ARRAY_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (base + nextLength > maxDocsToCache) {
|
||||||
|
// try to allocate a smaller array
|
||||||
|
nextLength = maxDocsToCache - base;
|
||||||
|
if (nextLength <= 0) {
|
||||||
|
// Too many docs to collect -- clear cache
|
||||||
|
curDocs = null;
|
||||||
|
cachedSegs.clear();
|
||||||
|
cachedDocs.clear();
|
||||||
|
other.collect(doc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
curDocs = new int[nextLength];
|
||||||
|
cachedDocs.add(curDocs);
|
||||||
|
upto = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
curDocs[upto] = doc;
|
||||||
|
upto++;
|
||||||
|
other.collect(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void replay(Collector other) throws IOException {
|
||||||
|
replayInit(other);
|
||||||
|
|
||||||
|
int curUpto = 0;
|
||||||
|
int curbase = 0;
|
||||||
|
int chunkUpto = 0;
|
||||||
|
curDocs = EMPTY_INT_ARRAY;
|
||||||
|
for (SegStart seg : cachedSegs) {
|
||||||
|
other.setNextReader(seg.readerContext);
|
||||||
|
while (curbase + curUpto < seg.end) {
|
||||||
|
if (curUpto == curDocs.length) {
|
||||||
|
curbase += curDocs.length;
|
||||||
|
curDocs = cachedDocs.get(chunkUpto);
|
||||||
|
chunkUpto++;
|
||||||
|
curUpto = 0;
|
||||||
|
}
|
||||||
|
other.collect(curDocs[curUpto++]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
other.setScorer(scorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
if (isCached()) {
|
||||||
|
return "CachingCollector (" + (base+upto) + " docs cached)";
|
||||||
|
} else {
|
||||||
|
return "CachingCollector (cache was cleared)";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: would be nice if a collector defined a
|
||||||
|
// needsScores() method so we can specialize / do checks
|
||||||
|
// up front. This is only relevant for the ScoreCaching
|
||||||
|
// version -- if the wrapped Collector does not need
|
||||||
|
// scores, it can avoid cachedScorer entirely.
|
||||||
|
protected final Collector other;
|
||||||
|
|
||||||
|
protected final int maxDocsToCache;
|
||||||
|
protected final List<SegStart> cachedSegs = new ArrayList<SegStart>();
|
||||||
|
protected final List<int[]> cachedDocs;
|
||||||
|
|
||||||
|
private AtomicReaderContext lastReaderContext;
|
||||||
|
|
||||||
|
protected int[] curDocs;
|
||||||
|
protected int upto;
|
||||||
|
protected int base;
|
||||||
|
protected int lastDocBase;
|
||||||
|
|
||||||
|
public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
|
||||||
|
return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prevent extension from non-internal classes
|
||||||
|
private CachingCollector(Collector other, double maxRAMMB, boolean cacheScores) {
|
||||||
|
this.other = other;
|
||||||
|
|
||||||
|
cachedDocs = new ArrayList<int[]>();
|
||||||
|
curDocs = new int[INITIAL_ARRAY_SIZE];
|
||||||
|
cachedDocs.add(curDocs);
|
||||||
|
|
||||||
|
int bytesPerDoc = RamUsageEstimator.NUM_BYTES_INT;
|
||||||
|
if (cacheScores) {
|
||||||
|
bytesPerDoc += RamUsageEstimator.NUM_BYTES_FLOAT;
|
||||||
|
}
|
||||||
|
maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024) / bytesPerDoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean acceptsDocsOutOfOrder() {
|
||||||
|
return other.acceptsDocsOutOfOrder();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isCached() {
|
||||||
|
return curDocs != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
|
other.setNextReader(context);
|
||||||
|
if (lastReaderContext != null) {
|
||||||
|
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
|
||||||
|
}
|
||||||
|
lastReaderContext = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Reused by the specialized inner classes. */
|
||||||
|
void replayInit(Collector other) {
|
||||||
|
if (!isCached()) {
|
||||||
|
throw new IllegalStateException("cannot replay: cache was cleared because too much RAM was required");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!other.acceptsDocsOutOfOrder() && this.other.acceptsDocsOutOfOrder()) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"cannot replay: given collector does not support "
|
||||||
|
+ "out-of-order collection, while the wrapped collector does. "
|
||||||
|
+ "Therefore cached documents may be out-of-order.");
|
||||||
|
}
|
||||||
|
|
||||||
|
//System.out.println("CC: replay totHits=" + (upto + base));
|
||||||
|
if (lastReaderContext != null) {
|
||||||
|
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
|
||||||
|
lastReaderContext = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replays the cached doc IDs (and scores) to the given Collector. If this
|
||||||
|
* instance does not cache scores, then the Scorer is not set via
|
||||||
|
* {@code other.setScorer}, and scores are not replayed.
|
||||||
|
*
|
||||||
|
* @throws IllegalStateException
|
||||||
|
* if this collector is not cached (i.e., if the RAM limits were too
|
||||||
|
* low for the number of documents + scores to cache).
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* if the given Collector does not support out-of-order collection,
|
||||||
|
* while the collector passed to the ctor does.
|
||||||
|
*/
|
||||||
|
public abstract void replay(Collector other) throws IOException;
|
||||||
|
|
||||||
|
}
|
|
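The new CachingCollector above caches doc IDs (and optionally scores) during a search so the hits can be replayed into a second collector. A hedged usage sketch; the searcher, query and target collectors are assumed to exist, and IndexSearcher.search(Query, Collector) is assumed from the core API rather than shown in this diff:

import java.io.IOException;

import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public class ReplaySketch {
  public static void searchTwice(IndexSearcher searcher, Query query,
                                 Collector first, Collector second) throws IOException {
    // Cache scores too, budgeting roughly 64 MB of RAM for the cache.
    CachingCollector cache = CachingCollector.create(first, true, 64.0);
    searcher.search(query, cache);
    if (cache.isCached()) {
      cache.replay(second);             // feed the cached hits to the second collector
    } else {
      searcher.search(query, second);   // cache overflowed; re-run the search instead
    }
  }
}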
@ -61,9 +61,10 @@ public abstract class DocIdSetIterator {
|
||||||
public abstract int nextDoc() throws IOException;
|
public abstract int nextDoc() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Advances to the first beyond the current whose document number is greater
|
* Advances to the first beyond (see NOTE below) the current whose document
|
||||||
* than or equal to <i>target</i>. Returns the current document number or
|
* number is greater than or equal to <i>target</i>. Returns the current
|
||||||
* {@link #NO_MORE_DOCS} if there are no more docs in the set.
|
* document number or {@link #NO_MORE_DOCS} if there are no more docs in the
|
||||||
|
* set.
|
||||||
* <p>
|
* <p>
|
||||||
* Behaves as if written:
|
* Behaves as if written:
|
||||||
*
|
*
|
||||||
|
|
|
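The reworded advance() javadoc above keeps the "behaves as if written" contract. A small sketch of that reference loop against the DocIdSetIterator API (nextDoc() and NO_MORE_DOCS as in the class); behavior when the iterator is already at or past target is left to the real implementations:

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

public class AdvanceSketch {
  /** Naive advance: scan forward until the current doc is >= target. */
  static int slowAdvance(DocIdSetIterator it, int target) throws IOException {
    int doc;
    do {
      doc = it.nextDoc();   // may land past target, hence "beyond" in the javadoc
    } while (doc < target); // NO_MORE_DOCS is Integer.MAX_VALUE, so this terminates
    return doc;             // either >= target or NO_MORE_DOCS
  }
}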
@ -55,7 +55,12 @@ public class QueryTermVector implements TermFreqVector {
|
||||||
public QueryTermVector(String queryString, Analyzer analyzer) {
|
public QueryTermVector(String queryString, Analyzer analyzer) {
|
||||||
if (analyzer != null)
|
if (analyzer != null)
|
||||||
{
|
{
|
||||||
TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
|
TokenStream stream;
|
||||||
|
try {
|
||||||
|
stream = analyzer.reusableTokenStream("", new StringReader(queryString));
|
||||||
|
} catch (IOException e1) {
|
||||||
|
stream = null;
|
||||||
|
}
|
||||||
if (stream != null)
|
if (stream != null)
|
||||||
{
|
{
|
||||||
List<BytesRef> terms = new ArrayList<BytesRef>();
|
List<BytesRef> terms = new ArrayList<BytesRef>();
|
||||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashSet;
|
||||||
|
|
||||||
final class SloppyPhraseScorer extends PhraseScorer {
|
final class SloppyPhraseScorer extends PhraseScorer {
|
||||||
private int slop;
|
private int slop;
|
||||||
|
@ -109,8 +109,14 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Init PhrasePositions in place.
|
* Init PhrasePositions in place.
|
||||||
* There is a one time initialization for this scorer:
|
* There is a one time initialization for this scorer (taking place at the first doc that matches all terms):
|
||||||
* <br>- Put in repeats[] each pp that has another pp with same position in the doc.
|
* <br>- Put in repeats[] each pp that has another pp with same position in the doc.
|
||||||
|
* This relies on the fact that the position in PP is computed as (TP.position - offset) and
|
||||||
|
* so by adding offset we actually compare positions and identify that the two are
|
||||||
|
* the same term.
|
||||||
|
* An exception to this is two distinct terms at the same offset in the query and the same
|
||||||
|
* position in doc. This case is detected by comparing just the (query) offsets,
|
||||||
|
* and two such PPs are not considered "repeating".
|
||||||
* <br>- Also mark each such pp by pp.repeats = true.
|
* <br>- Also mark each such pp by pp.repeats = true.
|
||||||
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
|
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
|
||||||
* In particular, this allows scoring queries with no repetitions with no overhead due to this computation.
|
* In particular, this allows scoring queries with no repetitions with no overhead due to this computation.
|
||||||
|
@ -145,23 +151,26 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
||||||
if (!checkedRepeats) {
|
if (!checkedRepeats) {
|
||||||
checkedRepeats = true;
|
checkedRepeats = true;
|
||||||
// check for repeats
|
// check for repeats
|
||||||
HashMap<PhrasePositions, Object> m = null;
|
HashSet<PhrasePositions> m = null;
|
||||||
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
|
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
|
||||||
int tpPos = pp.position + pp.offset;
|
int tpPos = pp.position + pp.offset;
|
||||||
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
|
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
|
||||||
|
if (pp.offset == pp2.offset) {
|
||||||
|
continue; // not a repetition: the two PPs are originally in same offset in the query!
|
||||||
|
}
|
||||||
int tpPos2 = pp2.position + pp2.offset;
|
int tpPos2 = pp2.position + pp2.offset;
|
||||||
if (tpPos2 == tpPos) {
|
if (tpPos2 == tpPos) {
|
||||||
if (m == null)
|
if (m == null)
|
||||||
m = new HashMap<PhrasePositions, Object>();
|
m = new HashSet<PhrasePositions>();
|
||||||
pp.repeats = true;
|
pp.repeats = true;
|
||||||
pp2.repeats = true;
|
pp2.repeats = true;
|
||||||
m.put(pp,null);
|
m.add(pp);
|
||||||
m.put(pp2,null);
|
m.add(pp2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (m!=null)
|
if (m!=null)
|
||||||
repeats = m.keySet().toArray(new PhrasePositions[0]);
|
repeats = m.toArray(new PhrasePositions[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// with repeats must advance some repeating pp's so they all start with differing tp's
|
// with repeats must advance some repeating pp's so they all start with differing tp's
|
||||||
|
@ -204,12 +213,17 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
||||||
int tpPos = pp.position + pp.offset;
|
int tpPos = pp.position + pp.offset;
|
||||||
for (int i = 0; i < repeats.length; i++) {
|
for (int i = 0; i < repeats.length; i++) {
|
||||||
PhrasePositions pp2 = repeats[i];
|
PhrasePositions pp2 = repeats[i];
|
||||||
if (pp2 == pp)
|
if (pp2 == pp) {
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
if (pp.offset == pp2.offset) {
|
||||||
|
continue; // not a repetition: the two PPs are originally in same offset in the query!
|
||||||
|
}
|
||||||
int tpPos2 = pp2.position + pp2.offset;
|
int tpPos2 = pp2.position + pp2.offset;
|
||||||
if (tpPos2 == tpPos)
|
if (tpPos2 == tpPos) {
|
||||||
return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
|
return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -172,7 +172,7 @@ public class NIOFSDirectory extends FSDirectory {
|
||||||
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
||||||
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
||||||
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
||||||
+ "with a a value smaller than the current chunk size (" + chunkSize + ")");
|
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
|
||||||
outOfMemoryError.initCause(e);
|
outOfMemoryError.initCause(e);
|
||||||
throw outOfMemoryError;
|
throw outOfMemoryError;
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,7 +125,7 @@ public class SimpleFSDirectory extends FSDirectory {
|
||||||
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
||||||
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
||||||
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
||||||
+ "with a value smaller than the current chunks size (" + chunkSize + ")");
|
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
|
||||||
outOfMemoryError.initCause(e);
|
outOfMemoryError.initCause(e);
|
||||||
throw outOfMemoryError;
|
throw outOfMemoryError;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,9 +20,6 @@ package org.apache.lucene.util;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.io.ObjectInput;
|
|
||||||
import java.io.ObjectOutput;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/** Represents byte[], as a slice (offset + length) into an
|
/** Represents byte[], as a slice (offset + length) into an
|
||||||
* existing byte[].
|
* existing byte[].
|
||||||
|
@ -193,6 +190,9 @@ public final class BytesRef implements Comparable<BytesRef> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object other) {
|
public boolean equals(Object other) {
|
||||||
|
if (other == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return this.bytesEquals((BytesRef) other);
|
return this.bytesEquals((BytesRef) other);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
package org.apache.lucene.util;
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.StringTokenizer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -54,4 +57,42 @@ public abstract class StringHelper {
|
||||||
|
|
||||||
private StringHelper() {
|
private StringHelper() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return a Comparator over versioned strings such as X.YY.Z
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public static Comparator<String> getVersionComparator() {
|
||||||
|
return versionComparator;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Comparator<String> versionComparator = new Comparator<String>() {
|
||||||
|
public int compare(String a, String b) {
|
||||||
|
StringTokenizer aTokens = new StringTokenizer(a, ".");
|
||||||
|
StringTokenizer bTokens = new StringTokenizer(b, ".");
|
||||||
|
|
||||||
|
while (aTokens.hasMoreTokens()) {
|
||||||
|
int aToken = Integer.parseInt(aTokens.nextToken());
|
||||||
|
if (bTokens.hasMoreTokens()) {
|
||||||
|
int bToken = Integer.parseInt(bTokens.nextToken());
|
||||||
|
if (aToken != bToken) {
|
||||||
|
return aToken - bToken;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// a has some extra trailing tokens; if these are all zeroes, that's ok.
|
||||||
|
if (aToken != 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// b has some extra trailing tokens; if these are all zeroes, that's ok.
|
||||||
|
while (bTokens.hasMoreTokens()) {
|
||||||
|
if (Integer.parseInt(bTokens.nextToken()) != 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
|
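getVersionComparator() above compares dotted version strings numerically, token by token, and treats missing trailing tokens as zero. A short usage example (assumes org.apache.lucene.util.StringHelper from the hunk above is on the classpath):

import java.util.Comparator;

import org.apache.lucene.util.StringHelper;

public class VersionCompareSketch {
  public static void main(String[] args) {
    Comparator<String> cmp = StringHelper.getVersionComparator();
    System.out.println(cmp.compare("3.1", "3.1.0")); // 0        -- trailing zeros are ignored
    System.out.println(cmp.compare("3.2", "3.10"));  // negative -- 2 < 10 numerically
    System.out.println(cmp.compare("4.0", "3.9.9")); // positive -- 4 > 3
  }
}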
@ -143,13 +143,16 @@ public class LevenshteinAutomata {
|
||||||
if (dest >= 0)
|
if (dest >= 0)
|
||||||
for (int r = 0; r < numRanges; r++)
|
for (int r = 0; r < numRanges; r++)
|
||||||
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
|
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
|
||||||
// reduce the state: this doesn't appear to help anything
|
|
||||||
//states[k].reduce();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Automaton a = new Automaton(states[0]);
|
Automaton a = new Automaton(states[0]);
|
||||||
a.setDeterministic(true);
|
a.setDeterministic(true);
|
||||||
a.setNumberedStates(states);
|
// we create some useless unconnected states, and it's a net win overall to remove these,
|
||||||
|
// as well as to combine any adjacent transitions (it makes later algorithms more efficient).
|
||||||
|
// so, while we could set our numberedStates here, it's actually best not to, and instead to
|
||||||
|
// force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
|
||||||
|
//a.setNumberedStates(states);
|
||||||
|
a.reduce();
|
||||||
// we need not trim transitions to dead states, as they are not created.
|
// we need not trim transitions to dead states, as they are not created.
|
||||||
//a.restoreInvariant();
|
//a.restoreInvariant();
|
||||||
return a;
|
return a;
|
||||||
|
|
|
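With the change above, toAutomaton() calls reduce() itself, so the returned DFA comes back without detached states or redundant adjacent transitions. A hedged usage sketch (not from the patch; BasicOperations.run is an assumption about the surrounding automaton package):

    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.BasicOperations;
    import org.apache.lucene.util.automaton.LevenshteinAutomata;

    public class LevenshteinSketch {
      public static void main(String[] args) {
        // DFA accepting every string within edit distance 1 of "abc"
        Automaton lev1 = new LevenshteinAutomata("abc").toAutomaton(1);
        System.out.println(BasicOperations.run(lev1, "abd"));  // one substitution -> true
        System.out.println(BasicOperations.run(lev1, "azzz")); // too far -> false
      }
    }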
@@ -30,6 +30,8 @@
 package org.apache.lucene.util.automaton;
 
 import java.util.BitSet;
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.LinkedList;
 
 /**
@@ -72,8 +74,12 @@ final public class MinimizationOperations {
     final int[] sigma = a.getStartPoints();
     final State[] states = a.getNumberedStates();
     final int sigmaLen = sigma.length, statesLen = states.length;
-    final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
-    final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
+    @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
+      (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
+    @SuppressWarnings("unchecked") final HashSet<State>[] partition =
+      (HashSet<State>[]) new HashSet[statesLen];
+    @SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
+      (ArrayList<State>[]) new ArrayList[statesLen];
     final int[] block = new int[statesLen];
     final StateList[][] active = new StateList[statesLen][sigmaLen];
     final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
@@ -82,8 +88,8 @@ final public class MinimizationOperations {
     final BitSet split = new BitSet(statesLen),
       refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
     for (int q = 0; q < statesLen; q++) {
-      splitblock[q] = new BitSet(statesLen);
-      partition[q] = new BitSet(statesLen);
+      splitblock[q] = new ArrayList<State>();
+      partition[q] = new HashSet<State>();
       for (int x = 0; x < sigmaLen; x++) {
         active[q][x] = new StateList();
       }
@@ -92,23 +98,22 @@ final public class MinimizationOperations {
     for (int q = 0; q < statesLen; q++) {
       final State qq = states[q];
       final int j = qq.accept ? 0 : 1;
-      partition[j].set(q);
+      partition[j].add(qq);
       block[q] = j;
       for (int x = 0; x < sigmaLen; x++) {
-        final BitSet[] r =
+        final ArrayList<State>[] r =
           reverse[qq.step(sigma[x]).number];
         if (r[x] == null)
-          r[x] = new BitSet();
-        r[x].set(q);
+          r[x] = new ArrayList<State>();
+        r[x].add(qq);
       }
     }
     // initialize active sets
     for (int j = 0; j <= 1; j++) {
-      final BitSet part = partition[j];
       for (int x = 0; x < sigmaLen; x++) {
-        for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
-          if (reverse[i][x] != null)
-            active2[i][x] = active[j][x].add(states[i]);
+        for (final State qq : partition[j]) {
+          if (reverse[qq.number][x] != null)
+            active2[qq.number][x] = active[j][x].add(qq);
         }
       }
     }
@@ -121,18 +126,19 @@ final public class MinimizationOperations {
     // process pending until fixed point
     int k = 2;
     while (!pending.isEmpty()) {
-      IntPair ip = pending.removeFirst();
+      final IntPair ip = pending.removeFirst();
       final int p = ip.n1;
       final int x = ip.n2;
       pending2.clear(x*statesLen + p);
       // find states that need to be split off their blocks
       for (StateListNode m = active[p][x].first; m != null; m = m.next) {
-        final BitSet r = reverse[m.q.number][x];
-        if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
+        final ArrayList<State> r = reverse[m.q.number][x];
+        if (r != null) for (final State s : r) {
+          final int i = s.number;
           if (!split.get(i)) {
             split.set(i);
             final int j = block[i];
-            splitblock[j].set(i);
+            splitblock[j].add(s);
             if (!refine2.get(j)) {
               refine2.set(j);
               refine.set(j);
@@ -142,18 +148,19 @@ final public class MinimizationOperations {
       }
       // refine blocks
       for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
-        final BitSet sb = splitblock[j];
-        if (sb.cardinality() < partition[j].cardinality()) {
-          final BitSet b1 = partition[j], b2 = partition[k];
-          for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
-            b1.clear(i);
-            b2.set(i);
-            block[i] = k;
+        final ArrayList<State> sb = splitblock[j];
+        if (sb.size() < partition[j].size()) {
+          final HashSet<State> b1 = partition[j];
+          final HashSet<State> b2 = partition[k];
+          for (final State s : sb) {
+            b1.remove(s);
+            b2.add(s);
+            block[s.number] = k;
             for (int c = 0; c < sigmaLen; c++) {
-              final StateListNode sn = active2[i][c];
+              final StateListNode sn = active2[s.number][c];
               if (sn != null && sn.sl == active[j][c]) {
                 sn.remove();
-                active2[i][c] = active[k][c].add(states[i]);
+                active2[s.number][c] = active[k][c].add(s);
               }
             }
           }
@@ -173,8 +180,8 @@ final public class MinimizationOperations {
             k++;
           }
           refine2.clear(j);
-          for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
-            split.clear(i);
+          for (final State s : sb)
+            split.clear(s.number);
           sb.clear();
         }
         refine.clear();
@@ -184,9 +191,7 @@ final public class MinimizationOperations {
     for (int n = 0; n < newstates.length; n++) {
       final State s = new State();
       newstates[n] = s;
-      BitSet part = partition[n];
-      for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
-        final State q = states[i];
+      for (State q : partition[n]) {
         if (q == a.initial) a.initial = s;
         s.accept = q.accept;
         s.number = q.number; // select representative
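The hunks above replace per-state BitSets (each potentially statesLen bits) with collections that only hold actual members, which is the n^2 space concern that the new testMinimizeHuge later in this diff probes. A rough illustration of the difference (not from the patch, numbers hypothetical):

    public class HopcroftSpaceSketch {
      public static void main(String[] args) {
        int statesLen = 100000;  // hypothetical automaton size
        int sigmaLen = 16;       // distinct transition start points

        // old layout: reverse[state][sigma] could each grow to a BitSet over all states
        long bitsetBits = (long) statesLen * sigmaLen * statesLen;
        System.out.println("BitSet layout ~" + (bitsetBits / 8 / (1 << 20)) + " MB worst case for 'reverse'");

        // new layout: one ArrayList entry per actual incoming transition,
        // so memory tracks the transition count instead of statesLen^2
        long entries = (long) statesLen * sigmaLen;
        System.out.println("list layout   ~" + entries + " list entries");
      }
    }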
@@ -232,9 +232,7 @@ public class FST<T> {
 
   void setEmptyOutput(T v) throws IOException {
     if (emptyOutput != null) {
-      if (!emptyOutput.equals(v)) {
       emptyOutput = outputs.merge(emptyOutput, v);
-      }
     } else {
       emptyOutput = v;
     }
@@ -100,7 +100,7 @@ public class MockTokenizer extends Tokenizer {
         endOffset = off;
         cp = readCodePoint();
       } while (cp >= 0 && isTokenChar(cp));
-      offsetAtt.setOffset(startOffset, endOffset);
+      offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
       streamState = State.INCREMENT;
      return true;
     }
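Offsets reported by a Tokenizer should be passed through correctOffset() so that any CharFilter in front of the tokenizer (which may have shifted character positions) is accounted for; the one-line change above does exactly that for MockTokenizer. A hedged sketch of the same pattern in a minimal, hypothetical tokenizer (not from the patch):

    import java.io.IOException;
    import java.io.Reader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

    /** Hypothetical tokenizer that emits the whole input as one token. */
    public final class WholeInputTokenizer extends Tokenizer {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
      private boolean done = false;

      public WholeInputTokenizer(Reader in) {
        super(in);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (done) return false;
        done = true;
        clearAttributes();
        int length = 0;
        int c;
        while ((c = input.read()) != -1) {
          termAtt.append((char) c);
          length++;
        }
        // report offsets through correctOffset() so a preceding CharFilter's
        // position shifts are honored, as in the MockTokenizer fix above
        offsetAtt.setOffset(correctOffset(0), correctOffset(length));
        return length > 0;
      }
    }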
@@ -42,14 +42,13 @@ public class MockRandomMergePolicy extends MergePolicy {
 
     if (segmentInfos.size() > 1 && random.nextInt(5) == 3) {
 
-      SegmentInfos segmentInfos2 = new SegmentInfos();
-      segmentInfos2.addAll(segmentInfos);
-      Collections.shuffle(segmentInfos2, random);
+      List<SegmentInfo> segments = new ArrayList<SegmentInfo>(segmentInfos.asList());
+      Collections.shuffle(segments, random);
 
       // TODO: sometimes make more than 1 merge?
       mergeSpec = new MergeSpecification();
       final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size());
-      mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge)));
+      mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge)));
     }
 
     return mergeSpec;
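The change stops copying into a second SegmentInfos and instead shuffles a plain List view and takes a prefix with subList(). A generic sketch of that pick-k-random-items idiom using only java.util (illustrative, not from the patch):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;
    import java.util.Random;

    public class RandomSubsetSketch {
      public static void main(String[] args) {
        Random random = new Random(42);
        // copy first: shuffling the original in place would reorder the real segment list
        List<String> segments = new ArrayList<String>(Arrays.asList("_0", "_1", "_2", "_3", "_4"));
        Collections.shuffle(segments, random);
        int segsToMerge = 1 + random.nextInt(segments.size());
        List<String> toMerge = segments.subList(0, segsToMerge);
        System.out.println("merging " + toMerge);
      }
    }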
@@ -171,7 +171,14 @@ public abstract class LuceneTestCase extends Assert {
   private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
 
   /** Used to track if setUp and tearDown are called correctly from subclasses */
-  private boolean setup;
+  private static State state = State.INITIAL;
+
+  private static enum State {
+    INITIAL, // no tests ran yet
+    SETUP, // test has called setUp()
+    RANTEST, // test is running
+    TEARDOWN // test has called tearDown()
+  };
 
   /**
    * Some tests expect the directory to contain a single segment, and want to do tests on that segment's reader.
@@ -326,6 +333,7 @@ public abstract class LuceneTestCase extends Assert {
 
   @BeforeClass
   public static void beforeClassLuceneTestCaseJ4() {
+    state = State.INITIAL;
     staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
     random.setSeed(staticSeed);
     tempDirs.clear();
@@ -375,6 +383,11 @@ public abstract class LuceneTestCase extends Assert {
 
   @AfterClass
   public static void afterClassLuceneTestCaseJ4() {
+    if (!testsFailed) {
+      assertTrue("ensure your setUp() calls super.setUp() and your tearDown() calls super.tearDown()!!!",
+          state == State.INITIAL || state == State.TEARDOWN);
+    }
+    state = State.INITIAL;
     if (! "false".equals(TEST_CLEAN_THREADS)) {
       int rogueThreads = threadCleanup("test class");
       if (rogueThreads > 0) {
@@ -483,17 +496,22 @@ public abstract class LuceneTestCase extends Assert {
     public void starting(FrameworkMethod method) {
       // set current method name for logging
       LuceneTestCase.this.name = method.getName();
+      if (!testsFailed) {
+        assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.SETUP);
+      }
+      state = State.RANTEST;
       super.starting(method);
     }
-
   };
 
   @Before
   public void setUp() throws Exception {
     seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2;
     random.setSeed(seed);
-    assertFalse("ensure your tearDown() calls super.tearDown()!!!", setup);
-    setup = true;
+    if (!testsFailed) {
+      assertTrue("ensure your tearDown() calls super.tearDown()!!!", (state == State.INITIAL || state == State.TEARDOWN));
+    }
+    state = State.SETUP;
     savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
     Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
       public void uncaughtException(Thread t, Throwable e) {
@@ -529,8 +547,12 @@ public abstract class LuceneTestCase extends Assert {
 
   @After
   public void tearDown() throws Exception {
-    assertTrue("ensure your setUp() calls super.setUp()!!!", setup);
-    setup = false;
+    if (!testsFailed) {
+      // Note: we allow a test to go straight from SETUP -> TEARDOWN (without ever entering the RANTEST state)
+      // because if you assume() inside setUp(), it skips the test and the TestWatchman has no way to know...
+      assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.RANTEST || state == State.SETUP);
+    }
+    state = State.TEARDOWN;
     BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
     if ("perMethod".equals(TEST_CLEAN_THREADS)) {
       int rogueThreads = threadCleanup("test method: '" + getName() + "'");
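The patch replaces the old boolean setup flag with a small lifecycle state machine so a missing super.setUp()/super.tearDown() call is reported precisely. A standalone sketch of the same guard idea outside JUnit (illustrative only, not the framework's code):

    /** Minimal sketch of the lifecycle guard idea, independent of JUnit. */
    public class LifecycleGuardSketch {
      enum State { INITIAL, SETUP, RANTEST, TEARDOWN }

      private static State state = State.INITIAL;

      static void setUp() {
        if (state != State.INITIAL && state != State.TEARDOWN) {
          throw new IllegalStateException("previous test never reached tearDown(): " + state);
        }
        state = State.SETUP;
      }

      static void runTest() {
        if (state != State.SETUP) {
          throw new IllegalStateException("setUp() was not called: " + state);
        }
        state = State.RANTEST;
      }

      static void tearDown() {
        // SETUP -> TEARDOWN is allowed: an assume() in setUp() may skip the test body
        if (state != State.RANTEST && state != State.SETUP) {
          throw new IllegalStateException("test body never ran: " + state);
        }
        state = State.TEARDOWN;
      }

      public static void main(String[] args) {
        setUp();
        runTest();
        tearDown();
        System.out.println("lifecycle ok, final state = " + state);
      }
    }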
@@ -397,4 +397,15 @@ public class AutomatonTestUtil {
     path.remove(s);
     return true;
   }
+
+
+  /**
+   * Checks that an automaton has no detached states that are unreachable
+   * from the initial state.
+   */
+  public static void assertNoDetachedStates(Automaton a) {
+    int numStates = a.getNumberOfStates();
+    a.clearNumberedStates(); // force recomputation of cached numbered states
+    assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
+  }
 }
@@ -0,0 +1,79 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * a binary tokenstream that lets you index a BytesRef
+ */
+public final class BinaryTokenStream extends TokenStream {
+  private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+  private boolean available = true;
+
+  public BinaryTokenStream(BytesRef bytes) {
+    bytesAtt.setBytesRef(bytes);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (available) {
+      available = false;
+      return true;
+    }
+    return false;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    available = true;
+  }
+
+  public interface ByteTermAttribute extends TermToBytesRefAttribute {
+    public void setBytesRef(BytesRef bytes);
+  }
+
+  public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
+    private BytesRef bytes;
+
+    public int fillBytesRef() {
+      return bytes.hashCode();
+    }
+
+    public BytesRef getBytesRef() {
+      return bytes;
+    }
+
+    public void setBytesRef(BytesRef bytes) {
+      this.bytes = bytes;
+    }
+
+    public void clear() {}
+
+    @Override
+    public void copyTo(AttributeImpl target) {
+      ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+      other.bytes = bytes;
+    }
+  }
+}
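BinaryTokenStream exposes its payload through the TermToBytesRefAttribute shown above. A hedged sketch of how a consumer could read it back (the indexing chain does the equivalent internally; this is illustrative and not from the patch):

    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.BytesRef;

    public class ConsumeBinaryStreamSketch {
      public static void main(String[] args) throws Exception {
        BytesRef payload = new BytesRef(new byte[] { 0x12, 0x34 });
        org.apache.lucene.index.BinaryTokenStream ts =
            new org.apache.lucene.index.BinaryTokenStream(payload);

        TermToBytesRefAttribute bytesAtt = ts.getAttribute(TermToBytesRefAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {   // exactly one token from this stream
          bytesAtt.fillBytesRef();      // in this impl: no copy, just the hash
          BytesRef term = bytesAtt.getBytesRef();
          System.out.println(term.length + " byte term");
        }
      }
    }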
@@ -0,0 +1,73 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Test indexing and searching some byte[] terms
+ */
+public class TestBinaryTerms extends LuceneTestCase {
+  public void testBinary() throws IOException {
+    assumeFalse("PreFlex codec cannot work with binary terms!",
+        "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
+
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
+    BytesRef bytes = new BytesRef(2);
+    BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);
+
+    for (int i = 0; i < 256; i++) {
+      bytes.bytes[0] = (byte) i;
+      bytes.bytes[1] = (byte) (255 - i);
+      bytes.length = 2;
+      Document doc = new Document();
+      doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.NO));
+      doc.add(new Field("bytes", tokenStream));
+      iw.addDocument(doc);
+    }
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher is = newSearcher(ir);
+
+    for (int i = 0; i < 256; i++) {
+      bytes.bytes[0] = (byte) i;
+      bytes.bytes[1] = (byte) (255 - i);
+      bytes.length = 2;
+      TopDocs docs = is.search(new TermQuery(new Term("bytes", bytes)), 5);
+      assertEquals(1, docs.totalHits);
+      assertEquals("" + i, is.doc(docs.scoreDocs[0].doc).get("id"));
+    }
+
+    is.close();
+    ir.close();
+    dir.close();
+  }
+}
@@ -30,7 +30,6 @@ import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.ThrottledIndexOutput;
 import org.junit.Before;
 
 public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
@@ -105,7 +104,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
       assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
     }
     if (ensureNotStalled) {
-      assertFalse(docsWriter.healthiness.wasStalled);
+      assertFalse(docsWriter.flushControl.stallControl.wasStalled);
     }
     writer.close();
     assertEquals(0, flushControl.activeBytes());
@@ -216,15 +215,15 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
     assertEquals(numDocumentsToIndex, r.numDocs());
     assertEquals(numDocumentsToIndex, r.maxDoc());
     if (!flushPolicy.flushOnRAM()) {
-      assertFalse("never stall if we don't flush on RAM", docsWriter.healthiness.wasStalled);
-      assertFalse("never block if we don't flush on RAM", docsWriter.healthiness.hasBlocked());
+      assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.wasStalled);
+      assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.hasBlocked());
     }
     r.close();
     writer.close();
     dir.close();
   }
 
-  public void testHealthyness() throws InterruptedException,
+  public void testStallControl() throws InterruptedException,
       CorruptIndexException, LockObtainFailedException, IOException {
 
     int[] numThreads = new int[] { 4 + random.nextInt(8), 1 };
@@ -264,12 +263,12 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
       assertEquals(numDocumentsToIndex, writer.numDocs());
       assertEquals(numDocumentsToIndex, writer.maxDoc());
       if (numThreads[i] == 1) {
-        assertFalse(
-            "single thread must not stall",
-            docsWriter.healthiness.wasStalled);
         assertFalse(
             "single thread must not block numThreads: " + numThreads[i],
-            docsWriter.healthiness.hasBlocked());
+            docsWriter.flushControl.stallControl.hasBlocked());
+      }
+      if (docsWriter.flushControl.peakNetBytes > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
+        assertTrue(docsWriter.flushControl.stallControl.wasStalled);
       }
       assertActiveBytesAfter(flushControl);
       writer.close(true);
@@ -363,7 +363,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase {
       w.close();
       SegmentInfos sis = new SegmentInfos();
       sis.read(base);
-      SegmentInfo segmentInfo = sis.get(sis.size() - 1);// last segment must
+      SegmentInfo segmentInfo = sis.info(sis.size() - 1);// last segment must
       // have all fields with
       // consistent numbers
       FieldInfos fieldInfos = segmentInfo.getFieldInfos();
@@ -1231,13 +1231,17 @@ public class TestIndexWriter extends LuceneTestCase {
         System.out.println("TEST: pass=" + pass);
       }
 
-      IndexWriter writer = new IndexWriter(
-          directory,
-          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
+      IndexWriterConfig conf = newIndexWriterConfig(
+          TEST_VERSION_CURRENT, new MockAnalyzer(random)).
           setOpenMode(OpenMode.CREATE).
           setMaxBufferedDocs(2).
-          setMergePolicy(newLogMergePolicy())
-      );
+          setMergePolicy(newLogMergePolicy());
+      if (pass == 2) {
+        conf.setMergeScheduler(new SerialMergeScheduler());
+      }
+
+      IndexWriter writer = new IndexWriter(directory, conf);
+      ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
       writer.setInfoStream(VERBOSE ? System.out : null);
 
       for(int iter=0;iter<10;iter++) {
@@ -2139,7 +2143,7 @@ public class TestIndexWriter extends LuceneTestCase {
       while(!finish) {
         try {
 
-          while(true) {
+          while(!finish) {
             if (w != null) {
               w.close();
               w = null;
@@ -2157,6 +2161,7 @@ public class TestIndexWriter extends LuceneTestCase {
           }
         }
         w.close();
+        w = null;
         _TestUtil.checkIndex(dir);
         IndexReader.open(dir, true).close();
 
@@ -71,9 +71,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass());
     assertNull(conf.getFlushPolicy());
     assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB());
-
-
-
     // Sanity check - validate that all getters are covered.
     Set<String> getters = new HashSet<String>();
     getters.add("getAnalyzer");
@@ -128,8 +128,8 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
     fsmp.length = 2;
     System.out.println("maybeMerge "+writer.segmentInfos);
 
-    SegmentInfo info0 = writer.segmentInfos.get(0);
-    SegmentInfo info1 = writer.segmentInfos.get(1);
+    SegmentInfo info0 = writer.segmentInfos.info(0);
+    SegmentInfo info1 = writer.segmentInfos.info(1);
 
     writer.maybeMerge();
     System.out.println("maybeMerge after "+writer.segmentInfos);
@@ -199,7 +199,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
     // deletes for info1, the newly created segment from the
     // merge should have no deletes because they were applied in
     // the merge
-    //SegmentInfo info1 = writer.segmentInfos.get(1);
+    //SegmentInfo info1 = writer.segmentInfos.info(1);
     //assertFalse(exists(info1, writer.docWriter.segmentDeletes));
 
     //System.out.println("infos4:"+writer.segmentInfos);
@@ -261,11 +261,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
       throws CorruptIndexException, IOException {
     MergeSpecification ms = new MergeSpecification();
     if (doMerge) {
-      SegmentInfos mergeInfos = new SegmentInfos();
-      for (int x=start; x < (start+length); x++) {
-        mergeInfos.add(segmentInfos.get(x));
-      }
-      OneMerge om = new OneMerge(mergeInfos);
+      OneMerge om = new OneMerge(segmentInfos.asList().subList(start, start + length));
       ms.add(om);
       doMerge = false;
       return ms;
@@ -0,0 +1,175 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.CachingCollector;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestCachingCollector extends LuceneTestCase {
+
+  private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB
+
+  private static class MockScorer extends Scorer {
+
+    private MockScorer() {
+      super((Weight) null);
+    }
+
+    @Override
+    public float score() throws IOException { return 0; }
+
+    @Override
+    public int docID() { return 0; }
+
+    @Override
+    public int nextDoc() throws IOException { return 0; }
+
+    @Override
+    public int advance(int target) throws IOException { return 0; }
+
+  }
+
+  private static class NoOpCollector extends Collector {
+
+    private final boolean acceptDocsOutOfOrder;
+
+    public NoOpCollector(boolean acceptDocsOutOfOrder) {
+      this.acceptDocsOutOfOrder = acceptDocsOutOfOrder;
+    }
+
+    @Override
+    public void setScorer(Scorer scorer) throws IOException {}
+
+    @Override
+    public void collect(int doc) throws IOException {}
+
+    @Override
+    public void setNextReader(AtomicReaderContext context) throws IOException {}
+
+    @Override
+    public boolean acceptsDocsOutOfOrder() {
+      return acceptDocsOutOfOrder;
+    }
+
+  }
+
+  public void testBasic() throws Exception {
+    for (boolean cacheScores : new boolean[] { false, true }) {
+      CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1);
+      cc.setScorer(new MockScorer());
+
+      // collect 1000 docs
+      for (int i = 0; i < 1000; i++) {
+        cc.collect(i);
+      }
+
+      // now replay them
+      cc.replay(new Collector() {
+        int prevDocID = -1;
+
+        @Override
+        public void setScorer(Scorer scorer) throws IOException {}
+
+        @Override
+        public void setNextReader(AtomicReaderContext context) throws IOException {}
+
+        @Override
+        public void collect(int doc) throws IOException {
+          assertEquals(prevDocID + 1, doc);
+          prevDocID = doc;
+        }
+
+        @Override
+        public boolean acceptsDocsOutOfOrder() {
+          return false;
+        }
+      });
+    }
+  }
+
+  public void testIllegalStateOnReplay() throws Exception {
+    CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
+    cc.setScorer(new MockScorer());
+
+    // collect 130 docs, this should be enough for triggering cache abort.
+    for (int i = 0; i < 130; i++) {
+      cc.collect(i);
+    }
+
+    assertFalse("CachingCollector should not be cached due to low memory limit", cc.isCached());
+
+    try {
+      cc.replay(new NoOpCollector(false));
+      fail("replay should fail if CachingCollector is not cached");
+    } catch (IllegalStateException e) {
+      // expected
+    }
+  }
+
+  public void testIllegalCollectorOnReplay() throws Exception {
+    // tests that the Collector passed to replay() has an out-of-order mode that
+    // is valid with the Collector passed to the ctor
+
+    // 'src' Collector does not support out-of-order
+    CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
+    cc.setScorer(new MockScorer());
+    for (int i = 0; i < 10; i++) cc.collect(i);
+    cc.replay(new NoOpCollector(true)); // this call should not fail
+    cc.replay(new NoOpCollector(false)); // this call should not fail
+
+    // 'src' Collector supports out-of-order
+    cc = CachingCollector.create(new NoOpCollector(true), true, 50 * ONE_BYTE);
+    cc.setScorer(new MockScorer());
+    for (int i = 0; i < 10; i++) cc.collect(i);
+    cc.replay(new NoOpCollector(true)); // this call should not fail
+    try {
+      cc.replay(new NoOpCollector(false)); // this call should fail
+      fail("should have failed if an in-order Collector was given to replay(), " +
+          "while CachingCollector was initialized with out-of-order collection");
+    } catch (IllegalArgumentException e) {
+      // ok
+    }
+  }
+
+  public void testCachedArraysAllocation() throws Exception {
+    // tests the cached arrays allocation -- if the 'nextLength' was too high,
+    // caching would terminate even if a smaller length would suffice.
+
+    // set RAM limit enough for 150 docs + random(10000)
+    int numDocs = random.nextInt(10000) + 150;
+    for (boolean cacheScores : new boolean[] { false, true }) {
+      int bytesPerDoc = cacheScores ? 8 : 4;
+      CachingCollector cc = CachingCollector.create(new NoOpCollector(false),
+          cacheScores, bytesPerDoc * ONE_BYTE * numDocs);
+      cc.setScorer(new MockScorer());
+      for (int i = 0; i < numDocs; i++) cc.collect(i);
+      assertTrue(cc.isCached());
+
+      // The 151's document should terminate caching
+      cc.collect(numDocs);
+      assertFalse(cc.isCached());
+    }
+  }
+
+}
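The new test exercises CachingCollector.create(other, cacheScores, maxRAMMB), isCached() and replay(). A hedged sketch of the intended search-once, replay-many pattern (searcher, query and collector names are hypothetical, not from the patch):

    import org.apache.lucene.search.CachingCollector;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;

    public class CachingCollectorSketch {
      // wraps a primary collector; doc IDs (and optionally scores) are cached up to 64 MB
      static void searchThenReplay(IndexSearcher searcher, Query query,
                                   Collector primary, Collector secondary) throws Exception {
        CachingCollector cache = CachingCollector.create(primary, true /*cacheScores*/, 64.0 /*maxRAMMB*/);
        searcher.search(query, cache);
        if (cache.isCached()) {
          // feed the same hits to another collector without re-running the query
          cache.replay(secondary);
        } else {
          // cache overflowed maxRAMMB: fall back to searching again
          searcher.search(query, secondary);
        }
      }
    }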
@@ -17,11 +17,14 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.Explanation.IDFExplanation;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -423,7 +426,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
       mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
     }
     TopDocs hits = s.search(mpq, 2);
-    assert hits.totalHits == 2;
+    assertEquals(2, hits.totalHits);
     assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
     /*
     for(int hit=0;hit<hits.totalHits;hit++) {
@@ -434,4 +437,156 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
     r.close();
     dir.close();
   }
+
+  private final static TokenAndPos[] INCR_0_DOC_TOKENS = new TokenAndPos[] {
+    new TokenAndPos("x", 0),
+    new TokenAndPos("a", 1),
+    new TokenAndPos("1", 1),
+    new TokenAndPos("m", 2), // not existing, relying on slop=2
+    new TokenAndPos("b", 3),
+    new TokenAndPos("1", 3),
+    new TokenAndPos("n", 4), // not existing, relying on slop=2
+    new TokenAndPos("c", 5),
+    new TokenAndPos("y", 6)
+  };
+
+  private final static TokenAndPos[] INCR_0_QUERY_TOKENS_AND = new TokenAndPos[] {
+    new TokenAndPos("a", 0),
+    new TokenAndPos("1", 0),
+    new TokenAndPos("b", 1),
+    new TokenAndPos("1", 1),
+    new TokenAndPos("c", 2)
+  };
+
+  private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_MATCH = new TokenAndPos[][] {
+    { new TokenAndPos("a", 0) },
+    { new TokenAndPos("x", 0), new TokenAndPos("1", 0) },
+    { new TokenAndPos("b", 1) },
+    { new TokenAndPos("x", 1), new TokenAndPos("1", 1) },
+    { new TokenAndPos("c", 2) }
+  };
+
+  private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN = new TokenAndPos[][] {
+    { new TokenAndPos("x", 0) },
+    { new TokenAndPos("a", 0), new TokenAndPos("1", 0) },
+    { new TokenAndPos("x", 1) },
+    { new TokenAndPos("b", 1), new TokenAndPos("1", 1) },
+    { new TokenAndPos("c", 2) }
+  };
+
+  /**
+   * using query parser, MPQ will be created, and will not be strict about having all query terms
+   * in each position - one of each position is sufficient (OR logic)
+   */
+  public void testZeroPosIncrSloppyParsedAnd() throws IOException, ParseException {
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
+    final Query q = qp.parse("\"this text is acually ignored\"");
+    assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
+    doTestZeroPosIncrSloppy(q, 0);
+    ((MultiPhraseQuery) q).setSlop(1);
+    doTestZeroPosIncrSloppy(q, 0);
+    ((MultiPhraseQuery) q).setSlop(2);
+    doTestZeroPosIncrSloppy(q, 1);
+  }
+
+  private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
+    Directory dir = newDirectory(); // random dir
+    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
+    IndexWriter writer = new IndexWriter(dir, cfg);
+    Document doc = new Document();
+    doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    IndexReader r = IndexReader.open(writer,false);
+    writer.close();
+    IndexSearcher s = new IndexSearcher(r);
+
+    if (VERBOSE) {
+      System.out.println("QUERY=" + q);
+    }
+
+    TopDocs hits = s.search(q, 1);
+    assertEquals("wrong number of results", nExpected, hits.totalHits);
+
+    if (VERBOSE) {
+      for(int hit=0;hit<hits.totalHits;hit++) {
+        ScoreDoc sd = hits.scoreDocs[hit];
+        System.out.println("  hit doc=" + sd.doc + " score=" + sd.score);
+      }
+    }
+
+    r.close();
+    dir.close();
+  }
+
+  /**
+   * PQ AND Mode - Manually creating a phrase query
+   */
+  public void testZeroPosIncrSloppyPqAnd() throws IOException, ParseException {
+    final PhraseQuery pq = new PhraseQuery();
+    for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
+      pq.add(new Term("field",tap.token), tap.pos);
+    }
+    doTestZeroPosIncrSloppy(pq, 0);
+    pq.setSlop(1);
+    doTestZeroPosIncrSloppy(pq, 0);
+    pq.setSlop(2);
+    doTestZeroPosIncrSloppy(pq, 1);
+  }
+
+  /**
+   * MPQ AND Mode - Manually creating a multiple phrase query
+   */
+  public void testZeroPosIncrSloppyMpqAnd() throws IOException, ParseException {
+    final MultiPhraseQuery mpq = new MultiPhraseQuery();
+    for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
+      mpq.add(new Term[]{new Term("field",tap.token)}, tap.pos); //AND logic
+    }
+    doTestZeroPosIncrSloppy(mpq, 0);
+    mpq.setSlop(1);
+    doTestZeroPosIncrSloppy(mpq, 0);
+    mpq.setSlop(2);
+    doTestZeroPosIncrSloppy(mpq, 1);
+  }
+
+  /**
+   * MPQ Combined AND OR Mode - Manually creating a multiple phrase query
+   */
+  public void testZeroPosIncrSloppyMpqAndOrMatch() throws IOException, ParseException {
+    final MultiPhraseQuery mpq = new MultiPhraseQuery();
+    for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_MATCH) {
+      Term[] terms = tapTerms(tap);
+      final int pos = tap[0].pos;
+      mpq.add(terms, pos); //AND logic in pos, OR across lines
+    }
+    doTestZeroPosIncrSloppy(mpq, 0);
+    mpq.setSlop(1);
+    doTestZeroPosIncrSloppy(mpq, 0);
+    mpq.setSlop(2);
+    doTestZeroPosIncrSloppy(mpq, 1);
+  }
+
+  /**
+   * MPQ Combined AND OR Mode - Manually creating a multiple phrase query - with no match
+   */
+  public void testZeroPosIncrSloppyMpqAndOrNoMatch() throws IOException, ParseException {
+    final MultiPhraseQuery mpq = new MultiPhraseQuery();
+    for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN) {
+      Term[] terms = tapTerms(tap);
+      final int pos = tap[0].pos;
+      mpq.add(terms, pos); //AND logic in pos, OR across lines
+    }
+    doTestZeroPosIncrSloppy(mpq, 0);
+    mpq.setSlop(2);
+    doTestZeroPosIncrSloppy(mpq, 0);
+  }
+
+  private Term[] tapTerms(TokenAndPos[] tap) {
+    Term[] terms = new Term[tap.length];
+    for (int i=0; i<terms.length; i++) {
+      terms[i] = new Term("field",tap[i].token);
+    }
+    return terms;
+  }
+
 }
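The added tests build MultiPhraseQuery by hand: one Term[] per position gives OR semantics within that position, consecutive positions are combined as a phrase, and setSlop() relaxes the gaps. A hedged sketch (field and terms are hypothetical, not from the patch):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.MultiPhraseQuery;

    public class MultiPhraseSketch {
      public static void main(String[] args) {
        MultiPhraseQuery mpq = new MultiPhraseQuery();
        // position 0: either "app" or "application"
        mpq.add(new Term[] { new Term("body", "app"), new Term("body", "application") }, 0);
        // position 1: must be "server"
        mpq.add(new Term[] { new Term("body", "server") }, 1);
        // tolerate one extra token between the two positions
        mpq.setSlop(1);
        System.out.println(mpq);
      }
    }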
@@ -65,6 +65,7 @@ public class TestTermScorer extends LuceneTestCase {
     indexSearcher.close();
     indexReader.close();
     directory.close();
+    super.tearDown();
   }
 
   public void test() throws IOException {
@@ -0,0 +1,47 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Comparator;
+
+/**
+ * Tests for StringHelper.getVersionComparator
+ */
+public class TestVersionComparator extends LuceneTestCase {
+  public void testVersions() {
+    Comparator<String> comp = StringHelper.getVersionComparator();
+    assertTrue(comp.compare("1", "2") < 0);
+    assertTrue(comp.compare("1", "1") == 0);
+    assertTrue(comp.compare("2", "1") > 0);
+
+    assertTrue(comp.compare("1.1", "1") > 0);
+    assertTrue(comp.compare("1", "1.1") < 0);
+    assertTrue(comp.compare("1.1", "1.1") == 0);
+
+    assertTrue(comp.compare("1.0", "1") == 0);
+    assertTrue(comp.compare("1", "1.0") == 0);
+    assertTrue(comp.compare("1.0.1", "1.0") > 0);
+    assertTrue(comp.compare("1.0", "1.0.1") < 0);
+
+    assertTrue(comp.compare("1.02.003", "1.2.3.0") == 0);
+    assertTrue(comp.compare("1.2.3.0", "1.02.003") == 0);
+
+    assertTrue(comp.compare("1.10", "1.9") > 0);
+    assertTrue(comp.compare("1.9", "1.10") < 0);
+  }
+}
@@ -39,6 +39,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
     assertCharVectors(2);
   }
 
+  // LUCENE-3094
+  public void testNoWastedStates() throws Exception {
+    AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
+  }
+
   /**
    * Tests all possible characteristic vectors for some n
    * This exhaustively tests the parametric transitions tables.
@@ -66,6 +71,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
       assertNotNull(automata[n]);
       assertTrue(automata[n].isDeterministic());
       assertTrue(SpecialOperations.isFinite(automata[n]));
+      AutomatonTestUtil.assertNoDetachedStates(automata[n]);
       // check that the dfa for n-1 accepts a subset of the dfa for n
       if (n > 0) {
         assertTrue(automata[n-1].subsetOf(automata[n]));
@@ -49,4 +49,9 @@ public class TestMinimize extends LuceneTestCase {
       assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
     }
   }
+
+  /** n^2 space usage in Hopcroft minimization? */
+  public void testMinimizeHuge() {
+    new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
+  }
 }
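testMinimizeHuge feeds a pathological regexp through the minimizer to catch the quadratic-space behavior addressed in the MinimizationOperations hunks earlier in this diff. A hedged sketch of minimizing an automaton built from a regexp; MinimizationOperations.minimize as the public entry point is an assumption, and the regexp is hypothetical:

    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.MinimizationOperations;
    import org.apache.lucene.util.automaton.RegExp;

    public class MinimizeSketch {
      public static void main(String[] args) {
        Automaton a = new RegExp("(ab|ac)*d", RegExp.NONE).toAutomaton();
        int before = a.getNumberOfStates();
        MinimizationOperations.minimize(a); // Hopcroft minimization, in place
        System.out.println(before + " -> " + a.getNumberOfStates() + " states");
      }
    }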
@@ -54,14 +54,16 @@ public class TestFSTs extends LuceneTestCase {
   private MockDirectoryWrapper dir;
 
   @Override
-  public void setUp() throws IOException {
+  public void setUp() throws Exception {
+    super.setUp();
     dir = newDirectory();
     dir.setPreventDoubleWrite(false);
   }
 
   @Override
-  public void tearDown() throws IOException {
+  public void tearDown() throws Exception {
     dir.close();
+    super.tearDown();
   }
 
   private static BytesRef toBytesRef(IntsRef ir) {
@@ -456,8 +458,9 @@ public class TestFSTs extends LuceneTestCase {
         if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
           final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
           final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
-          ((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.first));
-          ((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.second));
+          @SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
+          builderObject.add(pair.input, _outputs.get(twoLongs.first));
+          builderObject.add(pair.input, _outputs.get(twoLongs.second));
         } else {
           builder.add(pair.input, pair.output);
         }
@@ -537,7 +540,7 @@ public class TestFSTs extends LuceneTestCase {
       Object output = run(fst, term, null);
 
       assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
-      assertEquals(output, pair.output);
+      assertEquals(pair.output, output);
 
       // verify enum's next
       IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
@@ -49,6 +49,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
 
   private State previous;
   private String previousType;
+  private boolean exhausted;
 
   /**
    * Constructs a new CommonGramsQueryFilter based on the provided CommomGramsFilter
@@ -67,6 +68,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
     super.reset();
     previous = null;
     previousType = null;
+    exhausted = false;
   }
 
   /**
@@ -79,7 +81,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
    */
   @Override
   public boolean incrementToken() throws IOException {
-    while (input.incrementToken()) {
+    while (!exhausted && input.incrementToken()) {
       State current = captureState();
 
       if (previous != null && !isGramType()) {
@@ -96,6 +98,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
       previous = current;
     }
 
+    exhausted = true;
+
     if (previous == null || GRAM_TYPE.equals(previousType)) {
       return false;
     }
@@ -59,6 +59,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
 
   private final StringBuilder hyphenated = new StringBuilder();
   private State savedState;
+  private boolean exhausted = false;
 
   /**
    * Creates a new HyphenatedWordsFilter
@@ -74,7 +75,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
    */
   @Override
   public boolean incrementToken() throws IOException {
-    while (input.incrementToken()) {
+    while (!exhausted && input.incrementToken()) {
       char[] term = termAttribute.buffer();
       int termLength = termAttribute.length();
 
@@ -96,6 +97,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
       }
     }
 
+    exhausted = true;
+
     if (savedState != null) {
       // the final term ends with a hyphen
       // add back the hyphen, for backwards compatibility.
@@ -115,6 +118,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
     super.reset();
     hyphenated.setLength(0);
     savedState = null;
+    exhausted = false;
  }
 
   // ================================================= Helper Methods ================================================
@@ -76,4 +76,9 @@ public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
   public void close() throws IOException {
     suffix.close();
   }
+
+  @Override
+  public void end() throws IOException {
+    suffix.end();
+  }
 }
@@ -158,6 +158,12 @@ public class PrefixAwareTokenFilter extends TokenStream {
     return suffixToken;
   }

+  @Override
+  public void end() throws IOException {
+    prefix.end();
+    suffix.end();
+  }
+
   @Override
   public void close() throws IOException {
     prefix.close();
@@ -225,7 +225,6 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
       TokenStream result = delegate.reusableTokenStream(fieldName, reader);
       if (result == streams.wrapped) {
         /* the wrapped analyzer reused the stream */
-        streams.withStopFilter.reset();
       } else {
         /*
          * the wrapped analyzer did not. if there are any stopwords for the
@@ -199,10 +199,7 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
       setPreviousTokenStream(streams);
     } else {
       TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
-      if (result == streams.wrapped) {
-        /* the wrapped analyzer reused the stream */
-        streams.shingle.reset();
-      } else {
+      if (result != streams.wrapped) {
         /* the wrapped analyzer did not, create a new shingle around the new one */
         streams.wrapped = result;
         streams.shingle = new ShingleFilter(streams.wrapped);
@@ -327,6 +327,8 @@ public final class ShingleFilter extends TokenFilter {
     return tokenAvailable;
   }

+  private boolean exhausted;
+
   /**
    * <p>Get the next token from the input stream.
    * <p>If the next token has <code>positionIncrement > 1</code>,
@@ -359,7 +361,7 @@ public final class ShingleFilter extends TokenFilter {
       }
       isNextInputStreamToken = false;
       newTarget.isFiller = false;
-    } else if (input.incrementToken()) {
+    } else if (!exhausted && input.incrementToken()) {
       if (null == target) {
         newTarget = new InputWindowToken(cloneAttributes());
       } else {
@@ -387,6 +389,7 @@ public final class ShingleFilter extends TokenFilter {
       }
     } else {
       newTarget = null;
+      exhausted = true;
     }
     return newTarget;
   }
@@ -436,6 +439,7 @@ public final class ShingleFilter extends TokenFilter {
     numFillerTokensToInsert = 0;
     isOutputHere = false;
     noShingleOutput = true;
+    exhausted = false;
     if (outputUnigramsIfNoShingles && ! outputUnigrams) {
       // Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
       gramSize.minValue = minShingleSize;
@@ -190,16 +190,20 @@ public final class SynonymFilter extends TokenFilter {
   private LinkedList<AttributeSource> buffer;
   private LinkedList<AttributeSource> matched;

+  private boolean exhausted;
+
   private AttributeSource nextTok() throws IOException {
     if (buffer!=null && !buffer.isEmpty()) {
       return buffer.removeFirst();
     } else {
-      if (input.incrementToken()) {
+      if (!exhausted && input.incrementToken()) {
         return this;
-      } else
+      } else {
+        exhausted = true;
         return null;
+      }
     }
   }

   private void pushTok(AttributeSource t) {
     if (buffer==null) buffer=new LinkedList<AttributeSource>();
@@ -250,5 +254,6 @@ public final class SynonymFilter extends TokenFilter {
   public void reset() throws IOException {
     input.reset();
     replacement = null;
+    exhausted = false;
   }
 }
@@ -159,8 +159,6 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
      */
     protected boolean reset(final Reader reader) throws IOException {
       source.reset(reader);
-      if(sink != source)
-        sink.reset(); // only reset if the sink reference is different from source
       return true;
     }

@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.io.StringReader;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
@@ -215,8 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
   public void testWithKeywordAttribute() throws IOException {
     CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
     set.add("строеве");
-    WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
-        new StringReader("строевете строеве"));
+    MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);

     BulgarianStemFilter filter = new BulgarianStemFilter(
         new KeywordMarkerFilter(tokenStream, set));
@@ -22,8 +22,8 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharReader;
 import org.apache.lucene.analysis.CharStream;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;

 public class TestMappingCharFilter extends BaseTokenStreamTestCase {

@@ -64,55 +64,55 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {

   public void testNothingChange() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
-    TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
   }

   public void test1to1() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
   }

   public void test1to2() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
   }

   public void test1to3() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
   }

   public void test2to4() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
   }

   public void test2to1() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
   }

   public void test3to1() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
   }

   public void test4to2() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
   }

   public void test5to0() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts, new String[0]);
   }

@@ -136,7 +136,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
   //
   public void testTokenStream() throws Exception {
     CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
       new String[]{"i","i","jj","kkk","llll","cc","b","a"},
       new int[]{0,2,4,6,8,11,16,20},
@@ -157,7 +157,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
   public void testChained() throws Exception {
     CharStream cs = new MappingCharFilter( normMap,
       new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
-    TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
     assertTokenStreamContents(ts,
       new String[]{"a","llllllll","i"},
       new int[]{0,5,8},
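The test changes above (and in the files that follow) replace WhitespaceTokenizer with MockTokenizer, which splits on whitespace the same way but is designed to enforce stricter TokenStream behaviour checks in tests. A hedged sketch of the resulting test shape; the test class name and input text are illustrative and not part of this commit:

import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;

public class MockTokenizerUsageTest extends BaseTokenStreamTestCase {
  public void testWhitespaceSplit() throws Exception {
    // WHITESPACE mode splits on whitespace; the final 'false' disables lowercasing.
    TokenStream ts = new MockTokenizer(new StringReader("now is the time"),
        MockTokenizer.WHITESPACE, false);
    // assertTokenStreamContents consumes the stream and checks the produced terms.
    assertTokenStreamContents(ts, new String[] { "now", "is", "the", "time" });
  }
}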
@@ -21,6 +21,7 @@ import java.io.StringReader;
 import java.util.Arrays;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@@ -90,7 +91,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
       @Override
       public TokenStream tokenStream(String field, Reader in) {
         return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
+            new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
       }
     };

@@ -159,7 +160,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
       @Override
       public TokenStream tokenStream(String field, Reader in) {
         return new CommonGramsFilter(TEST_VERSION_CURRENT,
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
+            new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
       }
     };

@@ -245,7 +246,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
    */
   public void testCaseSensitive() throws Exception {
     final String input = "How The s a brown s cow d like A B thing?";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
         "s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow",
@@ -257,7 +258,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
    */
   public void testLastWordisStopWord() throws Exception {
     final String input = "dog the";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "dog_the" });
@@ -268,7 +269,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
    */
   public void testFirstWordisStopWord() throws Exception {
     final String input = "the dog";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the_dog" });
@@ -279,7 +280,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
    */
   public void testOneWordQueryStopWord() throws Exception {
     final String input = "the";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the" });
@@ -290,7 +291,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
    */
   public void testOneWordQuery() throws Exception {
     final String input = "monster";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "monster" });
@@ -301,7 +302,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
    */
   public void TestFirstAndLastStopWord() throws Exception {
     final String input = "the of";
-    WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     TokenFilter nsf = new CommonGramsQueryFilter(cgf);
     assertTokenStreamContents(nsf, new String[] { "the_of" });
@@ -21,6 +21,7 @@ import java.io.StringReader;
 import org.xml.sax.InputSource;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@@ -35,8 +36,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
         .getHyphenationTree(is);

     HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
-            "min veninde som er lidt af en læsehest")), hyphenator,
+        new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false),
+        hyphenator,
         dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
@@ -55,8 +56,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {

     // the word basket will not be added due to the longest match option
     HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
-            "basketballkurv")), hyphenator, dict,
+        new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
+        hyphenator, dict,
         CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
     assertTokenStreamContents(tf,
@@ -77,7 +78,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {

     HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
         TEST_VERSION_CURRENT,
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+        new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
         hyphenator,
         CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         2, 4);
@@ -89,7 +90,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {

     tf = new HyphenationCompoundWordTokenFilter(
         TEST_VERSION_CURRENT,
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+        new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
         hyphenator,
         CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         4, 6);
@@ -101,7 +102,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {

     tf = new HyphenationCompoundWordTokenFilter(
         TEST_VERSION_CURRENT,
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
+        new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
         hyphenator,
         CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         4, 10);
@@ -120,9 +121,10 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
         "Sko", "Vind", "Rute", "Torkare", "Blad" };

     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+        new MockTokenizer(
             new StringReader(
-                "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
+                "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"),
+            MockTokenizer.WHITESPACE, false),
         dict);

     assertTokenStreamContents(tf, new String[] { "Bildörr", "Bil", "dörr", "Bilmotor",
@@ -149,7 +151,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
         "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };

     DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
+        new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
         dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Set;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -36,36 +37,23 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
   public void testExactCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     Set<String> stopWords = asSet("is", "the", "Time");
-    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
-    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
-    assertTrue(stream.incrementToken());
-    assertEquals("Now", termAtt.toString());
-    assertTrue(stream.incrementToken());
-    assertEquals("The", termAtt.toString());
-    assertFalse(stream.incrementToken());
+    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
+    assertTokenStreamContents(stream, new String[] { "Now", "The" });
   }

   public void testIgnoreCase() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     Set<String> stopWords = asSet( "is", "the", "Time" );
-    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
-    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
-    assertTrue(stream.incrementToken());
-    assertEquals("Now", termAtt.toString());
-    assertFalse(stream.incrementToken());
+    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
+    assertTokenStreamContents(stream, new String[] { "Now" });
   }

   public void testStopFilt() throws IOException {
     StringReader reader = new StringReader("Now is The Time");
     String[] stopWords = new String[] { "is", "the", "Time" };
     Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
-    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
-    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
-    assertTrue(stream.incrementToken());
-    assertEquals("Now", termAtt.toString());
-    assertTrue(stream.incrementToken());
-    assertEquals("The", termAtt.toString());
-    assertFalse(stream.incrementToken());
+    TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
+    assertTokenStreamContents(stream, new String[] { "Now", "The" });
   }

   /**
@@ -85,11 +73,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
     // with increments
     StringReader reader = new StringReader(sb.toString());
-    StopFilter stpf = new StopFilter(Version.LUCENE_40, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+    StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
     doTestStopPositons(stpf,true);
     // without increments
     reader = new StringReader(sb.toString());
-    stpf = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+    stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
     doTestStopPositons(stpf,false);
     // with increments, concatenating two stop filters
     ArrayList<String> a0 = new ArrayList<String>();
@@ -108,7 +96,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
     Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
     reader = new StringReader(sb.toString());
-    StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet0); // first part of the set
+    StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
     stpf0.setEnablePositionIncrements(true);
     StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
     doTestStopPositons(stpf01,true);
@@ -119,6 +107,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     stpf.setEnablePositionIncrements(enableIcrements);
     CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
+    stpf.reset();
     for (int i=0; i<20; i+=3) {
       assertTrue(stpf.incrementToken());
       log("Token "+i+": "+stpf);
@@ -127,6 +116,8 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
       assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
     }
     assertFalse(stpf.incrementToken());
+    stpf.end();
+    stpf.close();
   }

   // print debug info depending on VERBOSE
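The doTestStopPositons changes above add the reset(), end() and close() calls that the TokenStream API expects from a consumer. A minimal consumer sketch under that contract; the class name and input text are illustrative only:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ConsumeTokenStream {
  // Consume a stream in the documented order: reset, incrementToken loop, end, close.
  public static void consume(TokenStream stream) throws IOException {
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();                            // prepare the stream for consumption
    while (stream.incrementToken()) {
      System.out.println(termAtt.toString());  // one term per successful call
    }
    stream.end();                              // report the final offset state
    stream.close();                            // release underlying resources
  }

  public static void main(String[] args) throws IOException {
    consume(new MockTokenizer(new StringReader("now is the time"),
        MockTokenizer.WHITESPACE, false));
  }
}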
@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.io.StringReader;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;

@@ -278,7 +278,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("hole");
     CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
-        new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
+        new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
     assertTokenStreamContents(filter, new String[] { "hole", "desk" });
   }

@@ -22,8 +22,8 @@ import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new GermanLightStemFilter(source));
     }
   };
@@ -22,8 +22,8 @@ import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
     }
   };
@@ -22,8 +22,8 @@ import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

 /**
@@ -34,7 +34,7 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
     }
   };
@@ -22,12 +22,11 @@ import java.io.Reader;
 import java.io.StringReader;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;

@@ -41,7 +40,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer t = new KeywordTokenizer(reader);
+      Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
       return new TokenStreamComponents(t, new PorterStemFilter(t));
     }
   };
@@ -57,7 +56,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
   public void testWithKeywordAttribute() throws IOException {
     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
     set.add("yourselves");
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
     TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
     assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
   }
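TestPorterStemFilter above also swaps KeywordTokenizer for MockTokenizer.KEYWORD, which is meant to behave the same way: the whole input is emitted as a single token. A hedged sketch of that behaviour; the test class and input are illustrative, not from this commit:

import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;

public class MockKeywordModeTest extends BaseTokenStreamTestCase {
  public void testKeywordMode() throws Exception {
    // KEYWORD mode does not split the input at all, mirroring KeywordTokenizer.
    TokenStream ts = new MockTokenizer(new StringReader("running shoes"),
        MockTokenizer.KEYWORD, false);
    assertTokenStreamContents(ts, new String[] { "running shoes" });
  }
}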
@@ -22,8 +22,8 @@ import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
     }
   };
@@ -22,8 +22,8 @@ import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
     }
   };
@@ -22,8 +22,8 @@ import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
     }
   };
@@ -22,8 +22,8 @@ import java.io.Reader;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
     }
   };
@@ -21,9 +21,9 @@ import java.io.IOException;
 import java.io.StringReader;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;

 /**
  * Test HindiNormalizer
@@ -59,8 +59,7 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
     check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
   }
   private void check(String input, String output) throws IOException {
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
-        new StringReader(input));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     TokenFilter tf = new HindiNormalizationFilter(tokenizer);
     assertTokenStreamContents(tf, new String[] { output });
   }
Some files were not shown because too many files have changed in this diff.