mirror of https://github.com/apache/lucene.git

merged with trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1124321 13f79535-47bb-0310-9956-ffa450edef68

commit 43e40e8844

@@ -53,6 +53,8 @@
<classpathentry kind="src" path="modules/analysis/stempel/src/test"/>
|
||||
<classpathentry kind="src" path="modules/benchmark/src/java"/>
|
||||
<classpathentry kind="src" path="modules/benchmark/src/test"/>
|
||||
<classpathentry kind="src" path="modules/grouping/src/java"/>
|
||||
<classpathentry kind="src" path="modules/grouping/src/test"/>
|
||||
<classpathentry kind="src" path="solr/src/java"/>
|
||||
<classpathentry kind="src" path="solr/src/webapp/src"/>
|
||||
<classpathentry kind="src" path="solr/src/common"/>
|
||||
|
@ -124,8 +126,8 @@
|
|||
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-glassfish-2.1.v20091210.jar"/>
|
||||
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-jetty-6.1.26.jar"/>
|
||||
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-api-2.1-glassfish-2.1.v20091210.jar"/>
|
||||
<classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.1.jar"/>
|
||||
<classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.5.0.jar"/>
|
||||
<classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.3.jar"/>
|
||||
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-core-asl-1.5.2.jar"/>
|
||||
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-mapper-asl-1.5.2.jar"/>
|
||||
<classpathentry kind="lib" path="solr/contrib/clustering/lib/mahout-collections-0.3.jar"/>
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
|
||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
<module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
|
||||
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
|
||||
<module filepath="$PROJECT_DIR$/solr/solr.iml" />
|
||||
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
|
||||
<module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />
|
||||
|
|
|
@ -71,6 +71,13 @@
|
|||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
</configuration>
|
||||
<configuration default="false" name="grouping module" type="JUnit" factoryName="JUnit">
|
||||
<module name="grouping" />
|
||||
<option name="TEST_OBJECT" value="package" />
|
||||
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/grouping/build" />
|
||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
</configuration>
|
||||
<configuration default="false" name="highlighter contrib" type="JUnit" factoryName="JUnit">
|
||||
<module name="highlighter" />
|
||||
<option name="TEST_OBJECT" value="package" />
|
||||
|
@ -204,7 +211,7 @@
|
|||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||
</configuration>
|
||||
<list size="29">
|
||||
<list size="30">
|
||||
<item index="0" class="java.lang.String" itemvalue="JUnit.analysis-extras contrib" />
|
||||
<item index="1" class="java.lang.String" itemvalue="JUnit.ant contrib" />
|
||||
<item index="2" class="java.lang.String" itemvalue="JUnit.bdb contrib" />
|
||||
|
@ -215,25 +222,26 @@
|
|||
<item index="7" class="java.lang.String" itemvalue="JUnit.dataimporthandler contrib" />
|
||||
<item index="8" class="java.lang.String" itemvalue="JUnit.extraction contrib" />
|
||||
<item index="9" class="java.lang.String" itemvalue="JUnit.extras from dataimporthandler contrib" />
|
||||
<item index="10" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
|
||||
<item index="11" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
|
||||
<item index="12" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
|
||||
<item index="13" class="java.lang.String" itemvalue="JUnit.lucene" />
|
||||
<item index="14" class="java.lang.String" itemvalue="JUnit.lucli contrib" />
|
||||
<item index="15" class="java.lang.String" itemvalue="JUnit.memory contrib" />
|
||||
<item index="16" class="java.lang.String" itemvalue="JUnit.misc contrib" />
|
||||
<item index="17" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
|
||||
<item index="18" class="java.lang.String" itemvalue="JUnit.queries contrib" />
|
||||
<item index="19" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
|
||||
<item index="20" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
|
||||
<item index="21" class="java.lang.String" itemvalue="JUnit.solr" />
|
||||
<item index="22" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
|
||||
<item index="23" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
|
||||
<item index="24" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
|
||||
<item index="25" class="java.lang.String" itemvalue="JUnit.swing contrib" />
|
||||
<item index="26" class="java.lang.String" itemvalue="JUnit.uima contrib" />
|
||||
<item index="27" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
|
||||
<item index="28" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
|
||||
<item index="10" class="java.lang.String" itemvalue="JUnit.grouping module" />
|
||||
<item index="11" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
|
||||
<item index="12" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
|
||||
<item index="13" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
|
||||
<item index="14" class="java.lang.String" itemvalue="JUnit.lucene" />
|
||||
<item index="15" class="java.lang.String" itemvalue="JUnit.lucli contrib" />
|
||||
<item index="16" class="java.lang.String" itemvalue="JUnit.memory contrib" />
|
||||
<item index="17" class="java.lang.String" itemvalue="JUnit.misc contrib" />
|
||||
<item index="18" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
|
||||
<item index="19" class="java.lang.String" itemvalue="JUnit.queries contrib" />
|
||||
<item index="20" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
|
||||
<item index="21" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
|
||||
<item index="22" class="java.lang.String" itemvalue="JUnit.solr" />
|
||||
<item index="23" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
|
||||
<item index="24" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
|
||||
<item index="25" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
|
||||
<item index="26" class="java.lang.String" itemvalue="JUnit.swing contrib" />
|
||||
<item index="27" class="java.lang.String" itemvalue="JUnit.uima contrib" />
|
||||
<item index="28" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
|
||||
<item index="29" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
|
||||
</list>
|
||||
</component>
|
||||
</project>
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/build/classes/java" />
|
||||
<output-test url="file://$MODULE_DIR$/build/classes/test" />
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/work" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||
<orderEntry type="module" module-name="lucene" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,71 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../lucene/pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-grouping</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Lucene Grouping</name>
|
||||
<description>Lucene Grouping Module</description>
|
||||
<properties>
|
||||
<module-directory>modules/grouping</module-directory>
|
||||
<build-directory>build</build-directory>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<directory>${build-directory}</directory>
|
||||
<outputDirectory>${build-directory}/classes/java</outputDirectory>
|
||||
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
|
||||
<sourceDirectory>src/java</sourceDirectory>
|
||||
<testSourceDirectory>src/test</testSourceDirectory>
|
||||
<testResources>
|
||||
<testResource>
|
||||
<directory>${project.build.testSourceDirectory}</directory>
|
||||
<excludes>
|
||||
<exclude>**/*.java</exclude>
|
||||
</excludes>
|
||||
</testResource>
|
||||
</testResources>
|
||||
</build>
|
||||
</project>
|
|
@ -33,6 +33,7 @@
|
|||
<modules>
|
||||
<module>analysis</module>
|
||||
<module>benchmark</module>
|
||||
<module>grouping</module>
|
||||
</modules>
|
||||
<build>
|
||||
<directory>build/lucene-modules-aggregator</directory>
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
<groupId>org.apache</groupId>
|
||||
<artifactId>apache</artifactId>
|
||||
<version>8</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-solr-grandparent</artifactId>
|
||||
|
@ -105,14 +106,6 @@
|
|||
</license>
|
||||
</licenses>
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>carrot2.org</id>
|
||||
<name>Carrot2 Maven2 repository</name>
|
||||
<url>http://download.carrot2.org/maven2/</url>
|
||||
<snapshots>
|
||||
<updatePolicy>never</updatePolicy>
|
||||
</snapshots>
|
||||
</repository>
|
||||
<repository>
|
||||
<id>apache.snapshots</id>
|
||||
<name>Apache Snapshot Repository</name>
|
||||
|
@ -305,7 +298,7 @@
|
|||
<dependency>
|
||||
<groupId>org.carrot2</groupId>
|
||||
<artifactId>carrot2-core</artifactId>
|
||||
<version>3.4.2</version>
|
||||
<version>3.5.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.woodstox</groupId>
|
||||
|
|
|
@ -1,76 +1,76 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-test-framework</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Apache Solr Test Framework</name>
|
||||
<description>Apache Solr Test Framework</description>
|
||||
<properties>
|
||||
<module-directory>solr/src/test-framework</module-directory>
|
||||
<build-directory>../../build</build-directory>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>solr-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<directory>${build-directory}</directory>
|
||||
<outputDirectory>${build-directory}/classes/test-framework</outputDirectory>
|
||||
<sourceDirectory>.</sourceDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>.</directory>
|
||||
<excludes>
|
||||
<exclude>**/*.java</exclude>
|
||||
</excludes>
|
||||
</resource>
|
||||
</resources>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.solr</groupId>
|
||||
<artifactId>solr-test-framework</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Apache Solr Test Framework</name>
|
||||
<description>Apache Solr Test Framework</description>
|
||||
<properties>
|
||||
<module-directory>solr/src/test-framework</module-directory>
|
||||
<build-directory>../../build</build-directory>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>solr-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<directory>${build-directory}</directory>
|
||||
<outputDirectory>${build-directory}/classes/test-framework</outputDirectory>
|
||||
<sourceDirectory>.</sourceDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>.</directory>
|
||||
<excludes>
|
||||
<exclude>**/*.java</exclude>
|
||||
</excludes>
|
||||
</resource>
|
||||
</resources>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
@@ -162,11 +162,6 @@ Changes in Runtime Behavior
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
  than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)

* LUCENE-1076: The default merge policy (TieredMergePolicy) is now
  able to merge non-contiguous segments, which means docIDs no longer
  necessarily stay "in order". If this is a problem then you can use
  either of the LogMergePolicy impls. (Mike McCandless)

* LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked
  per IndexWriter session, which resulted in FieldInfos that had the FieldInfo
  properties from all previous segments combined. Field numbers are now tracked
@@ -416,6 +411,10 @@ New features
  it's able to handle multi-valued fields and does not hold the term
  bytes in RAM. (Mike McCandless)

* LUCENE-1421, LUCENE-3102: added CachingCollector, which allows you to cache
  document IDs and scores encountered during the search, and "replay" them to
  another Collector. (Mike McCandless, Shai Erera)
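As a rough sketch of how the CachingCollector described above might be wired up, assuming the create(other, cacheScores, maxRAMMB), isCached() and replay(collector) API this entry refers to; the searcher, query, both collectors and the 32 MB cache budget are placeholders:

import java.io.IOException;

import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

class CachingCollectorSketch {
  // Run the query once, caching doc IDs and scores, then feed the same hits
  // to a second collector without executing the search again.
  static void searchTwice(IndexSearcher searcher, Query query,
                          Collector first, Collector second) throws IOException {
    // cache scores too, with an (assumed) 32 MB RAM budget for the cache
    CachingCollector cached = CachingCollector.create(first, true, 32.0);
    searcher.search(query, cached);
    if (cached.isCached()) {
      cached.replay(second);          // "replay" the cached hits
    } else {
      searcher.search(query, second); // cache overflowed; search again
    }
  }
}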

Optimizations

* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
@@ -452,6 +451,9 @@ Bug fixes
  indexes, causing existing deletions to be applied on the incoming indexes as
  well. (Shai Erera, Mike McCandless)

* LUCENE-3068: sloppy phrase query failed to match valid documents when multiple
  query terms had the same position in the query. (Doron Cohen)

Test Cases

* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
@@ -476,9 +478,15 @@ Changes in backwards compatibility policy
  (Mike McCandless, Shai Erera)

* LUCENE-3084: MergePolicy.OneMerge.segments was changed from
  SegmentInfos to a List<SegmentInfo>; this is actually a minor change
  because SegmentInfos itself extends Vector<SegmentInfo>. (Uwe
  Schindler, Mike McCandless)
  SegmentInfos to a List<SegmentInfo>. SegmentInfos itself was changed
  to no longer extend Vector<SegmentInfo> (to update code that is using
  the Vector API, use the new asList() and asSet() methods returning unmodifiable
  collections; modifying SegmentInfos is now only possible through
  the explicitly declared methods). IndexWriter.segString() now takes
  Iterable<SegmentInfo> instead of List<SegmentInfo>. A simple recompile
  should fix this. MergePolicy and SegmentInfos are internal/experimental
  APIs not covered by the strict backwards compatibility policy.
  (Uwe Schindler, Mike McCandless)
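For code that previously relied on SegmentInfos being a Vector<SegmentInfo>, a hedged migration sketch using the asList() view named in the entry above; the public docCount field is an assumption about the 3.x-era SegmentInfo layout:

import java.util.List;

import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;

class SegmentInfosMigrationSketch {
  // Before this change, callers could iterate SegmentInfos directly because it
  // extended Vector<SegmentInfo>; now they go through the unmodifiable list view.
  static int totalDocs(SegmentInfos infos) {
    int total = 0;
    final List<SegmentInfo> segments = infos.asList();
    for (SegmentInfo si : segments) {
      total += si.docCount; // assumed public docCount field, as in 3.x SegmentInfo
    }
    return total;
  }
}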

Changes in runtime behavior

@@ -492,6 +500,13 @@ Changes in runtime behavior
  returns NumericField instances. (Uwe Schindler, Ryan McKinley,
  Mike McCandless)

* LUCENE-1076: Changed the default merge policy from
  LogByteSizeMergePolicy to TieredMergePolicy, as of Version.LUCENE_32
  (passed to IndexWriterConfig), which is able to merge non-contiguous
  segments. This means docIDs no longer necessarily stay "in order"
  during indexing. If this is a problem then you can use either of
  the LogMergePolicy impls. (Mike McCandless)
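For applications that depend on docIDs staying in order, a sketch of keeping the previous behavior by setting a LogMergePolicy explicitly; Version.LUCENE_32 is taken from the entry above, while the analyzer choice and index path are placeholders:

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

class InOrderMergePolicySketch {
  static IndexWriter openWriter(File path) throws Exception {
    Directory dir = FSDirectory.open(path);
    IndexWriterConfig conf = new IndexWriterConfig(
        Version.LUCENE_32, new StandardAnalyzer(Version.LUCENE_32));
    // Opt out of the new TieredMergePolicy default; LogMergePolicy only
    // merges contiguous segments, so docIDs keep their insertion order.
    conf.setMergePolicy(new LogByteSizeMergePolicy());
    return new IndexWriter(dir, conf);
  }
}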

New features

* LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader
@@ -75,10 +75,36 @@ Bug Fixes
  caused a problem if you consumed a tokenstream, then reused it, added different
  attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)

* LUCENE-3113: Fixed some minor analysis bugs: double-reset() in ReusableAnalyzerBase
  and ShingleAnalyzerWrapper, missing end() implementations in PrefixAwareTokenFilter
  and PrefixAndSuffixAwareTokenFilter, invocations of incrementToken() after it
  already returned false in CommonGramsQueryFilter, HyphenatedWordsFilter,
  ShingleFilter, and SynonymsFilter. (Robert Muir, Steven Rowe, Uwe Schindler)

New Features

* LUCENE-3016: Add analyzer for Latvian. (Robert Muir)

* LUCENE-1421: create new grouping contrib module, enabling search
  results to be grouped by a single-valued indexed field. This
  module was factored out of Solr's grouping implementation, but
  it cannot group by function queries nor arbitrary queries. (Mike
  McCandless)

* LUCENE-3098: add AllGroupsCollector, to collect all unique groups
  (but in unspecified order). (Martijn van Groningen via Mike
  McCandless)

* LUCENE-3092: Added NRTCachingDirectory in contrib/misc, which
  caches small segments in RAM. This is useful, in the near-real-time
  case where the indexing rate is lowish but the reopen rate is
  highish, to take load off the IO system. (Mike McCandless)

Optimizations

* LUCENE-3040: Switch all analysis consumers (highlighter, morelikethis, memory, ...)
  over to reusableTokenStream(). (Robert Muir)
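The source diffs later in this commit apply that change mechanically; as a reference, a sketch of the consumption pattern they switch to (reusableTokenStream() plus reset()/end()/close()), with the analyzer, field and text supplied by the caller:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

class ReusableTokenStreamSketch {
  static void printTerms(Analyzer analyzer, String field, String text) throws IOException {
    // reusableTokenStream() may hand back the same TokenStream instance on each
    // call, so it must be reset before use and released when done.
    TokenStream ts = analyzer.reusableTokenStream(field, new StringReader(text));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    try {
      while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      ts.end();
    } finally {
      ts.close();
    }
  }
}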

======================= Lucene 3.1.0 =======================

Changes in backwards compatibility policy
@ -17,8 +17,6 @@ package org.apache.lucene.ant;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.ant.DocumentTestCase;
|
||||
import org.apache.lucene.ant.HtmlDocument;
|
||||
|
||||
|
@ -27,7 +25,8 @@ public class HtmlDocumentTest extends DocumentTestCase
|
|||
HtmlDocument doc;
|
||||
|
||||
@Override
|
||||
public void setUp() throws IOException {
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
doc = new HtmlDocument(getFile("test.html"));
|
||||
}
|
||||
|
||||
|
@ -37,8 +36,9 @@ public class HtmlDocumentTest extends DocumentTestCase
|
|||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() {
|
||||
public void tearDown() throws Exception {
|
||||
doc = null;
|
||||
super.tearDown();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,8 +17,6 @@ package org.apache.lucene.ant;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.ant.DocumentTestCase;
|
||||
import org.apache.lucene.ant.TextDocument;
|
||||
|
||||
|
@ -27,7 +25,8 @@ public class TextDocumentTest extends DocumentTestCase
|
|||
TextDocument doc;
|
||||
|
||||
@Override
|
||||
public void setUp() throws IOException {
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
doc = new TextDocument(getFile("test.txt"));
|
||||
}
|
||||
|
||||
|
@ -36,8 +35,9 @@ public class TextDocumentTest extends DocumentTestCase
|
|||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() {
|
||||
public void tearDown() throws Exception {
|
||||
doc = null;
|
||||
super.tearDown();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ public class Highlighter
|
|||
public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
|
||||
TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
|
||||
return getBestFragment(tokenStream, text);
|
||||
}
|
||||
|
||||
|
@ -130,7 +130,7 @@ public class Highlighter
|
|||
int maxNumFragments)
|
||||
throws IOException, InvalidTokenOffsetsException
|
||||
{
|
||||
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
|
||||
TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
|
||||
return getBestFragments(tokenStream, text, maxNumFragments);
|
||||
}
|
||||
|
||||
|
|
|
@ -286,7 +286,11 @@ public class TokenSources {
|
|||
// convenience method
|
||||
public static TokenStream getTokenStream(String field, String contents,
|
||||
Analyzer analyzer) {
|
||||
return analyzer.tokenStream(field, new StringReader(contents));
|
||||
try {
|
||||
return analyzer.reusableTokenStream(field, new StringReader(contents));
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -532,7 +532,7 @@ public class InstantiatedIndexWriter implements Closeable {
|
|||
if (field.tokenStreamValue() != null) {
|
||||
tokenStream = field.tokenStreamValue();
|
||||
} else {
|
||||
tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
|
||||
tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
|
||||
}
|
||||
|
||||
// reset the TokenStream to the first token
|
||||
|
|
|
@ -305,11 +305,12 @@ class LuceneMethods {
|
|||
|
||||
int position = 0;
|
||||
// Tokenize field and add to postingTable
|
||||
TokenStream stream = analyzer.tokenStream(fieldName, reader);
|
||||
TokenStream stream = analyzer.reusableTokenStream(fieldName, reader);
|
||||
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
try {
|
||||
stream.reset();
|
||||
while (stream.incrementToken()) {
|
||||
position += (posIncrAtt.getPositionIncrement() - 1);
|
||||
position++;
|
||||
|
@ -323,6 +324,7 @@ class LuceneMethods {
|
|||
}
|
||||
if (position > maxFieldLength) break;
|
||||
}
|
||||
stream.end();
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
|
|
|
@ -262,8 +262,12 @@ public class MemoryIndex {
|
|||
if (analyzer == null)
|
||||
throw new IllegalArgumentException("analyzer must not be null");
|
||||
|
||||
TokenStream stream = analyzer.tokenStream(fieldName,
|
||||
new StringReader(text));
|
||||
TokenStream stream;
|
||||
try {
|
||||
stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
addField(fieldName, stream);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
|
@ -135,7 +136,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
if (last > 1 || !isOptimized(infos.info(0))) {
|
||||
|
||||
spec = new MergeSpecification();
|
||||
spec.add(new OneMerge(infos.range(0, last)));
|
||||
spec.add(new OneMerge(infos.asList().subList(0, last)));
|
||||
}
|
||||
} else if (last > maxNumSegments) {
|
||||
|
||||
|
@ -192,7 +193,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
prev = backLink[i][prev];
|
||||
int mergeStart = i + prev;
|
||||
if((mergeEnd - mergeStart) > 1) {
|
||||
spec.add(new OneMerge(infos.range(mergeStart, mergeEnd)));
|
||||
spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
|
||||
} else {
|
||||
if(partialExpunge) {
|
||||
SegmentInfo info = infos.info(mergeStart);
|
||||
|
@ -208,7 +209,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
|
||||
if(partialExpunge && maxDelCount > 0) {
|
||||
// expunge deletes
|
||||
spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1)));
|
||||
spec.add(new OneMerge(Collections.singletonList(infos.info(expungeCandidate))));
|
||||
}
|
||||
|
||||
return spec;
|
||||
|
@ -250,7 +251,10 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
MergeSpecification spec = null;
|
||||
|
||||
if(numLargeSegs < numSegs) {
|
||||
SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs);
|
||||
// hack to create a shallow sub-range as SegmentInfos instance,
|
||||
// it does not clone all metadata, but LogMerge does not need it
|
||||
final SegmentInfos smallSegments = new SegmentInfos();
|
||||
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
|
||||
spec = super.findMergesToExpungeDeletes(smallSegments);
|
||||
}
|
||||
|
||||
|
@ -258,7 +262,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
for(int i = 0; i < numLargeSegs; i++) {
|
||||
SegmentInfo info = infos.info(i);
|
||||
if(info.hasDeletions()) {
|
||||
spec.add(new OneMerge(infos.range(i, i + 1)));
|
||||
spec.add(new OneMerge(Collections.singletonList(infos.info(i))));
|
||||
}
|
||||
}
|
||||
return spec;
|
||||
|
@ -296,7 +300,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
if(totalSmallSegSize < targetSegSize * 2) {
|
||||
MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge);
|
||||
if(spec == null) spec = new MergeSpecification(); // should not happen
|
||||
spec.add(new OneMerge(infos.range(numLargeSegs, numSegs)));
|
||||
spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
|
||||
return spec;
|
||||
} else {
|
||||
return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
|
||||
|
@ -311,11 +315,13 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
if(size(info) < sizeThreshold) break;
|
||||
startSeg++;
|
||||
}
|
||||
spec.add(new OneMerge(infos.range(startSeg, numSegs)));
|
||||
spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
|
||||
return spec;
|
||||
} else {
|
||||
// apply the log merge policy to small segments.
|
||||
SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs);
|
||||
// hack to create a shallow sub-range as SegmentInfos instance,
|
||||
// it does not clone all metadata, but LogMerge does not need it
|
||||
final SegmentInfos smallSegments = new SegmentInfos();
|
||||
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
|
||||
MergeSpecification spec = super.findMerges(smallSegments);
|
||||
|
||||
if(_partialExpunge) {
|
||||
|
@ -342,7 +348,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
}
|
||||
}
|
||||
if (maxDelCount > 0) {
|
||||
return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1));
|
||||
return new OneMerge(Collections.singletonList(infos.info(expungeCandidate)));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,289 @@
|
|||
package org.apache.lucene.store;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexWriter; // javadocs
|
||||
import org.apache.lucene.index.MergePolicy;
|
||||
import org.apache.lucene.index.MergeScheduler;
|
||||
import org.apache.lucene.store.RAMDirectory; // javadocs
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
// TODO
|
||||
// - let subclass dictate policy...?
|
||||
// - rename to MergeCachingDir? NRTCachingDir
|
||||
|
||||
/**
|
||||
* Wraps a {@link RAMDirectory}
|
||||
* around any provided delegate directory, to
|
||||
* be used during NRT search. Make sure you pull the merge
|
||||
* scheduler using {@link #getMergeScheduler} and pass that to your
|
||||
* {@link IndexWriter}; this class uses that to keep track of which
|
||||
* merges are being done by which threads, to decide when to
|
||||
* cache each written file.
|
||||
*
|
||||
* <p>This class is likely only useful in a near-real-time
|
||||
* context, where indexing rate is lowish but reopen
|
||||
* rate is highish, resulting in many tiny files being
|
||||
* written. This directory keeps such segments (as well as
|
||||
* the segments produced by merging them, as long as they
|
||||
* are small enough), in RAM.</p>
|
||||
*
|
||||
* <p>This is safe to use: when your app calls {IndexWriter#commit},
|
||||
* all cached files will be flushed from the cache and sync'd.</p>
|
||||
*
|
||||
* <p><b>NOTE</b>: this class is somewhat sneaky in its
|
||||
* approach for spying on merges to determine the size of a
|
||||
* merge: it records which threads are running which merges
|
||||
* by watching ConcurrentMergeScheduler's doMerge method.
|
||||
* While this works correctly, likely future versions of
|
||||
* this class will take a more general approach.
|
||||
*
|
||||
* <p>Here's a simple example usage:
|
||||
*
|
||||
* <pre>
|
||||
* Directory fsDir = FSDirectory.open(new File("/path/to/index"));
|
||||
* NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
|
||||
* IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
|
||||
* conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
|
||||
* IndexWriter writer = new IndexWriter(cachedFSDir, conf);
|
||||
* </pre>
|
||||
*
|
||||
* <p>This will cache all newly flushed segments, all merges
|
||||
* whose expected segment size is <= 5 MB, unless the net
|
||||
* cached bytes exceeds 60 MB at which point all writes will
|
||||
* not be cached (until the net bytes falls below 60 MB).</p>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
public class NRTCachingDirectory extends Directory {
|
||||
|
||||
private final RAMDirectory cache = new RAMDirectory();
|
||||
|
||||
private final Directory delegate;
|
||||
|
||||
private final long maxMergeSizeBytes;
|
||||
private final long maxCachedBytes;
|
||||
|
||||
private static final boolean VERBOSE = false;
|
||||
|
||||
/**
|
||||
* We will cache a newly created output if 1) it's a
|
||||
* flush or a merge and the estimated size of the merged segment is <=
|
||||
* maxMergeSizeMB, and 2) the total cached bytes is <=
|
||||
* maxCachedMB */
|
||||
public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) {
|
||||
this.delegate = delegate;
|
||||
maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024);
|
||||
maxCachedBytes = (long) (maxCachedMB*1024*1024);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String[] listAll() throws IOException {
|
||||
final Set<String> files = new HashSet<String>();
|
||||
for(String f : cache.listAll()) {
|
||||
files.add(f);
|
||||
}
|
||||
for(String f : delegate.listAll()) {
|
||||
assert !files.contains(f);
|
||||
files.add(f);
|
||||
}
|
||||
return files.toArray(new String[files.size()]);
|
||||
}
|
||||
|
||||
/** Returns how many bytes are being used by the
|
||||
* RAMDirectory cache */
|
||||
public long sizeInBytes() {
|
||||
return cache.sizeInBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean fileExists(String name) throws IOException {
|
||||
return cache.fileExists(name) || delegate.fileExists(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized long fileModified(String name) throws IOException {
|
||||
if (cache.fileExists(name)) {
|
||||
return cache.fileModified(name);
|
||||
} else {
|
||||
return delegate.fileModified(name);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void touchFile(String name) throws IOException {
|
||||
if (cache.fileExists(name)) {
|
||||
cache.touchFile(name);
|
||||
} else {
|
||||
delegate.touchFile(name);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void deleteFile(String name) throws IOException {
|
||||
// Delete from both, in case we are currently uncaching:
|
||||
if (VERBOSE) {
|
||||
System.out.println("nrtdir.deleteFile name=" + name);
|
||||
}
|
||||
cache.deleteFile(name);
|
||||
delegate.deleteFile(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized long fileLength(String name) throws IOException {
|
||||
if (cache.fileExists(name)) {
|
||||
return cache.fileLength(name);
|
||||
} else {
|
||||
return delegate.fileLength(name);
|
||||
}
|
||||
}
|
||||
|
||||
public String[] listCachedFiles() {
|
||||
return cache.listAll();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createOutput(String name) throws IOException {
|
||||
if (VERBOSE) {
|
||||
System.out.println("nrtdir.createOutput name=" + name);
|
||||
}
|
||||
if (doCacheWrite(name)) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" to cache");
|
||||
}
|
||||
return cache.createOutput(name);
|
||||
} else {
|
||||
return delegate.createOutput(name);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> fileNames) throws IOException {
|
||||
if (VERBOSE) {
|
||||
System.out.println("nrtdir.sync files=" + fileNames);
|
||||
}
|
||||
for(String fileName : fileNames) {
|
||||
unCache(fileName);
|
||||
}
|
||||
delegate.sync(fileNames);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized IndexInput openInput(String name) throws IOException {
|
||||
if (VERBOSE) {
|
||||
System.out.println("nrtdir.openInput name=" + name);
|
||||
}
|
||||
if (cache.fileExists(name)) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" from cache");
|
||||
}
|
||||
return cache.openInput(name);
|
||||
} else {
|
||||
return delegate.openInput(name);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized IndexInput openInput(String name, int bufferSize) throws IOException {
|
||||
if (cache.fileExists(name)) {
|
||||
return cache.openInput(name, bufferSize);
|
||||
} else {
|
||||
return delegate.openInput(name, bufferSize);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Lock makeLock(String name) {
|
||||
return delegate.makeLock(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clearLock(String name) throws IOException {
|
||||
delegate.clearLock(name);
|
||||
}
|
||||
|
||||
/** Close this directory, which flushes any cached files
|
||||
* to the delegate and then closes the delegate. */
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
for(String fileName : cache.listAll()) {
|
||||
unCache(fileName);
|
||||
}
|
||||
cache.close();
|
||||
delegate.close();
|
||||
}
|
||||
|
||||
private final ConcurrentHashMap<Thread,MergePolicy.OneMerge> merges = new ConcurrentHashMap<Thread,MergePolicy.OneMerge>();
|
||||
|
||||
public MergeScheduler getMergeScheduler() {
|
||||
return new ConcurrentMergeScheduler() {
|
||||
@Override
|
||||
protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
|
||||
try {
|
||||
merges.put(Thread.currentThread(), merge);
|
||||
super.doMerge(merge);
|
||||
} finally {
|
||||
merges.remove(Thread.currentThread());
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Subclass can override this to customize logic; return
|
||||
* true if this file should be written to the RAMDirectory. */
|
||||
protected boolean doCacheWrite(String name) {
|
||||
final MergePolicy.OneMerge merge = merges.get(Thread.currentThread());
|
||||
//System.out.println(Thread.currentThread().getName() + ": CACHE check merge=" + merge + " size=" + (merge==null ? 0 : merge.estimatedMergeBytes));
|
||||
return !name.equals(IndexFileNames.SEGMENTS_GEN) && (merge == null || merge.estimatedMergeBytes <= maxMergeSizeBytes) && cache.sizeInBytes() <= maxCachedBytes;
|
||||
}
|
||||
|
||||
private void unCache(String fileName) throws IOException {
|
||||
final IndexOutput out;
|
||||
synchronized(this) {
|
||||
if (!delegate.fileExists(fileName)) {
|
||||
assert cache.fileExists(fileName);
|
||||
out = delegate.createOutput(fileName);
|
||||
} else {
|
||||
out = null;
|
||||
}
|
||||
}
|
||||
|
||||
if (out != null) {
|
||||
IndexInput in = null;
|
||||
try {
|
||||
in = cache.openInput(fileName);
|
||||
in.copyBytes(out, in.length());
|
||||
} finally {
|
||||
IOUtils.closeSafely(in, out);
|
||||
}
|
||||
synchronized(this) {
|
||||
cache.deleteFile(fileName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
package org.apache.lucene.store;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestNRTCachingDirectory extends LuceneTestCase {
|
||||
|
||||
public void testNRTAndCommit() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
|
||||
conf.setMergeScheduler(cachedDir.getMergeScheduler());
|
||||
RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
|
||||
w.w.setInfoStream(VERBOSE ? System.out : null);
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
final int numDocs = _TestUtil.nextInt(random, 100, 400);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: numDocs=" + numDocs);
|
||||
}
|
||||
|
||||
final List<BytesRef> ids = new ArrayList<BytesRef>();
|
||||
IndexReader r = null;
|
||||
for(int docCount=0;docCount<numDocs;docCount++) {
|
||||
final Document doc = docs.nextDoc();
|
||||
ids.add(new BytesRef(doc.get("docid")));
|
||||
w.addDocument(doc);
|
||||
if (random.nextInt(20) == 17) {
|
||||
if (r == null) {
|
||||
r = IndexReader.open(w.w, false);
|
||||
} else {
|
||||
final IndexReader r2 = r.reopen();
|
||||
if (r2 != r) {
|
||||
r.close();
|
||||
r = r2;
|
||||
}
|
||||
}
|
||||
assertEquals(1+docCount, r.numDocs());
|
||||
final IndexSearcher s = new IndexSearcher(r);
|
||||
// Just make sure search can run; we can't assert
|
||||
// totHits since it could be 0
|
||||
TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
|
||||
// System.out.println("tot hits " + hits.totalHits);
|
||||
}
|
||||
}
|
||||
|
||||
if (r != null) {
|
||||
r.close();
|
||||
}
|
||||
|
||||
// Close should force cache to clear since all files are sync'd
|
||||
w.close();
|
||||
|
||||
final String[] cachedFiles = cachedDir.listCachedFiles();
|
||||
for(String file : cachedFiles) {
|
||||
System.out.println("FAIL: cached file " + file + " remains after sync");
|
||||
}
|
||||
assertEquals(0, cachedFiles.length);
|
||||
|
||||
r = IndexReader.open(dir);
|
||||
for(BytesRef id : ids) {
|
||||
assertEquals(1, r.docFreq("docid", id));
|
||||
}
|
||||
r.close();
|
||||
cachedDir.close();
|
||||
}
|
||||
|
||||
// NOTE: not a test; just here to make sure the code frag
|
||||
// in the javadocs is correct!
|
||||
public void verifyCompiles() throws Exception {
|
||||
Analyzer analyzer = null;
|
||||
|
||||
Directory fsDir = FSDirectory.open(new File("/path/to/index"));
|
||||
NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0);
|
||||
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
|
||||
conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
|
||||
IndexWriter writer = new IndexWriter(cachedFSDir, conf);
|
||||
}
|
||||
}
|
|
@ -186,7 +186,7 @@ public class FuzzyLikeThisQuery extends Query
|
|||
private void addTerms(IndexReader reader,FieldVals f) throws IOException
|
||||
{
|
||||
if(f.queryString==null) return;
|
||||
TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
|
||||
TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
|
||||
int corpusNumDocs=reader.numDocs();
|
||||
|
|
|
@ -881,7 +881,7 @@ public final class MoreLikeThis {
|
|||
throw new UnsupportedOperationException("To use MoreLikeThis without " +
|
||||
"term vectors, you must provide an Analyzer");
|
||||
}
|
||||
TokenStream ts = analyzer.tokenStream(fieldName, r);
|
||||
TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
|
||||
int tokenCount=0;
|
||||
// for every token
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
|
|
|
@ -85,7 +85,7 @@ public final class SimilarityQueries
|
|||
Set<?> stop)
|
||||
throws IOException
|
||||
{
|
||||
TokenStream ts = a.tokenStream( field, new StringReader( body));
|
||||
TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
|
||||
BooleanQuery tmp = new BooleanQuery();
|
||||
|
|
|
@ -106,15 +106,16 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
}
|
||||
|
||||
// get Analyzer from superclass and tokenize the term
|
||||
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
TokenStream source;
|
||||
|
||||
int countTokens = 0;
|
||||
try {
|
||||
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
|
||||
source.reset();
|
||||
} catch (IOException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
}
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
while (true) {
|
||||
try {
|
||||
if (!source.incrementToken()) break;
|
||||
|
@ -194,14 +195,15 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
@Override
|
||||
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
|
||||
// get Analyzer from superclass and tokenize the term
|
||||
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
||||
TokenStream source;
|
||||
List<String> tlist = new ArrayList<String>();
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
try {
|
||||
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
|
||||
source.reset();
|
||||
} catch (IOException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
}
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
while (true) {
|
||||
try {
|
||||
if (!source.incrementToken()) break;
|
||||
|
@ -247,12 +249,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
|
||||
throws ParseException {
|
||||
// get Analyzer from superclass and tokenize the term
|
||||
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
TokenStream source = null;
|
||||
String nextToken = null;
|
||||
boolean multipleTokens = false;
|
||||
|
||||
try {
|
||||
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
source.reset();
|
||||
if (source.incrementToken()) {
|
||||
nextToken = termAtt.toString();
|
||||
|
@ -292,7 +295,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
if (part1 != null) {
|
||||
// part1
|
||||
try {
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(part1));
|
||||
source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
|
||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
source.reset();
|
||||
multipleTokens = false;
|
||||
|
@ -318,11 +321,10 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
}
|
||||
|
||||
if (part2 != null) {
|
||||
// part2
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(part2));
|
||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
|
||||
try {
|
||||
// part2
|
||||
source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
|
||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
source.reset();
|
||||
if (source.incrementToken()) {
|
||||
part2 = termAtt.toString();
|
||||
|
|
|
@@ -121,9 +121,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
String text = fieldNode.getTextAsString();
String field = fieldNode.getFieldAsString();

TokenStream source = this.analyzer.tokenStream(field, new StringReader(
text));
TokenStream source;
try {
source = this.analyzer.reusableTokenStream(field, new StringReader(text));
source.reset();
} catch (IOException e1) {
throw new RuntimeException(e1);

@@ -631,8 +631,9 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
}

@Override
public void tearDown() {
public void tearDown() throws Exception {
BooleanQuery.setMaxClauseCount(originalMaxClauses);
super.tearDown();
}

}

@@ -116,7 +116,7 @@ public final class SynExpand {
if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query));
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {

@@ -124,7 +124,7 @@ public class SynLookup {
List<String> top = new LinkedList<String>(); // needs to be separately listed..

// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query));
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

while (ts.incrementToken()) {

@@ -76,10 +76,10 @@ public class LikeThisQueryBuilder implements QueryBuilder {
stopWordsSet=new HashSet<String>();
for (int i = 0; i < fields.length; i++)
{
TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
try
{
TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while(ts.incrementToken()) {
stopWordsSet.add(termAtt.toString());

@@ -56,7 +56,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
try
{
ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();

@@ -57,11 +57,11 @@ public class TermsFilterBuilder implements FilterBuilder
TermsFilter tf = new TermsFilter();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);

try
{
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
BytesRef bytes = termAtt.getBytesRef();
ts.reset();

@@ -55,9 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {

BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
{
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
BytesRef bytes = termAtt.getBytesRef();
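All of the hunks above apply the same pattern: the per-call Analyzer.tokenStream(...) is swapped for Analyzer.reusableTokenStream(...), and reset() is now called before the stream is consumed. A minimal sketch of that consumer workflow follows; the field name and text are placeholders, not taken from the patch.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical helper showing the reusable-stream consumption pattern.
static void dumpTokens(Analyzer analyzer) throws IOException {
  TokenStream ts = analyzer.reusableTokenStream("body", new StringReader("some text"));
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  ts.reset();                       // required before the first incrementToken()
  while (ts.incrementToken()) {
    System.out.println(termAtt.toString());
  }
  ts.end();                         // finish the stream; the analyzer may reuse it later
}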
@@ -733,8 +733,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
// case we have to roll back:
startCommit();

final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
rollbackSegmentInfos.addAll(segmentInfos);
final List<SegmentInfo> rollbackSegments = segmentInfos.createBackupSegmentInfos(false);

boolean success = false;
try {

@@ -766,8 +765,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
deleter.refresh();

// Restore all SegmentInfos (in case we pruned some)
segmentInfos.clear();
segmentInfos.addAll(rollbackSegmentInfos);
segmentInfos.rollbackSegmentInfos(rollbackSegments);
}
}

@@ -126,7 +126,6 @@ final class DocumentsWriter {
final DocumentsWriterPerThreadPool perThreadPool;
final FlushPolicy flushPolicy;
final DocumentsWriterFlushControl flushControl;
final Healthiness healthiness;
DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;

@@ -142,10 +141,7 @@ final class DocumentsWriter {
flushPolicy = configuredPolicy;
}
flushPolicy.init(this);

healthiness = new Healthiness();
final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT);
flushControl = new DocumentsWriterFlushControl(this, config );
}

synchronized void deleteQueries(final Query... queries) throws IOException {

@@ -283,31 +279,28 @@ final class DocumentsWriter {
ensureOpen();
boolean maybeMerge = false;
final boolean isUpdate = delTerm != null;
if (healthiness.anyStalledThreads()) {

// Help out flushing any pending DWPTs so we can un-stall:
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
// Help out flushing any queued DWPTs so we can un-stall:
if (infoStream != null) {
message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)");
message("DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
}

// Try pick up pending threads here if possible
DocumentsWriterPerThread flushingDWPT;
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
// Don't push the delete here since the update could fail!
maybeMerge = doFlush(flushingDWPT);
if (!healthiness.anyStalledThreads()) {
break;
do {
// Try pick up pending threads here if possible
DocumentsWriterPerThread flushingDWPT;
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
// Don't push the delete here since the update could fail!
maybeMerge |= doFlush(flushingDWPT);
}
}

if (infoStream != null && flushControl.anyStalledThreads()) {
message("WARNING DocumentsWriter has stalled threads; waiting");
}

flushControl.waitIfStalled(); // block if stalled
} while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing

if (infoStream != null && healthiness.anyStalledThreads()) {
message("WARNING DocumentsWriter still has stalled threads; waiting");
}

healthiness.waitIfStalled(); // block if stalled

if (infoStream != null && healthiness.anyStalledThreads()) {
message("WARNING DocumentsWriter done waiting");
if (infoStream != null) {
message("continue indexing after helpling out flushing DocumentsWriter is healthy");
}
}

@@ -353,7 +346,6 @@ final class DocumentsWriter {
maybeMerge = true;
boolean success = false;
FlushTicket ticket = null;

try {
assert currentFullFlushDelQueue == null
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "

@@ -511,9 +503,7 @@ final class DocumentsWriter {
anythingFlushed |= doFlush(flushingDWPT);
}
// If a concurrent flush is still in flight wait for it
while (flushControl.anyFlushing()) {
flushControl.waitForFlush();
}
flushControl.waitForFlush();
if (!anythingFlushed) { // apply deletes if we did not flush any document
synchronized (ticketQueue) {
ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));
@@ -44,30 +44,32 @@ public final class DocumentsWriterFlushControl {
private long activeBytes = 0;
private long flushBytes = 0;
private volatile int numPending = 0;
private volatile int numFlushing = 0;
final AtomicBoolean flushDeletes = new AtomicBoolean(false);
private boolean fullFlush = false;
private Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
private final Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
// only for safety reasons if a DWPT is close to the RAM limit
private Queue<DocumentsWriterPerThread> blockedFlushes = new LinkedList<DocumentsWriterPerThread>();

private final Queue<BlockedFlush> blockedFlushes = new LinkedList<BlockedFlush>();

double maxConfiguredRamBuffer = 0;
long peakActiveBytes = 0;// only with assert
long peakFlushBytes = 0;// only with assert
long peakNetBytes = 0;// only with assert
private final Healthiness healthiness;
long peakDelta = 0; // only with assert
final DocumentsWriterStallControl stallControl;
private final DocumentsWriterPerThreadPool perThreadPool;
private final FlushPolicy flushPolicy;
private boolean closed = false;
private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
private final DocumentsWriter documentsWriter;
private final IndexWriterConfig config;

DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
Healthiness healthiness, long hardMaxBytesPerDWPT) {
this.healthiness = healthiness;
IndexWriterConfig config) {
this.stallControl = new DocumentsWriterStallControl();
this.perThreadPool = documentsWriter.perThreadPool;
this.flushPolicy = documentsWriter.flushPolicy;
this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT;
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;;
this.config = config;
this.documentsWriter = documentsWriter;
}

@@ -82,6 +84,24 @@ public final class DocumentsWriterFlushControl {
public synchronized long netBytes() {
return flushBytes + activeBytes;
}

long stallLimitBytes() {
final double maxRamMB = config.getRAMBufferSizeMB();
return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
}

private boolean assertMemory() {
final double maxRamMB = config.getRAMBufferSizeMB();
if (maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
// for this assert we must be tolerant to ram buffer changes!
maxConfiguredRamBuffer = Math.max(maxRamMB, maxConfiguredRamBuffer);
final long ram = flushBytes + activeBytes;
// take peakDelta into account - worst case is that all flushing, pending and blocked DWPT had maxMem and the last doc had the peakDelta
final long expected = (long)(2 * (maxConfiguredRamBuffer * 1024 * 1024)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
assert ram <= expected : "ram was " + ram + " expected: " + expected + " flush mem: " + flushBytes + " active: " + activeBytes ;
}
return true;
}

private void commitPerThreadBytes(ThreadState perThread) {
final long delta = perThread.perThread.bytesUsed()

@@ -105,53 +125,62 @@ public final class DocumentsWriterFlushControl {
peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
peakNetBytes = Math.max(peakNetBytes, netBytes());
peakDelta = Math.max(peakDelta, delta);

return true;
}

synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
boolean isUpdate) {
commitPerThreadBytes(perThread);
if (!perThread.flushPending) {
if (isUpdate) {
flushPolicy.onUpdate(this, perThread);
} else {
flushPolicy.onInsert(this, perThread);
}
if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) {
// Safety check to prevent a single DWPT exceeding its RAM limit. This
// is super important since we can not address more than 2048 MB per DWPT
setFlushPending(perThread);
if (fullFlush) {
DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
assert toBlock != null;
blockedFlushes.add(toBlock);
try {
commitPerThreadBytes(perThread);
if (!perThread.flushPending) {
if (isUpdate) {
flushPolicy.onUpdate(this, perThread);
} else {
flushPolicy.onInsert(this, perThread);
}
if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) {
// Safety check to prevent a single DWPT exceeding its RAM limit. This
// is super important since we can not address more than 2048 MB per DWPT
setFlushPending(perThread);
}
}
final DocumentsWriterPerThread flushingDWPT;
if (fullFlush) {
if (perThread.flushPending) {
checkoutAndBlock(perThread);
flushingDWPT = nextPendingFlush();
} else {
flushingDWPT = null;
}
} else {
flushingDWPT = tryCheckoutForFlush(perThread);
}
return flushingDWPT;
} finally {
stallControl.updateStalled(this);
assert assertMemory();
}
final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
healthiness.updateStalled(this);
return flushingDWPT;


}

synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
assert flushingWriters.containsKey(dwpt);
try {
numFlushing--;
Long bytes = flushingWriters.remove(dwpt);
flushBytes -= bytes.longValue();
perThreadPool.recycle(dwpt);
healthiness.updateStalled(this);
stallControl.updateStalled(this);
assert assertMemory();
} finally {
notifyAll();
}
}

public synchronized boolean anyFlushing() {
return numFlushing != 0;
}

public synchronized void waitForFlush() {
if (numFlushing != 0) {
while (flushingWriters.size() != 0) {
try {
this.wait();
} catch (InterruptedException e) {

@@ -173,32 +202,51 @@ public final class DocumentsWriterFlushControl {
flushBytes += bytes;
activeBytes -= bytes;
numPending++; // write access synced
assert assertMemory();
} // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing

}

synchronized void doOnAbort(ThreadState state) {
if (state.flushPending) {
flushBytes -= state.bytesUsed;
} else {
activeBytes -= state.bytesUsed;
try {
if (state.flushPending) {
flushBytes -= state.bytesUsed;
} else {
activeBytes -= state.bytesUsed;
}
assert assertMemory();
// Take it out of the loop this DWPT is stale
perThreadPool.replaceForFlush(state, closed);
}finally {
stallControl.updateStalled(this);
}
// Take it out of the loop this DWPT is stale
perThreadPool.replaceForFlush(state, closed);
healthiness.updateStalled(this);
}

synchronized DocumentsWriterPerThread tryCheckoutForFlush(
ThreadState perThread) {
if (fullFlush) {
return null;
return perThread.flushPending ? internalTryCheckOutForFlush(perThread) : null;
}

private void checkoutAndBlock(ThreadState perThread) {
perThread.lock();
try {
assert perThread.flushPending : "can not block non-pending threadstate";
assert fullFlush : "can not block if fullFlush == false";
final DocumentsWriterPerThread dwpt;
final long bytes = perThread.bytesUsed;
dwpt = perThreadPool.replaceForFlush(perThread, closed);
numPending--;
blockedFlushes.add(new BlockedFlush(dwpt, bytes));
}finally {
perThread.unlock();
}
return internalTryCheckOutForFlush(perThread);
}

private DocumentsWriterPerThread internalTryCheckOutForFlush(
ThreadState perThread) {
if (perThread.flushPending) {
assert Thread.holdsLock(this);
assert perThread.flushPending;
try {
// We are pending so all memory is already moved to flushBytes
if (perThread.tryLock()) {
try {

@@ -212,15 +260,16 @@ public final class DocumentsWriterFlushControl {
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(dwpt, Long.valueOf(bytes));
numPending--; // write access synced
numFlushing++;
return dwpt;
}
} finally {
perThread.unlock();
}
}
return null;
} finally {
stallControl.updateStalled(this);
}
return null;
}

@Override

@@ -231,12 +280,13 @@ public final class DocumentsWriterFlushControl {

DocumentsWriterPerThread nextPendingFlush() {
synchronized (this) {
DocumentsWriterPerThread poll = flushQueue.poll();
if (poll != null) {
final DocumentsWriterPerThread poll;
if ((poll = flushQueue.poll()) != null) {
stallControl.updateStalled(this);
return poll;
}
}
}
if (numPending > 0) {
if (numPending > 0 && !fullFlush) { // don't check if we are doing a full flush
final Iterator<ThreadState> allActiveThreads = perThreadPool
.getActivePerThreadsIterator();
while (allActiveThreads.hasNext() && numPending > 0) {

@@ -276,8 +326,8 @@ public final class DocumentsWriterFlushControl {
return documentsWriter.deleteQueue.numGlobalTermDeletes();
}

int numFlushingDWPT() {
return numFlushing;
synchronized int numFlushingDWPT() {
return flushingWriters.size();
}

public boolean doApplyAllDeletes() {

@@ -289,7 +339,7 @@ public final class DocumentsWriterFlushControl {
}

int numActiveDWPT() {
return this.perThreadPool.getMaxThreadStates();
return this.perThreadPool.getActiveThreadState();
}

void markForFullFlush() {

@@ -331,11 +381,11 @@ public final class DocumentsWriterFlushControl {
if (!next.flushPending) {
setFlushPending(next);
}
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
toFlush.add(flushingDWPT);
}
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
toFlush.add(flushingDWPT);
} else {
// get the new delete queue from DW
next.perThread.initialize();

@@ -345,31 +395,54 @@ public final class DocumentsWriterFlushControl {
}
}
synchronized (this) {
assert assertBlockedFlushes(flushingQueue);
flushQueue.addAll(blockedFlushes);
blockedFlushes.clear();
/* make sure we move all DWPT that are where concurrently marked as
* pending and moved to blocked are moved over to the flushQueue. There is
* a chance that this happens since we marking DWPT for full flush without
* blocking indexing.*/
pruneBlockedQueue(flushingQueue);
assert assertBlockedFlushes(documentsWriter.deleteQueue);
flushQueue.addAll(toFlush);
stallControl.updateStalled(this);
}
}

/**
* Prunes the blockedQueue by removing all DWPT that are associated with the given flush queue.
*/
private void pruneBlockedQueue(final DocumentsWriterDeleteQueue flushingQueue) {
Iterator<BlockedFlush> iterator = blockedFlushes.iterator();
while (iterator.hasNext()) {
BlockedFlush blockedFlush = iterator.next();
if (blockedFlush.dwpt.deleteQueue == flushingQueue) {
iterator.remove();
assert !flushingWriters.containsKey(blockedFlush.dwpt) : "DWPT is already flushing";
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
// don't decr pending here - its already done when DWPT is blocked
flushQueue.add(blockedFlush.dwpt);
}
}
}

synchronized void finishFullFlush() {
assert fullFlush;
assert flushQueue.isEmpty();
assert flushingWriters.isEmpty();
try {
if (!blockedFlushes.isEmpty()) {
assert assertBlockedFlushes(documentsWriter.deleteQueue);
flushQueue.addAll(blockedFlushes);
blockedFlushes.clear();
pruneBlockedQueue(documentsWriter.deleteQueue);
assert blockedFlushes.isEmpty();
}
} finally {
fullFlush = false;
stallControl.updateStalled(this);
}
}

boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
Queue<DocumentsWriterPerThread> flushes = this.blockedFlushes;
for (DocumentsWriterPerThread documentsWriterPerThread : flushes) {
assert documentsWriterPerThread.deleteQueue == flushingQueue;
for (BlockedFlush blockedFlush : blockedFlushes) {
assert blockedFlush.dwpt.deleteQueue == flushingQueue;
}
return true;
}

@@ -379,18 +452,65 @@ public final class DocumentsWriterFlushControl {
for (DocumentsWriterPerThread dwpt : flushQueue) {
doAfterFlush(dwpt);
}
for (DocumentsWriterPerThread dwpt : blockedFlushes) {
doAfterFlush(dwpt);
for (BlockedFlush blockedFlush : blockedFlushes) {
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
doAfterFlush(blockedFlush.dwpt);
}

} finally {
fullFlush = false;
flushQueue.clear();
blockedFlushes.clear();
stallControl.updateStalled(this);
}
}

synchronized boolean isFullFlush() {
/**
* Returns <code>true</code> if a full flush is currently running
*/
synchronized boolean isFullFlush() { // used by assert
return fullFlush;
}

/**
* Returns the number of flushes that are already checked out but not yet
* actively flushing
*/
synchronized int numQueuedFlushes() {
return flushQueue.size();
}

/**
* Returns the number of flushes that are checked out but not yet available
* for flushing. This only applies during a full flush if a DWPT needs
* flushing but must not be flushed until the full flush has finished.
*/
synchronized int numBlockedFlushes() {
return blockedFlushes.size();
}

private static class BlockedFlush {
final DocumentsWriterPerThread dwpt;
final long bytes;
BlockedFlush(DocumentsWriterPerThread dwpt, long bytes) {
super();
this.dwpt = dwpt;
this.bytes = bytes;
}
}

/**
* This method will block if too many DWPT are currently flushing and no
* checked out DWPT are available
*/
void waitIfStalled() {
stallControl.waitIfStalled();
}

/**
* Returns <code>true</code> iff stalled
*/
boolean anyStalledThreads() {
return stallControl.anyStalledThreads();
}

}
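The stall logic introduced above keys off stallLimitBytes(): indexing threads stall once the net (active plus flushing) bytes exceed twice the configured RAM buffer. A small illustrative calculation, assuming a hypothetical 16 MB RAM buffer (the names mirror the patch; the numbers are only an example, not taken from the commit):

// Illustrative only: how the 2x-RAM-buffer stall threshold plays out for a 16 MB buffer.
double maxRamMB = 16.0;                                        // assumed IndexWriterConfig.getRAMBufferSizeMB()
long stallLimitBytes = (long) (2 * (maxRamMB * 1024 * 1024));  // 33,554,432 bytes
long activeBytes = 20L * 1024 * 1024;                          // hypothetical in-flight indexing buffers
long flushBytes  = 15L * 1024 * 1024;                          // hypothetical DWPTs checked out for flushing
boolean stalled = (activeBytes + flushBytes) > stallLimitBytes; // 35 MB > 32 MB -> stall until flushes catch up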
@@ -165,6 +165,13 @@ public abstract class DocumentsWriterPerThreadPool {
public int getMaxThreadStates() {
return perThreads.length;
}

/**
* Returns the active number of {@link ThreadState} instances.
*/
public int getActiveThreadState() {
return numThreadStatesActive;
}

/**
* Returns a new {@link ThreadState} iff any new state is available otherwise

@@ -36,8 +36,7 @@ import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
* continue indexing.
*/
//TODO: rename this to DocumentsWriterStallControl (or something like that)?
final class Healthiness {

final class DocumentsWriterStallControl {
@SuppressWarnings("serial")
private static final class Sync extends AbstractQueuedSynchronizer {
volatile boolean hasBlockedThreads = false; // only with assert

@@ -96,13 +95,14 @@ final class Healthiness {
* <code>true</code> iff the number of flushing
* {@link DocumentsWriterPerThread} is greater than the number of active
* {@link DocumentsWriterPerThread}. Otherwise it will reset the
* {@link Healthiness} to healthy and release all threads waiting on
* {@link DocumentsWriterStallControl} to healthy and release all threads waiting on
* {@link #waitIfStalled()}
*/
void updateStalled(DocumentsWriterFlushControl flushControl) {
do {
// if we have more flushing DWPT than numActiveDWPT we stall!
while (flushControl.numActiveDWPT() < flushControl.numFlushingDWPT()) {
// if we have more flushing / blocked DWPT than numActiveDWPT we stall!
// don't stall if we have queued flushes - threads should be hijacked instead
while (flushControl.netBytes() > flushControl.stallLimitBytes()) {
if (sync.trySetStalled()) {
assert wasStalled = true;
return;

@@ -114,8 +114,8 @@ final class Healthiness {
void waitIfStalled() {
sync.acquireShared(0);
}

boolean hasBlocked() {

boolean hasBlocked() { // for tests
return sync.hasBlockedThreads;
}
}
@@ -40,7 +40,13 @@ import java.util.Collection;
* refuses to run by default. Specify {@code -delete-prior-commits}
* to override this, allowing the tool to delete all but the last commit.
* From Java code this can be enabled by passing {@code true} to
* {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
* {@link #IndexUpgrader(Directory,Version,PrintStream,boolean)}.
* <p><b>Warning:</b> This tool may reorder documents if the index was partially
* upgraded before execution (e.g., documents were added). If your application relies
* on "monotonicity" of doc IDs (which means that the order in which the documents
* were added to the index is preserved), do a full optimize instead.
* The {@link MergePolicy} set by {@link IndexWriterConfig} may also reorder
* documents.
*/
public final class IndexUpgrader {

@@ -52,9 +58,11 @@ public final class IndexUpgrader {
System.err.println("reason, if the incoming index has more than one commit, the tool");
System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
System.err.println("this, allowing the tool to delete all but the last commit.");
System.err.println("WARNING: This tool may reorder document IDs!");
System.exit(1);
}

@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException {
String dir = null;
boolean deletePriorCommits = false;

@@ -74,7 +82,7 @@ public final class IndexUpgrader {
printUsage();
}

new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
new IndexUpgrader(FSDirectory.open(new File(dir)), Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
}

private final Directory dir;

@@ -82,16 +90,22 @@ public final class IndexUpgrader {
private final IndexWriterConfig iwc;
private final boolean deletePriorCommits;

@SuppressWarnings("deprecation")
public IndexUpgrader(Directory dir) {
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
/** Creates index upgrader on the given directory, using an {@link IndexWriter} using the given
* {@code matchVersion}. The tool refuses to upgrade indexes with multiple commit points. */
public IndexUpgrader(Directory dir, Version matchVersion) {
this(dir, new IndexWriterConfig(matchVersion, null), null, false);
}

@SuppressWarnings("deprecation")
public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
/** Creates index upgrader on the given directory, using an {@link IndexWriter} using the given
* {@code matchVersion}. You have the possibility to upgrade indexes with multiple commit points by removing
* all older ones. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean deletePriorCommits) {
this(dir, new IndexWriterConfig(matchVersion, null), infoStream, deletePriorCommits);
}

/** Creates index upgrader on the given directory, using an {@link IndexWriter} using the given
* config. You have the possibility to upgrade indexes with multiple commit points by removing
* all older ones. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
this.dir = dir;
this.iwc = iwc;
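The IndexUpgrader constructors above now take an explicit Version. A minimal usage sketch of the new API; the directory path is a placeholder and the call would sit inside a method that may throw IOException:

import java.io.File;
import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

// Upgrade all segments of an index in place, logging progress to System.out.
FSDirectory dir = FSDirectory.open(new File("/path/to/index"));   // placeholder path
new IndexUpgrader(dir, Version.LUCENE_CURRENT, System.out, false).upgrade();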
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;

@@ -221,7 +222,7 @@ public class IndexWriter implements Closeable {
private volatile long changeCount; // increments every time a change is completed
private long lastCommitChangeCount; // last changeCount that was committed

private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
private List<SegmentInfo> rollbackSegments; // list of segmentInfo we will fallback to if the commit fails

volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount;

@@ -440,14 +441,14 @@ public class IndexWriter implements Closeable {
public synchronized boolean infoIsLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
assert idx != -1: "info=" + info + " isn't in pool";
assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
return true;
}

public synchronized SegmentInfo mapToLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
if (idx != -1) {
info = segmentInfos.get(idx);
info = segmentInfos.info(idx);
}
return info;
}

@@ -818,7 +819,7 @@ public class IndexWriter implements Closeable {
}
}

setRollbackSegmentInfos(segmentInfos);
rollbackSegments = segmentInfos.createBackupSegmentInfos(true);

// start with previous field numbers, but new FieldInfos
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);

@@ -862,10 +863,6 @@ public class IndexWriter implements Closeable {
}
}

private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
}

/**
* Returns the private {@link IndexWriterConfig}, cloned
* from the {@link IndexWriterConfig} passed to

@@ -1126,8 +1123,7 @@ public class IndexWriter implements Closeable {
else
count = 0;

for (int i = 0; i < segmentInfos.size(); i++)
count += segmentInfos.info(i).docCount;
count += segmentInfos.totalDocCount();
return count;
}

@@ -1144,8 +1140,7 @@ public class IndexWriter implements Closeable {
else
count = 0;

for (int i = 0; i < segmentInfos.size(); i++) {
final SegmentInfo info = segmentInfos.info(i);
for (final SegmentInfo info : segmentInfos) {
count += info.docCount - numDeletedDocs(info);
}
return count;

@@ -1159,9 +1154,11 @@ public class IndexWriter implements Closeable {
if (docWriter.anyDeletions()) {
return true;
}
for (int i = 0; i < segmentInfos.size(); i++)
if (segmentInfos.info(i).hasDeletions())
for (final SegmentInfo info : segmentInfos) {
if (info.hasDeletions()) {
return true;
}
}
return false;
}

@@ -1554,7 +1551,8 @@ public class IndexWriter implements Closeable {

synchronized(this) {
resetMergeExceptions();
segmentsToOptimize = new HashSet<SegmentInfo>(segmentInfos);
segmentsToOptimize.clear();
segmentsToOptimize.addAll(segmentInfos.asSet());
optimizeMaxNumSegments = maxNumSegments;

// Now mark all pending & running merges as optimize

@@ -1778,7 +1776,7 @@ public class IndexWriter implements Closeable {

final MergePolicy.MergeSpecification spec;
if (optimize) {
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));

if (spec != null) {
final int numMerges = spec.merges.size();

@@ -1889,8 +1887,7 @@ public class IndexWriter implements Closeable {
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write
// once").
segmentInfos.clear();
segmentInfos.addAll(rollbackSegmentInfos);
segmentInfos.rollbackSegmentInfos(rollbackSegments);

docWriter.abort();

@@ -2555,7 +2552,7 @@ public class IndexWriter implements Closeable {
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
setRollbackSegmentInfos(pendingCommit);
rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
deleter.checkpoint(pendingCommit, true);
} finally {
// Matches the incRef done in startCommit:

@@ -2660,7 +2657,7 @@ public class IndexWriter implements Closeable {
final synchronized void applyAllDeletes() throws IOException {
flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
.applyDeletes(readerPool, segmentInfos);
.applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) {
checkpoint();
}

@@ -2709,7 +2706,7 @@ public class IndexWriter implements Closeable {

private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) {
if (segmentInfos.indexOf(info) == -1) {
if (!segmentInfos.contains(info)) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
}
}

@@ -2847,39 +2844,13 @@ public class IndexWriter implements Closeable {
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
}

final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
int segIdx = 0;
int newSegIdx = 0;
boolean inserted = false;
final int curSegCount = segmentInfos.size();
while(segIdx < curSegCount) {
final SegmentInfo info = segmentInfos.info(segIdx++);
if (mergedAway.contains(info)) {
if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
segmentInfos.set(segIdx-1, merge.info);
inserted = true;
newSegIdx++;
}
} else {
segmentInfos.set(newSegIdx++, info);
}
final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
segmentInfos.applyMergeChanges(merge, dropSegment);

if (dropSegment) {
readerPool.drop(merge.info);
}

// Either we found place to insert segment, or, we did
// not, but only because all segments we merged became
// deleted while we are merging, in which case it should
// be the case that the new segment is also all deleted:
if (!inserted) {
assert allDeleted;
if (keepFullyDeletedSegments) {
segmentInfos.add(0, merge.info);
} else {
readerPool.drop(merge.info);
}
}

segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();

if (infoStream != null) {
message("after commit: " + segString());
}

@@ -3014,7 +2985,7 @@ public class IndexWriter implements Closeable {
if (mergingSegments.contains(info)) {
return false;
}
if (segmentInfos.indexOf(info) == -1) {
if (!segmentInfos.contains(info)) {
return false;
}
if (info.dir != directory) {

@@ -3462,7 +3433,7 @@ public class IndexWriter implements Closeable {
}

// utility routines for tests
SegmentInfo newestSegment() {
synchronized SegmentInfo newestSegment() {
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}

@@ -3472,19 +3443,18 @@ public class IndexWriter implements Closeable {
}

/** @lucene.internal */
public synchronized String segString(List<SegmentInfo> infos) throws IOException {
StringBuilder buffer = new StringBuilder();
final int count = infos.size();
for(int i = 0; i < count; i++) {
if (i > 0) {
public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
final StringBuilder buffer = new StringBuilder();
for(final SegmentInfo s : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
buffer.append(segString(infos.get(i)));
buffer.append(segString(s));
}

return buffer.toString();
}

/** @lucene.internal */
public synchronized String segString(SegmentInfo info) throws IOException {
StringBuilder buffer = new StringBuilder();
SegmentReader reader = readerPool.getIfExists(info);
@@ -133,10 +133,15 @@ public final class IndexWriterConfig implements Cloneable {

/**
* Creates a new config that with defaults that match the specified
* {@link Version} as well as the default {@link Analyzer}. {@link Version} is
* a placeholder for future changes. The default settings are relevant to 3.1
* and before. In the future, if different settings will apply to different
* versions, they will be documented here.
* {@link Version} as well as the default {@link
* Analyzer}. If matchVersion is >= {@link
* Version#LUCENE_32}, {@link TieredMergePolicy} is used
* for merging; else {@link LogByteSizeMergePolicy}.
* Note that {@link TieredMergePolicy} is free to select
* non-contiguous merges, which means docIDs may not
* remain montonic over time. If this is a problem you
* should switch to {@link LogByteSizeMergePolicy} or
* {@link LogDocMergePolicy}.
*/
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
this.matchVersion = matchVersion;

@@ -154,7 +159,11 @@ public final class IndexWriterConfig implements Cloneable {
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault();
mergePolicy = new TieredMergePolicy();
if (matchVersion.onOrAfter(Version.LUCENE_32)) {
mergePolicy = new TieredMergePolicy();
} else {
mergePolicy = new LogByteSizeMergePolicy();
}
readerPooling = DEFAULT_READER_POOLING;
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
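With this change the default merge policy depends on matchVersion. A short sketch of how a caller could pin the old behaviour explicitly when monotonic docIDs matter (the analyzer choice is an assumption for illustration, not part of the commit):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.util.Version;

// LUCENE_32+ defaults to TieredMergePolicy; override it when docID order must be preserved.
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_32, new StandardAnalyzer(Version.LUCENE_32));
iwc.setMergePolicy(new LogByteSizeMergePolicy());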
@@ -242,6 +242,7 @@ public abstract class LogMergePolicy extends MergePolicy {
private MergeSpecification findMergesForOptimizeSizeLimit(
SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
final List<SegmentInfo> segments = infos.asList();

int start = last - 1;
while (start >= 0) {

@@ -254,12 +255,12 @@ public abstract class LogMergePolicy extends MergePolicy {
// unless there is only 1 which is optimized.
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
// there is more than 1 segment to the right of this one, or an unoptimized single segment.
spec.add(new OneMerge(infos.range(start + 1, last)));
spec.add(new OneMerge(segments.subList(start + 1, last)));
}
last = start;
} else if (last - start == mergeFactor) {
// mergeFactor eligible segments were found, add them as a merge.
spec.add(new OneMerge(infos.range(start, last)));
spec.add(new OneMerge(segments.subList(start, last)));
last = start;
}
--start;

@@ -267,7 +268,7 @@ public abstract class LogMergePolicy extends MergePolicy {

// Add any left-over segments, unless there is just 1 already optimized.
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
spec.add(new OneMerge(infos.range(start, last)));
spec.add(new OneMerge(segments.subList(start, last)));
}

return spec.merges.size() == 0 ? null : spec;

@@ -280,11 +281,12 @@ public abstract class LogMergePolicy extends MergePolicy {
*/
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
final List<SegmentInfo> segments = infos.asList();

// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
last -= mergeFactor;
}

@@ -296,7 +298,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// Since we must optimize down to 1 segment, the
// choice is simple:
if (last > 1 || !isOptimized(infos.info(0))) {
spec.add(new OneMerge(infos.range(0, last)));
spec.add(new OneMerge(segments.subList(0, last)));
}
} else if (last > maxNumSegments) {

@@ -325,7 +327,7 @@ public abstract class LogMergePolicy extends MergePolicy {
}
}

spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
}
}
return spec.merges.size() == 0 ? null : spec;

@@ -412,7 +414,8 @@ public abstract class LogMergePolicy extends MergePolicy {
@Override
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException {
final int numSegments = segmentInfos.size();
final List<SegmentInfo> segments = segmentInfos.asList();
final int numSegments = segments.size();

if (verbose())
message("findMergesToExpungeDeletes: " + numSegments + " segments");

@@ -434,7 +437,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// deletions, so force a merge now:
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = i;
}
} else if (firstSegmentWithDeletions != -1) {

@@ -443,7 +446,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// mergeFactor segments
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = -1;
}
}

@@ -451,7 +454,7 @@ public abstract class LogMergePolicy extends MergePolicy {
if (firstSegmentWithDeletions != -1) {
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
}

return spec;
@@ -72,7 +72,7 @@ public abstract class MergePolicy implements java.io.Closeable {
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
long estimatedMergeBytes; // used by IndexWriter
public long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter
public final List<SegmentInfo> segments;

@@ -84,7 +84,8 @@ public abstract class MergePolicy implements java.io.Closeable {
public OneMerge(List<SegmentInfo> segments) {
if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment");
this.segments = segments;
// clone the list, as the in list may be based off original SegmentInfos and may be modified
this.segments = new ArrayList<SegmentInfo>(segments);
int count = 0;
for(SegmentInfo info : segments) {
count += info.docCount;

@@ -42,7 +42,7 @@ import org.apache.lucene.util.Constants;
*
* @lucene.experimental
*/
public final class SegmentInfo {
public final class SegmentInfo implements Cloneable {
// TODO: remove with hasVector and hasProx
private static final int CHECK_FIELDINFO = -2;
static final int NO = -1; // e.g. no norms; no deletes;
@@ -20,13 +20,16 @@ package org.apache.lucene.index;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.Set;

import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.codecs.CodecProvider;

@@ -45,7 +48,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
*
* @lucene.experimental
*/
public final class SegmentInfos extends Vector<SegmentInfo> {
public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {

/*
* The file format version, a negative number.

@@ -84,7 +87,12 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
private int format;

private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand

private List<SegmentInfo> segments = new ArrayList<SegmentInfo>();
private Set<SegmentInfo> segmentSet = new HashSet<SegmentInfo>();
private transient List<SegmentInfo> cachedUnmodifiableList;
private transient Set<SegmentInfo> cachedUnmodifiableSet;

/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.

@@ -107,8 +115,8 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
return format;
}

public final SegmentInfo info(int i) {
return get(i);
public SegmentInfo info(int i) {
return segments.get(i);
}

/**

@@ -237,7 +245,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
boolean success = false;

// Clear any previous segments:
clear();
this.clear();

generation = generationFromSegmentsFileName(segmentFileName);

@@ -252,7 +260,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
if (!success) {
// Clear any segment infos we had loaded so we
// have a clean slate on retry:
clear();
this.clear();
}
}
}

@@ -349,15 +357,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {

/** Prunes any segment whose docs are all deleted. */
public void pruneDeletedSegments() {
int segIdx = 0;
while(segIdx < size()) {
final SegmentInfo info = info(segIdx);
for(final Iterator<SegmentInfo> it = segments.iterator(); it.hasNext();) {
final SegmentInfo info = it.next();
if (info.getDelCount() == info.docCount) {
remove(segIdx);
} else {
segIdx++;
it.remove();
segmentSet.remove(info);
}
}
assert segmentSet.size() == segments.size();
}

/**

@@ -367,14 +374,23 @@ public final class SegmentInfos extends Vector<SegmentInfo> {

@Override
public Object clone() {
SegmentInfos sis = (SegmentInfos) super.clone();
for(int i=0;i<sis.size();i++) {
final SegmentInfo info = sis.info(i);
assert info.getSegmentCodecs() != null;
sis.set(i, (SegmentInfo) info.clone());
try {
final SegmentInfos sis = (SegmentInfos) super.clone();
// deep clone, first recreate all collections:
sis.segments = new ArrayList<SegmentInfo>(size());
sis.segmentSet = new HashSet<SegmentInfo>(size());
sis.cachedUnmodifiableList = null;
sis.cachedUnmodifiableSet = null;
for(final SegmentInfo info : this) {
assert info.getSegmentCodecs() != null;
// dont directly access segments, use add method!!!
sis.add((SegmentInfo) info.clone());
}
sis.userData = new HashMap<String,String>(userData);
return sis;
} catch (CloneNotSupportedException e) {
throw new RuntimeException("should not happen", e);
}
sis.userData = new HashMap<String,String>(userData);
return sis;
}

/**

@@ -742,18 +758,6 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException;
}

/**
* Returns a new SegmentInfos containing the SegmentInfo
* instances in the specified range first (inclusive) to
* last (exclusive), so total number of segments returned
* is last-first.
*/
public SegmentInfos range(int first, int last) {
SegmentInfos infos = new SegmentInfos(codecs);
infos.addAll(super.subList(first, last));
return infos;
}

// Carry over generation numbers from another SegmentInfos
void updateGeneration(SegmentInfos other) {
lastGeneration = other.lastGeneration;

@@ -831,6 +835,10 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
} catch (Throwable t) {
// throw orig excp
}
} else {
// we must sync here explicitly since during a commit
// IW will not sync the global field map.
dir.sync(Collections.singleton(name));
}
}
return version;

@@ -956,7 +964,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
}

public synchronized String toString(Directory directory) {
public String toString(Directory directory) {
StringBuilder buffer = new StringBuilder();
buffer.append(getCurrentSegmentFileName()).append(": ");
final int count = size();

@@ -987,8 +995,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
* remain write once.
*/
void replace(SegmentInfos other) {
clear();
addAll(other);
rollbackSegmentInfos(other.asList());
lastGeneration = other.lastGeneration;
lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
format = other.format;

@@ -1014,7 +1021,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
* Loads or returns the already loaded the global field number map for this {@link SegmentInfos}.
* If this {@link SegmentInfos} has no global field number map the returned instance is empty
*/
synchronized FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
if (globalFieldNumberMap != null) {
return globalFieldNumberMap;
}

@@ -1054,4 +1061,135 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
long getLastGlobalFieldMapVersion() {
return lastGlobalFieldMapVersion;
}

/** applies all changes caused by committing a merge to this SegmentInfos */
void applyMergeChanges(MergePolicy.OneMerge merge, boolean dropSegment) {
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
boolean inserted = false;
int newSegIdx = 0;
for (int segIdx = 0, cnt = segments.size(); segIdx < cnt; segIdx++) {
assert segIdx >= newSegIdx;
final SegmentInfo info = segments.get(segIdx);
if (mergedAway.contains(info)) {
if (!inserted && !dropSegment) {
segments.set(segIdx, merge.info);
inserted = true;
newSegIdx++;
}
} else {
segments.set(newSegIdx, info);
newSegIdx++;
}
}

// Either we found place to insert segment, or, we did
// not, but only because all segments we merged became
// deleted while we are merging, in which case it should
// be the case that the new segment is also all deleted,
// we insert it at the beginning if it should not be dropped:
if (!inserted && !dropSegment) {
segments.add(0, merge.info);
}

// the rest of the segments in list are duplicates, so don't remove from map, only list!
segments.subList(newSegIdx, segments.size()).clear();

// update the Set
if (!dropSegment) {
segmentSet.add(merge.info);
}
segmentSet.removeAll(mergedAway);

assert segmentSet.size() == segments.size();
}

List<SegmentInfo> createBackupSegmentInfos(boolean cloneChildren) {
if (cloneChildren) {
final List<SegmentInfo> list = new ArrayList<SegmentInfo>(size());
for(final SegmentInfo info : this) {
assert info.getSegmentCodecs() != null;
list.add((SegmentInfo) info.clone());
}
return list;
} else {
return new ArrayList<SegmentInfo>(segments);
}
}

void rollbackSegmentInfos(List<SegmentInfo> infos) {
this.clear();
this.addAll(infos);
}

/** Returns an <b>unmodifiable</b> {@link Iterator} of contained segments in order. */
// @Override (comment out until Java 6)
public Iterator<SegmentInfo> iterator() {
return asList().iterator();
}

/** Returns all contained segments as an <b>unmodifiable</b> {@link List} view. */
public List<SegmentInfo> asList() {
if (cachedUnmodifiableList == null) {
cachedUnmodifiableList = Collections.unmodifiableList(segments);
}
return cachedUnmodifiableList;
}

/** Returns all contained segments as an <b>unmodifiable</b> {@link Set} view.
* The iterator is not sorted, use {@link List} view or {@link #iterator} to get all segments in order. */
public Set<SegmentInfo> asSet() {
if (cachedUnmodifiableSet == null) {
cachedUnmodifiableSet = Collections.unmodifiableSet(segmentSet);
}
return cachedUnmodifiableSet;
}

public int size() {
return segments.size();
}

public void add(SegmentInfo si) {
if (segmentSet.contains(si)) {
throw new IllegalStateException("Cannot add the same segment two times to this SegmentInfos instance");
}
segments.add(si);
segmentSet.add(si);
assert segmentSet.size() == segments.size();
}

public void addAll(Iterable<SegmentInfo> sis) {
for (final SegmentInfo si : sis) {
this.add(si);
}
}

public void clear() {
segments.clear();
segmentSet.clear();
}

public void remove(SegmentInfo si) {
final int index = this.indexOf(si);
if (index >= 0) {
this.remove(index);
}
}

public void remove(int index) {
segmentSet.remove(segments.remove(index));
assert segmentSet.size() == segments.size();
}

public boolean contains(SegmentInfo si) {
return segmentSet.contains(si);
}

public int indexOf(SegmentInfo si) {
if (segmentSet.contains(si)) {
return segments.indexOf(si);
} else {
return -1;
}
}

}
@ -251,9 +251,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
|
||||
|
||||
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
|
||||
infosSorted.addAll(infos);
|
||||
|
||||
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>(infos.asList());
|
||||
Collections.sort(infosSorted, segmentByteSizeDescending);
|
||||
|
||||
// Compute total index bytes & print details about the index
|
||||
|
|
|
@ -40,6 +40,11 @@ import java.util.Set;
|
|||
* w.optimize();
|
||||
* w.close();
|
||||
* </pre>
|
||||
* <p><b>Warning:</b> This merge policy may reorder documents if the index was partially
|
||||
* upgraded before calling optimize (e.g., documents were added). If your application relies
|
||||
* on "monotonicity" of doc IDs (which means that the order in which the documents
|
||||
* were added to the index is preserved), do a full optimize instead. Please note, the
|
||||
* delegate {@code MergePolicy} may also reorder documents.
|
||||
* @lucene.experimental
|
||||
* @see IndexUpgrader
|
||||
*/
|
||||
|
|
|
@ -200,6 +200,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
private class FSTFieldWriter extends FieldWriter {
|
||||
private final Builder<Long> fstBuilder;
|
||||
private final PositiveIntOutputs fstOutputs;
|
||||
private final long startTermsFilePointer;
|
||||
|
||||
final FieldInfo fieldInfo;
|
||||
int numIndexTerms;
|
||||
|
@ -220,6 +221,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
|
||||
// Always put empty string in
|
||||
fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
|
||||
startTermsFilePointer = termsFilePointer;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -239,6 +241,11 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
|
||||
@Override
|
||||
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
|
||||
if (text.length == 0) {
|
||||
// We already added empty string in ctor
|
||||
assert termsFilePointer == startTermsFilePointer;
|
||||
return;
|
||||
}
|
||||
final int lengthSave = text.length;
|
||||
text.length = indexedTermPrefixLength(lastTerm, text);
|
||||
try {
|
||||
|
|
|
@ -0,0 +1,382 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Caches all docs, and optionally also scores, coming from
|
||||
* a search, and is then able to replay them to another
|
||||
* collector. You specify the max RAM this class may use.
|
||||
* Once the collection is done, call {@link #isCached}. If
|
||||
* this returns true, you can use {@link #replay} against a
|
||||
* new collector. If it returns false, this means too much
|
||||
* RAM was required and you must instead re-run the original
|
||||
* search.
|
||||
*
|
||||
* <p><b>NOTE</b>: this class consumes 4 (or 8 bytes, if
|
||||
* scoring is cached) per collected document. If the result
|
||||
* set is large this can easily be a very substantial amount
|
||||
* of RAM!
|
||||
*
|
||||
* <p><b>NOTE</b>: this class caches at least 128 documents
|
||||
* before checking RAM limits.
|
||||
*
|
||||
* <p>See the Lucene <tt>modules/grouping</tt> module for more
|
||||
* details including a full code example.</p>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class CachingCollector extends Collector {
|
||||
|
||||
// Max out at 512K arrays
|
||||
private static final int MAX_ARRAY_SIZE = 512 * 1024;
|
||||
private static final int INITIAL_ARRAY_SIZE = 128;
|
||||
private final static int[] EMPTY_INT_ARRAY = new int[0];
|
||||
|
||||
private static class SegStart {
|
||||
public final AtomicReaderContext readerContext;
|
||||
public final int end;
|
||||
|
||||
public SegStart(AtomicReaderContext readerContext, int end) {
|
||||
this.readerContext = readerContext;
|
||||
this.end = end;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class CachedScorer extends Scorer {
|
||||
|
||||
// NOTE: these members are package-private b/c that way accessing them from
|
||||
// the outer class does not incur an access check by the JVM. The same
|
||||
// would be true if they were defined in the outer class as private
|
||||
// members.
|
||||
int doc;
|
||||
float score;
|
||||
|
||||
private CachedScorer() { super(null); }
|
||||
|
||||
@Override
|
||||
public final float score() { return score; }
|
||||
|
||||
@Override
|
||||
public final int advance(int target) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public final int docID() { return doc; }
|
||||
|
||||
@Override
|
||||
public final float freq() { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public final int nextDoc() { throw new UnsupportedOperationException(); }
|
||||
}
|
||||
|
||||
// A CachingCollector which caches scores
|
||||
private static final class ScoreCachingCollector extends CachingCollector {
|
||||
|
||||
private final CachedScorer cachedScorer;
|
||||
private final List<float[]> cachedScores;
|
||||
|
||||
private Scorer scorer;
|
||||
private float[] curScores;
|
||||
|
||||
ScoreCachingCollector(Collector other, double maxRAMMB) {
|
||||
super(other, maxRAMMB, true);
|
||||
|
||||
cachedScorer = new CachedScorer();
|
||||
cachedScores = new ArrayList<float[]>();
|
||||
curScores = new float[128];
|
||||
cachedScores.add(curScores);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
|
||||
if (curDocs == null) {
|
||||
// Cache was too large
|
||||
cachedScorer.score = scorer.score();
|
||||
cachedScorer.doc = doc;
|
||||
other.collect(doc);
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate a bigger array or abort caching
|
||||
if (upto == curDocs.length) {
|
||||
base += upto;
|
||||
|
||||
// Compute next array length - don't allocate too big arrays
|
||||
int nextLength = 8*curDocs.length;
|
||||
if (nextLength > MAX_ARRAY_SIZE) {
|
||||
nextLength = MAX_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
if (base + nextLength > maxDocsToCache) {
|
||||
// try to allocate a smaller array
|
||||
nextLength = maxDocsToCache - base;
|
||||
if (nextLength <= 0) {
|
||||
// Too many docs to collect -- clear cache
|
||||
curDocs = null;
|
||||
curScores = null;
|
||||
cachedSegs.clear();
|
||||
cachedDocs.clear();
|
||||
cachedScores.clear();
|
||||
cachedScorer.score = scorer.score();
|
||||
cachedScorer.doc = doc;
|
||||
other.collect(doc);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
curDocs = new int[nextLength];
|
||||
cachedDocs.add(curDocs);
|
||||
curScores = new float[nextLength];
|
||||
cachedScores.add(curScores);
|
||||
upto = 0;
|
||||
}
|
||||
|
||||
curDocs[upto] = doc;
|
||||
cachedScorer.score = curScores[upto] = scorer.score();
|
||||
upto++;
|
||||
cachedScorer.doc = doc;
|
||||
other.collect(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void replay(Collector other) throws IOException {
|
||||
replayInit(other);
|
||||
|
||||
int curUpto = 0;
|
||||
int curBase = 0;
|
||||
int chunkUpto = 0;
|
||||
other.setScorer(cachedScorer);
|
||||
curDocs = EMPTY_INT_ARRAY;
|
||||
for (SegStart seg : cachedSegs) {
|
||||
other.setNextReader(seg.readerContext);
|
||||
while (curBase + curUpto < seg.end) {
|
||||
if (curUpto == curDocs.length) {
|
||||
curBase += curDocs.length;
|
||||
curDocs = cachedDocs.get(chunkUpto);
|
||||
curScores = cachedScores.get(chunkUpto);
|
||||
chunkUpto++;
|
||||
curUpto = 0;
|
||||
}
|
||||
cachedScorer.score = curScores[curUpto];
|
||||
other.collect(curDocs[curUpto++]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
this.scorer = scorer;
|
||||
other.setScorer(cachedScorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (isCached()) {
|
||||
return "CachingCollector (" + (base+upto) + " docs & scores cached)";
|
||||
} else {
|
||||
return "CachingCollector (cache was cleared)";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// A CachingCollector which does not cache scores
|
||||
private static final class NoScoreCachingCollector extends CachingCollector {
|
||||
|
||||
NoScoreCachingCollector(Collector other, double maxRAMMB) {
|
||||
super(other, maxRAMMB, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
|
||||
if (curDocs == null) {
|
||||
// Cache was too large
|
||||
other.collect(doc);
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate a bigger array or abort caching
|
||||
if (upto == curDocs.length) {
|
||||
base += upto;
|
||||
|
||||
// Compute next array length - don't allocate too big arrays
|
||||
int nextLength = 8*curDocs.length;
|
||||
if (nextLength > MAX_ARRAY_SIZE) {
|
||||
nextLength = MAX_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
if (base + nextLength > maxDocsToCache) {
|
||||
// try to allocate a smaller array
|
||||
nextLength = maxDocsToCache - base;
|
||||
if (nextLength <= 0) {
|
||||
// Too many docs to collect -- clear cache
|
||||
curDocs = null;
|
||||
cachedSegs.clear();
|
||||
cachedDocs.clear();
|
||||
other.collect(doc);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
curDocs = new int[nextLength];
|
||||
cachedDocs.add(curDocs);
|
||||
upto = 0;
|
||||
}
|
||||
|
||||
curDocs[upto] = doc;
|
||||
upto++;
|
||||
other.collect(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void replay(Collector other) throws IOException {
|
||||
replayInit(other);
|
||||
|
||||
int curUpto = 0;
|
||||
int curbase = 0;
|
||||
int chunkUpto = 0;
|
||||
curDocs = EMPTY_INT_ARRAY;
|
||||
for (SegStart seg : cachedSegs) {
|
||||
other.setNextReader(seg.readerContext);
|
||||
while (curbase + curUpto < seg.end) {
|
||||
if (curUpto == curDocs.length) {
|
||||
curbase += curDocs.length;
|
||||
curDocs = cachedDocs.get(chunkUpto);
|
||||
chunkUpto++;
|
||||
curUpto = 0;
|
||||
}
|
||||
other.collect(curDocs[curUpto++]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
other.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (isCached()) {
|
||||
return "CachingCollector (" + (base+upto) + " docs cached)";
|
||||
} else {
|
||||
return "CachingCollector (cache was cleared)";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// TODO: would be nice if a collector defined a
|
||||
// needsScores() method so we can specialize / do checks
|
||||
// up front. This is only relevant for the ScoreCaching
|
||||
// version -- if the wrapped Collector does not need
|
||||
// scores, it can avoid cachedScorer entirely.
|
||||
protected final Collector other;
|
||||
|
||||
protected final int maxDocsToCache;
|
||||
protected final List<SegStart> cachedSegs = new ArrayList<SegStart>();
|
||||
protected final List<int[]> cachedDocs;
|
||||
|
||||
private AtomicReaderContext lastReaderContext;
|
||||
|
||||
protected int[] curDocs;
|
||||
protected int upto;
|
||||
protected int base;
|
||||
protected int lastDocBase;
|
||||
|
||||
public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
|
||||
return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
|
||||
}
|
||||
|
||||
// Prevent extension from non-internal classes
|
||||
private CachingCollector(Collector other, double maxRAMMB, boolean cacheScores) {
|
||||
this.other = other;
|
||||
|
||||
cachedDocs = new ArrayList<int[]>();
|
||||
curDocs = new int[INITIAL_ARRAY_SIZE];
|
||||
cachedDocs.add(curDocs);
|
||||
|
||||
int bytesPerDoc = RamUsageEstimator.NUM_BYTES_INT;
|
||||
if (cacheScores) {
|
||||
bytesPerDoc += RamUsageEstimator.NUM_BYTES_FLOAT;
|
||||
}
|
||||
maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024) / bytesPerDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return other.acceptsDocsOutOfOrder();
|
||||
}
|
||||
|
||||
public boolean isCached() {
|
||||
return curDocs != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
other.setNextReader(context);
|
||||
if (lastReaderContext != null) {
|
||||
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
|
||||
}
|
||||
lastReaderContext = context;
|
||||
}
|
||||
|
||||
/** Reused by the specialized inner classes. */
|
||||
void replayInit(Collector other) {
|
||||
if (!isCached()) {
|
||||
throw new IllegalStateException("cannot replay: cache was cleared because too much RAM was required");
|
||||
}
|
||||
|
||||
if (!other.acceptsDocsOutOfOrder() && this.other.acceptsDocsOutOfOrder()) {
|
||||
throw new IllegalArgumentException(
|
||||
"cannot replay: given collector does not support "
|
||||
+ "out-of-order collection, while the wrapped collector does. "
|
||||
+ "Therefore cached documents may be out-of-order.");
|
||||
}
|
||||
|
||||
//System.out.println("CC: replay totHits=" + (upto + base));
|
||||
if (lastReaderContext != null) {
|
||||
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
|
||||
lastReaderContext = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Replays the cached doc IDs (and scores) to the given Collector. If this
|
||||
* instance does not cache scores, then no Scorer is set via
|
||||
* {@code other.setScorer}, and scores are not replayed.
|
||||
*
|
||||
* @throws IllegalStateException
|
||||
* if this collector is not cached (i.e., if the RAM limits were too
|
||||
* low for the number of documents + scores to cache).
|
||||
* @throws IllegalArgumentException
|
||||
* if the given Collector does not support out-of-order collection,
|
||||
* while the collector passed to the ctor does.
|
||||
*/
|
||||
public abstract void replay(Collector other) throws IOException;
|
||||
|
||||
}
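For context, a minimal usage sketch of the class added above (not part of this patch): the searcher, query, and second-pass collector are caller-supplied placeholders, and the 64 MB budget is an arbitrary example value.

import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopScoreDocCollector;

public class CachingCollectorUsageSketch {
  // Run a query once, cache the hits, then replay them to a second collector
  // without re-executing the query.
  public static void searchTwice(IndexSearcher searcher, Query query,
                                 Collector secondPass) throws Exception {
    // With cacheScores=true each hit costs 8 bytes, so a 64 MB budget covers
    // up to 64 * 1024 * 1024 / 8 = 8,388,608 cached documents.
    TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, true);
    CachingCollector cache = CachingCollector.create(topDocs, true, 64.0);

    searcher.search(query, cache);        // first pass: collect and cache

    if (cache.isCached()) {
      cache.replay(secondPass);           // replay cached doc IDs and scores
    } else {
      // The cache exceeded 64 MB and was cleared; re-run the original search.
      searcher.search(query, secondPass);
    }
  }
}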
|
|
@ -61,9 +61,10 @@ public abstract class DocIdSetIterator {
|
|||
public abstract int nextDoc() throws IOException;
|
||||
|
||||
/**
|
||||
* Advances to the first beyond the current whose document number is greater
|
||||
* than or equal to <i>target</i>. Returns the current document number or
|
||||
* {@link #NO_MORE_DOCS} if there are no more docs in the set.
|
||||
* Advances to the first beyond (see NOTE below) the current whose document
|
||||
* number is greater than or equal to <i>target</i>. Returns the current
|
||||
* document number or {@link #NO_MORE_DOCS} if there are no more docs in the
|
||||
* set.
|
||||
* <p>
|
||||
* Behaves as if written:
|
||||
*
|
||||
|
@ -78,7 +79,7 @@ public abstract class DocIdSetIterator {
|
|||
*
|
||||
* Some implementations are considerably more efficient than that.
|
||||
* <p>
|
||||
* <b>NOTE:</b> when <code> target ≤ current</code> implementations may opt
|
||||
* <b>NOTE:</b> when <code> target ≤ current</code> implementations may opt
|
||||
* not to advance beyond their current {@link #docID()}.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this method may be called with {@link #NO_MORE_DOCS} for
|
||||
|
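To make the advance() contract above concrete, the "behaves as if written" semantics amount to the naive loop below (a reference sketch only; concrete DocIdSetIterator implementations usually override advance() with something faster than linear scanning):

// Reference semantics for advance(target), per the javadoc contract:
int advance(int target) throws IOException {
  int doc;
  while ((doc = nextDoc()) < target) {
    // keep stepping until we reach or pass target (or NO_MORE_DOCS)
  }
  return doc;
}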
|
|
@ -55,7 +55,12 @@ public class QueryTermVector implements TermFreqVector {
|
|||
public QueryTermVector(String queryString, Analyzer analyzer) {
|
||||
if (analyzer != null)
|
||||
{
|
||||
TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
|
||||
TokenStream stream;
|
||||
try {
|
||||
stream = analyzer.reusableTokenStream("", new StringReader(queryString));
|
||||
} catch (IOException e1) {
|
||||
stream = null;
|
||||
}
|
||||
if (stream != null)
|
||||
{
|
||||
List<BytesRef> terms = new ArrayList<BytesRef>();
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
||||
final class SloppyPhraseScorer extends PhraseScorer {
|
||||
private int slop;
|
||||
|
@ -109,8 +109,14 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
|
||||
/**
|
||||
* Init PhrasePositions in place.
|
||||
* There is a one time initialization for this scorer:
|
||||
* There is a one-time initialization for this scorer (taking place at the first doc that matches all terms):
|
||||
* <br>- Put in repeats[] each pp that has another pp with same position in the doc.
|
||||
* This relies on the fact that the position in PP is computed as (TP.position - offset), and
|
||||
* so by adding offset we actually compare positions and identify that the two are
|
||||
* the same term.
|
||||
* An exception to this is two distinct terms at the same offset in the query and the same
|
||||
* position in doc. This case is detected by comparing just the (query) offsets,
|
||||
* and two such PPs are not considered "repeating".
|
||||
* <br>- Also mark each such pp by pp.repeats = true.
|
||||
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
|
||||
* In particular, this allows queries with no repetitions to be scored with no overhead due to this computation.
|
||||
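A tiny numeric illustration of the repeat detection described above (hypothetical values, not part of the patch): for a query like "a b a"~2 the two "a" PhrasePositions carry query offsets 0 and 2, and if they sit on the same term occurrence in the doc, position + offset collides.

// Illustration only: PP.position is stored as (TP.position - offset).
int termPositionInDoc = 17;              // one underlying occurrence of "a"
int pp1Position = termPositionInDoc - 0; // PhrasePositions with offset 0
int pp2Position = termPositionInDoc - 2; // PhrasePositions with offset 2
// Adding each offset back recovers the real doc position, so the two collide
// and both PPs are marked as repeats:
assert (pp1Position + 0) == (pp2Position + 2);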
|
@ -145,23 +151,26 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
if (!checkedRepeats) {
|
||||
checkedRepeats = true;
|
||||
// check for repeats
|
||||
HashMap<PhrasePositions, Object> m = null;
|
||||
HashSet<PhrasePositions> m = null;
|
||||
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
|
||||
int tpPos = pp.position + pp.offset;
|
||||
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
|
||||
if (pp.offset == pp2.offset) {
|
||||
continue; // not a repetition: the two PPs are originally in same offset in the query!
|
||||
}
|
||||
int tpPos2 = pp2.position + pp2.offset;
|
||||
if (tpPos2 == tpPos) {
|
||||
if (m == null)
|
||||
m = new HashMap<PhrasePositions, Object>();
|
||||
m = new HashSet<PhrasePositions>();
|
||||
pp.repeats = true;
|
||||
pp2.repeats = true;
|
||||
m.put(pp,null);
|
||||
m.put(pp2,null);
|
||||
m.add(pp);
|
||||
m.add(pp2);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (m!=null)
|
||||
repeats = m.keySet().toArray(new PhrasePositions[0]);
|
||||
repeats = m.toArray(new PhrasePositions[0]);
|
||||
}
|
||||
|
||||
// with repeats must advance some repeating pp's so they all start with differing tp's
|
||||
|
@ -204,11 +213,16 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
int tpPos = pp.position + pp.offset;
|
||||
for (int i = 0; i < repeats.length; i++) {
|
||||
PhrasePositions pp2 = repeats[i];
|
||||
if (pp2 == pp)
|
||||
if (pp2 == pp) {
|
||||
continue;
|
||||
}
|
||||
if (pp.offset == pp2.offset) {
|
||||
continue; // not a repetition: the two PPs are originally in same offset in the query!
|
||||
}
|
||||
int tpPos2 = pp2.position + pp2.offset;
|
||||
if (tpPos2 == tpPos)
|
||||
if (tpPos2 == tpPos) {
|
||||
return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -172,7 +172,7 @@ public class NIOFSDirectory extends FSDirectory {
|
|||
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
||||
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
||||
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
||||
+ "with a a value smaller than the current chunk size (" + chunkSize + ")");
|
||||
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
|
||||
outOfMemoryError.initCause(e);
|
||||
throw outOfMemoryError;
|
||||
}
|
||||
|
|
|
@ -125,7 +125,7 @@ public class SimpleFSDirectory extends FSDirectory {
|
|||
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
|
||||
"OutOfMemoryError likely caused by the Sun VM Bug described in "
|
||||
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
|
||||
+ "with a value smaller than the current chunks size (" + chunkSize + ")");
|
||||
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
|
||||
outOfMemoryError.initCause(e);
|
||||
throw outOfMemoryError;
|
||||
}
|
||||
|
|
|
@ -20,9 +20,6 @@ package org.apache.lucene.util;
|
|||
import java.util.Comparator;
|
||||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.ObjectInput;
|
||||
import java.io.ObjectOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
/** Represents byte[], as a slice (offset + length) into an
|
||||
* existing byte[].
|
||||
|
@ -193,6 +190,9 @@ public final class BytesRef implements Comparable<BytesRef> {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null) {
|
||||
return false;
|
||||
}
|
||||
return this.bytesEquals((BytesRef) other);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -54,4 +57,42 @@ public abstract class StringHelper {
|
|||
|
||||
private StringHelper() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a Comparator over versioned strings such as X.YY.Z
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static Comparator<String> getVersionComparator() {
|
||||
return versionComparator;
|
||||
}
|
||||
|
||||
private static Comparator<String> versionComparator = new Comparator<String>() {
|
||||
public int compare(String a, String b) {
|
||||
StringTokenizer aTokens = new StringTokenizer(a, ".");
|
||||
StringTokenizer bTokens = new StringTokenizer(b, ".");
|
||||
|
||||
while (aTokens.hasMoreTokens()) {
|
||||
int aToken = Integer.parseInt(aTokens.nextToken());
|
||||
if (bTokens.hasMoreTokens()) {
|
||||
int bToken = Integer.parseInt(bTokens.nextToken());
|
||||
if (aToken != bToken) {
|
||||
return aToken - bToken;
|
||||
}
|
||||
} else {
|
||||
// a has some extra trailing tokens. If these are all zeroes, that's ok.
|
||||
if (aToken != 0) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// b has some extra trailing tokens. If these are all zeroes, that's ok.
|
||||
while (bTokens.hasMoreTokens()) {
|
||||
if (Integer.parseInt(bTokens.nextToken()) != 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
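A quick sanity check of the version comparator added above (sketch only; the example version strings are arbitrary):

import java.util.Comparator;
import org.apache.lucene.util.StringHelper;

public class VersionCompareSketch {
  public static void main(String[] args) {
    Comparator<String> cmp = StringHelper.getVersionComparator();
    System.out.println(cmp.compare("3.1", "3.1.0")); // 0  - trailing zero components are ignored
    System.out.println(cmp.compare("3.2", "3.10"));  // <0 - components compare numerically, not lexicographically
    System.out.println(cmp.compare("4.0", "3.9.9")); // >0
  }
}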
|
||||
|
|
|
@ -143,13 +143,16 @@ public class LevenshteinAutomata {
|
|||
if (dest >= 0)
|
||||
for (int r = 0; r < numRanges; r++)
|
||||
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
|
||||
// reduce the state: this doesn't appear to help anything
|
||||
//states[k].reduce();
|
||||
}
|
||||
|
||||
Automaton a = new Automaton(states[0]);
|
||||
a.setDeterministic(true);
|
||||
a.setNumberedStates(states);
|
||||
// we create some useless unconnected states, and it's a net-win overall to remove these,
|
||||
// as well as to combine any adjacent transitions (it makes later algorithms more efficient).
|
||||
// so, while we could set our numberedStates here, it's actually best not to, and instead to
|
||||
// force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
|
||||
//a.setNumberedStates(states);
|
||||
a.reduce();
|
||||
// we need not trim transitions to dead states, as they are not created.
|
||||
//a.restoreInvariant();
|
||||
return a;
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
package org.apache.lucene.util.automaton;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
|
@ -72,8 +74,12 @@ final public class MinimizationOperations {
|
|||
final int[] sigma = a.getStartPoints();
|
||||
final State[] states = a.getNumberedStates();
|
||||
final int sigmaLen = sigma.length, statesLen = states.length;
|
||||
final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
|
||||
final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
|
||||
@SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
|
||||
(ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
|
||||
@SuppressWarnings("unchecked") final HashSet<State>[] partition =
|
||||
(HashSet<State>[]) new HashSet[statesLen];
|
||||
@SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
|
||||
(ArrayList<State>[]) new ArrayList[statesLen];
|
||||
final int[] block = new int[statesLen];
|
||||
final StateList[][] active = new StateList[statesLen][sigmaLen];
|
||||
final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
|
||||
|
@ -82,8 +88,8 @@ final public class MinimizationOperations {
|
|||
final BitSet split = new BitSet(statesLen),
|
||||
refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
|
||||
for (int q = 0; q < statesLen; q++) {
|
||||
splitblock[q] = new BitSet(statesLen);
|
||||
partition[q] = new BitSet(statesLen);
|
||||
splitblock[q] = new ArrayList<State>();
|
||||
partition[q] = new HashSet<State>();
|
||||
for (int x = 0; x < sigmaLen; x++) {
|
||||
active[q][x] = new StateList();
|
||||
}
|
||||
|
@ -92,23 +98,22 @@ final public class MinimizationOperations {
|
|||
for (int q = 0; q < statesLen; q++) {
|
||||
final State qq = states[q];
|
||||
final int j = qq.accept ? 0 : 1;
|
||||
partition[j].set(q);
|
||||
partition[j].add(qq);
|
||||
block[q] = j;
|
||||
for (int x = 0; x < sigmaLen; x++) {
|
||||
final BitSet[] r =
|
||||
final ArrayList<State>[] r =
|
||||
reverse[qq.step(sigma[x]).number];
|
||||
if (r[x] == null)
|
||||
r[x] = new BitSet();
|
||||
r[x].set(q);
|
||||
r[x] = new ArrayList<State>();
|
||||
r[x].add(qq);
|
||||
}
|
||||
}
|
||||
// initialize active sets
|
||||
for (int j = 0; j <= 1; j++) {
|
||||
final BitSet part = partition[j];
|
||||
for (int x = 0; x < sigmaLen; x++) {
|
||||
for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
|
||||
if (reverse[i][x] != null)
|
||||
active2[i][x] = active[j][x].add(states[i]);
|
||||
for (final State qq : partition[j]) {
|
||||
if (reverse[qq.number][x] != null)
|
||||
active2[qq.number][x] = active[j][x].add(qq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -121,18 +126,19 @@ final public class MinimizationOperations {
|
|||
// process pending until fixed point
|
||||
int k = 2;
|
||||
while (!pending.isEmpty()) {
|
||||
IntPair ip = pending.removeFirst();
|
||||
final IntPair ip = pending.removeFirst();
|
||||
final int p = ip.n1;
|
||||
final int x = ip.n2;
|
||||
pending2.clear(x*statesLen + p);
|
||||
// find states that need to be split off their blocks
|
||||
for (StateListNode m = active[p][x].first; m != null; m = m.next) {
|
||||
final BitSet r = reverse[m.q.number][x];
|
||||
if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
|
||||
final ArrayList<State> r = reverse[m.q.number][x];
|
||||
if (r != null) for (final State s : r) {
|
||||
final int i = s.number;
|
||||
if (!split.get(i)) {
|
||||
split.set(i);
|
||||
final int j = block[i];
|
||||
splitblock[j].set(i);
|
||||
splitblock[j].add(s);
|
||||
if (!refine2.get(j)) {
|
||||
refine2.set(j);
|
||||
refine.set(j);
|
||||
|
@ -142,18 +148,19 @@ final public class MinimizationOperations {
|
|||
}
|
||||
// refine blocks
|
||||
for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
|
||||
final BitSet sb = splitblock[j];
|
||||
if (sb.cardinality() < partition[j].cardinality()) {
|
||||
final BitSet b1 = partition[j], b2 = partition[k];
|
||||
for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
|
||||
b1.clear(i);
|
||||
b2.set(i);
|
||||
block[i] = k;
|
||||
final ArrayList<State> sb = splitblock[j];
|
||||
if (sb.size() < partition[j].size()) {
|
||||
final HashSet<State> b1 = partition[j];
|
||||
final HashSet<State> b2 = partition[k];
|
||||
for (final State s : sb) {
|
||||
b1.remove(s);
|
||||
b2.add(s);
|
||||
block[s.number] = k;
|
||||
for (int c = 0; c < sigmaLen; c++) {
|
||||
final StateListNode sn = active2[i][c];
|
||||
final StateListNode sn = active2[s.number][c];
|
||||
if (sn != null && sn.sl == active[j][c]) {
|
||||
sn.remove();
|
||||
active2[i][c] = active[k][c].add(states[i]);
|
||||
active2[s.number][c] = active[k][c].add(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -173,8 +180,8 @@ final public class MinimizationOperations {
|
|||
k++;
|
||||
}
|
||||
refine2.clear(j);
|
||||
for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
|
||||
split.clear(i);
|
||||
for (final State s : sb)
|
||||
split.clear(s.number);
|
||||
sb.clear();
|
||||
}
|
||||
refine.clear();
|
||||
|
@ -184,9 +191,7 @@ final public class MinimizationOperations {
|
|||
for (int n = 0; n < newstates.length; n++) {
|
||||
final State s = new State();
|
||||
newstates[n] = s;
|
||||
BitSet part = partition[n];
|
||||
for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
|
||||
final State q = states[i];
|
||||
for (State q : partition[n]) {
|
||||
if (q == a.initial) a.initial = s;
|
||||
s.accept = q.accept;
|
||||
s.number = q.number; // select representative
|
||||
|
|
|
@ -232,9 +232,7 @@ public class FST<T> {
|
|||
|
||||
void setEmptyOutput(T v) throws IOException {
|
||||
if (emptyOutput != null) {
|
||||
if (!emptyOutput.equals(v)) {
|
||||
emptyOutput = outputs.merge(emptyOutput, v);
|
||||
}
|
||||
emptyOutput = outputs.merge(emptyOutput, v);
|
||||
} else {
|
||||
emptyOutput = v;
|
||||
}
|
||||
|
|
|
@ -100,7 +100,7 @@ public class MockTokenizer extends Tokenizer {
|
|||
endOffset = off;
|
||||
cp = readCodePoint();
|
||||
} while (cp >= 0 && isTokenChar(cp));
|
||||
offsetAtt.setOffset(startOffset, endOffset);
|
||||
offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
|
||||
streamState = State.INCREMENT;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -42,14 +42,13 @@ public class MockRandomMergePolicy extends MergePolicy {
|
|||
|
||||
if (segmentInfos.size() > 1 && random.nextInt(5) == 3) {
|
||||
|
||||
SegmentInfos segmentInfos2 = new SegmentInfos();
|
||||
segmentInfos2.addAll(segmentInfos);
|
||||
Collections.shuffle(segmentInfos2, random);
|
||||
List<SegmentInfo> segments = new ArrayList<SegmentInfo>(segmentInfos.asList());
|
||||
Collections.shuffle(segments, random);
|
||||
|
||||
// TODO: sometimes make more than 1 merge?
|
||||
mergeSpec = new MergeSpecification();
|
||||
final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size());
|
||||
mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge)));
|
||||
mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge)));
|
||||
}
|
||||
|
||||
return mergeSpec;
|
||||
|
|
|
@ -171,8 +171,15 @@ public abstract class LuceneTestCase extends Assert {
|
|||
private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
|
||||
|
||||
/** Used to track if setUp and tearDown are called correctly from subclasses */
|
||||
private boolean setup;
|
||||
private static State state = State.INITIAL;
|
||||
|
||||
private static enum State {
|
||||
INITIAL, // no tests ran yet
|
||||
SETUP, // test has called setUp()
|
||||
RANTEST, // test is running
|
||||
TEARDOWN // test has called tearDown()
|
||||
};
|
||||
|
||||
/**
|
||||
* Some tests expect the directory to contain a single segment, and want to do tests on that segment's reader.
|
||||
* This is a utility method to help them.
|
||||
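The setUp/tearDown state machine above only works if test subclasses chain to super in both directions; a minimal sketch of a well-behaved subclass (illustrative only, with a hypothetical test body):

public class MyLuceneTest extends LuceneTestCase {
  @Override
  public void setUp() throws Exception {
    super.setUp();     // moves state to SETUP; skipping this trips the asserts above
    // per-test initialization goes here
  }

  @Override
  public void tearDown() throws Exception {
    // per-test cleanup goes here
    super.tearDown();  // moves state to TEARDOWN
  }

  public void testSomething() throws Exception {
    // test body (state is RANTEST while this runs)
  }
}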
|
@ -326,6 +333,7 @@ public abstract class LuceneTestCase extends Assert {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClassLuceneTestCaseJ4() {
|
||||
state = State.INITIAL;
|
||||
staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
|
||||
random.setSeed(staticSeed);
|
||||
tempDirs.clear();
|
||||
|
@ -375,6 +383,11 @@ public abstract class LuceneTestCase extends Assert {
|
|||
|
||||
@AfterClass
|
||||
public static void afterClassLuceneTestCaseJ4() {
|
||||
if (!testsFailed) {
|
||||
assertTrue("ensure your setUp() calls super.setUp() and your tearDown() calls super.tearDown()!!!",
|
||||
state == State.INITIAL || state == State.TEARDOWN);
|
||||
}
|
||||
state = State.INITIAL;
|
||||
if (! "false".equals(TEST_CLEAN_THREADS)) {
|
||||
int rogueThreads = threadCleanup("test class");
|
||||
if (rogueThreads > 0) {
|
||||
|
@ -483,17 +496,22 @@ public abstract class LuceneTestCase extends Assert {
|
|||
public void starting(FrameworkMethod method) {
|
||||
// set current method name for logging
|
||||
LuceneTestCase.this.name = method.getName();
|
||||
if (!testsFailed) {
|
||||
assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.SETUP);
|
||||
}
|
||||
state = State.RANTEST;
|
||||
super.starting(method);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2;
|
||||
random.setSeed(seed);
|
||||
assertFalse("ensure your tearDown() calls super.tearDown()!!!", setup);
|
||||
setup = true;
|
||||
if (!testsFailed) {
|
||||
assertTrue("ensure your tearDown() calls super.tearDown()!!!", (state == State.INITIAL || state == State.TEARDOWN));
|
||||
}
|
||||
state = State.SETUP;
|
||||
savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
|
||||
Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
|
||||
public void uncaughtException(Thread t, Throwable e) {
|
||||
|
@ -529,8 +547,12 @@ public abstract class LuceneTestCase extends Assert {
|
|||
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
assertTrue("ensure your setUp() calls super.setUp()!!!", setup);
|
||||
setup = false;
|
||||
if (!testsFailed) {
|
||||
// Note: we allow a test to go straight from SETUP -> TEARDOWN (without ever entering the RANTEST state)
|
||||
// because if you assume() inside setUp(), it skips the test and the TestWatchman has no way to know...
|
||||
assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.RANTEST || state == State.SETUP);
|
||||
}
|
||||
state = State.TEARDOWN;
|
||||
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
|
||||
if ("perMethod".equals(TEST_CLEAN_THREADS)) {
|
||||
int rogueThreads = threadCleanup("test method: '" + getName() + "'");
|
||||
|
|
|
@ -397,4 +397,15 @@ public class AutomatonTestUtil {
|
|||
path.remove(s);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks that an automaton has no detached states that are unreachable
|
||||
* from the initial state.
|
||||
*/
|
||||
public static void assertNoDetachedStates(Automaton a) {
|
||||
int numStates = a.getNumberOfStates();
|
||||
a.clearNumberedStates(); // force recomputation of cached numbered states
|
||||
assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* A binary TokenStream that lets you index a BytesRef.
|
||||
*/
|
||||
public final class BinaryTokenStream extends TokenStream {
|
||||
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
|
||||
private boolean available = true;
|
||||
|
||||
public BinaryTokenStream(BytesRef bytes) {
|
||||
bytesAtt.setBytesRef(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (available) {
|
||||
available = false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
available = true;
|
||||
}
|
||||
|
||||
public interface ByteTermAttribute extends TermToBytesRefAttribute {
|
||||
public void setBytesRef(BytesRef bytes);
|
||||
}
|
||||
|
||||
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
|
||||
private BytesRef bytes;
|
||||
|
||||
public int fillBytesRef() {
|
||||
return bytes.hashCode();
|
||||
}
|
||||
|
||||
public BytesRef getBytesRef() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
public void setBytesRef(BytesRef bytes) {
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
public void clear() {}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
|
||||
other.bytes = bytes;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
* Test indexing and searching some byte[] terms
|
||||
*/
|
||||
public class TestBinaryTerms extends LuceneTestCase {
|
||||
public void testBinary() throws IOException {
|
||||
assumeFalse("PreFlex codec cannot work with binary terms!",
|
||||
"PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random, dir);
|
||||
BytesRef bytes = new BytesRef(2);
|
||||
BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
bytes.bytes[0] = (byte) i;
|
||||
bytes.bytes[1] = (byte) (255 - i);
|
||||
bytes.length = 2;
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.NO));
|
||||
doc.add(new Field("bytes", tokenStream));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
IndexSearcher is = newSearcher(ir);
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
bytes.bytes[0] = (byte) i;
|
||||
bytes.bytes[1] = (byte) (255 - i);
|
||||
bytes.length = 2;
|
||||
TopDocs docs = is.search(new TermQuery(new Term("bytes", bytes)), 5);
|
||||
assertEquals(1, docs.totalHits);
|
||||
assertEquals("" + i, is.doc(docs.scoreDocs[0].doc).get("id"));
|
||||
}
|
||||
|
||||
is.close();
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@ -30,7 +30,6 @@ import org.apache.lucene.store.LockObtainFailedException;
|
|||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ThrottledIndexOutput;
|
||||
import org.junit.Before;
|
||||
|
||||
public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
||||
|
@ -105,7 +104,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
|
||||
}
|
||||
if (ensureNotStalled) {
|
||||
assertFalse(docsWriter.healthiness.wasStalled);
|
||||
assertFalse(docsWriter.flushControl.stallControl.wasStalled);
|
||||
}
|
||||
writer.close();
|
||||
assertEquals(0, flushControl.activeBytes());
|
||||
|
@ -216,15 +215,15 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
assertEquals(numDocumentsToIndex, r.numDocs());
|
||||
assertEquals(numDocumentsToIndex, r.maxDoc());
|
||||
if (!flushPolicy.flushOnRAM()) {
|
||||
assertFalse("never stall if we don't flush on RAM", docsWriter.healthiness.wasStalled);
|
||||
assertFalse("never block if we don't flush on RAM", docsWriter.healthiness.hasBlocked());
|
||||
assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.wasStalled);
|
||||
assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.hasBlocked());
|
||||
}
|
||||
r.close();
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testHealthyness() throws InterruptedException,
|
||||
public void testStallControl() throws InterruptedException,
|
||||
CorruptIndexException, LockObtainFailedException, IOException {
|
||||
|
||||
int[] numThreads = new int[] { 4 + random.nextInt(8), 1 };
|
||||
|
@ -240,7 +239,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
|
||||
FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
|
||||
iwc.setFlushPolicy(flushPolicy);
|
||||
|
||||
|
||||
DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
|
||||
numThreads[i]== 1 ? 1 : 2);
|
||||
iwc.setIndexerThreadPool(threadPool);
|
||||
|
@ -264,12 +263,12 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
|||
assertEquals(numDocumentsToIndex, writer.numDocs());
|
||||
assertEquals(numDocumentsToIndex, writer.maxDoc());
|
||||
if (numThreads[i] == 1) {
|
||||
assertFalse(
|
||||
"single thread must not stall",
|
||||
docsWriter.healthiness.wasStalled);
|
||||
assertFalse(
|
||||
"single thread must not block numThreads: " + numThreads[i],
|
||||
docsWriter.healthiness.hasBlocked());
|
||||
docsWriter.flushControl.stallControl.hasBlocked());
|
||||
}
|
||||
if (docsWriter.flushControl.peakNetBytes > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
|
||||
assertTrue(docsWriter.flushControl.stallControl.wasStalled);
|
||||
}
|
||||
assertActiveBytesAfter(flushControl);
|
||||
writer.close(true);
|
||||
|
|
|
@ -363,7 +363,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase {
|
|||
w.close();
|
||||
SegmentInfos sis = new SegmentInfos();
|
||||
sis.read(base);
|
||||
SegmentInfo segmentInfo = sis.get(sis.size() - 1);// last segment must
|
||||
SegmentInfo segmentInfo = sis.info(sis.size() - 1);// last segment must
|
||||
// have all fields with
|
||||
// consistent numbers
|
||||
FieldInfos fieldInfos = segmentInfo.getFieldInfos();
|
||||
|
|
|
@ -1231,13 +1231,17 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
System.out.println("TEST: pass=" + pass);
|
||||
}
|
||||
|
||||
IndexWriter writer = new IndexWriter(
|
||||
directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
|
||||
IndexWriterConfig conf = newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, new MockAnalyzer(random)).
|
||||
setOpenMode(OpenMode.CREATE).
|
||||
setMaxBufferedDocs(2).
|
||||
setMergePolicy(newLogMergePolicy())
|
||||
);
|
||||
setMergePolicy(newLogMergePolicy());
|
||||
if (pass == 2) {
|
||||
conf.setMergeScheduler(new SerialMergeScheduler());
|
||||
}
|
||||
|
||||
IndexWriter writer = new IndexWriter(directory, conf);
|
||||
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
|
||||
writer.setInfoStream(VERBOSE ? System.out : null);
|
||||
|
||||
for(int iter=0;iter<10;iter++) {
|
||||
|
@ -2139,7 +2143,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
while(!finish) {
|
||||
try {
|
||||
|
||||
while(true) {
|
||||
while(!finish) {
|
||||
if (w != null) {
|
||||
w.close();
|
||||
w = null;
|
||||
|
@ -2157,6 +2161,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
w.close();
|
||||
w = null;
|
||||
_TestUtil.checkIndex(dir);
|
||||
IndexReader.open(dir, true).close();
|
||||
|
||||
|
|
|
@ -71,9 +71,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
|||
assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass());
|
||||
assertNull(conf.getFlushPolicy());
|
||||
assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB());
|
||||
|
||||
|
||||
|
||||
// Sanity check - validate that all getters are covered.
|
||||
Set<String> getters = new HashSet<String>();
|
||||
getters.add("getAnalyzer");
|
||||
|
|
|
@ -128,8 +128,8 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
|
|||
fsmp.length = 2;
|
||||
System.out.println("maybeMerge "+writer.segmentInfos);
|
||||
|
||||
SegmentInfo info0 = writer.segmentInfos.get(0);
|
||||
SegmentInfo info1 = writer.segmentInfos.get(1);
|
||||
SegmentInfo info0 = writer.segmentInfos.info(0);
|
||||
SegmentInfo info1 = writer.segmentInfos.info(1);
|
||||
|
||||
writer.maybeMerge();
|
||||
System.out.println("maybeMerge after "+writer.segmentInfos);
|
||||
|
@ -199,7 +199,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
|
|||
// deletes for info1, the newly created segment from the
|
||||
// merge should have no deletes because they were applied in
|
||||
// the merge
|
||||
//SegmentInfo info1 = writer.segmentInfos.get(1);
|
||||
//SegmentInfo info1 = writer.segmentInfos.info(1);
|
||||
//assertFalse(exists(info1, writer.docWriter.segmentDeletes));
|
||||
|
||||
//System.out.println("infos4:"+writer.segmentInfos);
|
||||
|
@ -261,11 +261,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
|
|||
throws CorruptIndexException, IOException {
|
||||
MergeSpecification ms = new MergeSpecification();
|
||||
if (doMerge) {
|
||||
SegmentInfos mergeInfos = new SegmentInfos();
|
||||
for (int x=start; x < (start+length); x++) {
|
||||
mergeInfos.add(segmentInfos.get(x));
|
||||
}
|
||||
OneMerge om = new OneMerge(mergeInfos);
|
||||
OneMerge om = new OneMerge(segmentInfos.asList().subList(start, start + length));
|
||||
ms.add(om);
|
||||
doMerge = false;
|
||||
return ms;
|
||||
|
|
|
@ -0,0 +1,175 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.CachingCollector;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestCachingCollector extends LuceneTestCase {
|
||||
|
||||
private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB
|
||||
|
||||
private static class MockScorer extends Scorer {
|
||||
|
||||
private MockScorer() {
|
||||
super((Weight) null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException { return 0; }
|
||||
|
||||
@Override
|
||||
public int docID() { return 0; }
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException { return 0; }
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException { return 0; }
|
||||
|
||||
}
|
||||
|
||||
private static class NoOpCollector extends Collector {
|
||||
|
||||
private final boolean acceptDocsOutOfOrder;
|
||||
|
||||
public NoOpCollector(boolean acceptDocsOutOfOrder) {
|
||||
this.acceptDocsOutOfOrder = acceptDocsOutOfOrder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return acceptDocsOutOfOrder;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
for (boolean cacheScores : new boolean[] { false, true }) {
|
||||
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1);
|
||||
cc.setScorer(new MockScorer());
|
||||
|
||||
// collect 1000 docs
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
cc.collect(i);
|
||||
}
|
||||
|
||||
// now replay them
|
||||
cc.replay(new Collector() {
|
||||
int prevDocID = -1;
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
assertEquals(prevDocID + 1, doc);
|
||||
prevDocID = doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public void testIllegalStateOnReplay() throws Exception {
|
||||
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
|
||||
cc.setScorer(new MockScorer());
|
||||
|
||||
// collect 130 docs; this should be enough to trigger a cache abort.
|
||||
for (int i = 0; i < 130; i++) {
|
||||
cc.collect(i);
|
||||
}
|
||||
|
||||
assertFalse("CachingCollector should not be cached due to low memory limit", cc.isCached());
|
||||
|
||||
try {
|
||||
cc.replay(new NoOpCollector(false));
|
||||
fail("replay should fail if CachingCollector is not cached");
|
||||
} catch (IllegalStateException e) {
|
||||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
public void testIllegalCollectorOnReplay() throws Exception {
|
||||
// tests that the Collector passed to replay() has an out-of-order mode that
|
||||
// is compatible with the Collector passed to the ctor
|
||||
|
||||
// 'src' Collector does not support out-of-order
|
||||
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
|
||||
cc.setScorer(new MockScorer());
|
||||
for (int i = 0; i < 10; i++) cc.collect(i);
|
||||
cc.replay(new NoOpCollector(true)); // this call should not fail
|
||||
cc.replay(new NoOpCollector(false)); // this call should not fail
|
||||
|
||||
// 'src' Collector supports out-of-order
|
||||
cc = CachingCollector.create(new NoOpCollector(true), true, 50 * ONE_BYTE);
|
||||
cc.setScorer(new MockScorer());
|
||||
for (int i = 0; i < 10; i++) cc.collect(i);
|
||||
cc.replay(new NoOpCollector(true)); // this call should not fail
|
||||
try {
|
||||
cc.replay(new NoOpCollector(false)); // this call should fail
|
||||
fail("should have failed if an in-order Collector was given to replay(), " +
|
||||
"while CachingCollector was initialized with out-of-order collection");
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
|
||||
public void testCachedArraysAllocation() throws Exception {
|
||||
// tests the cached arrays allocation -- if the 'nextLength' was too high,
|
||||
// caching would terminate even if a smaller length would suffice.
|
||||
|
||||
// set a RAM limit large enough for numDocs = 150 + random(10000) docs
|
||||
int numDocs = random.nextInt(10000) + 150;
|
||||
for (boolean cacheScores : new boolean[] { false, true }) {
|
||||
int bytesPerDoc = cacheScores ? 8 : 4;
|
||||
CachingCollector cc = CachingCollector.create(new NoOpCollector(false),
|
||||
cacheScores, bytesPerDoc * ONE_BYTE * numDocs);
|
||||
cc.setScorer(new MockScorer());
|
||||
for (int i = 0; i < numDocs; i++) cc.collect(i);
|
||||
assertTrue(cc.isCached());
|
||||
|
||||
// One more document should exceed the RAM limit and terminate caching
|
||||
cc.collect(numDocs);
|
||||
assertFalse(cc.isCached());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -17,11 +17,14 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.Explanation.IDFExplanation;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -423,7 +426,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
|
||||
}
|
||||
TopDocs hits = s.search(mpq, 2);
|
||||
assert hits.totalHits == 2;
|
||||
assertEquals(2, hits.totalHits);
|
||||
assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
|
||||
/*
|
||||
for(int hit=0;hit<hits.totalHits;hit++) {
|
||||
|
@ -434,4 +437,156 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private final static TokenAndPos[] INCR_0_DOC_TOKENS = new TokenAndPos[] {
|
||||
new TokenAndPos("x", 0),
|
||||
new TokenAndPos("a", 1),
|
||||
new TokenAndPos("1", 1),
|
||||
new TokenAndPos("m", 2), // not existing, relying on slop=2
|
||||
new TokenAndPos("b", 3),
|
||||
new TokenAndPos("1", 3),
|
||||
new TokenAndPos("n", 4), // not existing, relying on slop=2
|
||||
new TokenAndPos("c", 5),
|
||||
new TokenAndPos("y", 6)
|
||||
};
|
||||
|
||||
private final static TokenAndPos[] INCR_0_QUERY_TOKENS_AND = new TokenAndPos[] {
|
||||
new TokenAndPos("a", 0),
|
||||
new TokenAndPos("1", 0),
|
||||
new TokenAndPos("b", 1),
|
||||
new TokenAndPos("1", 1),
|
||||
new TokenAndPos("c", 2)
|
||||
};
|
||||
|
||||
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_MATCH = new TokenAndPos[][] {
|
||||
{ new TokenAndPos("a", 0) },
|
||||
{ new TokenAndPos("x", 0), new TokenAndPos("1", 0) },
|
||||
{ new TokenAndPos("b", 1) },
|
||||
{ new TokenAndPos("x", 1), new TokenAndPos("1", 1) },
|
||||
{ new TokenAndPos("c", 2) }
|
||||
};
|
||||
|
||||
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN = new TokenAndPos[][] {
|
||||
{ new TokenAndPos("x", 0) },
|
||||
{ new TokenAndPos("a", 0), new TokenAndPos("1", 0) },
|
||||
{ new TokenAndPos("x", 1) },
|
||||
{ new TokenAndPos("b", 1), new TokenAndPos("1", 1) },
|
||||
{ new TokenAndPos("c", 2) }
|
||||
};
|
||||
|
||||
/**
|
||||
* using the query parser, an MPQ will be created, and will not be strict about having all query terms
|
||||
* in each position - one term per position is sufficient (OR logic)
|
||||
*/
|
||||
public void testZeroPosIncrSloppyParsedAnd() throws IOException, ParseException {
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
|
||||
final Query q = qp.parse("\"this text is actually ignored\"");
|
||||
assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
|
||||
doTestZeroPosIncrSloppy(q, 0);
|
||||
((MultiPhraseQuery) q).setSlop(1);
|
||||
doTestZeroPosIncrSloppy(q, 0);
|
||||
((MultiPhraseQuery) q).setSlop(2);
|
||||
doTestZeroPosIncrSloppy(q, 1);
|
||||
}
|
||||
|
||||
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
|
||||
Directory dir = newDirectory(); // random dir
|
||||
IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
|
||||
IndexWriter writer = new IndexWriter(dir, cfg);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
|
||||
writer.addDocument(doc);
|
||||
IndexReader r = IndexReader.open(writer,false);
|
||||
writer.close();
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("QUERY=" + q);
|
||||
}
|
||||
|
||||
TopDocs hits = s.search(q, 1);
|
||||
assertEquals("wrong number of results", nExpected, hits.totalHits);
|
||||
|
||||
if (VERBOSE) {
|
||||
for(int hit=0;hit<hits.totalHits;hit++) {
|
||||
ScoreDoc sd = hits.scoreDocs[hit];
|
||||
System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* PQ AND Mode - Manually creating a phrase query
|
||||
*/
|
||||
public void testZeroPosIncrSloppyPqAnd() throws IOException, ParseException {
|
||||
final PhraseQuery pq = new PhraseQuery();
|
||||
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
|
||||
pq.add(new Term("field",tap.token), tap.pos);
|
||||
}
|
||||
doTestZeroPosIncrSloppy(pq, 0);
|
||||
pq.setSlop(1);
|
||||
doTestZeroPosIncrSloppy(pq, 0);
|
||||
pq.setSlop(2);
|
||||
doTestZeroPosIncrSloppy(pq, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* MPQ AND Mode - Manually creating a multiple phrase query
|
||||
*/
|
||||
public void testZeroPosIncrSloppyMpqAnd() throws IOException, ParseException {
|
||||
final MultiPhraseQuery mpq = new MultiPhraseQuery();
|
||||
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
|
||||
mpq.add(new Term[]{new Term("field",tap.token)}, tap.pos); //AND logic
|
||||
}
|
||||
doTestZeroPosIncrSloppy(mpq, 0);
|
||||
mpq.setSlop(1);
|
||||
doTestZeroPosIncrSloppy(mpq, 0);
|
||||
mpq.setSlop(2);
|
||||
doTestZeroPosIncrSloppy(mpq, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query
|
||||
*/
|
||||
public void testZeroPosIncrSloppyMpqAndOrMatch() throws IOException, ParseException {
|
||||
final MultiPhraseQuery mpq = new MultiPhraseQuery();
|
||||
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_MATCH) {
|
||||
Term[] terms = tapTerms(tap);
|
||||
final int pos = tap[0].pos;
|
||||
mpq.add(terms, pos); //AND logic in pos, OR across lines
|
||||
}
|
||||
doTestZeroPosIncrSloppy(mpq, 0);
|
||||
mpq.setSlop(1);
|
||||
doTestZeroPosIncrSloppy(mpq, 0);
|
||||
mpq.setSlop(2);
|
||||
doTestZeroPosIncrSloppy(mpq, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query - with no match
|
||||
*/
|
||||
public void testZeroPosIncrSloppyMpqAndOrNoMatch() throws IOException, ParseException {
|
||||
final MultiPhraseQuery mpq = new MultiPhraseQuery();
|
||||
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN) {
|
||||
Term[] terms = tapTerms(tap);
|
||||
final int pos = tap[0].pos;
|
||||
mpq.add(terms, pos); //AND logic in pos, OR across lines
|
||||
}
|
||||
doTestZeroPosIncrSloppy(mpq, 0);
|
||||
mpq.setSlop(2);
|
||||
doTestZeroPosIncrSloppy(mpq, 0);
|
||||
}
|
||||
|
||||
private Term[] tapTerms(TokenAndPos[] tap) {
|
||||
Term[] terms = new Term[tap.length];
|
||||
for (int i=0; i<terms.length; i++) {
|
||||
terms[i] = new Term("field",tap[i].token);
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -65,6 +65,7 @@ public class TestTermScorer extends LuceneTestCase {
|
|||
indexSearcher.close();
|
||||
indexReader.close();
|
||||
directory.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void test() throws IOException {
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* Tests for StringHelper.getVersionComparator
|
||||
*/
|
||||
public class TestVersionComparator extends LuceneTestCase {
|
||||
public void testVersions() {
|
||||
Comparator<String> comp = StringHelper.getVersionComparator();
|
||||
assertTrue(comp.compare("1", "2") < 0);
|
||||
assertTrue(comp.compare("1", "1") == 0);
|
||||
assertTrue(comp.compare("2", "1") > 0);
|
||||
|
||||
assertTrue(comp.compare("1.1", "1") > 0);
|
||||
assertTrue(comp.compare("1", "1.1") < 0);
|
||||
assertTrue(comp.compare("1.1", "1.1") == 0);
|
||||
|
||||
assertTrue(comp.compare("1.0", "1") == 0);
|
||||
assertTrue(comp.compare("1", "1.0") == 0);
|
||||
assertTrue(comp.compare("1.0.1", "1.0") > 0);
|
||||
assertTrue(comp.compare("1.0", "1.0.1") < 0);
|
||||
|
||||
assertTrue(comp.compare("1.02.003", "1.2.3.0") == 0);
|
||||
assertTrue(comp.compare("1.2.3.0", "1.02.003") == 0);
|
||||
|
||||
assertTrue(comp.compare("1.10", "1.9") > 0);
|
||||
assertTrue(comp.compare("1.9", "1.10") < 0);
|
||||
}
|
||||
}
|
|
@ -39,6 +39,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
assertCharVectors(2);
|
||||
}
|
||||
|
||||
// LUCENE-3094
|
||||
public void testNoWastedStates() throws Exception {
|
||||
AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests all possible characteristic vectors for some n
|
||||
* This exhaustively tests the parametric transition tables.
|
||||
|
@ -66,6 +71,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
assertNotNull(automata[n]);
|
||||
assertTrue(automata[n].isDeterministic());
|
||||
assertTrue(SpecialOperations.isFinite(automata[n]));
|
||||
AutomatonTestUtil.assertNoDetachedStates(automata[n]);
|
||||
// check that the dfa for n-1 accepts a subset of the dfa for n
|
||||
if (n > 0) {
|
||||
assertTrue(automata[n-1].subsetOf(automata[n]));
|
||||
|
|
|
@ -49,4 +49,9 @@ public class TestMinimize extends LuceneTestCase {
|
|||
assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
|
||||
}
|
||||
}
|
||||
|
||||
/** n^2 space usage in Hopcroft minimization? */
|
||||
public void testMinimizeHuge() {
|
||||
new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,14 +54,16 @@ public class TestFSTs extends LuceneTestCase {
|
|||
private MockDirectoryWrapper dir;
|
||||
|
||||
@Override
|
||||
public void setUp() throws IOException {
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
dir = newDirectory();
|
||||
dir.setPreventDoubleWrite(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws IOException {
|
||||
public void tearDown() throws Exception {
|
||||
dir.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
private static BytesRef toBytesRef(IntsRef ir) {
|
||||
|
@ -456,8 +458,9 @@ public class TestFSTs extends LuceneTestCase {
|
|||
if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
|
||||
final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
|
||||
final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
|
||||
((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.first));
|
||||
((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.second));
|
||||
@SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
|
||||
builderObject.add(pair.input, _outputs.get(twoLongs.first));
|
||||
builderObject.add(pair.input, _outputs.get(twoLongs.second));
|
||||
} else {
|
||||
builder.add(pair.input, pair.output);
|
||||
}
|
||||
|
@ -537,7 +540,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
Object output = run(fst, term, null);
|
||||
|
||||
assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
|
||||
assertEquals(output, pair.output);
|
||||
assertEquals(pair.output, output);
|
||||
|
||||
// verify enum's next
|
||||
IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
|
||||
|
|
|
@ -49,6 +49,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
|
|||
|
||||
private State previous;
|
||||
private String previousType;
|
||||
private boolean exhausted;
|
||||
|
||||
/**
|
||||
* Constructs a new CommonGramsQueryFilter based on the provided CommonGramsFilter
|
||||
|
@ -67,6 +68,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
|
|||
super.reset();
|
||||
previous = null;
|
||||
previousType = null;
|
||||
exhausted = false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -79,7 +81,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
|
|||
*/
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
while (input.incrementToken()) {
|
||||
while (!exhausted && input.incrementToken()) {
|
||||
State current = captureState();
|
||||
|
||||
if (previous != null && !isGramType()) {
|
||||
|
@ -96,6 +98,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
|
|||
previous = current;
|
||||
}
|
||||
|
||||
exhausted = true;
|
||||
|
||||
if (previous == null || GRAM_TYPE.equals(previousType)) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -59,6 +59,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
|
|||
|
||||
private final StringBuilder hyphenated = new StringBuilder();
|
||||
private State savedState;
|
||||
private boolean exhausted = false;
|
||||
|
||||
/**
|
||||
* Creates a new HyphenatedWordsFilter
|
||||
|
@ -74,7 +75,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
|
|||
*/
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
while (input.incrementToken()) {
|
||||
while (!exhausted && input.incrementToken()) {
|
||||
char[] term = termAttribute.buffer();
|
||||
int termLength = termAttribute.length();
|
||||
|
||||
|
@ -96,6 +97,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
|
|||
}
|
||||
}
|
||||
|
||||
exhausted = true;
|
||||
|
||||
if (savedState != null) {
|
||||
// the final term ends with a hyphen
|
||||
// add back the hyphen, for backwards compatibility.
|
||||
|
@ -115,6 +118,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
|
|||
super.reset();
|
||||
hyphenated.setLength(0);
|
||||
savedState = null;
|
||||
exhausted = false;
|
||||
}
|
||||
|
||||
// ================================================= Helper Methods ================================================
|
||||
|
|
|
@ -76,4 +76,9 @@ public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
|
|||
public void close() throws IOException {
|
||||
suffix.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
suffix.end();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -158,6 +158,12 @@ public class PrefixAwareTokenFilter extends TokenStream {
|
|||
return suffixToken;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
prefix.end();
|
||||
suffix.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
prefix.close();
|
||||
|
|
|
@ -225,7 +225,6 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
|
|||
TokenStream result = delegate.reusableTokenStream(fieldName, reader);
|
||||
if (result == streams.wrapped) {
|
||||
/* the wrapped analyzer reused the stream */
|
||||
streams.withStopFilter.reset();
|
||||
} else {
|
||||
/*
|
||||
* the wrapped analyzer did not. if there are any stopwords for the
|
||||
|
|
|
@ -199,10 +199,7 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
|
|||
setPreviousTokenStream(streams);
|
||||
} else {
|
||||
TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
|
||||
if (result == streams.wrapped) {
|
||||
/* the wrapped analyzer reused the stream */
|
||||
streams.shingle.reset();
|
||||
} else {
|
||||
if (result != streams.wrapped) {
|
||||
/* the wrapped analyzer did not; create a new shingle around the new one */
|
||||
streams.wrapped = result;
|
||||
streams.shingle = new ShingleFilter(streams.wrapped);
|
||||
|
|
|
@ -327,6 +327,8 @@ public final class ShingleFilter extends TokenFilter {
|
|||
return tokenAvailable;
|
||||
}
|
||||
|
||||
private boolean exhausted;
|
||||
|
||||
/**
|
||||
* <p>Get the next token from the input stream.
|
||||
* <p>If the next token has <code>positionIncrement > 1</code>,
|
||||
|
@ -359,7 +361,7 @@ public final class ShingleFilter extends TokenFilter {
|
|||
}
|
||||
isNextInputStreamToken = false;
|
||||
newTarget.isFiller = false;
|
||||
} else if (input.incrementToken()) {
|
||||
} else if (!exhausted && input.incrementToken()) {
|
||||
if (null == target) {
|
||||
newTarget = new InputWindowToken(cloneAttributes());
|
||||
} else {
|
||||
|
@ -387,6 +389,7 @@ public final class ShingleFilter extends TokenFilter {
|
|||
}
|
||||
} else {
|
||||
newTarget = null;
|
||||
exhausted = true;
|
||||
}
|
||||
return newTarget;
|
||||
}
|
||||
|
@ -435,7 +438,8 @@ public final class ShingleFilter extends TokenFilter {
|
|||
inputWindow.clear();
|
||||
numFillerTokensToInsert = 0;
|
||||
isOutputHere = false;
|
||||
noShingleOutput = true;
|
||||
noShingleOutput = true;
|
||||
exhausted = false;
|
||||
if (outputUnigramsIfNoShingles && ! outputUnigrams) {
|
||||
// Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
|
||||
gramSize.minValue = minShingleSize;
|
||||
|
|
|
@ -190,14 +190,18 @@ public final class SynonymFilter extends TokenFilter {
|
|||
private LinkedList<AttributeSource> buffer;
|
||||
private LinkedList<AttributeSource> matched;
|
||||
|
||||
private boolean exhausted;
|
||||
|
||||
private AttributeSource nextTok() throws IOException {
|
||||
if (buffer!=null && !buffer.isEmpty()) {
|
||||
return buffer.removeFirst();
|
||||
} else {
|
||||
if (input.incrementToken()) {
|
||||
if (!exhausted && input.incrementToken()) {
|
||||
return this;
|
||||
} else
|
||||
} else {
|
||||
exhausted = true;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -250,5 +254,6 @@ public final class SynonymFilter extends TokenFilter {
|
|||
public void reset() throws IOException {
|
||||
input.reset();
|
||||
replacement = null;
|
||||
exhausted = false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -159,8 +159,6 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
|
|||
*/
|
||||
protected boolean reset(final Reader reader) throws IOException {
|
||||
source.reset(reader);
|
||||
if(sink != source)
|
||||
sink.reset(); // only reset if the sink reference is different from source
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
@ -215,8 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
|
|||
public void testWithKeywordAttribute() throws IOException {
|
||||
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
|
||||
set.add("строеве");
|
||||
WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
|
||||
new StringReader("строевете строеве"));
|
||||
MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
|
||||
|
||||
BulgarianStemFilter filter = new BulgarianStemFilter(
|
||||
new KeywordMarkerFilter(tokenStream, set));
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.StringReader;
|
|||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharReader;
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
|
||||
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -64,55 +64,55 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
public void testNothingChange() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test1to1() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test1to2() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test1to3() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
|
||||
}
|
||||
|
||||
public void test2to4() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
|
||||
}
|
||||
|
||||
public void test2to1() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
|
||||
}
|
||||
|
||||
public void test3to1() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
|
||||
}
|
||||
|
||||
public void test4to2() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
|
||||
}
|
||||
|
||||
public void test5to0() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts, new String[0]);
|
||||
}
|
||||
|
||||
|
@ -136,7 +136,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
|
|||
//
|
||||
public void testTokenStream() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts,
|
||||
new String[]{"i","i","jj","kkk","llll","cc","b","a"},
|
||||
new int[]{0,2,4,6,8,11,16,20},
|
||||
|
@ -157,7 +157,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
|
|||
public void testChained() throws Exception {
|
||||
CharStream cs = new MappingCharFilter( normMap,
|
||||
new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
|
||||
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
|
||||
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
|
||||
assertTokenStreamContents(ts,
|
||||
new String[]{"a","llllllll","i"},
|
||||
new int[]{0,5,8},
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.StringReader;
|
|||
import java.util.Arrays;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
|
@ -90,7 +91,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
public TokenStream tokenStream(String field, Reader in) {
|
||||
return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
|
||||
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -159,7 +160,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
public TokenStream tokenStream(String field, Reader in) {
|
||||
return new CommonGramsFilter(TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
|
||||
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -245,7 +246,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testCaseSensitive() throws Exception {
|
||||
final String input = "How The s a brown s cow d like A B thing?";
|
||||
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
|
||||
TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
|
||||
assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
|
||||
"s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow",
|
||||
|
@ -257,7 +258,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testLastWordisStopWord() throws Exception {
|
||||
final String input = "dog the";
|
||||
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
|
||||
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
|
||||
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
|
||||
assertTokenStreamContents(nsf, new String[] { "dog_the" });
|
||||
|
@ -268,7 +269,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testFirstWordisStopWord() throws Exception {
|
||||
final String input = "the dog";
|
||||
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
|
||||
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
|
||||
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
|
||||
assertTokenStreamContents(nsf, new String[] { "the_dog" });
|
||||
|
@ -279,7 +280,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testOneWordQueryStopWord() throws Exception {
|
||||
final String input = "the";
|
||||
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
|
||||
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
|
||||
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
|
||||
assertTokenStreamContents(nsf, new String[] { "the" });
|
||||
|
@ -290,7 +291,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testOneWordQuery() throws Exception {
|
||||
final String input = "monster";
|
||||
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
|
||||
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
|
||||
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
|
||||
assertTokenStreamContents(nsf, new String[] { "monster" });
|
||||
|
@ -301,7 +302,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void TestFirstAndLastStopWord() throws Exception {
|
||||
final String input = "the of";
|
||||
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
|
||||
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
|
||||
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
|
||||
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
|
||||
assertTokenStreamContents(nsf, new String[] { "the_of" });
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.StringReader;
|
|||
import org.xml.sax.InputSource;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
|
@ -35,8 +36,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
.getHyphenationTree(is);
|
||||
|
||||
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"min veninde som er lidt af en læsehest")), hyphenator,
|
||||
new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false),
|
||||
hyphenator,
|
||||
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
|
||||
|
@ -55,8 +56,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
// the word basket will not be added due to the longest match option
|
||||
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
|
||||
"basketballkurv")), hyphenator, dict,
|
||||
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
|
||||
hyphenator, dict,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
|
||||
assertTokenStreamContents(tf,
|
||||
|
@ -77,7 +78,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
|
||||
TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
|
||||
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
|
||||
hyphenator,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
|
||||
2, 4);
|
||||
|
@ -89,7 +90,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
tf = new HyphenationCompoundWordTokenFilter(
|
||||
TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
|
||||
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
|
||||
hyphenator,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
|
||||
4, 6);
|
||||
|
@ -101,7 +102,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
|
||||
tf = new HyphenationCompoundWordTokenFilter(
|
||||
TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
|
||||
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
|
||||
hyphenator,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
|
||||
4, 10);
|
||||
|
@ -120,9 +121,10 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
"Sko", "Vind", "Rute", "Torkare", "Blad" };
|
||||
|
||||
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT,
|
||||
new MockTokenizer(
|
||||
new StringReader(
|
||||
"Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
|
||||
"Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"),
|
||||
MockTokenizer.WHITESPACE, false),
|
||||
dict);
|
||||
|
||||
assertTokenStreamContents(tf, new String[] { "Bildörr", "Bil", "dörr", "Bilmotor",
|
||||
|
@ -149,7 +151,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
|
|||
"Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
|
||||
|
||||
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
|
||||
new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
|
||||
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
|
||||
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.ArrayList;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
@ -36,36 +37,23 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
|
|||
public void testExactCase() throws IOException {
|
||||
StringReader reader = new StringReader("Now is The Time");
|
||||
Set<String> stopWords = asSet("is", "the", "Time");
|
||||
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
|
||||
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
||||
assertTrue(stream.incrementToken());
|
||||
assertEquals("Now", termAtt.toString());
|
||||
assertTrue(stream.incrementToken());
|
||||
assertEquals("The", termAtt.toString());
|
||||
assertFalse(stream.incrementToken());
|
||||
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
|
||||
assertTokenStreamContents(stream, new String[] { "Now", "The" });
|
||||
}
|
||||
|
||||
public void testIgnoreCase() throws IOException {
|
||||
StringReader reader = new StringReader("Now is The Time");
|
||||
Set<String> stopWords = asSet( "is", "the", "Time" );
|
||||
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
|
||||
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
||||
assertTrue(stream.incrementToken());
|
||||
assertEquals("Now", termAtt.toString());
|
||||
assertFalse(stream.incrementToken());
|
||||
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
|
||||
assertTokenStreamContents(stream, new String[] { "Now" });
|
||||
}
|
||||
|
||||
public void testStopFilt() throws IOException {
|
||||
StringReader reader = new StringReader("Now is The Time");
|
||||
String[] stopWords = new String[] { "is", "the", "Time" };
|
||||
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
|
||||
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
|
||||
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
||||
assertTrue(stream.incrementToken());
|
||||
assertEquals("Now", termAtt.toString());
|
||||
assertTrue(stream.incrementToken());
|
||||
assertEquals("The", termAtt.toString());
|
||||
assertFalse(stream.incrementToken());
|
||||
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
|
||||
assertTokenStreamContents(stream, new String[] { "Now", "The" });
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -85,11 +73,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
|
|||
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
|
||||
// with increments
|
||||
StringReader reader = new StringReader(sb.toString());
|
||||
StopFilter stpf = new StopFilter(Version.LUCENE_40, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
|
||||
StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
|
||||
doTestStopPositons(stpf,true);
|
||||
// without increments
|
||||
reader = new StringReader(sb.toString());
|
||||
stpf = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
|
||||
stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
|
||||
doTestStopPositons(stpf,false);
|
||||
// with increments, concatenating two stop filters
|
||||
ArrayList<String> a0 = new ArrayList<String>();
|
||||
|
@ -108,7 +96,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
|
|||
Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
|
||||
Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
|
||||
reader = new StringReader(sb.toString());
|
||||
StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet0); // first part of the set
|
||||
StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
|
||||
stpf0.setEnablePositionIncrements(true);
|
||||
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
|
||||
doTestStopPositons(stpf01,true);
|
||||
|
@ -119,6 +107,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
|
|||
stpf.setEnablePositionIncrements(enableIcrements);
|
||||
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
|
||||
stpf.reset();
|
||||
for (int i=0; i<20; i+=3) {
|
||||
assertTrue(stpf.incrementToken());
|
||||
log("Token "+i+": "+stpf);
|
||||
|
@ -127,6 +116,8 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
|
|||
assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
|
||||
}
|
||||
assertFalse(stpf.incrementToken());
|
||||
stpf.end();
|
||||
stpf.close();
|
||||
}
|
||||
|
||||
// print debug info depending on VERBOSE
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
|
||||
|
@ -278,7 +278,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
|
|||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set.add("hole");
|
||||
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
|
||||
new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
|
||||
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
|
||||
}
|
||||
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
@ -36,7 +36,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new GermanLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
@ -36,7 +36,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
/**
|
||||
|
@ -34,7 +34,7 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -22,12 +22,11 @@ import java.io.Reader;
|
|||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
||||
|
@ -41,7 +40,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer t = new KeywordTokenizer(reader);
|
||||
Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
|
||||
return new TokenStreamComponents(t, new PorterStemFilter(t));
|
||||
}
|
||||
};
|
||||
|
@ -57,7 +56,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
|
|||
public void testWithKeywordAttribute() throws IOException {
|
||||
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
|
||||
set.add("yourselves");
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
|
||||
Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
|
||||
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
|
||||
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
|
||||
}
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
@ -36,7 +36,7 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
@ -36,7 +36,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
@ -36,7 +36,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
|
||||
|
||||
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
|
||||
|
@ -36,7 +36,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName,
|
||||
Reader reader) {
|
||||
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
|
||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
|
||||
}
|
||||
};
|
||||
|
|
Some files were not shown because too many files have changed in this diff.