merged with trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1124321 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2011-05-18 16:24:27 +00:00
commit 43e40e8844
193 changed files with 5893 additions and 1300 deletions

View File

@ -53,6 +53,8 @@
<classpathentry kind="src" path="modules/analysis/stempel/src/test"/> <classpathentry kind="src" path="modules/analysis/stempel/src/test"/>
<classpathentry kind="src" path="modules/benchmark/src/java"/> <classpathentry kind="src" path="modules/benchmark/src/java"/>
<classpathentry kind="src" path="modules/benchmark/src/test"/> <classpathentry kind="src" path="modules/benchmark/src/test"/>
<classpathentry kind="src" path="modules/grouping/src/java"/>
<classpathentry kind="src" path="modules/grouping/src/test"/>
<classpathentry kind="src" path="solr/src/java"/> <classpathentry kind="src" path="solr/src/java"/>
<classpathentry kind="src" path="solr/src/webapp/src"/> <classpathentry kind="src" path="solr/src/webapp/src"/>
<classpathentry kind="src" path="solr/src/common"/> <classpathentry kind="src" path="solr/src/common"/>
@ -124,8 +126,8 @@
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-glassfish-2.1.v20091210.jar"/> <classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-glassfish-2.1.v20091210.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-jetty-6.1.26.jar"/> <classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-jetty-6.1.26.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-api-2.1-glassfish-2.1.v20091210.jar"/> <classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-api-2.1-glassfish-2.1.v20091210.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.4.2.jar"/> <classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.5.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.1.jar"/> <classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-core-asl-1.5.2.jar"/> <classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-core-asl-1.5.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-mapper-asl-1.5.2.jar"/> <classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-mapper-asl-1.5.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/mahout-collections-0.3.jar"/> <classpathentry kind="lib" path="solr/contrib/clustering/lib/mahout-collections-0.3.jar"/>

View File

@ -26,6 +26,7 @@
<buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" /> <buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" /> <buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" /> <buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/build.xml" /> <buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" /> <buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" /> <buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />

View File

@ -26,6 +26,7 @@
<module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" /> <module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" /> <module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" /> <module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
<module filepath="$PROJECT_DIR$/solr/solr.iml" /> <module filepath="$PROJECT_DIR$/solr/solr.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" /> <module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" /> <module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />

View File

@ -71,6 +71,13 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" /> <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option> <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration> </configuration>
<configuration default="false" name="grouping module" type="JUnit" factoryName="JUnit">
<module name="grouping" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/grouping/build" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="highlighter contrib" type="JUnit" factoryName="JUnit"> <configuration default="false" name="highlighter contrib" type="JUnit" factoryName="JUnit">
<module name="highlighter" /> <module name="highlighter" />
<option name="TEST_OBJECT" value="package" /> <option name="TEST_OBJECT" value="package" />
@ -204,7 +211,7 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" /> <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option> <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration> </configuration>
<list size="29"> <list size="30">
<item index="0" class="java.lang.String" itemvalue="JUnit.analysis-extras contrib" /> <item index="0" class="java.lang.String" itemvalue="JUnit.analysis-extras contrib" />
<item index="1" class="java.lang.String" itemvalue="JUnit.ant contrib" /> <item index="1" class="java.lang.String" itemvalue="JUnit.ant contrib" />
<item index="2" class="java.lang.String" itemvalue="JUnit.bdb contrib" /> <item index="2" class="java.lang.String" itemvalue="JUnit.bdb contrib" />
@ -215,25 +222,26 @@
<item index="7" class="java.lang.String" itemvalue="JUnit.dataimporthandler contrib" /> <item index="7" class="java.lang.String" itemvalue="JUnit.dataimporthandler contrib" />
<item index="8" class="java.lang.String" itemvalue="JUnit.extraction contrib" /> <item index="8" class="java.lang.String" itemvalue="JUnit.extraction contrib" />
<item index="9" class="java.lang.String" itemvalue="JUnit.extras from dataimporthandler contrib" /> <item index="9" class="java.lang.String" itemvalue="JUnit.extras from dataimporthandler contrib" />
<item index="10" class="java.lang.String" itemvalue="JUnit.highlighter contrib" /> <item index="10" class="java.lang.String" itemvalue="JUnit.grouping module" />
<item index="11" class="java.lang.String" itemvalue="JUnit.icu analysis module" /> <item index="11" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
<item index="12" class="java.lang.String" itemvalue="JUnit.instantiated contrib" /> <item index="12" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
<item index="13" class="java.lang.String" itemvalue="JUnit.lucene" /> <item index="13" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
<item index="14" class="java.lang.String" itemvalue="JUnit.lucli contrib" /> <item index="14" class="java.lang.String" itemvalue="JUnit.lucene" />
<item index="15" class="java.lang.String" itemvalue="JUnit.memory contrib" /> <item index="15" class="java.lang.String" itemvalue="JUnit.lucli contrib" />
<item index="16" class="java.lang.String" itemvalue="JUnit.misc contrib" /> <item index="16" class="java.lang.String" itemvalue="JUnit.memory contrib" />
<item index="17" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" /> <item index="17" class="java.lang.String" itemvalue="JUnit.misc contrib" />
<item index="18" class="java.lang.String" itemvalue="JUnit.queries contrib" /> <item index="18" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
<item index="19" class="java.lang.String" itemvalue="JUnit.queryparser contrib" /> <item index="19" class="java.lang.String" itemvalue="JUnit.queries contrib" />
<item index="20" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" /> <item index="20" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
<item index="21" class="java.lang.String" itemvalue="JUnit.solr" /> <item index="21" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
<item index="22" class="java.lang.String" itemvalue="JUnit.spatial contrib" /> <item index="22" class="java.lang.String" itemvalue="JUnit.solr" />
<item index="23" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" /> <item index="23" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
<item index="24" class="java.lang.String" itemvalue="JUnit.stempel analysis module" /> <item index="24" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
<item index="25" class="java.lang.String" itemvalue="JUnit.swing contrib" /> <item index="25" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="26" class="java.lang.String" itemvalue="JUnit.uima contrib" /> <item index="26" class="java.lang.String" itemvalue="JUnit.swing contrib" />
<item index="27" class="java.lang.String" itemvalue="JUnit.wordnet contrib" /> <item index="27" class="java.lang.String" itemvalue="JUnit.uima contrib" />
<item index="28" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" /> <item index="28" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
<item index="29" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
</list> </list>
</component> </component>
</project> </project>

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/build/classes/java" />
<output-test url="file://$MODULE_DIR$/build/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/work" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

View File

@ -0,0 +1,71 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-parent</artifactId>
<version>@version@</version>
<relativePath>../../lucene/pom.xml</relativePath>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-grouping</artifactId>
<packaging>jar</packaging>
<name>Lucene Grouping</name>
<description>Lucene Grouping Module</description>
<properties>
<module-directory>modules/grouping</module-directory>
<build-directory>build</build-directory>
</properties>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<directory>${build-directory}</directory>
<outputDirectory>${build-directory}/classes/java</outputDirectory>
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
<sourceDirectory>src/java</sourceDirectory>
<testSourceDirectory>src/test</testSourceDirectory>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>
<excludes>
<exclude>**/*.java</exclude>
</excludes>
</testResource>
</testResources>
</build>
</project>

View File

@ -33,6 +33,7 @@
<modules> <modules>
<module>analysis</module> <module>analysis</module>
<module>benchmark</module> <module>benchmark</module>
<module>grouping</module>
</modules> </modules>
<build> <build>
<directory>build/lucene-modules-aggregator</directory> <directory>build/lucene-modules-aggregator</directory>

View File

@ -24,6 +24,7 @@
<groupId>org.apache</groupId> <groupId>org.apache</groupId>
<artifactId>apache</artifactId> <artifactId>apache</artifactId>
<version>8</version> <version>8</version>
<relativePath/>
</parent> </parent>
<groupId>org.apache.lucene</groupId> <groupId>org.apache.lucene</groupId>
<artifactId>lucene-solr-grandparent</artifactId> <artifactId>lucene-solr-grandparent</artifactId>
@ -105,14 +106,6 @@
</license> </license>
</licenses> </licenses>
<repositories> <repositories>
<repository>
<id>carrot2.org</id>
<name>Carrot2 Maven2 repository</name>
<url>http://download.carrot2.org/maven2/</url>
<snapshots>
<updatePolicy>never</updatePolicy>
</snapshots>
</repository>
<repository> <repository>
<id>apache.snapshots</id> <id>apache.snapshots</id>
<name>Apache Snapshot Repository</name> <name>Apache Snapshot Repository</name>
@ -305,7 +298,7 @@
<dependency> <dependency>
<groupId>org.carrot2</groupId> <groupId>org.carrot2</groupId>
<artifactId>carrot2-core</artifactId> <artifactId>carrot2-core</artifactId>
<version>3.4.2</version> <version>3.5.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.codehaus.woodstox</groupId> <groupId>org.codehaus.woodstox</groupId>

View File

@ -162,11 +162,6 @@ Changes in Runtime Behavior
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather * LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler) than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
* LUCENE-1076: The default merge policy (TieredMergePolicy) is now
able to merge non-contiguous segments, which means docIDs no longer
necessarily say "in order". If this is a problem then you can use
either of the LogMergePolicy impls. (Mike McCandless)
* LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked * LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked
per IndexWriter session, which resulted in FieldInfos that had the FieldInfo per IndexWriter session, which resulted in FieldInfos that had the FieldInfo
properties from all previous segments combined. Field numbers are now tracked properties from all previous segments combined. Field numbers are now tracked
@ -416,6 +411,10 @@ New features
it's able to handle multi-valued fields and does not hold the term it's able to handle multi-valued fields and does not hold the term
bytes in RAM. (Mike McCandless) bytes in RAM. (Mike McCandless)
* LUCENE-1421, LUCENE-3102: added CachingCollector which allow you to cache
document IDs and scores encountered during the search, and "replay" them to
another Collector. (Mike McCandless, Shai Erera)
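  As a rough sketch of the intended use (the collector wiring and the create()
  arguments here are assumptions based on the entry above, not code taken from
  this patch):

    TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, true);
    // cache hits (and scores) in up to ~64 MB of RAM while the real search runs
    CachingCollector cache = CachingCollector.create(topDocs, true, 64.0);
    searcher.search(query, cache);
    if (cache.isCached()) {
      cache.replay(secondCollector);  // feed the same hits to another Collector
    }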
Optimizations Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms * LUCENE-2588: Don't store unnecessary suffixes when writing the terms
@ -452,6 +451,9 @@ Bug fixes
indexes, causing existing deletions to be applied on the incoming indexes as indexes, causing existing deletions to be applied on the incoming indexes as
well. (Shai Erera, Mike McCandless) well. (Shai Erera, Mike McCandless)
* LUCENE-3068: sloppy phrase query failed to match valid documents when multiple
query terms had same position in the query. (Doron Cohen)
Test Cases Test Cases
* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to * LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
@ -476,9 +478,15 @@ Changes in backwards compatibility policy
(Mike McCandless, Shai Erera) (Mike McCandless, Shai Erera)
* LUCENE-3084: MergePolicy.OneMerge.segments was changed from * LUCENE-3084: MergePolicy.OneMerge.segments was changed from
SegmentInfos to a List<SegmentInfo>; this is actually a minor change SegmentInfos to a List<SegmentInfo>. SegmentInfos itself was changed
because SegmentInfos itself extends Vector<SegmentInfo>. (Uwe to no longer extend Vector<SegmentInfo> (to update code that is using
Schindler, Mike McCandless) Vector-API, use the new asList() and asSet() methods returning unmodifiable
collections; modifying SegmentInfos is now only possible through
the explicitly declared methods). IndexWriter.segString() now takes
Iterable<SegmentInfo> instead of List<SegmentInfo>. A simple recompile
should fix this. MergePolicy and SegmentInfos are internal/experimental
APIs not covered by the strict backwards compatibility policy.
(Uwe Schindler, Mike McCandless)
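  For code that previously iterated SegmentInfos through its Vector API, the
  migration looks roughly like this (a sketch; the unmodifiable asList() view is
  the one named above, and SegmentInfo's public name field is assumed here):

    for (SegmentInfo info : segmentInfos.asList()) {
      // read-only iteration; structural changes go through SegmentInfos' own methods
      System.out.println(info.name);
    }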
Changes in runtime behavior Changes in runtime behavior
@ -492,6 +500,13 @@ Changes in runtime behavior
returns NumericField instances. (Uwe Schindler, Ryan McKinley, returns NumericField instances. (Uwe Schindler, Ryan McKinley,
Mike McCandless) Mike McCandless)
* LUCENE-1076: Changed the default merge policy from
LogByteSizeMergePolicy to TieredMergePolicy, as of Version.LUCENE_32
(passed to IndexWriterConfig), which is able to merge non-contiguous
segments. This means docIDs no longer necessarily stay "in order"
during indexing. If this is a problem then you can use either of
the LogMergePolicy impls. (Mike McCandless)
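  For applications that rely on docID order, a minimal sketch of opting back into
  a LogMergePolicy via IndexWriterConfig (analyzer and dir are placeholders):

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
    conf.setMergePolicy(new LogByteSizeMergePolicy());  // keeps merged segments contiguous
    IndexWriter writer = new IndexWriter(dir, conf);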
New features New features
* LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader * LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader

View File

@ -75,10 +75,36 @@ Bug Fixes
caused a problem if you consumed a tokenstream, then reused it, added different caused a problem if you consumed a tokenstream, then reused it, added different
attributes to it, and consumed it again. (Robert Muir, Uwe Schindler) attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
* LUCENE-3113: Fixed some minor analysis bugs: double-reset() in ReusableAnalyzerBase
and ShingleAnalyzerWrapper, missing end() implementations in PrefixAwareTokenFilter
and PrefixAndSuffixAwareTokenFilter, invocations of incrementToken() after it
already returned false in CommonGramsQueryFilter, HyphenatedWordsFilter,
ShingleFilter, and SynonymsFilter. (Robert Muir, Steven Rowe, Uwe Schindler)
New Features New Features
* LUCENE-3016: Add analyzer for Latvian. (Robert Muir) * LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
* LUCENE-1421: create new grouping contrib module, enabling search
results to be grouped by a single-valued indexed field. This
module was factored out of Solr's grouping implementation, but
it cannot group by function queries nor arbitrary queries. (Mike
McCandless)
* LUCENE-3098: add AllGroupsCollector, to collect all unique groups
(but in unspecified order). (Martijn van Groningen via Mike
McCandless)
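  A rough sketch of the two-pass grouping flow this module enables; the class
  names and constructor arguments below are assumptions about the new API and
  may not match it exactly:

    Sort groupSort = Sort.RELEVANCE;
    FirstPassGroupingCollector first = new FirstPassGroupingCollector("author", groupSort, 10);
    searcher.search(query, first);
    Collection<SearchGroup> topGroups = first.getTopGroups(0, true);
    if (topGroups != null) {
      SecondPassGroupingCollector second = new SecondPassGroupingCollector(
          topGroups, groupSort, groupSort, 5, true, false, true);
      searcher.search(query, second);
      TopGroups hits = second.getTopGroups(0);  // top documents per group
    }
    // LUCENE-3098: collect every unique group value (order unspecified)
    AllGroupsCollector all = new AllGroupsCollector("author");
    searcher.search(query, all);
    int groupCount = all.getGroupCount();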
* LUCENE-3092: Added NRTCachingDirectory in contrib/misc, which
caches small segments in RAM. This is useful, in the near-real-time
case where the indexing rate is lowish but the reopen rate is
highish, to take load off the IO system. (Mike McCandless)
Optimizations
* LUCENE-3040: Switch all analysis consumers (highlighter, morelikethis, memory, ...)
over to reusableTokenStream(). (Robert Muir)
======================= Lucene 3.1.0 ======================= ======================= Lucene 3.1.0 =======================
Changes in backwards compatibility policy Changes in backwards compatibility policy

View File

@ -17,8 +17,6 @@ package org.apache.lucene.ant;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.ant.DocumentTestCase; import org.apache.lucene.ant.DocumentTestCase;
import org.apache.lucene.ant.HtmlDocument; import org.apache.lucene.ant.HtmlDocument;
@ -27,7 +25,8 @@ public class HtmlDocumentTest extends DocumentTestCase
HtmlDocument doc; HtmlDocument doc;
@Override @Override
public void setUp() throws IOException { public void setUp() throws Exception {
super.setUp();
doc = new HtmlDocument(getFile("test.html")); doc = new HtmlDocument(getFile("test.html"));
} }
@ -37,8 +36,9 @@ public class HtmlDocumentTest extends DocumentTestCase
} }
@Override @Override
public void tearDown() { public void tearDown() throws Exception {
doc = null; doc = null;
super.tearDown();
} }
} }

View File

@ -17,8 +17,6 @@ package org.apache.lucene.ant;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.ant.DocumentTestCase; import org.apache.lucene.ant.DocumentTestCase;
import org.apache.lucene.ant.TextDocument; import org.apache.lucene.ant.TextDocument;
@ -27,7 +25,8 @@ public class TextDocumentTest extends DocumentTestCase
TextDocument doc; TextDocument doc;
@Override @Override
public void setUp() throws IOException { public void setUp() throws Exception {
super.setUp();
doc = new TextDocument(getFile("test.txt")); doc = new TextDocument(getFile("test.txt"));
} }
@ -36,8 +35,9 @@ public class TextDocumentTest extends DocumentTestCase
} }
@Override @Override
public void tearDown() { public void tearDown() throws Exception {
doc = null; doc = null;
super.tearDown();
} }
} }

View File

@ -78,7 +78,7 @@ public class Highlighter
public final String getBestFragment(Analyzer analyzer, String fieldName,String text) public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
throws IOException, InvalidTokenOffsetsException throws IOException, InvalidTokenOffsetsException
{ {
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
return getBestFragment(tokenStream, text); return getBestFragment(tokenStream, text);
} }
@ -130,7 +130,7 @@ public class Highlighter
int maxNumFragments) int maxNumFragments)
throws IOException, InvalidTokenOffsetsException throws IOException, InvalidTokenOffsetsException
{ {
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text)); TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
return getBestFragments(tokenStream, text, maxNumFragments); return getBestFragments(tokenStream, text, maxNumFragments);
} }

View File

@ -286,7 +286,11 @@ public class TokenSources {
// convenience method // convenience method
public static TokenStream getTokenStream(String field, String contents, public static TokenStream getTokenStream(String field, String contents,
Analyzer analyzer) { Analyzer analyzer) {
return analyzer.tokenStream(field, new StringReader(contents)); try {
return analyzer.reusableTokenStream(field, new StringReader(contents));
} catch (IOException ex) {
throw new RuntimeException(ex);
}
} }
} }

View File

@ -532,7 +532,7 @@ public class InstantiatedIndexWriter implements Closeable {
if (field.tokenStreamValue() != null) { if (field.tokenStreamValue() != null) {
tokenStream = field.tokenStreamValue(); tokenStream = field.tokenStreamValue();
} else { } else {
tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue())); tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
} }
// reset the TokenStream to the first token // reset the TokenStream to the first token

View File

@ -305,11 +305,12 @@ class LuceneMethods {
int position = 0; int position = 0;
// Tokenize field and add to postingTable // Tokenize field and add to postingTable
TokenStream stream = analyzer.tokenStream(fieldName, reader); TokenStream stream = analyzer.reusableTokenStream(fieldName, reader);
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
try { try {
stream.reset();
while (stream.incrementToken()) { while (stream.incrementToken()) {
position += (posIncrAtt.getPositionIncrement() - 1); position += (posIncrAtt.getPositionIncrement() - 1);
position++; position++;
@ -323,6 +324,7 @@ class LuceneMethods {
} }
if (position > maxFieldLength) break; if (position > maxFieldLength) break;
} }
stream.end();
} finally { } finally {
stream.close(); stream.close();
} }
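  The hunks above show the pattern this commit applies across the contribs:
  fetch the analyzer's reusable stream, reset() it before consuming, and call
  end() before close(). A standalone sketch of that consumption loop (the field
  name and text are placeholders):

    TokenStream stream = analyzer.reusableTokenStream("body", new StringReader(text));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    try {
      stream.reset();                 // required before the first incrementToken()
      while (stream.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      stream.end();                   // records end-of-stream offset state
    } finally {
      stream.close();
    }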

View File

@ -262,8 +262,12 @@ public class MemoryIndex {
if (analyzer == null) if (analyzer == null)
throw new IllegalArgumentException("analyzer must not be null"); throw new IllegalArgumentException("analyzer must not be null");
TokenStream stream = analyzer.tokenStream(fieldName, TokenStream stream;
new StringReader(text)); try {
stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
} catch (IOException ex) {
throw new RuntimeException(ex);
}
addField(fieldName, stream); addField(fieldName, stream);
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.Set; import java.util.Set;
/** /**
@ -135,7 +136,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if (last > 1 || !isOptimized(infos.info(0))) { if (last > 1 || !isOptimized(infos.info(0))) {
spec = new MergeSpecification(); spec = new MergeSpecification();
spec.add(new OneMerge(infos.range(0, last))); spec.add(new OneMerge(infos.asList().subList(0, last)));
} }
} else if (last > maxNumSegments) { } else if (last > maxNumSegments) {
@ -192,7 +193,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
prev = backLink[i][prev]; prev = backLink[i][prev];
int mergeStart = i + prev; int mergeStart = i + prev;
if((mergeEnd - mergeStart) > 1) { if((mergeEnd - mergeStart) > 1) {
spec.add(new OneMerge(infos.range(mergeStart, mergeEnd))); spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
} else { } else {
if(partialExpunge) { if(partialExpunge) {
SegmentInfo info = infos.info(mergeStart); SegmentInfo info = infos.info(mergeStart);
@ -208,7 +209,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(partialExpunge && maxDelCount > 0) { if(partialExpunge && maxDelCount > 0) {
// expunge deletes // expunge deletes
spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1))); spec.add(new OneMerge(Collections.singletonList(infos.info(expungeCandidate))));
} }
return spec; return spec;
@ -250,7 +251,10 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
MergeSpecification spec = null; MergeSpecification spec = null;
if(numLargeSegs < numSegs) { if(numLargeSegs < numSegs) {
SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs); // hack to create a shallow sub-range as SegmentInfos instance,
// it does not clone all metadata, but LogMerge does not need it
final SegmentInfos smallSegments = new SegmentInfos();
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
spec = super.findMergesToExpungeDeletes(smallSegments); spec = super.findMergesToExpungeDeletes(smallSegments);
} }
@ -258,7 +262,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
for(int i = 0; i < numLargeSegs; i++) { for(int i = 0; i < numLargeSegs; i++) {
SegmentInfo info = infos.info(i); SegmentInfo info = infos.info(i);
if(info.hasDeletions()) { if(info.hasDeletions()) {
spec.add(new OneMerge(infos.range(i, i + 1))); spec.add(new OneMerge(Collections.singletonList(infos.info(i))));
} }
} }
return spec; return spec;
@ -296,7 +300,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(totalSmallSegSize < targetSegSize * 2) { if(totalSmallSegSize < targetSegSize * 2) {
MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge); MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge);
if(spec == null) spec = new MergeSpecification(); // should not happen if(spec == null) spec = new MergeSpecification(); // should not happen
spec.add(new OneMerge(infos.range(numLargeSegs, numSegs))); spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
return spec; return spec;
} else { } else {
return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge); return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
@ -311,11 +315,13 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(size(info) < sizeThreshold) break; if(size(info) < sizeThreshold) break;
startSeg++; startSeg++;
} }
spec.add(new OneMerge(infos.range(startSeg, numSegs))); spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
return spec; return spec;
} else { } else {
// apply the log merge policy to small segments. // hack to create a shallow sub-range as SegmentInfos instance,
SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs); // it does not clone all metadata, but LogMerge does not need it
final SegmentInfos smallSegments = new SegmentInfos();
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
MergeSpecification spec = super.findMerges(smallSegments); MergeSpecification spec = super.findMerges(smallSegments);
if(_partialExpunge) { if(_partialExpunge) {
@ -342,7 +348,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
} }
} }
if (maxDelCount > 0) { if (maxDelCount > 0) {
return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1)); return new OneMerge(Collections.singletonList(infos.info(expungeCandidate)));
} }
return null; return null;
} }

View File

@ -0,0 +1,289 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.store.RAMDirectory; // javadocs
import org.apache.lucene.util.IOUtils;
// TODO
// - let subclass dictate policy...?
// - rename to MergeCachingDir? NRTCachingDir
/**
* Wraps a {@link RAMDirectory}
* around any provided delegate directory, to
* be used during NRT search. Make sure you pull the merge
* scheduler using {@link #getMergeScheduler} and pass that to your
* {@link IndexWriter}; this class uses that to keep track of which
* merges are being done by which threads, to decide when to
* cache each written file.
*
* <p>This class is likely only useful in a near-real-time
* context, where indexing rate is lowish but reopen
* rate is highish, resulting in many tiny files being
* written. This directory keeps such segments (as well as
* the segments produced by merging them, as long as they
* are small enough), in RAM.</p>
*
* <p>This is safe to use: when your app calls {@link IndexWriter#commit},
* all cached files will be flushed from the cache and sync'd.</p>
*
* <p><b>NOTE</b>: this class is somewhat sneaky in its
* approach for spying on merges to determine the size of a
* merge: it records which threads are running which merges
* by watching ConcurrentMergeScheduler's doMerge method.
* While this works correctly, likely future versions of
* this class will take a more general approach.
*
* <p>Here's a simple example usage:
*
* <pre>
* Directory fsDir = FSDirectory.open(new File("/path/to/index"));
* NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
* IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
* conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
* IndexWriter writer = new IndexWriter(cachedFSDir, conf);
* </pre>
*
* <p>This will cache all newly flushed segments, all merges
* whose expected segment size is <= 5 MB, unless the net
* cached bytes exceeds 60 MB at which point all writes will
* not be cached (until the net bytes falls below 60 MB).</p>
*
* @lucene.experimental
*/
public class NRTCachingDirectory extends Directory {
private final RAMDirectory cache = new RAMDirectory();
private final Directory delegate;
private final long maxMergeSizeBytes;
private final long maxCachedBytes;
private static final boolean VERBOSE = false;
/**
* We will cache a newly created output if 1) it's a
* flush or a merge and the estimated size of the merged segment is <=
* maxMergeSizeMB, and 2) the total cached bytes is <=
* maxCachedMB */
public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) {
this.delegate = delegate;
maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024);
maxCachedBytes = (long) (maxCachedMB*1024*1024);
}
@Override
public synchronized String[] listAll() throws IOException {
final Set<String> files = new HashSet<String>();
for(String f : cache.listAll()) {
files.add(f);
}
for(String f : delegate.listAll()) {
assert !files.contains(f);
files.add(f);
}
return files.toArray(new String[files.size()]);
}
/** Returns how many bytes are being used by the
* RAMDirectory cache */
public long sizeInBytes() {
return cache.sizeInBytes();
}
@Override
public synchronized boolean fileExists(String name) throws IOException {
return cache.fileExists(name) || delegate.fileExists(name);
}
@Override
public synchronized long fileModified(String name) throws IOException {
if (cache.fileExists(name)) {
return cache.fileModified(name);
} else {
return delegate.fileModified(name);
}
}
@Override
public synchronized void touchFile(String name) throws IOException {
if (cache.fileExists(name)) {
cache.touchFile(name);
} else {
delegate.touchFile(name);
}
}
@Override
public synchronized void deleteFile(String name) throws IOException {
// Delete from both, in case we are currently uncaching:
if (VERBOSE) {
System.out.println("nrtdir.deleteFile name=" + name);
}
cache.deleteFile(name);
delegate.deleteFile(name);
}
@Override
public synchronized long fileLength(String name) throws IOException {
if (cache.fileExists(name)) {
return cache.fileLength(name);
} else {
return delegate.fileLength(name);
}
}
public String[] listCachedFiles() {
return cache.listAll();
}
@Override
public IndexOutput createOutput(String name) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.createOutput name=" + name);
}
if (doCacheWrite(name)) {
if (VERBOSE) {
System.out.println(" to cache");
}
return cache.createOutput(name);
} else {
return delegate.createOutput(name);
}
}
@Override
public void sync(Collection<String> fileNames) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.sync files=" + fileNames);
}
for(String fileName : fileNames) {
unCache(fileName);
}
delegate.sync(fileNames);
}
@Override
public synchronized IndexInput openInput(String name) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.openInput name=" + name);
}
if (cache.fileExists(name)) {
if (VERBOSE) {
System.out.println(" from cache");
}
return cache.openInput(name);
} else {
return delegate.openInput(name);
}
}
@Override
public synchronized IndexInput openInput(String name, int bufferSize) throws IOException {
if (cache.fileExists(name)) {
return cache.openInput(name, bufferSize);
} else {
return delegate.openInput(name, bufferSize);
}
}
@Override
public Lock makeLock(String name) {
return delegate.makeLock(name);
}
@Override
public void clearLock(String name) throws IOException {
delegate.clearLock(name);
}
/** Close this directory, which flushes any cached files
* to the delegate and then closes the delegate. */
@Override
public void close() throws IOException {
for(String fileName : cache.listAll()) {
unCache(fileName);
}
cache.close();
delegate.close();
}
private final ConcurrentHashMap<Thread,MergePolicy.OneMerge> merges = new ConcurrentHashMap<Thread,MergePolicy.OneMerge>();
public MergeScheduler getMergeScheduler() {
return new ConcurrentMergeScheduler() {
@Override
protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
try {
merges.put(Thread.currentThread(), merge);
super.doMerge(merge);
} finally {
merges.remove(Thread.currentThread());
}
}
};
}
/** Subclass can override this to customize logic; return
* true if this file should be written to the RAMDirectory. */
protected boolean doCacheWrite(String name) {
final MergePolicy.OneMerge merge = merges.get(Thread.currentThread());
//System.out.println(Thread.currentThread().getName() + ": CACHE check merge=" + merge + " size=" + (merge==null ? 0 : merge.estimatedMergeBytes));
return !name.equals(IndexFileNames.SEGMENTS_GEN) && (merge == null || merge.estimatedMergeBytes <= maxMergeSizeBytes) && cache.sizeInBytes() <= maxCachedBytes;
}
private void unCache(String fileName) throws IOException {
final IndexOutput out;
synchronized(this) {
if (!delegate.fileExists(fileName)) {
assert cache.fileExists(fileName);
out = delegate.createOutput(fileName);
} else {
out = null;
}
}
if (out != null) {
IndexInput in = null;
try {
in = cache.openInput(fileName);
in.copyBytes(out, in.length());
} finally {
IOUtils.closeSafely(in, out);
}
synchronized(this) {
cache.deleteFile(fileName);
}
}
}
}

View File

@ -0,0 +1,114 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;
public class TestNRTCachingDirectory extends LuceneTestCase {
public void testNRTAndCommit() throws Exception {
Directory dir = newDirectory();
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
conf.setMergeScheduler(cachedDir.getMergeScheduler());
RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
w.w.setInfoStream(VERBOSE ? System.out : null);
final LineFileDocs docs = new LineFileDocs(random);
final int numDocs = _TestUtil.nextInt(random, 100, 400);
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs);
}
final List<BytesRef> ids = new ArrayList<BytesRef>();
IndexReader r = null;
for(int docCount=0;docCount<numDocs;docCount++) {
final Document doc = docs.nextDoc();
ids.add(new BytesRef(doc.get("docid")));
w.addDocument(doc);
if (random.nextInt(20) == 17) {
if (r == null) {
r = IndexReader.open(w.w, false);
} else {
final IndexReader r2 = r.reopen();
if (r2 != r) {
r.close();
r = r2;
}
}
assertEquals(1+docCount, r.numDocs());
final IndexSearcher s = new IndexSearcher(r);
// Just make sure search can run; we can't assert
// totHits since it could be 0
TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
// System.out.println("tot hits " + hits.totalHits);
}
}
if (r != null) {
r.close();
}
// Close should force cache to clear since all files are sync'd
w.close();
final String[] cachedFiles = cachedDir.listCachedFiles();
for(String file : cachedFiles) {
System.out.println("FAIL: cached file " + file + " remains after sync");
}
assertEquals(0, cachedFiles.length);
r = IndexReader.open(dir);
for(BytesRef id : ids) {
assertEquals(1, r.docFreq("docid", id));
}
r.close();
cachedDir.close();
}
// NOTE: not a test; just here to make sure the code frag
// in the javadocs is correct!
public void verifyCompiles() throws Exception {
Analyzer analyzer = null;
Directory fsDir = FSDirectory.open(new File("/path/to/index"));
NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0);
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
IndexWriter writer = new IndexWriter(cachedFSDir, conf);
}
}

View File

@ -186,7 +186,7 @@ public class FuzzyLikeThisQuery extends Query
private void addTerms(IndexReader reader,FieldVals f) throws IOException private void addTerms(IndexReader reader,FieldVals f) throws IOException
{ {
if(f.queryString==null) return; if(f.queryString==null) return;
TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString)); TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
int corpusNumDocs=reader.numDocs(); int corpusNumDocs=reader.numDocs();

View File

@ -881,7 +881,7 @@ public final class MoreLikeThis {
throw new UnsupportedOperationException("To use MoreLikeThis without " + throw new UnsupportedOperationException("To use MoreLikeThis without " +
"term vectors, you must provide an Analyzer"); "term vectors, you must provide an Analyzer");
} }
TokenStream ts = analyzer.tokenStream(fieldName, r); TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
int tokenCount=0; int tokenCount=0;
// for every token // for every token
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

View File

@ -85,7 +85,7 @@ public final class SimilarityQueries
Set<?> stop) Set<?> stop)
throws IOException throws IOException
{ {
TokenStream ts = a.tokenStream( field, new StringReader( body)); TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
BooleanQuery tmp = new BooleanQuery(); BooleanQuery tmp = new BooleanQuery();

View File

@ -106,15 +106,16 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
} }
// get Analyzer from superclass and tokenize the term // get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); TokenStream source;
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
int countTokens = 0; int countTokens = 0;
try { try {
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
source.reset(); source.reset();
} catch (IOException e1) { } catch (IOException e1) {
throw new RuntimeException(e1); throw new RuntimeException(e1);
} }
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
while (true) { while (true) {
try { try {
if (!source.incrementToken()) break; if (!source.incrementToken()) break;
@ -194,14 +195,15 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
@Override @Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException { protected Query getPrefixQuery(String field, String termStr) throws ParseException {
// get Analyzer from superclass and tokenize the term // get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); TokenStream source;
List<String> tlist = new ArrayList<String>(); List<String> tlist = new ArrayList<String>();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
try { try {
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
source.reset(); source.reset();
} catch (IOException e1) { } catch (IOException e1) {
throw new RuntimeException(e1); throw new RuntimeException(e1);
} }
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
while (true) { while (true) {
try { try {
if (!source.incrementToken()) break; if (!source.incrementToken()) break;
@ -247,12 +249,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
throws ParseException { throws ParseException {
// get Analyzer from superclass and tokenize the term // get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr)); TokenStream source = null;
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
String nextToken = null; String nextToken = null;
boolean multipleTokens = false; boolean multipleTokens = false;
try { try {
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
source.reset(); source.reset();
if (source.incrementToken()) { if (source.incrementToken()) {
nextToken = termAtt.toString(); nextToken = termAtt.toString();
@ -292,7 +295,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
if (part1 != null) { if (part1 != null) {
// part1 // part1
try { try {
source = getAnalyzer().tokenStream(field, new StringReader(part1)); source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
termAtt = source.addAttribute(CharTermAttribute.class); termAtt = source.addAttribute(CharTermAttribute.class);
source.reset(); source.reset();
multipleTokens = false; multipleTokens = false;
@ -318,11 +321,10 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
} }
if (part2 != null) { if (part2 != null) {
// part2
source = getAnalyzer().tokenStream(field, new StringReader(part2));
termAtt = source.addAttribute(CharTermAttribute.class);
try { try {
// part2
source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
termAtt = source.addAttribute(CharTermAttribute.class);
source.reset(); source.reset();
if (source.incrementToken()) { if (source.incrementToken()) {
part2 = termAtt.toString(); part2 = termAtt.toString();

View File

@ -121,9 +121,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
String text = fieldNode.getTextAsString(); String text = fieldNode.getTextAsString();
String field = fieldNode.getFieldAsString(); String field = fieldNode.getFieldAsString();
TokenStream source = this.analyzer.tokenStream(field, new StringReader( TokenStream source;
text));
try { try {
source = this.analyzer.reusableTokenStream(field, new StringReader(text));
source.reset(); source.reset();
} catch (IOException e1) { } catch (IOException e1) {
throw new RuntimeException(e1); throw new RuntimeException(e1);

View File

@ -631,8 +631,9 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
} }
@Override @Override
public void tearDown() { public void tearDown() throws Exception {
BooleanQuery.setMaxClauseCount(originalMaxClauses); BooleanQuery.setMaxClauseCount(originalMaxClauses);
super.tearDown();
} }
} }

View File

@ -116,7 +116,7 @@ public final class SynExpand {
if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT); if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
// [1] Parse query into separate words so that when we expand we can avoid dups // [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query)); TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset(); ts.reset();
while (ts.incrementToken()) { while (ts.incrementToken()) {

View File

@ -124,7 +124,7 @@ public class SynLookup {
List<String> top = new LinkedList<String>(); // needs to be separately listed.. List<String> top = new LinkedList<String>(); // needs to be separately listed..
// [1] Parse query into separate words so that when we expand we can avoid dups // [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query)); TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) { while (ts.incrementToken()) {

View File

@ -76,10 +76,10 @@ public class LikeThisQueryBuilder implements QueryBuilder {
stopWordsSet=new HashSet<String>(); stopWordsSet=new HashSet<String>();
for (int i = 0; i < fields.length; i++) for (int i = 0; i < fields.length; i++)
{ {
TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
try try
{ {
TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset(); ts.reset();
while(ts.incrementToken()) { while(ts.incrementToken()) {
stopWordsSet.add(termAtt.toString()); stopWordsSet.add(termAtt.toString());

View File

@ -56,7 +56,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
try try
{ {
ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>(); ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value)); TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef(); BytesRef bytes = termAtt.getBytesRef();
ts.reset(); ts.reset();

View File

@ -57,11 +57,11 @@ public class TermsFilterBuilder implements FilterBuilder
TermsFilter tf = new TermsFilter(); TermsFilter tf = new TermsFilter();
String text = DOMUtils.getNonBlankTextOrFail(e); String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"); String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
try try
{ {
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null; Term term = null;
BytesRef bytes = termAtt.getBytesRef(); BytesRef bytes = termAtt.getBytesRef();
ts.reset(); ts.reset();

View File

@ -55,9 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false)); BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0)); bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try try
{ {
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null; Term term = null;
BytesRef bytes = termAtt.getBytesRef(); BytesRef bytes = termAtt.getBytesRef();

View File

@ -733,8 +733,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
// case we have to roll back: // case we have to roll back:
startCommit(); startCommit();
final SegmentInfos rollbackSegmentInfos = new SegmentInfos(); final List<SegmentInfo> rollbackSegments = segmentInfos.createBackupSegmentInfos(false);
rollbackSegmentInfos.addAll(segmentInfos);
boolean success = false; boolean success = false;
try { try {
@ -766,8 +765,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
deleter.refresh(); deleter.refresh();
// Restore all SegmentInfos (in case we pruned some) // Restore all SegmentInfos (in case we pruned some)
segmentInfos.clear(); segmentInfos.rollbackSegmentInfos(rollbackSegments);
segmentInfos.addAll(rollbackSegmentInfos);
} }
} }

View File

@ -126,7 +126,6 @@ final class DocumentsWriter {
final DocumentsWriterPerThreadPool perThreadPool; final DocumentsWriterPerThreadPool perThreadPool;
final FlushPolicy flushPolicy; final FlushPolicy flushPolicy;
final DocumentsWriterFlushControl flushControl; final DocumentsWriterFlushControl flushControl;
final Healthiness healthiness;
DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers, DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
BufferedDeletesStream bufferedDeletesStream) throws IOException { BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory; this.directory = directory;
@ -142,10 +141,7 @@ final class DocumentsWriter {
flushPolicy = configuredPolicy; flushPolicy = configuredPolicy;
} }
flushPolicy.init(this); flushPolicy.init(this);
flushControl = new DocumentsWriterFlushControl(this, config );
healthiness = new Healthiness();
final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT);
} }
synchronized void deleteQueries(final Query... queries) throws IOException { synchronized void deleteQueries(final Query... queries) throws IOException {
@ -283,31 +279,28 @@ final class DocumentsWriter {
ensureOpen(); ensureOpen();
boolean maybeMerge = false; boolean maybeMerge = false;
final boolean isUpdate = delTerm != null; final boolean isUpdate = delTerm != null;
if (healthiness.anyStalledThreads()) { if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
// Help out flushing any queued DWPTs so we can un-stall:
// Help out flushing any pending DWPTs so we can un-stall:
if (infoStream != null) { if (infoStream != null) {
message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)"); message("DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
} }
do {
// Try pick up pending threads here if possible // Try pick up pending threads here if possible
DocumentsWriterPerThread flushingDWPT; DocumentsWriterPerThread flushingDWPT;
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
// Don't push the delete here since the update could fail! // Don't push the delete here since the update could fail!
maybeMerge = doFlush(flushingDWPT); maybeMerge |= doFlush(flushingDWPT);
if (!healthiness.anyStalledThreads()) {
break;
}
} }
if (infoStream != null && healthiness.anyStalledThreads()) { if (infoStream != null && flushControl.anyStalledThreads()) {
message("WARNING DocumentsWriter still has stalled threads; waiting"); message("WARNING DocumentsWriter has stalled threads; waiting");
} }
healthiness.waitIfStalled(); // block if stalled flushControl.waitIfStalled(); // block if stalled
} while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing
if (infoStream != null && healthiness.anyStalledThreads()) { if (infoStream != null) {
message("WARNING DocumentsWriter done waiting"); message("continue indexing after helpling out flushing DocumentsWriter is healthy");
} }
} }
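The rewritten block above spells out the policy: before adding its own document, an indexing thread that sees queued or stalled flushes first "hijacks" itself to drain pending DWPT flushes, and only blocks if the writer is still stalled afterwards. A minimal, self-contained illustration of that help-out pattern in plain Java (not the Lucene code; the queue of Runnables stands in for pending DWPTs):

    import java.util.concurrent.ConcurrentLinkedQueue;

    public class HelpOutSketch {
        private final ConcurrentLinkedQueue<Runnable> pendingFlushes =
            new ConcurrentLinkedQueue<Runnable>();

        void addDocument(Runnable indexOp) {
            // Hijack the calling thread: drain queued flush work before indexing.
            Runnable pending;
            while ((pending = pendingFlushes.poll()) != null) {
                pending.run();
            }
            indexOp.run();    // then do the actual indexing work
        }

        public static void main(String[] args) {
            HelpOutSketch sketch = new HelpOutSketch();
            sketch.pendingFlushes.add(new Runnable() {
                public void run() { System.out.println("flushed a pending segment"); }
            });
            sketch.addDocument(new Runnable() {
                public void run() { System.out.println("indexed a document"); }
            });
        }
    }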
@ -353,7 +346,6 @@ final class DocumentsWriter {
maybeMerge = true; maybeMerge = true;
boolean success = false; boolean success = false;
FlushTicket ticket = null; FlushTicket ticket = null;
try { try {
assert currentFullFlushDelQueue == null assert currentFullFlushDelQueue == null
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: " || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
@ -511,9 +503,7 @@ final class DocumentsWriter {
anythingFlushed |= doFlush(flushingDWPT); anythingFlushed |= doFlush(flushingDWPT);
} }
// If a concurrent flush is still in flight wait for it // If a concurrent flush is still in flight wait for it
while (flushControl.anyFlushing()) {
flushControl.waitForFlush(); flushControl.waitForFlush();
}
if (!anythingFlushed) { // apply deletes if we did not flush any document if (!anythingFlushed) { // apply deletes if we did not flush any document
synchronized (ticketQueue) { synchronized (ticketQueue) {
ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false)); ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));


@ -44,30 +44,32 @@ public final class DocumentsWriterFlushControl {
private long activeBytes = 0; private long activeBytes = 0;
private long flushBytes = 0; private long flushBytes = 0;
private volatile int numPending = 0; private volatile int numPending = 0;
private volatile int numFlushing = 0;
final AtomicBoolean flushDeletes = new AtomicBoolean(false); final AtomicBoolean flushDeletes = new AtomicBoolean(false);
private boolean fullFlush = false; private boolean fullFlush = false;
private Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>(); private final Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
// only for safety reasons if a DWPT is close to the RAM limit // only for safety reasons if a DWPT is close to the RAM limit
private Queue<DocumentsWriterPerThread> blockedFlushes = new LinkedList<DocumentsWriterPerThread>(); private final Queue<BlockedFlush> blockedFlushes = new LinkedList<BlockedFlush>();
double maxConfiguredRamBuffer = 0;
long peakActiveBytes = 0;// only with assert long peakActiveBytes = 0;// only with assert
long peakFlushBytes = 0;// only with assert long peakFlushBytes = 0;// only with assert
long peakNetBytes = 0;// only with assert long peakNetBytes = 0;// only with assert
private final Healthiness healthiness; long peakDelta = 0; // only with assert
final DocumentsWriterStallControl stallControl;
private final DocumentsWriterPerThreadPool perThreadPool; private final DocumentsWriterPerThreadPool perThreadPool;
private final FlushPolicy flushPolicy; private final FlushPolicy flushPolicy;
private boolean closed = false; private boolean closed = false;
private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>(); private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
private final DocumentsWriter documentsWriter; private final DocumentsWriter documentsWriter;
private final IndexWriterConfig config;
DocumentsWriterFlushControl(DocumentsWriter documentsWriter, DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
Healthiness healthiness, long hardMaxBytesPerDWPT) { IndexWriterConfig config) {
this.healthiness = healthiness; this.stallControl = new DocumentsWriterStallControl();
this.perThreadPool = documentsWriter.perThreadPool; this.perThreadPool = documentsWriter.perThreadPool;
this.flushPolicy = documentsWriter.flushPolicy; this.flushPolicy = documentsWriter.flushPolicy;
this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT; this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;;
this.config = config;
this.documentsWriter = documentsWriter; this.documentsWriter = documentsWriter;
} }
@ -83,6 +85,24 @@ public final class DocumentsWriterFlushControl {
return flushBytes + activeBytes; return flushBytes + activeBytes;
} }
long stallLimitBytes() {
final double maxRamMB = config.getRAMBufferSizeMB();
return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
}
private boolean assertMemory() {
final double maxRamMB = config.getRAMBufferSizeMB();
if (maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
// for this assert we must be tolerant to ram buffer changes!
maxConfiguredRamBuffer = Math.max(maxRamMB, maxConfiguredRamBuffer);
final long ram = flushBytes + activeBytes;
// take peakDelta into account - worst case is that all flushing, pending and blocked DWPT had maxMem and the last doc had the peakDelta
final long expected = (long)(2 * (maxConfiguredRamBuffer * 1024 * 1024)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
assert ram <= expected : "ram was " + ram + " expected: " + expected + " flush mem: " + flushBytes + " active: " + activeBytes ;
}
return true;
}
private void commitPerThreadBytes(ThreadState perThread) { private void commitPerThreadBytes(ThreadState perThread) {
final long delta = perThread.perThread.bytesUsed() final long delta = perThread.perThread.bytesUsed()
- perThread.bytesUsed; - perThread.bytesUsed;
@ -105,11 +125,14 @@ public final class DocumentsWriterFlushControl {
peakActiveBytes = Math.max(peakActiveBytes, activeBytes); peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
peakFlushBytes = Math.max(peakFlushBytes, flushBytes); peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
peakNetBytes = Math.max(peakNetBytes, netBytes()); peakNetBytes = Math.max(peakNetBytes, netBytes());
peakDelta = Math.max(peakDelta, delta);
return true; return true;
} }
synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread, synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
boolean isUpdate) { boolean isUpdate) {
try {
commitPerThreadBytes(perThread); commitPerThreadBytes(perThread);
if (!perThread.flushPending) { if (!perThread.flushPending) {
if (isUpdate) { if (isUpdate) {
@ -121,37 +144,43 @@ public final class DocumentsWriterFlushControl {
// Safety check to prevent a single DWPT exceeding its RAM limit. This // Safety check to prevent a single DWPT exceeding its RAM limit. This
// is super important since we can not address more than 2048 MB per DWPT // is super important since we can not address more than 2048 MB per DWPT
setFlushPending(perThread); setFlushPending(perThread);
}
}
final DocumentsWriterPerThread flushingDWPT;
if (fullFlush) { if (fullFlush) {
DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread); if (perThread.flushPending) {
assert toBlock != null; checkoutAndBlock(perThread);
blockedFlushes.add(toBlock); flushingDWPT = nextPendingFlush();
} else {
flushingDWPT = null;
} }
} else {
flushingDWPT = tryCheckoutForFlush(perThread);
} }
}
final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
healthiness.updateStalled(this);
return flushingDWPT; return flushingDWPT;
} finally {
stallControl.updateStalled(this);
assert assertMemory();
}
} }
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) { synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
assert flushingWriters.containsKey(dwpt); assert flushingWriters.containsKey(dwpt);
try { try {
numFlushing--;
Long bytes = flushingWriters.remove(dwpt); Long bytes = flushingWriters.remove(dwpt);
flushBytes -= bytes.longValue(); flushBytes -= bytes.longValue();
perThreadPool.recycle(dwpt); perThreadPool.recycle(dwpt);
healthiness.updateStalled(this); stallControl.updateStalled(this);
assert assertMemory();
} finally { } finally {
notifyAll(); notifyAll();
} }
} }
public synchronized boolean anyFlushing() {
return numFlushing != 0;
}
public synchronized void waitForFlush() { public synchronized void waitForFlush() {
if (numFlushing != 0) { while (flushingWriters.size() != 0) {
try { try {
this.wait(); this.wait();
} catch (InterruptedException e) { } catch (InterruptedException e) {
@ -173,32 +202,51 @@ public final class DocumentsWriterFlushControl {
flushBytes += bytes; flushBytes += bytes;
activeBytes -= bytes; activeBytes -= bytes;
numPending++; // write access synced numPending++; // write access synced
assert assertMemory();
} // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing } // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing
} }
synchronized void doOnAbort(ThreadState state) { synchronized void doOnAbort(ThreadState state) {
try {
if (state.flushPending) { if (state.flushPending) {
flushBytes -= state.bytesUsed; flushBytes -= state.bytesUsed;
} else { } else {
activeBytes -= state.bytesUsed; activeBytes -= state.bytesUsed;
} }
assert assertMemory();
// Take it out of the loop this DWPT is stale // Take it out of the loop this DWPT is stale
perThreadPool.replaceForFlush(state, closed); perThreadPool.replaceForFlush(state, closed);
healthiness.updateStalled(this); }finally {
stallControl.updateStalled(this);
}
} }
synchronized DocumentsWriterPerThread tryCheckoutForFlush( synchronized DocumentsWriterPerThread tryCheckoutForFlush(
ThreadState perThread) { ThreadState perThread) {
if (fullFlush) { return perThread.flushPending ? internalTryCheckOutForFlush(perThread) : null;
return null; }
private void checkoutAndBlock(ThreadState perThread) {
perThread.lock();
try {
assert perThread.flushPending : "can not block non-pending threadstate";
assert fullFlush : "can not block if fullFlush == false";
final DocumentsWriterPerThread dwpt;
final long bytes = perThread.bytesUsed;
dwpt = perThreadPool.replaceForFlush(perThread, closed);
numPending--;
blockedFlushes.add(new BlockedFlush(dwpt, bytes));
}finally {
perThread.unlock();
} }
return internalTryCheckOutForFlush(perThread);
} }
private DocumentsWriterPerThread internalTryCheckOutForFlush( private DocumentsWriterPerThread internalTryCheckOutForFlush(
ThreadState perThread) { ThreadState perThread) {
if (perThread.flushPending) { assert Thread.holdsLock(this);
assert perThread.flushPending;
try {
// We are pending so all memory is already moved to flushBytes // We are pending so all memory is already moved to flushBytes
if (perThread.tryLock()) { if (perThread.tryLock()) {
try { try {
@ -212,15 +260,16 @@ public final class DocumentsWriterFlushControl {
// Record the flushing DWPT to reduce flushBytes in doAfterFlush // Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(dwpt, Long.valueOf(bytes)); flushingWriters.put(dwpt, Long.valueOf(bytes));
numPending--; // write access synced numPending--; // write access synced
numFlushing++;
return dwpt; return dwpt;
} }
} finally { } finally {
perThread.unlock(); perThread.unlock();
} }
} }
}
return null; return null;
} finally {
stallControl.updateStalled(this);
}
} }
@Override @Override
@ -231,12 +280,13 @@ public final class DocumentsWriterFlushControl {
DocumentsWriterPerThread nextPendingFlush() { DocumentsWriterPerThread nextPendingFlush() {
synchronized (this) { synchronized (this) {
DocumentsWriterPerThread poll = flushQueue.poll(); final DocumentsWriterPerThread poll;
if (poll != null) { if ((poll = flushQueue.poll()) != null) {
stallControl.updateStalled(this);
return poll; return poll;
} }
} }
if (numPending > 0) { if (numPending > 0 && !fullFlush) { // don't check if we are doing a full flush
final Iterator<ThreadState> allActiveThreads = perThreadPool final Iterator<ThreadState> allActiveThreads = perThreadPool
.getActivePerThreadsIterator(); .getActivePerThreadsIterator();
while (allActiveThreads.hasNext() && numPending > 0) { while (allActiveThreads.hasNext() && numPending > 0) {
@ -276,8 +326,8 @@ public final class DocumentsWriterFlushControl {
return documentsWriter.deleteQueue.numGlobalTermDeletes(); return documentsWriter.deleteQueue.numGlobalTermDeletes();
} }
int numFlushingDWPT() { synchronized int numFlushingDWPT() {
return numFlushing; return flushingWriters.size();
} }
public boolean doApplyAllDeletes() { public boolean doApplyAllDeletes() {
@ -289,7 +339,7 @@ public final class DocumentsWriterFlushControl {
} }
int numActiveDWPT() { int numActiveDWPT() {
return this.perThreadPool.getMaxThreadStates(); return this.perThreadPool.getActiveThreadState();
} }
void markForFullFlush() { void markForFullFlush() {
@ -331,11 +381,11 @@ public final class DocumentsWriterFlushControl {
if (!next.flushPending) { if (!next.flushPending) {
setFlushPending(next); setFlushPending(next);
} }
}
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next); final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents"; assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
assert dwpt == flushingDWPT : "flushControl returned different DWPT"; assert dwpt == flushingDWPT : "flushControl returned different DWPT";
toFlush.add(flushingDWPT); toFlush.add(flushingDWPT);
}
} else { } else {
// get the new delete queue from DW // get the new delete queue from DW
next.perThread.initialize(); next.perThread.initialize();
@ -345,31 +395,54 @@ public final class DocumentsWriterFlushControl {
} }
} }
synchronized (this) { synchronized (this) {
assert assertBlockedFlushes(flushingQueue); /* make sure we move all DWPT that are where concurrently marked as
flushQueue.addAll(blockedFlushes); * pending and moved to blocked are moved over to the flushQueue. There is
blockedFlushes.clear(); * a chance that this happens since we marking DWPT for full flush without
* blocking indexing.*/
pruneBlockedQueue(flushingQueue);
assert assertBlockedFlushes(documentsWriter.deleteQueue);
flushQueue.addAll(toFlush); flushQueue.addAll(toFlush);
stallControl.updateStalled(this);
}
}
/**
* Prunes the blockedQueue by removing all DWPT that are associated with the given flush queue.
*/
private void pruneBlockedQueue(final DocumentsWriterDeleteQueue flushingQueue) {
Iterator<BlockedFlush> iterator = blockedFlushes.iterator();
while (iterator.hasNext()) {
BlockedFlush blockedFlush = iterator.next();
if (blockedFlush.dwpt.deleteQueue == flushingQueue) {
iterator.remove();
assert !flushingWriters.containsKey(blockedFlush.dwpt) : "DWPT is already flushing";
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
// don't decr pending here - its already done when DWPT is blocked
flushQueue.add(blockedFlush.dwpt);
}
} }
} }
synchronized void finishFullFlush() { synchronized void finishFullFlush() {
assert fullFlush; assert fullFlush;
assert flushQueue.isEmpty(); assert flushQueue.isEmpty();
assert flushingWriters.isEmpty();
try { try {
if (!blockedFlushes.isEmpty()) { if (!blockedFlushes.isEmpty()) {
assert assertBlockedFlushes(documentsWriter.deleteQueue); assert assertBlockedFlushes(documentsWriter.deleteQueue);
flushQueue.addAll(blockedFlushes); pruneBlockedQueue(documentsWriter.deleteQueue);
blockedFlushes.clear(); assert blockedFlushes.isEmpty();
} }
} finally { } finally {
fullFlush = false; fullFlush = false;
stallControl.updateStalled(this);
} }
} }
boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) { boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
Queue<DocumentsWriterPerThread> flushes = this.blockedFlushes; for (BlockedFlush blockedFlush : blockedFlushes) {
for (DocumentsWriterPerThread documentsWriterPerThread : flushes) { assert blockedFlush.dwpt.deleteQueue == flushingQueue;
assert documentsWriterPerThread.deleteQueue == flushingQueue;
} }
return true; return true;
} }
@ -379,18 +452,65 @@ public final class DocumentsWriterFlushControl {
for (DocumentsWriterPerThread dwpt : flushQueue) { for (DocumentsWriterPerThread dwpt : flushQueue) {
doAfterFlush(dwpt); doAfterFlush(dwpt);
} }
for (DocumentsWriterPerThread dwpt : blockedFlushes) { for (BlockedFlush blockedFlush : blockedFlushes) {
doAfterFlush(dwpt); flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
doAfterFlush(blockedFlush.dwpt);
} }
} finally { } finally {
fullFlush = false; fullFlush = false;
flushQueue.clear(); flushQueue.clear();
blockedFlushes.clear(); blockedFlushes.clear();
stallControl.updateStalled(this);
} }
} }
synchronized boolean isFullFlush() { /**
* Returns <code>true</code> if a full flush is currently running
*/
synchronized boolean isFullFlush() { // used by assert
return fullFlush; return fullFlush;
} }
/**
* Returns the number of flushes that are already checked out but not yet
* actively flushing
*/
synchronized int numQueuedFlushes() {
return flushQueue.size();
}
/**
* Returns the number of flushes that are checked out but not yet available
* for flushing. This only applies during a full flush if a DWPT needs
* flushing but must not be flushed until the full flush has finished.
*/
synchronized int numBlockedFlushes() {
return blockedFlushes.size();
}
private static class BlockedFlush {
final DocumentsWriterPerThread dwpt;
final long bytes;
BlockedFlush(DocumentsWriterPerThread dwpt, long bytes) {
super();
this.dwpt = dwpt;
this.bytes = bytes;
}
}
/**
* This method will block if too many DWPT are currently flushing and no
* checked out DWPT are available
*/
void waitIfStalled() {
stallControl.waitIfStalled();
}
/**
* Returns <code>true</code> iff stalled
*/
boolean anyStalledThreads() {
return stallControl.anyStalledThreads();
}
} }
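Taken together, the new accessors partition checked-out DWPTs into three buckets: queued (any thread may flush them), blocked (held back until the running full flush finishes), and actively flushing, with a separate stalled flag for parked indexing threads. A hypothetical, package-internal debugging helper, assuming access to the package-private methods shown above:

    // Hypothetical sketch, not part of the commit; assumes package-level access.
    void logFlushState(DocumentsWriterFlushControl flushControl, java.io.PrintStream out) {
        out.println("queued=" + flushControl.numQueuedFlushes()     // ready to be picked up
            + " blocked=" + flushControl.numBlockedFlushes()        // held until full flush ends
            + " flushing=" + flushControl.numFlushingDWPT()         // currently being flushed
            + " stalled=" + flushControl.anyStalledThreads());      // indexing threads parked
    }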


@ -166,6 +166,13 @@ public abstract class DocumentsWriterPerThreadPool {
return perThreads.length; return perThreads.length;
} }
/**
* Returns the active number of {@link ThreadState} instances.
*/
public int getActiveThreadState() {
return numThreadStatesActive;
}
/** /**
* Returns a new {@link ThreadState} iff any new state is available otherwise * Returns a new {@link ThreadState} iff any new state is available otherwise
* <code>null</code>. * <code>null</code>.


@ -36,8 +36,7 @@ import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
* continue indexing. * continue indexing.
*/ */
//TODO: rename this to DocumentsWriterStallControl (or something like that)? //TODO: rename this to DocumentsWriterStallControl (or something like that)?
final class Healthiness { final class DocumentsWriterStallControl {
@SuppressWarnings("serial") @SuppressWarnings("serial")
private static final class Sync extends AbstractQueuedSynchronizer { private static final class Sync extends AbstractQueuedSynchronizer {
volatile boolean hasBlockedThreads = false; // only with assert volatile boolean hasBlockedThreads = false; // only with assert
@ -96,13 +95,14 @@ final class Healthiness {
* <code>true</code> iff the number of flushing * <code>true</code> iff the number of flushing
* {@link DocumentsWriterPerThread} is greater than the number of active * {@link DocumentsWriterPerThread} is greater than the number of active
* {@link DocumentsWriterPerThread}. Otherwise it will reset the * {@link DocumentsWriterPerThread}. Otherwise it will reset the
* {@link Healthiness} to healthy and release all threads waiting on * {@link DocumentsWriterStallControl} to healthy and release all threads waiting on
* {@link #waitIfStalled()} * {@link #waitIfStalled()}
*/ */
void updateStalled(DocumentsWriterFlushControl flushControl) { void updateStalled(DocumentsWriterFlushControl flushControl) {
do { do {
// if we have more flushing DWPT than numActiveDWPT we stall! // if we have more flushing / blocked DWPT than numActiveDWPT we stall!
while (flushControl.numActiveDWPT() < flushControl.numFlushingDWPT()) { // don't stall if we have queued flushes - threads should be hijacked instead
while (flushControl.netBytes() > flushControl.stallLimitBytes()) {
if (sync.trySetStalled()) { if (sync.trySetStalled()) {
assert wasStalled = true; assert wasStalled = true;
return; return;
@ -115,7 +115,7 @@ final class Healthiness {
sync.acquireShared(0); sync.acquireShared(0);
} }
boolean hasBlocked() { boolean hasBlocked() { // for tests
return sync.hasBlockedThreads; return sync.hasBlockedThreads;
} }
} }
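The renamed DocumentsWriterStallControl now stalls on a byte budget instead of a thread count: indexing threads block once flushed plus active bytes exceed stallLimitBytes(), i.e. twice the configured RAM buffer (with auto-flush disabled there is effectively no limit). A small, self-contained worked example of that arithmetic, assuming the default 16 MB buffer:

    public class StallLimitExample {
        public static void main(String[] args) {
            double ramBufferMB = 16.0;   // assumed: IndexWriterConfig's default RAM buffer size
            long stallLimitBytes = (long) (2 * (ramBufferMB * 1024 * 1024));
            System.out.println(stallLimitBytes);   // 33554432 bytes, i.e. 32 MB
            // assertMemory() earlier in this file uses a similar bound: roughly twice the largest
            // configured buffer plus a peak per-document delta for every pending, flushing and
            // blocked DWPT.
        }
    }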


@ -40,7 +40,13 @@ import java.util.Collection;
* refuses to run by default. Specify {@code -delete-prior-commits} * refuses to run by default. Specify {@code -delete-prior-commits}
* to override this, allowing the tool to delete all but the last commit. * to override this, allowing the tool to delete all but the last commit.
* From Java code this can be enabled by passing {@code true} to * From Java code this can be enabled by passing {@code true} to
* {@link #IndexUpgrader(Directory,PrintStream,boolean)}. * {@link #IndexUpgrader(Directory,Version,PrintStream,boolean)}.
* <p><b>Warning:</b> This tool may reorder documents if the index was partially
* upgraded before execution (e.g., documents were added). If your application relies
* on &quot;monotonicity&quot; of doc IDs (which means that the order in which the documents
* were added to the index is preserved), do a full optimize instead.
* The {@link MergePolicy} set by {@link IndexWriterConfig} may also reorder
* documents.
*/ */
public final class IndexUpgrader { public final class IndexUpgrader {
@ -52,9 +58,11 @@ public final class IndexUpgrader {
System.err.println("reason, if the incoming index has more than one commit, the tool"); System.err.println("reason, if the incoming index has more than one commit, the tool");
System.err.println("refuses to run by default. Specify -delete-prior-commits to override"); System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
System.err.println("this, allowing the tool to delete all but the last commit."); System.err.println("this, allowing the tool to delete all but the last commit.");
System.err.println("WARNING: This tool may reorder document IDs!");
System.exit(1); System.exit(1);
} }
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
String dir = null; String dir = null;
boolean deletePriorCommits = false; boolean deletePriorCommits = false;
@ -74,7 +82,7 @@ public final class IndexUpgrader {
printUsage(); printUsage();
} }
new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade(); new IndexUpgrader(FSDirectory.open(new File(dir)), Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
} }
private final Directory dir; private final Directory dir;
@ -82,16 +90,22 @@ public final class IndexUpgrader {
private final IndexWriterConfig iwc; private final IndexWriterConfig iwc;
private final boolean deletePriorCommits; private final boolean deletePriorCommits;
@SuppressWarnings("deprecation") /** Creates index upgrader on the given directory, using an {@link IndexWriter} using the given
public IndexUpgrader(Directory dir) { * {@code matchVersion}. The tool refuses to upgrade indexes with multiple commit points. */
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false); public IndexUpgrader(Directory dir, Version matchVersion) {
this(dir, new IndexWriterConfig(matchVersion, null), null, false);
} }
@SuppressWarnings("deprecation") /** Creates index upgrader on the given directory, using an {@link IndexWriter} using the given
public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) { * {@code matchVersion}. You have the possibility to upgrade indexes with multiple commit points by removing
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits); * all older ones. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean deletePriorCommits) {
this(dir, new IndexWriterConfig(matchVersion, null), infoStream, deletePriorCommits);
} }
/** Creates index upgrader on the given directory, using an {@link IndexWriter} using the given
* config. You have the possibility to upgrade indexes with multiple commit points by removing
* all older ones. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) { public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
this.dir = dir; this.dir = dir;
this.iwc = iwc; this.iwc = iwc;
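With the Version parameter added to the constructors above, programmatic use of the upgrader now looks roughly like this sketch; the index path is a placeholder, and the flags mirror the conservative command-line defaults (log to System.out, do not delete prior commits):

    import java.io.File;
    import org.apache.lucene.index.IndexUpgrader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class UpgradeExample {
        public static void main(String[] args) throws Exception {
            Directory dir = FSDirectory.open(new File("/path/to/old-index"));  // placeholder path
            // Refuses to run if the index has more than one commit point.
            new IndexUpgrader(dir, Version.LUCENE_CURRENT, System.out, false).upgrade();
        }
    }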


@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -221,7 +222,7 @@ public class IndexWriter implements Closeable {
private volatile long changeCount; // increments every time a change is completed private volatile long changeCount; // increments every time a change is completed
private long lastCommitChangeCount; // last changeCount that was committed private long lastCommitChangeCount; // last changeCount that was committed
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails private List<SegmentInfo> rollbackSegments; // list of segmentInfo we will fallback to if the commit fails
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount; volatile long pendingCommitChangeCount;
@ -440,14 +441,14 @@ public class IndexWriter implements Closeable {
public synchronized boolean infoIsLive(SegmentInfo info) { public synchronized boolean infoIsLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info); int idx = segmentInfos.indexOf(info);
assert idx != -1: "info=" + info + " isn't in pool"; assert idx != -1: "info=" + info + " isn't in pool";
assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos"; assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
return true; return true;
} }
public synchronized SegmentInfo mapToLive(SegmentInfo info) { public synchronized SegmentInfo mapToLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info); int idx = segmentInfos.indexOf(info);
if (idx != -1) { if (idx != -1) {
info = segmentInfos.get(idx); info = segmentInfos.info(idx);
} }
return info; return info;
} }
@ -818,7 +819,7 @@ public class IndexWriter implements Closeable {
} }
} }
setRollbackSegmentInfos(segmentInfos); rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
// start with previous field numbers, but new FieldInfos // start with previous field numbers, but new FieldInfos
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory); globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
@ -862,10 +863,6 @@ public class IndexWriter implements Closeable {
} }
} }
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
}
/** /**
* Returns the private {@link IndexWriterConfig}, cloned * Returns the private {@link IndexWriterConfig}, cloned
* from the {@link IndexWriterConfig} passed to * from the {@link IndexWriterConfig} passed to
@ -1126,8 +1123,7 @@ public class IndexWriter implements Closeable {
else else
count = 0; count = 0;
for (int i = 0; i < segmentInfos.size(); i++) count += segmentInfos.totalDocCount();
count += segmentInfos.info(i).docCount;
return count; return count;
} }
@ -1144,8 +1140,7 @@ public class IndexWriter implements Closeable {
else else
count = 0; count = 0;
for (int i = 0; i < segmentInfos.size(); i++) { for (final SegmentInfo info : segmentInfos) {
final SegmentInfo info = segmentInfos.info(i);
count += info.docCount - numDeletedDocs(info); count += info.docCount - numDeletedDocs(info);
} }
return count; return count;
@ -1159,9 +1154,11 @@ public class IndexWriter implements Closeable {
if (docWriter.anyDeletions()) { if (docWriter.anyDeletions()) {
return true; return true;
} }
for (int i = 0; i < segmentInfos.size(); i++) for (final SegmentInfo info : segmentInfos) {
if (segmentInfos.info(i).hasDeletions()) if (info.hasDeletions()) {
return true; return true;
}
}
return false; return false;
} }
@ -1554,7 +1551,8 @@ public class IndexWriter implements Closeable {
synchronized(this) { synchronized(this) {
resetMergeExceptions(); resetMergeExceptions();
segmentsToOptimize = new HashSet<SegmentInfo>(segmentInfos); segmentsToOptimize.clear();
segmentsToOptimize.addAll(segmentInfos.asSet());
optimizeMaxNumSegments = maxNumSegments; optimizeMaxNumSegments = maxNumSegments;
// Now mark all pending & running merges as optimize // Now mark all pending & running merges as optimize
@ -1778,7 +1776,7 @@ public class IndexWriter implements Closeable {
final MergePolicy.MergeSpecification spec; final MergePolicy.MergeSpecification spec;
if (optimize) { if (optimize) {
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize); spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));
if (spec != null) { if (spec != null) {
final int numMerges = spec.merges.size(); final int numMerges = spec.merges.size();
@ -1889,8 +1887,7 @@ public class IndexWriter implements Closeable {
// attempt to commit using this instance of IndexWriter // attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write // will always write to a new generation ("write
// once"). // once").
segmentInfos.clear(); segmentInfos.rollbackSegmentInfos(rollbackSegments);
segmentInfos.addAll(rollbackSegmentInfos);
docWriter.abort(); docWriter.abort();
@ -2555,7 +2552,7 @@ public class IndexWriter implements Closeable {
lastCommitChangeCount = pendingCommitChangeCount; lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit); segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData()); segmentInfos.setUserData(pendingCommit.getUserData());
setRollbackSegmentInfos(pendingCommit); rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
deleter.checkpoint(pendingCommit, true); deleter.checkpoint(pendingCommit, true);
} finally { } finally {
// Matches the incRef done in startCommit: // Matches the incRef done in startCommit:
@ -2660,7 +2657,7 @@ public class IndexWriter implements Closeable {
final synchronized void applyAllDeletes() throws IOException { final synchronized void applyAllDeletes() throws IOException {
flushDeletesCount.incrementAndGet(); flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
.applyDeletes(readerPool, segmentInfos); .applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) { if (result.anyDeletes) {
checkpoint(); checkpoint();
} }
@ -2709,7 +2706,7 @@ public class IndexWriter implements Closeable {
private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException { private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) { for(SegmentInfo info : merge.segments) {
if (segmentInfos.indexOf(info) == -1) { if (!segmentInfos.contains(info)) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory); throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
} }
} }
@ -2847,38 +2844,12 @@ public class IndexWriter implements Closeable {
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert")); message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
} }
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments); final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
int segIdx = 0; segmentInfos.applyMergeChanges(merge, dropSegment);
int newSegIdx = 0;
boolean inserted = false;
final int curSegCount = segmentInfos.size();
while(segIdx < curSegCount) {
final SegmentInfo info = segmentInfos.info(segIdx++);
if (mergedAway.contains(info)) {
if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
segmentInfos.set(segIdx-1, merge.info);
inserted = true;
newSegIdx++;
}
} else {
segmentInfos.set(newSegIdx++, info);
}
}
// Either we found place to insert segment, or, we did if (dropSegment) {
// not, but only because all segments we merged became
// deleted while we are merging, in which case it should
// be the case that the new segment is also all deleted:
if (!inserted) {
assert allDeleted;
if (keepFullyDeletedSegments) {
segmentInfos.add(0, merge.info);
} else {
readerPool.drop(merge.info); readerPool.drop(merge.info);
} }
}
segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
if (infoStream != null) { if (infoStream != null) {
message("after commit: " + segString()); message("after commit: " + segString());
@ -3014,7 +2985,7 @@ public class IndexWriter implements Closeable {
if (mergingSegments.contains(info)) { if (mergingSegments.contains(info)) {
return false; return false;
} }
if (segmentInfos.indexOf(info) == -1) { if (!segmentInfos.contains(info)) {
return false; return false;
} }
if (info.dir != directory) { if (info.dir != directory) {
@ -3462,7 +3433,7 @@ public class IndexWriter implements Closeable {
} }
// utility routines for tests // utility routines for tests
SegmentInfo newestSegment() { synchronized SegmentInfo newestSegment() {
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null; return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
} }
@ -3472,19 +3443,18 @@ public class IndexWriter implements Closeable {
} }
/** @lucene.internal */ /** @lucene.internal */
public synchronized String segString(List<SegmentInfo> infos) throws IOException { public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
StringBuilder buffer = new StringBuilder(); final StringBuilder buffer = new StringBuilder();
final int count = infos.size(); for(final SegmentInfo s : infos) {
for(int i = 0; i < count; i++) { if (buffer.length() > 0) {
if (i > 0) {
buffer.append(' '); buffer.append(' ');
} }
buffer.append(segString(infos.get(i))); buffer.append(segString(s));
} }
return buffer.toString(); return buffer.toString();
} }
/** @lucene.internal */
public synchronized String segString(SegmentInfo info) throws IOException { public synchronized String segString(SegmentInfo info) throws IOException {
StringBuilder buffer = new StringBuilder(); StringBuilder buffer = new StringBuilder();
SegmentReader reader = readerPool.getIfExists(info); SegmentReader reader = readerPool.getIfExists(info);

View File

@ -133,10 +133,15 @@ public final class IndexWriterConfig implements Cloneable {
/** /**
* Creates a new config that with defaults that match the specified * Creates a new config that with defaults that match the specified
* {@link Version} as well as the default {@link Analyzer}. {@link Version} is * {@link Version} as well as the default {@link
* a placeholder for future changes. The default settings are relevant to 3.1 * Analyzer}. If matchVersion is >= {@link
* and before. In the future, if different settings will apply to different * Version#LUCENE_32}, {@link TieredMergePolicy} is used
* versions, they will be documented here. * for merging; else {@link LogByteSizeMergePolicy}.
* Note that {@link TieredMergePolicy} is free to select
* non-contiguous merges, which means docIDs may not
* remain montonic over time. If this is a problem you
* should switch to {@link LogByteSizeMergePolicy} or
* {@link LogDocMergePolicy}.
*/ */
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) { public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
this.matchVersion = matchVersion; this.matchVersion = matchVersion;
@ -154,7 +159,11 @@ public final class IndexWriterConfig implements Cloneable {
indexingChain = DocumentsWriterPerThread.defaultIndexingChain; indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
mergedSegmentWarmer = null; mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault(); codecProvider = CodecProvider.getDefault();
if (matchVersion.onOrAfter(Version.LUCENE_32)) {
mergePolicy = new TieredMergePolicy(); mergePolicy = new TieredMergePolicy();
} else {
mergePolicy = new LogByteSizeMergePolicy();
}
readerPooling = DEFAULT_READER_POOLING; readerPooling = DEFAULT_READER_POOLING;
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(); indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR; readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
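Because the default merge policy now depends on matchVersion, an application that relies on monotonic doc IDs can pin a contiguous policy explicitly, as the new javadoc advises. A short sketch; the analyzer choice is an assumption, any Analyzer works:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.util.Version;

    public class MergePolicyChoice {
        public static void main(String[] args) {
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_32,
                new StandardAnalyzer(Version.LUCENE_32));     // TieredMergePolicy is the default here
            iwc.setMergePolicy(new LogByteSizeMergePolicy()); // contiguous merges keep doc IDs monotonic
        }
    }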


@ -242,6 +242,7 @@ public abstract class LogMergePolicy extends MergePolicy {
private MergeSpecification findMergesForOptimizeSizeLimit( private MergeSpecification findMergesForOptimizeSizeLimit(
SegmentInfos infos, int maxNumSegments, int last) throws IOException { SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification(); MergeSpecification spec = new MergeSpecification();
final List<SegmentInfo> segments = infos.asList();
int start = last - 1; int start = last - 1;
while (start >= 0) { while (start >= 0) {
@ -254,12 +255,12 @@ public abstract class LogMergePolicy extends MergePolicy {
// unless there is only 1 which is optimized. // unless there is only 1 which is optimized.
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) { if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
// there is more than 1 segment to the right of this one, or an unoptimized single segment. // there is more than 1 segment to the right of this one, or an unoptimized single segment.
spec.add(new OneMerge(infos.range(start + 1, last))); spec.add(new OneMerge(segments.subList(start + 1, last)));
} }
last = start; last = start;
} else if (last - start == mergeFactor) { } else if (last - start == mergeFactor) {
// mergeFactor eligible segments were found, add them as a merge. // mergeFactor eligible segments were found, add them as a merge.
spec.add(new OneMerge(infos.range(start, last))); spec.add(new OneMerge(segments.subList(start, last)));
last = start; last = start;
} }
--start; --start;
@ -267,7 +268,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// Add any left-over segments, unless there is just 1 already optimized. // Add any left-over segments, unless there is just 1 already optimized.
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) { if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
spec.add(new OneMerge(infos.range(start, last))); spec.add(new OneMerge(segments.subList(start, last)));
} }
return spec.merges.size() == 0 ? null : spec; return spec.merges.size() == 0 ? null : spec;
@ -280,11 +281,12 @@ public abstract class LogMergePolicy extends MergePolicy {
*/ */
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException { private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification(); MergeSpecification spec = new MergeSpecification();
final List<SegmentInfo> segments = infos.asList();
// First, enroll all "full" merges (size // First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently: // mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) { while (last - maxNumSegments + 1 >= mergeFactor) {
spec.add(new OneMerge(infos.range(last - mergeFactor, last))); spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
last -= mergeFactor; last -= mergeFactor;
} }
@ -296,7 +298,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// Since we must optimize down to 1 segment, the // Since we must optimize down to 1 segment, the
// choice is simple: // choice is simple:
if (last > 1 || !isOptimized(infos.info(0))) { if (last > 1 || !isOptimized(infos.info(0))) {
spec.add(new OneMerge(infos.range(0, last))); spec.add(new OneMerge(segments.subList(0, last)));
} }
} else if (last > maxNumSegments) { } else if (last > maxNumSegments) {
@ -325,7 +327,7 @@ public abstract class LogMergePolicy extends MergePolicy {
} }
} }
spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize))); spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
} }
} }
return spec.merges.size() == 0 ? null : spec; return spec.merges.size() == 0 ? null : spec;
@ -412,7 +414,8 @@ public abstract class LogMergePolicy extends MergePolicy {
@Override @Override
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException { throws CorruptIndexException, IOException {
final int numSegments = segmentInfos.size(); final List<SegmentInfo> segments = segmentInfos.asList();
final int numSegments = segments.size();
if (verbose()) if (verbose())
message("findMergesToExpungeDeletes: " + numSegments + " segments"); message("findMergesToExpungeDeletes: " + numSegments + " segments");
@ -434,7 +437,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// deletions, so force a merge now: // deletions, so force a merge now:
if (verbose()) if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive"); message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i))); spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = i; firstSegmentWithDeletions = i;
} }
} else if (firstSegmentWithDeletions != -1) { } else if (firstSegmentWithDeletions != -1) {
@ -443,7 +446,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// mergeFactor segments // mergeFactor segments
if (verbose()) if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive"); message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i))); spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = -1; firstSegmentWithDeletions = -1;
} }
} }
@ -451,7 +454,7 @@ public abstract class LogMergePolicy extends MergePolicy {
if (firstSegmentWithDeletions != -1) { if (firstSegmentWithDeletions != -1) {
if (verbose()) if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive"); message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments))); spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
} }
return spec; return spec;


@ -72,7 +72,7 @@ public abstract class MergePolicy implements java.io.Closeable {
long mergeGen; // used by IndexWriter long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter int maxNumSegmentsOptimize; // used by IndexWriter
long estimatedMergeBytes; // used by IndexWriter public long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter List<SegmentReader> readerClones; // used by IndexWriter
public final List<SegmentInfo> segments; public final List<SegmentInfo> segments;
@ -84,7 +84,8 @@ public abstract class MergePolicy implements java.io.Closeable {
public OneMerge(List<SegmentInfo> segments) { public OneMerge(List<SegmentInfo> segments) {
if (0 == segments.size()) if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment"); throw new RuntimeException("segments must include at least one segment");
this.segments = segments; // clone the list, as the in list may be based off original SegmentInfos and may be modified
this.segments = new ArrayList<SegmentInfo>(segments);
int count = 0; int count = 0;
for(SegmentInfo info : segments) { for(SegmentInfo info : segments) {
count += info.docCount; count += info.docCount;
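The new comment is the whole rationale for this change: a OneMerge must not share a live list with the SegmentInfos it was selected from. A minimal, generic illustration of the defensive-copy idiom in plain Java (not the Lucene classes):

    import java.util.ArrayList;
    import java.util.List;

    public class DefensiveCopySketch {
        private final List<String> segments;

        DefensiveCopySketch(List<String> segments) {
            // Copy, so later changes to the caller's list cannot leak into this object.
            this.segments = new ArrayList<String>(segments);
        }

        public static void main(String[] args) {
            List<String> live = new ArrayList<String>();
            live.add("_0");
            DefensiveCopySketch merge = new DefensiveCopySketch(live);
            live.clear();                                  // caller keeps mutating its own list
            System.out.println(merge.segments.size());     // still prints 1
        }
    }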


@ -42,7 +42,7 @@ import org.apache.lucene.util.Constants;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public final class SegmentInfo { public final class SegmentInfo implements Cloneable {
// TODO: remove with hasVector and hasProx // TODO: remove with hasVector and hasProx
private static final int CHECK_FIELDINFO = -2; private static final int CHECK_FIELDINFO = -2;
static final int NO = -1; // e.g. no norms; no deletes; static final int NO = -1; // e.g. no norms; no deletes;


@ -20,13 +20,16 @@ package org.apache.lucene.index;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Vector; import java.util.Set;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
@ -45,7 +48,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public final class SegmentInfos extends Vector<SegmentInfo> { public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
/* /*
* The file format version, a negative number. * The file format version, a negative number.
@ -85,6 +88,11 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand
private List<SegmentInfo> segments = new ArrayList<SegmentInfo>();
private Set<SegmentInfo> segmentSet = new HashSet<SegmentInfo>();
private transient List<SegmentInfo> cachedUnmodifiableList;
private transient Set<SegmentInfo> cachedUnmodifiableSet;
/** /**
* If non-null, information about loading segments_N files * If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream. * will be printed here. @see #setInfoStream.
@ -107,8 +115,8 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
return format; return format;
} }
public final SegmentInfo info(int i) { public SegmentInfo info(int i) {
return get(i); return segments.get(i);
} }
/** /**
@ -237,7 +245,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
boolean success = false; boolean success = false;
// Clear any previous segments: // Clear any previous segments:
clear(); this.clear();
generation = generationFromSegmentsFileName(segmentFileName); generation = generationFromSegmentsFileName(segmentFileName);
@ -252,7 +260,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
if (!success) { if (!success) {
// Clear any segment infos we had loaded so we // Clear any segment infos we had loaded so we
// have a clean slate on retry: // have a clean slate on retry:
clear(); this.clear();
} }
} }
} }
@ -349,15 +357,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
/** Prunes any segment whose docs are all deleted. */ /** Prunes any segment whose docs are all deleted. */
public void pruneDeletedSegments() { public void pruneDeletedSegments() {
int segIdx = 0; for(final Iterator<SegmentInfo> it = segments.iterator(); it.hasNext();) {
while(segIdx < size()) { final SegmentInfo info = it.next();
final SegmentInfo info = info(segIdx);
if (info.getDelCount() == info.docCount) { if (info.getDelCount() == info.docCount) {
remove(segIdx); it.remove();
} else { segmentSet.remove(info);
segIdx++;
} }
} }
assert segmentSet.size() == segments.size();
} }
/** /**
@ -367,14 +374,23 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
@Override @Override
public Object clone() { public Object clone() {
SegmentInfos sis = (SegmentInfos) super.clone(); try {
for(int i=0;i<sis.size();i++) { final SegmentInfos sis = (SegmentInfos) super.clone();
final SegmentInfo info = sis.info(i); // deep clone, first recreate all collections:
sis.segments = new ArrayList<SegmentInfo>(size());
sis.segmentSet = new HashSet<SegmentInfo>(size());
sis.cachedUnmodifiableList = null;
sis.cachedUnmodifiableSet = null;
for(final SegmentInfo info : this) {
assert info.getSegmentCodecs() != null; assert info.getSegmentCodecs() != null;
sis.set(i, (SegmentInfo) info.clone()); // dont directly access segments, use add method!!!
sis.add((SegmentInfo) info.clone());
} }
sis.userData = new HashMap<String,String>(userData); sis.userData = new HashMap<String,String>(userData);
return sis; return sis;
} catch (CloneNotSupportedException e) {
throw new RuntimeException("should not happen", e);
}
} }
/** /**
@ -742,18 +758,6 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException; protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException;
} }
/**
* Returns a new SegmentInfos containing the SegmentInfo
* instances in the specified range first (inclusive) to
* last (exclusive), so total number of segments returned
* is last-first.
*/
public SegmentInfos range(int first, int last) {
SegmentInfos infos = new SegmentInfos(codecs);
infos.addAll(super.subList(first, last));
return infos;
}
// Carry over generation numbers from another SegmentInfos // Carry over generation numbers from another SegmentInfos
void updateGeneration(SegmentInfos other) { void updateGeneration(SegmentInfos other) {
lastGeneration = other.lastGeneration; lastGeneration = other.lastGeneration;
@ -831,6 +835,10 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
} catch (Throwable t) { } catch (Throwable t) {
// throw orig excp // throw orig excp
} }
} else {
// we must sync here explicitly since during a commit
// IW will not sync the global field map.
dir.sync(Collections.singleton(name));
} }
} }
return version; return version;
@ -956,7 +964,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
} }
public synchronized String toString(Directory directory) { public String toString(Directory directory) {
StringBuilder buffer = new StringBuilder(); StringBuilder buffer = new StringBuilder();
buffer.append(getCurrentSegmentFileName()).append(": "); buffer.append(getCurrentSegmentFileName()).append(": ");
final int count = size(); final int count = size();
@ -987,8 +995,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
* remain write once. * remain write once.
*/ */
void replace(SegmentInfos other) { void replace(SegmentInfos other) {
clear(); rollbackSegmentInfos(other.asList());
addAll(other);
lastGeneration = other.lastGeneration; lastGeneration = other.lastGeneration;
lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion; lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
format = other.format; format = other.format;
@ -1014,7 +1021,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
* Loads or returns the already loaded the global field number map for this {@link SegmentInfos}. * Loads or returns the already loaded the global field number map for this {@link SegmentInfos}.
* If this {@link SegmentInfos} has no global field number map the returned instance is empty * If this {@link SegmentInfos} has no global field number map the returned instance is empty
*/ */
synchronized FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException { FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
if (globalFieldNumberMap != null) { if (globalFieldNumberMap != null) {
return globalFieldNumberMap; return globalFieldNumberMap;
} }
@ -1054,4 +1061,135 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
long getLastGlobalFieldMapVersion() { long getLastGlobalFieldMapVersion() {
return lastGlobalFieldMapVersion; return lastGlobalFieldMapVersion;
} }
/** applies all changes caused by committing a merge to this SegmentInfos */
void applyMergeChanges(MergePolicy.OneMerge merge, boolean dropSegment) {
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
boolean inserted = false;
int newSegIdx = 0;
for (int segIdx = 0, cnt = segments.size(); segIdx < cnt; segIdx++) {
assert segIdx >= newSegIdx;
final SegmentInfo info = segments.get(segIdx);
if (mergedAway.contains(info)) {
if (!inserted && !dropSegment) {
segments.set(segIdx, merge.info);
inserted = true;
newSegIdx++;
}
} else {
segments.set(newSegIdx, info);
newSegIdx++;
}
}
// Either we found place to insert segment, or, we did
// not, but only because all segments we merged became
// deleted while we are merging, in which case it should
// be the case that the new segment is also all deleted,
// we insert it at the beginning if it should not be dropped:
if (!inserted && !dropSegment) {
segments.add(0, merge.info);
}
// the rest of the segments in list are duplicates, so don't remove from map, only list!
segments.subList(newSegIdx, segments.size()).clear();
// update the Set
if (!dropSegment) {
segmentSet.add(merge.info);
}
segmentSet.removeAll(mergedAway);
assert segmentSet.size() == segments.size();
}
List<SegmentInfo> createBackupSegmentInfos(boolean cloneChildren) {
if (cloneChildren) {
final List<SegmentInfo> list = new ArrayList<SegmentInfo>(size());
for(final SegmentInfo info : this) {
assert info.getSegmentCodecs() != null;
list.add((SegmentInfo) info.clone());
}
return list;
} else {
return new ArrayList<SegmentInfo>(segments);
}
}
void rollbackSegmentInfos(List<SegmentInfo> infos) {
this.clear();
this.addAll(infos);
}
/** Returns an <b>unmodifiable</b> {@link Iterator} of contained segments in order. */
// @Override (comment out until Java 6)
public Iterator<SegmentInfo> iterator() {
return asList().iterator();
}
/** Returns all contained segments as an <b>unmodifiable</b> {@link List} view. */
public List<SegmentInfo> asList() {
if (cachedUnmodifiableList == null) {
cachedUnmodifiableList = Collections.unmodifiableList(segments);
}
return cachedUnmodifiableList;
}
/** Returns all contained segments as an <b>unmodifiable</b> {@link Set} view.
* The iterator is not sorted; use the {@link List} view or {@link #iterator} to get all segments in order. */
public Set<SegmentInfo> asSet() {
if (cachedUnmodifiableSet == null) {
cachedUnmodifiableSet = Collections.unmodifiableSet(segmentSet);
}
return cachedUnmodifiableSet;
}
public int size() {
return segments.size();
}
public void add(SegmentInfo si) {
if (segmentSet.contains(si)) {
throw new IllegalStateException("Cannot add the same segment two times to this SegmentInfos instance");
}
segments.add(si);
segmentSet.add(si);
assert segmentSet.size() == segments.size();
}
public void addAll(Iterable<SegmentInfo> sis) {
for (final SegmentInfo si : sis) {
this.add(si);
}
}
public void clear() {
segments.clear();
segmentSet.clear();
}
public void remove(SegmentInfo si) {
final int index = this.indexOf(si);
if (index >= 0) {
this.remove(index);
}
}
public void remove(int index) {
segmentSet.remove(segments.remove(index));
assert segmentSet.size() == segments.size();
}
public boolean contains(SegmentInfo si) {
return segmentSet.contains(si);
}
public int indexOf(SegmentInfo si) {
if (segmentSet.contains(si)) {
return segments.indexOf(si);
} else {
return -1;
}
}
} }
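
The add/remove/contains methods above keep two structures in sync: the ordered list of segments and a set for O(1) membership checks. Below is a minimal, self-contained sketch (hypothetical class and names, not Lucene code) of that list-plus-set bookkeeping pattern and the size invariant the assertions guard.

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Sketch only: mirrors the pattern used by SegmentInfos above, where every mutation
// must touch both the ordered list and the membership set so that set.size() == list.size().
final class OrderedUniqueList<T> {
  private final List<T> list = new ArrayList<T>();
  private final Set<T> set = new HashSet<T>();

  void add(T item) {
    if (!set.add(item)) {
      throw new IllegalStateException("Cannot add the same element twice");
    }
    list.add(item);
    assert set.size() == list.size();
  }

  void remove(T item) {
    if (set.remove(item)) {
      list.remove(item);
    }
    assert set.size() == list.size();
  }

  boolean contains(T item) {        // O(1), unlike a list scan
    return set.contains(item);
  }

  List<T> asList() {                // ordered, unmodifiable view
    return Collections.unmodifiableList(list);
  }
}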

View File

@ -251,9 +251,7 @@ public class TieredMergePolicy extends MergePolicy {
final Collection<SegmentInfo> merging = writer.get().getMergingSegments(); final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>(); final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>(); final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>(infos.asList());
infosSorted.addAll(infos);
Collections.sort(infosSorted, segmentByteSizeDescending); Collections.sort(infosSorted, segmentByteSizeDescending);
// Compute total index bytes & print details about the index // Compute total index bytes & print details about the index

View File

@ -40,6 +40,11 @@ import java.util.Set;
* w.optimize(); * w.optimize();
* w.close(); * w.close();
* </pre> * </pre>
* <p><b>Warning:</b> This merge policy may reorder documents if the index was partially
* upgraded before calling optimize (e.g., documents were added). If your application relies
* on &quot;monotonicity&quot; of doc IDs (which means that the order in which the documents
* were added to the index is preserved), do a full optimize instead. Please note that the
* delegate {@code MergePolicy} may also reorder documents.
* @lucene.experimental * @lucene.experimental
* @see IndexUpgrader * @see IndexUpgrader
*/ */
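
A hedged sketch of the wrap-and-optimize usage the javadoc above describes; the directory path, analyzer, and Version constant are placeholders, and the delegate merge policy is simply whatever the config already carries.

import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.UpgradeIndexMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

// Sketch only: wraps the config's existing merge policy so that optimize() rewrites
// (and thereby upgrades) every segment of an existing index.
public class UpgradeSketch {
  public static void upgrade(File indexPath, Analyzer analyzer) throws Exception {
    Directory dir = FSDirectory.open(indexPath);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
    iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
    IndexWriter w = new IndexWriter(dir, iwc);
    w.optimize();   // all segments are rewritten in the current format
    w.close();
    dir.close();
  }
}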

View File

@ -200,6 +200,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
private class FSTFieldWriter extends FieldWriter { private class FSTFieldWriter extends FieldWriter {
private final Builder<Long> fstBuilder; private final Builder<Long> fstBuilder;
private final PositiveIntOutputs fstOutputs; private final PositiveIntOutputs fstOutputs;
private final long startTermsFilePointer;
final FieldInfo fieldInfo; final FieldInfo fieldInfo;
int numIndexTerms; int numIndexTerms;
@ -220,6 +221,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
// Always put empty string in // Always put empty string in
fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer)); fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
startTermsFilePointer = termsFilePointer;
} }
@Override @Override
@ -239,6 +241,11 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
@Override @Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException { public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
if (text.length == 0) {
// We already added empty string in ctor
assert termsFilePointer == startTermsFilePointer;
return;
}
final int lengthSave = text.length; final int lengthSave = text.length;
text.length = indexedTermPrefixLength(lastTerm, text); text.length = indexedTermPrefixLength(lastTerm, text);
try { try {

View File

@ -0,0 +1,382 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Caches all docs, and optionally also scores, coming from
* a search, and is then able to replay them to another
* collector. You specify the max RAM this class may use.
* Once the collection is done, call {@link #isCached}. If
* this returns true, you can use {@link #replay} against a
* new collector. If it returns false, this means too much
* RAM was required and you must instead re-run the original
* search.
*
* <p><b>NOTE</b>: this class consumes 4 bytes (or 8 bytes, if
* scoring is cached) per collected document. If the result
* set is large this can easily be a very substantial amount
* of RAM!
*
* <p><b>NOTE</b>: this class caches at least 128 documents
* before checking RAM limits.
*
* <p>See the Lucene <tt>modules/grouping</tt> module for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
public abstract class CachingCollector extends Collector {
// Max out at 512K entries per array
private static final int MAX_ARRAY_SIZE = 512 * 1024;
private static final int INITIAL_ARRAY_SIZE = 128;
private final static int[] EMPTY_INT_ARRAY = new int[0];
private static class SegStart {
public final AtomicReaderContext readerContext;
public final int end;
public SegStart(AtomicReaderContext readerContext, int end) {
this.readerContext = readerContext;
this.end = end;
}
}
private static final class CachedScorer extends Scorer {
// NOTE: these members are package-private so that accessing them from
// the outer class does not incur an access check by the JVM. The same
// would be true if they were defined in the outer class as private
// members.
int doc;
float score;
private CachedScorer() { super(null); }
@Override
public final float score() { return score; }
@Override
public final int advance(int target) { throw new UnsupportedOperationException(); }
@Override
public final int docID() { return doc; }
@Override
public final float freq() { throw new UnsupportedOperationException(); }
@Override
public final int nextDoc() { throw new UnsupportedOperationException(); }
}
// A CachingCollector which caches scores
private static final class ScoreCachingCollector extends CachingCollector {
private final CachedScorer cachedScorer;
private final List<float[]> cachedScores;
private Scorer scorer;
private float[] curScores;
ScoreCachingCollector(Collector other, double maxRAMMB) {
super(other, maxRAMMB, true);
cachedScorer = new CachedScorer();
cachedScores = new ArrayList<float[]>();
curScores = new float[128];
cachedScores.add(curScores);
}
@Override
public void collect(int doc) throws IOException {
if (curDocs == null) {
// Cache was too large
cachedScorer.score = scorer.score();
cachedScorer.doc = doc;
other.collect(doc);
return;
}
// Allocate a bigger array or abort caching
if (upto == curDocs.length) {
base += upto;
// Compute next array length - don't allocate too big arrays
int nextLength = 8*curDocs.length;
if (nextLength > MAX_ARRAY_SIZE) {
nextLength = MAX_ARRAY_SIZE;
}
if (base + nextLength > maxDocsToCache) {
// try to allocate a smaller array
nextLength = maxDocsToCache - base;
if (nextLength <= 0) {
// Too many docs to collect -- clear cache
curDocs = null;
curScores = null;
cachedSegs.clear();
cachedDocs.clear();
cachedScores.clear();
cachedScorer.score = scorer.score();
cachedScorer.doc = doc;
other.collect(doc);
return;
}
}
curDocs = new int[nextLength];
cachedDocs.add(curDocs);
curScores = new float[nextLength];
cachedScores.add(curScores);
upto = 0;
}
curDocs[upto] = doc;
cachedScorer.score = curScores[upto] = scorer.score();
upto++;
cachedScorer.doc = doc;
other.collect(doc);
}
@Override
public void replay(Collector other) throws IOException {
replayInit(other);
int curUpto = 0;
int curBase = 0;
int chunkUpto = 0;
other.setScorer(cachedScorer);
curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext);
while (curBase + curUpto < seg.end) {
if (curUpto == curDocs.length) {
curBase += curDocs.length;
curDocs = cachedDocs.get(chunkUpto);
curScores = cachedScores.get(chunkUpto);
chunkUpto++;
curUpto = 0;
}
cachedScorer.score = curScores[curUpto];
other.collect(curDocs[curUpto++]);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
other.setScorer(cachedScorer);
}
@Override
public String toString() {
if (isCached()) {
return "CachingCollector (" + (base+upto) + " docs & scores cached)";
} else {
return "CachingCollector (cache was cleared)";
}
}
}
// A CachingCollector which does not cache scores
private static final class NoScoreCachingCollector extends CachingCollector {
NoScoreCachingCollector(Collector other, double maxRAMMB) {
super(other, maxRAMMB, false);
}
@Override
public void collect(int doc) throws IOException {
if (curDocs == null) {
// Cache was too large
other.collect(doc);
return;
}
// Allocate a bigger array or abort caching
if (upto == curDocs.length) {
base += upto;
// Compute next array length - don't allocate too big arrays
int nextLength = 8*curDocs.length;
if (nextLength > MAX_ARRAY_SIZE) {
nextLength = MAX_ARRAY_SIZE;
}
if (base + nextLength > maxDocsToCache) {
// try to allocate a smaller array
nextLength = maxDocsToCache - base;
if (nextLength <= 0) {
// Too many docs to collect -- clear cache
curDocs = null;
cachedSegs.clear();
cachedDocs.clear();
other.collect(doc);
return;
}
}
curDocs = new int[nextLength];
cachedDocs.add(curDocs);
upto = 0;
}
curDocs[upto] = doc;
upto++;
other.collect(doc);
}
@Override
public void replay(Collector other) throws IOException {
replayInit(other);
int curUpto = 0;
int curbase = 0;
int chunkUpto = 0;
curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext);
while (curbase + curUpto < seg.end) {
if (curUpto == curDocs.length) {
curbase += curDocs.length;
curDocs = cachedDocs.get(chunkUpto);
chunkUpto++;
curUpto = 0;
}
other.collect(curDocs[curUpto++]);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
other.setScorer(scorer);
}
@Override
public String toString() {
if (isCached()) {
return "CachingCollector (" + (base+upto) + " docs cached)";
} else {
return "CachingCollector (cache was cleared)";
}
}
}
// TODO: would be nice if a collector defined a
// needsScores() method so we can specialize / do checks
// up front. This is only relevant for the ScoreCaching
// version -- if the wrapped Collector does not need
// scores, it can avoid cachedScorer entirely.
protected final Collector other;
protected final int maxDocsToCache;
protected final List<SegStart> cachedSegs = new ArrayList<SegStart>();
protected final List<int[]> cachedDocs;
private AtomicReaderContext lastReaderContext;
protected int[] curDocs;
protected int upto;
protected int base;
protected int lastDocBase;
public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
}
// Prevent extension from non-internal classes
private CachingCollector(Collector other, double maxRAMMB, boolean cacheScores) {
this.other = other;
cachedDocs = new ArrayList<int[]>();
curDocs = new int[INITIAL_ARRAY_SIZE];
cachedDocs.add(curDocs);
int bytesPerDoc = RamUsageEstimator.NUM_BYTES_INT;
if (cacheScores) {
bytesPerDoc += RamUsageEstimator.NUM_BYTES_FLOAT;
}
maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024) / bytesPerDoc);
}
@Override
public boolean acceptsDocsOutOfOrder() {
return other.acceptsDocsOutOfOrder();
}
public boolean isCached() {
return curDocs != null;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
other.setNextReader(context);
if (lastReaderContext != null) {
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
}
lastReaderContext = context;
}
/** Reused by the specialized inner classes. */
void replayInit(Collector other) {
if (!isCached()) {
throw new IllegalStateException("cannot replay: cache was cleared because too much RAM was required");
}
if (!other.acceptsDocsOutOfOrder() && this.other.acceptsDocsOutOfOrder()) {
throw new IllegalArgumentException(
"cannot replay: given collector does not support "
+ "out-of-order collection, while the wrapped collector does. "
+ "Therefore cached documents may be out-of-order.");
}
//System.out.println("CC: replay totHits=" + (upto + base));
if (lastReaderContext != null) {
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
lastReaderContext = null;
}
}
/**
* Replays the cached doc IDs (and scores) to the given Collector. If this
* instance does not cache scores, then {@code setScorer} is not called on
* the given Collector and scores are not replayed.
*
* @throws IllegalStateException
* if this collector is not cached (i.e., if the RAM limits were too
* low for the number of documents + scores to cache).
* @throws IllegalArgumentException
* if the given Collector does not support out-of-order collection,
* while the collector passed to the ctor does.
*/
public abstract void replay(Collector other) throws IOException;
}
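
A hedged usage sketch of the API defined above: run the search once through the caching collector, then replay into a second collector only if everything fit within the RAM budget. The searcher, query, and the two target collectors are assumed to exist; 64.0 MB is an arbitrary budget.

import java.io.IOException;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

// Sketch only: demonstrates create() / isCached() / replay() as declared above.
public class CachingCollectorSketch {
  public static void searchTwice(IndexSearcher searcher, Query query,
                                 Collector firstPass, Collector secondPass) throws IOException {
    CachingCollector cc = CachingCollector.create(firstPass, true /* cacheScores */, 64.0 /* MB */);
    searcher.search(query, cc);
    if (cc.isCached()) {
      cc.replay(secondPass);                 // replay cached doc IDs and scores
    } else {
      searcher.search(query, secondPass);    // cache overflowed: re-run the original search
    }
  }
}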

View File

@ -61,9 +61,10 @@ public abstract class DocIdSetIterator {
public abstract int nextDoc() throws IOException; public abstract int nextDoc() throws IOException;
/** /**
* Advances to the first beyond the current whose document number is greater * Advances to the first beyond (see NOTE below) the current whose document
* than or equal to <i>target</i>. Returns the current document number or * number is greater than or equal to <i>target</i>. Returns the current
* {@link #NO_MORE_DOCS} if there are no more docs in the set. * document number or {@link #NO_MORE_DOCS} if there are no more docs in the
* set.
* <p> * <p>
* Behaves as if written: * Behaves as if written:
* *
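
The clarified javadoc above describes advance(target) as skipping to the first document at or beyond target. The following is a minimal, purely illustrative iterator over a sorted int[] (not part of this patch) showing how advance() can be expressed in terms of nextDoc().

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

// Sketch only: an array-backed DocIdSetIterator illustrating the advance() contract.
final class IntArrayDocIdSetIterator extends DocIdSetIterator {
  private final int[] docs;   // sorted, distinct doc IDs
  private int idx = -1;

  IntArrayDocIdSetIterator(int[] docs) {
    this.docs = docs;
  }

  @Override
  public int docID() {
    if (idx < 0) return -1;                         // not positioned yet
    return idx < docs.length ? docs[idx] : NO_MORE_DOCS;
  }

  @Override
  public int nextDoc() throws IOException {
    return ++idx >= docs.length ? NO_MORE_DOCS : docs[idx];
  }

  @Override
  public int advance(int target) throws IOException {
    int doc;
    while ((doc = nextDoc()) < target) {
      // keep advancing until we reach or pass target
    }
    return doc;
  }
}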

View File

@ -55,7 +55,12 @@ public class QueryTermVector implements TermFreqVector {
public QueryTermVector(String queryString, Analyzer analyzer) { public QueryTermVector(String queryString, Analyzer analyzer) {
if (analyzer != null) if (analyzer != null)
{ {
TokenStream stream = analyzer.tokenStream("", new StringReader(queryString)); TokenStream stream;
try {
stream = analyzer.reusableTokenStream("", new StringReader(queryString));
} catch (IOException e1) {
stream = null;
}
if (stream != null) if (stream != null)
{ {
List<BytesRef> terms = new ArrayList<BytesRef>(); List<BytesRef> terms = new ArrayList<BytesRef>();

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashSet;
final class SloppyPhraseScorer extends PhraseScorer { final class SloppyPhraseScorer extends PhraseScorer {
private int slop; private int slop;
@ -109,8 +109,14 @@ final class SloppyPhraseScorer extends PhraseScorer {
/** /**
* Init PhrasePositions in place. * Init PhrasePositions in place.
* There is a one-time initialization for this scorer: * There is a one-time initialization for this scorer (taking place at the first doc that matches all terms):
* <br>- Put in repeats[] each pp that has another pp with same position in the doc. * <br>- Put in repeats[] each pp that has another pp with same position in the doc.
* This relies on the fact that the position in a PP is computed as (TP.position - offset), so
* by adding the offset back we compare actual document positions and can identify that the
* two are the same term.
* An exception to this is two distinct terms at the same offset in the query and the same
* position in the doc. This case is detected by comparing just the (query) offsets,
* and two such PPs are not considered "repeating".
* <br>- Also mark each such pp by pp.repeats = true. * <br>- Also mark each such pp by pp.repeats = true.
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. * <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
* In particular, this allows to score queries with no repetitions with no overhead due to this computation. * In particular, this allows to score queries with no repetitions with no overhead due to this computation.
@ -145,23 +151,26 @@ final class SloppyPhraseScorer extends PhraseScorer {
if (!checkedRepeats) { if (!checkedRepeats) {
checkedRepeats = true; checkedRepeats = true;
// check for repeats // check for repeats
HashMap<PhrasePositions, Object> m = null; HashSet<PhrasePositions> m = null;
for (PhrasePositions pp = first; pp != null; pp = pp.next) { for (PhrasePositions pp = first; pp != null; pp = pp.next) {
int tpPos = pp.position + pp.offset; int tpPos = pp.position + pp.offset;
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) { for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
if (pp.offset == pp2.offset) {
continue; // not a repetition: the two PPs are originally at the same offset in the query!
}
int tpPos2 = pp2.position + pp2.offset; int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos) { if (tpPos2 == tpPos) {
if (m == null) if (m == null)
m = new HashMap<PhrasePositions, Object>(); m = new HashSet<PhrasePositions>();
pp.repeats = true; pp.repeats = true;
pp2.repeats = true; pp2.repeats = true;
m.put(pp,null); m.add(pp);
m.put(pp2,null); m.add(pp2);
} }
} }
} }
if (m!=null) if (m!=null)
repeats = m.keySet().toArray(new PhrasePositions[0]); repeats = m.toArray(new PhrasePositions[0]);
} }
// with repeats must advance some repeating pp's so they all start with differing tp's // with repeats must advance some repeating pp's so they all start with differing tp's
@ -204,12 +213,17 @@ final class SloppyPhraseScorer extends PhraseScorer {
int tpPos = pp.position + pp.offset; int tpPos = pp.position + pp.offset;
for (int i = 0; i < repeats.length; i++) { for (int i = 0; i < repeats.length; i++) {
PhrasePositions pp2 = repeats[i]; PhrasePositions pp2 = repeats[i];
if (pp2 == pp) if (pp2 == pp) {
continue; continue;
}
if (pp.offset == pp2.offset) {
continue; // not a repetition: the two PPs are originally at the same offset in the query!
}
int tpPos2 = pp2.position + pp2.offset; int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos) if (tpPos2 == tpPos) {
return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset. return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
} }
}
return null; return null;
} }
} }
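
A small worked example of the repeat-detection arithmetic described in the comments above, using plain ints (PhrasePositions itself is package-private); the query "a x a" and document position 4 are made up for illustration.

// Sketch only: for the query "a x a" the term "a" occurs at query offsets 0 and 2.
// PhrasePositions stores position = TP.position - offset, so adding the offset back
// recovers the real document position; equal results with different query offsets
// mean the two PPs sit on the same term occurrence (a "repeat").
public class SloppyRepeatExample {
  public static void main(String[] args) {
    int docPos = 4;                       // both term iterators are on doc position 4
    int offset1 = 0, offset2 = 2;         // query offsets of the two "a" terms

    int ppPosition1 = docPos - offset1;   // 4
    int ppPosition2 = docPos - offset2;   // 2

    int tpPos1 = ppPosition1 + offset1;   // 4
    int tpPos2 = ppPosition2 + offset2;   // 4

    boolean sameQueryOffset = (offset1 == offset2);          // false: distinct query slots
    boolean repeating = !sameQueryOffset && (tpPos1 == tpPos2);
    System.out.println("repeating = " + repeating);          // prints: repeating = true
  }
}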

View File

@ -172,7 +172,7 @@ public class NIOFSDirectory extends FSDirectory {
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError( final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in " "OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ "with a a value smaller than the current chunk size (" + chunkSize + ")"); + "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e); outOfMemoryError.initCause(e);
throw outOfMemoryError; throw outOfMemoryError;
} }

View File

@ -125,7 +125,7 @@ public class SimpleFSDirectory extends FSDirectory {
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError( final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in " "OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ "with a value smaller than the current chunks size (" + chunkSize + ")"); + "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e); outOfMemoryError.initCause(e);
throw outOfMemoryError; throw outOfMemoryError;
} }

View File

@ -20,9 +20,6 @@ package org.apache.lucene.util;
import java.util.Comparator; import java.util.Comparator;
import java.io.Serializable; import java.io.Serializable;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.IOException;
/** Represents byte[], as a slice (offset + length) into an /** Represents byte[], as a slice (offset + length) into an
* existing byte[]. * existing byte[].
@ -193,6 +190,9 @@ public final class BytesRef implements Comparable<BytesRef> {
@Override @Override
public boolean equals(Object other) { public boolean equals(Object other) {
if (other == null) {
return false;
}
return this.bytesEquals((BytesRef) other); return this.bytesEquals((BytesRef) other);
} }

View File

@ -1,5 +1,8 @@
package org.apache.lucene.util; package org.apache.lucene.util;
import java.util.Comparator;
import java.util.StringTokenizer;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -54,4 +57,42 @@ public abstract class StringHelper {
private StringHelper() { private StringHelper() {
} }
/**
* @return a Comparator over versioned strings such as X.YY.Z
* @lucene.internal
*/
public static Comparator<String> getVersionComparator() {
return versionComparator;
}
private static Comparator<String> versionComparator = new Comparator<String>() {
public int compare(String a, String b) {
StringTokenizer aTokens = new StringTokenizer(a, ".");
StringTokenizer bTokens = new StringTokenizer(b, ".");
while (aTokens.hasMoreTokens()) {
int aToken = Integer.parseInt(aTokens.nextToken());
if (bTokens.hasMoreTokens()) {
int bToken = Integer.parseInt(bTokens.nextToken());
if (aToken != bToken) {
return aToken - bToken;
}
} else {
// a has some extra trailing tokens; if these are all zeroes, that's ok.
if (aToken != 0) {
return 1;
}
}
}
// b has some extra trailing tokens; if these are all zeroes, that's ok.
while (bTokens.hasMoreTokens()) {
if (Integer.parseInt(bTokens.nextToken()) != 0)
return -1;
}
return 0;
}
};
} }
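
A small usage sketch of the comparator added above: components are compared numerically segment by segment, and trailing zero components are treated as equal.

import java.util.Comparator;
import org.apache.lucene.util.StringHelper;

// Sketch only: exercises StringHelper.getVersionComparator() as defined above.
public class VersionCompareExample {
  public static void main(String[] args) {
    Comparator<String> cmp = StringHelper.getVersionComparator();
    System.out.println(cmp.compare("3.0", "3.0.0"));        // 0  -> equal
    System.out.println(cmp.compare("3.1", "3.0.0") > 0);    // true
    System.out.println(cmp.compare("2.9.4", "3.0") < 0);    // true
  }
}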

View File

@ -143,13 +143,16 @@ public class LevenshteinAutomata {
if (dest >= 0) if (dest >= 0)
for (int r = 0; r < numRanges; r++) for (int r = 0; r < numRanges; r++)
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest])); states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
// reduce the state: this doesn't appear to help anything
//states[k].reduce();
} }
Automaton a = new Automaton(states[0]); Automaton a = new Automaton(states[0]);
a.setDeterministic(true); a.setDeterministic(true);
a.setNumberedStates(states); // we create some useless unconnected states, and it's a net win overall to remove these,
// as well as to combine any adjacent transitions (it makes later algorithms more efficient).
// so, while we could set our numberedStates here, it's actually best not to, and instead to
// force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
//a.setNumberedStates(states);
a.reduce();
// we need not trim transitions to dead states, as they are not created. // we need not trim transitions to dead states, as they are not created.
//a.restoreInvariant(); //a.restoreInvariant();
return a; return a;

View File

@ -30,6 +30,8 @@
package org.apache.lucene.util.automaton; package org.apache.lucene.util.automaton;
import java.util.BitSet; import java.util.BitSet;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
/** /**
@ -72,8 +74,12 @@ final public class MinimizationOperations {
final int[] sigma = a.getStartPoints(); final int[] sigma = a.getStartPoints();
final State[] states = a.getNumberedStates(); final State[] states = a.getNumberedStates();
final int sigmaLen = sigma.length, statesLen = states.length; final int sigmaLen = sigma.length, statesLen = states.length;
final BitSet[][] reverse = new BitSet[statesLen][sigmaLen]; @SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen]; (ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
@SuppressWarnings("unchecked") final HashSet<State>[] partition =
(HashSet<State>[]) new HashSet[statesLen];
@SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
(ArrayList<State>[]) new ArrayList[statesLen];
final int[] block = new int[statesLen]; final int[] block = new int[statesLen];
final StateList[][] active = new StateList[statesLen][sigmaLen]; final StateList[][] active = new StateList[statesLen][sigmaLen];
final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen]; final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
@ -82,8 +88,8 @@ final public class MinimizationOperations {
final BitSet split = new BitSet(statesLen), final BitSet split = new BitSet(statesLen),
refine = new BitSet(statesLen), refine2 = new BitSet(statesLen); refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
for (int q = 0; q < statesLen; q++) { for (int q = 0; q < statesLen; q++) {
splitblock[q] = new BitSet(statesLen); splitblock[q] = new ArrayList<State>();
partition[q] = new BitSet(statesLen); partition[q] = new HashSet<State>();
for (int x = 0; x < sigmaLen; x++) { for (int x = 0; x < sigmaLen; x++) {
active[q][x] = new StateList(); active[q][x] = new StateList();
} }
@ -92,23 +98,22 @@ final public class MinimizationOperations {
for (int q = 0; q < statesLen; q++) { for (int q = 0; q < statesLen; q++) {
final State qq = states[q]; final State qq = states[q];
final int j = qq.accept ? 0 : 1; final int j = qq.accept ? 0 : 1;
partition[j].set(q); partition[j].add(qq);
block[q] = j; block[q] = j;
for (int x = 0; x < sigmaLen; x++) { for (int x = 0; x < sigmaLen; x++) {
final BitSet[] r = final ArrayList<State>[] r =
reverse[qq.step(sigma[x]).number]; reverse[qq.step(sigma[x]).number];
if (r[x] == null) if (r[x] == null)
r[x] = new BitSet(); r[x] = new ArrayList<State>();
r[x].set(q); r[x].add(qq);
} }
} }
// initialize active sets // initialize active sets
for (int j = 0; j <= 1; j++) { for (int j = 0; j <= 1; j++) {
final BitSet part = partition[j];
for (int x = 0; x < sigmaLen; x++) { for (int x = 0; x < sigmaLen; x++) {
for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) { for (final State qq : partition[j]) {
if (reverse[i][x] != null) if (reverse[qq.number][x] != null)
active2[i][x] = active[j][x].add(states[i]); active2[qq.number][x] = active[j][x].add(qq);
} }
} }
} }
@ -121,18 +126,19 @@ final public class MinimizationOperations {
// process pending until fixed point // process pending until fixed point
int k = 2; int k = 2;
while (!pending.isEmpty()) { while (!pending.isEmpty()) {
IntPair ip = pending.removeFirst(); final IntPair ip = pending.removeFirst();
final int p = ip.n1; final int p = ip.n1;
final int x = ip.n2; final int x = ip.n2;
pending2.clear(x*statesLen + p); pending2.clear(x*statesLen + p);
// find states that need to be split off their blocks // find states that need to be split off their blocks
for (StateListNode m = active[p][x].first; m != null; m = m.next) { for (StateListNode m = active[p][x].first; m != null; m = m.next) {
final BitSet r = reverse[m.q.number][x]; final ArrayList<State> r = reverse[m.q.number][x];
if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) { if (r != null) for (final State s : r) {
final int i = s.number;
if (!split.get(i)) { if (!split.get(i)) {
split.set(i); split.set(i);
final int j = block[i]; final int j = block[i];
splitblock[j].set(i); splitblock[j].add(s);
if (!refine2.get(j)) { if (!refine2.get(j)) {
refine2.set(j); refine2.set(j);
refine.set(j); refine.set(j);
@ -142,18 +148,19 @@ final public class MinimizationOperations {
} }
// refine blocks // refine blocks
for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) { for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
final BitSet sb = splitblock[j]; final ArrayList<State> sb = splitblock[j];
if (sb.cardinality() < partition[j].cardinality()) { if (sb.size() < partition[j].size()) {
final BitSet b1 = partition[j], b2 = partition[k]; final HashSet<State> b1 = partition[j];
for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) { final HashSet<State> b2 = partition[k];
b1.clear(i); for (final State s : sb) {
b2.set(i); b1.remove(s);
block[i] = k; b2.add(s);
block[s.number] = k;
for (int c = 0; c < sigmaLen; c++) { for (int c = 0; c < sigmaLen; c++) {
final StateListNode sn = active2[i][c]; final StateListNode sn = active2[s.number][c];
if (sn != null && sn.sl == active[j][c]) { if (sn != null && sn.sl == active[j][c]) {
sn.remove(); sn.remove();
active2[i][c] = active[k][c].add(states[i]); active2[s.number][c] = active[k][c].add(s);
} }
} }
} }
@ -173,8 +180,8 @@ final public class MinimizationOperations {
k++; k++;
} }
refine2.clear(j); refine2.clear(j);
for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) for (final State s : sb)
split.clear(i); split.clear(s.number);
sb.clear(); sb.clear();
} }
refine.clear(); refine.clear();
@ -184,9 +191,7 @@ final public class MinimizationOperations {
for (int n = 0; n < newstates.length; n++) { for (int n = 0; n < newstates.length; n++) {
final State s = new State(); final State s = new State();
newstates[n] = s; newstates[n] = s;
BitSet part = partition[n]; for (State q : partition[n]) {
for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
final State q = states[i];
if (q == a.initial) a.initial = s; if (q == a.initial) a.initial = s;
s.accept = q.accept; s.accept = q.accept;
s.number = q.number; // select representative s.number = q.number; // select representative

View File

@ -232,9 +232,7 @@ public class FST<T> {
void setEmptyOutput(T v) throws IOException { void setEmptyOutput(T v) throws IOException {
if (emptyOutput != null) { if (emptyOutput != null) {
if (!emptyOutput.equals(v)) {
emptyOutput = outputs.merge(emptyOutput, v); emptyOutput = outputs.merge(emptyOutput, v);
}
} else { } else {
emptyOutput = v; emptyOutput = v;
} }

View File

@ -100,7 +100,7 @@ public class MockTokenizer extends Tokenizer {
endOffset = off; endOffset = off;
cp = readCodePoint(); cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp)); } while (cp >= 0 && isTokenChar(cp));
offsetAtt.setOffset(startOffset, endOffset); offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
streamState = State.INCREMENT; streamState = State.INCREMENT;
return true; return true;
} }

View File

@ -42,14 +42,13 @@ public class MockRandomMergePolicy extends MergePolicy {
if (segmentInfos.size() > 1 && random.nextInt(5) == 3) { if (segmentInfos.size() > 1 && random.nextInt(5) == 3) {
SegmentInfos segmentInfos2 = new SegmentInfos(); List<SegmentInfo> segments = new ArrayList<SegmentInfo>(segmentInfos.asList());
segmentInfos2.addAll(segmentInfos); Collections.shuffle(segments, random);
Collections.shuffle(segmentInfos2, random);
// TODO: sometimes make more than 1 merge? // TODO: sometimes make more than 1 merge?
mergeSpec = new MergeSpecification(); mergeSpec = new MergeSpecification();
final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size()); final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size());
mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge))); mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge)));
} }
return mergeSpec; return mergeSpec;

View File

@ -171,7 +171,14 @@ public abstract class LuceneTestCase extends Assert {
private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null; private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
/** Used to track if setUp and tearDown are called correctly from subclasses */ /** Used to track if setUp and tearDown are called correctly from subclasses */
private boolean setup; private static State state = State.INITIAL;
private static enum State {
INITIAL, // no tests ran yet
SETUP, // test has called setUp()
RANTEST, // test is running
TEARDOWN // test has called tearDown()
};
/** /**
* Some tests expect the directory to contain a single segment, and want to do tests on that segment's reader. * Some tests expect the directory to contain a single segment, and want to do tests on that segment's reader.
@ -326,6 +333,7 @@ public abstract class LuceneTestCase extends Assert {
@BeforeClass @BeforeClass
public static void beforeClassLuceneTestCaseJ4() { public static void beforeClassLuceneTestCaseJ4() {
state = State.INITIAL;
staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1; staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
random.setSeed(staticSeed); random.setSeed(staticSeed);
tempDirs.clear(); tempDirs.clear();
@ -375,6 +383,11 @@ public abstract class LuceneTestCase extends Assert {
@AfterClass @AfterClass
public static void afterClassLuceneTestCaseJ4() { public static void afterClassLuceneTestCaseJ4() {
if (!testsFailed) {
assertTrue("ensure your setUp() calls super.setUp() and your tearDown() calls super.tearDown()!!!",
state == State.INITIAL || state == State.TEARDOWN);
}
state = State.INITIAL;
if (! "false".equals(TEST_CLEAN_THREADS)) { if (! "false".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test class"); int rogueThreads = threadCleanup("test class");
if (rogueThreads > 0) { if (rogueThreads > 0) {
@ -483,17 +496,22 @@ public abstract class LuceneTestCase extends Assert {
public void starting(FrameworkMethod method) { public void starting(FrameworkMethod method) {
// set current method name for logging // set current method name for logging
LuceneTestCase.this.name = method.getName(); LuceneTestCase.this.name = method.getName();
if (!testsFailed) {
assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.SETUP);
}
state = State.RANTEST;
super.starting(method); super.starting(method);
} }
}; };
@Before @Before
public void setUp() throws Exception { public void setUp() throws Exception {
seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2; seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2;
random.setSeed(seed); random.setSeed(seed);
assertFalse("ensure your tearDown() calls super.tearDown()!!!", setup); if (!testsFailed) {
setup = true; assertTrue("ensure your tearDown() calls super.tearDown()!!!", (state == State.INITIAL || state == State.TEARDOWN));
}
state = State.SETUP;
savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler(); savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
public void uncaughtException(Thread t, Throwable e) { public void uncaughtException(Thread t, Throwable e) {
@ -529,8 +547,12 @@ public abstract class LuceneTestCase extends Assert {
@After @After
public void tearDown() throws Exception { public void tearDown() throws Exception {
assertTrue("ensure your setUp() calls super.setUp()!!!", setup); if (!testsFailed) {
setup = false; // Note: we allow a test to go straight from SETUP -> TEARDOWN (without ever entering the RANTEST state)
// because if you assume() inside setUp(), it skips the test and the TestWatchman has no way to know...
assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.RANTEST || state == State.SETUP);
}
state = State.TEARDOWN;
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount); BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
if ("perMethod".equals(TEST_CLEAN_THREADS)) { if ("perMethod".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test method: '" + getName() + "'"); int rogueThreads = threadCleanup("test method: '" + getName() + "'");

View File

@ -397,4 +397,15 @@ public class AutomatonTestUtil {
path.remove(s); path.remove(s);
return true; return true;
} }
/**
* Checks that an automaton has no detached states, i.e. states that are
* unreachable from the initial state.
*/
public static void assertNoDetachedStates(Automaton a) {
int numStates = a.getNumberOfStates();
a.clearNumberedStates(); // force recomputation of cached numbered states
assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
}
} }
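
A short usage sketch of the new helper above; BasicAutomata.makeString is used only as a convenient way to obtain an Automaton, and assertions must be enabled (-ea) for the check to fire.

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.BasicAutomata;

// Sketch only: a freshly built automaton should have no detached states.
public class DetachedStatesCheck {
  public static void main(String[] args) {
    Automaton a = BasicAutomata.makeString("lucene");
    AutomatonTestUtil.assertNoDetachedStates(a);   // passes for a freshly built automaton
  }
}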

View File

@ -0,0 +1,79 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
/**
* A binary TokenStream that lets you index a BytesRef.
*/
public final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
private boolean available = true;
public BinaryTokenStream(BytesRef bytes) {
bytesAtt.setBytesRef(bytes);
}
@Override
public boolean incrementToken() throws IOException {
if (available) {
available = false;
return true;
}
return false;
}
@Override
public void reset() throws IOException {
available = true;
}
public interface ByteTermAttribute extends TermToBytesRefAttribute {
public void setBytesRef(BytesRef bytes);
}
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
private BytesRef bytes;
public int fillBytesRef() {
return bytes.hashCode();
}
public BytesRef getBytesRef() {
return bytes;
}
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
public void clear() {}
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
other.bytes = bytes;
}
}
}

View File

@ -0,0 +1,73 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
* Test indexing and searching some byte[] terms
*/
public class TestBinaryTerms extends LuceneTestCase {
public void testBinary() throws IOException {
assumeFalse("PreFlex codec cannot work with binary terms!",
"PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random, dir);
BytesRef bytes = new BytesRef(2);
BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);
for (int i = 0; i < 256; i++) {
bytes.bytes[0] = (byte) i;
bytes.bytes[1] = (byte) (255 - i);
bytes.length = 2;
Document doc = new Document();
doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.NO));
doc.add(new Field("bytes", tokenStream));
iw.addDocument(doc);
}
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher is = newSearcher(ir);
for (int i = 0; i < 256; i++) {
bytes.bytes[0] = (byte) i;
bytes.bytes[1] = (byte) (255 - i);
bytes.length = 2;
TopDocs docs = is.search(new TermQuery(new Term("bytes", bytes)), 5);
assertEquals(1, docs.totalHits);
assertEquals("" + i, is.doc(docs.scoreDocs[0].doc).get("id"));
}
is.close();
ir.close();
dir.close();
}
}

View File

@ -30,7 +30,6 @@ import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThrottledIndexOutput;
import org.junit.Before; import org.junit.Before;
public class TestFlushByRamOrCountsPolicy extends LuceneTestCase { public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
@ -105,7 +104,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertTrue(maxRAMBytes < flushControl.peakActiveBytes); assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
} }
if (ensureNotStalled) { if (ensureNotStalled) {
assertFalse(docsWriter.healthiness.wasStalled); assertFalse(docsWriter.flushControl.stallControl.wasStalled);
} }
writer.close(); writer.close();
assertEquals(0, flushControl.activeBytes()); assertEquals(0, flushControl.activeBytes());
@ -216,15 +215,15 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertEquals(numDocumentsToIndex, r.numDocs()); assertEquals(numDocumentsToIndex, r.numDocs());
assertEquals(numDocumentsToIndex, r.maxDoc()); assertEquals(numDocumentsToIndex, r.maxDoc());
if (!flushPolicy.flushOnRAM()) { if (!flushPolicy.flushOnRAM()) {
assertFalse("never stall if we don't flush on RAM", docsWriter.healthiness.wasStalled); assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.wasStalled);
assertFalse("never block if we don't flush on RAM", docsWriter.healthiness.hasBlocked()); assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.hasBlocked());
} }
r.close(); r.close();
writer.close(); writer.close();
dir.close(); dir.close();
} }
public void testHealthyness() throws InterruptedException, public void testStallControl() throws InterruptedException,
CorruptIndexException, LockObtainFailedException, IOException { CorruptIndexException, LockObtainFailedException, IOException {
int[] numThreads = new int[] { 4 + random.nextInt(8), 1 }; int[] numThreads = new int[] { 4 + random.nextInt(8), 1 };
@ -264,12 +263,12 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertEquals(numDocumentsToIndex, writer.numDocs()); assertEquals(numDocumentsToIndex, writer.numDocs());
assertEquals(numDocumentsToIndex, writer.maxDoc()); assertEquals(numDocumentsToIndex, writer.maxDoc());
if (numThreads[i] == 1) { if (numThreads[i] == 1) {
assertFalse(
"single thread must not stall",
docsWriter.healthiness.wasStalled);
assertFalse( assertFalse(
"single thread must not block numThreads: " + numThreads[i], "single thread must not block numThreads: " + numThreads[i],
docsWriter.healthiness.hasBlocked()); docsWriter.flushControl.stallControl.hasBlocked());
}
if (docsWriter.flushControl.peakNetBytes > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
assertTrue(docsWriter.flushControl.stallControl.wasStalled);
} }
assertActiveBytesAfter(flushControl); assertActiveBytesAfter(flushControl);
writer.close(true); writer.close(true);

View File

@ -363,7 +363,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase {
w.close(); w.close();
SegmentInfos sis = new SegmentInfos(); SegmentInfos sis = new SegmentInfos();
sis.read(base); sis.read(base);
SegmentInfo segmentInfo = sis.get(sis.size() - 1);// last segment must SegmentInfo segmentInfo = sis.info(sis.size() - 1);// last segment must
// have all fields with // have all fields with
// consistent numbers // consistent numbers
FieldInfos fieldInfos = segmentInfo.getFieldInfos(); FieldInfos fieldInfos = segmentInfo.getFieldInfos();

View File

@ -1231,13 +1231,17 @@ public class TestIndexWriter extends LuceneTestCase {
System.out.println("TEST: pass=" + pass); System.out.println("TEST: pass=" + pass);
} }
IndexWriter writer = new IndexWriter( IndexWriterConfig conf = newIndexWriterConfig(
directory, TEST_VERSION_CURRENT, new MockAnalyzer(random)).
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
setOpenMode(OpenMode.CREATE). setOpenMode(OpenMode.CREATE).
setMaxBufferedDocs(2). setMaxBufferedDocs(2).
setMergePolicy(newLogMergePolicy()) setMergePolicy(newLogMergePolicy());
); if (pass == 2) {
conf.setMergeScheduler(new SerialMergeScheduler());
}
IndexWriter writer = new IndexWriter(directory, conf);
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
writer.setInfoStream(VERBOSE ? System.out : null); writer.setInfoStream(VERBOSE ? System.out : null);
for(int iter=0;iter<10;iter++) { for(int iter=0;iter<10;iter++) {
@ -2139,7 +2143,7 @@ public class TestIndexWriter extends LuceneTestCase {
while(!finish) { while(!finish) {
try { try {
while(true) { while(!finish) {
if (w != null) { if (w != null) {
w.close(); w.close();
w = null; w = null;
@ -2157,6 +2161,7 @@ public class TestIndexWriter extends LuceneTestCase {
} }
} }
w.close(); w.close();
w = null;
_TestUtil.checkIndex(dir); _TestUtil.checkIndex(dir);
IndexReader.open(dir, true).close(); IndexReader.open(dir, true).close();

View File

@ -71,9 +71,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass()); assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass());
assertNull(conf.getFlushPolicy()); assertNull(conf.getFlushPolicy());
assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB()); assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB());
// Sanity check - validate that all getters are covered. // Sanity check - validate that all getters are covered.
Set<String> getters = new HashSet<String>(); Set<String> getters = new HashSet<String>();
getters.add("getAnalyzer"); getters.add("getAnalyzer");

View File

@ -128,8 +128,8 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
fsmp.length = 2; fsmp.length = 2;
System.out.println("maybeMerge "+writer.segmentInfos); System.out.println("maybeMerge "+writer.segmentInfos);
SegmentInfo info0 = writer.segmentInfos.get(0); SegmentInfo info0 = writer.segmentInfos.info(0);
SegmentInfo info1 = writer.segmentInfos.get(1); SegmentInfo info1 = writer.segmentInfos.info(1);
writer.maybeMerge(); writer.maybeMerge();
System.out.println("maybeMerge after "+writer.segmentInfos); System.out.println("maybeMerge after "+writer.segmentInfos);
@ -199,7 +199,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
// deletes for info1, the newly created segment from the // deletes for info1, the newly created segment from the
// merge should have no deletes because they were applied in // merge should have no deletes because they were applied in
// the merge // the merge
//SegmentInfo info1 = writer.segmentInfos.get(1); //SegmentInfo info1 = writer.segmentInfos.info(1);
//assertFalse(exists(info1, writer.docWriter.segmentDeletes)); //assertFalse(exists(info1, writer.docWriter.segmentDeletes));
//System.out.println("infos4:"+writer.segmentInfos); //System.out.println("infos4:"+writer.segmentInfos);
@ -261,11 +261,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
throws CorruptIndexException, IOException { throws CorruptIndexException, IOException {
MergeSpecification ms = new MergeSpecification(); MergeSpecification ms = new MergeSpecification();
if (doMerge) { if (doMerge) {
SegmentInfos mergeInfos = new SegmentInfos(); OneMerge om = new OneMerge(segmentInfos.asList().subList(start, start + length));
for (int x=start; x < (start+length); x++) {
mergeInfos.add(segmentInfos.get(x));
}
OneMerge om = new OneMerge(mergeInfos);
ms.add(om); ms.add(om);
doMerge = false; doMerge = false;
return ms; return ms;

View File

@ -0,0 +1,175 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LuceneTestCase;
public class TestCachingCollector extends LuceneTestCase {
private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte expressed in MB
private static class MockScorer extends Scorer {
private MockScorer() {
super((Weight) null);
}
@Override
public float score() throws IOException { return 0; }
@Override
public int docID() { return 0; }
@Override
public int nextDoc() throws IOException { return 0; }
@Override
public int advance(int target) throws IOException { return 0; }
}
private static class NoOpCollector extends Collector {
private final boolean acceptDocsOutOfOrder;
public NoOpCollector(boolean acceptDocsOutOfOrder) {
this.acceptDocsOutOfOrder = acceptDocsOutOfOrder;
}
@Override
public void setScorer(Scorer scorer) throws IOException {}
@Override
public void collect(int doc) throws IOException {}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {}
@Override
public boolean acceptsDocsOutOfOrder() {
return acceptDocsOutOfOrder;
}
}
public void testBasic() throws Exception {
for (boolean cacheScores : new boolean[] { false, true }) {
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1);
cc.setScorer(new MockScorer());
// collect 1000 docs
for (int i = 0; i < 1000; i++) {
cc.collect(i);
}
// now replay them
cc.replay(new Collector() {
int prevDocID = -1;
@Override
public void setScorer(Scorer scorer) throws IOException {}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {}
@Override
public void collect(int doc) throws IOException {
assertEquals(prevDocID + 1, doc);
prevDocID = doc;
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
});
}
}
public void testIllegalStateOnReplay() throws Exception {
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
cc.setScorer(new MockScorer());
// collect 130 docs; this should be enough to trigger a cache abort.
for (int i = 0; i < 130; i++) {
cc.collect(i);
}
assertFalse("CachingCollector should not be cached due to low memory limit", cc.isCached());
try {
cc.replay(new NoOpCollector(false));
fail("replay should fail if CachingCollector is not cached");
} catch (IllegalStateException e) {
// expected
}
}
public void testIllegalCollectorOnReplay() throws Exception {
// tests that the out-of-order mode of the Collector passed to replay() is
// compatible with the Collector passed to the ctor
// 'src' Collector does not support out-of-order
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
cc.setScorer(new MockScorer());
for (int i = 0; i < 10; i++) cc.collect(i);
cc.replay(new NoOpCollector(true)); // this call should not fail
cc.replay(new NoOpCollector(false)); // this call should not fail
// 'src' Collector supports out-of-order
cc = CachingCollector.create(new NoOpCollector(true), true, 50 * ONE_BYTE);
cc.setScorer(new MockScorer());
for (int i = 0; i < 10; i++) cc.collect(i);
cc.replay(new NoOpCollector(true)); // this call should not fail
try {
cc.replay(new NoOpCollector(false)); // this call should fail
fail("should have failed if an in-order Collector was given to replay(), " +
"while CachingCollector was initialized with out-of-order collection");
} catch (IllegalArgumentException e) {
// ok
}
}
public void testCachedArraysAllocation() throws Exception {
// tests the cached arrays allocation -- if the 'nextLength' was too high,
// caching would terminate even if a smaller length would suffice.
// set RAM limit enough for 150 docs + random(10000)
int numDocs = random.nextInt(10000) + 150;
for (boolean cacheScores : new boolean[] { false, true }) {
int bytesPerDoc = cacheScores ? 8 : 4;
CachingCollector cc = CachingCollector.create(new NoOpCollector(false),
cacheScores, bytesPerDoc * ONE_BYTE * numDocs);
cc.setScorer(new MockScorer());
for (int i = 0; i < numDocs; i++) cc.collect(i);
assertTrue(cc.isCached());
// Collecting one more document should exceed the RAM limit and terminate caching
cc.collect(numDocs);
assertFalse(cc.isCached());
}
}
}

View File

@ -17,11 +17,14 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation.IDFExplanation; import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -423,7 +426,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0); mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
} }
TopDocs hits = s.search(mpq, 2); TopDocs hits = s.search(mpq, 2);
assert hits.totalHits == 2; assertEquals(2, hits.totalHits);
assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5); assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
/* /*
for(int hit=0;hit<hits.totalHits;hit++) { for(int hit=0;hit<hits.totalHits;hit++) {
@ -434,4 +437,156 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
r.close(); r.close();
dir.close(); dir.close();
} }
private final static TokenAndPos[] INCR_0_DOC_TOKENS = new TokenAndPos[] {
new TokenAndPos("x", 0),
new TokenAndPos("a", 1),
new TokenAndPos("1", 1),
new TokenAndPos("m", 2), // not existing, relying on slop=2
new TokenAndPos("b", 3),
new TokenAndPos("1", 3),
new TokenAndPos("n", 4), // not existing, relying on slop=2
new TokenAndPos("c", 5),
new TokenAndPos("y", 6)
};
private final static TokenAndPos[] INCR_0_QUERY_TOKENS_AND = new TokenAndPos[] {
new TokenAndPos("a", 0),
new TokenAndPos("1", 0),
new TokenAndPos("b", 1),
new TokenAndPos("1", 1),
new TokenAndPos("c", 2)
};
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_MATCH = new TokenAndPos[][] {
{ new TokenAndPos("a", 0) },
{ new TokenAndPos("x", 0), new TokenAndPos("1", 0) },
{ new TokenAndPos("b", 1) },
{ new TokenAndPos("x", 1), new TokenAndPos("1", 1) },
{ new TokenAndPos("c", 2) }
};
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN = new TokenAndPos[][] {
{ new TokenAndPos("x", 0) },
{ new TokenAndPos("a", 0), new TokenAndPos("1", 0) },
{ new TokenAndPos("x", 1) },
{ new TokenAndPos("b", 1), new TokenAndPos("1", 1) },
{ new TokenAndPos("c", 2) }
};
/**
* using the query parser an MPQ is created; it is not strict about matching all query terms
* at each position - matching one term per position is sufficient (OR logic)
*/
public void testZeroPosIncrSloppyParsedAnd() throws IOException, ParseException {
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
final Query q = qp.parse("\"this text is actually ignored\"");
assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
doTestZeroPosIncrSloppy(q, 0);
((MultiPhraseQuery) q).setSlop(1);
doTestZeroPosIncrSloppy(q, 0);
((MultiPhraseQuery) q).setSlop(2);
doTestZeroPosIncrSloppy(q, 1);
}
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
Directory dir = newDirectory(); // random dir
IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
IndexReader r = IndexReader.open(writer,false);
writer.close();
IndexSearcher s = new IndexSearcher(r);
if (VERBOSE) {
System.out.println("QUERY=" + q);
}
TopDocs hits = s.search(q, 1);
assertEquals("wrong number of results", nExpected, hits.totalHits);
if (VERBOSE) {
for(int hit=0;hit<hits.totalHits;hit++) {
ScoreDoc sd = hits.scoreDocs[hit];
System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
}
}
r.close();
dir.close();
}
/**
* PQ AND Mode - Manually creating a phrase query
*/
public void testZeroPosIncrSloppyPqAnd() throws IOException, ParseException {
final PhraseQuery pq = new PhraseQuery();
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
pq.add(new Term("field",tap.token), tap.pos);
}
doTestZeroPosIncrSloppy(pq, 0);
pq.setSlop(1);
doTestZeroPosIncrSloppy(pq, 0);
pq.setSlop(2);
doTestZeroPosIncrSloppy(pq, 1);
}
/**
* MPQ AND Mode - Manually creating a multiple phrase query
*/
public void testZeroPosIncrSloppyMpqAnd() throws IOException, ParseException {
final MultiPhraseQuery mpq = new MultiPhraseQuery();
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
mpq.add(new Term[]{new Term("field",tap.token)}, tap.pos); //AND logic
}
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(1);
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(2);
doTestZeroPosIncrSloppy(mpq, 1);
}
/**
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query
*/
public void testZeroPosIncrSloppyMpqAndOrMatch() throws IOException, ParseException {
final MultiPhraseQuery mpq = new MultiPhraseQuery();
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_MATCH) {
Term[] terms = tapTerms(tap);
final int pos = tap[0].pos;
mpq.add(terms, pos); //AND logic in pos, OR across lines
}
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(1);
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(2);
doTestZeroPosIncrSloppy(mpq, 1);
}
/**
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query - with no match
*/
public void testZeroPosIncrSloppyMpqAndOrNoMatch() throws IOException, ParseException {
final MultiPhraseQuery mpq = new MultiPhraseQuery();
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN) {
Term[] terms = tapTerms(tap);
final int pos = tap[0].pos;
mpq.add(terms, pos); //AND logic in pos, OR across lines
}
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(2);
doTestZeroPosIncrSloppy(mpq, 0);
}
private Term[] tapTerms(TokenAndPos[] tap) {
Term[] terms = new Term[tap.length];
for (int i=0; i<terms.length; i++) {
terms[i] = new Term("field",tap[i].token);
}
return terms;
}
} }
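A note for readers skimming the new zero-position-increment tests: MultiPhraseQuery.add(Term[], position) ORs the terms supplied for a single position while the phrase as a whole still requires a match at every position, and setSlop(n) relaxes the positional gaps, which is what lets the deliberately missing "m"/"n" tokens above still match at slop 2. A minimal sketch of that API (field, terms and 'searcher' are illustrative only):

  MultiPhraseQuery mpq = new MultiPhraseQuery();
  mpq.add(new Term[] { new Term("body", "quick"), new Term("body", "fast") }, 0); // either term may occupy position 0
  mpq.add(new Term[] { new Term("body", "fox") }, 1);                             // this term must be at position 1
  mpq.setSlop(2);                            // tolerate up to two positions of slack, as the tests above do
  TopDocs td = searcher.search(mpq, 10);     // 'searcher' is an assumed IndexSearcher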

View File

@ -65,6 +65,7 @@ public class TestTermScorer extends LuceneTestCase {
indexSearcher.close(); indexSearcher.close();
indexReader.close(); indexReader.close();
directory.close(); directory.close();
super.tearDown();
} }
public void test() throws IOException { public void test() throws IOException {

View File

@ -0,0 +1,47 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Comparator;
/**
* Tests for StringHelper.getVersionComparator
*/
public class TestVersionComparator extends LuceneTestCase {
public void testVersions() {
Comparator<String> comp = StringHelper.getVersionComparator();
assertTrue(comp.compare("1", "2") < 0);
assertTrue(comp.compare("1", "1") == 0);
assertTrue(comp.compare("2", "1") > 0);
assertTrue(comp.compare("1.1", "1") > 0);
assertTrue(comp.compare("1", "1.1") < 0);
assertTrue(comp.compare("1.1", "1.1") == 0);
assertTrue(comp.compare("1.0", "1") == 0);
assertTrue(comp.compare("1", "1.0") == 0);
assertTrue(comp.compare("1.0.1", "1.0") > 0);
assertTrue(comp.compare("1.0", "1.0.1") < 0);
assertTrue(comp.compare("1.02.003", "1.2.3.0") == 0);
assertTrue(comp.compare("1.2.3.0", "1.02.003") == 0);
assertTrue(comp.compare("1.10", "1.9") > 0);
assertTrue(comp.compare("1.9", "1.10") < 0);
}
}
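Since the comparator treats dot-separated segments numerically (and ignores trailing ".0" segments), it can be used directly to order version strings; a small usage sketch with made-up versions:

  List<String> versions = new ArrayList<String>(Arrays.asList("1.10", "1.2", "1.9", "1.0.1"));
  Collections.sort(versions, StringHelper.getVersionComparator());
  // -> [1.0.1, 1.2, 1.9, 1.10]   (numeric ordering of segments, not lexicographic)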

View File

@ -39,6 +39,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
assertCharVectors(2); assertCharVectors(2);
} }
// LUCENE-3094
public void testNoWastedStates() throws Exception {
AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
}
/** /**
* Tests all possible characteristic vectors for some n * Tests all possible characteristic vectors for some n
* This exhaustively tests the parametric transitions tables. * This exhaustively tests the parametric transitions tables.
@ -66,6 +71,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
assertNotNull(automata[n]); assertNotNull(automata[n]);
assertTrue(automata[n].isDeterministic()); assertTrue(automata[n].isDeterministic());
assertTrue(SpecialOperations.isFinite(automata[n])); assertTrue(SpecialOperations.isFinite(automata[n]));
AutomatonTestUtil.assertNoDetachedStates(automata[n]);
// check that the dfa for n-1 accepts a subset of the dfa for n // check that the dfa for n-1 accepts a subset of the dfa for n
if (n > 0) { if (n > 0) {
assertTrue(automata[n-1].subsetOf(automata[n])); assertTrue(automata[n-1].subsetOf(automata[n]));

View File

@ -49,4 +49,9 @@ public class TestMinimize extends LuceneTestCase {
assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions()); assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
} }
} }
/** n^2 space usage in Hopcroft minimization? */
public void testMinimizeHuge() {
new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
}
} }
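testMinimizeHuge only checks that RegExp.toAutomaton() gets through Hopcroft minimization of a pathological pattern without excessive space usage. For orientation, a sketch of the surrounding automaton API (the pattern below is made up, and the in-place MinimizationOperations.minimize(Automaton) entry point is assumed from the same package):

  Automaton a = new RegExp("(ab|cd)*ef?", RegExp.NONE).toAutomaton();
  MinimizationOperations.minimize(a);        // Hopcroft minimization, modifies 'a' in place
  int states = a.getNumberOfStates();        // size of the (now minimal) DFA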

View File

@ -54,14 +54,16 @@ public class TestFSTs extends LuceneTestCase {
private MockDirectoryWrapper dir; private MockDirectoryWrapper dir;
@Override @Override
public void setUp() throws IOException { public void setUp() throws Exception {
super.setUp();
dir = newDirectory(); dir = newDirectory();
dir.setPreventDoubleWrite(false); dir.setPreventDoubleWrite(false);
} }
@Override @Override
public void tearDown() throws IOException { public void tearDown() throws Exception {
dir.close(); dir.close();
super.tearDown();
} }
private static BytesRef toBytesRef(IntsRef ir) { private static BytesRef toBytesRef(IntsRef ir) {
@ -456,8 +458,9 @@ public class TestFSTs extends LuceneTestCase {
if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) { if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs; final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output; final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.first)); @SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.second)); builderObject.add(pair.input, _outputs.get(twoLongs.first));
builderObject.add(pair.input, _outputs.get(twoLongs.second));
} else { } else {
builder.add(pair.input, pair.output); builder.add(pair.input, pair.output);
} }
@ -537,7 +540,7 @@ public class TestFSTs extends LuceneTestCase {
Object output = run(fst, term, null); Object output = run(fst, term, null);
assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output); assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
assertEquals(output, pair.output); assertEquals(pair.output, output);
// verify enum's next // verify enum's next
IntsRefFSTEnum.InputOutput<T> t = fstEnum.next(); IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();

View File

@ -49,6 +49,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
private State previous; private State previous;
private String previousType; private String previousType;
private boolean exhausted;
/** /**
* Constructs a new CommonGramsQueryFilter based on the provided CommonGramsFilter * Constructs a new CommonGramsQueryFilter based on the provided CommonGramsFilter
@ -67,6 +68,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
super.reset(); super.reset();
previous = null; previous = null;
previousType = null; previousType = null;
exhausted = false;
} }
/** /**
@ -79,7 +81,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
*/ */
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
while (input.incrementToken()) { while (!exhausted && input.incrementToken()) {
State current = captureState(); State current = captureState();
if (previous != null && !isGramType()) { if (previous != null && !isGramType()) {
@ -96,6 +98,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
previous = current; previous = current;
} }
exhausted = true;
if (previous == null || GRAM_TYPE.equals(previousType)) { if (previous == null || GRAM_TYPE.equals(previousType)) {
return false; return false;
} }
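This change, and the matching ones in HyphenatedWordsFilter, ShingleFilter and SynonymFilter further down, all apply the same guard: once the wrapped stream has returned false, remember that fact and never call input.incrementToken() again. Stripped of the filter-specific logic, the pattern looks roughly like the following sketch (FooFilter is a made-up name, not part of this patch):

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenFilter;
  import org.apache.lucene.analysis.TokenStream;

  public final class FooFilter extends TokenFilter {
    private boolean exhausted;

    public FooFilter(TokenStream input) {
      super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
      while (!exhausted && input.incrementToken()) {
        // per-token work would go here; return true once a token is ready
        return true;
      }
      exhausted = true;  // the wrapped stream is done, never pull from it again
      return false;      // (a real filter may still flush buffered state before returning false)
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      exhausted = false; // after reset() the stream may be consumed again
    }
  }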

View File

@ -59,6 +59,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
private final StringBuilder hyphenated = new StringBuilder(); private final StringBuilder hyphenated = new StringBuilder();
private State savedState; private State savedState;
private boolean exhausted = false;
/** /**
* Creates a new HyphenatedWordsFilter * Creates a new HyphenatedWordsFilter
@ -74,7 +75,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
*/ */
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
while (input.incrementToken()) { while (!exhausted && input.incrementToken()) {
char[] term = termAttribute.buffer(); char[] term = termAttribute.buffer();
int termLength = termAttribute.length(); int termLength = termAttribute.length();
@ -96,6 +97,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
} }
} }
exhausted = true;
if (savedState != null) { if (savedState != null) {
// the final term ends with a hyphen // the final term ends with a hyphen
// add back the hyphen, for backwards compatibility. // add back the hyphen, for backwards compatibility.
@ -115,6 +118,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
super.reset(); super.reset();
hyphenated.setLength(0); hyphenated.setLength(0);
savedState = null; savedState = null;
exhausted = false;
} }
// ================================================= Helper Methods ================================================ // ================================================= Helper Methods ================================================

View File

@ -76,4 +76,9 @@ public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
public void close() throws IOException { public void close() throws IOException {
suffix.close(); suffix.close();
} }
@Override
public void end() throws IOException {
suffix.end();
}
} }

View File

@ -158,6 +158,12 @@ public class PrefixAwareTokenFilter extends TokenStream {
return suffixToken; return suffixToken;
} }
@Override
public void end() throws IOException {
prefix.end();
suffix.end();
}
@Override @Override
public void close() throws IOException { public void close() throws IOException {
prefix.close(); prefix.close();

View File

@ -225,7 +225,6 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
TokenStream result = delegate.reusableTokenStream(fieldName, reader); TokenStream result = delegate.reusableTokenStream(fieldName, reader);
if (result == streams.wrapped) { if (result == streams.wrapped) {
/* the wrapped analyzer reused the stream */ /* the wrapped analyzer reused the stream */
streams.withStopFilter.reset();
} else { } else {
/* /*
* the wrapped analyzer did not. if there are any stopwords for the * the wrapped analyzer did not. if there are any stopwords for the

View File

@ -199,10 +199,7 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
setPreviousTokenStream(streams); setPreviousTokenStream(streams);
} else { } else {
TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader); TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
if (result == streams.wrapped) { if (result != streams.wrapped) {
/* the wrapped analyzer reused the stream */
streams.shingle.reset();
} else {
/* the wrapped analyzer did not, create a new shingle around the new one */ /* the wrapped analyzer did not, create a new shingle around the new one */
streams.wrapped = result; streams.wrapped = result;
streams.shingle = new ShingleFilter(streams.wrapped); streams.shingle = new ShingleFilter(streams.wrapped);

View File

@ -327,6 +327,8 @@ public final class ShingleFilter extends TokenFilter {
return tokenAvailable; return tokenAvailable;
} }
private boolean exhausted;
/** /**
* <p>Get the next token from the input stream. * <p>Get the next token from the input stream.
* <p>If the next token has <code>positionIncrement > 1</code>, * <p>If the next token has <code>positionIncrement > 1</code>,
@ -359,7 +361,7 @@ public final class ShingleFilter extends TokenFilter {
} }
isNextInputStreamToken = false; isNextInputStreamToken = false;
newTarget.isFiller = false; newTarget.isFiller = false;
} else if (input.incrementToken()) { } else if (!exhausted && input.incrementToken()) {
if (null == target) { if (null == target) {
newTarget = new InputWindowToken(cloneAttributes()); newTarget = new InputWindowToken(cloneAttributes());
} else { } else {
@ -387,6 +389,7 @@ public final class ShingleFilter extends TokenFilter {
} }
} else { } else {
newTarget = null; newTarget = null;
exhausted = true;
} }
return newTarget; return newTarget;
} }
@ -436,6 +439,7 @@ public final class ShingleFilter extends TokenFilter {
numFillerTokensToInsert = 0; numFillerTokensToInsert = 0;
isOutputHere = false; isOutputHere = false;
noShingleOutput = true; noShingleOutput = true;
exhausted = false;
if (outputUnigramsIfNoShingles && ! outputUnigrams) { if (outputUnigramsIfNoShingles && ! outputUnigrams) {
// Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles // Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
gramSize.minValue = minShingleSize; gramSize.minValue = minShingleSize;

View File

@ -190,16 +190,20 @@ public final class SynonymFilter extends TokenFilter {
private LinkedList<AttributeSource> buffer; private LinkedList<AttributeSource> buffer;
private LinkedList<AttributeSource> matched; private LinkedList<AttributeSource> matched;
private boolean exhausted;
private AttributeSource nextTok() throws IOException { private AttributeSource nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) { if (buffer!=null && !buffer.isEmpty()) {
return buffer.removeFirst(); return buffer.removeFirst();
} else { } else {
if (input.incrementToken()) { if (!exhausted && input.incrementToken()) {
return this; return this;
} else } else {
exhausted = true;
return null; return null;
} }
} }
}
private void pushTok(AttributeSource t) { private void pushTok(AttributeSource t) {
if (buffer==null) buffer=new LinkedList<AttributeSource>(); if (buffer==null) buffer=new LinkedList<AttributeSource>();
@ -250,5 +254,6 @@ public final class SynonymFilter extends TokenFilter {
public void reset() throws IOException { public void reset() throws IOException {
input.reset(); input.reset();
replacement = null; replacement = null;
exhausted = false;
} }
} }

View File

@ -159,8 +159,6 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
*/ */
protected boolean reset(final Reader reader) throws IOException { protected boolean reset(final Reader reader) throws IOException {
source.reset(reader); source.reset(reader);
if(sink != source)
sink.reset(); // only reset if the sink reference is different from source
return true; return true;
} }

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
@ -215,8 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
public void testWithKeywordAttribute() throws IOException { public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true); CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
set.add("строеве"); set.add("строеве");
WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
new StringReader("строевете строеве"));
BulgarianStemFilter filter = new BulgarianStemFilter( BulgarianStemFilter filter = new BulgarianStemFilter(
new KeywordMarkerFilter(tokenStream, set)); new KeywordMarkerFilter(tokenStream, set));
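Most of the analysis test changes from here on are one mechanical substitution: the concrete WhitespaceTokenizer/KeywordTokenizer is replaced by MockTokenizer, which splits the input the same way but additionally asserts that the consumer honours the reset()/incrementToken()/end()/close() contract. The before/after shape, with an illustrative input string:

  // before:  new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)
  // after:   same whitespace splitting, no lowercasing, plus TokenStream-contract checks
  Tokenizer t = new MockTokenizer(new StringReader("some text"), MockTokenizer.WHITESPACE, false);
  // keyword (whole-input-as-one-token) variant, as used in TestPorterStemFilter below
  Tokenizer k = new MockTokenizer(new StringReader("some text"), MockTokenizer.KEYWORD, false);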

View File

@ -22,8 +22,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharReader; import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
public class TestMappingCharFilter extends BaseTokenStreamTestCase { public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@ -64,55 +64,55 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
public void testNothingChange() throws Exception { public void testNothingChange() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1}); assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
} }
public void test1to1() throws Exception { public void test1to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1}); assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
} }
public void test1to2() throws Exception { public void test1to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1}); assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
} }
public void test1to3() throws Exception { public void test1to3() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1}); assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
} }
public void test2to4() throws Exception { public void test2to4() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2}); assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
} }
public void test2to1() throws Exception { public void test2to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2}); assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
} }
public void test3to1() throws Exception { public void test3to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3}); assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
} }
public void test4to2() throws Exception { public void test4to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4}); assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
} }
public void test5to0() throws Exception { public void test5to0() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) ); CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[0]); assertTokenStreamContents(ts, new String[0]);
} }
@ -136,7 +136,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
// //
public void testTokenStream() throws Exception { public void testTokenStream() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) ); CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, assertTokenStreamContents(ts,
new String[]{"i","i","jj","kkk","llll","cc","b","a"}, new String[]{"i","i","jj","kkk","llll","cc","b","a"},
new int[]{0,2,4,6,8,11,16,20}, new int[]{0,2,4,6,8,11,16,20},
@ -157,7 +157,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
public void testChained() throws Exception { public void testChained() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharStream cs = new MappingCharFilter( normMap,
new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) ); new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs ); TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, assertTokenStreamContents(ts,
new String[]{"a","llllllll","i"}, new String[]{"a","llllllll","i"},
new int[]{0,5,8}, new int[]{0,5,8},

View File

@ -21,6 +21,7 @@ import java.io.StringReader;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@ -90,7 +91,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
@Override @Override
public TokenStream tokenStream(String field, Reader in) { public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT, return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords)); new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
} }
}; };
@ -159,7 +160,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
@Override @Override
public TokenStream tokenStream(String field, Reader in) { public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsFilter(TEST_VERSION_CURRENT, return new CommonGramsFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords); new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
} }
}; };
@ -245,7 +246,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/ */
public void testCaseSensitive() throws Exception { public void testCaseSensitive() throws Exception {
final String input = "How The s a brown s cow d like A B thing?"; final String input = "How The s a brown s cow d like A B thing?";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s", assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
"s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow", "s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow",
@ -257,7 +258,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/ */
public void testLastWordisStopWord() throws Exception { public void testLastWordisStopWord() throws Exception {
final String input = "dog the"; final String input = "dog the";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf); TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "dog_the" }); assertTokenStreamContents(nsf, new String[] { "dog_the" });
@ -268,7 +269,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/ */
public void testFirstWordisStopWord() throws Exception { public void testFirstWordisStopWord() throws Exception {
final String input = "the dog"; final String input = "the dog";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf); TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_dog" }); assertTokenStreamContents(nsf, new String[] { "the_dog" });
@ -279,7 +280,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/ */
public void testOneWordQueryStopWord() throws Exception { public void testOneWordQueryStopWord() throws Exception {
final String input = "the"; final String input = "the";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf); TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the" }); assertTokenStreamContents(nsf, new String[] { "the" });
@ -290,7 +291,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/ */
public void testOneWordQuery() throws Exception { public void testOneWordQuery() throws Exception {
final String input = "monster"; final String input = "monster";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf); TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "monster" }); assertTokenStreamContents(nsf, new String[] { "monster" });
@ -301,7 +302,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/ */
public void TestFirstAndLastStopWord() throws Exception { public void TestFirstAndLastStopWord() throws Exception {
final String input = "the of"; final String input = "the of";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input)); MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords); CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf); TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_of" }); assertTokenStreamContents(nsf, new String[] { "the_of" });

View File

@ -21,6 +21,7 @@ import java.io.StringReader;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@ -35,8 +36,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
.getHyphenationTree(is); .getHyphenationTree(is);
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader( new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false),
"min veninde som er lidt af en læsehest")), hyphenator, hyphenator,
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false); CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
@ -55,8 +56,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
// the word basket will not be added due to the longest match option // the word basket will not be added due to the longest match option
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader( new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
"basketballkurv")), hyphenator, dict, hyphenator, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true); CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
assertTokenStreamContents(tf, assertTokenStreamContents(tf,
@ -77,7 +78,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter( HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT, TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")), new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator, hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
2, 4); 2, 4);
@ -89,7 +90,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
tf = new HyphenationCompoundWordTokenFilter( tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT, TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")), new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator, hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 6); 4, 6);
@ -101,7 +102,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
tf = new HyphenationCompoundWordTokenFilter( tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT, TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")), new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator, hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 10); 4, 10);
@ -120,9 +121,10 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Sko", "Vind", "Rute", "Torkare", "Blad" }; "Sko", "Vind", "Rute", "Torkare", "Blad" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new MockTokenizer(
new StringReader( new StringReader(
"Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")), "Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"),
MockTokenizer.WHITESPACE, false),
dict); dict);
assertTokenStreamContents(tf, new String[] { "Bildörr", "Bil", "dörr", "Bilmotor", assertTokenStreamContents(tf, new String[] { "Bildörr", "Bil", "dörr", "Bilmotor",
@ -149,7 +151,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" }; "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")), new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true); CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);

View File

@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -36,36 +37,23 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
public void testExactCase() throws IOException { public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time"); StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet("is", "the", "Time"); Set<String> stopWords = asSet("is", "the", "Time");
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false); TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); assertTokenStreamContents(stream, new String[] { "Now", "The" });
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.toString());
assertTrue(stream.incrementToken());
assertEquals("The", termAtt.toString());
assertFalse(stream.incrementToken());
} }
public void testIgnoreCase() throws IOException { public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time"); StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet( "is", "the", "Time" ); Set<String> stopWords = asSet( "is", "the", "Time" );
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true); TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); assertTokenStreamContents(stream, new String[] { "Now" });
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.toString());
assertFalse(stream.incrementToken());
} }
public void testStopFilt() throws IOException { public void testStopFilt() throws IOException {
StringReader reader = new StringReader("Now is The Time"); StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" }; String[] stopWords = new String[] { "is", "the", "Time" };
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet); TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); assertTokenStreamContents(stream, new String[] { "Now", "The" });
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.toString());
assertTrue(stream.incrementToken());
assertEquals("The", termAtt.toString());
assertFalse(stream.incrementToken());
} }
/** /**
@ -85,11 +73,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
// with increments // with increments
StringReader reader = new StringReader(sb.toString()); StringReader reader = new StringReader(sb.toString());
StopFilter stpf = new StopFilter(Version.LUCENE_40, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet); StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,true); doTestStopPositons(stpf,true);
// without increments // without increments
reader = new StringReader(sb.toString()); reader = new StringReader(sb.toString());
stpf = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet); stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,false); doTestStopPositons(stpf,false);
// with increments, concatenating two stop filters // with increments, concatenating two stop filters
ArrayList<String> a0 = new ArrayList<String>(); ArrayList<String> a0 = new ArrayList<String>();
@ -108,7 +96,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0); Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1); Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
reader = new StringReader(sb.toString()); reader = new StringReader(sb.toString());
StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet0); // first part of the set StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
stpf0.setEnablePositionIncrements(true); stpf0.setEnablePositionIncrements(true);
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated! StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
doTestStopPositons(stpf01,true); doTestStopPositons(stpf01,true);
@ -119,6 +107,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
stpf.setEnablePositionIncrements(enableIcrements); stpf.setEnablePositionIncrements(enableIcrements);
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class); CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class); PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
stpf.reset();
for (int i=0; i<20; i+=3) { for (int i=0; i<20; i+=3) {
assertTrue(stpf.incrementToken()); assertTrue(stpf.incrementToken());
log("Token "+i+": "+stpf); log("Token "+i+": "+stpf);
@ -127,6 +116,8 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement()); assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
} }
assertFalse(stpf.incrementToken()); assertFalse(stpf.incrementToken());
stpf.end();
stpf.close();
} }
// print debug info depending on VERBOSE // print debug info depending on VERBOSE
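The reset()/end()/close() calls added to doTestStopPositons spell out the full TokenStream consumer contract that MockTokenizer enforces elsewhere in this patch; any hand-rolled consumer is expected to follow the same sequence (ts stands for an arbitrary TokenStream):

  ts.reset();                       // required before the first incrementToken()
  while (ts.incrementToken()) {
    // read attributes here, e.g. CharTermAttribute / PositionIncrementAttribute
  }
  ts.end();                         // lets the stream record its final offset/positionIncrement state
  ts.close();                       // releases the underlying Reader and other resources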

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.CharArraySet;
@ -278,7 +278,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole"); set.add("hole");
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter( CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set)); new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
assertTokenStreamContents(filter, new String[] { "hole", "desk" }); assertTokenStreamContents(filter, new String[] { "hole", "desk" });
} }

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*; import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, protected TokenStreamComponents createComponents(String fieldName,
Reader reader) { Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanLightStemFilter(source)); return new TokenStreamComponents(source, new GermanLightStemFilter(source));
} }
}; };

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*; import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, protected TokenStreamComponents createComponents(String fieldName,
Reader reader) { Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanMinimalStemFilter(source)); return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
} }
}; };

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
/** /**
@ -34,7 +34,7 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, protected TokenStreamComponents createComponents(String fieldName,
Reader reader) { Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source)); return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
} }
}; };

View File

@ -22,12 +22,11 @@ import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
@ -41,7 +40,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, protected TokenStreamComponents createComponents(String fieldName,
Reader reader) { Reader reader) {
Tokenizer t = new KeywordTokenizer(reader); Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(t, new PorterStemFilter(t)); return new TokenStreamComponents(t, new PorterStemFilter(t));
} }
}; };
@ -57,7 +56,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
public void testWithKeywordAttribute() throws IOException { public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("yourselves"); set.add("yourselves");
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours")); Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set)); TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"}); assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
} }

View File

@@ -22,8 +22,8 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
     }
   };

View File

@@ -22,8 +22,8 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
     }
   };

View File

@@ -22,8 +22,8 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
     }
   };

View File

@@ -22,8 +22,8 @@ import java.io.Reader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@@ -36,7 +36,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
     @Override
     protected TokenStreamComponents createComponents(String fieldName,
         Reader reader) {
-      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
     }
   };

View File

@@ -21,9 +21,9 @@ import java.io.IOException;
 import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 /**
  * Test HindiNormalizer
@@ -59,8 +59,7 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
     check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
   }
   private void check(String input, String output) throws IOException {
-    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
-        new StringReader(input));
+    Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
     TokenFilter tf = new HindiNormalizationFilter(tokenizer);
     assertTokenStreamContents(tf, new String[] { output });
   }

Some files were not shown because too many files have changed in this diff.