merged with trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1124321 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2011-05-18 16:24:27 +00:00
commit 43e40e8844
193 changed files with 5893 additions and 1300 deletions

View File

@ -53,6 +53,8 @@
<classpathentry kind="src" path="modules/analysis/stempel/src/test"/>
<classpathentry kind="src" path="modules/benchmark/src/java"/>
<classpathentry kind="src" path="modules/benchmark/src/test"/>
<classpathentry kind="src" path="modules/grouping/src/java"/>
<classpathentry kind="src" path="modules/grouping/src/test"/>
<classpathentry kind="src" path="solr/src/java"/>
<classpathentry kind="src" path="solr/src/webapp/src"/>
<classpathentry kind="src" path="solr/src/common"/>
@ -124,8 +126,8 @@
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-glassfish-2.1.v20091210.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-2.1-jetty-6.1.26.jar"/>
<classpathentry kind="lib" path="solr/example/lib/jsp-2.1/jsp-api-2.1-glassfish-2.1.v20091210.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.4.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/carrot2-core-3.5.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/hppc-0.3.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-core-asl-1.5.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/jackson-mapper-asl-1.5.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/clustering/lib/mahout-collections-0.3.jar"/>

View File

@ -26,6 +26,7 @@
<buildFile url="file://$PROJECT_DIR$/modules/analysis/smartcn/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />

View File

@ -26,6 +26,7 @@
<module filepath="$PROJECT_DIR$/modules/analysis/smartcn/smartcn.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
<module filepath="$PROJECT_DIR$/solr/solr.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />

View File

@ -71,6 +71,13 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="grouping module" type="JUnit" factoryName="JUnit">
<module name="grouping" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/grouping/build" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="highlighter contrib" type="JUnit" factoryName="JUnit">
<module name="highlighter" />
<option name="TEST_OBJECT" value="package" />
@ -204,7 +211,7 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<list size="29">
<list size="30">
<item index="0" class="java.lang.String" itemvalue="JUnit.analysis-extras contrib" />
<item index="1" class="java.lang.String" itemvalue="JUnit.ant contrib" />
<item index="2" class="java.lang.String" itemvalue="JUnit.bdb contrib" />
@ -215,25 +222,26 @@
<item index="7" class="java.lang.String" itemvalue="JUnit.dataimporthandler contrib" />
<item index="8" class="java.lang.String" itemvalue="JUnit.extraction contrib" />
<item index="9" class="java.lang.String" itemvalue="JUnit.extras from dataimporthandler contrib" />
<item index="10" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
<item index="11" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
<item index="12" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
<item index="13" class="java.lang.String" itemvalue="JUnit.lucene" />
<item index="14" class="java.lang.String" itemvalue="JUnit.lucli contrib" />
<item index="15" class="java.lang.String" itemvalue="JUnit.memory contrib" />
<item index="16" class="java.lang.String" itemvalue="JUnit.misc contrib" />
<item index="17" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
<item index="18" class="java.lang.String" itemvalue="JUnit.queries contrib" />
<item index="19" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
<item index="20" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
<item index="21" class="java.lang.String" itemvalue="JUnit.solr" />
<item index="22" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
<item index="23" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
<item index="24" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="25" class="java.lang.String" itemvalue="JUnit.swing contrib" />
<item index="26" class="java.lang.String" itemvalue="JUnit.uima contrib" />
<item index="27" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
<item index="28" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
<item index="10" class="java.lang.String" itemvalue="JUnit.grouping module" />
<item index="11" class="java.lang.String" itemvalue="JUnit.highlighter contrib" />
<item index="12" class="java.lang.String" itemvalue="JUnit.icu analysis module" />
<item index="13" class="java.lang.String" itemvalue="JUnit.instantiated contrib" />
<item index="14" class="java.lang.String" itemvalue="JUnit.lucene" />
<item index="15" class="java.lang.String" itemvalue="JUnit.lucli contrib" />
<item index="16" class="java.lang.String" itemvalue="JUnit.memory contrib" />
<item index="17" class="java.lang.String" itemvalue="JUnit.misc contrib" />
<item index="18" class="java.lang.String" itemvalue="JUnit.phonetic analysis module" />
<item index="19" class="java.lang.String" itemvalue="JUnit.queries contrib" />
<item index="20" class="java.lang.String" itemvalue="JUnit.queryparser contrib" />
<item index="21" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
<item index="22" class="java.lang.String" itemvalue="JUnit.solr" />
<item index="23" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
<item index="24" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
<item index="25" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="26" class="java.lang.String" itemvalue="JUnit.swing contrib" />
<item index="27" class="java.lang.String" itemvalue="JUnit.uima contrib" />
<item index="28" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
<item index="29" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
</list>
</component>
</project>

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/build/classes/java" />
<output-test url="file://$MODULE_DIR$/build/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/work" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

View File

@ -0,0 +1,71 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-parent</artifactId>
<version>@version@</version>
<relativePath>../../lucene/pom.xml</relativePath>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-grouping</artifactId>
<packaging>jar</packaging>
<name>Lucene Grouping</name>
<description>Lucene Grouping Module</description>
<properties>
<module-directory>modules/grouping</module-directory>
<build-directory>build</build-directory>
</properties>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<directory>${build-directory}</directory>
<outputDirectory>${build-directory}/classes/java</outputDirectory>
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
<sourceDirectory>src/java</sourceDirectory>
<testSourceDirectory>src/test</testSourceDirectory>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>
<excludes>
<exclude>**/*.java</exclude>
</excludes>
</testResource>
</testResources>
</build>
</project>

View File

@ -33,6 +33,7 @@
<modules>
<module>analysis</module>
<module>benchmark</module>
<module>grouping</module>
</modules>
<build>
<directory>build/lucene-modules-aggregator</directory>

View File

@ -24,6 +24,7 @@
<groupId>org.apache</groupId>
<artifactId>apache</artifactId>
<version>8</version>
<relativePath/>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-solr-grandparent</artifactId>
@ -105,14 +106,6 @@
</license>
</licenses>
<repositories>
<repository>
<id>carrot2.org</id>
<name>Carrot2 Maven2 repository</name>
<url>http://download.carrot2.org/maven2/</url>
<snapshots>
<updatePolicy>never</updatePolicy>
</snapshots>
</repository>
<repository>
<id>apache.snapshots</id>
<name>Apache Snapshot Repository</name>
@ -305,7 +298,7 @@
<dependency>
<groupId>org.carrot2</groupId>
<artifactId>carrot2-core</artifactId>
<version>3.4.2</version>
<version>3.5.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>

View File

@ -162,11 +162,6 @@ Changes in Runtime Behavior
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
* LUCENE-1076: The default merge policy (TieredMergePolicy) is now
able to merge non-contiguous segments, which means docIDs no longer
necessarily stay "in order". If this is a problem then you can use
either of the LogMergePolicy impls. (Mike McCandless)
* LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked
per IndexWriter session, which resulted in FieldInfos that had the FieldInfo
properties from all previous segments combined. Field numbers are now tracked
@ -416,6 +411,10 @@ New features
it's able to handle multi-valued fields and does not hold the term
bytes in RAM. (Mike McCandless)
* LUCENE-1421, LUCENE-3102: added CachingCollector which allows you to cache
document IDs and scores encountered during the search, and "replay" them to
another Collector. (Mike McCandless, Shai Erera)
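A minimal sketch of that cache-and-replay pattern, assuming the CachingCollector factory and the isCached()/replay() methods introduced by this issue (exact signatures may differ):

  // cache hits while delegating to a primary collector, then replay them
  TopScoreDocCollector primary = TopScoreDocCollector.create(10, true);
  CachingCollector cache = CachingCollector.create(primary, true /* cacheScores */, 64.0 /* maxRAMMB */);
  searcher.search(query, cache);
  if (cache.isCached()) {          // replay only if everything fit into the RAM budget
    cache.replay(otherCollector);  // e.g. a second collector doing grouping
  }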
Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
@ -452,6 +451,9 @@ Bug fixes
indexes, causing existing deletions to be applied on the incoming indexes as
well. (Shai Erera, Mike McCandless)
* LUCENE-3068: sloppy phrase query failed to match valid documents when multiple
query terms had same position in the query. (Doron Cohen)
Test Cases
* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
@ -476,9 +478,15 @@ Changes in backwards compatibility policy
(Mike McCandless, Shai Erera)
* LUCENE-3084: MergePolicy.OneMerge.segments was changed from
SegmentInfos to a List<SegmentInfo>; this is actually a minor change
because SegmentInfos itself extends Vector<SegmentInfo>. (Uwe
Schindler, Mike McCandless)
SegmentInfos to a List<SegmentInfo>. SegmentInfos itself was changed
to no longer extend Vector<SegmentInfo> (to update code that is using
Vector-API, use the new asList() and asSet() methods returning unmodifiable
collections; modifying SegmentInfos is now only possible through
the explicitly declared methods). IndexWriter.segString() now takes
Iterable<SegmentInfo> instead of List<SegmentInfo>. A simple recompile
should fix this. MergePolicy and SegmentInfos are internal/experimental
APIs not covered by the strict backwards compatibility policy.
(Uwe Schindler, Mike McCandless)
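The corresponding migration, as applied to BalancedSegmentMergePolicy elsewhere in this commit, looks roughly like this:

  // before: OneMerge over a SegmentInfos sub-range
  // spec.add(new OneMerge(infos.range(start, end)));
  // after: use the unmodifiable List view exposed by SegmentInfos
  spec.add(new MergePolicy.OneMerge(infos.asList().subList(start, end)));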
Changes in runtime behavior
@ -492,6 +500,13 @@ Changes in runtime behavior
returns NumericField instances. (Uwe Schindler, Ryan McKinley,
Mike McCandless)
* LUCENE-1076: Changed the default merge policy from
LogByteSizeMergePolicy to TieredMergePolicy, as of Version.LUCENE_32
(passed to IndexWriterConfig), which is able to merge non-contiguous
segments. This means docIDs no longer necessarily stay "in order"
during indexing. If this is a problem then you can use either of
the LogMergePolicy impls. (Mike McCandless)
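A sketch of opting back into in-order docIDs by configuring a LogMergePolicy explicitly (assuming the usual IndexWriterConfig.setMergePolicy setter):

  // keep contiguous merges (and therefore docID order) by overriding the default
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
  conf.setMergePolicy(new LogByteSizeMergePolicy());
  IndexWriter writer = new IndexWriter(dir, conf);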
New features
* LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader

View File

@ -75,10 +75,36 @@ Bug Fixes
caused a problem if you consumed a tokenstream, then reused it, added different
attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
* LUCENE-3113: Fixed some minor analysis bugs: double-reset() in ReusableAnalyzerBase
and ShingleAnalyzerWrapper, missing end() implementations in PrefixAwareTokenFilter
and PrefixAndSuffixAwareTokenFilter, invocations of incrementToken() after it
already returned false in CommonGramsQueryFilter, HyphenatedWordsFilter,
ShingleFilter, and SynonymsFilter. (Robert Muir, Steven Rowe, Uwe Schindler)
New Features
* LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
* LUCENE-1421: create new grouping contrib module, enabling search
results to be grouped by a single-valued indexed field. This
module was factored out of Solr's grouping implementation, but
it cannot group by function queries nor arbitrary queries. (Mike
McCandless). A brief usage sketch for this module follows the entries below.
* LUCENE-3098: add AllGroupsCollector, to collect all unique groups
(but in unspecified order). (Martijn van Groningen via Mike
McCandless)
* LUCENE-3092: Added NRTCachingDirectory in contrib/misc, which
caches small segments in RAM. This is useful, in the near-real-time
case where the indexing rate is lowish but the reopen rate is
highish, to take load off the IO system. (Mike McCandless)
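As referenced in the LUCENE-1421 entry above, a usage sketch for the two-pass grouping collectors; the class names and constructor arguments here are assumptions based on the module description and may not match the final API exactly:

  // first pass: find the top N groups for a single-valued indexed field
  FirstPassGroupingCollector firstPass = new FirstPassGroupingCollector("author", groupSort, 10);
  searcher.search(query, firstPass);
  Collection<SearchGroup> topGroups = firstPass.getTopGroups(0, true);
  if (topGroups != null) {
    // second pass: collect the top documents within each of those groups
    SecondPassGroupingCollector secondPass = new SecondPassGroupingCollector(
        "author", topGroups, groupSort, docSort, 5, true, false, true);
    searcher.search(query, secondPass);
    TopGroups results = secondPass.getTopGroups(0);
  }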
Optimizations
* LUCENE-3040: Switch all analysis consumers (highlighter, morelikethis, memory, ...)
over to reusableTokenStream(). (Robert Muir)
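The change follows the same pattern used throughout this commit (see the LuceneMethods and TokenSources hunks): obtain the reusable stream, then consume it with reset()/incrementToken()/end()/close(). A condensed sketch:

  TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  try {
    ts.reset();
    while (ts.incrementToken()) {
      // consume termAtt.toString() ...
    }
    ts.end();
  } finally {
    ts.close();
  }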
======================= Lucene 3.1.0 =======================
Changes in backwards compatibility policy

View File

@ -17,8 +17,6 @@ package org.apache.lucene.ant;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.ant.DocumentTestCase;
import org.apache.lucene.ant.HtmlDocument;
@ -27,7 +25,8 @@ public class HtmlDocumentTest extends DocumentTestCase
HtmlDocument doc;
@Override
public void setUp() throws IOException {
public void setUp() throws Exception {
super.setUp();
doc = new HtmlDocument(getFile("test.html"));
}
@ -37,8 +36,9 @@ public class HtmlDocumentTest extends DocumentTestCase
}
@Override
public void tearDown() {
public void tearDown() throws Exception {
doc = null;
super.tearDown();
}
}

View File

@ -17,8 +17,6 @@ package org.apache.lucene.ant;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.ant.DocumentTestCase;
import org.apache.lucene.ant.TextDocument;
@ -27,7 +25,8 @@ public class TextDocumentTest extends DocumentTestCase
TextDocument doc;
@Override
public void setUp() throws IOException {
public void setUp() throws Exception {
super.setUp();
doc = new TextDocument(getFile("test.txt"));
}
@ -36,8 +35,9 @@ public class TextDocumentTest extends DocumentTestCase
}
@Override
public void tearDown() {
public void tearDown() throws Exception {
doc = null;
super.tearDown();
}
}

View File

@ -78,7 +78,7 @@ public class Highlighter
public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
throws IOException, InvalidTokenOffsetsException
{
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
return getBestFragment(tokenStream, text);
}
@ -130,7 +130,7 @@ public class Highlighter
int maxNumFragments)
throws IOException, InvalidTokenOffsetsException
{
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
return getBestFragments(tokenStream, text, maxNumFragments);
}

View File

@ -286,7 +286,11 @@ public class TokenSources {
// convenience method
public static TokenStream getTokenStream(String field, String contents,
Analyzer analyzer) {
return analyzer.tokenStream(field, new StringReader(contents));
try {
return analyzer.reusableTokenStream(field, new StringReader(contents));
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
}

View File

@ -532,7 +532,7 @@ public class InstantiatedIndexWriter implements Closeable {
if (field.tokenStreamValue() != null) {
tokenStream = field.tokenStreamValue();
} else {
tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
tokenStream = analyzer.reusableTokenStream(field.name(), new StringReader(field.stringValue()));
}
// reset the TokenStream to the first token

View File

@ -305,11 +305,12 @@ class LuceneMethods {
int position = 0;
// Tokenize field and add to postingTable
TokenStream stream = analyzer.tokenStream(fieldName, reader);
TokenStream stream = analyzer.reusableTokenStream(fieldName, reader);
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
try {
stream.reset();
while (stream.incrementToken()) {
position += (posIncrAtt.getPositionIncrement() - 1);
position++;
@ -323,6 +324,7 @@ class LuceneMethods {
}
if (position > maxFieldLength) break;
}
stream.end();
} finally {
stream.close();
}

View File

@ -262,8 +262,12 @@ public class MemoryIndex {
if (analyzer == null)
throw new IllegalArgumentException("analyzer must not be null");
TokenStream stream = analyzer.tokenStream(fieldName,
new StringReader(text));
TokenStream stream;
try {
stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
} catch (IOException ex) {
throw new RuntimeException(ex);
}
addField(fieldName, stream);
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collections;
import java.util.Set;
/**
@ -135,7 +136,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if (last > 1 || !isOptimized(infos.info(0))) {
spec = new MergeSpecification();
spec.add(new OneMerge(infos.range(0, last)));
spec.add(new OneMerge(infos.asList().subList(0, last)));
}
} else if (last > maxNumSegments) {
@ -192,7 +193,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
prev = backLink[i][prev];
int mergeStart = i + prev;
if((mergeEnd - mergeStart) > 1) {
spec.add(new OneMerge(infos.range(mergeStart, mergeEnd)));
spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
} else {
if(partialExpunge) {
SegmentInfo info = infos.info(mergeStart);
@ -208,7 +209,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(partialExpunge && maxDelCount > 0) {
// expunge deletes
spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1)));
spec.add(new OneMerge(Collections.singletonList(infos.info(expungeCandidate))));
}
return spec;
@ -250,7 +251,10 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
MergeSpecification spec = null;
if(numLargeSegs < numSegs) {
SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs);
// hack to create a shallow sub-range as SegmentInfos instance,
// it does not clone all metadata, but LogMerge does not need it
final SegmentInfos smallSegments = new SegmentInfos();
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
spec = super.findMergesToExpungeDeletes(smallSegments);
}
@ -258,7 +262,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
for(int i = 0; i < numLargeSegs; i++) {
SegmentInfo info = infos.info(i);
if(info.hasDeletions()) {
spec.add(new OneMerge(infos.range(i, i + 1)));
spec.add(new OneMerge(Collections.singletonList(infos.info(i))));
}
}
return spec;
@ -296,7 +300,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(totalSmallSegSize < targetSegSize * 2) {
MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge);
if(spec == null) spec = new MergeSpecification(); // should not happen
spec.add(new OneMerge(infos.range(numLargeSegs, numSegs)));
spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
return spec;
} else {
return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
@ -311,11 +315,13 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(size(info) < sizeThreshold) break;
startSeg++;
}
spec.add(new OneMerge(infos.range(startSeg, numSegs)));
spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
return spec;
} else {
// apply the log merge policy to small segments.
SegmentInfos smallSegments = infos.range(numLargeSegs, numSegs);
// hack to create a shallow sub-range as SegmentInfos instance,
// it does not clone all metadata, but LogMerge does not need it
final SegmentInfos smallSegments = new SegmentInfos();
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
MergeSpecification spec = super.findMerges(smallSegments);
if(_partialExpunge) {
@ -342,7 +348,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
}
}
if (maxDelCount > 0) {
return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1));
return new OneMerge(Collections.singletonList(infos.info(expungeCandidate)));
}
return null;
}

View File

@ -0,0 +1,289 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.store.RAMDirectory; // javadocs
import org.apache.lucene.util.IOUtils;
// TODO
// - let subclass dictate policy...?
// - rename to MergeCachingDir? NRTCachingDir
/**
* Wraps a {@link RAMDirectory}
* around any provided delegate directory, to
* be used during NRT search. Make sure you pull the merge
* scheduler using {@link #getMergeScheduler} and pass that to your
* {@link IndexWriter}; this class uses that to keep track of which
* merges are being done by which threads, to decide when to
* cache each written file.
*
* <p>This class is likely only useful in a near-real-time
* context, where indexing rate is lowish but reopen
* rate is highish, resulting in many tiny files being
* written. This directory keeps such segments (as well as
* the segments produced by merging them, as long as they
* are small enough), in RAM.</p>
*
* <p>This is safe to use: when your app calls {@link IndexWriter#commit},
* all cached files will be flushed from the cache and sync'd.</p>
*
* <p><b>NOTE</b>: this class is somewhat sneaky in its
* approach for spying on merges to determine the size of a
* merge: it records which threads are running which merges
* by watching ConcurrentMergeScheduler's doMerge method.
* While this works correctly, likely future versions of
* this class will take a more general approach.
*
* <p>Here's a simple example usage:
*
* <pre>
* Directory fsDir = FSDirectory.open(new File("/path/to/index"));
* NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
* IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
* conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
* IndexWriter writer = new IndexWriter(cachedFSDir, conf);
* </pre>
*
* <p>This will cache all newly flushed segments, all merges
* whose expected segment size is <= 5 MB, unless the net
* cached bytes exceeds 60 MB at which point all writes will
* not be cached (until the net bytes falls below 60 MB).</p>
*
* @lucene.experimental
*/
public class NRTCachingDirectory extends Directory {
private final RAMDirectory cache = new RAMDirectory();
private final Directory delegate;
private final long maxMergeSizeBytes;
private final long maxCachedBytes;
private static final boolean VERBOSE = false;
/**
* We will cache a newly created output if 1) it's a
* flush or a merge and the estimated size of the merged segment is <=
* maxMergeSizeMB, and 2) the total cached bytes is <=
* maxCachedMB */
public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) {
this.delegate = delegate;
maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024);
maxCachedBytes = (long) (maxCachedMB*1024*1024);
}
@Override
public synchronized String[] listAll() throws IOException {
final Set<String> files = new HashSet<String>();
for(String f : cache.listAll()) {
files.add(f);
}
for(String f : delegate.listAll()) {
assert !files.contains(f);
files.add(f);
}
return files.toArray(new String[files.size()]);
}
/** Returns how many bytes are being used by the
* RAMDirectory cache */
public long sizeInBytes() {
return cache.sizeInBytes();
}
@Override
public synchronized boolean fileExists(String name) throws IOException {
return cache.fileExists(name) || delegate.fileExists(name);
}
@Override
public synchronized long fileModified(String name) throws IOException {
if (cache.fileExists(name)) {
return cache.fileModified(name);
} else {
return delegate.fileModified(name);
}
}
@Override
public synchronized void touchFile(String name) throws IOException {
if (cache.fileExists(name)) {
cache.touchFile(name);
} else {
delegate.touchFile(name);
}
}
@Override
public synchronized void deleteFile(String name) throws IOException {
// Delete from both, in case we are currently uncaching:
if (VERBOSE) {
System.out.println("nrtdir.deleteFile name=" + name);
}
cache.deleteFile(name);
delegate.deleteFile(name);
}
@Override
public synchronized long fileLength(String name) throws IOException {
if (cache.fileExists(name)) {
return cache.fileLength(name);
} else {
return delegate.fileLength(name);
}
}
public String[] listCachedFiles() {
return cache.listAll();
}
@Override
public IndexOutput createOutput(String name) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.createOutput name=" + name);
}
if (doCacheWrite(name)) {
if (VERBOSE) {
System.out.println(" to cache");
}
return cache.createOutput(name);
} else {
return delegate.createOutput(name);
}
}
@Override
public void sync(Collection<String> fileNames) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.sync files=" + fileNames);
}
for(String fileName : fileNames) {
unCache(fileName);
}
delegate.sync(fileNames);
}
@Override
public synchronized IndexInput openInput(String name) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.openInput name=" + name);
}
if (cache.fileExists(name)) {
if (VERBOSE) {
System.out.println(" from cache");
}
return cache.openInput(name);
} else {
return delegate.openInput(name);
}
}
@Override
public synchronized IndexInput openInput(String name, int bufferSize) throws IOException {
if (cache.fileExists(name)) {
return cache.openInput(name, bufferSize);
} else {
return delegate.openInput(name, bufferSize);
}
}
@Override
public Lock makeLock(String name) {
return delegate.makeLock(name);
}
@Override
public void clearLock(String name) throws IOException {
delegate.clearLock(name);
}
/** Close this directory, which flushes any cached files
* to the delegate and then closes the delegate. */
@Override
public void close() throws IOException {
for(String fileName : cache.listAll()) {
unCache(fileName);
}
cache.close();
delegate.close();
}
private final ConcurrentHashMap<Thread,MergePolicy.OneMerge> merges = new ConcurrentHashMap<Thread,MergePolicy.OneMerge>();
public MergeScheduler getMergeScheduler() {
return new ConcurrentMergeScheduler() {
@Override
protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
try {
merges.put(Thread.currentThread(), merge);
super.doMerge(merge);
} finally {
merges.remove(Thread.currentThread());
}
}
};
}
/** Subclass can override this to customize logic; return
* true if this file should be written to the RAMDirectory. */
protected boolean doCacheWrite(String name) {
final MergePolicy.OneMerge merge = merges.get(Thread.currentThread());
//System.out.println(Thread.currentThread().getName() + ": CACHE check merge=" + merge + " size=" + (merge==null ? 0 : merge.estimatedMergeBytes));
return !name.equals(IndexFileNames.SEGMENTS_GEN) && (merge == null || merge.estimatedMergeBytes <= maxMergeSizeBytes) && cache.sizeInBytes() <= maxCachedBytes;
}
/** Copies the named file from the RAM cache to the delegate and then drops it from the cache; no-op if the delegate already has the file. */
private void unCache(String fileName) throws IOException {
final IndexOutput out;
synchronized(this) {
if (!delegate.fileExists(fileName)) {
assert cache.fileExists(fileName);
out = delegate.createOutput(fileName);
} else {
out = null;
}
}
if (out != null) {
IndexInput in = null;
try {
in = cache.openInput(fileName);
in.copyBytes(out, in.length());
} finally {
IOUtils.closeSafely(in, out);
}
synchronized(this) {
cache.deleteFile(fileName);
}
}
}
}

View File

@ -0,0 +1,114 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;
public class TestNRTCachingDirectory extends LuceneTestCase {
public void testNRTAndCommit() throws Exception {
Directory dir = newDirectory();
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
conf.setMergeScheduler(cachedDir.getMergeScheduler());
RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
w.w.setInfoStream(VERBOSE ? System.out : null);
final LineFileDocs docs = new LineFileDocs(random);
final int numDocs = _TestUtil.nextInt(random, 100, 400);
if (VERBOSE) {
System.out.println("TEST: numDocs=" + numDocs);
}
final List<BytesRef> ids = new ArrayList<BytesRef>();
IndexReader r = null;
for(int docCount=0;docCount<numDocs;docCount++) {
final Document doc = docs.nextDoc();
ids.add(new BytesRef(doc.get("docid")));
w.addDocument(doc);
if (random.nextInt(20) == 17) {
if (r == null) {
r = IndexReader.open(w.w, false);
} else {
final IndexReader r2 = r.reopen();
if (r2 != r) {
r.close();
r = r2;
}
}
assertEquals(1+docCount, r.numDocs());
final IndexSearcher s = new IndexSearcher(r);
// Just make sure search can run; we can't assert
// totHits since it could be 0
TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
// System.out.println("tot hits " + hits.totalHits);
}
}
if (r != null) {
r.close();
}
// Close should force cache to clear since all files are sync'd
w.close();
final String[] cachedFiles = cachedDir.listCachedFiles();
for(String file : cachedFiles) {
System.out.println("FAIL: cached file " + file + " remains after sync");
}
assertEquals(0, cachedFiles.length);
r = IndexReader.open(dir);
for(BytesRef id : ids) {
assertEquals(1, r.docFreq("docid", id));
}
r.close();
cachedDir.close();
}
// NOTE: not a test; just here to make sure the code frag
// in the javadocs is correct!
public void verifyCompiles() throws Exception {
Analyzer analyzer = null;
Directory fsDir = FSDirectory.open(new File("/path/to/index"));
NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 2.0, 25.0);
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
IndexWriter writer = new IndexWriter(cachedFSDir, conf);
}
}

View File

@ -186,7 +186,7 @@ public class FuzzyLikeThisQuery extends Query
private void addTerms(IndexReader reader,FieldVals f) throws IOException
{
if(f.queryString==null) return;
TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
int corpusNumDocs=reader.numDocs();

View File

@ -881,7 +881,7 @@ public final class MoreLikeThis {
throw new UnsupportedOperationException("To use MoreLikeThis without " +
"term vectors, you must provide an Analyzer");
}
TokenStream ts = analyzer.tokenStream(fieldName, r);
TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
int tokenCount=0;
// for every token
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

View File

@ -85,7 +85,7 @@ public final class SimilarityQueries
Set<?> stop)
throws IOException
{
TokenStream ts = a.tokenStream( field, new StringReader( body));
TokenStream ts = a.reusableTokenStream( field, new StringReader( body));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
BooleanQuery tmp = new BooleanQuery();

View File

@ -106,15 +106,16 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
}
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
TokenStream source;
int countTokens = 0;
try {
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
source.reset();
} catch (IOException e1) {
throw new RuntimeException(e1);
}
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
while (true) {
try {
if (!source.incrementToken()) break;
@ -194,14 +195,15 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
TokenStream source;
List<String> tlist = new ArrayList<String>();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
try {
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
source.reset();
} catch (IOException e1) {
throw new RuntimeException(e1);
}
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
while (true) {
try {
if (!source.incrementToken()) break;
@ -247,12 +249,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
throws ParseException {
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
TokenStream source = null;
String nextToken = null;
boolean multipleTokens = false;
try {
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
source.reset();
if (source.incrementToken()) {
nextToken = termAtt.toString();
@ -292,7 +295,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
if (part1 != null) {
// part1
try {
source = getAnalyzer().tokenStream(field, new StringReader(part1));
source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
termAtt = source.addAttribute(CharTermAttribute.class);
source.reset();
multipleTokens = false;
@ -318,11 +321,10 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
}
if (part2 != null) {
// part2
source = getAnalyzer().tokenStream(field, new StringReader(part2));
termAtt = source.addAttribute(CharTermAttribute.class);
try {
// part2
source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
termAtt = source.addAttribute(CharTermAttribute.class);
source.reset();
if (source.incrementToken()) {
part2 = termAtt.toString();

View File

@ -121,9 +121,9 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
String text = fieldNode.getTextAsString();
String field = fieldNode.getFieldAsString();
TokenStream source = this.analyzer.tokenStream(field, new StringReader(
text));
TokenStream source;
try {
source = this.analyzer.reusableTokenStream(field, new StringReader(text));
source.reset();
} catch (IOException e1) {
throw new RuntimeException(e1);

View File

@ -631,8 +631,9 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
}
@Override
public void tearDown() {
public void tearDown() throws Exception {
BooleanQuery.setMaxClauseCount(originalMaxClauses);
super.tearDown();
}
}

View File

@ -116,7 +116,7 @@ public final class SynExpand {
if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query));
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {

View File

@ -124,7 +124,7 @@ public class SynLookup {
List<String> top = new LinkedList<String>(); // needs to be separately listed..
// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query));
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {

View File

@ -76,10 +76,10 @@ public class LikeThisQueryBuilder implements QueryBuilder {
stopWordsSet=new HashSet<String>();
for (int i = 0; i < fields.length; i++)
{
TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
try
{
TokenStream ts = analyzer.reusableTokenStream(fields[i],new StringReader(stopWords));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while(ts.incrementToken()) {
stopWordsSet.add(termAtt.toString());

View File

@ -56,7 +56,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
try
{
ArrayList<SpanQuery> clausesList=new ArrayList<SpanQuery>();
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
TokenStream ts=analyzer.reusableTokenStream(fieldName,new StringReader(value));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();

View File

@ -57,11 +57,11 @@ public class TermsFilterBuilder implements FilterBuilder
TermsFilter tf = new TermsFilter();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
try
{
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
BytesRef bytes = termAtt.getBytesRef();
ts.reset();

View File

@ -55,9 +55,9 @@ public class TermsQueryBuilder implements QueryBuilder {
BooleanQuery bq=new BooleanQuery(DOMUtils.getAttribute(e,"disableCoord",false));
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,"minimumNumberShouldMatch",0));
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
{
TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
BytesRef bytes = termAtt.getBytesRef();

View File

@ -733,8 +733,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
// case we have to roll back:
startCommit();
final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
rollbackSegmentInfos.addAll(segmentInfos);
final List<SegmentInfo> rollbackSegments = segmentInfos.createBackupSegmentInfos(false);
boolean success = false;
try {
@ -766,8 +765,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
deleter.refresh();
// Restore all SegmentInfos (in case we pruned some)
segmentInfos.clear();
segmentInfos.addAll(rollbackSegmentInfos);
segmentInfos.rollbackSegmentInfos(rollbackSegments);
}
}

View File

@ -126,7 +126,6 @@ final class DocumentsWriter {
final DocumentsWriterPerThreadPool perThreadPool;
final FlushPolicy flushPolicy;
final DocumentsWriterFlushControl flushControl;
final Healthiness healthiness;
DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
@ -142,10 +141,7 @@ final class DocumentsWriter {
flushPolicy = configuredPolicy;
}
flushPolicy.init(this);
healthiness = new Healthiness();
final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT);
flushControl = new DocumentsWriterFlushControl(this, config );
}
synchronized void deleteQueries(final Query... queries) throws IOException {
@ -283,31 +279,28 @@ final class DocumentsWriter {
ensureOpen();
boolean maybeMerge = false;
final boolean isUpdate = delTerm != null;
if (healthiness.anyStalledThreads()) {
// Help out flushing any pending DWPTs so we can un-stall:
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
// Help out flushing any queued DWPTs so we can un-stall:
if (infoStream != null) {
message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)");
message("DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
}
do {
// Try pick up pending threads here if possible
DocumentsWriterPerThread flushingDWPT;
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
// Don't push the delete here since the update could fail!
maybeMerge = doFlush(flushingDWPT);
if (!healthiness.anyStalledThreads()) {
break;
}
maybeMerge |= doFlush(flushingDWPT);
}
if (infoStream != null && healthiness.anyStalledThreads()) {
message("WARNING DocumentsWriter still has stalled threads; waiting");
if (infoStream != null && flushControl.anyStalledThreads()) {
message("WARNING DocumentsWriter has stalled threads; waiting");
}
healthiness.waitIfStalled(); // block if stalled
flushControl.waitIfStalled(); // block if stalled
} while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing
if (infoStream != null && healthiness.anyStalledThreads()) {
message("WARNING DocumentsWriter done waiting");
if (infoStream != null) {
message("continue indexing after helpling out flushing DocumentsWriter is healthy");
}
}
@ -353,7 +346,6 @@ final class DocumentsWriter {
maybeMerge = true;
boolean success = false;
FlushTicket ticket = null;
try {
assert currentFullFlushDelQueue == null
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
@ -511,9 +503,7 @@ final class DocumentsWriter {
anythingFlushed |= doFlush(flushingDWPT);
}
// If a concurrent flush is still in flight wait for it
while (flushControl.anyFlushing()) {
flushControl.waitForFlush();
}
if (!anythingFlushed) { // apply deletes if we did not flush any document
synchronized (ticketQueue) {
ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));

View File

@ -44,30 +44,32 @@ public final class DocumentsWriterFlushControl {
private long activeBytes = 0;
private long flushBytes = 0;
private volatile int numPending = 0;
private volatile int numFlushing = 0;
final AtomicBoolean flushDeletes = new AtomicBoolean(false);
private boolean fullFlush = false;
private Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
private final Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
// only for safety reasons if a DWPT is close to the RAM limit
private Queue<DocumentsWriterPerThread> blockedFlushes = new LinkedList<DocumentsWriterPerThread>();
private final Queue<BlockedFlush> blockedFlushes = new LinkedList<BlockedFlush>();
double maxConfiguredRamBuffer = 0;
long peakActiveBytes = 0;// only with assert
long peakFlushBytes = 0;// only with assert
long peakNetBytes = 0;// only with assert
private final Healthiness healthiness;
long peakDelta = 0; // only with assert
final DocumentsWriterStallControl stallControl;
private final DocumentsWriterPerThreadPool perThreadPool;
private final FlushPolicy flushPolicy;
private boolean closed = false;
private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
private final DocumentsWriter documentsWriter;
private final IndexWriterConfig config;
DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
Healthiness healthiness, long hardMaxBytesPerDWPT) {
this.healthiness = healthiness;
IndexWriterConfig config) {
this.stallControl = new DocumentsWriterStallControl();
this.perThreadPool = documentsWriter.perThreadPool;
this.flushPolicy = documentsWriter.flushPolicy;
this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT;
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
this.config = config;
this.documentsWriter = documentsWriter;
}
@ -83,6 +85,24 @@ public final class DocumentsWriterFlushControl {
return flushBytes + activeBytes;
}
long stallLimitBytes() {
final double maxRamMB = config.getRAMBufferSizeMB();
return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
}
private boolean assertMemory() {
final double maxRamMB = config.getRAMBufferSizeMB();
if (maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
// for this assert we must be tolerant to ram buffer changes!
maxConfiguredRamBuffer = Math.max(maxRamMB, maxConfiguredRamBuffer);
final long ram = flushBytes + activeBytes;
// take peakDelta into account - worst case is that all flushing, pending and blocked DWPT had maxMem and the last doc had the peakDelta
final long expected = (long)(2 * (maxConfiguredRamBuffer * 1024 * 1024)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
assert ram <= expected : "ram was " + ram + " expected: " + expected + " flush mem: " + flushBytes + " active: " + activeBytes ;
}
return true;
}
private void commitPerThreadBytes(ThreadState perThread) {
final long delta = perThread.perThread.bytesUsed()
- perThread.bytesUsed;
@ -105,11 +125,14 @@ public final class DocumentsWriterFlushControl {
peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
peakNetBytes = Math.max(peakNetBytes, netBytes());
peakDelta = Math.max(peakDelta, delta);
return true;
}
synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
boolean isUpdate) {
try {
commitPerThreadBytes(perThread);
if (!perThread.flushPending) {
if (isUpdate) {
@ -121,37 +144,43 @@ public final class DocumentsWriterFlushControl {
// Safety check to prevent a single DWPT exceeding its RAM limit. This
// is super important since we can not address more than 2048 MB per DWPT
setFlushPending(perThread);
}
}
final DocumentsWriterPerThread flushingDWPT;
if (fullFlush) {
DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
assert toBlock != null;
blockedFlushes.add(toBlock);
if (perThread.flushPending) {
checkoutAndBlock(perThread);
flushingDWPT = nextPendingFlush();
} else {
flushingDWPT = null;
}
} else {
flushingDWPT = tryCheckoutForFlush(perThread);
}
}
final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
healthiness.updateStalled(this);
return flushingDWPT;
} finally {
stallControl.updateStalled(this);
assert assertMemory();
}
}
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
assert flushingWriters.containsKey(dwpt);
try {
numFlushing--;
Long bytes = flushingWriters.remove(dwpt);
flushBytes -= bytes.longValue();
perThreadPool.recycle(dwpt);
healthiness.updateStalled(this);
stallControl.updateStalled(this);
assert assertMemory();
} finally {
notifyAll();
}
}
public synchronized boolean anyFlushing() {
return numFlushing != 0;
}
public synchronized void waitForFlush() {
if (numFlushing != 0) {
while (flushingWriters.size() != 0) {
try {
this.wait();
} catch (InterruptedException e) {
@ -173,32 +202,51 @@ public final class DocumentsWriterFlushControl {
flushBytes += bytes;
activeBytes -= bytes;
numPending++; // write access synced
assert assertMemory();
} // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing
}
synchronized void doOnAbort(ThreadState state) {
try {
if (state.flushPending) {
flushBytes -= state.bytesUsed;
} else {
activeBytes -= state.bytesUsed;
}
assert assertMemory();
// Take it out of the loop this DWPT is stale
perThreadPool.replaceForFlush(state, closed);
healthiness.updateStalled(this);
}finally {
stallControl.updateStalled(this);
}
}
synchronized DocumentsWriterPerThread tryCheckoutForFlush(
ThreadState perThread) {
if (fullFlush) {
return null;
return perThread.flushPending ? internalTryCheckOutForFlush(perThread) : null;
}
private void checkoutAndBlock(ThreadState perThread) {
perThread.lock();
try {
assert perThread.flushPending : "can not block non-pending threadstate";
assert fullFlush : "can not block if fullFlush == false";
final DocumentsWriterPerThread dwpt;
final long bytes = perThread.bytesUsed;
dwpt = perThreadPool.replaceForFlush(perThread, closed);
numPending--;
blockedFlushes.add(new BlockedFlush(dwpt, bytes));
}finally {
perThread.unlock();
}
return internalTryCheckOutForFlush(perThread);
}
private DocumentsWriterPerThread internalTryCheckOutForFlush(
ThreadState perThread) {
if (perThread.flushPending) {
assert Thread.holdsLock(this);
assert perThread.flushPending;
try {
// We are pending so all memory is already moved to flushBytes
if (perThread.tryLock()) {
try {
@ -212,15 +260,16 @@ public final class DocumentsWriterFlushControl {
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(dwpt, Long.valueOf(bytes));
numPending--; // write access synced
numFlushing++;
return dwpt;
}
} finally {
perThread.unlock();
}
}
}
return null;
} finally {
stallControl.updateStalled(this);
}
}
@Override
@ -231,12 +280,13 @@ public final class DocumentsWriterFlushControl {
DocumentsWriterPerThread nextPendingFlush() {
synchronized (this) {
DocumentsWriterPerThread poll = flushQueue.poll();
if (poll != null) {
final DocumentsWriterPerThread poll;
if ((poll = flushQueue.poll()) != null) {
stallControl.updateStalled(this);
return poll;
}
}
if (numPending > 0) {
if (numPending > 0 && !fullFlush) { // don't check if we are doing a full flush
final Iterator<ThreadState> allActiveThreads = perThreadPool
.getActivePerThreadsIterator();
while (allActiveThreads.hasNext() && numPending > 0) {
@ -276,8 +326,8 @@ public final class DocumentsWriterFlushControl {
return documentsWriter.deleteQueue.numGlobalTermDeletes();
}
int numFlushingDWPT() {
return numFlushing;
synchronized int numFlushingDWPT() {
return flushingWriters.size();
}
public boolean doApplyAllDeletes() {
@ -289,7 +339,7 @@ public final class DocumentsWriterFlushControl {
}
int numActiveDWPT() {
return this.perThreadPool.getMaxThreadStates();
return this.perThreadPool.getActiveThreadState();
}
void markForFullFlush() {
@ -331,11 +381,11 @@ public final class DocumentsWriterFlushControl {
if (!next.flushPending) {
setFlushPending(next);
}
}
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
toFlush.add(flushingDWPT);
}
} else {
// get the new delete queue from DW
next.perThread.initialize();
@ -345,31 +395,54 @@ public final class DocumentsWriterFlushControl {
}
}
synchronized (this) {
assert assertBlockedFlushes(flushingQueue);
flushQueue.addAll(blockedFlushes);
blockedFlushes.clear();
/* make sure that all DWPTs which were concurrently marked as pending and
* moved to blocked are moved over to the flushQueue. There is a chance
* that this happens since we mark DWPTs for full flush without
* blocking indexing. */
pruneBlockedQueue(flushingQueue);
assert assertBlockedFlushes(documentsWriter.deleteQueue);
flushQueue.addAll(toFlush);
stallControl.updateStalled(this);
}
}
/**
* Prunes the blockedQueue by removing all DWPT that are associated with the given flush queue.
*/
private void pruneBlockedQueue(final DocumentsWriterDeleteQueue flushingQueue) {
Iterator<BlockedFlush> iterator = blockedFlushes.iterator();
while (iterator.hasNext()) {
BlockedFlush blockedFlush = iterator.next();
if (blockedFlush.dwpt.deleteQueue == flushingQueue) {
iterator.remove();
assert !flushingWriters.containsKey(blockedFlush.dwpt) : "DWPT is already flushing";
// Record the flushing DWPT to reduce flushBytes in doAfterFlush
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
// don't decrement pending here - it's already done when the DWPT is blocked
flushQueue.add(blockedFlush.dwpt);
}
}
}
synchronized void finishFullFlush() {
assert fullFlush;
assert flushQueue.isEmpty();
assert flushingWriters.isEmpty();
try {
if (!blockedFlushes.isEmpty()) {
assert assertBlockedFlushes(documentsWriter.deleteQueue);
flushQueue.addAll(blockedFlushes);
blockedFlushes.clear();
pruneBlockedQueue(documentsWriter.deleteQueue);
assert blockedFlushes.isEmpty();
}
} finally {
fullFlush = false;
stallControl.updateStalled(this);
}
}
boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
Queue<DocumentsWriterPerThread> flushes = this.blockedFlushes;
for (DocumentsWriterPerThread documentsWriterPerThread : flushes) {
assert documentsWriterPerThread.deleteQueue == flushingQueue;
for (BlockedFlush blockedFlush : blockedFlushes) {
assert blockedFlush.dwpt.deleteQueue == flushingQueue;
}
return true;
}
@ -379,18 +452,65 @@ public final class DocumentsWriterFlushControl {
for (DocumentsWriterPerThread dwpt : flushQueue) {
doAfterFlush(dwpt);
}
for (DocumentsWriterPerThread dwpt : blockedFlushes) {
doAfterFlush(dwpt);
for (BlockedFlush blockedFlush : blockedFlushes) {
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
doAfterFlush(blockedFlush.dwpt);
}
} finally {
fullFlush = false;
flushQueue.clear();
blockedFlushes.clear();
stallControl.updateStalled(this);
}
}
synchronized boolean isFullFlush() {
/**
* Returns <code>true</code> if a full flush is currently running
*/
synchronized boolean isFullFlush() { // used by assert
return fullFlush;
}
/**
* Returns the number of flushes that are already checked out but not yet
* actively flushing
*/
synchronized int numQueuedFlushes() {
return flushQueue.size();
}
/**
* Returns the number of flushes that are checked out but not yet available
* for flushing. This only applies during a full flush if a DWPT needs
* flushing but must not be flushed until the full flush has finished.
*/
synchronized int numBlockedFlushes() {
return blockedFlushes.size();
}
private static class BlockedFlush {
final DocumentsWriterPerThread dwpt;
final long bytes;
BlockedFlush(DocumentsWriterPerThread dwpt, long bytes) {
super();
this.dwpt = dwpt;
this.bytes = bytes;
}
}
/**
* This method will block if too many DWPT are currently flushing and no
* checked out DWPT are available
*/
void waitIfStalled() {
stallControl.waitIfStalled();
}
/**
* Returns <code>true</code> iff stalled
*/
boolean anyStalledThreads() {
return stallControl.anyStalledThreads();
}
}
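
The accounting hooks added above (numQueuedFlushes, numBlockedFlushes, numFlushingDWPT) are package-private. A minimal probe sketch, using only methods from this patch; the helper class itself is hypothetical and would have to live in org.apache.lucene.index, as the Lucene tests do:

package org.apache.lucene.index;

// Hypothetical probe for illustration; the accessors are package-private.
final class FlushControlProbe {
  static String report(DocumentsWriterFlushControl flushControl) {
    int queued   = flushControl.numQueuedFlushes();   // checked out, waiting to be flushed
    int blocked  = flushControl.numBlockedFlushes();  // held back until the full flush finishes
    int flushing = flushControl.numFlushingDWPT();    // currently flushing
    return "queued=" + queued + " blocked=" + blocked + " flushing=" + flushing;
  }
}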

View File

@ -166,6 +166,13 @@ public abstract class DocumentsWriterPerThreadPool {
return perThreads.length;
}
/**
* Returns the number of active {@link ThreadState} instances.
*/
public int getActiveThreadState() {
return numThreadStatesActive;
}
/**
* Returns a new {@link ThreadState} iff any new state is available otherwise
* <code>null</code>.

View File

@ -36,8 +36,7 @@ import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
* continue indexing.
*/
//TODO: rename this to DocumentsWriterStallControl (or something like that)?
final class Healthiness {
final class DocumentsWriterStallControl {
@SuppressWarnings("serial")
private static final class Sync extends AbstractQueuedSynchronizer {
volatile boolean hasBlockedThreads = false; // only with assert
@ -96,13 +95,14 @@ final class Healthiness {
* <code>true</code> iff the number of flushing
* {@link DocumentsWriterPerThread} is greater than the number of active
* {@link DocumentsWriterPerThread}. Otherwise it will reset the
* {@link Healthiness} to healthy and release all threads waiting on
* {@link DocumentsWriterStallControl} to healthy and release all threads waiting on
* {@link #waitIfStalled()}
*/
void updateStalled(DocumentsWriterFlushControl flushControl) {
do {
// if we have more flushing DWPT than numActiveDWPT we stall!
while (flushControl.numActiveDWPT() < flushControl.numFlushingDWPT()) {
// if we have more flushing / blocked DWPT than numActiveDWPT we stall!
// don't stall if we have queued flushes - threads should be hijacked instead
while (flushControl.netBytes() > flushControl.stallLimitBytes()) {
if (sync.trySetStalled()) {
assert wasStalled = true;
return;
@ -115,7 +115,7 @@ final class Healthiness {
sync.acquireShared(0);
}
boolean hasBlocked() {
boolean hasBlocked() { // for tests
return sync.hasBlockedThreads;
}
}
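
For intuition, the stalling behaviour of the renamed class can be sketched with a plain lock/condition; this is not the Lucene implementation (which uses AbstractQueuedSynchronizer), and the class and parameter names are invented for illustration. It only mirrors the byte-based threshold the patch switches to:

// A minimal conceptual sketch, assuming a byte-based stall threshold as in the patch.
final class SimpleStallControl {
  private final Object lock = new Object();
  private boolean stalled;

  void updateStalled(long netBytes, long stallLimitBytes) {
    synchronized (lock) {
      stalled = netBytes > stallLimitBytes;
      if (!stalled) {
        lock.notifyAll(); // backlog drained: release any waiting indexing threads
      }
    }
  }

  void waitIfStalled() throws InterruptedException {
    synchronized (lock) {
      while (stalled) {
        lock.wait();
      }
    }
  }
}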

View File

@ -40,7 +40,13 @@ import java.util.Collection;
* refuses to run by default. Specify {@code -delete-prior-commits}
* to override this, allowing the tool to delete all but the last commit.
* From Java code this can be enabled by passing {@code true} to
* {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
* {@link #IndexUpgrader(Directory,Version,PrintStream,boolean)}.
* <p><b>Warning:</b> This tool may reorder documents if the index was partially
* upgraded before execution (e.g., documents were added). If your application relies
* on &quot;monotonicity&quot; of doc IDs (which means that the order in which the documents
* were added to the index is preserved), do a full optimize instead.
* The {@link MergePolicy} set by {@link IndexWriterConfig} may also reorder
* documents.
*/
public final class IndexUpgrader {
@ -52,9 +58,11 @@ public final class IndexUpgrader {
System.err.println("reason, if the incoming index has more than one commit, the tool");
System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
System.err.println("this, allowing the tool to delete all but the last commit.");
System.err.println("WARNING: This tool may reorder document IDs!");
System.exit(1);
}
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException {
String dir = null;
boolean deletePriorCommits = false;
@ -74,7 +82,7 @@ public final class IndexUpgrader {
printUsage();
}
new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
new IndexUpgrader(FSDirectory.open(new File(dir)), Version.LUCENE_CURRENT, out, deletePriorCommits).upgrade();
}
private final Directory dir;
@ -82,16 +90,22 @@ public final class IndexUpgrader {
private final IndexWriterConfig iwc;
private final boolean deletePriorCommits;
@SuppressWarnings("deprecation")
public IndexUpgrader(Directory dir) {
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
/** Creates an index upgrader on the given directory, using an {@link IndexWriter} configured with the given
* {@code matchVersion}. The tool refuses to upgrade indexes with multiple commit points. */
public IndexUpgrader(Directory dir, Version matchVersion) {
this(dir, new IndexWriterConfig(matchVersion, null), null, false);
}
@SuppressWarnings("deprecation")
public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
/** Creates an index upgrader on the given directory, using an {@link IndexWriter} configured with the given
* {@code matchVersion}. Indexes with multiple commit points can be upgraded by removing
* all commits but the last one. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, Version matchVersion, PrintStream infoStream, boolean deletePriorCommits) {
this(dir, new IndexWriterConfig(matchVersion, null), infoStream, deletePriorCommits);
}
/** Creates an index upgrader on the given directory, using an {@link IndexWriter} with the given
* config. Indexes with multiple commit points can be upgraded by removing
* all commits but the last one. If {@code infoStream} is not {@code null}, all logging output will be sent to this stream. */
public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
this.dir = dir;
this.iwc = iwc;

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@ -221,7 +222,7 @@ public class IndexWriter implements Closeable {
private volatile long changeCount; // increments every time a change is completed
private long lastCommitChangeCount; // last changeCount that was committed
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
private List<SegmentInfo> rollbackSegments; // list of segmentInfo we will fallback to if the commit fails
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount;
@ -440,14 +441,14 @@ public class IndexWriter implements Closeable {
public synchronized boolean infoIsLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
assert idx != -1: "info=" + info + " isn't in pool";
assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
return true;
}
public synchronized SegmentInfo mapToLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
if (idx != -1) {
info = segmentInfos.get(idx);
info = segmentInfos.info(idx);
}
return info;
}
@ -818,7 +819,7 @@ public class IndexWriter implements Closeable {
}
}
setRollbackSegmentInfos(segmentInfos);
rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
// start with previous field numbers, but new FieldInfos
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
@ -862,10 +863,6 @@ public class IndexWriter implements Closeable {
}
}
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
}
/**
* Returns the private {@link IndexWriterConfig}, cloned
* from the {@link IndexWriterConfig} passed to
@ -1126,8 +1123,7 @@ public class IndexWriter implements Closeable {
else
count = 0;
for (int i = 0; i < segmentInfos.size(); i++)
count += segmentInfos.info(i).docCount;
count += segmentInfos.totalDocCount();
return count;
}
@ -1144,8 +1140,7 @@ public class IndexWriter implements Closeable {
else
count = 0;
for (int i = 0; i < segmentInfos.size(); i++) {
final SegmentInfo info = segmentInfos.info(i);
for (final SegmentInfo info : segmentInfos) {
count += info.docCount - numDeletedDocs(info);
}
return count;
@ -1159,9 +1154,11 @@ public class IndexWriter implements Closeable {
if (docWriter.anyDeletions()) {
return true;
}
for (int i = 0; i < segmentInfos.size(); i++)
if (segmentInfos.info(i).hasDeletions())
for (final SegmentInfo info : segmentInfos) {
if (info.hasDeletions()) {
return true;
}
}
return false;
}
@ -1554,7 +1551,8 @@ public class IndexWriter implements Closeable {
synchronized(this) {
resetMergeExceptions();
segmentsToOptimize = new HashSet<SegmentInfo>(segmentInfos);
segmentsToOptimize.clear();
segmentsToOptimize.addAll(segmentInfos.asSet());
optimizeMaxNumSegments = maxNumSegments;
// Now mark all pending & running merges as optimize
@ -1778,7 +1776,7 @@ public class IndexWriter implements Closeable {
final MergePolicy.MergeSpecification spec;
if (optimize) {
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));
if (spec != null) {
final int numMerges = spec.merges.size();
@ -1889,8 +1887,7 @@ public class IndexWriter implements Closeable {
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write
// once").
segmentInfos.clear();
segmentInfos.addAll(rollbackSegmentInfos);
segmentInfos.rollbackSegmentInfos(rollbackSegments);
docWriter.abort();
@ -2555,7 +2552,7 @@ public class IndexWriter implements Closeable {
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
setRollbackSegmentInfos(pendingCommit);
rollbackSegments = segmentInfos.createBackupSegmentInfos(true);
deleter.checkpoint(pendingCommit, true);
} finally {
// Matches the incRef done in startCommit:
@ -2660,7 +2657,7 @@ public class IndexWriter implements Closeable {
final synchronized void applyAllDeletes() throws IOException {
flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
.applyDeletes(readerPool, segmentInfos);
.applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) {
checkpoint();
}
@ -2709,7 +2706,7 @@ public class IndexWriter implements Closeable {
private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) {
if (segmentInfos.indexOf(info) == -1) {
if (!segmentInfos.contains(info)) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
}
}
@ -2847,38 +2844,12 @@ public class IndexWriter implements Closeable {
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
}
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
int segIdx = 0;
int newSegIdx = 0;
boolean inserted = false;
final int curSegCount = segmentInfos.size();
while(segIdx < curSegCount) {
final SegmentInfo info = segmentInfos.info(segIdx++);
if (mergedAway.contains(info)) {
if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
segmentInfos.set(segIdx-1, merge.info);
inserted = true;
newSegIdx++;
}
} else {
segmentInfos.set(newSegIdx++, info);
}
}
final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
segmentInfos.applyMergeChanges(merge, dropSegment);
// Either we found place to insert segment, or, we did
// not, but only because all segments we merged became
// deleted while we are merging, in which case it should
// be the case that the new segment is also all deleted:
if (!inserted) {
assert allDeleted;
if (keepFullyDeletedSegments) {
segmentInfos.add(0, merge.info);
} else {
if (dropSegment) {
readerPool.drop(merge.info);
}
}
segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
if (infoStream != null) {
message("after commit: " + segString());
@ -3014,7 +2985,7 @@ public class IndexWriter implements Closeable {
if (mergingSegments.contains(info)) {
return false;
}
if (segmentInfos.indexOf(info) == -1) {
if (!segmentInfos.contains(info)) {
return false;
}
if (info.dir != directory) {
@ -3462,7 +3433,7 @@ public class IndexWriter implements Closeable {
}
// utility routines for tests
SegmentInfo newestSegment() {
synchronized SegmentInfo newestSegment() {
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}
@ -3472,19 +3443,18 @@ public class IndexWriter implements Closeable {
}
/** @lucene.internal */
public synchronized String segString(List<SegmentInfo> infos) throws IOException {
StringBuilder buffer = new StringBuilder();
final int count = infos.size();
for(int i = 0; i < count; i++) {
if (i > 0) {
public synchronized String segString(Iterable<SegmentInfo> infos) throws IOException {
final StringBuilder buffer = new StringBuilder();
for(final SegmentInfo s : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
buffer.append(segString(infos.get(i)));
buffer.append(segString(s));
}
return buffer.toString();
}
/** @lucene.internal */
public synchronized String segString(SegmentInfo info) throws IOException {
StringBuilder buffer = new StringBuilder();
SegmentReader reader = readerPool.getIfExists(info);

View File

@ -133,10 +133,15 @@ public final class IndexWriterConfig implements Cloneable {
/**
* Creates a new config with defaults that match the specified
* {@link Version} as well as the default {@link Analyzer}. {@link Version} is
* a placeholder for future changes. The default settings are relevant to 3.1
* and before. In the future, if different settings will apply to different
* versions, they will be documented here.
* {@link Version} as well as the default {@link
* Analyzer}. If matchVersion is >= {@link
* Version#LUCENE_32}, {@link TieredMergePolicy} is used
* for merging; else {@link LogByteSizeMergePolicy}.
* Note that {@link TieredMergePolicy} is free to select
* non-contiguous merges, which means docIDs may not
* remain monotonic over time. If this is a problem you
* should switch to {@link LogByteSizeMergePolicy} or
* {@link LogDocMergePolicy}.
*/
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
this.matchVersion = matchVersion;
@ -154,7 +159,11 @@ public final class IndexWriterConfig implements Cloneable {
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault();
if (matchVersion.onOrAfter(Version.LUCENE_32)) {
mergePolicy = new TieredMergePolicy();
} else {
mergePolicy = new LogByteSizeMergePolicy();
}
readerPooling = DEFAULT_READER_POOLING;
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
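
If an application needs monotonic docIDs, the javadoc's advice can be followed by overriding the new default merge policy. A short sketch, assuming IndexWriterConfig.setMergePolicy and the core WhitespaceAnalyzer (neither appears in this hunk):

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.util.Version;

public final class MonotonicConfigExample {
  public static IndexWriterConfig create() {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32,
        new WhitespaceAnalyzer(Version.LUCENE_32));
    conf.setMergePolicy(new LogByteSizeMergePolicy()); // contiguous merges keep docIDs monotonic
    return conf;
  }
}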

View File

@ -242,6 +242,7 @@ public abstract class LogMergePolicy extends MergePolicy {
private MergeSpecification findMergesForOptimizeSizeLimit(
SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
final List<SegmentInfo> segments = infos.asList();
int start = last - 1;
while (start >= 0) {
@ -254,12 +255,12 @@ public abstract class LogMergePolicy extends MergePolicy {
// unless there is only 1 which is optimized.
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
// there is more than 1 segment to the right of this one, or an unoptimized single segment.
spec.add(new OneMerge(infos.range(start + 1, last)));
spec.add(new OneMerge(segments.subList(start + 1, last)));
}
last = start;
} else if (last - start == mergeFactor) {
// mergeFactor eligible segments were found, add them as a merge.
spec.add(new OneMerge(infos.range(start, last)));
spec.add(new OneMerge(segments.subList(start, last)));
last = start;
}
--start;
@ -267,7 +268,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// Add any left-over segments, unless there is just 1 already optimized.
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
spec.add(new OneMerge(infos.range(start, last)));
spec.add(new OneMerge(segments.subList(start, last)));
}
return spec.merges.size() == 0 ? null : spec;
@ -280,11 +281,12 @@ public abstract class LogMergePolicy extends MergePolicy {
*/
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
final List<SegmentInfo> segments = infos.asList();
// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
last -= mergeFactor;
}
@ -296,7 +298,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// Since we must optimize down to 1 segment, the
// choice is simple:
if (last > 1 || !isOptimized(infos.info(0))) {
spec.add(new OneMerge(infos.range(0, last)));
spec.add(new OneMerge(segments.subList(0, last)));
}
} else if (last > maxNumSegments) {
@ -325,7 +327,7 @@ public abstract class LogMergePolicy extends MergePolicy {
}
}
spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
}
}
return spec.merges.size() == 0 ? null : spec;
@ -412,7 +414,8 @@ public abstract class LogMergePolicy extends MergePolicy {
@Override
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException {
final int numSegments = segmentInfos.size();
final List<SegmentInfo> segments = segmentInfos.asList();
final int numSegments = segments.size();
if (verbose())
message("findMergesToExpungeDeletes: " + numSegments + " segments");
@ -434,7 +437,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// deletions, so force a merge now:
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = i;
}
} else if (firstSegmentWithDeletions != -1) {
@ -443,7 +446,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// mergeFactor segments
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = -1;
}
}
@ -451,7 +454,7 @@ public abstract class LogMergePolicy extends MergePolicy {
if (firstSegmentWithDeletions != -1) {
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
}
return spec;

View File

@ -72,7 +72,7 @@ public abstract class MergePolicy implements java.io.Closeable {
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
long estimatedMergeBytes; // used by IndexWriter
public long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter
public final List<SegmentInfo> segments;
@ -84,7 +84,8 @@ public abstract class MergePolicy implements java.io.Closeable {
public OneMerge(List<SegmentInfo> segments) {
if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment");
this.segments = segments;
// clone the list, as the incoming list may be backed by the original SegmentInfos and may be modified
this.segments = new ArrayList<SegmentInfo>(segments);
int count = 0;
for(SegmentInfo info : segments) {
count += info.docCount;

View File

@ -42,7 +42,7 @@ import org.apache.lucene.util.Constants;
*
* @lucene.experimental
*/
public final class SegmentInfo {
public final class SegmentInfo implements Cloneable {
// TODO: remove with hasVector and hasProx
private static final int CHECK_FIELDINFO = -2;
static final int NO = -1; // e.g. no norms; no deletes;

View File

@ -20,13 +20,16 @@ package org.apache.lucene.index;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.Set;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.codecs.CodecProvider;
@ -45,7 +48,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
*
* @lucene.experimental
*/
public final class SegmentInfos extends Vector<SegmentInfo> {
public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
/*
* The file format version, a negative number.
@ -85,6 +88,11 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
private FieldNumberBiMap globalFieldNumberMap; // this segments global field number map - lazy loaded on demand
private List<SegmentInfo> segments = new ArrayList<SegmentInfo>();
private Set<SegmentInfo> segmentSet = new HashSet<SegmentInfo>();
private transient List<SegmentInfo> cachedUnmodifiableList;
private transient Set<SegmentInfo> cachedUnmodifiableSet;
/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
@ -107,8 +115,8 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
return format;
}
public final SegmentInfo info(int i) {
return get(i);
public SegmentInfo info(int i) {
return segments.get(i);
}
/**
@ -237,7 +245,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
boolean success = false;
// Clear any previous segments:
clear();
this.clear();
generation = generationFromSegmentsFileName(segmentFileName);
@ -252,7 +260,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
if (!success) {
// Clear any segment infos we had loaded so we
// have a clean slate on retry:
clear();
this.clear();
}
}
}
@ -349,15 +357,14 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
/** Prunes any segment whose docs are all deleted. */
public void pruneDeletedSegments() {
int segIdx = 0;
while(segIdx < size()) {
final SegmentInfo info = info(segIdx);
for(final Iterator<SegmentInfo> it = segments.iterator(); it.hasNext();) {
final SegmentInfo info = it.next();
if (info.getDelCount() == info.docCount) {
remove(segIdx);
} else {
segIdx++;
it.remove();
segmentSet.remove(info);
}
}
assert segmentSet.size() == segments.size();
}
/**
@ -367,14 +374,23 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
@Override
public Object clone() {
SegmentInfos sis = (SegmentInfos) super.clone();
for(int i=0;i<sis.size();i++) {
final SegmentInfo info = sis.info(i);
try {
final SegmentInfos sis = (SegmentInfos) super.clone();
// deep clone, first recreate all collections:
sis.segments = new ArrayList<SegmentInfo>(size());
sis.segmentSet = new HashSet<SegmentInfo>(size());
sis.cachedUnmodifiableList = null;
sis.cachedUnmodifiableSet = null;
for(final SegmentInfo info : this) {
assert info.getSegmentCodecs() != null;
sis.set(i, (SegmentInfo) info.clone());
// don't access segments directly, use the add method!
sis.add((SegmentInfo) info.clone());
}
sis.userData = new HashMap<String,String>(userData);
return sis;
} catch (CloneNotSupportedException e) {
throw new RuntimeException("should not happen", e);
}
}
/**
@ -742,18 +758,6 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException;
}
/**
* Returns a new SegmentInfos containing the SegmentInfo
* instances in the specified range first (inclusive) to
* last (exclusive), so total number of segments returned
* is last-first.
*/
public SegmentInfos range(int first, int last) {
SegmentInfos infos = new SegmentInfos(codecs);
infos.addAll(super.subList(first, last));
return infos;
}
// Carry over generation numbers from another SegmentInfos
void updateGeneration(SegmentInfos other) {
lastGeneration = other.lastGeneration;
@ -831,6 +835,10 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
} catch (Throwable t) {
// throw orig excp
}
} else {
// we must sync here explicitly since during a commit
// IW will not sync the global field map.
dir.sync(Collections.singleton(name));
}
}
return version;
@ -956,7 +964,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
}
public synchronized String toString(Directory directory) {
public String toString(Directory directory) {
StringBuilder buffer = new StringBuilder();
buffer.append(getCurrentSegmentFileName()).append(": ");
final int count = size();
@ -987,8 +995,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
* remain write once.
*/
void replace(SegmentInfos other) {
clear();
addAll(other);
rollbackSegmentInfos(other.asList());
lastGeneration = other.lastGeneration;
lastGlobalFieldMapVersion = other.lastGlobalFieldMapVersion;
format = other.format;
@ -1014,7 +1021,7 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
* Loads or returns the already loaded global field number map for this {@link SegmentInfos}.
* If this {@link SegmentInfos} has no global field number map the returned instance is empty
*/
synchronized FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
FieldNumberBiMap getOrLoadGlobalFieldNumberMap(Directory dir) throws IOException {
if (globalFieldNumberMap != null) {
return globalFieldNumberMap;
}
@ -1054,4 +1061,135 @@ public final class SegmentInfos extends Vector<SegmentInfo> {
long getLastGlobalFieldMapVersion() {
return lastGlobalFieldMapVersion;
}
/** applies all changes caused by committing a merge to this SegmentInfos */
void applyMergeChanges(MergePolicy.OneMerge merge, boolean dropSegment) {
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
boolean inserted = false;
int newSegIdx = 0;
for (int segIdx = 0, cnt = segments.size(); segIdx < cnt; segIdx++) {
assert segIdx >= newSegIdx;
final SegmentInfo info = segments.get(segIdx);
if (mergedAway.contains(info)) {
if (!inserted && !dropSegment) {
segments.set(segIdx, merge.info);
inserted = true;
newSegIdx++;
}
} else {
segments.set(newSegIdx, info);
newSegIdx++;
}
}
// Either we found place to insert segment, or, we did
// not, but only because all segments we merged became
// deleted while we are merging, in which case it should
// be the case that the new segment is also all deleted,
// we insert it at the beginning if it should not be dropped:
if (!inserted && !dropSegment) {
segments.add(0, merge.info);
}
// the rest of the segments in the list are duplicates, so don't remove them from the set, only from the list!
segments.subList(newSegIdx, segments.size()).clear();
// update the Set
if (!dropSegment) {
segmentSet.add(merge.info);
}
segmentSet.removeAll(mergedAway);
assert segmentSet.size() == segments.size();
}
List<SegmentInfo> createBackupSegmentInfos(boolean cloneChildren) {
if (cloneChildren) {
final List<SegmentInfo> list = new ArrayList<SegmentInfo>(size());
for(final SegmentInfo info : this) {
assert info.getSegmentCodecs() != null;
list.add((SegmentInfo) info.clone());
}
return list;
} else {
return new ArrayList<SegmentInfo>(segments);
}
}
void rollbackSegmentInfos(List<SegmentInfo> infos) {
this.clear();
this.addAll(infos);
}
/** Returns an <b>unmodifiable</b> {@link Iterator} of contained segments in order. */
// @Override (comment out until Java 6)
public Iterator<SegmentInfo> iterator() {
return asList().iterator();
}
/** Returns all contained segments as an <b>unmodifiable</b> {@link List} view. */
public List<SegmentInfo> asList() {
if (cachedUnmodifiableList == null) {
cachedUnmodifiableList = Collections.unmodifiableList(segments);
}
return cachedUnmodifiableList;
}
/** Returns all contained segments as an <b>unmodifiable</b> {@link Set} view.
* The set's iterator is not sorted; use the {@link List} view or {@link #iterator} to get all segments in order. */
public Set<SegmentInfo> asSet() {
if (cachedUnmodifiableSet == null) {
cachedUnmodifiableSet = Collections.unmodifiableSet(segmentSet);
}
return cachedUnmodifiableSet;
}
public int size() {
return segments.size();
}
public void add(SegmentInfo si) {
if (segmentSet.contains(si)) {
throw new IllegalStateException("Cannot add the same segment two times to this SegmentInfos instance");
}
segments.add(si);
segmentSet.add(si);
assert segmentSet.size() == segments.size();
}
public void addAll(Iterable<SegmentInfo> sis) {
for (final SegmentInfo si : sis) {
this.add(si);
}
}
public void clear() {
segments.clear();
segmentSet.clear();
}
public void remove(SegmentInfo si) {
final int index = this.indexOf(si);
if (index >= 0) {
this.remove(index);
}
}
public void remove(int index) {
segmentSet.remove(segments.remove(index));
assert segmentSet.size() == segments.size();
}
public boolean contains(SegmentInfo si) {
return segmentSet.contains(si);
}
public int indexOf(SegmentInfo si) {
if (segmentSet.contains(si)) {
return segments.indexOf(si);
} else {
return -1;
}
}
}
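
A brief sketch of the new collection-style API now that SegmentInfos no longer extends Vector; the helper class is invented for illustration, and the public SegmentInfo fields (name, docCount) are assumed from the existing class, not from this hunk:

import java.util.List;
import java.util.Set;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;

public final class SegmentInfosViews {
  public static void dump(SegmentInfos infos) {
    for (SegmentInfo info : infos) {              // ordered iteration via the new Iterable view
      System.out.println(info.name + " docCount=" + info.docCount);
    }
    List<SegmentInfo> ordered = infos.asList();   // unmodifiable List view, in index order
    Set<SegmentInfo> unique = infos.asSet();      // unmodifiable Set view, unordered
    System.out.println(ordered.size() + " segments, " + unique.size() + " unique");
  }
}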

View File

@ -251,9 +251,7 @@ public class TieredMergePolicy extends MergePolicy {
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
infosSorted.addAll(infos);
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>(infos.asList());
Collections.sort(infosSorted, segmentByteSizeDescending);
// Compute total index bytes & print details about the index

View File

@ -40,6 +40,11 @@ import java.util.Set;
* w.optimize();
* w.close();
* </pre>
* <p><b>Warning:</b> This merge policy may reorder documents if the index was partially
* upgraded before calling optimize (e.g., documents were added). If your application relies
* on &quot;monotonicity&quot; of doc IDs (which means that the order in which the documents
* were added to the index is preserved), do a full optimize instead. Please note, the
* delegate {@code MergePolicy} may also reorder documents.
* @lucene.experimental
* @see IndexUpgrader
*/

View File

@ -200,6 +200,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
private class FSTFieldWriter extends FieldWriter {
private final Builder<Long> fstBuilder;
private final PositiveIntOutputs fstOutputs;
private final long startTermsFilePointer;
final FieldInfo fieldInfo;
int numIndexTerms;
@ -220,6 +221,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
// Always put empty string in
fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
startTermsFilePointer = termsFilePointer;
}
@Override
@ -239,6 +241,11 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
if (text.length == 0) {
// We already added empty string in ctor
assert termsFilePointer == startTermsFilePointer;
return;
}
final int lengthSave = text.length;
text.length = indexedTermPrefixLength(lastTerm, text);
try {

View File

@ -0,0 +1,382 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Caches all docs, and optionally also scores, coming from
* a search, and is then able to replay them to another
* collector. You specify the max RAM this class may use.
* Once the collection is done, call {@link #isCached}. If
* this returns true, you can use {@link #replay} against a
* new collector. If it returns false, this means too much
* RAM was required and you must instead re-run the original
* search.
*
* <p><b>NOTE</b>: this class consumes 4 bytes (or 8 bytes, if
* scoring is cached) per collected document. If the result
* set is large this can easily be a very substantial amount
* of RAM!
*
* <p><b>NOTE</b>: this class caches at least 128 documents
* before checking RAM limits.
*
* <p>See the Lucene <tt>modules/grouping</tt> module for more
* details including a full code example.</p>
*
* @lucene.experimental
*/
public abstract class CachingCollector extends Collector {
// Max out at 512K arrays
private static final int MAX_ARRAY_SIZE = 512 * 1024;
private static final int INITIAL_ARRAY_SIZE = 128;
private final static int[] EMPTY_INT_ARRAY = new int[0];
private static class SegStart {
public final AtomicReaderContext readerContext;
public final int end;
public SegStart(AtomicReaderContext readerContext, int end) {
this.readerContext = readerContext;
this.end = end;
}
}
private static final class CachedScorer extends Scorer {
// NOTE: these members are package-private because that way accessing them from
// the outer class does not incur an access check by the JVM. The situation
// would be the same if they were defined in the outer class as private
// members.
int doc;
float score;
private CachedScorer() { super(null); }
@Override
public final float score() { return score; }
@Override
public final int advance(int target) { throw new UnsupportedOperationException(); }
@Override
public final int docID() { return doc; }
@Override
public final float freq() { throw new UnsupportedOperationException(); }
@Override
public final int nextDoc() { throw new UnsupportedOperationException(); }
}
// A CachingCollector which caches scores
private static final class ScoreCachingCollector extends CachingCollector {
private final CachedScorer cachedScorer;
private final List<float[]> cachedScores;
private Scorer scorer;
private float[] curScores;
ScoreCachingCollector(Collector other, double maxRAMMB) {
super(other, maxRAMMB, true);
cachedScorer = new CachedScorer();
cachedScores = new ArrayList<float[]>();
curScores = new float[128];
cachedScores.add(curScores);
}
@Override
public void collect(int doc) throws IOException {
if (curDocs == null) {
// Cache was too large
cachedScorer.score = scorer.score();
cachedScorer.doc = doc;
other.collect(doc);
return;
}
// Allocate a bigger array or abort caching
if (upto == curDocs.length) {
base += upto;
// Compute next array length - don't allocate too big arrays
int nextLength = 8*curDocs.length;
if (nextLength > MAX_ARRAY_SIZE) {
nextLength = MAX_ARRAY_SIZE;
}
if (base + nextLength > maxDocsToCache) {
// try to allocate a smaller array
nextLength = maxDocsToCache - base;
if (nextLength <= 0) {
// Too many docs to collect -- clear cache
curDocs = null;
curScores = null;
cachedSegs.clear();
cachedDocs.clear();
cachedScores.clear();
cachedScorer.score = scorer.score();
cachedScorer.doc = doc;
other.collect(doc);
return;
}
}
curDocs = new int[nextLength];
cachedDocs.add(curDocs);
curScores = new float[nextLength];
cachedScores.add(curScores);
upto = 0;
}
curDocs[upto] = doc;
cachedScorer.score = curScores[upto] = scorer.score();
upto++;
cachedScorer.doc = doc;
other.collect(doc);
}
@Override
public void replay(Collector other) throws IOException {
replayInit(other);
int curUpto = 0;
int curBase = 0;
int chunkUpto = 0;
other.setScorer(cachedScorer);
curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext);
while (curBase + curUpto < seg.end) {
if (curUpto == curDocs.length) {
curBase += curDocs.length;
curDocs = cachedDocs.get(chunkUpto);
curScores = cachedScores.get(chunkUpto);
chunkUpto++;
curUpto = 0;
}
cachedScorer.score = curScores[curUpto];
other.collect(curDocs[curUpto++]);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
other.setScorer(cachedScorer);
}
@Override
public String toString() {
if (isCached()) {
return "CachingCollector (" + (base+upto) + " docs & scores cached)";
} else {
return "CachingCollector (cache was cleared)";
}
}
}
// A CachingCollector which does not cache scores
private static final class NoScoreCachingCollector extends CachingCollector {
NoScoreCachingCollector(Collector other, double maxRAMMB) {
super(other, maxRAMMB, false);
}
@Override
public void collect(int doc) throws IOException {
if (curDocs == null) {
// Cache was too large
other.collect(doc);
return;
}
// Allocate a bigger array or abort caching
if (upto == curDocs.length) {
base += upto;
// Compute next array length - don't allocate too big arrays
int nextLength = 8*curDocs.length;
if (nextLength > MAX_ARRAY_SIZE) {
nextLength = MAX_ARRAY_SIZE;
}
if (base + nextLength > maxDocsToCache) {
// try to allocate a smaller array
nextLength = maxDocsToCache - base;
if (nextLength <= 0) {
// Too many docs to collect -- clear cache
curDocs = null;
cachedSegs.clear();
cachedDocs.clear();
other.collect(doc);
return;
}
}
curDocs = new int[nextLength];
cachedDocs.add(curDocs);
upto = 0;
}
curDocs[upto] = doc;
upto++;
other.collect(doc);
}
@Override
public void replay(Collector other) throws IOException {
replayInit(other);
int curUpto = 0;
int curbase = 0;
int chunkUpto = 0;
curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext);
while (curbase + curUpto < seg.end) {
if (curUpto == curDocs.length) {
curbase += curDocs.length;
curDocs = cachedDocs.get(chunkUpto);
chunkUpto++;
curUpto = 0;
}
other.collect(curDocs[curUpto++]);
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
other.setScorer(scorer);
}
@Override
public String toString() {
if (isCached()) {
return "CachingCollector (" + (base+upto) + " docs cached)";
} else {
return "CachingCollector (cache was cleared)";
}
}
}
// TODO: would be nice if a collector defined a
// needsScores() method so we can specialize / do checks
// up front. This is only relevant for the ScoreCaching
// version -- if the wrapped Collector does not need
// scores, it can avoid cachedScorer entirely.
protected final Collector other;
protected final int maxDocsToCache;
protected final List<SegStart> cachedSegs = new ArrayList<SegStart>();
protected final List<int[]> cachedDocs;
private AtomicReaderContext lastReaderContext;
protected int[] curDocs;
protected int upto;
protected int base;
protected int lastDocBase;
public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
}
// Prevent extension from non-internal classes
private CachingCollector(Collector other, double maxRAMMB, boolean cacheScores) {
this.other = other;
cachedDocs = new ArrayList<int[]>();
curDocs = new int[INITIAL_ARRAY_SIZE];
cachedDocs.add(curDocs);
int bytesPerDoc = RamUsageEstimator.NUM_BYTES_INT;
if (cacheScores) {
bytesPerDoc += RamUsageEstimator.NUM_BYTES_FLOAT;
}
maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024) / bytesPerDoc);
}
@Override
public boolean acceptsDocsOutOfOrder() {
return other.acceptsDocsOutOfOrder();
}
public boolean isCached() {
return curDocs != null;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
other.setNextReader(context);
if (lastReaderContext != null) {
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
}
lastReaderContext = context;
}
/** Reused by the specialized inner classes. */
void replayInit(Collector other) {
if (!isCached()) {
throw new IllegalStateException("cannot replay: cache was cleared because too much RAM was required");
}
if (!other.acceptsDocsOutOfOrder() && this.other.acceptsDocsOutOfOrder()) {
throw new IllegalArgumentException(
"cannot replay: given collector does not support "
+ "out-of-order collection, while the wrapped collector does. "
+ "Therefore cached documents may be out-of-order.");
}
//System.out.println("CC: replay totHits=" + (upto + base));
if (lastReaderContext != null) {
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
lastReaderContext = null;
}
}
/**
* Replays the cached doc IDs (and scores) to the given Collector. If this
* instance does not cache scores, then a Scorer is not set via
* {@code other.setScorer}, and scores are not replayed.
*
* @throws IllegalStateException
* if this collector is not cached (i.e., if the RAM limits were too
* low for the number of documents + scores to cache).
* @throws IllegalArgumentException
* if the given Collector does not support out-of-order collection,
* while the collector passed to the ctor does.
*/
public abstract void replay(Collector other) throws IOException;
}
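
A usage sketch of the new collector: cache one pass of hits (and scores), then replay them into a second collector. The create/isCached/replay calls are from this patch; IndexSearcher.search(Query, Collector) is the standard search API, and the wrapper class is invented for illustration:

import java.io.IOException;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public final class CachingCollectorExample {
  public static void searchTwice(IndexSearcher searcher, Query query,
                                 Collector first, Collector second) throws IOException {
    // cache docs and scores, allowing up to ~64 MB of RAM for the cache
    CachingCollector cache = CachingCollector.create(first, true, 64.0);
    searcher.search(query, cache);
    if (cache.isCached()) {
      cache.replay(second);             // re-deliver the cached docs and scores
    } else {
      searcher.search(query, second);   // cache overflowed: re-run the original search
    }
  }
}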

View File

@ -61,9 +61,10 @@ public abstract class DocIdSetIterator {
public abstract int nextDoc() throws IOException;
/**
* Advances to the first beyond the current whose document number is greater
* than or equal to <i>target</i>. Returns the current document number or
* {@link #NO_MORE_DOCS} if there are no more docs in the set.
* Advances to the first beyond (see NOTE below) the current whose document
* number is greater than or equal to <i>target</i>. Returns the current
* document number or {@link #NO_MORE_DOCS} if there are no more docs in the
* set.
* <p>
* Behaves as if written:
*
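
The "behaves as if written" pseudocode the javadoc refers to sits just below this hunk and is not shown here; for reference, it is essentially the following (reconstructed from the released javadoc, so treat it as a paraphrase rather than part of this change):

int advance(int target) throws IOException {
  int doc;
  while ((doc = nextDoc()) < target) {
  }
  return doc;
}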

View File

@ -55,7 +55,12 @@ public class QueryTermVector implements TermFreqVector {
public QueryTermVector(String queryString, Analyzer analyzer) {
if (analyzer != null)
{
TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
TokenStream stream;
try {
stream = analyzer.reusableTokenStream("", new StringReader(queryString));
} catch (IOException e1) {
stream = null;
}
if (stream != null)
{
List<BytesRef> terms = new ArrayList<BytesRef>();
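
For comparison, a common reuse-with-fallback pattern is sketched below; note the patch above instead leaves the stream null and skips processing when reuse fails. The helper class is invented for illustration; both Analyzer methods are part of the existing API:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

public final class ReusableStreamExample {
  public static TokenStream open(Analyzer analyzer, String field, String text) {
    try {
      return analyzer.reusableTokenStream(field, new StringReader(text));
    } catch (IOException e) {
      // fall back to a fresh, non-reused stream
      return analyzer.tokenStream(field, new StringReader(text));
    }
  }
}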

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
final class SloppyPhraseScorer extends PhraseScorer {
private int slop;
@ -109,8 +109,14 @@ final class SloppyPhraseScorer extends PhraseScorer {
/**
* Init PhrasePositions in place.
* There is a one time initialization for this scorer:
* There is a one time initialization for this scorer (taking place at the first doc that matches all terms):
* <br>- Put in repeats[] each pp that has another pp with same position in the doc.
* This relies on the fact that the position in a PP is computed as (TP.position - offset),
* so by adding the offset back we actually compare document positions and identify that
* the two are the same term.
* An exception to this is two distinct terms at the same offset in the query and the same
* position in the doc. This case is detected by comparing just the (query) offsets,
* and two such PPs are not considered "repeating".
* <br>- Also mark each such pp by pp.repeats = true.
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
* In particular, this allows queries with no repetitions to be scored without any overhead from this computation.
@ -145,23 +151,26 @@ final class SloppyPhraseScorer extends PhraseScorer {
if (!checkedRepeats) {
checkedRepeats = true;
// check for repeats
HashMap<PhrasePositions, Object> m = null;
HashSet<PhrasePositions> m = null;
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
int tpPos = pp.position + pp.offset;
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
if (pp.offset == pp2.offset) {
continue; // not a repetition: the two PPs are originally at the same offset in the query!
}
int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos) {
if (m == null)
m = new HashMap<PhrasePositions, Object>();
m = new HashSet<PhrasePositions>();
pp.repeats = true;
pp2.repeats = true;
m.put(pp,null);
m.put(pp2,null);
m.add(pp);
m.add(pp2);
}
}
}
if (m!=null)
repeats = m.keySet().toArray(new PhrasePositions[0]);
repeats = m.toArray(new PhrasePositions[0]);
}
// with repeats must advance some repeating pp's so they all start with differing tp's
@ -204,12 +213,17 @@ final class SloppyPhraseScorer extends PhraseScorer {
int tpPos = pp.position + pp.offset;
for (int i = 0; i < repeats.length; i++) {
PhrasePositions pp2 = repeats[i];
if (pp2 == pp)
if (pp2 == pp) {
continue;
}
if (pp.offset == pp2.offset) {
continue; // not a repetition: the two PPs are originally at the same offset in the query!
}
int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos)
if (tpPos2 == tpPos) {
return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
}
}
return null;
}
}
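
The repeats[] bookkeeping above is exercised by a sloppy phrase that repeats a term. A small sketch using the standard PhraseQuery API (field and term values are placeholders, and the wrapper class is invented for illustration):

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TopDocs;

public final class RepeatedTermPhraseExample {
  public static TopDocs search(IndexSearcher searcher) throws IOException {
    PhraseQuery pq = new PhraseQuery();
    pq.setSlop(2);
    pq.add(new Term("body", "first"));
    pq.add(new Term("body", "again"));
    pq.add(new Term("body", "first")); // same term again, at a different query offset
    return searcher.search(pq, 10);
  }
}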

View File

@ -172,7 +172,7 @@ public class NIOFSDirectory extends FSDirectory {
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ "with a a value smaller than the current chunk size (" + chunkSize + ")");
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e);
throw outOfMemoryError;
}

View File

@ -125,7 +125,7 @@ public class SimpleFSDirectory extends FSDirectory {
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ "with a value smaller than the current chunks size (" + chunkSize + ")");
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e);
throw outOfMemoryError;
}

View File

@ -20,9 +20,6 @@ package org.apache.lucene.util;
import java.util.Comparator;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.IOException;
/** Represents byte[], as a slice (offset + length) into an
* existing byte[].
@ -193,6 +190,9 @@ public final class BytesRef implements Comparable<BytesRef> {
@Override
public boolean equals(Object other) {
if (other == null) {
return false;
}
return this.bytesEquals((BytesRef) other);
}

View File

@ -1,5 +1,8 @@
package org.apache.lucene.util;
import java.util.Comparator;
import java.util.StringTokenizer;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -54,4 +57,42 @@ public abstract class StringHelper {
private StringHelper() {
}
/**
* @return a Comparator over versioned strings such as X.YY.Z
* @lucene.internal
*/
public static Comparator<String> getVersionComparator() {
return versionComparator;
}
private static Comparator<String> versionComparator = new Comparator<String>() {
public int compare(String a, String b) {
StringTokenizer aTokens = new StringTokenizer(a, ".");
StringTokenizer bTokens = new StringTokenizer(b, ".");
while (aTokens.hasMoreTokens()) {
int aToken = Integer.parseInt(aTokens.nextToken());
if (bTokens.hasMoreTokens()) {
int bToken = Integer.parseInt(bTokens.nextToken());
if (aToken != bToken) {
return aToken - bToken;
}
} else {
// a has some extra trailing tokens. if these are all zeroes, that's ok.
if (aToken != 0) {
return 1;
}
}
}
// b has some extra trailing tokens. if these are all zeroes, that's ok.
while (bTokens.hasMoreTokens()) {
if (Integer.parseInt(bTokens.nextToken()) != 0)
return -1;
}
return 0;
}
};
}
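
A short usage sketch of the new version comparator, showing the ordering rules implemented above; the wrapper class is invented for illustration:

import java.util.Comparator;
import org.apache.lucene.util.StringHelper;

public final class VersionCompareExample {
  public static void main(String[] args) {
    Comparator<String> cmp = StringHelper.getVersionComparator();
    System.out.println(cmp.compare("3.1", "3.1.0")); // 0: trailing zeros are insignificant
    System.out.println(cmp.compare("3.2", "3.10"));  // negative: numeric, not lexicographic
    System.out.println(cmp.compare("4.0", "3.9.9")); // positive
  }
}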

View File

@ -143,13 +143,16 @@ public class LevenshteinAutomata {
if (dest >= 0)
for (int r = 0; r < numRanges; r++)
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
// reduce the state: this doesn't appear to help anything
//states[k].reduce();
}
Automaton a = new Automaton(states[0]);
a.setDeterministic(true);
a.setNumberedStates(states);
// we create some useless unconnected states, and it's a net win overall to remove these,
// as well as to combine any adjacent transitions (it makes later algorithms more efficient).
// So, while we could set our numberedStates here, it's actually best not to, and instead to
// force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
//a.setNumberedStates(states);
a.reduce();
// we need not trim transitions to dead states, as they are not created.
//a.restoreInvariant();
return a;

View File

@ -30,6 +30,8 @@
package org.apache.lucene.util.automaton;
import java.util.BitSet;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
/**
@ -72,8 +74,12 @@ final public class MinimizationOperations {
final int[] sigma = a.getStartPoints();
final State[] states = a.getNumberedStates();
final int sigmaLen = sigma.length, statesLen = states.length;
final BitSet[][] reverse = new BitSet[statesLen][sigmaLen];
final BitSet[] splitblock = new BitSet[statesLen], partition = new BitSet[statesLen];
@SuppressWarnings("unchecked") final ArrayList<State>[][] reverse =
(ArrayList<State>[][]) new ArrayList[statesLen][sigmaLen];
@SuppressWarnings("unchecked") final HashSet<State>[] partition =
(HashSet<State>[]) new HashSet[statesLen];
@SuppressWarnings("unchecked") final ArrayList<State>[] splitblock =
(ArrayList<State>[]) new ArrayList[statesLen];
final int[] block = new int[statesLen];
final StateList[][] active = new StateList[statesLen][sigmaLen];
final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
@ -82,8 +88,8 @@ final public class MinimizationOperations {
final BitSet split = new BitSet(statesLen),
refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
for (int q = 0; q < statesLen; q++) {
splitblock[q] = new BitSet(statesLen);
partition[q] = new BitSet(statesLen);
splitblock[q] = new ArrayList<State>();
partition[q] = new HashSet<State>();
for (int x = 0; x < sigmaLen; x++) {
active[q][x] = new StateList();
}
@ -92,23 +98,22 @@ final public class MinimizationOperations {
for (int q = 0; q < statesLen; q++) {
final State qq = states[q];
final int j = qq.accept ? 0 : 1;
partition[j].set(q);
partition[j].add(qq);
block[q] = j;
for (int x = 0; x < sigmaLen; x++) {
final BitSet[] r =
final ArrayList<State>[] r =
reverse[qq.step(sigma[x]).number];
if (r[x] == null)
r[x] = new BitSet();
r[x].set(q);
r[x] = new ArrayList<State>();
r[x].add(qq);
}
}
// initialize active sets
for (int j = 0; j <= 1; j++) {
final BitSet part = partition[j];
for (int x = 0; x < sigmaLen; x++) {
for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
if (reverse[i][x] != null)
active2[i][x] = active[j][x].add(states[i]);
for (final State qq : partition[j]) {
if (reverse[qq.number][x] != null)
active2[qq.number][x] = active[j][x].add(qq);
}
}
}
@ -121,18 +126,19 @@ final public class MinimizationOperations {
// process pending until fixed point
int k = 2;
while (!pending.isEmpty()) {
IntPair ip = pending.removeFirst();
final IntPair ip = pending.removeFirst();
final int p = ip.n1;
final int x = ip.n2;
pending2.clear(x*statesLen + p);
// find states that need to be split off their blocks
for (StateListNode m = active[p][x].first; m != null; m = m.next) {
final BitSet r = reverse[m.q.number][x];
if (r != null) for (int i = r.nextSetBit(0); i >= 0; i = r.nextSetBit(i+1)) {
final ArrayList<State> r = reverse[m.q.number][x];
if (r != null) for (final State s : r) {
final int i = s.number;
if (!split.get(i)) {
split.set(i);
final int j = block[i];
splitblock[j].set(i);
splitblock[j].add(s);
if (!refine2.get(j)) {
refine2.set(j);
refine.set(j);
@ -142,18 +148,19 @@ final public class MinimizationOperations {
}
// refine blocks
for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
final BitSet sb = splitblock[j];
if (sb.cardinality() < partition[j].cardinality()) {
final BitSet b1 = partition[j], b2 = partition[k];
for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1)) {
b1.clear(i);
b2.set(i);
block[i] = k;
final ArrayList<State> sb = splitblock[j];
if (sb.size() < partition[j].size()) {
final HashSet<State> b1 = partition[j];
final HashSet<State> b2 = partition[k];
for (final State s : sb) {
b1.remove(s);
b2.add(s);
block[s.number] = k;
for (int c = 0; c < sigmaLen; c++) {
final StateListNode sn = active2[i][c];
final StateListNode sn = active2[s.number][c];
if (sn != null && sn.sl == active[j][c]) {
sn.remove();
active2[i][c] = active[k][c].add(states[i]);
active2[s.number][c] = active[k][c].add(s);
}
}
}
@ -173,8 +180,8 @@ final public class MinimizationOperations {
k++;
}
refine2.clear(j);
for (int i = sb.nextSetBit(0); i >= 0; i = sb.nextSetBit(i+1))
split.clear(i);
for (final State s : sb)
split.clear(s.number);
sb.clear();
}
refine.clear();
@ -184,9 +191,7 @@ final public class MinimizationOperations {
for (int n = 0; n < newstates.length; n++) {
final State s = new State();
newstates[n] = s;
BitSet part = partition[n];
for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i+1)) {
final State q = states[i];
for (State q : partition[n]) {
if (q == a.initial) a.initial = s;
s.accept = q.accept;
s.number = q.number; // select representative

View File

@ -232,9 +232,7 @@ public class FST<T> {
void setEmptyOutput(T v) throws IOException {
if (emptyOutput != null) {
if (!emptyOutput.equals(v)) {
emptyOutput = outputs.merge(emptyOutput, v);
}
} else {
emptyOutput = v;
}


@ -100,7 +100,7 @@ public class MockTokenizer extends Tokenizer {
endOffset = off;
cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp));
offsetAtt.setOffset(startOffset, endOffset);
offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
streamState = State.INCREMENT;
return true;
}
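The one-line fix above routes MockTokenizer's offsets through correctOffset(), which matters whenever a CharFilter (such as the MappingCharFilter exercised later in this change set) sits in front of the tokenizer and shifts character positions. A minimal, hypothetical tokenizer showing the same pattern; it is a sketch against the Tokenizer/attribute API used elsewhere in this commit, not committed code:

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Emits the entire input as one token and reports offsets via correctOffset(),
// so a wrapping CharFilter can map them back to positions in the original text.
public final class WholeInputTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private boolean done;

  public WholeInputTokenizer(Reader input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (done) {
      return false;
    }
    done = true;
    clearAttributes();
    final StringBuilder sb = new StringBuilder();
    int c;
    while ((c = input.read()) != -1) {
      sb.append((char) c);
    }
    termAtt.setEmpty().append(sb);
    // Without correctOffset(), these offsets would point into the filtered text.
    offsetAtt.setOffset(correctOffset(0), correctOffset(sb.length()));
    return true;
  }

  @Override
  public void reset() throws IOException {
    done = false;
  }
}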


@ -42,14 +42,13 @@ public class MockRandomMergePolicy extends MergePolicy {
if (segmentInfos.size() > 1 && random.nextInt(5) == 3) {
SegmentInfos segmentInfos2 = new SegmentInfos();
segmentInfos2.addAll(segmentInfos);
Collections.shuffle(segmentInfos2, random);
List<SegmentInfo> segments = new ArrayList<SegmentInfo>(segmentInfos.asList());
Collections.shuffle(segments, random);
// TODO: sometimes make more than 1 merge?
mergeSpec = new MergeSpecification();
final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size());
mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge)));
mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge)));
}
return mergeSpec;


@ -171,7 +171,14 @@ public abstract class LuceneTestCase extends Assert {
private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
/** Used to track if setUp and tearDown are called correctly from subclasses */
private boolean setup;
private static State state = State.INITIAL;
private static enum State {
INITIAL, // no tests ran yet
SETUP, // test has called setUp()
RANTEST, // test is running
TEARDOWN // test has called tearDown()
};
/**
* Some tests expect the directory to contain a single segment, and want to do tests on that segment's reader.
@ -326,6 +333,7 @@ public abstract class LuceneTestCase extends Assert {
@BeforeClass
public static void beforeClassLuceneTestCaseJ4() {
state = State.INITIAL;
staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
random.setSeed(staticSeed);
tempDirs.clear();
@ -375,6 +383,11 @@ public abstract class LuceneTestCase extends Assert {
@AfterClass
public static void afterClassLuceneTestCaseJ4() {
if (!testsFailed) {
assertTrue("ensure your setUp() calls super.setUp() and your tearDown() calls super.tearDown()!!!",
state == State.INITIAL || state == State.TEARDOWN);
}
state = State.INITIAL;
if (! "false".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test class");
if (rogueThreads > 0) {
@ -483,17 +496,22 @@ public abstract class LuceneTestCase extends Assert {
public void starting(FrameworkMethod method) {
// set current method name for logging
LuceneTestCase.this.name = method.getName();
if (!testsFailed) {
assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.SETUP);
}
state = State.RANTEST;
super.starting(method);
}
};
@Before
public void setUp() throws Exception {
seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2;
random.setSeed(seed);
assertFalse("ensure your tearDown() calls super.tearDown()!!!", setup);
setup = true;
if (!testsFailed) {
assertTrue("ensure your tearDown() calls super.tearDown()!!!", (state == State.INITIAL || state == State.TEARDOWN));
}
state = State.SETUP;
savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
public void uncaughtException(Thread t, Throwable e) {
@ -529,8 +547,12 @@ public abstract class LuceneTestCase extends Assert {
@After
public void tearDown() throws Exception {
assertTrue("ensure your setUp() calls super.setUp()!!!", setup);
setup = false;
if (!testsFailed) {
// Note: we allow a test to go straight from SETUP -> TEARDOWN (without ever entering the RANTEST state)
// because if you assume() inside setUp(), it skips the test and the TestWatchman has no way to know...
assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.RANTEST || state == State.SETUP);
}
state = State.TEARDOWN;
BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount);
if ("perMethod".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test method: '" + getName() + "'");
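The new State enum above exists to catch subclasses that override setUp()/tearDown() without delegating to the superclass. For reference, this is the shape the checks expect from a subclass, sketched with an invented test class (the TestFSTs and TestTermScorer hunks further down are real instances of the same fix):

import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestSetUpTearDownContract extends LuceneTestCase {
  private Directory dir;

  @Override
  public void setUp() throws Exception {
    super.setUp();        // INITIAL/TEARDOWN -> SETUP; skipping this now trips the assert
    dir = newDirectory();
  }

  @Override
  public void tearDown() throws Exception {
    dir.close();
    super.tearDown();     // RANTEST (or SETUP, if setUp() skipped the test) -> TEARDOWN
  }

  public void testNothing() throws Exception {
    // the tracker sits in the RANTEST state while the test body runs
  }
}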


@ -397,4 +397,15 @@ public class AutomatonTestUtil {
path.remove(s);
return true;
}
/**
* Checks that an automaton has no detached states that are unreachable
* from the initial state.
*/
public static void assertNoDetachedStates(Automaton a) {
int numStates = a.getNumberOfStates();
a.clearNumberedStates(); // force recomputation of cached numbered states
assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
}
}


@ -0,0 +1,79 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
/**
* a binary tokenstream that lets you index a BytesRef
*/
public final class BinaryTokenStream extends TokenStream {
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
private boolean available = true;
public BinaryTokenStream(BytesRef bytes) {
bytesAtt.setBytesRef(bytes);
}
@Override
public boolean incrementToken() throws IOException {
if (available) {
available = false;
return true;
}
return false;
}
@Override
public void reset() throws IOException {
available = true;
}
public interface ByteTermAttribute extends TermToBytesRefAttribute {
public void setBytesRef(BytesRef bytes);
}
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
private BytesRef bytes;
public int fillBytesRef() {
return bytes.hashCode();
}
public BytesRef getBytesRef() {
return bytes;
}
public void setBytesRef(BytesRef bytes) {
this.bytes = bytes;
}
public void clear() {}
@Override
public void copyTo(AttributeImpl target) {
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
other.bytes = bytes;
}
}
}


@ -0,0 +1,73 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
* Test indexing and searching some byte[] terms
*/
public class TestBinaryTerms extends LuceneTestCase {
public void testBinary() throws IOException {
assumeFalse("PreFlex codec cannot work with binary terms!",
"PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random, dir);
BytesRef bytes = new BytesRef(2);
BinaryTokenStream tokenStream = new BinaryTokenStream(bytes);
for (int i = 0; i < 256; i++) {
bytes.bytes[0] = (byte) i;
bytes.bytes[1] = (byte) (255 - i);
bytes.length = 2;
Document doc = new Document();
doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.NO));
doc.add(new Field("bytes", tokenStream));
iw.addDocument(doc);
}
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher is = newSearcher(ir);
for (int i = 0; i < 256; i++) {
bytes.bytes[0] = (byte) i;
bytes.bytes[1] = (byte) (255 - i);
bytes.length = 2;
TopDocs docs = is.search(new TermQuery(new Term("bytes", bytes)), 5);
assertEquals(1, docs.totalHits);
assertEquals("" + i, is.doc(docs.scoreDocs[0].doc).get("id"));
}
is.close();
ir.close();
dir.close();
}
}


@ -30,7 +30,6 @@ import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThrottledIndexOutput;
import org.junit.Before;
public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
@ -105,7 +104,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
}
if (ensureNotStalled) {
assertFalse(docsWriter.healthiness.wasStalled);
assertFalse(docsWriter.flushControl.stallControl.wasStalled);
}
writer.close();
assertEquals(0, flushControl.activeBytes());
@ -216,15 +215,15 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertEquals(numDocumentsToIndex, r.numDocs());
assertEquals(numDocumentsToIndex, r.maxDoc());
if (!flushPolicy.flushOnRAM()) {
assertFalse("never stall if we don't flush on RAM", docsWriter.healthiness.wasStalled);
assertFalse("never block if we don't flush on RAM", docsWriter.healthiness.hasBlocked());
assertFalse("never stall if we don't flush on RAM", docsWriter.flushControl.stallControl.wasStalled);
assertFalse("never block if we don't flush on RAM", docsWriter.flushControl.stallControl.hasBlocked());
}
r.close();
writer.close();
dir.close();
}
public void testHealthyness() throws InterruptedException,
public void testStallControl() throws InterruptedException,
CorruptIndexException, LockObtainFailedException, IOException {
int[] numThreads = new int[] { 4 + random.nextInt(8), 1 };
@ -264,12 +263,12 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
assertEquals(numDocumentsToIndex, writer.numDocs());
assertEquals(numDocumentsToIndex, writer.maxDoc());
if (numThreads[i] == 1) {
assertFalse(
"single thread must not stall",
docsWriter.healthiness.wasStalled);
assertFalse(
"single thread must not block numThreads: " + numThreads[i],
docsWriter.healthiness.hasBlocked());
docsWriter.flushControl.stallControl.hasBlocked());
}
if (docsWriter.flushControl.peakNetBytes > (2.d * iwc.getRAMBufferSizeMB() * 1024.d * 1024.d)) {
assertTrue(docsWriter.flushControl.stallControl.wasStalled);
}
assertActiveBytesAfter(flushControl);
writer.close(true);


@ -363,7 +363,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase {
w.close();
SegmentInfos sis = new SegmentInfos();
sis.read(base);
SegmentInfo segmentInfo = sis.get(sis.size() - 1);// last segment must
SegmentInfo segmentInfo = sis.info(sis.size() - 1);// last segment must
// have all fields with
// consistent numbers
FieldInfos fieldInfos = segmentInfo.getFieldInfos();


@ -1231,13 +1231,17 @@ public class TestIndexWriter extends LuceneTestCase {
System.out.println("TEST: pass=" + pass);
}
IndexWriter writer = new IndexWriter(
directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).
setOpenMode(OpenMode.CREATE).
setMaxBufferedDocs(2).
setMergePolicy(newLogMergePolicy())
);
setMergePolicy(newLogMergePolicy());
if (pass == 2) {
conf.setMergeScheduler(new SerialMergeScheduler());
}
IndexWriter writer = new IndexWriter(directory, conf);
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100);
writer.setInfoStream(VERBOSE ? System.out : null);
for(int iter=0;iter<10;iter++) {
@ -2139,7 +2143,7 @@ public class TestIndexWriter extends LuceneTestCase {
while(!finish) {
try {
while(true) {
while(!finish) {
if (w != null) {
w.close();
w = null;
@ -2157,6 +2161,7 @@ public class TestIndexWriter extends LuceneTestCase {
}
}
w.close();
w = null;
_TestUtil.checkIndex(dir);
IndexReader.open(dir, true).close();


@ -71,9 +71,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass());
assertNull(conf.getFlushPolicy());
assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB());
// Sanity check - validate that all getters are covered.
Set<String> getters = new HashSet<String>();
getters.add("getAnalyzer");


@ -128,8 +128,8 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
fsmp.length = 2;
System.out.println("maybeMerge "+writer.segmentInfos);
SegmentInfo info0 = writer.segmentInfos.get(0);
SegmentInfo info1 = writer.segmentInfos.get(1);
SegmentInfo info0 = writer.segmentInfos.info(0);
SegmentInfo info1 = writer.segmentInfos.info(1);
writer.maybeMerge();
System.out.println("maybeMerge after "+writer.segmentInfos);
@ -199,7 +199,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
// deletes for info1, the newly created segment from the
// merge should have no deletes because they were applied in
// the merge
//SegmentInfo info1 = writer.segmentInfos.get(1);
//SegmentInfo info1 = writer.segmentInfos.info(1);
//assertFalse(exists(info1, writer.docWriter.segmentDeletes));
//System.out.println("infos4:"+writer.segmentInfos);
@ -261,11 +261,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
throws CorruptIndexException, IOException {
MergeSpecification ms = new MergeSpecification();
if (doMerge) {
SegmentInfos mergeInfos = new SegmentInfos();
for (int x=start; x < (start+length); x++) {
mergeInfos.add(segmentInfos.get(x));
}
OneMerge om = new OneMerge(mergeInfos);
OneMerge om = new OneMerge(segmentInfos.asList().subList(start, start + length));
ms.add(om);
doMerge = false;
return ms;


@ -0,0 +1,175 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.LuceneTestCase;
public class TestCachingCollector extends LuceneTestCase {
private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB
private static class MockScorer extends Scorer {
private MockScorer() {
super((Weight) null);
}
@Override
public float score() throws IOException { return 0; }
@Override
public int docID() { return 0; }
@Override
public int nextDoc() throws IOException { return 0; }
@Override
public int advance(int target) throws IOException { return 0; }
}
private static class NoOpCollector extends Collector {
private final boolean acceptDocsOutOfOrder;
public NoOpCollector(boolean acceptDocsOutOfOrder) {
this.acceptDocsOutOfOrder = acceptDocsOutOfOrder;
}
@Override
public void setScorer(Scorer scorer) throws IOException {}
@Override
public void collect(int doc) throws IOException {}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {}
@Override
public boolean acceptsDocsOutOfOrder() {
return acceptDocsOutOfOrder;
}
}
public void testBasic() throws Exception {
for (boolean cacheScores : new boolean[] { false, true }) {
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1);
cc.setScorer(new MockScorer());
// collect 1000 docs
for (int i = 0; i < 1000; i++) {
cc.collect(i);
}
// now replay them
cc.replay(new Collector() {
int prevDocID = -1;
@Override
public void setScorer(Scorer scorer) throws IOException {}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {}
@Override
public void collect(int doc) throws IOException {
assertEquals(prevDocID + 1, doc);
prevDocID = doc;
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
});
}
}
public void testIllegalStateOnReplay() throws Exception {
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
cc.setScorer(new MockScorer());
// collect 130 docs, this should be enough for triggering cache abort.
for (int i = 0; i < 130; i++) {
cc.collect(i);
}
assertFalse("CachingCollector should not be cached due to low memory limit", cc.isCached());
try {
cc.replay(new NoOpCollector(false));
fail("replay should fail if CachingCollector is not cached");
} catch (IllegalStateException e) {
// expected
}
}
public void testIllegalCollectorOnReplay() throws Exception {
// tests that the Collector passed to replay() has an out-of-order mode that
// is valid with the Collector passed to the ctor
// 'src' Collector does not support out-of-order
CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE);
cc.setScorer(new MockScorer());
for (int i = 0; i < 10; i++) cc.collect(i);
cc.replay(new NoOpCollector(true)); // this call should not fail
cc.replay(new NoOpCollector(false)); // this call should not fail
// 'src' Collector supports out-of-order
cc = CachingCollector.create(new NoOpCollector(true), true, 50 * ONE_BYTE);
cc.setScorer(new MockScorer());
for (int i = 0; i < 10; i++) cc.collect(i);
cc.replay(new NoOpCollector(true)); // this call should not fail
try {
cc.replay(new NoOpCollector(false)); // this call should fail
fail("should have failed if an in-order Collector was given to replay(), " +
"while CachingCollector was initialized with out-of-order collection");
} catch (IllegalArgumentException e) {
// ok
}
}
public void testCachedArraysAllocation() throws Exception {
// tests the cached arrays allocation -- if the 'nextLength' was too high,
// caching would terminate even if a smaller length would suffice.
// set RAM limit enough for 150 docs + random(10000)
int numDocs = random.nextInt(10000) + 150;
for (boolean cacheScores : new boolean[] { false, true }) {
int bytesPerDoc = cacheScores ? 8 : 4;
CachingCollector cc = CachingCollector.create(new NoOpCollector(false),
cacheScores, bytesPerDoc * ONE_BYTE * numDocs);
cc.setScorer(new MockScorer());
for (int i = 0; i < numDocs; i++) cc.collect(i);
assertTrue(cc.isCached());
// The 151's document should terminate caching
cc.collect(numDocs);
assertFalse(cc.isCached());
}
}
}
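Beyond the unit tests above, the typical use of this class is to run a query once, cache the hits (and optionally scores) up to a RAM budget, and then replay them into other collectors. A hedged usage sketch: the searcher, query, helper class, and the wrapped TopScoreDocCollector are assumptions for illustration, while create(), isCached(), and replay() are the calls exercised by the test:

import java.io.IOException;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopScoreDocCollector;

public class CachingCollectorUsage {
  public static void searchAndReplay(IndexSearcher searcher, Query query) throws IOException {
    // Wrap a real collector; cache scores too, with an upper bound of 64 MB of cached hits.
    TopScoreDocCollector top = TopScoreDocCollector.create(10, true);
    CachingCollector cache = CachingCollector.create(top, true, 64.0);
    searcher.search(query, cache);

    if (cache.isCached()) {
      // Feed the identical doc stream to another collector without re-executing the query.
      Collector secondPass = TopScoreDocCollector.create(10, true);
      cache.replay(secondPass);
    }
  }
}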


@ -17,11 +17,14 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@ -423,7 +426,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0);
}
TopDocs hits = s.search(mpq, 2);
assert hits.totalHits == 2;
assertEquals(2, hits.totalHits);
assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
/*
for(int hit=0;hit<hits.totalHits;hit++) {
@ -434,4 +437,156 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
r.close();
dir.close();
}
private final static TokenAndPos[] INCR_0_DOC_TOKENS = new TokenAndPos[] {
new TokenAndPos("x", 0),
new TokenAndPos("a", 1),
new TokenAndPos("1", 1),
new TokenAndPos("m", 2), // not existing, relying on slop=2
new TokenAndPos("b", 3),
new TokenAndPos("1", 3),
new TokenAndPos("n", 4), // not existing, relying on slop=2
new TokenAndPos("c", 5),
new TokenAndPos("y", 6)
};
private final static TokenAndPos[] INCR_0_QUERY_TOKENS_AND = new TokenAndPos[] {
new TokenAndPos("a", 0),
new TokenAndPos("1", 0),
new TokenAndPos("b", 1),
new TokenAndPos("1", 1),
new TokenAndPos("c", 2)
};
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_MATCH = new TokenAndPos[][] {
{ new TokenAndPos("a", 0) },
{ new TokenAndPos("x", 0), new TokenAndPos("1", 0) },
{ new TokenAndPos("b", 1) },
{ new TokenAndPos("x", 1), new TokenAndPos("1", 1) },
{ new TokenAndPos("c", 2) }
};
private final static TokenAndPos[][] INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN = new TokenAndPos[][] {
{ new TokenAndPos("x", 0) },
{ new TokenAndPos("a", 0), new TokenAndPos("1", 0) },
{ new TokenAndPos("x", 1) },
{ new TokenAndPos("b", 1), new TokenAndPos("1", 1) },
{ new TokenAndPos("c", 2) }
};
/**
* using query parser, MPQ will be created, and will not be strict about having all query terms
* in each position - one of each position is sufficient (OR logic)
*/
public void testZeroPosIncrSloppyParsedAnd() throws IOException, ParseException {
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
final Query q = qp.parse("\"this text is acually ignored\"");
assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
doTestZeroPosIncrSloppy(q, 0);
((MultiPhraseQuery) q).setSlop(1);
doTestZeroPosIncrSloppy(q, 0);
((MultiPhraseQuery) q).setSlop(2);
doTestZeroPosIncrSloppy(q, 1);
}
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
Directory dir = newDirectory(); // random dir
IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
IndexReader r = IndexReader.open(writer,false);
writer.close();
IndexSearcher s = new IndexSearcher(r);
if (VERBOSE) {
System.out.println("QUERY=" + q);
}
TopDocs hits = s.search(q, 1);
assertEquals("wrong number of results", nExpected, hits.totalHits);
if (VERBOSE) {
for(int hit=0;hit<hits.totalHits;hit++) {
ScoreDoc sd = hits.scoreDocs[hit];
System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
}
}
r.close();
dir.close();
}
/**
* PQ AND Mode - Manually creating a phrase query
*/
public void testZeroPosIncrSloppyPqAnd() throws IOException, ParseException {
final PhraseQuery pq = new PhraseQuery();
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
pq.add(new Term("field",tap.token), tap.pos);
}
doTestZeroPosIncrSloppy(pq, 0);
pq.setSlop(1);
doTestZeroPosIncrSloppy(pq, 0);
pq.setSlop(2);
doTestZeroPosIncrSloppy(pq, 1);
}
/**
* MPQ AND Mode - Manually creating a multiple phrase query
*/
public void testZeroPosIncrSloppyMpqAnd() throws IOException, ParseException {
final MultiPhraseQuery mpq = new MultiPhraseQuery();
for (TokenAndPos tap : INCR_0_QUERY_TOKENS_AND) {
mpq.add(new Term[]{new Term("field",tap.token)}, tap.pos); //AND logic
}
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(1);
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(2);
doTestZeroPosIncrSloppy(mpq, 1);
}
/**
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query
*/
public void testZeroPosIncrSloppyMpqAndOrMatch() throws IOException, ParseException {
final MultiPhraseQuery mpq = new MultiPhraseQuery();
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_MATCH) {
Term[] terms = tapTerms(tap);
final int pos = tap[0].pos;
mpq.add(terms, pos); //AND logic in pos, OR across lines
}
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(1);
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(2);
doTestZeroPosIncrSloppy(mpq, 1);
}
/**
* MPQ Combined AND OR Mode - Manually creating a multiple phrase query - with no match
*/
public void testZeroPosIncrSloppyMpqAndOrNoMatch() throws IOException, ParseException {
final MultiPhraseQuery mpq = new MultiPhraseQuery();
for (TokenAndPos tap[] : INCR_0_QUERY_TOKENS_AND_OR_NO_MATCHN) {
Term[] terms = tapTerms(tap);
final int pos = tap[0].pos;
mpq.add(terms, pos); //AND logic in pos, OR across lines
}
doTestZeroPosIncrSloppy(mpq, 0);
mpq.setSlop(2);
doTestZeroPosIncrSloppy(mpq, 0);
}
private Term[] tapTerms(TokenAndPos[] tap) {
Term[] terms = new Term[tap.length];
for (int i=0; i<terms.length; i++) {
terms[i] = new Term("field",tap[i].token);
}
return terms;
}
}


@ -65,6 +65,7 @@ public class TestTermScorer extends LuceneTestCase {
indexSearcher.close();
indexReader.close();
directory.close();
super.tearDown();
}
public void test() throws IOException {


@ -0,0 +1,47 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Comparator;
/**
* Tests for StringHelper.getVersionComparator
*/
public class TestVersionComparator extends LuceneTestCase {
public void testVersions() {
Comparator<String> comp = StringHelper.getVersionComparator();
assertTrue(comp.compare("1", "2") < 0);
assertTrue(comp.compare("1", "1") == 0);
assertTrue(comp.compare("2", "1") > 0);
assertTrue(comp.compare("1.1", "1") > 0);
assertTrue(comp.compare("1", "1.1") < 0);
assertTrue(comp.compare("1.1", "1.1") == 0);
assertTrue(comp.compare("1.0", "1") == 0);
assertTrue(comp.compare("1", "1.0") == 0);
assertTrue(comp.compare("1.0.1", "1.0") > 0);
assertTrue(comp.compare("1.0", "1.0.1") < 0);
assertTrue(comp.compare("1.02.003", "1.2.3.0") == 0);
assertTrue(comp.compare("1.2.3.0", "1.02.003") == 0);
assertTrue(comp.compare("1.10", "1.9") > 0);
assertTrue(comp.compare("1.9", "1.10") < 0);
}
}
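The comparator under test orders dotted version strings numerically rather than lexicographically, which is what makes "1.10" sort after "1.9". A small usage sketch; the class name is invented, while the StringHelper.getVersionComparator() call is the one the test exercises:

import java.util.Arrays;
import org.apache.lucene.util.StringHelper;

public class VersionSortSketch {
  public static void main(String[] args) {
    String[] versions = { "1.10", "1.2", "1.9", "1.0.1" };
    // Plain String ordering would put "1.10" before "1.2"; this comparator does not.
    Arrays.sort(versions, StringHelper.getVersionComparator());
    System.out.println(Arrays.toString(versions)); // [1.0.1, 1.2, 1.9, 1.10]
  }
}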


@ -39,6 +39,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
assertCharVectors(2);
}
// LUCENE-3094
public void testNoWastedStates() throws Exception {
AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
}
/**
* Tests all possible characteristic vectors for some n
* This exhaustively tests the parametric transitions tables.
@ -66,6 +71,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
assertNotNull(automata[n]);
assertTrue(automata[n].isDeterministic());
assertTrue(SpecialOperations.isFinite(automata[n]));
AutomatonTestUtil.assertNoDetachedStates(automata[n]);
// check that the dfa for n-1 accepts a subset of the dfa for n
if (n > 0) {
assertTrue(automata[n-1].subsetOf(automata[n]));


@ -49,4 +49,9 @@ public class TestMinimize extends LuceneTestCase {
assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
}
}
/** n^2 space usage in Hopcroft minimization? */
public void testMinimizeHuge() {
new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
}
}


@ -54,14 +54,16 @@ public class TestFSTs extends LuceneTestCase {
private MockDirectoryWrapper dir;
@Override
public void setUp() throws IOException {
public void setUp() throws Exception {
super.setUp();
dir = newDirectory();
dir.setPreventDoubleWrite(false);
}
@Override
public void tearDown() throws IOException {
public void tearDown() throws Exception {
dir.close();
super.tearDown();
}
private static BytesRef toBytesRef(IntsRef ir) {
@ -456,8 +458,9 @@ public class TestFSTs extends LuceneTestCase {
if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
final UpToTwoPositiveIntOutputs _outputs = (UpToTwoPositiveIntOutputs) outputs;
final UpToTwoPositiveIntOutputs.TwoLongs twoLongs = (UpToTwoPositiveIntOutputs.TwoLongs) pair.output;
((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.first));
((Builder<Object>) builder).add(pair.input, (Object) _outputs.get(twoLongs.second));
@SuppressWarnings("unchecked") final Builder<Object> builderObject = (Builder<Object>) builder;
builderObject.add(pair.input, _outputs.get(twoLongs.first));
builderObject.add(pair.input, _outputs.get(twoLongs.second));
} else {
builder.add(pair.input, pair.output);
}
@ -537,7 +540,7 @@ public class TestFSTs extends LuceneTestCase {
Object output = run(fst, term, null);
assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
assertEquals(output, pair.output);
assertEquals(pair.output, output);
// verify enum's next
IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();


@ -49,6 +49,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
private State previous;
private String previousType;
private boolean exhausted;
/**
* Constructs a new CommonGramsQueryFilter based on the provided CommomGramsFilter
@ -67,6 +68,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
super.reset();
previous = null;
previousType = null;
exhausted = false;
}
/**
@ -79,7 +81,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
*/
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
while (!exhausted && input.incrementToken()) {
State current = captureState();
if (previous != null && !isGramType()) {
@ -96,6 +98,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
previous = current;
}
exhausted = true;
if (previous == null || GRAM_TYPE.equals(previousType)) {
return false;
}
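The same exhausted guard is added to HyphenatedWordsFilter, ShingleFilter, and SynonymFilter below; the point is that once the wrapped stream has returned false from incrementToken(), a well-behaved filter must not call it again, even while flushing its own buffered state. A stripped-down, hypothetical filter showing just that pattern (not one of the committed classes):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

public final class ExhaustionAwareFilter extends TokenFilter {
  private boolean exhausted;

  public ExhaustionAwareFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!exhausted && input.incrementToken()) {
      return true;              // pass tokens through until upstream is done
    }
    exhausted = true;           // never poll the wrapped stream again after it ends
    return false;               // a real filter could still emit buffered tokens here
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    exhausted = false;          // the stream may be reused after reset()
  }
}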


@ -59,6 +59,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
private final StringBuilder hyphenated = new StringBuilder();
private State savedState;
private boolean exhausted = false;
/**
* Creates a new HyphenatedWordsFilter
@ -74,7 +75,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
*/
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
while (!exhausted && input.incrementToken()) {
char[] term = termAttribute.buffer();
int termLength = termAttribute.length();
@ -96,6 +97,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
}
}
exhausted = true;
if (savedState != null) {
// the final term ends with a hyphen
// add back the hyphen, for backwards compatibility.
@ -115,6 +118,7 @@ public final class HyphenatedWordsFilter extends TokenFilter {
super.reset();
hyphenated.setLength(0);
savedState = null;
exhausted = false;
}
// ================================================= Helper Methods ================================================


@ -76,4 +76,9 @@ public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
public void close() throws IOException {
suffix.close();
}
@Override
public void end() throws IOException {
suffix.end();
}
}


@ -158,6 +158,12 @@ public class PrefixAwareTokenFilter extends TokenStream {
return suffixToken;
}
@Override
public void end() throws IOException {
prefix.end();
suffix.end();
}
@Override
public void close() throws IOException {
prefix.close();


@ -225,7 +225,6 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
TokenStream result = delegate.reusableTokenStream(fieldName, reader);
if (result == streams.wrapped) {
/* the wrapped analyzer reused the stream */
streams.withStopFilter.reset();
} else {
/*
* the wrapped analyzer did not. if there are any stopwords for the


@ -199,10 +199,7 @@ public final class ShingleAnalyzerWrapper extends Analyzer {
setPreviousTokenStream(streams);
} else {
TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
if (result == streams.wrapped) {
/* the wrapped analyzer reused the stream */
streams.shingle.reset();
} else {
if (result != streams.wrapped) {
/* the wrapped analyzer did not, create a new shingle around the new one */
streams.wrapped = result;
streams.shingle = new ShingleFilter(streams.wrapped);


@ -327,6 +327,8 @@ public final class ShingleFilter extends TokenFilter {
return tokenAvailable;
}
private boolean exhausted;
/**
* <p>Get the next token from the input stream.
* <p>If the next token has <code>positionIncrement > 1</code>,
@ -359,7 +361,7 @@ public final class ShingleFilter extends TokenFilter {
}
isNextInputStreamToken = false;
newTarget.isFiller = false;
} else if (input.incrementToken()) {
} else if (!exhausted && input.incrementToken()) {
if (null == target) {
newTarget = new InputWindowToken(cloneAttributes());
} else {
@ -387,6 +389,7 @@ public final class ShingleFilter extends TokenFilter {
}
} else {
newTarget = null;
exhausted = true;
}
return newTarget;
}
@ -436,6 +439,7 @@ public final class ShingleFilter extends TokenFilter {
numFillerTokensToInsert = 0;
isOutputHere = false;
noShingleOutput = true;
exhausted = false;
if (outputUnigramsIfNoShingles && ! outputUnigrams) {
// Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
gramSize.minValue = minShingleSize;


@ -190,16 +190,20 @@ public final class SynonymFilter extends TokenFilter {
private LinkedList<AttributeSource> buffer;
private LinkedList<AttributeSource> matched;
private boolean exhausted;
private AttributeSource nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return buffer.removeFirst();
} else {
if (input.incrementToken()) {
if (!exhausted && input.incrementToken()) {
return this;
} else
} else {
exhausted = true;
return null;
}
}
}
private void pushTok(AttributeSource t) {
if (buffer==null) buffer=new LinkedList<AttributeSource>();
@ -250,5 +254,6 @@ public final class SynonymFilter extends TokenFilter {
public void reset() throws IOException {
input.reset();
replacement = null;
exhausted = false;
}
}


@ -159,8 +159,6 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
*/
protected boolean reset(final Reader reader) throws IOException {
source.reset(reader);
if(sink != source)
sink.reset(); // only reset if the sink reference is different from source
return true;
}


@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@ -215,8 +215,7 @@ public class TestBulgarianStemmer extends BaseTokenStreamTestCase {
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(Version.LUCENE_31, 1, true);
set.add("строеве");
WhitespaceTokenizer tokenStream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("строевете строеве"));
MockTokenizer tokenStream = new MockTokenizer(new StringReader("строевете строеве"), MockTokenizer.WHITESPACE, false);
BulgarianStemFilter filter = new BulgarianStemFilter(
new KeywordMarkerFilter(tokenStream, set));


@ -22,8 +22,8 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@ -64,55 +64,55 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
public void testNothingChange() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) );
TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1});
}
public void test1to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1});
}
public void test1to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1});
}
public void test1to3() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1});
}
public void test2to4() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2});
}
public void test2to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2});
}
public void test3to1() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3});
}
public void test4to2() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4});
}
public void test5to0() throws Exception {
CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[0]);
}
@ -136,7 +136,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
//
public void testTokenStream() throws Exception {
CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"i","i","jj","kkk","llll","cc","b","a"},
new int[]{0,2,4,6,8,11,16,20},
@ -157,7 +157,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
public void testChained() throws Exception {
CharStream cs = new MappingCharFilter( normMap,
new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) );
TokenStream ts = new WhitespaceTokenizer( TEST_VERSION_CURRENT, cs );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts,
new String[]{"a","llllllll","i"},
new int[]{0,5,8},


@ -21,6 +21,7 @@ import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@ -90,7 +91,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
@Override
public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords));
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
}
};
@ -159,7 +160,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
@Override
public TokenStream tokenStream(String field, Reader in) {
return new CommonGramsFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, in), commonWords);
new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
}
};
@ -245,7 +246,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/
public void testCaseSensitive() throws Exception {
final String input = "How The s a brown s cow d like A B thing?";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
TokenFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
assertTokenStreamContents(cgf, new String[] {"How", "The", "The_s", "s",
"s_a", "a", "a_brown", "brown", "brown_s", "s", "s_cow", "cow",
@ -257,7 +258,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/
public void testLastWordisStopWord() throws Exception {
final String input = "dog the";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "dog_the" });
@ -268,7 +269,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/
public void testFirstWordisStopWord() throws Exception {
final String input = "the dog";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_dog" });
@ -279,7 +280,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/
public void testOneWordQueryStopWord() throws Exception {
final String input = "the";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the" });
@ -290,7 +291,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/
public void testOneWordQuery() throws Exception {
final String input = "monster";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "monster" });
@ -301,7 +302,7 @@ public class CommonGramsFilterTest extends BaseTokenStreamTestCase {
*/
public void TestFirstAndLastStopWord() throws Exception {
final String input = "the of";
WhitespaceTokenizer wt = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
MockTokenizer wt = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
TokenFilter nsf = new CommonGramsQueryFilter(cgf);
assertTokenStreamContents(nsf, new String[] { "the_of" });


@ -21,6 +21,7 @@ import java.io.StringReader;
import org.xml.sax.InputSource;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
@ -35,8 +36,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
.getHyphenationTree(is);
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"min veninde som er lidt af en læsehest")), hyphenator,
new MockTokenizer(new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false),
hyphenator,
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
@ -55,8 +56,8 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
// the word basket will not be added due to the longest match option
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"basketballkurv")), hyphenator, dict,
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator, dict,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
assertTokenStreamContents(tf,
@ -77,7 +78,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
2, 4);
@ -89,7 +90,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 6);
@ -101,7 +102,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
tf = new HyphenationCompoundWordTokenFilter(
TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("basketballkurv")),
new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false),
hyphenator,
CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
4, 10);
@ -120,9 +121,10 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Sko", "Vind", "Rute", "Torkare", "Blad" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new MockTokenizer(
new StringReader(
"Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba")),
"Bildörr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glasögonfodral Basfiolsfodral Basfiolsfodralmakaregesäll Skomakare Vindrutetorkare Vindrutetorkarblad abba"),
MockTokenizer.WHITESPACE, false),
dict);
assertTokenStreamContents(tf, new String[] { "Bildörr", "Bil", "dörr", "Bilmotor",
@ -149,7 +151,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
"Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Basfiolsfodralmakaregesäll")),
new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);


@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -36,36 +37,23 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet("is", "the", "Time");
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.toString());
assertTrue(stream.incrementToken());
assertEquals("The", termAtt.toString());
assertFalse(stream.incrementToken());
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false);
assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = asSet( "is", "the", "Time" );
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.toString());
assertFalse(stream.incrementToken());
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true);
assertTokenStreamContents(stream, new String[] { "Now" });
}
public void testStopFilt() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.toString());
assertTrue(stream.incrementToken());
assertEquals("The", termAtt.toString());
assertFalse(stream.incrementToken());
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
assertTokenStreamContents(stream, new String[] { "Now", "The" });
}
/**
@ -85,11 +73,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
// with increments
StringReader reader = new StringReader(sb.toString());
StopFilter stpf = new StopFilter(Version.LUCENE_40, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,true);
// without increments
reader = new StringReader(sb.toString());
stpf = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
doTestStopPositons(stpf,false);
// with increments, concatenating two stop filters
ArrayList<String> a0 = new ArrayList<String>();
@ -108,7 +96,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
reader = new StringReader(sb.toString());
StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet0); // first part of the set
StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
stpf0.setEnablePositionIncrements(true);
StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
doTestStopPositons(stpf01,true);
@ -119,6 +107,7 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
stpf.setEnablePositionIncrements(enableIcrements);
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
stpf.reset();
for (int i=0; i<20; i+=3) {
assertTrue(stpf.incrementToken());
log("Token "+i+": "+stpf);
@ -127,6 +116,8 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
}
assertFalse(stpf.incrementToken());
stpf.end();
stpf.close();
}
// print debug info depending on VERBOSE
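The added reset()/end()/close() calls bring the test in line with the full TokenStream consumer workflow. Using only classes that appear in this change set, that contract looks roughly like the following sketch (the class name and input string are arbitrary):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ConsumeTokenStream {
  public static void main(String[] args) throws IOException {
    TokenStream ts = new MockTokenizer(new StringReader("now is the time"),
        MockTokenizer.WHITESPACE, false);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                      // always reset before the first incrementToken()
    while (ts.incrementToken()) {
      System.out.println(termAtt.toString());
    }
    ts.end();                        // reports the final offset past the last token
    ts.close();
  }
}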


@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
@ -278,7 +278,7 @@ public class TestCzechStemmer extends BaseTokenStreamTestCase {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("hole");
CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter(
new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("hole desek")), set));
new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set));
assertTokenStreamContents(filter, new String[] { "hole", "desk" });
}


@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanLightStemFilter(source));
}
};
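
The anonymous ReusableAnalyzerBase above is the pattern shared by the minimal/light stem filter tests that follow in this patch: createComponents() wires a MockTokenizer into the filter under test, and the BaseTokenStreamTestCase helpers drive the whole chain. A sketch of exercising such an analyzer end to end, not taken from this patch; PorterStemFilter (assumed to live in org.apache.lucene.analysis.en in this tree) stands in for whichever stemmer a given test targets, and the expected stems are the usual Porter outputs:

    import java.io.Reader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.BaseTokenStreamTestCase;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.en.PorterStemFilter;
    import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

    public class StemAnalyzerSketch extends BaseTokenStreamTestCase {
      // same shape as the analyzers in these tests: tokenizer plus the filter under test
      private final Analyzer analyzer = new ReusableAnalyzerBase() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
          return new TokenStreamComponents(source, new PorterStemFilter(source));
        }
      };

      public void testStems() throws Exception {
        // assertAnalyzesTo runs the full analyzer chain over the input
        assertAnalyzesTo(analyzer, "books running", new String[] { "book", "run" });
      }
    }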

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
}
};

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
/**
@ -34,7 +34,7 @@ public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
}
};

View File

@ -22,12 +22,11 @@ import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -41,7 +40,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer t = new KeywordTokenizer(reader);
Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(t, new PorterStemFilter(t));
}
};
@ -57,7 +56,7 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
public void testWithKeywordAttribute() throws IOException {
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("yourselves");
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("yourselves yours"));
Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false);
TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set));
assertTokenStreamContents(filter, new String[] {"yourselves", "your"});
}
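
Two MockTokenizer modes appear in this file: KEYWORD, which replaces KeywordTokenizer and emits the entire input as a single token, and WHITESPACE, which replaces WhitespaceTokenizer and splits on whitespace; the trailing boolean controls lowercasing (always false in this patch, to preserve the old tokenizers' behaviour). A small sketch contrasting the two modes, not taken from this patch; the sample text and expectations reflect my reading of those constants rather than anything in the diff:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.BaseTokenStreamTestCase;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.Tokenizer;

    public class MockTokenizerModesSketch extends BaseTokenStreamTestCase {
      public void testModes() throws IOException {
        // KEYWORD: whole input as one token, no lowercasing
        Tokenizer keyword = new MockTokenizer(new StringReader("The Quick Fox"), MockTokenizer.KEYWORD, false);
        assertTokenStreamContents(keyword, new String[] { "The Quick Fox" });

        // WHITESPACE: split on whitespace; the third argument (true here) lowercases each token
        Tokenizer whitespace = new MockTokenizer(new StringReader("The Quick Fox"), MockTokenizer.WHITESPACE, true);
        assertTokenStreamContents(whitespace, new String[] { "the", "quick", "fox" });
      }
    }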

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
}
};

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
}
};

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
}
};

View File

@ -22,8 +22,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import static org.apache.lucene.analysis.util.VocabularyAssert.*;
@ -36,7 +36,7 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
}
};

View File

@ -21,9 +21,9 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Test HindiNormalizer
@ -59,8 +59,7 @@ public class TestHindiNormalizer extends BaseTokenStreamTestCase {
check("आईऊॠॡऐऔीूॄॣैौ", "अइउऋऌएओिुृॢेो");
}
private void check(String input, String output) throws IOException {
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader(input));
Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
TokenFilter tf = new HindiNormalizationFilter(tokenizer);
assertTokenStreamContents(tf, new String[] { output });
}
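
The WHITESPACE and KEYWORD constants used throughout this patch are CharacterRunAutomaton instances, so a test that needs a different token shape can, in principle, hand MockTokenizer its own automaton. A sketch of that, not taken from this patch; it assumes the RegExp/CharacterRunAutomaton classes in org.apache.lucene.util.automaton, and that characters which do not match the pattern simply separate tokens, the way whitespace does for the WHITESPACE constant:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.BaseTokenStreamTestCase;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;
    import org.apache.lucene.util.automaton.RegExp;

    public class CustomMockTokenizerSketch extends BaseTokenStreamTestCase {
      public void testDigitRuns() throws IOException {
        // token pattern: maximal runs of ASCII digits; everything else is treated as a separator
        CharacterRunAutomaton digits = new CharacterRunAutomaton(new RegExp("[0-9]+").toAutomaton());
        Tokenizer tok = new MockTokenizer(new StringReader("abc 123 x7"), digits, false);
        assertTokenStreamContents(tok, new String[] { "123", "7" });
      }
    }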

Some files were not shown because too many files have changed in this diff.