SOLR-2452: Merged with trunk up to r1129202; standardized solr/contrib/* layouts.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1129205 13f79535-47bb-0310-9956-ffa450edef68
Steven Rowe 2011-05-30 14:51:25 +00:00
commit 31c83c9d6f
588 changed files with 30652 additions and 4074 deletions

View File

@@ -73,6 +73,7 @@
 </target>
 <target name="eclipse" description="Setup Eclipse configuration">
+<copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false"/>
 <copy file="dev-tools/eclipse/dot.classpath" tofile=".classpath" overwrite="true"/>
 <mkdir dir=".settings"/>
 <copy file="dev-tools/eclipse/resources.prefs"

View File

@@ -20,8 +20,6 @@
 <classpathentry kind="src" path="lucene/contrib/queryparser/src/test"/>
 <classpathentry kind="src" path="lucene/contrib/spatial/src/java"/>
 <classpathentry kind="src" path="lucene/contrib/spatial/src/test"/>
-<classpathentry kind="src" path="lucene/contrib/spellchecker/src/java"/>
-<classpathentry kind="src" path="lucene/contrib/spellchecker/src/test"/>
 <classpathentry kind="src" path="lucene/contrib/wordnet/src/java"/>
 <classpathentry kind="src" path="lucene/contrib/wordnet/src/test"/>
 <classpathentry kind="src" path="lucene/contrib/xml-query-parser/src/java"/>
@@ -44,6 +42,8 @@
 <classpathentry kind="src" path="modules/benchmark/src/test"/>
 <classpathentry kind="src" path="modules/grouping/src/java"/>
 <classpathentry kind="src" path="modules/grouping/src/test"/>
+<classpathentry kind="src" path="modules/suggest/src/java"/>
+<classpathentry kind="src" path="modules/suggest/src/test"/>
 <classpathentry kind="src" path="solr/src/java"/>
 <classpathentry kind="src" path="solr/src/webapp/src"/>
 <classpathentry kind="src" path="solr/src/common"/>

View File

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>lucene_solr</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>

View File

@@ -11,7 +11,6 @@
 <buildFile url="file://$PROJECT_DIR$/lucene/contrib/queries/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/lucene/contrib/queryparser/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/lucene/contrib/spatial/build.xml" />
-<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spellchecker/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/lucene/contrib/wordnet/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/lucene/contrib/xml-query-parser/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/modules/analysis/common/build.xml" />
@@ -21,6 +20,7 @@
 <buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
+<buildFile url="file://$PROJECT_DIR$/modules/suggest/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
 <buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />

View File

@@ -12,7 +12,6 @@
 <module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries.iml" />
 <module filepath="$PROJECT_DIR$/lucene/contrib/queryparser/queryparser.iml" />
 <module filepath="$PROJECT_DIR$/lucene/contrib/spatial/spatial.iml" />
-<module filepath="$PROJECT_DIR$/lucene/contrib/spellchecker/spellchecker.iml" />
 <module filepath="$PROJECT_DIR$/lucene/contrib/wordnet/wordnet.iml" />
 <module filepath="$PROJECT_DIR$/lucene/contrib/xml-query-parser/xml-query-parser.iml" />
 <module filepath="$PROJECT_DIR$/modules/analysis/common/common.iml" />
@@ -22,6 +21,7 @@
 <module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
 <module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
 <module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
+<module filepath="$PROJECT_DIR$/modules/suggest/suggest.iml" />
 <module filepath="$PROJECT_DIR$/solr/solr.iml" />
 <module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
 <module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />

View File

@@ -141,13 +141,6 @@
 <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
 <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
 </configuration>
-<configuration default="false" name="spellchecker contrib" type="JUnit" factoryName="JUnit">
-<module name="spellchecker" />
-<option name="TEST_OBJECT" value="package" />
-<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/lucene/build/contrib/spellchecker" />
-<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
-<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
-</configuration>
 <configuration default="false" name="stempel analysis module" type="JUnit" factoryName="JUnit">
 <module name="stempel" />
 <option name="TEST_OBJECT" value="package" />
@@ -155,6 +148,13 @@
 <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
 <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
 </configuration>
+<configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
+<module name="suggest" />
+<option name="TEST_OBJECT" value="package" />
+<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/suggest/build" />
+<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
+<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
+</configuration>
 <configuration default="false" name="uima contrib" type="JUnit" factoryName="JUnit">
 <module name="uima" />
 <option name="TEST_OBJECT" value="package" />
@@ -197,8 +197,8 @@
 <item index="17" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
 <item index="18" class="java.lang.String" itemvalue="JUnit.solr" />
 <item index="19" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
-<item index="20" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
-<item index="21" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
+<item index="20" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
+<item index="21" class="java.lang.String" itemvalue="JUnit.suggest module" />
 <item index="22" class="java.lang.String" itemvalue="JUnit.uima contrib" />
 <item index="23" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
 <item index="24" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />

View File

@@ -1,18 +1,16 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="JAVA_MODULE" version="4">
 <component name="NewModuleRootManager" inherit-compiler-output="false">
-<output url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/java" />
-<output-test url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/test" />
+<output url="file://$MODULE_DIR$/build/classes/java" />
+<output-test url="file://$MODULE_DIR$/build/classes/test" />
 <exclude-output />
 <content url="file://$MODULE_DIR$">
-<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
 <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
 </content>
 <orderEntry type="inheritedJdk" />
 <orderEntry type="sourceFolder" forTests="false" />
 <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
-<orderEntry type="module" module-name="queries" />
-<orderEntry type="module" module-name="misc" />
 <orderEntry type="module" module-name="common" />
 <orderEntry type="module" module-name="lucene" />
 </component>

View File

@@ -39,7 +39,6 @@
 <module>queries</module>
 <module>queryparser</module>
 <module>spatial</module>
-<module>spellchecker</module>
 <module>wordnet</module>
 <module>xml-query-parser</module>
 </modules>

View File

@@ -34,6 +34,7 @@
 <module>analysis</module>
 <module>benchmark</module>
 <module>grouping</module>
+<module>suggest</module>
 </modules>
 <build>
 <directory>build/lucene-modules-aggregator</directory>

View File

@@ -24,16 +24,16 @@
 <groupId>org.apache.lucene</groupId>
 <artifactId>lucene-parent</artifactId>
 <version>@version@</version>
-<relativePath>../../pom.xml</relativePath>
+<relativePath>../../lucene/pom.xml</relativePath>
 </parent>
 <groupId>org.apache.lucene</groupId>
-<artifactId>lucene-spellchecker</artifactId>
+<artifactId>lucene-suggest</artifactId>
 <packaging>jar</packaging>
-<name>Lucene Spellchecker</name>
-<description>Spell Checker</description>
+<name>Lucene Suggest</name>
+<description>Lucene Suggest Module</description>
 <properties>
-<module-directory>lucene/contrib/spellchecker</module-directory>
-<build-directory>../../build/contrib/spellchecker</build-directory>
+<module-directory>modules/suggest</module-directory>
+<build-directory>build</build-directory>
 </properties>
 <dependencies>
 <dependency>
@@ -43,14 +43,14 @@
 </dependency>
 <dependency>
 <groupId>${project.groupId}</groupId>
-<artifactId>lucene-test-framework</artifactId>
+<artifactId>lucene-analyzers-common</artifactId>
 <version>${project.version}</version>
-<scope>test</scope>
 </dependency>
 <dependency>
 <groupId>${project.groupId}</groupId>
-<artifactId>lucene-analyzers-common</artifactId>
+<artifactId>lucene-test-framework</artifactId>
 <version>${project.version}</version>
+<scope>test</scope>
 </dependency>
 <dependency>
 <groupId>junit</groupId>

View File

@@ -89,7 +89,7 @@
 </dependency>
 <dependency>
 <groupId>org.apache.lucene</groupId>
-<artifactId>lucene-spellchecker</artifactId>
+<artifactId>lucene-suggest</artifactId>
 <version>${project.version}</version>
 </dependency>
 <dependency>
<dependency> <dependency>

View File

@@ -427,7 +427,32 @@ Bug fixes
 with more document deletions is requested before a reader with fewer
 deletions, provided they share some segments. (yonik)
+* LUCENE-3147,LUCENE-3152: Fixed open file handles leaks in many places in the
+  code. Now MockDirectoryWrapper (in test-framework) tracks all open files,
+  including locks, and fails if the test fails to release all of them.
+  (Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)
 ======================= Lucene 3.x (not yet released) =======================
+Changes in backwards compatibility policy
+* LUCENE-3140: IndexOutput.copyBytes now takes a DataInput (superclass
+  of IndexInput) as its first argument. (Robert Muir, Dawid Weiss,
+  Mike McCandless)
+Changes in runtime behavior
+* LUCENE-2834: the hash used to compute the lock file name when the
+  lock file is not stored in the index has changed. This means you
+  will see a different lucene-XXX-write.lock in your lock directory.
+  (Robert Muir, Uwe Schindler, Mike McCandless)
+New Features
+* LUCENE-3140: Added experimental FST implementation to Lucene.
+  (Robert Muir, Dawid Weiss, Mike McCandless)
+======================= Lucene 3.2.0 =======================
 Changes in backwards compatibility policy
@@ -486,6 +511,10 @@ New features
 document IDs and scores encountered during the search, and "replay" them to
 another Collector. (Mike McCandless, Shai Erera)
+* LUCENE-3112: Added experimental IndexWriter.add/updateDocuments,
+  enabling a block of documents to be indexed, atomically, with
+  guaranteed sequential docIDs. (Mike McCandless)
 API Changes
 * LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
@@ -507,6 +536,9 @@ Optimizations
 * LUCENE-2897: Apply deleted terms while flushing a segment. We still
 buffer deleted terms to later apply to past segments. (Mike McCandless)
+* LUCENE-3126: IndexWriter.addIndexes copies incoming segments into CFS if they
+  aren't already and MergePolicy allows that. (Shai Erera)
 Bug fixes
 * LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
@@ -541,6 +573,9 @@ Build
 * LUCENE-3006: Building javadocs will fail on warnings by default.
 Override with -Dfailonjavadocwarning=false (sarowe, gsingers)
+* LUCENE-3128: "ant eclipse" creates a .project file for easier Eclipse
+  integration (unless one already exists). (Daniel Serodio via Shai Erera)
 Test Cases
 * LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
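
The LUCENE-3140 entry above widens IndexOutput.copyBytes to accept any DataInput rather than only IndexInput. Below is a minimal sketch of the call, assuming the pre-4.0 Directory API of this branch; the directory handling around it is illustrative, not taken from this commit:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

public class CopyBytesSketch {
  /** Copies one file between directories with a single bulk call. */
  static void copyFile(Directory src, Directory dst, String name) throws IOException {
    IndexInput in = src.openInput(name);      // an IndexInput is-a DataInput
    IndexOutput out = dst.createOutput(name);
    try {
      out.copyBytes(in, in.length());         // the LUCENE-3140 DataInput overload
    } finally {
      in.close();
      out.close();
    }
  }
}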

View File

@@ -227,7 +227,6 @@
 <packageset dir="contrib/misc/src/java"/>
 <packageset dir="contrib/queries/src/java"/>
 <packageset dir="contrib/spatial/src/java"/>
-<packageset dir="contrib/spellchecker/src/java"/>
 <packageset dir="contrib/wordnet/src/java"/>
 <packageset dir="contrib/xml-query-parser/src/java"/>
 <packageset dir="contrib/queryparser/src/java"/>
@@ -248,7 +247,6 @@
 <group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.regex*:org.apache.regexp*"/>
 <group title="contrib: Query Parser" packages="org.apache.lucene.queryParser.*"/>
 <group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
-<group title="contrib: SpellChecker" packages="org.apache.lucene.search.spell*"/>
 <group title="contrib: WordNet" packages="org.apache.lucene.wordnet*"/>
 <group title="contrib: XML Query Parser" packages="org.apache.lucene.xmlparser*"/>

View File

@@ -6,6 +6,8 @@ Build
 * LUCENE-2845: Moved contrib/benchmark to modules.
+* LUCENE-2995: Moved contrib/spellchecker into modules/suggest.
 New Features
 * LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
@@ -48,7 +50,14 @@ Bug Fixes
 * LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
 not lowercasing the key before checking for the tag (Adriano Crestani)
 ======================= Lucene 3.x (not yet released) =======================
+API Changes
+* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
+  (Sujit Pal via Koji Sekiguchi)
+======================= Lucene 3.2.0 =======================
 Changes in backwards compatibility policy

View File

@@ -93,7 +93,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
 if( maxNumFragments < 0 )
 throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
-List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
+List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );
 List<String> fragments = new ArrayList<String>( maxNumFragments );
 Field[] values = getFields( reader, docId, fieldName );

View File

@@ -29,7 +29,7 @@ import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseIn
 */
 public class FieldFragList {
-List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
+private List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
 /**
 * a constructor.
@@ -50,6 +50,15 @@ public class FieldFragList {
 fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
 }
+/**
+* return the list of WeightedFragInfos.
+*
+* @return fragInfos.
+*/
+public List<WeightedFragInfo> getFragInfos() {
+return fragInfos;
+}
 public static class WeightedFragInfo {
 List<SubInfo> subInfos;

View File

@@ -26,6 +26,7 @@ import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 /**
 * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@@ -80,16 +81,16 @@ public class FieldTermStack {
 Set<String> termSet = fieldQuery.getTermSet( fieldName );
 // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
 if( termSet == null ) return;
+final CharsRef spare = new CharsRef();
 for( BytesRef term : tpv.getTerms() ){
-if( !termSet.contains( term.utf8ToString() ) ) continue;
+if( !termSet.contains( term.utf8ToChars(spare).toString() ) ) continue;
 int index = tpv.indexOf( term );
 TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
 if( tvois == null ) return; // just return to make null snippets
 int[] poss = tpv.getTermPositions( index );
 if( poss == null ) return; // just return to make null snippets
 for( int i = 0; i < tvois.length; i++ )
-termList.add( new TermInfo( term.utf8ToString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
+termList.add( new TermInfo( term.utf8ToChars(spare).toString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
 }
 // sort by position
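
The change above threads a single CharsRef through the term loop: each BytesRef term is converted from UTF-8 via term.utf8ToChars(spare).toString() instead of term.utf8ToString(), so one char buffer is reused across iterations. The same pattern appears below in InstantiatedIndex and MoreLikeThis. A minimal standalone sketch of the idiom (the terms array is illustrative):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

public class CharsRefReuseSketch {
  static void printTerms(BytesRef[] terms) {
    final CharsRef spare = new CharsRef(); // grown as needed, reused every iteration
    for (BytesRef term : terms) {
      // utf8ToChars fills 'spare' and returns it; only toString() allocates
      System.out.println(term.utf8ToChars(spare).toString());
    }
  }
}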

View File

@@ -24,7 +24,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
 public void testNullFieldFragList() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
-assertEquals( 0, ffl.fragInfos.size() );
+assertEquals( 0, ffl.getFragInfos().size() );
 }
 public void testTooSmallFragSize() throws Exception {
@@ -40,90 +40,90 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
 public void testSmallerFragSizeThanTermQuery() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void testSmallerFragSizeThanPhraseQuery() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
-assertEquals( 1, ffl.fragInfos.size() );
-if (VERBOSE) System.out.println( ffl.fragInfos.get( 0 ).toString() );
-assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
+assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void test1TermIndex() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void test2TermsIndex1Frag() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
 ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
 ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void test2TermsIndex2Frags() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
-assertEquals( 2, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
-assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.fragInfos.get( 1 ).toString() );
+assertEquals( 2, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
+assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
 ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
-assertEquals( 2, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
-assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
+assertEquals( 2, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
+assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
 ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
-assertEquals( 2, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
-assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
+assertEquals( 2, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
+assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
 }
 public void test2TermsQuery() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
-assertEquals( 0, ffl.fragInfos.size() );
+assertEquals( 0, ffl.getFragInfos().size() );
 ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
 ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void testPhraseQuery() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
-assertEquals( 0, ffl.fragInfos.size() );
+assertEquals( 0, ffl.getFragInfos().size() );
 ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
-assertEquals( 0, ffl.fragInfos.size() );
+assertEquals( 0, ffl.getFragInfos().size() );
 ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void testPhraseQuerySlop() throws Exception {
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
 }
 private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
@@ -142,8 +142,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void test1PhraseLongMV() throws Exception {
@@ -154,8 +154,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void test1PhraseLongMVB() throws Exception {
@@ -166,7 +166,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
 FieldPhraseList fpl = new FieldPhraseList( stack, fq );
 SimpleFragListBuilder sflb = new SimpleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
 }
 }

View File

@@ -24,21 +24,21 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
 public void testNullFieldFragList() throws Exception {
 SingleFragListBuilder sflb = new SingleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
-assertEquals( 0, ffl.fragInfos.size() );
+assertEquals( 0, ffl.getFragInfos().size() );
 }
 public void testShortFieldFragList() throws Exception {
 SingleFragListBuilder sflb = new SingleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
 }
 public void testLongFieldFragList() throws Exception {
 SingleFragListBuilder sflb = new SingleFragListBuilder();
 FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
-assertEquals( 1, ffl.fragInfos.size() );
-assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
+assertEquals( 1, ffl.getFragInfos().size() );
+assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
 }
 private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {

View File

@@ -41,6 +41,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 /**
 * Represented as a coupled graph of class instances, this
@@ -228,12 +229,13 @@ public class InstantiatedIndex
 if (fieldsC != null) {
 FieldsEnum fieldsEnum = fieldsC.iterator();
 String field;
+final CharsRef spare = new CharsRef();
 while((field = fieldsEnum.next()) != null) {
 if (fields == null || fields.contains(field)) {
 TermsEnum termsEnum = fieldsEnum.terms();
 BytesRef text;
 while((text = termsEnum.next()) != null) {
-String termText = text.utf8ToString();
+String termText = text.utf8ToChars(spare).toString();
 InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
 final long totalTermFreq = termsEnum.totalTermFreq();
 if (totalTermFreq != -1) {

View File

@@ -0,0 +1,2 @@
AnyObjectId[b9c8c8a170881dfe9c33adc87c26348904510954] was removed in git history.
Apache SVN contains full history.

View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -42,4 +42,26 @@
 <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
 </subant>
 </target>
+<target name="build-native-unix" >
+<mkdir dir="${common.build.dir}/native"/>
+<taskdef resource="cpptasks.tasks">
+<classpath>
+<pathelement location="ant_lib/cpptasks-1.0b5.jar"/>
+</classpath>
+</taskdef>
+<cc outtype="shared" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" >
+<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
+<includepath>
+<pathelement location="${java.home}/../include"/>
+<pathelement location="${java.home}/../include/linux"/>
+<pathelement location="${java.home}/../include/solaris"/>
+</includepath>
+<compilerarg value="-fPIC" />
+</cc>
+</target>
 </project>

View File

@@ -26,6 +26,7 @@ import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
+import org.apache.lucene.index.IndexWriter; // Required for javadocs
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.FSDirectory;
@@ -45,6 +46,11 @@ import org.apache.lucene.store.FSDirectory;
 * @lucene.experimental You can easily
 * accidentally remove segments from your index so be
 * careful!
+*
+* <p><b>NOTE</b>: this tool is unaware of documents added
+* atomically via {@link IndexWriter#addDocuments} or {@link
+* IndexWriter#updateDocuments}, which means it can easily
+* break up such document groups.
 */
 public class IndexSplitter {
 public SegmentInfos infos;
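
The NOTE added above points at the IndexWriter.addDocuments/updateDocuments API from LUCENE-3112 (see the CHANGES entry earlier in this commit). A minimal sketch of indexing such an atomic block, assuming the pre-4.0 Field and IndexWriter signatures of this branch; field names and values are illustrative:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

public class AddDocumentsSketch {
  /** Indexes a group of documents atomically, with sequential docIDs. */
  static void indexBlock(IndexWriter writer) throws IOException {
    List<Document> block = new ArrayList<Document>();
    for (String color : new String[] {"red", "green", "blue"}) {
      Document child = new Document();
      child.add(new Field("color", color, Field.Store.NO, Field.Index.NOT_ANALYZED));
      block.add(child);
    }
    Document parent = new Document();
    parent.add(new Field("type", "product", Field.Store.NO, Field.Index.NOT_ANALYZED));
    block.add(parent);          // convention: parent document last in the block
    writer.addDocuments(block); // all-or-nothing, contiguous docIDs
  }
}

Because IndexSplitter partitions purely along segment boundaries, a block indexed this way can still land in different output indexes, which is exactly what the added javadoc NOTE warns about.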

View File

@@ -40,6 +40,11 @@ import org.apache.lucene.util.Version;
 * <p>Note 2: the disadvantage of this tool is that source index needs to be
 * read as many times as there are parts to be created, hence the name of this
 * tool.
+*
+* <p><b>NOTE</b>: this tool is unaware of documents added
+* atomically via {@link IndexWriter#addDocuments} or {@link
+* IndexWriter#updateDocuments}, which means it can easily
+* break up such document groups.
 */
 public class MultiPassIndexSplitter {

View File

@@ -269,7 +269,7 @@ public class NRTCachingDirectory extends Directory {
 in = cache.openInput(fileName);
 in.copyBytes(out, in.length());
 } finally {
-IOUtils.closeSafely(in, out);
+IOUtils.closeSafely(false, in, out);
 }
 synchronized(this) {
 cache.deleteFile(fileName);
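
The fix above supplies the boolean that this merge adds as the first parameter of IOUtils.closeSafely. As used here, false appears to mean "do not suppress exceptions", i.e. close everything but rethrow the first failure; treat that reading as an assumption rather than documented behavior. A minimal sketch:

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.util.IOUtils;

public class CloseSafelySketch {
  /** Closes both streams even if one close() throws. */
  static void closeBoth(Closeable in, Closeable out) throws IOException {
    // false: propagate the first close() failure instead of swallowing it,
    // while still attempting to close the remaining arguments (assumption)
    IOUtils.closeSafely(false, in, out);
  }
}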

View File

@@ -51,9 +51,11 @@ for details.
 Steps to build:
 <ul>
-<li> <tt>cd lucene/contrib/misc/src/java/org/apache/lucene/store</tt>
-<li> Compile NativePosixUtil.cpp -> libNativePosixUtil.so. On linux, something like <tt>gcc -fPIC -o libNativePosixUtil.so -shared -Wl,-soname,libNativePosixUtil.so -I$JAVA_HOME/include -I$JAVA_HOME/include/linux NativePosixUtil.cpp -lc -lstdc++</tt>. Add <tt>-m64</tt> if you want to compile 64bit (and java must be run with -d64 so it knows to load a 64bit dynamic lib).
+<li> <tt>cd lucene/contrib/misc/</tt>
+<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux, run <tt>ant build-native-unix</tt>.
+<li> <tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder
 <li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <tt>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</tt>, where /path/to/dir contains libNativePosixUtil.so)

View File

@@ -18,6 +18,7 @@ package org.apache.lucene.search.regex;
 */
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.regexp.CharacterIterator;
import org.apache.regexp.RE;
@@ -104,11 +105,11 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher {
  private RE regexp;
- private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
+ private final CharsRef utf16 = new CharsRef(10);
  private final CharacterIterator utf16wrapper = new CharacterIterator() {
    public char charAt(int pos) {
-     return utf16.result[pos];
+     return utf16.chars[pos];
    }
    public boolean isEnd(int pos) {
@@ -120,7 +121,7 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
    }
    public String substring(int beginIndex, int endIndex) {
-     return new String(utf16.result, beginIndex, endIndex - beginIndex);
+     return new String(utf16.chars, beginIndex, endIndex - beginIndex);
    }
  };
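Both regex capability classes now decode terms into a reusable CharsRef instead of the old UTF16Result. A sketch of the shared-buffer match path, assuming CharsRef exposes a chars array and is accepted by java.util.regex as a CharSequence (as the JavaUtilRegexMatcher hunk below implies):

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.CharsRef;
    import org.apache.lucene.util.UnicodeUtil;

    final class SharedBufferMatch {
      // Grows as needed but is never reallocated per term.
      private final CharsRef utf16 = new CharsRef(10);

      boolean matches(BytesRef term, java.util.regex.Pattern pattern) {
        // Decode the UTF-8 term bytes into the reusable UTF-16 buffer.
        UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16);
        // No intermediate String is allocated for the match itself.
        return pattern.matcher(utf16).matches();
      }
    }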

View File

@@ -21,6 +21,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

/**
@@ -95,25 +96,11 @@ public class JavaUtilRegexCapabilities implements RegexCapabilities {
class JavaUtilRegexMatcher implements RegexCapabilities.RegexMatcher {
  private final Pattern pattern;
  private final Matcher matcher;
- private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
- private final CharSequence utf16wrapper = new CharSequence() {
-
-   public int length() {
-     return utf16.length;
-   }
-
-   public char charAt(int index) {
-     return utf16.result[index];
-   }
-
-   public CharSequence subSequence(int start, int end) {
-     return new String(utf16.result, start, end - start);
-   }
- };
+ private final CharsRef utf16 = new CharsRef(10);

  public JavaUtilRegexMatcher(String regex, int flags) {
    this.pattern = Pattern.compile(regex, flags);
-   this.matcher = this.pattern.matcher(utf16wrapper);
+   this.matcher = this.pattern.matcher(utf16);
  }

  public boolean match(BytesRef term) {

View File

@@ -48,6 +48,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
@@ -850,8 +851,9 @@ public final class MoreLikeThis {
    {
      BytesRef[] terms = vector.getTerms();
      int freqs[] = vector.getTermFrequencies();
+     final CharsRef spare = new CharsRef();
      for (int j = 0; j < terms.length; j++) {
-       String term = terms[j].utf8ToString();
+       final String term = terms[j].utf8ToChars(spare).toString();
        if(isNoiseWord(term)){
          continue;
View File

@@ -1,5 +1,22 @@
<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
<project name="DTDDocAnt" default="main">

  <import file="../contrib-build.xml"/>

View File

@@ -23,6 +23,7 @@ import java.util.zip.DataFormatException;
import java.io.ByteArrayOutputStream;

import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

/** Simple utility class providing static methods to
@@ -118,9 +119,9 @@ public class CompressionTools {
  /** Decompress the byte array previously returned by
   *  compressString back into a String */
  public static String decompressString(byte[] value) throws DataFormatException {
-   UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
    final byte[] bytes = decompress(value);
+   CharsRef result = new CharsRef(bytes.length);
    UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
-   return new String(result.result, 0, result.length);
+   return new String(result.chars, 0, result.length);
  }
}
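A hedged round-trip example for the two methods touched above; the input string is arbitrary:

    import java.util.zip.DataFormatException;

    import org.apache.lucene.document.CompressionTools;

    final class CompressionRoundTrip {
      public static void main(String[] args) throws DataFormatException {
        byte[] compressed = CompressionTools.compressString("some stored field value");
        String restored = CompressionTools.decompressString(compressed);
        assert restored.equals("some stored field value");
      }
    }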

View File

@@ -60,6 +60,9 @@ public final class CompoundFileWriter {
    /** temporary holder for the start of this file's data section */
    long dataOffset;
+
+   /** the directory which contains the file. */
+   Directory dir;
  }

  // Before versioning started.
@@ -119,6 +122,14 @@ public final class CompoundFileWriter {
   * has been added already
   */
  public void addFile(String file) {
+   addFile(file, directory);
+ }
+
+ /**
+  * Same as {@link #addFile(String)}, only for files that are found in an
+  * external {@link Directory}.
+  */
+ public void addFile(String file, Directory dir) {
    if (merged)
      throw new IllegalStateException(
        "Can't add extensions after merge has been called");
@@ -133,6 +144,7 @@ public final class CompoundFileWriter {
    FileEntry entry = new FileEntry();
    entry.file = file;
+   entry.dir = dir;
    entries.add(entry);
  }
@@ -170,7 +182,7 @@ public final class CompoundFileWriter {
      fe.directoryOffset = os.getFilePointer();
      os.writeLong(0);    // for now
      os.writeString(IndexFileNames.stripSegmentName(fe.file));
-     totalSize += directory.fileLength(fe.file);
+     totalSize += fe.dir.fileLength(fe.file);
    }

    // Pre-allocate size of file as optimization --
@@ -216,7 +228,7 @@ public final class CompoundFileWriter {
   * output stream.
   */
  private void copyFile(FileEntry source, IndexOutput os) throws IOException {
-   IndexInput is = directory.openInput(source.file);
+   IndexInput is = source.dir.openInput(source.file);
    try {
      long startPtr = os.getFilePointer();
      long length = is.length();
View File

@@ -84,19 +84,44 @@ final class DocFieldProcessor extends DocConsumer {
  @Override
  public void abort() {
-   for(int i=0;i<fieldHash.length;i++) {
-     DocFieldProcessorPerField field = fieldHash[i];
-     while(field != null) {
+   Throwable th = null;
+
+   for (DocFieldProcessorPerField field : fieldHash) {
+     while (field != null) {
        final DocFieldProcessorPerField next = field.next;
-       field.abort();
+       try {
+         field.abort();
+       } catch (Throwable t) {
+         if (th == null) {
+           th = t;
+         }
+       }
        field = next;
      }
    }

    try {
      fieldsWriter.abort();
-   } finally {
-     consumer.abort();
+   } catch (Throwable t) {
+     if (th == null) {
+       th = t;
+     }
+   }
+
+   try {
+     consumer.abort();
+   } catch (Throwable t) {
+     if (th == null) {
+       th = t;
+     }
+   }
+
+   // If any error occurred, throw it.
+   if (th != null) {
+     if (th instanceof RuntimeException) throw (RuntimeException) th;
+     if (th instanceof Error) throw (Error) th;
+     // defensive code - we should not hit unchecked exceptions
+     throw new RuntimeException(th);
    }
  }
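The new abort() follows a run-everything, rethrow-first-failure pattern. The same pattern in isolation, as a generic sketch:

    final class FirstThrowableSketch {
      // Run every cleanup step, remember only the first failure, and
      // rethrow it once everything has been attempted.
      static void abortAll(Runnable... steps) {
        Throwable th = null;
        for (Runnable step : steps) {
          try {
            step.run();
          } catch (Throwable t) {
            if (th == null) {
              th = t;  // keep the first cause, keep aborting the rest
            }
          }
        }
        if (th != null) {
          if (th instanceof RuntimeException) throw (RuntimeException) th;
          if (th instanceof Error) throw (Error) th;
          throw new RuntimeException(th);
        }
      }
    }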

View File

@@ -87,6 +87,7 @@ final class DocInverter extends DocFieldConsumer {
    endConsumer.startDocument();
  }

+ @Override
  public void finishDocument() throws IOException {
    // TODO: allow endConsumer.finishDocument to also return
    // a DocWriter

View File

@@ -53,8 +53,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
  @Override
  void abort() {
-   consumer.abort();
-   endConsumer.abort();
+   try {
+     consumer.abort();
+   } finally {
+     endConsumer.abort();
+   }
  }

  @Override

View File

@@ -228,14 +228,19 @@ final class DocumentsWriter {
    }
    final Iterator<ThreadState> threadsIterator = perThreadPool.getActivePerThreadsIterator();
    while (threadsIterator.hasNext()) {
-     ThreadState perThread = threadsIterator.next();
+     final ThreadState perThread = threadsIterator.next();
      perThread.lock();
      try {
        if (perThread.isActive()) { // we might be closed
-         perThread.perThread.abort();
-         perThread.perThread.checkAndResetHasAborted();
+         try {
+           perThread.perThread.abort();
+         } catch (IOException ex) {
+           // continue
+         } finally {
+           perThread.perThread.checkAndResetHasAborted();
+           flushControl.doOnAbort(perThread);
+         }
        } else {
          assert closed;
        }
@@ -243,7 +248,6 @@ final class DocumentsWriter {
        perThread.unlock();
      }
    }
-
    success = true;
  } finally {
    if (infoStream != null) {
@@ -274,11 +278,9 @@ final class DocumentsWriter {
    flushControl.setClosed();
  }

- boolean updateDocument(final Document doc, final Analyzer analyzer,
-     final Term delTerm) throws CorruptIndexException, IOException {
+ private boolean preUpdate() throws CorruptIndexException, IOException {
    ensureOpen();
    boolean maybeMerge = false;
-   final boolean isUpdate = delTerm != null;
    if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
      // Help out flushing any queued DWPTs so we can un-stall:
      if (infoStream != null) {
@@ -303,9 +305,59 @@ final class DocumentsWriter {
        message("continue indexing after helping out flushing DocumentsWriter is healthy");
      }
    }
+   return maybeMerge;
+ }

-   final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(),
-       this, doc);
+ private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
+   if (flushingDWPT != null) {
+     maybeMerge |= doFlush(flushingDWPT);
+   } else {
+     final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
+     if (nextPendingFlush != null) {
+       maybeMerge |= doFlush(nextPendingFlush);
+     }
+   }
+   return maybeMerge;
+ }
+
+ boolean updateDocuments(final Iterable<Document> docs, final Analyzer analyzer,
+     final Term delTerm) throws CorruptIndexException, IOException {
+   boolean maybeMerge = preUpdate();
+
+   final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
+   final DocumentsWriterPerThread flushingDWPT;
+
+   try {
+     if (!perThread.isActive()) {
+       ensureOpen();
+       assert false: "perThread is not active but we are still open";
+     }
+
+     final DocumentsWriterPerThread dwpt = perThread.perThread;
+     try {
+       final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
+       numDocsInRAM.addAndGet(docCount);
+     } finally {
+       if (dwpt.checkAndResetHasAborted()) {
+         flushControl.doOnAbort(perThread);
+       }
+     }
+     final boolean isUpdate = delTerm != null;
+     flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
+   } finally {
+     perThread.unlock();
+   }
+
+   return postUpdate(flushingDWPT, maybeMerge);
+ }
+
+ boolean updateDocument(final Document doc, final Analyzer analyzer,
+     final Term delTerm) throws CorruptIndexException, IOException {
+
+   boolean maybeMerge = preUpdate();
+
+   final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
    final DocumentsWriterPerThread flushingDWPT;

    try {
@@ -324,20 +376,13 @@ final class DocumentsWriter {
          flushControl.doOnAbort(perThread);
        }
      }
+     final boolean isUpdate = delTerm != null;
      flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
    } finally {
      perThread.unlock();
    }

-   if (flushingDWPT != null) {
-     maybeMerge |= doFlush(flushingDWPT);
-   } else {
-     final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
-     if (nextPendingFlush != null) {
-       maybeMerge |= doFlush(nextPendingFlush);
-     }
-   }
-   return maybeMerge;
+   return postUpdate(flushingDWPT, maybeMerge);
  }

  private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
@@ -541,4 +586,20 @@ final class DocumentsWriter {
      return (!isSegmentFlush || segment != null);
    }
  }
+
+ // used by IW during close to assert all DWPT are inactive after final flush
+ boolean assertNoActiveDWPT() {
+   Iterator<ThreadState> activePerThreadsIterator = perThreadPool.getAllPerThreadsIterator();
+   while(activePerThreadsIterator.hasNext()) {
+     ThreadState next = activePerThreadsIterator.next();
+     next.lock();
+     try {
+       assert !next.isActive();
+     } finally {
+       next.unlock();
+     }
+   }
+   return true;
+ }
}

View File

@@ -16,6 +16,7 @@ package org.apache.lucene.index;
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
@@ -68,7 +69,7 @@ public final class DocumentsWriterFlushControl {
    this.stallControl = new DocumentsWriterStallControl();
    this.perThreadPool = documentsWriter.perThreadPool;
    this.flushPolicy = documentsWriter.flushPolicy;
-   this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;;
+   this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
    this.config = config;
    this.documentsWriter = documentsWriter;
  }
@@ -162,8 +163,6 @@ public final class DocumentsWriterFlushControl {
      stallControl.updateStalled(this);
      assert assertMemory();
    }
-
  }

  synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
@@ -217,7 +216,7 @@ public final class DocumentsWriterFlushControl {
      assert assertMemory();
      // Take it out of the loop this DWPT is stale
      perThreadPool.replaceForFlush(state, closed);
-   }finally {
+   } finally {
      stallControl.updateStalled(this);
    }
  }
@@ -305,6 +304,7 @@ public final class DocumentsWriterFlushControl {
  synchronized void setClosed() {
    // set by DW to signal that we should not release new DWPT after close
    this.closed = true;
+   perThreadPool.deactivateUnreleasedStates();
  }

  /**
@@ -387,8 +387,12 @@ public final class DocumentsWriterFlushControl {
        toFlush.add(flushingDWPT);
      }
    } else {
-     // get the new delete queue from DW
-     next.perThread.initialize();
+     if (closed) {
+       next.resetWriter(null); // make this state inactive
+     } else {
+       // get the new delete queue from DW
+       next.perThread.initialize();
+     }
    }
  } finally {
    next.unlock();
@@ -451,10 +455,21 @@ public final class DocumentsWriterFlushControl {
    try {
      for (DocumentsWriterPerThread dwpt : flushQueue) {
        doAfterFlush(dwpt);
+       try {
+         dwpt.abort();
+       } catch (IOException ex) {
+         // continue
+       }
      }
      for (BlockedFlush blockedFlush : blockedFlushes) {
-       flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
+       flushingWriters
+           .put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
        doAfterFlush(blockedFlush.dwpt);
+       try {
+         blockedFlush.dwpt.abort();
+       } catch (IOException ex) {
+         // continue
+       }
      }
    } finally {
      fullFlush = false;
@@ -512,5 +527,4 @@ public final class DocumentsWriterFlushControl {
  boolean anyStalledThreads() {
    return stallControl.anyStalledThreads();
  }
-
}

View File

@@ -104,7 +104,7 @@ public class DocumentsWriterPerThread {
      // largish:
      doc = null;
      analyzer = null;
    }
  }

  static class FlushedSegment {
@@ -177,7 +177,7 @@ public class DocumentsWriterPerThread {
    this.parent = parent;
    this.fieldInfos = fieldInfos;
    this.writer = parent.indexWriter;
-   this.infoStream = parent.indexWriter.getInfoStream();
+   this.infoStream = parent.infoStream;
    this.docState = new DocState(this);
    this.docState.similarityProvider = parent.indexWriter.getConfig()
        .getSimilarityProvider();
@@ -253,6 +253,82 @@ public class DocumentsWriterPerThread {
    finishDocument(delTerm);
  }

+ public int updateDocuments(Iterable<Document> docs, Analyzer analyzer, Term delTerm) throws IOException {
+   assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
+   assert deleteQueue != null;
+   docState.analyzer = analyzer;
+   if (segment == null) {
+     // this call is synchronized on IndexWriter.segmentInfos
+     segment = writer.newSegmentName();
+     assert numDocsInRAM == 0;
+   }
+
+   int docCount = 0;
+   try {
+     for(Document doc : docs) {
+       docState.doc = doc;
+       docState.docID = numDocsInRAM;
+       docCount++;
+
+       boolean success = false;
+       try {
+         consumer.processDocument(fieldInfos);
+         success = true;
+       } finally {
+         if (!success) {
+           // An exc is being thrown...
+
+           if (!aborting) {
+             // One of the documents hit a non-aborting
+             // exception (eg something happened during
+             // analysis).  We now go and mark any docs
+             // from this batch that we had already indexed
+             // as deleted:
+             int docID = docState.docID;
+             final int endDocID = docID - docCount;
+             while (docID > endDocID) {
+               deleteDocID(docID);
+               docID--;
+             }
+
+             // Incr here because finishDocument will not
+             // be called (because an exc is being thrown):
+             numDocsInRAM++;
+             fieldInfos.revertUncommitted();
+           } else {
+             abort();
+           }
+         }
+       }
+       success = false;
+       try {
+         consumer.finishDocument();
+         success = true;
+       } finally {
+         if (!success) {
+           abort();
+         }
+       }
+
+       finishDocument(null);
+     }
+
+     // Apply delTerm only after all indexing has
+     // succeeded, but apply it only to docs prior to when
+     // this batch started:
+     if (delTerm != null) {
+       deleteQueue.add(delTerm, deleteSlice);
+       assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
+       deleteSlice.apply(pendingDeletes, numDocsInRAM-docCount);
+     }
+
+   } finally {
+     docState.clear();
+   }
+
+   return docCount;
+ }
+
  private void finishDocument(Term delTerm) throws IOException {
    /*
     * here we actually finish the document in two steps 1. push the delete into
@@ -474,6 +550,7 @@ public class DocumentsWriterPerThread {
      super(blockSize);
    }

+   @Override
    public byte[] getByteBlock() {
      bytesUsed.addAndGet(blockSize);
      return new byte[blockSize];
@@ -486,7 +563,7 @@ public class DocumentsWriterPerThread {
      }
    }
- };
+ }

  void setInfoStream(PrintStream infoStream) {
    this.infoStream = infoStream;

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.util.Iterator;
import java.util.concurrent.locks.ReentrantLock;

-import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -194,6 +193,21 @@ public abstract class DocumentsWriterPerThreadPool {
    return null;
  }

+ /**
+  * Deactivate all unreleased threadstates
+  */
+ protected synchronized void deactivateUnreleasedStates() {
+   for (int i = numThreadStatesActive; i < perThreads.length; i++) {
+     final ThreadState threadState = perThreads[i];
+     threadState.lock();
+     try {
+       threadState.resetWriter(null);
+     } finally {
+       threadState.unlock();
+     }
+   }
+ }
+
  protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) {
    assert threadState.isHeldByCurrentThread();
    final DocumentsWriterPerThread dwpt = threadState.perThread;
@@ -212,7 +226,7 @@ public abstract class DocumentsWriterPerThreadPool {
    // don't recycle DWPT by default
  }

- public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc);
+ public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter);

  /**
   * Returns an iterator providing access to all {@link ThreadState}

View File

@@ -113,7 +113,7 @@ final class FieldsWriter {
  void close() throws IOException {
    if (directory != null) {
      try {
-       IOUtils.closeSafely(fieldsStream, indexStream);
+       IOUtils.closeSafely(false, fieldsStream, indexStream);
      } finally {
        fieldsStream = indexStream = null;
      }

View File

@@ -57,9 +57,10 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
    final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);

-   TermsHash termsHash = null;
+   try {
+     TermsHash termsHash = null;

    /*
    Current writer chain:
      FieldsConsumer
        -> IMPL: FormatPostingsTermsDictWriter
@@ -69,36 +70,38 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
          -> IMPL: FormatPostingsDocsWriter
            -> PositionsConsumer
              -> IMPL: FormatPostingsPositionsWriter
    */

-   for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
-     final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
-     final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
-
-     // Aggregate the storePayload as seen by the same
-     // field across multiple threads
-     if (!fieldInfo.omitTermFreqAndPositions) {
-       fieldInfo.storePayloads |= fieldWriter.hasPayloads;
-     }
-
-     // If this field has postings then add them to the
-     // segment
-     fieldWriter.flush(fieldInfo.name, consumer, state);
-
-     TermsHashPerField perField = fieldWriter.termsHashPerField;
-     assert termsHash == null || termsHash == perField.termsHash;
-     termsHash = perField.termsHash;
-     int numPostings = perField.bytesHash.size();
-     perField.reset();
-     perField.shrinkHash(numPostings);
-     fieldWriter.reset();
-   }
-
-   if (termsHash != null) {
-     termsHash.reset();
-   }
-   consumer.close();
+     for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
+       final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
+       final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
+
+       // Aggregate the storePayload as seen by the same
+       // field across multiple threads
+       if (!fieldInfo.omitTermFreqAndPositions) {
+         fieldInfo.storePayloads |= fieldWriter.hasPayloads;
+       }
+
+       // If this field has postings then add them to the
+       // segment
+       fieldWriter.flush(fieldInfo.name, consumer, state);
+
+       TermsHashPerField perField = fieldWriter.termsHashPerField;
+       assert termsHash == null || termsHash == perField.termsHash;
+       termsHash = perField.termsHash;
+       int numPostings = perField.bytesHash.size();
+       perField.reset();
+       perField.shrinkHash(numPostings);
+       fieldWriter.reset();
+     }
+
+     if (termsHash != null) {
+       termsHash.reset();
+     }
+   } finally {
+     consumer.close();
+   }
  }

  BytesRef payload;

View File

@@ -17,6 +17,8 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

+import java.util.regex.Pattern;
+
import org.apache.lucene.index.codecs.Codec; // for javadocs

/**
@@ -239,4 +241,15 @@ public final class IndexFileNames {
    return filename;
  }

+ /**
+  * Returns true if the given filename ends with the separate norms file
+  * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}.
+  */
+ public static boolean isSeparateNormsFile(String filename) {
+   int idx = filename.lastIndexOf('.');
+   if (idx == -1) return false;
+   String ext = filename.substring(idx + 1);
+   return Pattern.matches(SEPARATE_NORMS_EXTENSION + "[0-9]+", ext);
+ }
}
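Given that SEPARATE_NORMS_EXTENSION is the literal "s" (the SegmentInfo hunk further below swaps a hard-coded "s" for this constant), the new predicate plausibly behaves like:

    // Behavior sketch for IndexFileNames.isSeparateNormsFile:
    assert IndexFileNames.isSeparateNormsFile("_3.s7");    // separate norms for field 7
    assert !IndexFileNames.isSeparateNormsFile("_3.nrm");  // shared norms file, not separate
    assert !IndexFileNames.isSeparateNormsFile("_3");      // no extension at all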

View File

@@ -23,6 +23,7 @@ import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -51,6 +52,7 @@ import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.MapBackedSet;
@@ -1071,7 +1073,8 @@ public class IndexWriter implements Closeable {
      if (infoStream != null)
        message("at close: " + segString());
-
+     // used by assert below
+     final DocumentsWriter oldWriter = docWriter;
      synchronized(this) {
        readerPool.close();
        docWriter = null;
@@ -1085,6 +1088,7 @@ public class IndexWriter implements Closeable {
      synchronized(this) {
        closed = true;
      }
+     assert oldWriter.assertNoActiveDWPT();
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "closeInternal");
    } finally {
@@ -1099,6 +1103,8 @@ public class IndexWriter implements Closeable {
      }
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {
    // Pass false because the flush during closing calls getDirectory
@@ -1227,6 +1233,111 @@ public class IndexWriter implements Closeable {
    updateDocument(null, doc, analyzer);
  }
+ /**
+  * Atomically adds a block of documents with sequentially
+  * assigned document IDs, such that an external reader
+  * will see all or none of the documents.
+  *
+  * <p><b>WARNING</b>: the index does not currently record
+  * which documents were added as a block. Today this is
+  * fine, because merging will preserve the block (as long
+  * as none of them were deleted). But it's possible in the
+  * future that Lucene may more aggressively re-order
+  * documents (for example, perhaps to obtain better index
+  * compression), in which case you may need to fully
+  * re-index your documents at that time.
+  *
+  * <p>See {@link #addDocument(Document)} for details on
+  * index and IndexWriter state after an Exception, and
+  * flushing/merging temporary free space requirements.</p>
+  *
+  * <p><b>NOTE</b>: tools that do offline splitting of an index
+  * (for example, IndexSplitter in contrib) or
+  * re-sorting of documents (for example, IndexSorter in
+  * contrib) are not aware of these atomically added documents
+  * and will likely break them up. Use such tools at your
+  * own risk!
+  *
+  * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
+  * you should immediately close the writer. See <a
+  * href="#OOME">above</a> for details.</p>
+  *
+  * @throws CorruptIndexException if the index is corrupt
+  * @throws IOException if there is a low-level IO error
+  *
+  * @lucene.experimental
+  */
+ public void addDocuments(Iterable<Document> docs) throws CorruptIndexException, IOException {
+   addDocuments(docs, analyzer);
+ }
+
+ /**
+  * Atomically adds a block of documents, analyzed using the
+  * provided analyzer, with sequentially assigned document
+  * IDs, such that an external reader will see all or none
+  * of the documents.
+  *
+  * @throws CorruptIndexException if the index is corrupt
+  * @throws IOException if there is a low-level IO error
+  *
+  * @lucene.experimental
+  */
+ public void addDocuments(Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
+   updateDocuments(null, docs, analyzer);
+ }
+
+ /**
+  * Atomically deletes documents matching the provided
+  * delTerm and adds a block of documents with sequentially
+  * assigned document IDs, such that an external reader
+  * will see all or none of the documents.
+  *
+  * See {@link #addDocuments(Iterable)}.
+  *
+  * @throws CorruptIndexException if the index is corrupt
+  * @throws IOException if there is a low-level IO error
+  *
+  * @lucene.experimental
+  */
+ public void updateDocuments(Term delTerm, Iterable<Document> docs) throws CorruptIndexException, IOException {
+   updateDocuments(delTerm, docs, analyzer);
+ }
+
+ /**
+  * Atomically deletes documents matching the provided
+  * delTerm and adds a block of documents, analyzed using
+  * the provided analyzer, with sequentially
+  * assigned document IDs, such that an external reader
+  * will see all or none of the documents.
+  *
+  * See {@link #addDocuments(Iterable)}.
+  *
+  * @throws CorruptIndexException if the index is corrupt
+  * @throws IOException if there is a low-level IO error
+  *
+  * @lucene.experimental
+  */
+ public void updateDocuments(Term delTerm, Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
+   ensureOpen();
+   try {
+     boolean success = false;
+     boolean anySegmentFlushed = false;
+     try {
+       anySegmentFlushed = docWriter.updateDocuments(docs, analyzer, delTerm);
+       success = true;
+     } finally {
+       if (!success && infoStream != null) {
+         message("hit exception updating document");
+       }
+     }
+     if (anySegmentFlushed) {
+       maybeMerge();
+     }
+   } catch (OutOfMemoryError oom) {
+     handleOOM(oom, "updateDocuments");
+   }
+ }
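A hedged usage sketch for the new block APIs: a group is added atomically, then later replaced in a single call. The "block_id" field and its value are hypothetical:

    import java.util.Arrays;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;

    final class BlockUpdateSketch {
      static void replaceBlock(IndexWriter writer, Document d1, Document d2) throws Exception {
        d1.add(new Field("block_id", "b42", Field.Store.NO, Field.Index.NOT_ANALYZED));
        d2.add(new Field("block_id", "b42", Field.Store.NO, Field.Index.NOT_ANALYZED));
        // Both documents become visible to readers together, or not at all.
        writer.addDocuments(Arrays.asList(d1, d2));
        // Later: atomically delete the old block and add the new one.
        writer.updateDocuments(new Term("block_id", "b42"), Arrays.asList(d1, d2));
      }
    }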
  /**
   * Deletes the document(s) containing <code>term</code>.
   *
@@ -2217,10 +2328,10 @@ public class IndexWriter implements Closeable {
   * <p>
   * <b>NOTE:</b> this method only copies the segments of the incoming indexes
   * and does not merge them. Therefore deleted documents are not removed and
-  * the new segments are not merged with the existing ones. Also, the segments
-  * are copied as-is, meaning they are not converted to CFS if they aren't,
-  * and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
-  * or {@link #optimize} afterwards.
+  * the new segments are not merged with the existing ones. Also, if the merge
+  * policy allows compound files, then any segment that is not compound is
+  * converted to such. However, if the segment is compound, it is copied as-is
+  * even if the merge policy does not allow compound files.
   *
   * <p>This requires this index not be among those to be added.
   *
@@ -2244,6 +2355,7 @@ public class IndexWriter implements Closeable {
      int docCount = 0;
      List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
+     Comparator<String> versionComparator = StringHelper.getVersionComparator();

      for (Directory dir : dirs) {
        if (infoStream != null) {
          message("addIndexes: process directory " + dir);
@@ -2263,46 +2375,22 @@ public class IndexWriter implements Closeable {
          message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
        }

-       // Determine if the doc store of this segment needs to be copied. It's
-       // only relevant for segments who share doc store with others, because
-       // the DS might have been copied already, in which case we just want
-       // to update the DS name of this SegmentInfo.
-       // NOTE: pre-3x segments include a null DSName if they don't share doc
-       // store. So the following code ensures we don't accidentally insert
-       // 'null' to the map.
-       final String newDsName;
-       if (dsName != null) {
-         if (dsNames.containsKey(dsName)) {
-           newDsName = dsNames.get(dsName);
-         } else {
-           dsNames.put(dsName, newSegName);
-           newDsName = newSegName;
-         }
-       } else {
-         newDsName = newSegName;
-       }
+       // create CFS only if the source segment is not CFS, and MP agrees it
+       // should be CFS.
+       boolean createCFS;
+       synchronized (this) { // Guard segmentInfos
+         createCFS = !info.getUseCompoundFile()
+             && mergePolicy.useCompoundFile(segmentInfos, info)
+             // optimize case only for segments that don't share doc stores
+             && versionComparator.compare(info.getVersion(), "3.1") >= 0;
+       }

-       // Copy the segment files
-       for (String file: info.files()) {
-         final String newFileName;
-         if (IndexFileNames.isDocStoreFile(file)) {
-           newFileName = newDsName + IndexFileNames.stripSegmentName(file);
-           if (dsFilesCopied.contains(newFileName)) {
-             continue;
-           }
-           dsFilesCopied.add(newFileName);
-         } else {
-           newFileName = newSegName + IndexFileNames.stripSegmentName(file);
-         }
-         assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
-         dir.copy(directory, file, newFileName);
-       }
-
-       // Update SI appropriately
-       info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
-       info.dir = directory;
-       info.name = newSegName;
+       if (createCFS) {
+         copySegmentIntoCFS(info, newSegName);
+       } else {
+         copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
+       }

        infos.add(info);
      }
    }
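For reference, a hedged example of the copy-only addIndexes path this hunk modifies; the source path is hypothetical, and CFS conversion now happens per the merge policy as described in the javadoc above:

    import java.io.File;

    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.FSDirectory;

    final class AddIndexesSketch {
      static void copyIn(IndexWriter writer) throws Exception {
        FSDirectory source = FSDirectory.open(new File("/path/to/other/index"));
        writer.addIndexes(source);  // segments are copied, not merged
        writer.optimize();          // optional: actually merge them afterwards
      }
    }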
@@ -2391,6 +2479,76 @@ public class IndexWriter implements Closeable {
    }
  }

+ /** Copies the segment into the IndexWriter's directory, as a compound segment. */
+ private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
+   String segFileName = IndexFileNames.segmentFileName(segName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
+   Collection<String> files = info.files();
+   CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
+   for (String file : files) {
+     String newFileName = segName + IndexFileNames.stripSegmentName(file);
+     if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
+         && !IndexFileNames.isSeparateNormsFile(file)) {
+       cfsWriter.addFile(file, info.dir);
+     } else {
+       assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
+       info.dir.copy(directory, file, newFileName);
+     }
+   }
+
+   // Create the .cfs
+   cfsWriter.close();
+
+   info.dir = directory;
+   info.name = segName;
+   info.setUseCompoundFile(true);
+ }
+
+ /** Copies the segment files as-is into the IndexWriter's directory. */
+ private void copySegmentAsIs(SegmentInfo info, String segName,
+     Map<String, String> dsNames, Set<String> dsFilesCopied)
+     throws IOException {
+   // Determine if the doc store of this segment needs to be copied. It's
+   // only relevant for segments that share doc store with others,
+   // because the DS might have been copied already, in which case we
+   // just want to update the DS name of this SegmentInfo.
+   // NOTE: pre-3x segments include a null DSName if they don't share doc
+   // store. The following code ensures we don't accidentally insert
+   // 'null' to the map.
+   String dsName = info.getDocStoreSegment();
+   final String newDsName;
+   if (dsName != null) {
+     if (dsNames.containsKey(dsName)) {
+       newDsName = dsNames.get(dsName);
+     } else {
+       dsNames.put(dsName, segName);
+       newDsName = segName;
+     }
+   } else {
+     newDsName = segName;
+   }
+
+   // Copy the segment files
+   for (String file: info.files()) {
+     final String newFileName;
+     if (IndexFileNames.isDocStoreFile(file)) {
+       newFileName = newDsName + IndexFileNames.stripSegmentName(file);
+       if (dsFilesCopied.contains(newFileName)) {
+         continue;
+       }
+       dsFilesCopied.add(newFileName);
+     } else {
+       newFileName = segName + IndexFileNames.stripSegmentName(file);
+     }
+
+     assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
+     info.dir.copy(directory, file, newFileName);
+   }
+
+   info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
+   info.dir = directory;
+   info.name = segName;
+ }
+
  /**
   * A hook for extending classes to execute operations after pending added and
   * deleted documents have been flushed to the Directory but before the change
@@ -3176,50 +3334,50 @@ public class IndexWriter implements Closeable {
    runningMerges.remove(merge);
  }

- private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
+ private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
    final int numSegments = merge.readers.size();
-   if (suppressExceptions) {
-     // Suppress any new exceptions so we throw the
-     // original cause
-     boolean anyChanges = false;
-     for (int i=0;i<numSegments;i++) {
-       if (merge.readers.get(i) != null) {
-         try {
-           anyChanges |= readerPool.release(merge.readers.get(i), false);
-         } catch (Throwable t) {
-         }
-         merge.readers.set(i, null);
-       }
-       if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
-         try {
-           merge.readerClones.get(i).close();
-         } catch (Throwable t) {
-         }
-         // This was a private clone and we had the
-         // only reference
-         assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
-         merge.readerClones.set(i, null);
-       }
-     }
-     if (anyChanges) {
-       checkpoint();
-     }
-   } else {
-     for (int i=0;i<numSegments;i++) {
-       if (merge.readers.get(i) != null) {
-         readerPool.release(merge.readers.get(i), true);
-         merge.readers.set(i, null);
-       }
-       if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
-         merge.readerClones.get(i).close();
-         // This was a private clone and we had the only reference
-         assert merge.readerClones.get(i).getRefCount() == 0;
-         merge.readerClones.set(i, null);
-       }
-     }
-   }
+   Throwable th = null;
+
+   boolean anyChanges = false;
+   boolean drop = !suppressExceptions;
+   for (int i = 0; i < numSegments; i++) {
+     if (merge.readers.get(i) != null) {
+       try {
+         anyChanges |= readerPool.release(merge.readers.get(i), drop);
+       } catch (Throwable t) {
+         if (th == null) {
+           th = t;
+         }
+       }
+       merge.readers.set(i, null);
+     }
+
+     if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
+       try {
+         merge.readerClones.get(i).close();
+       } catch (Throwable t) {
+         if (th == null) {
+           th = t;
+         }
+       }
+       // This was a private clone and we had the
+       // only reference
+       assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
+       merge.readerClones.set(i, null);
+     }
+   }
+
+   if (suppressExceptions && anyChanges) {
+     checkpoint();
+   }
+
+   // If any error occurred, throw it.
+   if (!suppressExceptions && th != null) {
+     if (th instanceof IOException) throw (IOException) th;
+     if (th instanceof RuntimeException) throw (RuntimeException) th;
+     if (th instanceof Error) throw (Error) th;
+     throw new RuntimeException(th);
+   }
  }

  /** Does the actual (time-consuming) work of the merge,

View File

@@ -22,6 +22,7 @@ import java.util.Collection;
import java.util.Map;

import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;

// TODO FI: norms could actually be stored as doc store
@@ -49,9 +50,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
    final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
    IndexOutput normsOut = state.directory.createOutput(normsFileName);
+   boolean success = false;
    try {
-     normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
+     normsOut.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);

      int normCount = 0;
@@ -84,9 +85,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
      assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
    }
+   success = true;
  } finally {
-   normsOut.close();
+   IOUtils.closeSafely(!success, normsOut);
  }
}

View File

@@ -30,6 +30,7 @@ import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;

/**
 * Enables native per field codec support. This class selects the codec used to
@@ -61,7 +62,15 @@ final class PerFieldCodecWrapper extends Codec {
      assert segmentCodecs == state.segmentCodecs;
      final Codec[] codecs = segmentCodecs.codecs;
      for (int i = 0; i < codecs.length; i++) {
-       consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
+       boolean success = false;
+       try {
+         consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
+         success = true;
+       } finally {
+         if (!success) {
+           IOUtils.closeSafely(true, consumers);
+         }
+       }
      }
    }
@@ -74,22 +83,7 @@ final class PerFieldCodecWrapper extends Codec {
    @Override
    public void close() throws IOException {
-     Iterator<FieldsConsumer> it = consumers.iterator();
-     IOException err = null;
-     while (it.hasNext()) {
-       try {
-         it.next().close();
-       } catch (IOException ioe) {
-         // keep first IOException we hit but keep
-         // closing the rest
-         if (err == null) {
-           err = ioe;
-         }
-       }
-     }
-     if (err != null) {
-       throw err;
-     }
+     IOUtils.closeSafely(false, consumers);
    }
  }
@@ -122,14 +116,7 @@ final class PerFieldCodecWrapper extends Codec {
        // If we hit exception (eg, IOE because writer was
        // committing, or, for any other reason) we must
        // go back and close all FieldsProducers we opened:
-       for(FieldsProducer fp : producers.values()) {
-         try {
-           fp.close();
-         } catch (Throwable t) {
-           // Suppress all exceptions here so we continue
-           // to throw the original one
-         }
-       }
+       IOUtils.closeSafely(true, producers.values());
      }
    }
  }
@@ -177,22 +164,7 @@ final class PerFieldCodecWrapper extends Codec {
    @Override
    public void close() throws IOException {
-     Iterator<FieldsProducer> it = codecs.values().iterator();
-     IOException err = null;
-     while (it.hasNext()) {
-       try {
-         it.next().close();
-       } catch (IOException ioe) {
-         // keep first IOException we hit but keep
-         // closing the rest
-         if (err == null) {
-           err = ioe;
-         }
-       }
-     }
-     if (err != null) {
-       throw err;
-     }
+     IOUtils.closeSafely(false, codecs.values());
    }

    @Override

View File

@@ -59,7 +59,7 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
  /**
   * Reads the snapshots information from the given {@link Directory}. This
-  * method does can be used if the snapshots information is needed, however you
+  * method can be used if the snapshots information is needed, however you
   * cannot instantiate the deletion policy (because e.g., some other process
   * keeps a lock on the snapshots directory).
   */
@@ -122,11 +122,19 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
      writer.commit();
    }

-   // Initializes the snapshots information. This code should basically run
-   // only if mode != CREATE, but if it is, it's no harm as we only open the
-   // reader once and immediately close it.
-   for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
-     registerSnapshotInfo(e.getKey(), e.getValue(), null);
+   try {
+     // Initializes the snapshots information. This code should basically run
+     // only if mode != CREATE, but if it is, it's no harm as we only open the
+     // reader once and immediately close it.
+     for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
+       registerSnapshotInfo(e.getKey(), e.getValue(), null);
+     }
+   } catch (RuntimeException e) {
+     writer.close(); // don't leave any open file handles
+     throw e;
+   } catch (IOException e) {
+     writer.close(); // don't leave any open file handles
+     throw e;
    }
  }
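A construction sketch showing where the fix matters; the constructor arguments follow what this class's 3.x-era API appears to be (an assumption), and the snapshot directory path is hypothetical:

    import java.io.File;

    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
    import org.apache.lucene.index.PersistentSnapshotDeletionPolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    final class SnapshotPolicySketch {
      static PersistentSnapshotDeletionPolicy open() throws Exception {
        Directory snapshotDir = FSDirectory.open(new File("/path/to/snapshots"));
        // If readSnapshotsInfo fails during construction, the fix above
        // ensures the internal writer is closed instead of leaking handles.
        return new PersistentSnapshotDeletionPolicy(
            new KeepOnlyLastCommitDeletionPolicy(), snapshotDir,
            OpenMode.CREATE_OR_APPEND, Version.LUCENE_40);
      }
    }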

View File

@@ -436,7 +436,7 @@ public final class SegmentInfo implements Cloneable {
   */
  public String getNormFileName(int number) {
    if (hasSeparateNorms(number)) {
-     return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
+     return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + number, normGen.get(number));
    } else {
      // single file for all norms
      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);

View File

@@ -40,6 +40,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoSuchDirectoryException;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ThreadInterruptedException;

/**
@@ -323,17 +324,13 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
      SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
      segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
      infosWriter.prepareCommit(segnOutput);
-     success = true;
      pendingSegnOutput = segnOutput;
+     success = true;
    } finally {
      if (!success) {
        // We hit an exception above; try to close the file
        // but suppress any exception:
-       try {
-         segnOutput.close();
-       } catch (Throwable t) {
-         // Suppress so we keep throwing the original exception
-       }
+       IOUtils.closeSafely(true, segnOutput);
        try {
          // Try not to leave a truncated segments_N file in
          // the index:
@@ -945,6 +942,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
        } finally {
          genOutput.close();
        }
+     } catch (ThreadInterruptedException t) {
+       throw t;
      } catch (Throwable t) {
        // It's OK if we fail to write this file since it's
        // used only as one of the retry fallbacks.
@@ -963,7 +962,6 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
      finishCommit(dir);
    }
-
    public String toString(Directory directory) {
      StringBuilder buffer = new StringBuilder();
      buffer.append(getCurrentSegmentFileName()).append(": ");

View File

@@ -27,13 +27,13 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.MultiBits;
import org.apache.lucene.util.ReaderUtil;

@@ -46,10 +46,6 @@ import org.apache.lucene.util.ReaderUtil;
 * @see #add
 */
final class SegmentMerger {
-
- /** norms header placeholder */
- static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
-
  private Directory directory;
  private String segment;
  private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
@@ -124,6 +120,12 @@ final class SegmentMerger {
    return mergedDocs;
  }

+ /**
+  * NOTE: this method creates a compound file for all files returned by
+  * info.files(). While, generally, this may include separate norms and
+  * deletion files, this SegmentInfo must not reference such files when this
+  * method is called, because they are not allowed within a compound file.
+  */
  final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
          throws IOException {

@@ -131,6 +133,10 @@ final class SegmentMerger {
    Collection<String> files = info.files();
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
    for (String file : files) {
+     assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
+               : ".del file is not allowed in .cfs: " + file;
+     assert !IndexFileNames.isSeparateNormsFile(file)
+               : "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file;
      cfsWriter.addFile(file);
    }

@@ -540,14 +546,13 @@ final class SegmentMerger {
    }
    codec = segmentWriteState.segmentCodecs.codec();

    final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);
-
-   // NOTE: this is silly, yet, necessary -- we create a
-   // MultiBits as our skip docs only to have it broken
-   // apart when we step through the docs enums in
-   // MultiDocsEnum.
-   mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
    try {
+     // NOTE: this is silly, yet, necessary -- we create a
+     // MultiBits as our skip docs only to have it broken
+     // apart when we step through the docs enums in
+     // MultiDocsEnum.
+     mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
+
      consumer.merge(mergeState,
                     new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                     slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
@@ -573,12 +578,13 @@ final class SegmentMerger {
  private void mergeNorms() throws IOException {
    IndexOutput output = null;
+   boolean success = false;
    try {
      for (FieldInfo fi : fieldInfos) {
        if (fi.isIndexed && !fi.omitNorms) {
          if (output == null) {
            output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
-           output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
+           output.writeBytes(SegmentNorms.NORMS_HEADER, SegmentNorms.NORMS_HEADER.length);
          }
          for (IndexReader reader : readers) {
            final int maxDoc = reader.maxDoc();
@@ -606,10 +612,9 @@ final class SegmentMerger {
        }
      }
+     success = true;
    } finally {
-     if (output != null) {
-       output.close();
-     }
+     IOUtils.closeSafely(!success, output);
    }
  }
}
} }

View File

@@ -33,6 +33,10 @@ import org.apache.lucene.store.IndexOutput;
 */
final class SegmentNorms implements Cloneable {

+ /** norms header placeholder */
+ static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
+
  int refCount = 1;

  // If this instance is a clone, the originalNorm
@@ -219,7 +223,7 @@ final class SegmentNorms implements Cloneable {
    boolean success = false;
    try {
      try {
-       out.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
+       out.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
        out.writeBytes(bytes, owner.maxDoc());
      } finally {
        out.close();

View File

@@ -574,7 +574,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
  }

  private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
-   long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
+   long nextNormSeek = SegmentNorms.NORMS_HEADER.length; //skip header (header unused for now)
    int maxDoc = maxDoc();
    for (FieldInfo fi : core.fieldInfos) {
      if (norms.containsKey(fi.name)) {
@@ -619,7 +619,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
        if (isUnversioned) {
          normSeek = 0;
        } else {
-         normSeek = SegmentMerger.NORMS_HEADER.length;
+         normSeek = SegmentNorms.NORMS_HEADER.length;
        }
      }
View File
@@ -54,9 +54,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
       fill(state.numDocs);
       assert state.segmentName != null;
       String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
-      tvx.close();
-      tvf.close();
-      tvd.close();
+      IOUtils.closeSafely(false, tvx, tvf, tvd);
       tvx = tvd = tvf = null;
       if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
         throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
@@ -89,18 +87,25 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
   private final void initTermVectorsWriter() throws IOException {
     if (tvx == null) {
-      // If we hit an exception while init'ing the term
-      // vector output files, we must abort this segment
-      // because those files will be in an unknown
-      // state:
-      tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
-      tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
-      tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
-      tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
-      tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
-      tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
+      boolean success = false;
+      try {
+        // If we hit an exception while init'ing the term
+        // vector output files, we must abort this segment
+        // because those files will be in an unknown
+        // state:
+        tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+        tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+        tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
+        tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
+        tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
+        tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeSafely(true, tvx, tvd, tvf);
+        }
+      }
 
       lastDocID = 0;
     }
@@ -139,7 +144,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
       }
     }
 
-    assert lastDocID == docState.docID;
+    assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
 
     lastDocID++;
@@ -152,21 +157,27 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
   public void abort() {
     hasVectors = false;
     try {
-      IOUtils.closeSafely(tvx, tvd, tvf);
-    } catch (IOException ignored) {
+      IOUtils.closeSafely(true, tvx, tvd, tvf);
+    } catch (IOException e) {
+      // cannot happen since we suppress exceptions
+      throw new RuntimeException(e);
     }
     try {
       docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
     } catch (IOException ignored) {
     }
     try {
       docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
     } catch (IOException ignored) {
     }
     try {
       docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
     } catch (IOException ignored) {
     }
     tvx = tvd = tvf = null;
     lastDocID = 0;
View File
@@ -31,15 +31,22 @@ final class TermVectorsWriter {
   private FieldInfos fieldInfos;
 
   public TermVectorsWriter(Directory directory, String segment,
-                           FieldInfos fieldInfos)
-    throws IOException {
-    // Open files for TermVector storage
-    tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
-    tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
-    tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
-    tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
-    tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
-    tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
+                           FieldInfos fieldInfos) throws IOException {
+    boolean success = false;
+    try {
+      // Open files for TermVector storage
+      tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
+      tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
+      tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+      tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
+      tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
+      tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, tvx, tvd, tvf);
+      }
+    }
 
     this.fieldInfos = fieldInfos;
   }
@@ -51,8 +58,7 @@ final class TermVectorsWriter {
    * @param vectors
    * @throws IOException
    */
-  public final void addAllDocVectors(TermFreqVector[] vectors)
-    throws IOException {
+  public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {
 
     tvx.writeLong(tvd.getFilePointer());
     tvx.writeLong(tvf.getFilePointer());
@@ -187,6 +193,6 @@ final class TermVectorsWriter {
   final void close() throws IOException {
     // make an effort to close all streams we can but remember and re-throw
     // the first exception encountered in this process
-    IOUtils.closeSafely(tvx, tvd, tvf);
+    IOUtils.closeSafely(false, tvx, tvd, tvf);
   }
 }
View File
@@ -54,7 +54,6 @@ final class TermsHash extends InvertedDocConsumer {
   final boolean trackAllocations;
 
   public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
     this.docState = docWriter.docState;
     this.docWriter = docWriter;
@@ -108,11 +107,11 @@ final class TermsHash extends InvertedDocConsumer {
       }
 
       for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
         TermsHashPerField perField = (TermsHashPerField) entry.getValue();
         childFields.put(entry.getKey(), perField.consumer);
         if (nextTermsHash != null) {
           nextChildFields.put(entry.getKey(), perField.nextPerField);
         }
       }
 
       consumer.flush(childFields, state);
@@ -134,12 +133,9 @@ final class TermsHash extends InvertedDocConsumer {
   @Override
   void finishDocument() throws IOException {
-    try {
-      consumer.finishDocument(this);
-    } finally {
-      if (nextTermsHash != null) {
-        nextTermsHash.consumer.finishDocument(nextTermsHash);
-      }
+    consumer.finishDocument(this);
+    if (nextTermsHash != null) {
+      nextTermsHash.consumer.finishDocument(nextTermsHash);
     }
   }
View File
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
-import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; //javadoc
 
 /**
@@ -48,12 +47,10 @@ public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerT
   }
 
   @Override
-  public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) {
+  public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter) {
     ThreadState threadState = threadBindings.get(requestingThread);
-    if (threadState != null) {
-      if (threadState.tryLock()) {
-        return threadState;
-      }
+    if (threadState != null && threadState.tryLock()) {
+      return threadState;
     }
     ThreadState minThreadState = null;
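The collapsed condition keeps thread affinity only when the bound state is free right now; the requesting thread never blocks waiting for its preferred state. A rough sketch of that shape with plain java.util.concurrent types (AffinityPool and its method are hypothetical stand-ins, not the commit's classes):

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.locks.ReentrantLock;

    // Rough shape of affinity-with-tryLock: prefer the state this thread used
    // last, but only if it is free right now; never block on the preferred one.
    class AffinityPool {
      private final ConcurrentHashMap<Thread, ReentrantLock> bindings =
          new ConcurrentHashMap<Thread, ReentrantLock>();

      ReentrantLock getAndLock(Thread t, ReentrantLock fallback) {
        ReentrantLock bound = bindings.get(t);
        if (bound != null && bound.tryLock()) {
          return bound;              // affinity hit, acquired without waiting
        }
        fallback.lock();             // otherwise take (and wait for) another state
        bindings.put(t, fallback);   // rebind affinity to the state we got
        return fallback;
      }
    }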
View File
@@ -31,6 +31,7 @@ import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 
 // TODO: currently we encode all terms between two indexed
@@ -66,24 +67,29 @@ public class BlockTermsWriter extends FieldsConsumer {
   //private final String segment;
 
-  public BlockTermsWriter(
-      TermsIndexWriterBase termsIndexWriter,
-      SegmentWriteState state,
-      PostingsWriterBase postingsWriter)
-      throws IOException
-  {
+  public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
+      SegmentWriteState state, PostingsWriterBase postingsWriter)
+      throws IOException {
     final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
     this.termsIndexWriter = termsIndexWriter;
     out = state.directory.createOutput(termsFileName);
+    boolean success = false;
+    try {
       fieldInfos = state.fieldInfos;
       writeHeader(out);
       currentField = null;
       this.postingsWriter = postingsWriter;
       //segment = state.segmentName;
 
       //System.out.println("BTW.init seg=" + state.segmentName);
 
       postingsWriter.start(out); // have consumer write its format/header
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, out);
+      }
+    }
   }
 
   protected void writeHeader(IndexOutput out) throws IOException {
@@ -130,20 +136,11 @@ public class BlockTermsWriter extends FieldsConsumer {
       }
       writeTrailer(dirStart);
     } finally {
-      try {
-        out.close();
-      } finally {
-        try {
-          postingsWriter.close();
-        } finally {
-          termsIndexWriter.close();
-        }
-      }
+      IOUtils.closeSafely(false, out, postingsWriter, termsIndexWriter);
     }
   }
 
   protected void writeTrailer(long dirStart) throws IOException {
-    // TODO Auto-generated method stub
     out.seek(CodecUtil.headerLength(CODEC_NAME));
     out.writeLong(dirStart);
   }
View File
@@ -88,6 +88,15 @@ public class CodecProvider {
     return codec;
   }
 
+  /**
+   * Returns <code>true</code> iff a codec with the given name is registered
+   * @param name codec name
+   * @return <code>true</code> iff a codec with the given name is registered, otherwise <code>false</code>.
+   */
+  public synchronized boolean isCodecRegistered(String name) {
+    return codecs.containsKey(name);
+  }
+
   public SegmentInfosWriter getSegmentInfosWriter() {
     return infosWriter;
   }
@@ -145,6 +154,14 @@ public class CodecProvider {
     return codec;
   }
 
+  /**
+   * Returns <code>true</code> if this provider has a Codec registered for this
+   * field.
+   */
+  public synchronized boolean hasFieldCodec(String name) {
+    return perFieldMap.containsKey(name);
+  }
+
   /**
    * Returns the default {@link Codec} for this {@link CodecProvider}
    *
View File
@@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.store.ChecksumIndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Default implementation of {@link SegmentInfosWriter}.
@@ -56,16 +57,24 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
   public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
       throws IOException {
     IndexOutput out = createOutput(dir, segmentFileName);
-    out.writeInt(FORMAT_CURRENT); // write FORMAT
-    out.writeLong(infos.version);
-    out.writeInt(infos.counter); // write counter
-    out.writeLong(infos.getGlobalFieldMapVersion());
-    out.writeInt(infos.size()); // write infos
-    for (SegmentInfo si : infos) {
-      si.write(out);
+    boolean success = false;
+    try {
+      out.writeInt(FORMAT_CURRENT); // write FORMAT
+      out.writeLong(infos.version);
+      out.writeInt(infos.counter); // write counter
+      out.writeLong(infos.getGlobalFieldMapVersion());
+      out.writeInt(infos.size()); // write infos
+      for (SegmentInfo si : infos) {
+        si.write(out);
+      }
+      out.writeStringStringMap(infos.getUserData());
+      success = true;
+      return out;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, out);
+      }
     }
-    out.writeStringStringMap(infos.getUserData());
-    return out;
   }
 
   protected IndexOutput createOutput(Directory dir, String segmentFileName)
View File
@@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -108,6 +109,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
       }
       success = true;
     } finally {
+      if (!success) IOUtils.closeSafely(true, in);
       if (indexDivisor > 0) {
         in.close();
         in = null;
View File
@@ -25,6 +25,7 @@ import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.PackedInts;
 
 import java.util.List;
@@ -58,9 +59,17 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
     final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
     termIndexInterval = state.termIndexInterval;
     out = state.directory.createOutput(indexFileName);
-    fieldInfos = state.fieldInfos;
-    writeHeader(out);
-    out.writeInt(termIndexInterval);
+    boolean success = false;
+    try {
+      fieldInfos = state.fieldInfos;
+      writeHeader(out);
+      out.writeInt(termIndexInterval);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, out);
+      }
+    }
   }
 
   protected void writeHeader(IndexOutput out) throws IOException {
@@ -202,33 +211,37 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
     }
   }
 
-  @Override
   public void close() throws IOException {
-    final long dirStart = out.getFilePointer();
-    final int fieldCount = fields.size();
+    boolean success = false;
+    try {
+      final long dirStart = out.getFilePointer();
+      final int fieldCount = fields.size();
 
       int nonNullFieldCount = 0;
       for(int i=0;i<fieldCount;i++) {
        SimpleFieldWriter field = fields.get(i);
        if (field.numIndexTerms > 0) {
          nonNullFieldCount++;
        }
      }
 
      out.writeVInt(nonNullFieldCount);
      for(int i=0;i<fieldCount;i++) {
        SimpleFieldWriter field = fields.get(i);
        if (field.numIndexTerms > 0) {
          out.writeVInt(field.fieldInfo.number);
          out.writeVInt(field.numIndexTerms);
          out.writeVLong(field.termsStart);
          out.writeVLong(field.indexStart);
          out.writeVLong(field.packedIndexStart);
          out.writeVLong(field.packedOffsetsStart);
        }
      }
-    writeTrailer(dirStart);
-    out.close();
+      writeTrailer(dirStart);
+      success = true;
+    } finally {
+      IOUtils.closeSafely(!success, out);
+    }
   }
 
   protected void writeTrailer(long dirStart) throws IOException {
View File
@@ -19,10 +19,12 @@ package org.apache.lucene.index.codecs;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.util.BytesRef;
 
+import java.io.Closeable;
 import java.io.IOException;
 
 /** @lucene.experimental */
-public abstract class TermsIndexWriterBase {
+public abstract class TermsIndexWriterBase implements Closeable {
 
   public abstract class FieldWriter {
     public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
@@ -31,6 +33,4 @@ public abstract class TermsIndexWriterBase {
   }
 
   public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;
-
-  public abstract void close() throws IOException;
 }
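With the base class implementing java.io.Closeable instead of declaring its own abstract close(), terms-index writers can be passed straight to the varargs IOUtils.closeSafely overloads alongside any other stream, as BlockTermsWriter.close() above now does. A minimal illustration (CloseAllSketch and its parameter names are hypothetical):

    import java.io.Closeable;
    import java.io.IOException;
    import org.apache.lucene.util.IOUtils;

    // Heterogeneous resources released in one call; with
    // suppressExceptions == false the first failure is rethrown after
    // every close() has been attempted.
    class CloseAllSketch {
      static void closeAll(Closeable terms, Closeable postings, Closeable index) throws IOException {
        IOUtils.closeSafely(false, terms, postings, index);
      }
    }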
View File
@@ -33,11 +33,11 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.util.automaton.fst.Builder;
-import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
-import org.apache.lucene.util.automaton.fst.FST;
-import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
-import org.apache.lucene.util.automaton.fst.Util; // for toDot
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util; // for toDot
 
 /** See {@link VariableGapTermsIndexWriter}
  *
View File
@@ -28,9 +28,10 @@ import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.util.automaton.fst.Builder;
-import org.apache.lucene.util.automaton.fst.FST;
-import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
 
 /**
  * Selects index terms according to provided pluggable
@@ -159,9 +160,17 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
   public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException {
     final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
     out = state.directory.createOutput(indexFileName);
-    fieldInfos = state.fieldInfos;
-    this.policy = policy;
-    writeHeader(out);
+    boolean success = false;
+    try {
+      fieldInfos = state.fieldInfos;
+      this.policy = policy;
+      writeHeader(out);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, out);
+      }
+    }
   }
 
   protected void writeHeader(IndexOutput out) throws IOException {
@@ -265,8 +274,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
     }
   }
 
-  @Override
   public void close() throws IOException {
+    try {
       final long dirStart = out.getFilePointer();
       final int fieldCount = fields.size();
@@ -287,8 +296,10 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
         }
       }
       writeTrailer(dirStart);
+    } finally {
       out.close();
     }
+  }
 
   protected void writeTrailer(long dirStart) throws IOException {
     out.seek(CodecUtil.headerLength(CODEC_NAME));
View File
@@ -41,6 +41,7 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
 
   protected final IndexOutput out;
   private int upto;
+  private boolean hitExcDuringWrite;
 
   // TODO what Var-Var codecs exist in practice... and what are there blocksizes like?
   // if its less than 128 we should set that as max and use byte?
@@ -105,19 +106,23 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
 
   @Override
   public void write(int v) throws IOException {
+    hitExcDuringWrite = true;
     upto -= add(v)-1;
+    hitExcDuringWrite = false;
     assert upto >= 0;
   }
 
   @Override
   public void close() throws IOException {
     try {
+      if (!hitExcDuringWrite) {
         // stuff 0s in until the "real" data is flushed:
         int stuffed = 0;
         while(upto > stuffed) {
           upto -= add(0)-1;
           assert upto >= 0;
           stuffed += 1;
         }
+      }
     } finally {
       out.close();
View File
@@ -38,6 +38,7 @@ import org.apache.lucene.index.codecs.TermsIndexReaderBase;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
 
 /** This codec "inlines" the postings for terms that have
  *  low docFreq.  It wraps another codec, which is used for
@@ -81,7 +82,7 @@ public class PulsingCodec extends Codec {
       success = true;
     } finally {
       if (!success) {
-        pulsingWriter.close();
+        IOUtils.closeSafely(true, pulsingWriter);
       }
     }
@@ -93,11 +94,7 @@ public class PulsingCodec extends Codec {
       return ret;
     } finally {
       if (!success) {
-        try {
-          pulsingWriter.close();
-        } finally {
-          indexWriter.close();
-        }
+        IOUtils.closeSafely(true, pulsingWriter, indexWriter);
       }
     }
   }
View File
@@ -71,8 +71,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
    *  for this term) is <= maxPositions, then the postings are
    *  inlined into terms dict */
   public PulsingPostingsWriterImpl(int maxPositions, PostingsWriterBase wrappedPostingsWriter) throws IOException {
-    super();
-
     pending = new Position[maxPositions];
     for(int i=0;i<maxPositions;i++) {
       pending[i] = new Position();
View File
@@ -31,6 +31,7 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
 
 /** Writes frq to .frq, docs to .doc, pos to .pos, payloads
  *  to .pyl, skip data to .skp
@@ -49,18 +50,18 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
   final static int VERSION_START = 0;
   final static int VERSION_CURRENT = VERSION_START;
 
-  final IntIndexOutput freqOut;
-  final IntIndexOutput.Index freqIndex;
+  IntIndexOutput freqOut;
+  IntIndexOutput.Index freqIndex;
 
-  final IntIndexOutput posOut;
-  final IntIndexOutput.Index posIndex;
+  IntIndexOutput posOut;
+  IntIndexOutput.Index posIndex;
 
-  final IntIndexOutput docOut;
-  final IntIndexOutput.Index docIndex;
+  IntIndexOutput docOut;
+  IntIndexOutput.Index docIndex;
 
-  final IndexOutput payloadOut;
+  IndexOutput payloadOut;
 
-  final IndexOutput skipOut;
+  IndexOutput skipOut;
   IndexOutput termsOut;
 
   final SepSkipListWriter skipListWriter;
@@ -107,44 +108,51 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
   }
 
   public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException {
-    super();
-    this.skipInterval = skipInterval;
-    this.skipMinimum = skipInterval; /* set to the same for now */
-    final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
-    docOut = factory.createOutput(state.directory, docFileName);
-    docIndex = docOut.index();
-    if (state.fieldInfos.hasProx()) {
-      final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
-      freqOut = factory.createOutput(state.directory, frqFileName);
-      freqIndex = freqOut.index();
-      final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
-      posOut = factory.createOutput(state.directory, posFileName);
-      posIndex = posOut.index();
-      // TODO: -- only if at least one field stores payloads?
-      final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
-      payloadOut = state.directory.createOutput(payloadFileName);
-    } else {
-      freqOut = null;
-      freqIndex = null;
-      posOut = null;
-      posIndex = null;
-      payloadOut = null;
+    freqOut = null;
+    freqIndex = null;
+    posOut = null;
+    posIndex = null;
+    payloadOut = null;
+    boolean success = false;
+    try {
+      this.skipInterval = skipInterval;
+      this.skipMinimum = skipInterval; /* set to the same for now */
+      final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
+      docOut = factory.createOutput(state.directory, docFileName);
+      docIndex = docOut.index();
+      if (state.fieldInfos.hasProx()) {
+        final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
+        freqOut = factory.createOutput(state.directory, frqFileName);
+        freqIndex = freqOut.index();
+        final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
+        posOut = factory.createOutput(state.directory, posFileName);
+        posIndex = posOut.index();
+        // TODO: -- only if at least one field stores payloads?
+        final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
+        payloadOut = state.directory.createOutput(payloadFileName);
+      }
+      final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
+      skipOut = state.directory.createOutput(skipFileName);
+      totalNumDocs = state.numDocs;
+      skipListWriter = new SepSkipListWriter(skipInterval,
+                                             maxSkipLevels,
+                                             state.numDocs,
+                                             freqOut, docOut,
+                                             posOut, payloadOut);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, docOut, skipOut, freqOut, posOut, payloadOut);
+      }
     }
-    final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
-    skipOut = state.directory.createOutput(skipFileName);
-    totalNumDocs = state.numDocs;
-    skipListWriter = new SepSkipListWriter(skipInterval,
-                                           maxSkipLevels,
-                                           state.numDocs,
-                                           freqOut, docOut,
-                                           posOut, payloadOut);
   }
 
   @Override
@@ -306,25 +314,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
   @Override
   public void close() throws IOException {
-    try {
-      docOut.close();
-    } finally {
-      try {
-        skipOut.close();
-      } finally {
-        if (freqOut != null) {
-          try {
-            freqOut.close();
-          } finally {
-            try {
-              posOut.close();
-            } finally {
-              payloadOut.close();
-            }
-          }
-        }
-      }
-    }
+    IOUtils.closeSafely(false, docOut, skipOut, freqOut, posOut, payloadOut);
   }
 
   public static void getExtensions(Set<String> extensions) {
View File
@@ -29,13 +29,14 @@ import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.automaton.fst.Builder;
-import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
-import org.apache.lucene.util.automaton.fst.FST;
-import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
-import org.apache.lucene.util.automaton.fst.PairOutputs;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.PairOutputs;
 
 import java.io.IOException;
 import java.util.Comparator;
@@ -236,7 +237,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     private int tf;
     private Bits skipDocs;
     private final BytesRef scratch = new BytesRef(10);
-    private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
+    private final CharsRef scratchUTF16 = new CharsRef(10);
 
     public SimpleTextDocsEnum() {
       this.inStart = SimpleTextFieldsReader.this.in;
@@ -286,7 +287,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
           return docID;
         }
         UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
-        docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
+        docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
         termFreq = 0;
         first = false;
       } else if (scratch.startsWith(POS)) {
@@ -323,8 +324,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
     private Bits skipDocs;
     private final BytesRef scratch = new BytesRef(10);
     private final BytesRef scratch2 = new BytesRef(10);
-    private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
-    private final UnicodeUtil.UTF16Result scratchUTF16_2 = new UnicodeUtil.UTF16Result();
+    private final CharsRef scratchUTF16 = new CharsRef(10);
+    private final CharsRef scratchUTF16_2 = new CharsRef(10);
     private BytesRef payload;
     private long nextDocStart;
@@ -368,7 +369,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
           return docID;
         }
         UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
-        docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
+        docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
         tf = 0;
         posStart = in.getFilePointer();
         first = false;
@@ -400,7 +401,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
       readLine(in, scratch);
       assert scratch.startsWith(POS): "got line=" + scratch.utf8ToString();
       UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
-      final int pos = ArrayUtil.parseInt(scratchUTF16_2.result, 0, scratchUTF16_2.length);
+      final int pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
       final long fp = in.getFilePointer();
       readLine(in, scratch);
       if (scratch.startsWith(PAYLOAD)) {
View File
@@ -143,8 +143,11 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
 
   @Override
   public void close() throws IOException {
-    write(END);
-    newline();
-    out.close();
+    try {
+      write(END);
+      newline();
+    } finally {
+      out.close();
+    }
   }
 }
View File
@@ -33,6 +33,7 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
 
 /** @lucene.experimental */
 public final class StandardPostingsWriter extends PostingsWriterBase {
@@ -42,8 +43,8 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
   final static int VERSION_START = 0;
   final static int VERSION_CURRENT = VERSION_START;
 
-  final IndexOutput freqOut;
-  final IndexOutput proxOut;
+  IndexOutput freqOut;
+  IndexOutput proxOut;
   final DefaultSkipListWriter skipListWriter;
 
   /** Expert: The fraction of TermDocs entries stored in skip tables,
    * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
@@ -85,31 +86,35 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
   public StandardPostingsWriter(SegmentWriteState state) throws IOException {
     this(state, DEFAULT_SKIP_INTERVAL);
   }
 
   public StandardPostingsWriter(SegmentWriteState state, int skipInterval) throws IOException {
-    super();
     this.skipInterval = skipInterval;
     this.skipMinimum = skipInterval; /* set to the same for now */
     //this.segment = state.segmentName;
     String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
     freqOut = state.directory.createOutput(fileName);
+    boolean success = false;
+    try {
       if (state.fieldInfos.hasProx()) {
         // At least one field does not omit TF, so create the
         // prox file
         fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
         proxOut = state.directory.createOutput(fileName);
       } else {
         // Every field omits TF so we will write no prox file
         proxOut = null;
       }
 
-    totalNumDocs = state.numDocs;
+      totalNumDocs = state.numDocs;
 
-    skipListWriter = new DefaultSkipListWriter(skipInterval,
-                                               maxSkipLevels,
-                                               state.numDocs,
-                                               freqOut,
-                                               proxOut);
+      skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels,
+          state.numDocs, freqOut, proxOut);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, freqOut, proxOut);
+      }
    }
   }
 
   @Override
@@ -267,12 +272,6 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
   @Override
   public void close() throws IOException {
-    try {
-      freqOut.close();
-    } finally {
-      if (proxOut != null) {
-        proxOut.close();
-      }
-    }
+    IOUtils.closeSafely(false, freqOut, proxOut);
   }
 }
View File
@@ -181,6 +181,7 @@ public abstract class CachingCollector extends Collector {
           curUpto = 0;
         }
         cachedScorer.score = curScores[curUpto];
+        cachedScorer.doc = curDocs[curUpto];
         other.collect(curDocs[curUpto++]);
       }
     }
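The added assignment matters during replay: the delegate collector was handed the cached scorer via setScorer(), so inside collect() it may read docID() as well as score(); both must be positioned before the call. A stripped-down sketch of that replay contract (simplified, not the class's actual code):

    // During replay the delegate reads score() and docID() from this object,
    // so both fields must be set before collect() runs.
    final class CachedScorerSketch {
      float score;
      int doc;

      float score() { return score; }
      int docID()   { return doc; }
    }

    // Shape of the replay loop:
    //   cached.score = curScores[curUpto];
    //   cached.doc   = curDocs[curUpto];   // without this, docID() is stale
    //   other.collect(curDocs[curUpto++]);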
View File
@@ -22,8 +22,6 @@ import java.io.FileNotFoundException;
 import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.RandomAccessFile;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
 import java.util.Collection;
 import static java.util.Collections.synchronizedSet;
 
@@ -111,15 +109,6 @@ import org.apache.lucene.util.Constants;
  * @see Directory
  */
 public abstract class FSDirectory extends Directory {
-  private final static MessageDigest DIGESTER;
-
-  static {
-    try {
-      DIGESTER = MessageDigest.getInstance("MD5");
-    } catch (NoSuchAlgorithmException e) {
-      throw new RuntimeException(e.toString(), e);
-    }
-  }
 
   /**
    * Default read chunk size.  This is a conditional default: on 32bit JVMs, it defaults to 100 MB.  On 64bit JVMs, it's
@@ -337,12 +326,6 @@ public abstract class FSDirectory extends Directory {
     return openInput(name, BufferedIndexInput.BUFFER_SIZE);
   }
 
-  /**
-   * So we can do some byte-to-hexchar conversion below
-   */
-  private static final char[] HEX_DIGITS =
-  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
-
   @Override
   public String getLockID() {
     ensureOpen();
@@ -353,19 +336,12 @@ public abstract class FSDirectory extends Directory {
       throw new RuntimeException(e.toString(), e);
     }
 
-    byte digest[];
-    synchronized (DIGESTER) {
-      digest = DIGESTER.digest(dirName.getBytes());
+    int digest = 0;
+    for(int charIDX=0;charIDX<dirName.length();charIDX++) {
+      final char ch = dirName.charAt(charIDX);
+      digest = 31 * digest + ch;
     }
-    StringBuilder buf = new StringBuilder();
-    buf.append("lucene-");
-    for (int i = 0; i < digest.length; i++) {
-      int b = digest[i];
-      buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
-      buf.append(HEX_DIGITS[b & 0xf]);
-    }
-    return buf.toString();
+    return "lucene-" + Integer.toHexString(digest);
   }
 
   /** Closes the store to future operations. */
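getLockID now derives the lock name from a String.hashCode()-style 31-based recurrence over the canonical path rather than an MD5 digest, removing the shared, synchronized MessageDigest. The computation, as a standalone sketch (lockID is a hypothetical helper; dirName stands in for the canonical path):

    // "lucene-" plus the hex form of a 31-based char hash over the canonical
    // path -- the same recurrence String.hashCode() uses.
    static String lockID(String dirName) {
      int digest = 0;
      for (int i = 0; i < dirName.length(); i++) {
        digest = 31 * digest + dirName.charAt(i);
      }
      return "lucene-" + Integer.toHexString(digest);
    }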
View File
@@ -18,7 +18,6 @@ package org.apache.lucene.util;
  */
 
 import java.util.Comparator;
-import java.io.UnsupportedEncodingException;
 
 /** Represents byte[], as a slice (offset + length) into an
  *  existing byte[].
@@ -122,6 +121,7 @@ public final class BytesRef implements Comparable<BytesRef> {
   public void copy(char text[], int offset, int length) {
     UnicodeUtil.UTF16toUTF8(text, offset, length, this);
   }
+
   public boolean bytesEquals(BytesRef other) {
     if (length == other.length) {
       int otherUpto = other.offset;
@@ -198,13 +198,15 @@ public final class BytesRef implements Comparable<BytesRef> {
   /** Interprets stored bytes as UTF8 bytes, returning the
    *  resulting string */
   public String utf8ToString() {
-    try {
-      return new String(bytes, offset, length, "UTF-8");
-    } catch (UnsupportedEncodingException uee) {
-      // should not happen -- UTF8 is presumably supported
-      // by all JREs
-      throw new RuntimeException(uee);
-    }
+    final CharsRef ref = new CharsRef(length);
+    UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
+    return ref.toString();
+  }
+
+  /** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */
+  public CharsRef utf8ToChars(CharsRef ref) {
+    UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
+    return ref;
   }
 
   /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
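utf8ToChars exists so hot loops can decode into one reusable CharsRef instead of allocating a String per value, which is how the SimpleText reader above now parses doc IDs and positions. A usage sketch (Utf8ToCharsSketch is a hypothetical caller):

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.CharsRef;

    // One reusable scratch buffer for many UTF-8 -> UTF-16 conversions,
    // instead of allocating a String per BytesRef.
    class Utf8ToCharsSketch {
      private final CharsRef scratch = new CharsRef(10);

      CharSequence chars(BytesRef term) {
        return term.utf8ToChars(scratch);  // fills and returns the scratch ref
      }
    }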
View File
@@ -0,0 +1,215 @@
package org.apache.lucene.util;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Represents char[], as a slice (offset + length) into an existing char[].
 *
 * @lucene.internal
 */
public final class CharsRef implements Comparable<CharsRef>, CharSequence {
  private static final char[] EMPTY_ARRAY = new char[0];
  public char[] chars;
  public int offset;
  public int length;

  /**
   * Creates a new {@link CharsRef} initialized with an empty, zero-length array
   */
  public CharsRef() {
    this(EMPTY_ARRAY, 0, 0);
  }

  /**
   * Creates a new {@link CharsRef} initialized with an array of the given
   * capacity
   */
  public CharsRef(int capacity) {
    chars = new char[capacity];
  }

  /**
   * Creates a new {@link CharsRef} initialized with the given array, offset and
   * length
   */
  public CharsRef(char[] chars, int offset, int length) {
    assert chars != null;
    assert chars.length >= offset + length;
    this.chars = chars;
    this.offset = offset;
    this.length = length;
  }

  /**
   * Creates a new {@link CharsRef} initialized with the given String's character
   * array
   */
  public CharsRef(String string) {
    this.chars = string.toCharArray();
    this.offset = 0;
    this.length = chars.length;
  }

  /**
   * Creates a new {@link CharsRef} and copies the contents of the source into
   * the new instance.
   * @see #copy(CharsRef)
   */
  public CharsRef(CharsRef other) {
    copy(other);
  }

  @Override
  public Object clone() {
    return new CharsRef(this);
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 0;
    final int end = offset + length;
    for (int i = offset; i < end; i++) {
      result = prime * result + chars[i];
    }
    return result;
  }

  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    if (other instanceof CharsRef) {
      return charsEquals((CharsRef) other);
    }
    if (other instanceof CharSequence) {
      final CharSequence seq = (CharSequence) other;
      if (length == seq.length()) {
        int n = length;
        int i = offset;
        int j = 0;
        while (n-- != 0) {
          if (chars[i++] != seq.charAt(j++))
            return false;
        }
        return true;
      }
    }
    return false;
  }

  public boolean charsEquals(CharsRef other) {
    if (length == other.length) {
      int otherUpto = other.offset;
      final char[] otherChars = other.chars;
      final int end = offset + length;
      for (int upto = offset; upto < end; upto++, otherUpto++) {
        if (chars[upto] != otherChars[otherUpto]) {
          return false;
        }
      }
      return true;
    } else {
      return false;
    }
  }

  /** Signed int order comparison */
  public int compareTo(CharsRef other) {
    if (this == other)
      return 0;

    final char[] aChars = this.chars;
    int aUpto = this.offset;
    final char[] bChars = other.chars;
    int bUpto = other.offset;

    final int aStop = aUpto + Math.min(this.length, other.length);

    while (aUpto < aStop) {
      int aInt = aChars[aUpto++];
      int bInt = bChars[bUpto++];
      if (aInt > bInt) {
        return 1;
      } else if (aInt < bInt) {
        return -1;
      }
    }

    // One is a prefix of the other, or, they are equal:
    return this.length - other.length;
  }

  /**
   * Copies the given {@link CharsRef} referenced content into this instance
   * starting at offset 0.
   *
   * @param other
   *          the {@link CharsRef} to copy
   */
  public void copy(CharsRef other) {
    chars = ArrayUtil.grow(chars, other.length);
    System.arraycopy(other.chars, other.offset, chars, 0, other.length);
    length = other.length;
    offset = 0;
  }

  public void grow(int newLength) {
    if (chars.length < newLength) {
      chars = ArrayUtil.grow(chars, newLength);
    }
  }

  /**
   * Copies the given array into this CharsRef starting at offset 0
   */
  public void copy(char[] otherChars, int otherOffset, int otherLength) {
    this.offset = 0;
    append(otherChars, otherOffset, otherLength);
  }

  /**
   * Appends the given array to this CharsRef starting at the current offset
   */
  public void append(char[] otherChars, int otherOffset, int otherLength) {
    grow(this.offset + otherLength);
    System.arraycopy(otherChars, otherOffset, this.chars, this.offset,
        otherLength);
    this.length = otherLength;
  }

  @Override
  public String toString() {
    return new String(chars, offset, length);
  }

  public int length() {
    return length;
  }

  public char charAt(int index) {
    return chars[offset + index];
  }

  public CharSequence subSequence(int start, int end) {
    // a subsequence of [start, end) has length end - start
    return new CharsRef(chars, offset + start, end - start);
  }
}
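Because CharsRef implements CharSequence and Comparable, instances can be handed to CharSequence-based APIs, compared, and sorted without first copying out a String. A small usage sketch (values are illustrative):

    import org.apache.lucene.util.CharsRef;

    // A CharsRef is a window (offset + length) over a char[]; copy() reuses
    // the destination's backing array where possible.
    class CharsRefSketch {
      public static void main(String[] args) {
        CharsRef a = new CharsRef("lucene");
        CharsRef b = new CharsRef(10);
        b.copy(a);                          // b now holds "lucene" at offset 0
        System.out.println(a.equals(b));    // true: CharSequence-style equality
        System.out.println(a.charAt(0));    // 'l'
        System.out.println(a.compareTo(b)); // 0
      }
    }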
View File
@@ -43,6 +43,8 @@ public final class Constants {
   public static final boolean WINDOWS = OS_NAME.startsWith("Windows");
   /** True iff running on SunOS. */
   public static final boolean SUN_OS = OS_NAME.startsWith("SunOS");
+  /** True iff running on Mac OS X */
+  public static final boolean MAC_OS_X = OS_NAME.startsWith("Mac OS X");
 
   public static final String OS_ARCH = System.getProperty("os.arch");
   public static final String OS_VERSION = System.getProperty("os.version");
View File
@@ -47,44 +47,113 @@ public final class IOUtils {
    * @param objects          objects to call <tt>close()</tt> on
    */
   public static <E extends Exception> void closeSafely(E priorException, Closeable... objects) throws E, IOException {
-    IOException firstIOE = null;
+    Throwable th = null;
     for (Closeable object : objects) {
       try {
-        if (object != null)
+        if (object != null) {
           object.close();
-      } catch (IOException ioe) {
-        if (firstIOE == null)
-          firstIOE = ioe;
+        }
+      } catch (Throwable t) {
+        if (th == null) {
+          th = t;
+        }
       }
     }
-    if (priorException != null)
+    if (priorException != null) {
       throw priorException;
-    else if (firstIOE != null)
-      throw firstIOE;
+    } else if (th != null) {
+      if (th instanceof IOException) throw (IOException) th;
+      if (th instanceof RuntimeException) throw (RuntimeException) th;
+      if (th instanceof Error) throw (Error) th;
+      throw new RuntimeException(th);
+    }
+  }
+
+  /** @see #closeSafely(Exception, Closeable...) */
+  public static <E extends Exception> void closeSafely(E priorException, Iterable<Closeable> objects) throws E, IOException {
+    Throwable th = null;
+    for (Closeable object : objects) {
+      try {
+        if (object != null) {
+          object.close();
+        }
+      } catch (Throwable t) {
+        if (th == null) {
+          th = t;
+        }
+      }
+    }
+    if (priorException != null) {
+      throw priorException;
+    } else if (th != null) {
+      if (th instanceof IOException) throw (IOException) th;
+      if (th instanceof RuntimeException) throw (RuntimeException) th;
+      if (th instanceof Error) throw (Error) th;
+      throw new RuntimeException(th);
+    }
   }
 
   /**
-   * <p>Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions. Some of the <tt>Closeable</tt>s
-   * may be null, they are ignored. After everything is closed, method either throws the first of suppressed exceptions,
-   * or completes normally.</p>
-   * @param objects objects to call <tt>close()</tt> on
+   * Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions.
+   * Some of the <tt>Closeable</tt>s may be null, they are ignored. After
+   * everything is closed, and if {@code suppressExceptions} is {@code false},
+   * method either throws the first of suppressed exceptions, or completes
+   * normally.
+   *
+   * @param suppressExceptions
+   *          if true then exceptions that occur during close() are suppressed
+   * @param objects
+   *          objects to call <tt>close()</tt> on
    */
-  public static void closeSafely(Closeable... objects) throws IOException {
-    IOException firstIOE = null;
+  public static void closeSafely(boolean suppressExceptions, Closeable... objects) throws IOException {
+    Throwable th = null;
     for (Closeable object : objects) {
       try {
-        if (object != null)
+        if (object != null) {
           object.close();
-      } catch (IOException ioe) {
-        if (firstIOE == null)
-          firstIOE = ioe;
+        }
+      } catch (Throwable t) {
+        if (th == null)
+          th = t;
       }
     }
-    if (firstIOE != null)
-      throw firstIOE;
+    if (th != null && !suppressExceptions) {
+      if (th instanceof IOException) throw (IOException) th;
+      if (th instanceof RuntimeException) throw (RuntimeException) th;
+      if (th instanceof Error) throw (Error) th;
+      throw new RuntimeException(th);
+    }
+  }
+
+  /**
+   * @see #closeSafely(boolean, Closeable...)
+   */
+  public static void closeSafely(boolean suppressExceptions, Iterable<? extends Closeable> objects) throws IOException {
+    Throwable th = null;
+    for (Closeable object : objects) {
+      try {
+        if (object != null) {
+          object.close();
+        }
+      } catch (Throwable t) {
+        if (th == null)
+          th = t;
+      }
+    }
+    if (th != null && !suppressExceptions) {
+      if (th instanceof IOException) throw (IOException) th;
+      if (th instanceof RuntimeException) throw (RuntimeException) th;
+      if (th instanceof Error) throw (Error) th;
+      throw new RuntimeException(th);
+    }
   }
 }
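The priorException overload is for cleanup paths that must close resources without masking an exception already in flight: if priorException is non-null it is rethrown after all close() calls; otherwise the first close() failure is. A usage sketch (PriorExceptionSketch and finish are hypothetical):

    import java.io.IOException;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.util.IOUtils;

    // Close everything, but if we got here with a real failure, that failure
    // is the one rethrown; close() errors are only surfaced on the clean path.
    class PriorExceptionSketch {
      static void finish(IndexOutput a, IndexOutput b) throws IOException {
        IOException prior = null;
        try {
          a.writeInt(42);
          b.writeInt(42);
        } catch (IOException e) {
          prior = e;
        } finally {
          IOUtils.closeSafely(prior, a, b);  // rethrows prior if non-null
        }
      }
    }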
View File
@@ -78,11 +78,15 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
   protected long[] bits;
   protected int wlen;   // number of words (elements) used in the array
 
+  // Used only for assert:
+  private long numBits;
+
   /** Constructs an OpenBitSet large enough to hold numBits.
    *
    * @param numBits
    */
   public OpenBitSet(long numBits) {
+    this.numBits = numBits;
     bits = new long[bits2words(numBits)];
     wlen = bits.length;
   }
@@ -107,6 +111,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
   public OpenBitSet(long[] bits, int numWords) {
     this.bits = bits;
     this.wlen = numWords;
+    this.numBits = wlen * 64;
   }
 
   @Override
@@ -170,6 +175,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
    * The index should be less than the OpenBitSet size
    */
   public boolean fastGet(int index) {
+    assert index >= 0 && index < numBits;
     int i = index >> 6; // div 64
     // signed shift will keep a negative index and force an
     // array-index-out-of-bounds-exception, removing the need for an explicit check.
@@ -194,6 +200,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
    * The index should be less than the OpenBitSet size.
    */
   public boolean fastGet(long index) {
+    assert index >= 0 && index < numBits;
     int i = (int)(index >> 6); // div 64
     int bit = (int)index & 0x3f;  // mod 64
     long bitmask = 1L << bit;
@@ -217,6 +224,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
    * The index should be less than the OpenBitSet size
    */
   public int getBit(int index) {
+    assert index >= 0 && index < numBits;
     int i = index >> 6; // div 64
     int bit = index & 0x3f; // mod 64
     return ((int)(bits[i]>>>bit)) & 0x01;
@@ -245,6 +253,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
    * The index should be less than the OpenBitSet size.
    */
   public void fastSet(int index) {
+    assert index >= 0 && index < numBits;
     int wordNum = index >> 6; // div 64
     int bit = index & 0x3f; // mod 64
     long bitmask = 1L << bit;
@@ -255,6 +264,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
    * The index should be less than the OpenBitSet size.
    */
   public void fastSet(long index) {
+    assert index >= 0 && index < numBits;
     int wordNum = (int)(index >> 6);
     int bit = (int)index & 0x3f;
     long bitmask = 1L << bit;
@@ -296,6 +306,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
       ensureCapacity(index+1);
       wlen = wordNum+1;
     }
+    assert (numBits = Math.max(numBits, index+1)) >= 0;
     return wordNum;
   }
 
@@ -304,6 +315,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
    * The index should be less than the OpenBitSet size.
    */
   public void fastClear(int index) {
+    assert index >= 0 && index < numBits;
     int wordNum = index >> 6;
     int bit = index & 0x03f;
     long bitmask = 1L << bit;
@@ -321,6 +333,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
    * The index should be less than the OpenBitSet size.
    */
   public void fastClear(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64 int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64 int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit; long bitmask = 1L << bit;
@ -415,6 +428,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size. * The index should be less than the OpenBitSet size.
*/ */
public boolean getAndSet(int index) { public boolean getAndSet(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6; // div 64 int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64 int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit; long bitmask = 1L << bit;
@ -427,6 +441,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size. * The index should be less than the OpenBitSet size.
*/ */
public boolean getAndSet(long index) { public boolean getAndSet(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64 int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64 int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit; long bitmask = 1L << bit;
@ -439,6 +454,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size. * The index should be less than the OpenBitSet size.
*/ */
public void fastFlip(int index) { public void fastFlip(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6; // div 64 int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64 int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit; long bitmask = 1L << bit;
@ -449,6 +465,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size. * The index should be less than the OpenBitSet size.
*/ */
public void fastFlip(long index) { public void fastFlip(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64 int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64 int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit; long bitmask = 1L << bit;
@ -467,6 +484,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size. * The index should be less than the OpenBitSet size.
*/ */
public boolean flipAndGet(int index) { public boolean flipAndGet(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6; // div 64 int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64 int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit; long bitmask = 1L << bit;
@ -478,6 +496,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size. * The index should be less than the OpenBitSet size.
*/ */
public boolean flipAndGet(long index) { public boolean flipAndGet(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64 int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64 int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit; long bitmask = 1L << bit;
@ -674,6 +693,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
public void union(OpenBitSet other) { public void union(OpenBitSet other) {
int newLen = Math.max(wlen,other.wlen); int newLen = Math.max(wlen,other.wlen);
ensureCapacityWords(newLen); ensureCapacityWords(newLen);
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
long[] thisArr = this.bits; long[] thisArr = this.bits;
long[] otherArr = other.bits; long[] otherArr = other.bits;
@ -702,6 +722,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
public void xor(OpenBitSet other) { public void xor(OpenBitSet other) {
int newLen = Math.max(wlen,other.wlen); int newLen = Math.max(wlen,other.wlen);
ensureCapacityWords(newLen); ensureCapacityWords(newLen);
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
long[] thisArr = this.bits; long[] thisArr = this.bits;
long[] otherArr = other.bits; long[] otherArr = other.bits;
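
All of the new checks in this file live behind Java asserts: numBits is tracked only so the bounds tests can run, and expressions like assert (numBits = Math.max(numBits, index+1)) >= 0 deliberately fold the bookkeeping into the assert so it disappears entirely when assertions are disabled. A small demo of the effect (hypothetical class; run with java -ea to see the AssertionError):

    import org.apache.lucene.util.OpenBitSet;

    public class OpenBitSetAssertDemo {
      public static void main(String[] args) {
        OpenBitSet bits = new OpenBitSet(100); // backed by two 64-bit words
        bits.fastSet(99);  // in range: fine either way
        bits.fastSet(127); // fits the backing array, but past the logical size:
                           // with -ea this trips the new assert; with -da it
                           // silently sets a bit beyond numBits
      }
    }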

@ -95,6 +95,19 @@ package org.apache.lucene.util;
public final class UnicodeUtil { public final class UnicodeUtil {
/** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
* one would normally encounter, and definitely bigger than any UTF-8 terms.
* <p>
* WARNING: This is not a valid UTF8 Term
**/
public static final BytesRef BIG_TERM = new BytesRef(
new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
); // TODO this is unrelated here find a better place for it
public static void main(String[] args) {
System.out.println(Character.toChars(0x10FFFF + 1));
}
private UnicodeUtil() {} // no instance private UnicodeUtil() {} // no instance
public static final int UNI_SUR_HIGH_START = 0xD800; public static final int UNI_SUR_HIGH_START = 0xD800;
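
BIG_TERM is intended as a sentinel that compares above every valid UTF-8 term, e.g. as an upper bound when scanning a term range. A sketch of the invariant, assuming BytesRef's Comparable implementation uses unsigned byte order as on trunk (the demo class is hypothetical):

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.UnicodeUtil;

    public class BigTermDemo {
      public static void main(String[] args) {
        // Valid UTF-8 never contains 0xff bytes, so any real term sorts below the sentinel.
        BytesRef term = new BytesRef("zzzz");
        System.out.println(term.compareTo(UnicodeUtil.BIG_TERM) < 0); // true
      }
    }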
@ -112,33 +125,6 @@ public final class UnicodeUtil {
Character.MIN_SUPPLEMENTARY_CODE_POINT - Character.MIN_SUPPLEMENTARY_CODE_POINT -
(UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START; (UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;
/**
* @lucene.internal
*/
public static final class UTF16Result {
public char[] result = new char[10];
public int[] offsets = new int[10];
public int length;
public void setLength(int newLength) {
if (result.length < newLength)
result = ArrayUtil.grow(result, newLength);
length = newLength;
}
public void copyText(UTF16Result other) {
setLength(other.length);
System.arraycopy(other.result, 0, result, 0, length);
}
public void copyText(String other) {
final int otherLength = other.length();
setLength(otherLength);
other.getChars(0, otherLength, result, 0);
length = otherLength;
}
}
/** Encode characters from a char[] source, starting at /** Encode characters from a char[] source, starting at
* offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */ * offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */
public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) { public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
@ -302,135 +288,6 @@ public final class UnicodeUtil {
result.length = upto; result.length = upto;
} }
/** Convert UTF8 bytes into UTF16 characters. If offset
* is non-zero, conversion starts at that starting point
* in utf8, re-using the results from the previous call
* up until offset. */
public static void UTF8toUTF16(final byte[] utf8, final int offset, final int length, final UTF16Result result) {
final int end = offset + length;
char[] out = result.result;
if (result.offsets.length <= end) {
result.offsets = ArrayUtil.grow(result.offsets, end+1);
}
final int[] offsets = result.offsets;
// If incremental decoding fell in the middle of a
// single unicode character, rollback to its start:
int upto = offset;
while(offsets[upto] == -1)
upto--;
int outUpto = offsets[upto];
// Pre-allocate for worst case 1-for-1
if (outUpto+length >= out.length) {
out = result.result = ArrayUtil.grow(out, outUpto+length+1);
}
while (upto < end) {
final int b = utf8[upto]&0xff;
final int ch;
offsets[upto++] = outUpto;
if (b < 0xc0) {
assert b < 0x80;
ch = b;
} else if (b < 0xe0) {
ch = ((b&0x1f)<<6) + (utf8[upto]&0x3f);
offsets[upto++] = -1;
} else if (b < 0xf0) {
ch = ((b&0xf)<<12) + ((utf8[upto]&0x3f)<<6) + (utf8[upto+1]&0x3f);
offsets[upto++] = -1;
offsets[upto++] = -1;
} else {
assert b < 0xf8;
ch = ((b&0x7)<<18) + ((utf8[upto]&0x3f)<<12) + ((utf8[upto+1]&0x3f)<<6) + (utf8[upto+2]&0x3f);
offsets[upto++] = -1;
offsets[upto++] = -1;
offsets[upto++] = -1;
}
if (ch <= UNI_MAX_BMP) {
// target is a character <= 0xFFFF
out[outUpto++] = (char) ch;
} else {
// target is a character in range 0xFFFF - 0x10FFFF
out[outUpto++] = (char) ((ch >> HALF_SHIFT) + 0xD7C0 /* UNI_SUR_HIGH_START - 64 */);
out[outUpto++] = (char) ((ch & HALF_MASK) + UNI_SUR_LOW_START);
}
}
offsets[upto] = outUpto;
result.length = outUpto;
}
/**
* Get the next valid UTF-16 String in UTF-16 order.
* <p>
* If the input String is already valid, it is returned.
* Otherwise the next String in code unit order is returned.
* </p>
* @param s input String (possibly with unpaired surrogates)
* @return next valid UTF-16 String in UTF-16 order
*/
public static String nextValidUTF16String(String s) {
if (validUTF16String(s))
return s;
else {
UTF16Result chars = new UTF16Result();
chars.copyText(s);
nextValidUTF16String(chars);
return new String(chars.result, 0, chars.length);
}
}
public static void nextValidUTF16String(UTF16Result s) {
final int size = s.length;
for (int i = 0; i < size; i++) {
char ch = s.result[i];
if (ch >= UnicodeUtil.UNI_SUR_HIGH_START
&& ch <= UnicodeUtil.UNI_SUR_HIGH_END) {
if (i < size - 1) {
i++;
char nextCH = s.result[i];
if (nextCH >= UnicodeUtil.UNI_SUR_LOW_START
&& nextCH <= UnicodeUtil.UNI_SUR_LOW_END) {
// Valid surrogate pair
} else
// Unmatched high surrogate
if (nextCH < UnicodeUtil.UNI_SUR_LOW_START) { // SMP not enumerated
s.setLength(i + 1);
s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
return;
} else { // SMP already enumerated
if (s.result[i - 1] == UnicodeUtil.UNI_SUR_HIGH_END) {
s.result[i - 1] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
s.setLength(i);
} else {
s.result[i - 1]++;
s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
s.setLength(i + 1);
}
return;
}
} else {
// Unmatched high surrogate in final position, SMP not yet enumerated
s.setLength(i + 2);
s.result[i + 1] = (char) UnicodeUtil.UNI_SUR_LOW_START;
return;
}
} else if (ch >= UnicodeUtil.UNI_SUR_LOW_START
&& ch <= UnicodeUtil.UNI_SUR_LOW_END) {
// Unmatched low surrogate, SMP already enumerated
s.setLength(i + 1);
s.result[i] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
return;
}
}
}
// Only called from assert // Only called from assert
/* /*
private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) { private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
@ -705,4 +562,51 @@ public final class UnicodeUtil {
} }
return sb.toString(); return sb.toString();
} }
/**
* Interprets the given byte array as UTF-8 and converts to UTF-16. The {@link CharsRef} will be extended if
* it doesn't provide enough space to hold the worst case of each byte becoming a UTF-16 codepoint.
* <p>
* NOTE: Full characters are read, even if this reads past the length passed (and
* can result in an ArrayIndexOutOfBoundsException if invalid UTF-8 is passed).
* Explicit checks for valid UTF-8 are not performed.
*/
public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
int out_offset = chars.offset = 0;
final char[] out = chars.chars = ArrayUtil.grow(chars.chars, length);
final int limit = offset + length;
while (offset < limit) {
int b = utf8[offset++]&0xff;
if (b < 0xc0) {
assert b < 0x80;
out[out_offset++] = (char)b;
} else if (b < 0xe0) {
out[out_offset++] = (char)(((b&0x1f)<<6) + (utf8[offset++]&0x3f));
} else if (b < 0xf0) {
out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f));
offset += 2;
} else {
assert b < 0xf8;
int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f);
offset += 3;
if (ch < UNI_MAX_BMP) {
out[out_offset++] = (char)ch;
} else {
int chHalf = ch - 0x0010000;
out[out_offset++] = (char) ((chHalf >> 10) + 0xD800);
out[out_offset++] = (char) ((chHalf & HALF_MASK) + 0xDC00);
}
}
}
chars.length = out_offset - chars.offset;
}
/**
* Utility method for {@link #UTF8toUTF16(byte[], int, int, CharsRef)}
* @see #UTF8toUTF16(byte[], int, int, CharsRef)
*/
public static void UTF8toUTF16(BytesRef bytesRef, CharsRef chars) {
UTF8toUTF16(bytesRef.bytes, bytesRef.offset, bytesRef.length, chars);
}
} }
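
The CharsRef-based decoder added above replaces the incremental UTF16Result API with a simple one-shot conversion. A round-trip sketch, assuming CharsRef's no-arg constructor and toString behave as on trunk at this time (the demo class is hypothetical):

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.CharsRef;
    import org.apache.lucene.util.UnicodeUtil;

    public class Utf8RoundTrip {
      public static void main(String[] args) {
        String s = "caf\u00e9 \uD83D\uDE00"; // includes a supplementary character (surrogate pair)
        BytesRef bytes = new BytesRef(s);    // encode to UTF-8
        CharsRef chars = new CharsRef();
        UnicodeUtil.UTF8toUTF16(bytes, chars);
        System.out.println(s.equals(chars.toString())); // true
      }
    }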

@ -49,6 +49,13 @@ public enum Version {
@Deprecated @Deprecated
LUCENE_32, LUCENE_32,
/**
* Match settings and bugs in Lucene's 3.3 release.
* @deprecated (4.0) Use latest
*/
@Deprecated
LUCENE_33,
/** Match settings and bugs in Lucene's 4.0 release. /** Match settings and bugs in Lucene's 4.0 release.
* <p> * <p>
* Use this to get the latest &amp; greatest settings, bug * Use this to get the latest &amp; greatest settings, bug
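
The new LUCENE_33 constant follows the usual pattern: callers pin a Version so analyzers and index-time settings keep matching an index built by that release. A typical wiring (illustrative, not from this diff):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.util.Version;

    public class VersionDemo {
      IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_33,
          new StandardAnalyzer(Version.LUCENE_33));
    }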

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef;
/** /**
* Output is a sequence of bytes, for each input term. * Output is a sequence of bytes, for each input term.
*
* @lucene.experimental * @lucene.experimental
*/ */

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** Can next() and advance() through the terms in an FST /** Can next() and advance() through the terms in an FST
*
* @lucene.experimental * @lucene.experimental
*/ */

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -23,7 +23,7 @@ import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
+import org.apache.lucene.util.fst.Builder.UnCompiledNode;
// NOTE: while the FST is able to represent a non-final // NOTE: while the FST is able to represent a non-final
// dead-end state (NON_FINAL_END_NODE=0), the layers above // dead-end state (NON_FINAL_END_NODE=0), the layers above
@ -32,6 +32,7 @@ import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
/** Represents an FST using a compact byte[] format. /** Represents an FST using a compact byte[] format.
* <p> The format is similar to what's used by Morfologik * <p> The format is similar to what's used by Morfologik
* (http://sourceforge.net/projects/morfologik). * (http://sourceforge.net/projects/morfologik).
*
* @lucene.experimental * @lucene.experimental
*/ */
public class FST<T> { public class FST<T> {

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -23,6 +23,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException; import java.io.IOException;
/** Can next() and advance() through the terms in an FST /** Can next() and advance() through the terms in an FST
*
* @lucene.experimental * @lucene.experimental
*/ */

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,7 @@ import org.apache.lucene.util.IntsRef;
/** /**
* Output is a sequence of ints, for each input term. * Output is a sequence of ints, for each input term.
*
* @lucene.experimental * @lucene.experimental
*/ */

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -22,6 +22,7 @@ import org.apache.lucene.util.IntsRef;
import java.io.IOException; import java.io.IOException;
/** Can next() and advance() through the terms in an FST /** Can next() and advance() through the terms in an FST
*
* @lucene.experimental * @lucene.experimental
*/ */

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -22,6 +22,8 @@ import org.apache.lucene.store.DataOutput;
/** /**
* Use this if you just want to build an FSA. * Use this if you just want to build an FSA.
*
* @lucene.experimental
*/ */
public final class NoOutputs extends Outputs<Object> { public final class NoOutputs extends Outputs<Object> {

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,7 @@ import org.apache.lucene.store.DataOutput;
/** /**
* Represents the outputs for an FST, providing the basic * Represents the outputs for an FST, providing the basic
* algebra needed for the FST. * algebra needed for the FST.
*
* @lucene.experimental * @lucene.experimental
*/ */

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -24,10 +24,10 @@ import org.apache.lucene.store.DataOutput;
/** /**
* Pairs up two outputs into one. * Pairs up two outputs into one.
*
* @lucene.experimental * @lucene.experimental
*/ */
public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> { public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
private final Pair<A,B> NO_OUTPUT; private final Pair<A,B> NO_OUTPUT;

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -27,6 +27,7 @@ import org.apache.lucene.store.DataOutput;
* resulting FST is not guaranteed to be minimal! See * resulting FST is not guaranteed to be minimal! See
* {@link Builder}. You cannot store 0 output with this * {@link Builder}. You cannot store 0 output with this
* (that's reserved to mean "no output")! * (that's reserved to mean "no output")!
*
* @lucene.experimental * @lucene.experimental
*/ */

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more

@ -1,4 +1,4 @@
-package org.apache.lucene.util.automaton.fst;
+package org.apache.lucene.util.fst;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -23,7 +23,9 @@ import java.util.*;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRef;
-/** Static helper methods */
+/** Static helper methods
+ *
+ * @lucene.experimental */
public final class Util { public final class Util {
private Util() { private Util() {
} }
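
The run of @ -1,4 +1,4 @ hunks above is the mechanical move of the FST classes out of the automaton package; client code compiles unchanged once its imports are updated:

    // before the move:
    // import org.apache.lucene.util.automaton.fst.FST;
    // import org.apache.lucene.util.automaton.fst.Builder;

    // after:
    import org.apache.lucene.util.fst.FST;
    import org.apache.lucene.util.fst.Builder;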

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
import java.util.Random; import java.util.Random;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
@ -97,8 +98,43 @@ public class RandomIndexWriter implements Closeable {
* Adds a Document. * Adds a Document.
* @see IndexWriter#addDocument(Document) * @see IndexWriter#addDocument(Document)
*/ */
-  public void addDocument(Document doc) throws IOException {
-    w.addDocument(doc);
+  public void addDocument(final Document doc) throws IOException {
+    if (r.nextInt(5) == 3) {
// TODO: maybe, we should simply buffer up added docs
// (but we need to clone them), and only when
// getReader, commit, etc. are called, we do an
// addDocuments? Would be better testing.
w.addDocuments(new Iterable<Document>() {
// @Override -- not until Java 1.6
public Iterator<Document> iterator() {
return new Iterator<Document>() {
boolean done;
// @Override -- not until Java 1.6
public boolean hasNext() {
return !done;
}
// @Override -- not until Java 1.6
public void remove() {
throw new UnsupportedOperationException();
}
// @Override -- not until Java 1.6
public Document next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
});
} else {
w.addDocument(doc);
}
maybeCommit(); maybeCommit();
} }
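
The anonymous Iterable above hands the single document to addDocuments through an iterator that can be consumed exactly once; a Collections.singletonList would be shorter but returns a fresh iterator on every iterator() call, so the one-shot form also (presumably deliberately) checks that the writer walks the batch only once. The shorter alternative, for comparison:

    import java.util.Collections;

    // weaker: would not catch a writer that iterates the docs twice
    w.addDocuments(Collections.singletonList(doc));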
@ -116,12 +152,53 @@ public class RandomIndexWriter implements Closeable {
} }
} }
public void addDocuments(Iterable<Document> docs) throws IOException {
w.addDocuments(docs);
maybeCommit();
}
public void updateDocuments(Term delTerm, Iterable<Document> docs) throws IOException {
w.updateDocuments(delTerm, docs);
maybeCommit();
}
/** /**
* Updates a document. * Updates a document.
* @see IndexWriter#updateDocument(Term, Document) * @see IndexWriter#updateDocument(Term, Document)
*/ */
-  public void updateDocument(Term t, Document doc) throws IOException {
-    w.updateDocument(t, doc);
+  public void updateDocument(Term t, final Document doc) throws IOException {
+    if (r.nextInt(5) == 3) {
w.updateDocuments(t, new Iterable<Document>() {
// @Override -- not until Java 1.6
public Iterator<Document> iterator() {
return new Iterator<Document>() {
boolean done;
// @Override -- not until Java 1.6
public boolean hasNext() {
return !done;
}
// @Override -- not until Java 1.6
public void remove() {
throw new UnsupportedOperationException();
}
// @Override -- not until Java 1.6
public Document next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
});
} else {
w.updateDocument(t, doc);
}
maybeCommit(); maybeCommit();
} }

@ -44,6 +44,7 @@ import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.*; import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/** /**
* A silly test codec to verify core support for fixed * A silly test codec to verify core support for fixed
@ -97,15 +98,25 @@ public class MockFixedIntBlockCodec extends Codec {
@Override @Override
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException { public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
-    return new FixedIntBlockIndexOutput(dir.createOutput(fileName), blockSize) {
-      @Override
-      protected void flushBlock() throws IOException {
-        for(int i=0;i<buffer.length;i++) {
-          assert buffer[i] >= 0;
-          out.writeVInt(buffer[i]);
-        }
-      }
-    };
+    IndexOutput out = dir.createOutput(fileName);
+    boolean success = false;
+    try {
+      FixedIntBlockIndexOutput ret = new FixedIntBlockIndexOutput(out, blockSize) {
+        @Override
+        protected void flushBlock() throws IOException {
+          for(int i=0;i<buffer.length;i++) {
+            assert buffer[i] >= 0;
+            out.writeVInt(buffer[i]);
+          }
+        }
+      };
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, out);
+      }
+    }
} }
} }
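
Both int-block codecs now follow the same resource-safety idiom: create the raw output, mark success only after the wrapping writer is fully constructed, and close the raw output in finally only on failure, with suppressExceptions=true so the original exception is the one that propagates. The idiom in isolation (Wrapper is a hypothetical stand-in for the index-output subclass):

    IndexOutput out = dir.createOutput(fileName);
    boolean success = false;
    try {
      Wrapper ret = new Wrapper(out); // may throw
      success = true;
      return ret;                     // on success, ret owns 'out'
    } finally {
      if (!success) {
        IOUtils.closeSafely(true, out); // suppress close() errors; the cause is already in flight
      }
    }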

@ -46,6 +46,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/** /**
* A silly test codec to verify core support for variable * A silly test codec to verify core support for variable
@ -102,34 +103,42 @@ public class MockVariableIntBlockCodec extends Codec {
@Override @Override
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException { public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
final IndexOutput out = dir.createOutput(fileName); final IndexOutput out = dir.createOutput(fileName);
-    out.writeInt(baseBlockSize);
-    return new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
-      int pendingCount;
-      final int[] buffer = new int[2+2*baseBlockSize];
-
-      @Override
-      protected int add(int value) throws IOException {
-        assert value >= 0;
-        buffer[pendingCount++] = value;
-        // silly variable block length int encoder: if
-        // first value <= 3, we write N vints at once;
-        // else, 2*N
-        final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
-
-        // intentionally be non-causal here:
-        if (pendingCount == flushAt+1) {
-          for(int i=0;i<flushAt;i++) {
-            out.writeVInt(buffer[i]);
-          }
-          buffer[0] = buffer[flushAt];
-          pendingCount = 1;
-          return flushAt;
-        } else {
-          return 0;
-        }
-      }
-    };
+    boolean success = false;
+    try {
+      out.writeInt(baseBlockSize);
+      VariableIntBlockIndexOutput ret = new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
+        int pendingCount;
+        final int[] buffer = new int[2+2*baseBlockSize];
+
+        @Override
+        protected int add(int value) throws IOException {
+          assert value >= 0;
+          buffer[pendingCount++] = value;
+          // silly variable block length int encoder: if
+          // first value <= 3, we write N vints at once;
+          // else, 2*N
+          final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
+
+          // intentionally be non-causal here:
+          if (pendingCount == flushAt+1) {
+            for(int i=0;i<flushAt;i++) {
+              out.writeVInt(buffer[i]);
+            }
+            buffer[0] = buffer[flushAt];
+            pendingCount = 1;
+            return flushAt;
+          } else {
+            return 0;
+          }
+        }
+      };
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        IOUtils.closeSafely(true, out);
+      }
+    }
} }
} }

@ -136,8 +136,11 @@ public class MockRandomCodec extends Codec {
final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SEED_EXT); final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SEED_EXT);
final IndexOutput out = state.directory.createOutput(seedFileName); final IndexOutput out = state.directory.createOutput(seedFileName);
-    out.writeLong(seed);
-    out.close();
+    try {
+      out.writeLong(seed);
+    } finally {
+      out.close();
+    }
final Random random = new Random(seed); final Random random = new Random(seed);
