SOLR-2452: Merged with trunk up to r1129202; standardized solr/contrib/* layouts.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1129205 13f79535-47bb-0310-9956-ffa450edef68
Steven Rowe 2011-05-30 14:51:25 +00:00
commit 31c83c9d6f
588 changed files with 30652 additions and 4074 deletions

View File

@ -73,6 +73,7 @@
</target>
<target name="eclipse" description="Setup Eclipse configuration">
<copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false"/>
<copy file="dev-tools/eclipse/dot.classpath" tofile=".classpath" overwrite="true"/>
<mkdir dir=".settings"/>
<copy file="dev-tools/eclipse/resources.prefs"

View File

@ -20,8 +20,6 @@
<classpathentry kind="src" path="lucene/contrib/queryparser/src/test"/>
<classpathentry kind="src" path="lucene/contrib/spatial/src/java"/>
<classpathentry kind="src" path="lucene/contrib/spatial/src/test"/>
<classpathentry kind="src" path="lucene/contrib/spellchecker/src/java"/>
<classpathentry kind="src" path="lucene/contrib/spellchecker/src/test"/>
<classpathentry kind="src" path="lucene/contrib/wordnet/src/java"/>
<classpathentry kind="src" path="lucene/contrib/wordnet/src/test"/>
<classpathentry kind="src" path="lucene/contrib/xml-query-parser/src/java"/>
@ -44,6 +42,8 @@
<classpathentry kind="src" path="modules/benchmark/src/test"/>
<classpathentry kind="src" path="modules/grouping/src/java"/>
<classpathentry kind="src" path="modules/grouping/src/test"/>
<classpathentry kind="src" path="modules/suggest/src/java"/>
<classpathentry kind="src" path="modules/suggest/src/test"/>
<classpathentry kind="src" path="solr/src/java"/>
<classpathentry kind="src" path="solr/src/webapp/src"/>
<classpathentry kind="src" path="solr/src/common"/>

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>lucene_solr</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>

View File

@ -11,7 +11,6 @@
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queries/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queryparser/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spatial/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spellchecker/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/wordnet/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/xml-query-parser/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/analysis/common/build.xml" />
@ -21,6 +20,7 @@
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/suggest/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />

View File

@ -12,7 +12,6 @@
<module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/queryparser/queryparser.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/spatial/spatial.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/spellchecker/spellchecker.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/wordnet/wordnet.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/xml-query-parser/xml-query-parser.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/common/common.iml" />
@ -22,6 +21,7 @@
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
<module filepath="$PROJECT_DIR$/modules/suggest/suggest.iml" />
<module filepath="$PROJECT_DIR$/solr/solr.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />

View File

@ -141,13 +141,6 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="spellchecker contrib" type="JUnit" factoryName="JUnit">
<module name="spellchecker" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/lucene/build/contrib/spellchecker" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="stempel analysis module" type="JUnit" factoryName="JUnit">
<module name="stempel" />
<option name="TEST_OBJECT" value="package" />
@ -155,6 +148,13 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
<module name="suggest" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/suggest/build" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="uima contrib" type="JUnit" factoryName="JUnit">
<module name="uima" />
<option name="TEST_OBJECT" value="package" />
@ -197,8 +197,8 @@
<item index="17" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
<item index="18" class="java.lang.String" itemvalue="JUnit.solr" />
<item index="19" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
<item index="20" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
<item index="21" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="20" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="21" class="java.lang.String" itemvalue="JUnit.suggest module" />
<item index="22" class="java.lang.String" itemvalue="JUnit.uima contrib" />
<item index="23" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
<item index="24" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />

View File

@ -1,18 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/java" />
<output-test url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/test" />
<output url="file://$MODULE_DIR$/build/classes/java" />
<output-test url="file://$MODULE_DIR$/build/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="lucene" />
</component>

View File

@ -39,7 +39,6 @@
<module>queries</module>
<module>queryparser</module>
<module>spatial</module>
<module>spellchecker</module>
<module>wordnet</module>
<module>xml-query-parser</module>
</modules>

View File

@ -34,6 +34,7 @@
<module>analysis</module>
<module>benchmark</module>
<module>grouping</module>
<module>suggest</module>
</modules>
<build>
<directory>build/lucene-modules-aggregator</directory>

View File

@ -24,16 +24,16 @@
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-parent</artifactId>
<version>@version@</version>
<relativePath>../../pom.xml</relativePath>
<relativePath>../../lucene/pom.xml</relativePath>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-spellchecker</artifactId>
<artifactId>lucene-suggest</artifactId>
<packaging>jar</packaging>
<name>Lucene Spellchecker</name>
<description>Spell Checker</description>
<name>Lucene Suggest</name>
<description>Lucene Suggest Module</description>
<properties>
<module-directory>lucene/contrib/spellchecker</module-directory>
<build-directory>../../build/contrib/spellchecker</build-directory>
<module-directory>modules/suggest</module-directory>
<build-directory>build</build-directory>
</properties>
<dependencies>
<dependency>
@ -43,14 +43,14 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-test-framework</artifactId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<artifactId>lucene-test-framework</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>

View File

@ -89,7 +89,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-spellchecker</artifactId>
<artifactId>lucene-suggest</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@ -427,7 +427,32 @@ Bug fixes
with more document deletions is requested before a reader with fewer
deletions, provided they share some segments. (yonik)
======================= Lucene 3.x (not yet released) =======================
* LUCENE-3147,LUCENE-3152: Fixed open file handles leaks in many places in the
code. Now MockDirectoryWrapper (in test-framework) tracks all open files,
including locks, and fails if the test fails to release all of them.
(Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)
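A minimal sketch (not part of this change; class and file names are illustrative) of how the tracking surfaces in a test, assuming the usual test-framework entry point LuceneTestCase.newDirectory():

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.util.LuceneTestCase;

    public class TestOpenFileTracking extends LuceneTestCase {
      public void testAllFilesReleased() throws Exception {
        Directory dir = newDirectory();            // MockDirectoryWrapper in the test framework
        IndexOutput out = dir.createOutput("demo.bin");
        out.writeInt(42);
        out.close();   // omit this close and dir.close() fails the test under LUCENE-3147/3152
        dir.close();   // verifies that no files or locks are still open
      }
    }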
======================= Lucene 3.x (not yet released) ================
Changes in backwards compatibility policy
* LUCENE-3140: IndexOutput.copyBytes now takes a DataInput (superclass
of IndexInput) as its first argument. (Robert Muir, Dawid Weiss,
Mike McCandless)
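A minimal sketch (assumed usage, not from this patch) of the reworked signature; since IndexInput extends DataInput it can be handed to copyBytes directly:

    import java.io.IOException;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.store.IndexOutput;

    public class CopyBytesSketch {
      static void copyFile(Directory dir, String src, String dst) throws IOException {
        IndexInput in = dir.openInput(src);        // IndexInput is a DataInput
        IndexOutput out = dir.createOutput(dst);
        try {
          out.copyBytes(in, in.length());          // first argument is now DataInput-typed
        } finally {
          in.close();
          out.close();
        }
      }
    }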
Changes in runtime behavior
* LUCENE-2834: the hash used to compute the lock file name when the
lock file is not stored in the index has changed. This means you
will see a different lucene-XXX-write.lock in your lock directory.
(Robert Muir, Uwe Schindler, Mike McCandless)
New Features
* LUCENE-3140: Added experimental FST implementation to Lucene.
(Robert Muir, Dawid Weiss, Mike McCandless)
======================= Lucene 3.2.0 =======================
Changes in backwards compatibility policy
@ -486,6 +511,10 @@ New features
document IDs and scores encountered during the search, and "replay" them to
another Collector. (Mike McCandless, Shai Erera)
* LUCENE-3112: Added experimental IndexWriter.add/updateDocuments,
enabling a block of documents to be indexed, atomically, with
guaranteed sequential docIDs. (Mike McCandless)
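A minimal sketch (assumed usage; class, field, and term names are illustrative) of indexing and replacing a document block atomically:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;

    public class BlockIndexingSketch {
      static void indexBlock(IndexWriter writer, String groupId) throws IOException {
        List<Document> block = new ArrayList<Document>();
        for (int i = 0; i < 3; i++) {
          Document doc = new Document();
          doc.add(new Field("group", groupId, Field.Store.YES, Field.Index.NOT_ANALYZED));
          block.add(doc);
        }
        writer.addDocuments(block);   // the whole block gets contiguous docIDs
        // later: atomically replace the block matching the delete term
        writer.updateDocuments(new Term("group", groupId), block);
      }
    }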
API Changes
* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
@ -507,6 +536,9 @@ Optimizations
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
buffer deleted terms to later apply to past segments. (Mike McCandless)
* LUCENE-3126: IndexWriter.addIndexes copies incoming segments into CFS if they
aren't already and MergePolicy allows that. (Shai Erera)
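For context, a minimal sketch (assumed usage) of the call affected by this change; whether copied segments end up in compound-file format depends on the writer's MergePolicy:

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    public class AddIndexesSketch {
      static void mergeInto(IndexWriter writer, Directory... sources) throws IOException {
        writer.addIndexes(sources);   // incoming segments may be rewritten as CFS per LUCENE-3126
        writer.commit();
      }
    }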
Bug fixes
* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
@ -541,6 +573,9 @@ Build
* LUCENE-3006: Building javadocs will fail on warnings by default.
Override with -Dfailonjavadocwarning=false (sarowe, gsingers)
* LUCENE-3128: "ant eclipse" creates a .project file for easier Eclipse
integration (unless one already exists). (Daniel Serodio via Shai Erera)
Test Cases
* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to

View File

@ -227,7 +227,6 @@
<packageset dir="contrib/misc/src/java"/>
<packageset dir="contrib/queries/src/java"/>
<packageset dir="contrib/spatial/src/java"/>
<packageset dir="contrib/spellchecker/src/java"/>
<packageset dir="contrib/wordnet/src/java"/>
<packageset dir="contrib/xml-query-parser/src/java"/>
<packageset dir="contrib/queryparser/src/java"/>
@ -248,7 +247,6 @@
<group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.regex*:org.apache.regexp*"/>
<group title="contrib: Query Parser" packages="org.apache.lucene.queryParser.*"/>
<group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
<group title="contrib: SpellChecker" packages="org.apache.lucene.search.spell*"/>
<group title="contrib: WordNet" packages="org.apache.lucene.wordnet*"/>
<group title="contrib: XML Query Parser" packages="org.apache.lucene.xmlparser*"/>

View File

@ -6,6 +6,8 @@ Build
* LUCENE-2845: Moved contrib/benchmark to modules.
* LUCENE-2995: Moved contrib/spellchecker into modules/suggest.
New Features
* LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
@ -48,7 +50,14 @@ Bug Fixes
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
not lowercasing the key before checking for the tag (Adriano Crestani)
======================= Lucene 3.x (not yet released) =======================
======================= Lucene 3.x (not yet released) ================
API Changes
* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
(Sujit Pal via Koji Sekiguchi)
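A minimal sketch (assumed usage) of the new accessor; previously callers had to reach into the package-private fragInfos field:

    import org.apache.lucene.search.vectorhighlight.FieldFragList;
    import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;

    public class FragInfoSketch {
      static void dump(FieldFragList ffl) {
        for (WeightedFragInfo info : ffl.getFragInfos()) {
          System.out.println(info);   // e.g. subInfos=(a((0,1)))/1.0(0,100)
        }
      }
    }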
======================= Lucene 3.2.0 =======================
Changes in backwards compatibility policy

View File

@ -93,7 +93,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
if( maxNumFragments < 0 )
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );
List<String> fragments = new ArrayList<String>( maxNumFragments );
Field[] values = getFields( reader, docId, fieldName );

View File

@ -29,7 +29,7 @@ import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseIn
*/
public class FieldFragList {
List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
private List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
/**
* a constructor.
@ -50,6 +50,15 @@ public class FieldFragList {
fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
}
/**
* return the list of WeightedFragInfos.
*
* @return fragInfos.
*/
public List<WeightedFragInfo> getFragInfos() {
return fragInfos;
}
public static class WeightedFragInfo {
List<SubInfo> subInfos;

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
/**
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@ -80,16 +81,16 @@ public class FieldTermStack {
Set<String> termSet = fieldQuery.getTermSet( fieldName );
// just return to make null snippet if un-matched fieldName specified when fieldMatch == true
if( termSet == null ) return;
final CharsRef spare = new CharsRef();
for( BytesRef term : tpv.getTerms() ){
if( !termSet.contains( term.utf8ToString() ) ) continue;
if( !termSet.contains( term.utf8ToChars(spare).toString() ) ) continue;
int index = tpv.indexOf( term );
TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
if( tvois == null ) return; // just return to make null snippets
int[] poss = tpv.getTermPositions( index );
if( poss == null ) return; // just return to make null snippets
for( int i = 0; i < tvois.length; i++ )
termList.add( new TermInfo( term.utf8ToString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
termList.add( new TermInfo( term.utf8ToChars(spare).toString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
}
// sort by position

View File

@ -24,7 +24,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
public void testNullFieldFragList() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
assertEquals( 0, ffl.fragInfos.size() );
assertEquals( 0, ffl.getFragInfos().size() );
}
public void testTooSmallFragSize() throws Exception {
@ -40,90 +40,90 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
public void testSmallerFragSizeThanTermQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
assertEquals( 1, ffl.fragInfos.size() );
if (VERBOSE) System.out.println( ffl.fragInfos.get( 0 ).toString() );
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1TermIndex() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test2TermsIndex1Frag() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test2TermsIndex2Frags() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.fragInfos.get( 1 ).toString() );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
}
public void test2TermsQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
assertEquals( 0, ffl.fragInfos.size() );
assertEquals( 0, ffl.getFragInfos().size() );
ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testPhraseQuery() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
assertEquals( 0, ffl.fragInfos.size() );
assertEquals( 0, ffl.getFragInfos().size() );
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
assertEquals( 0, ffl.fragInfos.size() );
assertEquals( 0, ffl.getFragInfos().size() );
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testPhraseQuerySlop() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
}
private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
@ -142,8 +142,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1PhraseLongMV() throws Exception {
@ -154,8 +154,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1PhraseLongMVB() throws Exception {
@ -166,7 +166,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
}
}

View File

@ -24,21 +24,21 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
public void testNullFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
assertEquals( 0, ffl.fragInfos.size() );
assertEquals( 0, ffl.getFragInfos().size() );
}
public void testShortFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testLongFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
}
private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {

View File

@ -41,6 +41,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
/**
* Represented as a coupled graph of class instances, this
@ -228,12 +229,13 @@ public class InstantiatedIndex
if (fieldsC != null) {
FieldsEnum fieldsEnum = fieldsC.iterator();
String field;
final CharsRef spare = new CharsRef();
while((field = fieldsEnum.next()) != null) {
if (fields == null || fields.contains(field)) {
TermsEnum termsEnum = fieldsEnum.terms();
BytesRef text;
while((text = termsEnum.next()) != null) {
String termText = text.utf8ToString();
String termText = text.utf8ToChars(spare).toString();
InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
final long totalTermFreq = termsEnum.totalTermFreq();
if (totalTermFreq != -1) {

View File

@ -0,0 +1,2 @@
AnyObjectId[b9c8c8a170881dfe9c33adc87c26348904510954] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -42,4 +42,26 @@
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
<target name="build-native-unix" >
<mkdir dir="${common.build.dir}/native"/>
<taskdef resource="cpptasks.tasks">
<classpath>
<pathelement location="ant_lib/cpptasks-1.0b5.jar"/>
</classpath>
</taskdef>
<cc outtype="shared" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" >
<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
<includepath>
<pathelement location="${java.home}/../include"/>
<pathelement location="${java.home}/../include/linux"/>
<pathelement location="${java.home}/../include/solaris"/>
</includepath>
<compilerarg value="-fPIC" />
</cc>
</target>
</project>

View File

@ -26,6 +26,7 @@ import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexWriter; // Required for javadocs
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.FSDirectory;
@ -45,6 +46,11 @@ import org.apache.lucene.store.FSDirectory;
* @lucene.experimental You can easily
* accidentally remove segments from your index so be
* careful!
*
* <p><b>NOTE</b>: this tool is unaware of documents added
* atomically via {@link IndexWriter#addDocuments} or {@link
* IndexWriter#updateDocuments}, which means it can easily
* break up such document groups.
*/
public class IndexSplitter {
public SegmentInfos infos;

View File

@ -40,6 +40,11 @@ import org.apache.lucene.util.Version;
* <p>Note 2: the disadvantage of this tool is that source index needs to be
* read as many times as there are parts to be created, hence the name of this
* tool.
*
* <p><b>NOTE</b>: this tool is unaware of documents added
* atomically via {@link IndexWriter#addDocuments} or {@link
* IndexWriter#updateDocuments}, which means it can easily
* break up such document groups.
*/
public class MultiPassIndexSplitter {

View File

@ -269,7 +269,7 @@ public class NRTCachingDirectory extends Directory {
in = cache.openInput(fileName);
in.copyBytes(out, in.length());
} finally {
IOUtils.closeSafely(in, out);
IOUtils.closeSafely(false, in, out);
}
synchronized(this) {
cache.deleteFile(fileName);

View File

@ -51,9 +51,11 @@ for details.
Steps to build:
<ul>
<li> <tt>cd lucene/contrib/misc/src/java/org/apache/lucene/store</tt>
<li> <tt>cd lucene/contrib/misc/</tt>
<li> Compile NativePosixUtil.cpp -> libNativePosixUtil.so. On linux, something like <tt>gcc -fPIC -o libNativePosixUtil.so -shared -Wl,-soname,libNativePosixUtil.so -I$JAVA_HOME/include -I$JAVA_HOME/include/linux NativePosixUtil.cpp -lc -lstdc++</tt>. Add <tt>-m64</tt> if you want to compile 64bit (and java must be run with -d64 so it knows to load a 64bit dynamic lib).
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux run<tt> ant build-native-unix</tt>.
<li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder
<li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <tt>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</tt>, where /path/to/dir contains libNativePosixUtil.so)

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search.regex;
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.regexp.CharacterIterator;
import org.apache.regexp.RE;
@ -104,11 +105,11 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher {
private RE regexp;
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
private final CharsRef utf16 = new CharsRef(10);
private final CharacterIterator utf16wrapper = new CharacterIterator() {
public char charAt(int pos) {
return utf16.result[pos];
return utf16.chars[pos];
}
public boolean isEnd(int pos) {
@ -120,7 +121,7 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
}
public String substring(int beginIndex, int endIndex) {
return new String(utf16.result, beginIndex, endIndex - beginIndex);
return new String(utf16.chars, beginIndex, endIndex - beginIndex);
}
};

View File

@ -21,6 +21,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
/**
@ -95,25 +96,11 @@ public class JavaUtilRegexCapabilities implements RegexCapabilities {
class JavaUtilRegexMatcher implements RegexCapabilities.RegexMatcher {
private final Pattern pattern;
private final Matcher matcher;
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
private final CharSequence utf16wrapper = new CharSequence() {
public int length() {
return utf16.length;
}
public char charAt(int index) {
return utf16.result[index];
}
public CharSequence subSequence(int start, int end) {
return new String(utf16.result, start, end - start);
}
};
private final CharsRef utf16 = new CharsRef(10);
public JavaUtilRegexMatcher(String regex, int flags) {
this.pattern = Pattern.compile(regex, flags);
this.matcher = this.pattern.matcher(utf16wrapper);
this.matcher = this.pattern.matcher(utf16);
}
public boolean match(BytesRef term) {

View File

@ -48,6 +48,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
@ -850,8 +851,9 @@ public final class MoreLikeThis {
{
BytesRef[] terms = vector.getTerms();
int freqs[]=vector.getTermFrequencies();
final CharsRef spare = new CharsRef();
for (int j = 0; j < terms.length; j++) {
String term = terms[j].utf8ToString();
final String term = terms[j].utf8ToChars(spare).toString();
if(isNoiseWord(term)){
continue;

View File

@ -1,5 +1,22 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="DTDDocAnt" default="main">
<import file="../contrib-build.xml"/>

View File

@ -23,6 +23,7 @@ import java.util.zip.DataFormatException;
import java.io.ByteArrayOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
/** Simple utility class providing static methods to
@ -118,9 +119,9 @@ public class CompressionTools {
/** Decompress the byte array previously returned by
* compressString back into a String */
public static String decompressString(byte[] value) throws DataFormatException {
UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
final byte[] bytes = decompress(value);
CharsRef result = new CharsRef(bytes.length);
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
return new String(result.result, 0, result.length);
return new String(result.chars, 0, result.length);
}
}
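A minimal round-trip sketch (assumed usage, not part of this patch) of the public API whose internals changed above:

    import org.apache.lucene.document.CompressionTools;

    public class CompressionRoundTrip {
      public static void main(String[] args) throws Exception {
        byte[] packed = CompressionTools.compressString("fast and compact");
        String restored = CompressionTools.decompressString(packed);  // now backed by CharsRef
        System.out.println(restored);   // prints: fast and compact
      }
    }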

View File

@ -60,6 +60,9 @@ public final class CompoundFileWriter {
/** temporary holder for the start of this file's data section */
long dataOffset;
/** the directory which contains the file. */
Directory dir;
}
// Before versioning started.
@ -119,6 +122,14 @@ public final class CompoundFileWriter {
* has been added already
*/
public void addFile(String file) {
addFile(file, directory);
}
/**
* Same as {@link #addFile(String)}, only for files that are found in an
* external {@link Directory}.
*/
public void addFile(String file, Directory dir) {
if (merged)
throw new IllegalStateException(
"Can't add extensions after merge has been called");
@ -133,6 +144,7 @@ public final class CompoundFileWriter {
FileEntry entry = new FileEntry();
entry.file = file;
entry.dir = dir;
entries.add(entry);
}
@ -170,7 +182,7 @@ public final class CompoundFileWriter {
fe.directoryOffset = os.getFilePointer();
os.writeLong(0); // for now
os.writeString(IndexFileNames.stripSegmentName(fe.file));
totalSize += directory.fileLength(fe.file);
totalSize += fe.dir.fileLength(fe.file);
}
// Pre-allocate size of file as optimization --
@ -216,7 +228,7 @@ public final class CompoundFileWriter {
* output stream.
*/
private void copyFile(FileEntry source, IndexOutput os) throws IOException {
IndexInput is = directory.openInput(source.file);
IndexInput is = source.dir.openInput(source.file);
try {
long startPtr = os.getFilePointer();
long length = is.length();
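A minimal sketch (assumed usage; the compound-file name is illustrative) of the new addFile(String, Directory) overload introduced above:

    import java.io.IOException;
    import org.apache.lucene.index.CompoundFileWriter;
    import org.apache.lucene.store.Directory;

    public class ExternalEntrySketch {
      static void pack(Directory target, Directory external, String externalFile) throws IOException {
        CompoundFileWriter cfw = new CompoundFileWriter(target, "_external.cfs");
        cfw.addFile(externalFile, external);   // entry is read from the external directory
        cfw.close();                           // compound file is written into 'target'
      }
    }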

View File

@ -84,19 +84,44 @@ final class DocFieldProcessor extends DocConsumer {
@Override
public void abort() {
for(int i=0;i<fieldHash.length;i++) {
DocFieldProcessorPerField field = fieldHash[i];
while(field != null) {
Throwable th = null;
for (DocFieldProcessorPerField field : fieldHash) {
while (field != null) {
final DocFieldProcessorPerField next = field.next;
field.abort();
try {
field.abort();
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
field = next;
}
}
try {
fieldsWriter.abort();
} finally {
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
try {
consumer.abort();
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
// If any errors occurred, rethrow the first one.
if (th != null) {
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
// defensive code - we should not hit unchecked exceptions
throw new RuntimeException(th);
}
}

View File

@ -87,6 +87,7 @@ final class DocInverter extends DocFieldConsumer {
endConsumer.startDocument();
}
@Override
public void finishDocument() throws IOException {
// TODO: allow endConsumer.finishDocument to also return
// a DocWriter

View File

@ -53,8 +53,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
@Override
void abort() {
consumer.abort();
endConsumer.abort();
try {
consumer.abort();
} finally {
endConsumer.abort();
}
}
@Override

View File

@ -228,14 +228,19 @@ final class DocumentsWriter {
}
final Iterator<ThreadState> threadsIterator = perThreadPool.getActivePerThreadsIterator();
while (threadsIterator.hasNext()) {
ThreadState perThread = threadsIterator.next();
final ThreadState perThread = threadsIterator.next();
perThread.lock();
try {
if (perThread.isActive()) { // we might be closed
perThread.perThread.abort();
perThread.perThread.checkAndResetHasAborted();
try {
perThread.perThread.abort();
} catch (IOException ex) {
// continue
} finally {
perThread.perThread.checkAndResetHasAborted();
flushControl.doOnAbort(perThread);
}
} else {
assert closed;
}
@ -243,7 +248,6 @@ final class DocumentsWriter {
perThread.unlock();
}
}
success = true;
} finally {
if (infoStream != null) {
@ -274,11 +278,9 @@ final class DocumentsWriter {
flushControl.setClosed();
}
boolean updateDocument(final Document doc, final Analyzer analyzer,
final Term delTerm) throws CorruptIndexException, IOException {
private boolean preUpdate() throws CorruptIndexException, IOException {
ensureOpen();
boolean maybeMerge = false;
final boolean isUpdate = delTerm != null;
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
// Help out flushing any queued DWPTs so we can un-stall:
if (infoStream != null) {
@ -303,9 +305,59 @@ final class DocumentsWriter {
message("continue indexing after helpling out flushing DocumentsWriter is healthy");
}
}
return maybeMerge;
}
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(),
this, doc);
private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
if (flushingDWPT != null) {
maybeMerge |= doFlush(flushingDWPT);
} else {
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
if (nextPendingFlush != null) {
maybeMerge |= doFlush(nextPendingFlush);
}
}
return maybeMerge;
}
boolean updateDocuments(final Iterable<Document> docs, final Analyzer analyzer,
final Term delTerm) throws CorruptIndexException, IOException {
boolean maybeMerge = preUpdate();
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
final DocumentsWriterPerThread flushingDWPT;
try {
if (!perThread.isActive()) {
ensureOpen();
assert false: "perThread is not active but we are still open";
}
final DocumentsWriterPerThread dwpt = perThread.perThread;
try {
final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
numDocsInRAM.addAndGet(docCount);
} finally {
if (dwpt.checkAndResetHasAborted()) {
flushControl.doOnAbort(perThread);
}
}
final boolean isUpdate = delTerm != null;
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
} finally {
perThread.unlock();
}
return postUpdate(flushingDWPT, maybeMerge);
}
boolean updateDocument(final Document doc, final Analyzer analyzer,
final Term delTerm) throws CorruptIndexException, IOException {
boolean maybeMerge = preUpdate();
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
final DocumentsWriterPerThread flushingDWPT;
try {
@ -324,20 +376,13 @@ final class DocumentsWriter {
flushControl.doOnAbort(perThread);
}
}
final boolean isUpdate = delTerm != null;
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
} finally {
perThread.unlock();
}
if (flushingDWPT != null) {
maybeMerge |= doFlush(flushingDWPT);
} else {
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
if (nextPendingFlush != null) {
maybeMerge |= doFlush(nextPendingFlush);
}
}
return maybeMerge;
return postUpdate(flushingDWPT, maybeMerge);
}
private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
@ -541,4 +586,20 @@ final class DocumentsWriter {
return (!isSegmentFlush || segment != null);
}
}
// used by IW during close to assert all DWPTs are inactive after the final flush
boolean assertNoActiveDWPT() {
Iterator<ThreadState> activePerThreadsIterator = perThreadPool.getAllPerThreadsIterator();
while(activePerThreadsIterator.hasNext()) {
ThreadState next = activePerThreadsIterator.next();
next.lock();
try {
assert !next.isActive();
} finally {
next.unlock();
}
}
return true;
}
}

View File

@ -16,6 +16,7 @@ package org.apache.lucene.index;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
@ -68,7 +69,7 @@ public final class DocumentsWriterFlushControl {
this.stallControl = new DocumentsWriterStallControl();
this.perThreadPool = documentsWriter.perThreadPool;
this.flushPolicy = documentsWriter.flushPolicy;
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;;
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
this.config = config;
this.documentsWriter = documentsWriter;
}
@ -162,8 +163,6 @@ public final class DocumentsWriterFlushControl {
stallControl.updateStalled(this);
assert assertMemory();
}
}
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
@ -217,7 +216,7 @@ public final class DocumentsWriterFlushControl {
assert assertMemory();
// Take it out of the loop this DWPT is stale
perThreadPool.replaceForFlush(state, closed);
}finally {
} finally {
stallControl.updateStalled(this);
}
}
@ -305,6 +304,7 @@ public final class DocumentsWriterFlushControl {
synchronized void setClosed() {
// set by DW to signal that we should not release new DWPT after close
this.closed = true;
perThreadPool.deactivateUnreleasedStates();
}
/**
@ -387,8 +387,12 @@ public final class DocumentsWriterFlushControl {
toFlush.add(flushingDWPT);
}
} else {
// get the new delete queue from DW
next.perThread.initialize();
if (closed) {
next.resetWriter(null); // make this state inactive
} else {
// get the new delete queue from DW
next.perThread.initialize();
}
}
} finally {
next.unlock();
@ -451,10 +455,21 @@ public final class DocumentsWriterFlushControl {
try {
for (DocumentsWriterPerThread dwpt : flushQueue) {
doAfterFlush(dwpt);
try {
dwpt.abort();
} catch (IOException ex) {
// continue
}
}
for (BlockedFlush blockedFlush : blockedFlushes) {
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
flushingWriters
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
doAfterFlush(blockedFlush.dwpt);
try {
blockedFlush.dwpt.abort();
} catch (IOException ex) {
// continue
}
}
} finally {
fullFlush = false;
@ -512,5 +527,4 @@ public final class DocumentsWriterFlushControl {
boolean anyStalledThreads() {
return stallControl.anyStalledThreads();
}
}

View File

@ -104,7 +104,7 @@ public class DocumentsWriterPerThread {
// largish:
doc = null;
analyzer = null;
}
}
}
static class FlushedSegment {
@ -177,7 +177,7 @@ public class DocumentsWriterPerThread {
this.parent = parent;
this.fieldInfos = fieldInfos;
this.writer = parent.indexWriter;
this.infoStream = parent.indexWriter.getInfoStream();
this.infoStream = parent.infoStream;
this.docState = new DocState(this);
this.docState.similarityProvider = parent.indexWriter.getConfig()
.getSimilarityProvider();
@ -253,6 +253,82 @@ public class DocumentsWriterPerThread {
finishDocument(delTerm);
}
public int updateDocuments(Iterable<Document> docs, Analyzer analyzer, Term delTerm) throws IOException {
assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
assert deleteQueue != null;
docState.analyzer = analyzer;
if (segment == null) {
// this call is synchronized on IndexWriter.segmentInfos
segment = writer.newSegmentName();
assert numDocsInRAM == 0;
}
int docCount = 0;
try {
for(Document doc : docs) {
docState.doc = doc;
docState.docID = numDocsInRAM;
docCount++;
boolean success = false;
try {
consumer.processDocument(fieldInfos);
success = true;
} finally {
if (!success) {
// An exc is being thrown...
if (!aborting) {
// One of the documents hit a non-aborting
// exception (eg something happened during
// analysis). We now go and mark any docs
// from this batch that we had already indexed
// as deleted:
int docID = docState.docID;
final int endDocID = docID - docCount;
while (docID > endDocID) {
deleteDocID(docID);
docID--;
}
// Incr here because finishDocument will not
// be called (because an exc is being thrown):
numDocsInRAM++;
fieldInfos.revertUncommitted();
} else {
abort();
}
}
}
success = false;
try {
consumer.finishDocument();
success = true;
} finally {
if (!success) {
abort();
}
}
finishDocument(null);
}
// Apply delTerm only after all indexing has
// succeeded, but apply it only to docs prior to when
// this batch started:
if (delTerm != null) {
deleteQueue.add(delTerm, deleteSlice);
assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
deleteSlice.apply(pendingDeletes, numDocsInRAM-docCount);
}
} finally {
docState.clear();
}
return docCount;
}
private void finishDocument(Term delTerm) throws IOException {
/*
* here we actually finish the document in two steps 1. push the delete into
@ -474,6 +550,7 @@ public class DocumentsWriterPerThread {
super(blockSize);
}
@Override
public byte[] getByteBlock() {
bytesUsed.addAndGet(blockSize);
return new byte[blockSize];
@ -486,7 +563,7 @@ public class DocumentsWriterPerThread {
}
}
};
}
void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.util.Iterator;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
import org.apache.lucene.index.codecs.CodecProvider;
@ -194,6 +193,21 @@ public abstract class DocumentsWriterPerThreadPool {
return null;
}
/**
* Deactivate all unreleased threadstates
*/
protected synchronized void deactivateUnreleasedStates() {
for (int i = numThreadStatesActive; i < perThreads.length; i++) {
final ThreadState threadState = perThreads[i];
threadState.lock();
try {
threadState.resetWriter(null);
} finally {
threadState.unlock();
}
}
}
protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) {
assert threadState.isHeldByCurrentThread();
final DocumentsWriterPerThread dwpt = threadState.perThread;
@ -212,7 +226,7 @@ public abstract class DocumentsWriterPerThreadPool {
// don't recycle DWPT by default
}
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc);
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter);
/**
* Returns an iterator providing access to all {@link ThreadState}

View File

@ -113,7 +113,7 @@ final class FieldsWriter {
void close() throws IOException {
if (directory != null) {
try {
IOUtils.closeSafely(fieldsStream, indexStream);
IOUtils.closeSafely(false, fieldsStream, indexStream);
} finally {
fieldsStream = indexStream = null;
}

View File

@ -57,9 +57,10 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
TermsHash termsHash = null;
try {
TermsHash termsHash = null;
/*
/*
Current writer chain:
FieldsConsumer
-> IMPL: FormatPostingsTermsDictWriter
@ -69,36 +70,38 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
-> IMPL: FormatPostingsDocsWriter
-> PositionsConsumer
-> IMPL: FormatPostingsPositionsWriter
*/
*/
for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
// Aggregate the storePayload as seen by the same
// field across multiple threads
if (!fieldInfo.omitTermFreqAndPositions) {
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
// Aggregate the storePayload as seen by the same
// field across multiple threads
if (!fieldInfo.omitTermFreqAndPositions) {
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
}
// If this field has postings then add them to the
// segment
fieldWriter.flush(fieldInfo.name, consumer, state);
TermsHashPerField perField = fieldWriter.termsHashPerField;
assert termsHash == null || termsHash == perField.termsHash;
termsHash = perField.termsHash;
int numPostings = perField.bytesHash.size();
perField.reset();
perField.shrinkHash(numPostings);
fieldWriter.reset();
}
// If this field has postings then add them to the
// segment
fieldWriter.flush(fieldInfo.name, consumer, state);
TermsHashPerField perField = fieldWriter.termsHashPerField;
assert termsHash == null || termsHash == perField.termsHash;
termsHash = perField.termsHash;
int numPostings = perField.bytesHash.size();
perField.reset();
perField.shrinkHash(numPostings);
fieldWriter.reset();
if (termsHash != null) {
termsHash.reset();
}
} finally {
consumer.close();
}
if (termsHash != null) {
termsHash.reset();
}
consumer.close();
}
BytesRef payload;

View File

@ -17,6 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.util.regex.Pattern;
import org.apache.lucene.index.codecs.Codec; // for javadocs
/**
@ -239,4 +241,15 @@ public final class IndexFileNames {
return filename;
}
/**
* Returns true if the given filename ends with the separate norms file
* pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}.
*/
public static boolean isSeparateNormsFile(String filename) {
int idx = filename.lastIndexOf('.');
if (idx == -1) return false;
String ext = filename.substring(idx + 1);
return Pattern.matches(SEPARATE_NORMS_EXTENSION + "[0-9]+", ext);
}
}
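For illustration, a minimal sketch of how this predicate behaves on a few hypothetical filenames, assuming SEPARATE_NORMS_EXTENSION is "s" (as the SegmentInfo change further below suggests); these names are made up, not taken from the patch:
    // Hypothetical filenames:
    IndexFileNames.isSeparateNormsFile("_3.s7");   // true: extension is "s" followed by digits
    IndexFileNames.isSeparateNormsFile("_3.nrm");  // false: shared norms file
    IndexFileNames.isSeparateNormsFile("_3");      // false: no extension at all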

View File

@ -23,6 +23,7 @@ import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@ -51,6 +52,7 @@ import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.MapBackedSet;
@ -1071,7 +1073,8 @@ public class IndexWriter implements Closeable {
if (infoStream != null)
message("at close: " + segString());
// used by assert below
final DocumentsWriter oldWriter = docWriter;
synchronized(this) {
readerPool.close();
docWriter = null;
@ -1085,6 +1088,7 @@ public class IndexWriter implements Closeable {
synchronized(this) {
closed = true;
}
assert oldWriter.assertNoActiveDWPT();
} catch (OutOfMemoryError oom) {
handleOOM(oom, "closeInternal");
} finally {
@ -1099,6 +1103,8 @@ public class IndexWriter implements Closeable {
}
}
/** Returns the Directory used by this index. */
public Directory getDirectory() {
// Pass false because the flush during closing calls getDirectory
@ -1227,6 +1233,111 @@ public class IndexWriter implements Closeable {
updateDocument(null, doc, analyzer);
}
/**
* Atomically adds a block of documents with sequentially
* assigned document IDs, such that an external reader
* will see all or none of the documents.
*
* <p><b>WARNING</b>: the index does not currently record
* which documents were added as a block. Today this is
* fine, because merging will preserve the block (as long
* as none of them were deleted). But it's possible in the
* future that Lucene may more aggressively re-order
* documents (for example, perhaps to obtain better index
* compression), in which case you may need to fully
* re-index your documents at that time.
*
* <p>See {@link #addDocument(Document)} for details on
* index and IndexWriter state after an Exception, and
* flushing/merging temporary free space requirements.</p>
*
* <p><b>NOTE</b>: tools that do offline splitting of an index
* (for example, IndexSplitter in contrib) or
* re-sorting of documents (for example, IndexSorter in
* contrib) are not aware of these atomically added documents
* and will likely break them up. Use such tools at your
* own risk!
*
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @lucene.experimental
*/
public void addDocuments(Iterable<Document> docs) throws CorruptIndexException, IOException {
addDocuments(docs, analyzer);
}
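A minimal usage sketch of this block-add API, with made-up field names and an already-open IndexWriter; an illustration under those assumptions, not code from the patch:
    // Index a child/parent block atomically; a reader sees all of it or none of it.
    List<Document> block = new ArrayList<Document>();

    Document child = new Document();
    child.add(new Field("type", "child", Field.Store.YES, Field.Index.NOT_ANALYZED));
    block.add(child);

    Document parent = new Document();
    parent.add(new Field("type", "parent", Field.Store.YES, Field.Index.NOT_ANALYZED));
    block.add(parent);

    writer.addDocuments(block);  // document IDs are assigned sequentially within the block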
/**
* Atomically adds a block of documents, analyzed using the
* provided analyzer, with sequentially assigned document
* IDs, such that an external reader will see all or none
* of the documents.
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @lucene.experimental
*/
public void addDocuments(Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
updateDocuments(null, docs, analyzer);
}
/**
* Atomically deletes documents matching the provided
* delTerm and adds a block of documents with sequentially
* assigned document IDs, such that an external reader
* will see all or none of the documents.
*
* See {@link #addDocuments(Iterable)}.
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @lucene.experimental
*/
public void updateDocuments(Term delTerm, Iterable<Document> docs) throws CorruptIndexException, IOException {
updateDocuments(delTerm, docs, analyzer);
}
/**
* Atomically deletes documents matching the provided
* delTerm and adds a block of documents, analyzed using
* the provided analyzer, with sequentially
* assigned document IDs, such that an external reader
* will see all or none of the documents.
*
* See {@link #addDocuments(Iterable)}.
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @lucene.experimental
*/
public void updateDocuments(Term delTerm, Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
ensureOpen();
try {
boolean success = false;
boolean anySegmentFlushed = false;
try {
anySegmentFlushed = docWriter.updateDocuments(docs, analyzer, delTerm);
success = true;
} finally {
if (!success && infoStream != null) {
message("hit exception updating document");
}
}
if (anySegmentFlushed) {
maybeMerge();
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "updateDocuments");
}
}
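And a companion sketch for the delete-then-add variant, keyed by a hypothetical "blockId" term (again an illustration, not code from the patch):
    // Atomically replace a previously indexed block: every document matching the
    // delete term is removed, and the new block becomes visible in the same flush.
    Term blockKey = new Term("blockId", "42");

    List<Document> newBlock = new ArrayList<Document>();
    Document doc = new Document();
    doc.add(new Field("blockId", "42", Field.Store.NO, Field.Index.NOT_ANALYZED));
    newBlock.add(doc);

    writer.updateDocuments(blockKey, newBlock);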
/**
* Deletes the document(s) containing <code>term</code>.
*
@ -2217,10 +2328,10 @@ public class IndexWriter implements Closeable {
* <p>
* <b>NOTE:</b> this method only copies the segments of the incoming indexes
* and does not merge them. Therefore deleted documents are not removed and
* the new segments are not merged with the existing ones. Also, the segments
* are copied as-is, meaning they are not converted to CFS if they aren't,
* and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
* or {@link #optimize} afterwards.
* the new segments are not merged with the existing ones. Also, if the merge
* policy allows compound files, then any segment that is not compound is
* converted to such. However, if the segment is compound, it is copied as-is
* even if the merge policy does not allow compound files.
*
* <p>This requires this index not be among those to be added.
*
@ -2244,6 +2355,7 @@ public class IndexWriter implements Closeable {
int docCount = 0;
List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
Comparator<String> versionComparator = StringHelper.getVersionComparator();
for (Directory dir : dirs) {
if (infoStream != null) {
message("addIndexes: process directory " + dir);
@ -2263,46 +2375,22 @@ public class IndexWriter implements Closeable {
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
}
// Determine if the doc store of this segment needs to be copied. It's
// only relevant for segments that share doc store with others, because
// the DS might have been copied already, in which case we just want
// to update the DS name of this SegmentInfo.
// NOTE: pre-3x segments include a null DSName if they don't share doc
// store. So the following code ensures we don't accidentally insert
// 'null' to the map.
final String newDsName;
if (dsName != null) {
if (dsNames.containsKey(dsName)) {
newDsName = dsNames.get(dsName);
} else {
dsNames.put(dsName, newSegName);
newDsName = newSegName;
}
// create CFS only if the source segment is not CFS, and MP agrees it
// should be CFS.
boolean createCFS;
synchronized (this) { // Guard segmentInfos
createCFS = !info.getUseCompoundFile()
&& mergePolicy.useCompoundFile(segmentInfos, info)
// optimize case only for segments that don't share doc stores
&& versionComparator.compare(info.getVersion(), "3.1") >= 0;
}
if (createCFS) {
copySegmentIntoCFS(info, newSegName);
} else {
newDsName = newSegName;
copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
}
// Copy the segment files
for (String file: info.files()) {
final String newFileName;
if (IndexFileNames.isDocStoreFile(file)) {
newFileName = newDsName + IndexFileNames.stripSegmentName(file);
if (dsFilesCopied.contains(newFileName)) {
continue;
}
dsFilesCopied.add(newFileName);
} else {
newFileName = newSegName + IndexFileNames.stripSegmentName(file);
}
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
dir.copy(directory, file, newFileName);
}
// Update SI appropriately
info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
info.dir = directory;
info.name = newSegName;
infos.add(info);
}
}
@ -2391,6 +2479,76 @@ public class IndexWriter implements Closeable {
}
}
/** Copies the segment into the IndexWriter's directory, as a compound segment. */
private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
String segFileName = IndexFileNames.segmentFileName(segName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
Collection<String> files = info.files();
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
for (String file : files) {
String newFileName = segName + IndexFileNames.stripSegmentName(file);
if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
&& !IndexFileNames.isSeparateNormsFile(file)) {
cfsWriter.addFile(file, info.dir);
} else {
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
info.dir.copy(directory, file, newFileName);
}
}
// Create the .cfs
cfsWriter.close();
info.dir = directory;
info.name = segName;
info.setUseCompoundFile(true);
}
/** Copies the segment files as-is into the IndexWriter's directory. */
private void copySegmentAsIs(SegmentInfo info, String segName,
Map<String, String> dsNames, Set<String> dsFilesCopied)
throws IOException {
// Determine if the doc store of this segment needs to be copied. It's
// only relevant for segments that share doc store with others,
// because the DS might have been copied already, in which case we
// just want to update the DS name of this SegmentInfo.
// NOTE: pre-3x segments include a null DSName if they don't share doc
// store. The following code ensures we don't accidentally insert
// 'null' to the map.
String dsName = info.getDocStoreSegment();
final String newDsName;
if (dsName != null) {
if (dsNames.containsKey(dsName)) {
newDsName = dsNames.get(dsName);
} else {
dsNames.put(dsName, segName);
newDsName = segName;
}
} else {
newDsName = segName;
}
// Copy the segment files
for (String file: info.files()) {
final String newFileName;
if (IndexFileNames.isDocStoreFile(file)) {
newFileName = newDsName + IndexFileNames.stripSegmentName(file);
if (dsFilesCopied.contains(newFileName)) {
continue;
}
dsFilesCopied.add(newFileName);
} else {
newFileName = segName + IndexFileNames.stripSegmentName(file);
}
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
info.dir.copy(directory, file, newFileName);
}
info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
info.dir = directory;
info.name = segName;
}
/**
* A hook for extending classes to execute operations after pending added and
* deleted documents have been flushed to the Directory but before the change
@ -3176,50 +3334,50 @@ public class IndexWriter implements Closeable {
runningMerges.remove(merge);
}
private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
final int numSegments = merge.readers.size();
if (suppressExceptions) {
// Suppress any new exceptions so we throw the
// original cause
boolean anyChanges = false;
for (int i=0;i<numSegments;i++) {
if (merge.readers.get(i) != null) {
try {
anyChanges |= readerPool.release(merge.readers.get(i), false);
} catch (Throwable t) {
}
merge.readers.set(i, null);
}
Throwable th = null;
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
try {
merge.readerClones.get(i).close();
} catch (Throwable t) {
boolean anyChanges = false;
boolean drop = !suppressExceptions;
for (int i = 0; i < numSegments; i++) {
if (merge.readers.get(i) != null) {
try {
anyChanges |= readerPool.release(merge.readers.get(i), drop);
} catch (Throwable t) {
if (th == null) {
th = t;
}
// This was a private clone and we had the
// only reference
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
merge.readerClones.set(i, null);
}
merge.readers.set(i, null);
}
if (anyChanges) {
checkpoint();
}
} else {
for (int i=0;i<numSegments;i++) {
if (merge.readers.get(i) != null) {
readerPool.release(merge.readers.get(i), true);
merge.readers.set(i, null);
}
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
try {
merge.readerClones.get(i).close();
// This was a private clone and we had the only reference
assert merge.readerClones.get(i).getRefCount() == 0;
merge.readerClones.set(i, null);
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
// This was a private clone and we had the
// only reference
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
merge.readerClones.set(i, null);
}
}
if (suppressExceptions && anyChanges) {
checkpoint();
}
// If any error occurred, throw it.
if (!suppressExceptions && th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
/** Does the actual (time-consuming) work of the merge,

View File

@ -22,6 +22,7 @@ import java.util.Collection;
import java.util.Map;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
// TODO FI: norms could actually be stored as doc store
@ -49,9 +50,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
IndexOutput normsOut = state.directory.createOutput(normsFileName);
boolean success = false;
try {
normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
normsOut.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
int normCount = 0;
@ -84,9 +85,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
}
success = true;
} finally {
normsOut.close();
IOUtils.closeSafely(!success, normsOut);
}
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
/**
* Enables native per field codec support. This class selects the codec used to
@ -61,7 +62,15 @@ final class PerFieldCodecWrapper extends Codec {
assert segmentCodecs == state.segmentCodecs;
final Codec[] codecs = segmentCodecs.codecs;
for (int i = 0; i < codecs.length; i++) {
consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
boolean success = false;
try {
consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, consumers);
}
}
}
}
@ -74,22 +83,7 @@ final class PerFieldCodecWrapper extends Codec {
@Override
public void close() throws IOException {
Iterator<FieldsConsumer> it = consumers.iterator();
IOException err = null;
while (it.hasNext()) {
try {
it.next().close();
} catch (IOException ioe) {
// keep first IOException we hit but keep
// closing the rest
if (err == null) {
err = ioe;
}
}
}
if (err != null) {
throw err;
}
IOUtils.closeSafely(false, consumers);
}
}
@ -122,14 +116,7 @@ final class PerFieldCodecWrapper extends Codec {
// If we hit exception (eg, IOE because writer was
// committing, or, for any other reason) we must
// go back and close all FieldsProducers we opened:
for(FieldsProducer fp : producers.values()) {
try {
fp.close();
} catch (Throwable t) {
// Suppress all exceptions here so we continue
// to throw the original one
}
}
IOUtils.closeSafely(true, producers.values());
}
}
}
@ -177,22 +164,7 @@ final class PerFieldCodecWrapper extends Codec {
@Override
public void close() throws IOException {
Iterator<FieldsProducer> it = codecs.values().iterator();
IOException err = null;
while (it.hasNext()) {
try {
it.next().close();
} catch (IOException ioe) {
// keep first IOException we hit but keep
// closing the rest
if (err == null) {
err = ioe;
}
}
}
if (err != null) {
throw err;
}
IOUtils.closeSafely(false, codecs.values());
}
@Override

View File

@ -59,7 +59,7 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
/**
* Reads the snapshots information from the given {@link Directory}. This
* method does can be used if the snapshots information is needed, however you
* method can be used if the snapshots information is needed, however you
* cannot instantiate the deletion policy (because e.g., some other process
* keeps a lock on the snapshots directory).
*/
@ -122,11 +122,19 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
writer.commit();
}
// Initializes the snapshots information. This code should basically run
// only if mode != CREATE, but if it is, it's no harm as we only open the
// reader once and immediately close it.
for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
registerSnapshotInfo(e.getKey(), e.getValue(), null);
try {
// Initializes the snapshots information. This code should basically run
// only if mode != CREATE, but if it is, it's no harm as we only open the
// reader once and immediately close it.
for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
registerSnapshotInfo(e.getKey(), e.getValue(), null);
}
} catch (RuntimeException e) {
writer.close(); // don't leave any open file handles
throw e;
} catch (IOException e) {
writer.close(); // don't leave any open file handles
throw e;
}
}

View File

@ -436,7 +436,7 @@ public final class SegmentInfo implements Cloneable {
*/
public String getNormFileName(int number) {
if (hasSeparateNorms(number)) {
return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + number, normGen.get(number));
} else {
// single file for all norms
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);

View File

@ -40,6 +40,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ThreadInterruptedException;
/**
@ -323,17 +324,13 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
infosWriter.prepareCommit(segnOutput);
success = true;
pendingSegnOutput = segnOutput;
success = true;
} finally {
if (!success) {
// We hit an exception above; try to close the file
// but suppress any exception:
try {
segnOutput.close();
} catch (Throwable t) {
// Suppress so we keep throwing the original exception
}
IOUtils.closeSafely(true, segnOutput);
try {
// Try not to leave a truncated segments_N file in
// the index:
@ -945,6 +942,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
} finally {
genOutput.close();
}
} catch (ThreadInterruptedException t) {
throw t;
} catch (Throwable t) {
// It's OK if we fail to write this file since it's
// used only as one of the retry fallbacks.
@ -963,7 +962,6 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
finishCommit(dir);
}
public String toString(Directory directory) {
StringBuilder buffer = new StringBuilder();
buffer.append(getCurrentSegmentFileName()).append(": ");

View File

@ -27,13 +27,13 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.MultiBits;
import org.apache.lucene.util.ReaderUtil;
@ -46,10 +46,6 @@ import org.apache.lucene.util.ReaderUtil;
* @see #add
*/
final class SegmentMerger {
/** norms header placeholder */
static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
private Directory directory;
private String segment;
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
@ -124,6 +120,12 @@ final class SegmentMerger {
return mergedDocs;
}
/**
* NOTE: this method creates a compound file for all files returned by
* info.files(). While, generally, this may include separate norms and
* deletion files, this SegmentInfo must not reference such files when this
* method is called, because they are not allowed within a compound file.
*/
final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
throws IOException {
@ -131,6 +133,10 @@ final class SegmentMerger {
Collection<String> files = info.files();
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
for (String file : files) {
assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
: ".del file is not allowed in .cfs: " + file;
assert !IndexFileNames.isSeparateNormsFile(file)
: "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file;
cfsWriter.addFile(file);
}
@ -540,14 +546,13 @@ final class SegmentMerger {
}
codec = segmentWriteState.segmentCodecs.codec();
final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);
// NOTE: this is silly, yet, necessary -- we create a
// MultiBits as our skip docs only to have it broken
// apart when we step through the docs enums in
// MultiDocsEnum.
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
try {
// NOTE: this is silly, yet, necessary -- we create a
// MultiBits as our skip docs only to have it broken
// apart when we step through the docs enums in
// MultiDocsEnum.
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
consumer.merge(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
@ -573,12 +578,13 @@ final class SegmentMerger {
private void mergeNorms() throws IOException {
IndexOutput output = null;
boolean success = false;
try {
for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
if (output == null) {
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
output.writeBytes(SegmentNorms.NORMS_HEADER, SegmentNorms.NORMS_HEADER.length);
}
for (IndexReader reader : readers) {
final int maxDoc = reader.maxDoc();
@ -606,10 +612,9 @@ final class SegmentMerger {
}
}
}
success = true;
} finally {
if (output != null) {
output.close();
}
IOUtils.closeSafely(!success, output);
}
}
}

View File

@ -33,6 +33,10 @@ import org.apache.lucene.store.IndexOutput;
*/
final class SegmentNorms implements Cloneable {
/** norms header placeholder */
static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
int refCount = 1;
// If this instance is a clone, the originalNorm
@ -219,7 +223,7 @@ final class SegmentNorms implements Cloneable {
boolean success = false;
try {
try {
out.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
out.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
out.writeBytes(bytes, owner.maxDoc());
} finally {
out.close();

View File

@ -574,7 +574,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
}
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
long nextNormSeek = SegmentNorms.NORMS_HEADER.length; //skip header (header unused for now)
int maxDoc = maxDoc();
for (FieldInfo fi : core.fieldInfos) {
if (norms.containsKey(fi.name)) {
@ -619,7 +619,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
if (isUnversioned) {
normSeek = 0;
} else {
normSeek = SegmentMerger.NORMS_HEADER.length;
normSeek = SegmentNorms.NORMS_HEADER.length;
}
}

View File

@ -54,9 +54,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
fill(state.numDocs);
assert state.segmentName != null;
String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
tvx.close();
tvf.close();
tvd.close();
IOUtils.closeSafely(false, tvx, tvf, tvd);
tvx = tvd = tvf = null;
if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
@ -89,18 +87,25 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
private final void initTermVectorsWriter() throws IOException {
if (tvx == null) {
boolean success = false;
try {
// If we hit an exception while init'ing the term
// vector output files, we must abort this segment
// because those files will be in an unknown
// state:
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
// If we hit an exception while init'ing the term
// vector output files, we must abort this segment
// because those files will be in an unknown
// state:
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, tvx, tvd, tvf);
}
}
lastDocID = 0;
}
@ -139,7 +144,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
}
}
assert lastDocID == docState.docID;
assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
lastDocID++;
@ -152,21 +157,27 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
public void abort() {
hasVectors = false;
try {
IOUtils.closeSafely(tvx, tvd, tvf);
} catch (IOException ignored) {
IOUtils.closeSafely(true, tvx, tvd, tvf);
} catch (IOException e) {
// cannot happen since we suppress exceptions
throw new RuntimeException(e);
}
try {
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
} catch (IOException ignored) {
}
try {
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
} catch (IOException ignored) {
}
try {
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
} catch (IOException ignored) {
}
tvx = tvd = tvf = null;
lastDocID = 0;

View File

@ -31,15 +31,22 @@ final class TermVectorsWriter {
private FieldInfos fieldInfos;
public TermVectorsWriter(Directory directory, String segment,
FieldInfos fieldInfos)
throws IOException {
// Open files for TermVector storage
tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
FieldInfos fieldInfos) throws IOException {
boolean success = false;
try {
// Open files for TermVector storage
tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, tvx, tvd, tvf);
}
}
this.fieldInfos = fieldInfos;
}
@ -51,8 +58,7 @@ final class TermVectorsWriter {
* @param vectors
* @throws IOException
*/
public final void addAllDocVectors(TermFreqVector[] vectors)
throws IOException {
public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {
tvx.writeLong(tvd.getFilePointer());
tvx.writeLong(tvf.getFilePointer());
@ -187,6 +193,6 @@ final class TermVectorsWriter {
final void close() throws IOException {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
IOUtils.closeSafely(tvx, tvd, tvf);
IOUtils.closeSafely(false, tvx, tvd, tvf);
}
}
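The comment above spells out the contract this patch keeps leaning on: close every stream, suppress secondary failures, and rethrow only the first exception when suppression is off. A sketch of a helper with those semantics follows; it is an assumed shape for illustration, not the actual IOUtils implementation:
    // Assumed semantics of a closeSafely-style helper
    // (imports: java.io.Closeable, java.io.IOException).
    static void closeSafely(boolean suppressExceptions, Closeable... objects) throws IOException {
      IOException first = null;
      for (Closeable object : objects) {
        try {
          if (object != null) {
            object.close();
          }
        } catch (IOException e) {
          if (first == null) {
            first = e;       // remember the first failure, keep closing the rest
          }
        }
      }
      if (!suppressExceptions && first != null) {
        throw first;         // rethrow only when the caller asked for it
      }
    }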

View File

@ -54,7 +54,6 @@ final class TermsHash extends InvertedDocConsumer {
final boolean trackAllocations;
public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
this.docState = docWriter.docState;
this.docWriter = docWriter;
@ -108,11 +107,11 @@ final class TermsHash extends InvertedDocConsumer {
}
for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
TermsHashPerField perField = (TermsHashPerField) entry.getValue();
childFields.put(entry.getKey(), perField.consumer);
if (nextTermsHash != null) {
nextChildFields.put(entry.getKey(), perField.nextPerField);
}
TermsHashPerField perField = (TermsHashPerField) entry.getValue();
childFields.put(entry.getKey(), perField.consumer);
if (nextTermsHash != null) {
nextChildFields.put(entry.getKey(), perField.nextPerField);
}
}
consumer.flush(childFields, state);
@ -134,12 +133,9 @@ final class TermsHash extends InvertedDocConsumer {
@Override
void finishDocument() throws IOException {
try {
consumer.finishDocument(this);
} finally {
if (nextTermsHash != null) {
nextTermsHash.consumer.finishDocument(nextTermsHash);
}
consumer.finishDocument(this);
if (nextTermsHash != null) {
nextTermsHash.consumer.finishDocument(nextTermsHash);
}
}

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; //javadoc
/**
@ -48,12 +47,10 @@ public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerT
}
@Override
public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) {
public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter) {
ThreadState threadState = threadBindings.get(requestingThread);
if (threadState != null) {
if (threadState.tryLock()) {
return threadState;
}
if (threadState != null && threadState.tryLock()) {
return threadState;
}
ThreadState minThreadState = null;

View File

@ -31,6 +31,7 @@ import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
// TODO: currently we encode all terms between two indexed
@ -66,24 +67,29 @@ public class BlockTermsWriter extends FieldsConsumer {
//private final String segment;
public BlockTermsWriter(
TermsIndexWriterBase termsIndexWriter,
SegmentWriteState state,
PostingsWriterBase postingsWriter)
throws IOException
{
public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
SegmentWriteState state, PostingsWriterBase postingsWriter)
throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
this.termsIndexWriter = termsIndexWriter;
out = state.directory.createOutput(termsFileName);
fieldInfos = state.fieldInfos;
writeHeader(out);
currentField = null;
this.postingsWriter = postingsWriter;
//segment = state.segmentName;
boolean success = false;
try {
fieldInfos = state.fieldInfos;
writeHeader(out);
currentField = null;
this.postingsWriter = postingsWriter;
//segment = state.segmentName;
//System.out.println("BTW.init seg=" + state.segmentName);
//System.out.println("BTW.init seg=" + state.segmentName);
postingsWriter.start(out); // have consumer write its format/header
postingsWriter.start(out); // have consumer write its format/header
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, out);
}
}
}
protected void writeHeader(IndexOutput out) throws IOException {
@ -130,20 +136,11 @@ public class BlockTermsWriter extends FieldsConsumer {
}
writeTrailer(dirStart);
} finally {
try {
out.close();
} finally {
try {
postingsWriter.close();
} finally {
termsIndexWriter.close();
}
}
IOUtils.closeSafely(false, out, postingsWriter, termsIndexWriter);
}
}
protected void writeTrailer(long dirStart) throws IOException {
// TODO Auto-generated method stub
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
}

View File

@ -88,6 +88,15 @@ public class CodecProvider {
return codec;
}
/**
* Returns <code>true</code> iff a codec with the given name is registered
* @param name codec name
* @return <code>true</code> iff a codec with the given name is registered, otherwise <code>false</code>.
*/
public synchronized boolean isCodecRegistered(String name) {
return codecs.containsKey(name);
}
public SegmentInfosWriter getSegmentInfosWriter() {
return infosWriter;
}
@ -145,6 +154,14 @@ public class CodecProvider {
return codec;
}
/**
* Returns <code>true</code> if this provider has a Codec registered for this
* field.
*/
public synchronized boolean hasFieldCodec(String name) {
return perFieldMap.containsKey(name);
}
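A small sketch of how these two lookups might be used together; the register/setFieldCodec calls and all names are assumptions made for illustration, not part of this patch:
    // Hypothetical: register a codec once, and bind a field only if it is still unbound.
    CodecProvider provider = CodecProvider.getDefault();
    if (!provider.isCodecRegistered("MyCodec")) {
      provider.register(myCodec);                // assumed registration call
    }
    if (!provider.hasFieldCodec("id")) {
      provider.setFieldCodec("id", "MyCodec");   // assumed per-field binding call
    }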
/**
* Returns the default {@link Codec} for this {@link CodecProvider}
*

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.ChecksumIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* Default implementation of {@link SegmentInfosWriter}.
@ -56,16 +57,24 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
throws IOException {
IndexOutput out = createOutput(dir, segmentFileName);
out.writeInt(FORMAT_CURRENT); // write FORMAT
out.writeLong(infos.version);
out.writeInt(infos.counter); // write counter
out.writeLong(infos.getGlobalFieldMapVersion());
out.writeInt(infos.size()); // write infos
for (SegmentInfo si : infos) {
si.write(out);
boolean success = false;
try {
out.writeInt(FORMAT_CURRENT); // write FORMAT
out.writeLong(infos.version);
out.writeInt(infos.counter); // write counter
out.writeLong(infos.getGlobalFieldMapVersion());
out.writeInt(infos.size()); // write infos
for (SegmentInfo si : infos) {
si.write(out);
}
out.writeStringStringMap(infos.getUserData());
success = true;
return out;
} finally {
if (!success) {
IOUtils.closeSafely(true, out);
}
}
out.writeStringStringMap(infos.getUserData());
return out;
}
protected IndexOutput createOutput(Directory dir, String segmentFileName)

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
@ -108,6 +109,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
}
success = true;
} finally {
if (!success) IOUtils.closeSafely(true, in);
if (indexDivisor > 0) {
in.close();
in = null;

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
import java.util.List;
@ -58,9 +59,17 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
termIndexInterval = state.termIndexInterval;
out = state.directory.createOutput(indexFileName);
fieldInfos = state.fieldInfos;
writeHeader(out);
out.writeInt(termIndexInterval);
boolean success = false;
try {
fieldInfos = state.fieldInfos;
writeHeader(out);
out.writeInt(termIndexInterval);
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, out);
}
}
}
protected void writeHeader(IndexOutput out) throws IOException {
@ -202,33 +211,37 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
}
}
@Override
public void close() throws IOException {
final long dirStart = out.getFilePointer();
final int fieldCount = fields.size();
boolean success = false;
try {
final long dirStart = out.getFilePointer();
final int fieldCount = fields.size();
int nonNullFieldCount = 0;
for(int i=0;i<fieldCount;i++) {
SimpleFieldWriter field = fields.get(i);
if (field.numIndexTerms > 0) {
nonNullFieldCount++;
int nonNullFieldCount = 0;
for(int i=0;i<fieldCount;i++) {
SimpleFieldWriter field = fields.get(i);
if (field.numIndexTerms > 0) {
nonNullFieldCount++;
}
}
}
out.writeVInt(nonNullFieldCount);
for(int i=0;i<fieldCount;i++) {
SimpleFieldWriter field = fields.get(i);
if (field.numIndexTerms > 0) {
out.writeVInt(field.fieldInfo.number);
out.writeVInt(field.numIndexTerms);
out.writeVLong(field.termsStart);
out.writeVLong(field.indexStart);
out.writeVLong(field.packedIndexStart);
out.writeVLong(field.packedOffsetsStart);
out.writeVInt(nonNullFieldCount);
for(int i=0;i<fieldCount;i++) {
SimpleFieldWriter field = fields.get(i);
if (field.numIndexTerms > 0) {
out.writeVInt(field.fieldInfo.number);
out.writeVInt(field.numIndexTerms);
out.writeVLong(field.termsStart);
out.writeVLong(field.indexStart);
out.writeVLong(field.packedIndexStart);
out.writeVLong(field.packedOffsetsStart);
}
}
writeTrailer(dirStart);
success = true;
} finally {
IOUtils.closeSafely(!success, out);
}
writeTrailer(dirStart);
out.close();
}
protected void writeTrailer(long dirStart) throws IOException {

View File

@ -19,10 +19,12 @@ package org.apache.lucene.index.codecs;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.util.BytesRef;
import java.io.Closeable;
import java.io.IOException;
/** @lucene.experimental */
public abstract class TermsIndexWriterBase {
public abstract class TermsIndexWriterBase implements Closeable {
public abstract class FieldWriter {
public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
@ -31,6 +33,4 @@ public abstract class TermsIndexWriterBase {
}
public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;
public abstract void close() throws IOException;
}

View File

@ -33,11 +33,11 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.automaton.fst.Builder;
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
import org.apache.lucene.util.automaton.fst.Util; // for toDot
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util; // for toDot
/** See {@link VariableGapTermsIndexWriter}
*

View File

@ -28,9 +28,10 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.automaton.fst.Builder;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
/**
* Selects index terms according to provided pluggable
@ -159,9 +160,17 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException {
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
out = state.directory.createOutput(indexFileName);
fieldInfos = state.fieldInfos;
this.policy = policy;
writeHeader(out);
boolean success = false;
try {
fieldInfos = state.fieldInfos;
this.policy = policy;
writeHeader(out);
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, out);
}
}
}
protected void writeHeader(IndexOutput out) throws IOException {
@ -265,8 +274,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
}
}
@Override
public void close() throws IOException {
try {
final long dirStart = out.getFilePointer();
final int fieldCount = fields.size();
@ -287,8 +296,10 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
}
}
writeTrailer(dirStart);
} finally {
out.close();
}
}
protected void writeTrailer(long dirStart) throws IOException {
out.seek(CodecUtil.headerLength(CODEC_NAME));

View File

@ -41,6 +41,7 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
protected final IndexOutput out;
private int upto;
private boolean hitExcDuringWrite;
// TODO what Var-Var codecs exist in practice... and what are their blocksizes like?
// if its less than 128 we should set that as max and use byte?
@ -105,19 +106,23 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
@Override
public void write(int v) throws IOException {
hitExcDuringWrite = true;
upto -= add(v)-1;
hitExcDuringWrite = false;
assert upto >= 0;
}
@Override
public void close() throws IOException {
try {
// stuff 0s in until the "real" data is flushed:
int stuffed = 0;
while(upto > stuffed) {
upto -= add(0)-1;
assert upto >= 0;
stuffed += 1;
if (!hitExcDuringWrite) {
// stuff 0s in until the "real" data is flushed:
int stuffed = 0;
while(upto > stuffed) {
upto -= add(0)-1;
assert upto >= 0;
stuffed += 1;
}
}
} finally {
out.close();

View File

@ -38,6 +38,7 @@ import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
/** This codec "inlines" the postings for terms that have
* low docFreq. It wraps another codec, which is used for
@ -81,7 +82,7 @@ public class PulsingCodec extends Codec {
success = true;
} finally {
if (!success) {
pulsingWriter.close();
IOUtils.closeSafely(true, pulsingWriter);
}
}
@ -93,11 +94,7 @@ public class PulsingCodec extends Codec {
return ret;
} finally {
if (!success) {
try {
pulsingWriter.close();
} finally {
indexWriter.close();
}
IOUtils.closeSafely(true, pulsingWriter, indexWriter);
}
}
}

View File

@ -71,8 +71,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
* for this term) is <= maxPositions, then the postings are
* inlined into terms dict */
public PulsingPostingsWriterImpl(int maxPositions, PostingsWriterBase wrappedPostingsWriter) throws IOException {
super();
pending = new Position[maxPositions];
for(int i=0;i<maxPositions;i++) {
pending[i] = new Position();

View File

@ -31,6 +31,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
* to .pyl, skip data to .skp
@ -49,18 +50,18 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
final IntIndexOutput freqOut;
final IntIndexOutput.Index freqIndex;
IntIndexOutput freqOut;
IntIndexOutput.Index freqIndex;
final IntIndexOutput posOut;
final IntIndexOutput.Index posIndex;
IntIndexOutput posOut;
IntIndexOutput.Index posIndex;
final IntIndexOutput docOut;
final IntIndexOutput.Index docIndex;
IntIndexOutput docOut;
IntIndexOutput.Index docIndex;
final IndexOutput payloadOut;
IndexOutput payloadOut;
final IndexOutput skipOut;
IndexOutput skipOut;
IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
@ -107,44 +108,51 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
}
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException {
super();
this.skipInterval = skipInterval;
this.skipMinimum = skipInterval; /* set to the same for now */
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
docOut = factory.createOutput(state.directory, docFileName);
docIndex = docOut.index();
freqOut = null;
freqIndex = null;
posOut = null;
posIndex = null;
payloadOut = null;
boolean success = false;
try {
this.skipInterval = skipInterval;
this.skipMinimum = skipInterval; /* set to the same for now */
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
docOut = factory.createOutput(state.directory, docFileName);
docIndex = docOut.index();
if (state.fieldInfos.hasProx()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
freqOut = factory.createOutput(state.directory, frqFileName);
freqIndex = freqOut.index();
if (state.fieldInfos.hasProx()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
freqOut = factory.createOutput(state.directory, frqFileName);
freqIndex = freqOut.index();
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
posOut = factory.createOutput(state.directory, posFileName);
posIndex = posOut.index();
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
posOut = factory.createOutput(state.directory, posFileName);
posIndex = posOut.index();
// TODO: -- only if at least one field stores payloads?
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
payloadOut = state.directory.createOutput(payloadFileName);
// TODO: -- only if at least one field stores payloads?
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
payloadOut = state.directory.createOutput(payloadFileName);
}
final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
skipOut = state.directory.createOutput(skipFileName);
totalNumDocs = state.numDocs;
skipListWriter = new SepSkipListWriter(skipInterval,
maxSkipLevels,
state.numDocs,
freqOut, docOut,
posOut, payloadOut);
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, docOut, skipOut, freqOut, posOut, payloadOut);
}
} else {
freqOut = null;
freqIndex = null;
posOut = null;
posIndex = null;
payloadOut = null;
}
final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
skipOut = state.directory.createOutput(skipFileName);
totalNumDocs = state.numDocs;
skipListWriter = new SepSkipListWriter(skipInterval,
maxSkipLevels,
state.numDocs,
freqOut, docOut,
posOut, payloadOut);
}
@Override
@ -306,25 +314,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
@Override
public void close() throws IOException {
try {
docOut.close();
} finally {
try {
skipOut.close();
} finally {
if (freqOut != null) {
try {
freqOut.close();
} finally {
try {
posOut.close();
} finally {
payloadOut.close();
}
}
}
}
}
IOUtils.closeSafely(false, docOut, skipOut, freqOut, posOut, payloadOut);
}
public static void getExtensions(Set<String> extensions) {

View File

@ -29,13 +29,14 @@ import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.fst.Builder;
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
import org.apache.lucene.util.automaton.fst.PairOutputs;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.PairOutputs;
import java.io.IOException;
import java.util.Comparator;
@ -236,7 +237,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private int tf;
private Bits skipDocs;
private final BytesRef scratch = new BytesRef(10);
private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
private final CharsRef scratchUTF16 = new CharsRef(10);
public SimpleTextDocsEnum() {
this.inStart = SimpleTextFieldsReader.this.in;
@ -286,7 +287,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
return docID;
}
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
termFreq = 0;
first = false;
} else if (scratch.startsWith(POS)) {
@ -323,8 +324,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
private Bits skipDocs;
private final BytesRef scratch = new BytesRef(10);
private final BytesRef scratch2 = new BytesRef(10);
private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
private final UnicodeUtil.UTF16Result scratchUTF16_2 = new UnicodeUtil.UTF16Result();
private final CharsRef scratchUTF16 = new CharsRef(10);
private final CharsRef scratchUTF16_2 = new CharsRef(10);
private BytesRef payload;
private long nextDocStart;
@ -368,7 +369,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
return docID;
}
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
tf = 0;
posStart = in.getFilePointer();
first = false;
@ -400,7 +401,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
readLine(in, scratch);
assert scratch.startsWith(POS): "got line=" + scratch.utf8ToString();
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
final int pos = ArrayUtil.parseInt(scratchUTF16_2.result, 0, scratchUTF16_2.length);
final int pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
final long fp = in.getFilePointer();
readLine(in, scratch);
if (scratch.startsWith(PAYLOAD)) {

View File

@ -143,8 +143,11 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
@Override
public void close() throws IOException {
write(END);
newline();
out.close();
try {
write(END);
newline();
} finally {
out.close();
}
}
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
/** @lucene.experimental */
public final class StandardPostingsWriter extends PostingsWriterBase {
@ -42,8 +43,8 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
final IndexOutput freqOut;
final IndexOutput proxOut;
IndexOutput freqOut;
IndexOutput proxOut;
final DefaultSkipListWriter skipListWriter;
/** Expert: The fraction of TermDocs entries stored in skip tables,
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
@ -85,31 +86,35 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
this(state, DEFAULT_SKIP_INTERVAL);
}
public StandardPostingsWriter(SegmentWriteState state, int skipInterval) throws IOException {
super();
this.skipInterval = skipInterval;
this.skipMinimum = skipInterval; /* set to the same for now */
//this.segment = state.segmentName;
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName);
boolean success = false;
try {
if (state.fieldInfos.hasProx()) {
// At least one field does not omit TF, so create the
// prox file
fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
proxOut = state.directory.createOutput(fileName);
} else {
// Every field omits TF so we will write no prox file
proxOut = null;
}
if (state.fieldInfos.hasProx()) {
// At least one field does not omit TF, so create the
// prox file
fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
proxOut = state.directory.createOutput(fileName);
} else {
// Every field omits TF so we will write no prox file
proxOut = null;
totalNumDocs = state.numDocs;
skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels,
state.numDocs, freqOut, proxOut);
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, freqOut, proxOut);
}
}
totalNumDocs = state.numDocs;
skipListWriter = new DefaultSkipListWriter(skipInterval,
maxSkipLevels,
state.numDocs,
freqOut,
proxOut);
}
@Override
@ -267,12 +272,6 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
@Override
public void close() throws IOException {
try {
freqOut.close();
} finally {
if (proxOut != null) {
proxOut.close();
}
}
IOUtils.closeSafely(false, freqOut, proxOut);
}
}
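
The constructor above follows a common rollback pattern: open each output in turn, and if anything fails before the writer is fully constructed, close whatever was already opened with closeSafely(true, ...) so the original exception is the one that propagates. A minimal standalone sketch of that pattern follows; the class and file names are illustrative, not part of this patch.

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;

// Illustrative only: open two outputs, roll back cleanly if the second open
// (or anything in between) throws, without masking the original exception.
public class OpenWithRollbackSketch {
  public static IndexOutput[] openPair(Directory dir) throws IOException {
    IndexOutput freq = null;
    IndexOutput prox = null;
    boolean success = false;
    try {
      freq = dir.createOutput("_sketch.frq");   // hypothetical file names
      prox = dir.createOutput("_sketch.prx");
      success = true;
      return new IndexOutput[] { freq, prox };
    } finally {
      if (!success) {
        // suppressExceptions=true: the failure that brought us here is the one to report
        IOUtils.closeSafely(true, freq, prox);
      }
    }
  }
}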

View File

@ -181,6 +181,7 @@ public abstract class CachingCollector extends Collector {
curUpto = 0;
}
cachedScorer.score = curScores[curUpto];
cachedScorer.doc = curDocs[curUpto];
other.collect(curDocs[curUpto++]);
}
}
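
The one-line fix above sets cachedScorer.doc before delegating, so a collector that the cache is later replayed into sees the correct document as well as the correct score. A rough usage sketch follows; the create(Collector, boolean, double) factory, isCached() and replay(Collector) calls are assumed from the CachingCollector API at this revision and should be checked against the class itself.

import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

// Rough sketch (factory and replay signatures assumed, see lead-in): run a
// query once, cache doc IDs and scores, then replay them into a second collector.
public class ReplaySketch {
  static void searchTwice(IndexSearcher searcher, Query query,
                          Collector first, Collector second) throws Exception {
    CachingCollector cache = CachingCollector.create(first, true /* cacheScores */, 64.0 /* MB */);
    searcher.search(query, cache);
    if (cache.isCached()) {
      cache.replay(second);   // 'second' now sees the same docs and scores as 'first'
    }
  }
}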

View File

@ -22,8 +22,6 @@ import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import static java.util.Collections.synchronizedSet;
@ -111,15 +109,6 @@ import org.apache.lucene.util.Constants;
* @see Directory
*/
public abstract class FSDirectory extends Directory {
private final static MessageDigest DIGESTER;
static {
try {
DIGESTER = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e.toString(), e);
}
}
/**
* Default read chunk size. This is a conditional default: on 32bit JVMs, it defaults to 100 MB. On 64bit JVMs, it's
@ -337,12 +326,6 @@ public abstract class FSDirectory extends Directory {
return openInput(name, BufferedIndexInput.BUFFER_SIZE);
}
/**
* So we can do some byte-to-hexchar conversion below
*/
private static final char[] HEX_DIGITS =
{'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
@Override
public String getLockID() {
ensureOpen();
@ -353,19 +336,12 @@ public abstract class FSDirectory extends Directory {
throw new RuntimeException(e.toString(), e);
}
byte digest[];
synchronized (DIGESTER) {
digest = DIGESTER.digest(dirName.getBytes());
int digest = 0;
for(int charIDX=0;charIDX<dirName.length();charIDX++) {
final char ch = dirName.charAt(charIDX);
digest = 31 * digest + ch;
}
StringBuilder buf = new StringBuilder();
buf.append("lucene-");
for (int i = 0; i < digest.length; i++) {
int b = digest[i];
buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
buf.append(HEX_DIGITS[b & 0xf]);
}
return buf.toString();
return "lucene-" + Integer.toHexString(digest);
}
/** Closes the store to future operations. */
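
The replacement above drops the shared MD5 digester in favor of a plain 31-based polynomial hash over the canonical directory name, rendered as "lucene-" plus hex. A standalone sketch of the same computation, with a hypothetical class name:

// Standalone illustration of the new lock-ID derivation shown above.
public class LockIdSketch {
  static String lockId(String dirName) {
    int digest = 0;
    for (int charIDX = 0; charIDX < dirName.length(); charIDX++) {
      digest = 31 * digest + dirName.charAt(charIDX);   // same rolling hash as String.hashCode()
    }
    return "lucene-" + Integer.toHexString(digest);
  }

  public static void main(String[] args) {
    System.out.println(lockId("/tmp/test-index"));      // prints lucene-<hex digest>
  }
}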

View File

@ -18,7 +18,6 @@ package org.apache.lucene.util;
*/
import java.util.Comparator;
import java.io.UnsupportedEncodingException;
/** Represents byte[], as a slice (offset + length) into an
* existing byte[].
@ -122,6 +121,7 @@ public final class BytesRef implements Comparable<BytesRef> {
public void copy(char text[], int offset, int length) {
UnicodeUtil.UTF16toUTF8(text, offset, length, this);
}
public boolean bytesEquals(BytesRef other) {
if (length == other.length) {
int otherUpto = other.offset;
@ -198,13 +198,15 @@ public final class BytesRef implements Comparable<BytesRef> {
/** Interprets stored bytes as UTF8 bytes, returning the
* resulting string */
public String utf8ToString() {
try {
return new String(bytes, offset, length, "UTF-8");
} catch (UnsupportedEncodingException uee) {
// should not happen -- UTF8 is presumably supported
// by all JREs
throw new RuntimeException(uee);
}
final CharsRef ref = new CharsRef(length);
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
return ref.toString();
}
/** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */
public CharsRef utf8ToChars(CharsRef ref) {
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
return ref;
}
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */

View File

@ -0,0 +1,215 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Represents char[], as a slice (offset + length) into an existing char[].
*
* @lucene.internal
*/
public final class CharsRef implements Comparable<CharsRef>, CharSequence {
private static final char[] EMPTY_ARRAY = new char[0];
public char[] chars;
public int offset;
public int length;
/**
   * Creates a new {@link CharsRef} initialized with an empty, zero-length array
*/
public CharsRef() {
this(EMPTY_ARRAY, 0, 0);
}
/**
* Creates a new {@link CharsRef} initialized with an array of the given
* capacity
*/
public CharsRef(int capacity) {
chars = new char[capacity];
}
/**
* Creates a new {@link CharsRef} initialized with the given array, offset and
* length
*/
public CharsRef(char[] chars, int offset, int length) {
assert chars != null;
assert chars.length >= offset + length;
this.chars = chars;
this.offset = offset;
this.length = length;
}
/**
   * Creates a new {@link CharsRef} initialized with the given String's character
* array
*/
public CharsRef(String string) {
this.chars = string.toCharArray();
this.offset = 0;
this.length = chars.length;
}
/**
* Creates a new {@link CharsRef} and copies the contents of the source into
* the new instance.
* @see #copy(CharsRef)
*/
public CharsRef(CharsRef other) {
copy(other);
}
@Override
public Object clone() {
return new CharsRef(this);
}
@Override
public int hashCode() {
final int prime = 31;
int result = 0;
final int end = offset + length;
for (int i = offset; i < end; i++) {
result = prime * result + chars[i];
}
return result;
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other instanceof CharsRef) {
return charsEquals((CharsRef) other);
}
if (other instanceof CharSequence) {
final CharSequence seq = (CharSequence) other;
if (length == seq.length()) {
int n = length;
int i = offset;
int j = 0;
while (n-- != 0) {
if (chars[i++] != seq.charAt(j++))
return false;
}
return true;
}
}
return false;
}
public boolean charsEquals(CharsRef other) {
if (length == other.length) {
int otherUpto = other.offset;
final char[] otherChars = other.chars;
final int end = offset + length;
for (int upto = offset; upto < end; upto++, otherUpto++) {
if (chars[upto] != otherChars[otherUpto]) {
return false;
}
}
return true;
} else {
return false;
}
}
/** Signed int order comparison */
public int compareTo(CharsRef other) {
if (this == other)
return 0;
final char[] aChars = this.chars;
int aUpto = this.offset;
final char[] bChars = other.chars;
int bUpto = other.offset;
final int aStop = aUpto + Math.min(this.length, other.length);
while (aUpto < aStop) {
int aInt = aChars[aUpto++];
int bInt = bChars[bUpto++];
if (aInt > bInt) {
return 1;
} else if (aInt < bInt) {
return -1;
}
}
// One is a prefix of the other, or, they are equal:
return this.length - other.length;
}
/**
* Copies the given {@link CharsRef} referenced content into this instance
* starting at offset 0.
*
* @param other
* the {@link CharsRef} to copy
*/
public void copy(CharsRef other) {
chars = ArrayUtil.grow(chars, other.length);
System.arraycopy(other.chars, other.offset, chars, 0, other.length);
length = other.length;
offset = 0;
}
public void grow(int newLength) {
if (chars.length < newLength) {
chars = ArrayUtil.grow(chars, newLength);
}
}
/**
* Copies the given array into this CharsRef starting at offset 0
*/
public void copy(char[] otherChars, int otherOffset, int otherLength) {
this.offset = 0;
append(otherChars, otherOffset, otherLength);
}
/**
* Appends the given array to this CharsRef starting at the current offset
*/
public void append(char[] otherChars, int otherOffset, int otherLength) {
grow(this.offset + otherLength);
System.arraycopy(otherChars, otherOffset, this.chars, this.offset,
otherLength);
this.length = otherLength;
}
@Override
public String toString() {
return new String(chars, offset, length);
}
public int length() {
return length;
}
public char charAt(int index) {
return chars[offset + index];
}
public CharSequence subSequence(int start, int end) {
    return new CharsRef(chars, offset + start, end - start);
}
}
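
Together with the BytesRef.utf8ToChars() method added earlier in this commit, the new class allows UTF-8/UTF-16 round trips without allocating a String per term. A minimal sketch using only APIs visible in this diff:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

// Encode a char[] as UTF-8 into a BytesRef, then decode it back into a
// reusable CharsRef via the utf8ToChars() helper added by this commit.
public class CharsRefRoundTrip {
  public static void main(String[] args) {
    char[] text = "lucene".toCharArray();
    BytesRef bytes = new BytesRef();
    UnicodeUtil.UTF16toUTF8(text, 0, text.length, bytes);
    CharsRef chars = bytes.utf8ToChars(new CharsRef(bytes.length));
    System.out.println(chars);   // prints: lucene
  }
}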

View File

@ -43,6 +43,8 @@ public final class Constants {
public static final boolean WINDOWS = OS_NAME.startsWith("Windows");
/** True iff running on SunOS. */
public static final boolean SUN_OS = OS_NAME.startsWith("SunOS");
/** True iff running on Mac OS X */
public static final boolean MAC_OS_X = OS_NAME.startsWith("Mac OS X");
public static final String OS_ARCH = System.getProperty("os.arch");
public static final String OS_VERSION = System.getProperty("os.version");

View File

@ -47,44 +47,113 @@ public final class IOUtils {
* @param objects objects to call <tt>close()</tt> on
*/
public static <E extends Exception> void closeSafely(E priorException, Closeable... objects) throws E, IOException {
IOException firstIOE = null;
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null)
if (object != null) {
object.close();
} catch (IOException ioe) {
if (firstIOE == null)
firstIOE = ioe;
}
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
}
if (priorException != null)
if (priorException != null) {
throw priorException;
else if (firstIOE != null)
throw firstIOE;
} else if (th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
/** @see #closeSafely(Exception, Closeable...) */
public static <E extends Exception> void closeSafely(E priorException, Iterable<Closeable> objects) throws E, IOException {
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null) {
object.close();
}
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
}
if (priorException != null) {
throw priorException;
} else if (th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
/**
* <p>Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions. Some of the <tt>Closeable</tt>s
* may be null, they are ignored. After everything is closed, method either throws the first of suppressed exceptions,
* or completes normally.</p>
* @param objects objects to call <tt>close()</tt> on
* Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions.
   * Some of the <tt>Closeable</tt>s may be null; they are ignored. After
   * everything is closed, and if {@code suppressExceptions} is {@code false},
   * the method either throws the first of the suppressed exceptions, or completes
   * normally.
*
* @param suppressExceptions
* if true then exceptions that occur during close() are suppressed
* @param objects
* objects to call <tt>close()</tt> on
*/
public static void closeSafely(Closeable... objects) throws IOException {
IOException firstIOE = null;
public static void closeSafely(boolean suppressExceptions, Closeable... objects) throws IOException {
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null)
if (object != null) {
object.close();
} catch (IOException ioe) {
if (firstIOE == null)
firstIOE = ioe;
}
} catch (Throwable t) {
if (th == null)
th = t;
}
}
if (firstIOE != null)
throw firstIOE;
if (th != null && !suppressExceptions) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
/**
* @see #closeSafely(boolean, Closeable...)
*/
public static void closeSafely(boolean suppressExceptions, Iterable<? extends Closeable> objects) throws IOException {
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null) {
object.close();
}
} catch (Throwable t) {
if (th == null)
th = t;
}
}
if (th != null && !suppressExceptions) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
}
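
The call sites changed elsewhere in this commit (SepPostingsWriterImpl.close() and StandardPostingsWriter.close()) use the suppressExceptions=false form, so every stream gets a chance to close and the first failure is still rethrown. A small sketch of a close() written against this helper; the class and its fields are illustrative only:

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;

// Illustrative writer with several outputs; some of them may be null
// (for example an omitted prox stream) and closeSafely() simply skips those.
public class MultiStreamWriterSketch implements Closeable {
  private final IndexOutput docOut, freqOut, posOut;   // hypothetical streams

  MultiStreamWriterSketch(IndexOutput docOut, IndexOutput freqOut, IndexOutput posOut) {
    this.docOut = docOut;
    this.freqOut = freqOut;
    this.posOut = posOut;
  }

  @Override
  public void close() throws IOException {
    // suppressExceptions=false: every stream is closed, and the first
    // exception encountered (if any) is rethrown afterwards.
    IOUtils.closeSafely(false, docOut, freqOut, posOut);
  }
}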

View File

@ -78,11 +78,15 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
protected long[] bits;
protected int wlen; // number of words (elements) used in the array
// Used only for assert:
private long numBits;
/** Constructs an OpenBitSet large enough to hold numBits.
*
* @param numBits
*/
public OpenBitSet(long numBits) {
this.numBits = numBits;
bits = new long[bits2words(numBits)];
wlen = bits.length;
}
@ -107,6 +111,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
public OpenBitSet(long[] bits, int numWords) {
this.bits = bits;
this.wlen = numWords;
this.numBits = wlen * 64;
}
@Override
@ -170,6 +175,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size
*/
public boolean fastGet(int index) {
assert index >= 0 && index < numBits;
int i = index >> 6; // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit check.
@ -194,6 +200,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public boolean fastGet(long index) {
assert index >= 0 && index < numBits;
int i = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -217,6 +224,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size
*/
public int getBit(int index) {
assert index >= 0 && index < numBits;
int i = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
return ((int)(bits[i]>>>bit)) & 0x01;
@ -245,6 +253,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public void fastSet(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -255,6 +264,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public void fastSet(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6);
int bit = (int)index & 0x3f;
long bitmask = 1L << bit;
@ -296,6 +306,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
ensureCapacity(index+1);
wlen = wordNum+1;
}
assert (numBits = Math.max(numBits, index+1)) >= 0;
return wordNum;
}
@ -304,6 +315,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public void fastClear(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6;
int bit = index & 0x03f;
long bitmask = 1L << bit;
@ -321,6 +333,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public void fastClear(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -415,6 +428,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public boolean getAndSet(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -427,6 +441,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public boolean getAndSet(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -439,6 +454,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public void fastFlip(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -449,6 +465,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public void fastFlip(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -467,6 +484,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public boolean flipAndGet(int index) {
assert index >= 0 && index < numBits;
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -478,6 +496,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
* The index should be less than the OpenBitSet size.
*/
public boolean flipAndGet(long index) {
assert index >= 0 && index < numBits;
int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
@ -674,6 +693,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
public void union(OpenBitSet other) {
int newLen = Math.max(wlen,other.wlen);
ensureCapacityWords(newLen);
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
long[] thisArr = this.bits;
long[] otherArr = other.bits;
@ -702,6 +722,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
public void xor(OpenBitSet other) {
int newLen = Math.max(wlen,other.wlen);
ensureCapacityWords(newLen);
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
long[] thisArr = this.bits;
long[] otherArr = other.bits;

View File

@ -95,6 +95,19 @@ package org.apache.lucene.util;
public final class UnicodeUtil {
/** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
* one would normally encounter, and definitely bigger than any UTF-8 terms.
* <p>
* WARNING: This is not a valid UTF8 Term
**/
public static final BytesRef BIG_TERM = new BytesRef(
new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
); // TODO this is unrelated here find a better place for it
public static void main(String[] args) {
System.out.println(Character.toChars(0x10FFFF + 1));
}
private UnicodeUtil() {} // no instance
public static final int UNI_SUR_HIGH_START = 0xD800;
@ -112,33 +125,6 @@ public final class UnicodeUtil {
Character.MIN_SUPPLEMENTARY_CODE_POINT -
(UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;
/**
* @lucene.internal
*/
public static final class UTF16Result {
public char[] result = new char[10];
public int[] offsets = new int[10];
public int length;
public void setLength(int newLength) {
if (result.length < newLength)
result = ArrayUtil.grow(result, newLength);
length = newLength;
}
public void copyText(UTF16Result other) {
setLength(other.length);
System.arraycopy(other.result, 0, result, 0, length);
}
public void copyText(String other) {
final int otherLength = other.length();
setLength(otherLength);
other.getChars(0, otherLength, result, 0);
length = otherLength;
}
}
/** Encode characters from a char[] source, starting at
* offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */
public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
@ -302,135 +288,6 @@ public final class UnicodeUtil {
result.length = upto;
}
/** Convert UTF8 bytes into UTF16 characters. If offset
* is non-zero, conversion starts at that starting point
* in utf8, re-using the results from the previous call
* up until offset. */
public static void UTF8toUTF16(final byte[] utf8, final int offset, final int length, final UTF16Result result) {
final int end = offset + length;
char[] out = result.result;
if (result.offsets.length <= end) {
result.offsets = ArrayUtil.grow(result.offsets, end+1);
}
final int[] offsets = result.offsets;
// If incremental decoding fell in the middle of a
// single unicode character, rollback to its start:
int upto = offset;
while(offsets[upto] == -1)
upto--;
int outUpto = offsets[upto];
// Pre-allocate for worst case 1-for-1
if (outUpto+length >= out.length) {
out = result.result = ArrayUtil.grow(out, outUpto+length+1);
}
while (upto < end) {
final int b = utf8[upto]&0xff;
final int ch;
offsets[upto++] = outUpto;
if (b < 0xc0) {
assert b < 0x80;
ch = b;
} else if (b < 0xe0) {
ch = ((b&0x1f)<<6) + (utf8[upto]&0x3f);
offsets[upto++] = -1;
} else if (b < 0xf0) {
ch = ((b&0xf)<<12) + ((utf8[upto]&0x3f)<<6) + (utf8[upto+1]&0x3f);
offsets[upto++] = -1;
offsets[upto++] = -1;
} else {
assert b < 0xf8;
ch = ((b&0x7)<<18) + ((utf8[upto]&0x3f)<<12) + ((utf8[upto+1]&0x3f)<<6) + (utf8[upto+2]&0x3f);
offsets[upto++] = -1;
offsets[upto++] = -1;
offsets[upto++] = -1;
}
if (ch <= UNI_MAX_BMP) {
// target is a character <= 0xFFFF
out[outUpto++] = (char) ch;
} else {
// target is a character in range 0xFFFF - 0x10FFFF
out[outUpto++] = (char) ((ch >> HALF_SHIFT) + 0xD7C0 /* UNI_SUR_HIGH_START - 64 */);
out[outUpto++] = (char) ((ch & HALF_MASK) + UNI_SUR_LOW_START);
}
}
offsets[upto] = outUpto;
result.length = outUpto;
}
/**
* Get the next valid UTF-16 String in UTF-16 order.
* <p>
* If the input String is already valid, it is returned.
* Otherwise the next String in code unit order is returned.
* </p>
* @param s input String (possibly with unpaired surrogates)
* @return next valid UTF-16 String in UTF-16 order
*/
public static String nextValidUTF16String(String s) {
if (validUTF16String(s))
return s;
else {
UTF16Result chars = new UTF16Result();
chars.copyText(s);
nextValidUTF16String(chars);
return new String(chars.result, 0, chars.length);
}
}
public static void nextValidUTF16String(UTF16Result s) {
final int size = s.length;
for (int i = 0; i < size; i++) {
char ch = s.result[i];
if (ch >= UnicodeUtil.UNI_SUR_HIGH_START
&& ch <= UnicodeUtil.UNI_SUR_HIGH_END) {
if (i < size - 1) {
i++;
char nextCH = s.result[i];
if (nextCH >= UnicodeUtil.UNI_SUR_LOW_START
&& nextCH <= UnicodeUtil.UNI_SUR_LOW_END) {
// Valid surrogate pair
} else
// Unmatched high surrogate
if (nextCH < UnicodeUtil.UNI_SUR_LOW_START) { // SMP not enumerated
s.setLength(i + 1);
s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
return;
} else { // SMP already enumerated
if (s.result[i - 1] == UnicodeUtil.UNI_SUR_HIGH_END) {
s.result[i - 1] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
s.setLength(i);
} else {
s.result[i - 1]++;
s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
s.setLength(i + 1);
}
return;
}
} else {
// Unmatched high surrogate in final position, SMP not yet enumerated
s.setLength(i + 2);
s.result[i + 1] = (char) UnicodeUtil.UNI_SUR_LOW_START;
return;
}
} else if (ch >= UnicodeUtil.UNI_SUR_LOW_START
&& ch <= UnicodeUtil.UNI_SUR_LOW_END) {
// Unmatched low surrogate, SMP already enumerated
s.setLength(i + 1);
s.result[i] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
return;
}
}
}
// Only called from assert
/*
private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
@ -705,4 +562,51 @@ public final class UnicodeUtil {
}
return sb.toString();
}
/**
* Interprets the given byte array as UTF-8 and converts to UTF-16. The {@link CharsRef} will be extended if
* it doesn't provide enough space to hold the worst case of each byte becoming a UTF-16 codepoint.
* <p>
* NOTE: Full characters are read, even if this reads past the length passed (and
   * can result in an ArrayIndexOutOfBoundsException if invalid UTF-8 is passed).
* Explicit checks for valid UTF-8 are not performed.
*/
public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
int out_offset = chars.offset = 0;
final char[] out = chars.chars = ArrayUtil.grow(chars.chars, length);
final int limit = offset + length;
while (offset < limit) {
int b = utf8[offset++]&0xff;
if (b < 0xc0) {
assert b < 0x80;
out[out_offset++] = (char)b;
} else if (b < 0xe0) {
out[out_offset++] = (char)(((b&0x1f)<<6) + (utf8[offset++]&0x3f));
} else if (b < 0xf0) {
out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f));
offset += 2;
} else {
assert b < 0xf8;
int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f);
offset += 3;
if (ch < UNI_MAX_BMP) {
out[out_offset++] = (char)ch;
} else {
int chHalf = ch - 0x0010000;
out[out_offset++] = (char) ((chHalf >> 10) + 0xD800);
out[out_offset++] = (char) ((chHalf & HALF_MASK) + 0xDC00);
}
}
}
chars.length = out_offset - chars.offset;
}
/**
* Utility method for {@link #UTF8toUTF16(byte[], int, int, CharsRef)}
* @see #UTF8toUTF16(byte[], int, int, CharsRef)
*/
public static void UTF8toUTF16(BytesRef bytesRef, CharsRef chars) {
UTF8toUTF16(bytesRef.bytes, bytesRef.offset, bytesRef.length, chars);
}
}
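
This byte[]-based overload is the one the SimpleText codec changes above rely on: keep a scratch CharsRef, decode a UTF-8 slice into it, and parse the digits in place. A compact sketch of that pattern, with fabricated input bytes:

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

// Decode a UTF-8 byte slice into a reusable CharsRef and parse it as an int,
// mirroring how SimpleTextFieldsReader now reads doc IDs and positions.
public class ParseIntFromUtf8 {
  public static void main(String[] args) {
    BytesRef line = new BytesRef();
    UnicodeUtil.UTF16toUTF8("42".toCharArray(), 0, 2, line);   // stand-in for bytes read from the index

    CharsRef scratch = new CharsRef(10);                       // reused across calls in the real reader
    UnicodeUtil.UTF8toUTF16(line.bytes, line.offset, line.length, scratch);
    int docID = ArrayUtil.parseInt(scratch.chars, 0, scratch.length);
    System.out.println(docID);                                 // 42
  }
}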

View File

@ -49,6 +49,13 @@ public enum Version {
@Deprecated
LUCENE_32,
/**
* Match settings and bugs in Lucene's 3.3 release.
* @deprecated (4.0) Use latest
*/
@Deprecated
LUCENE_33,
/** Match settings and bugs in Lucene's 4.0 release.
* <p>
* Use this to get the latest &amp; greatest settings, bug

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef;
/**
* Output is a sequence of bytes, for each input term.
*
* @lucene.experimental
*/

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.util.BytesRef;
/** Can next() and advance() through the terms in an FST
*
* @lucene.experimental
*/

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -23,7 +23,7 @@ import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
import org.apache.lucene.util.fst.Builder.UnCompiledNode;
// NOTE: while the FST is able to represent a non-final
// dead-end state (NON_FINAL_END_NODE=0), the layres above
@ -32,6 +32,7 @@ import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
/** Represents an FST using a compact byte[] format.
* <p> The format is similar to what's used by Morfologik
* (http://sourceforge.net/projects/morfologik).
*
* @lucene.experimental
*/
public class FST<T> {

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -23,6 +23,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
/** Can next() and advance() through the terms in an FST
*
* @lucene.experimental
*/

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,7 @@ import org.apache.lucene.util.IntsRef;
/**
* Output is a sequence of ints, for each input term.
*
* @lucene.experimental
*/

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -22,6 +22,7 @@ import org.apache.lucene.util.IntsRef;
import java.io.IOException;
/** Can next() and advance() through the terms in an FST
*
* @lucene.experimental
*/

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -22,6 +22,8 @@ import org.apache.lucene.store.DataOutput;
/**
* Use this if you just want to build an FSA.
*
* @lucene.experimental
*/
public final class NoOutputs extends Outputs<Object> {

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,7 @@ import org.apache.lucene.store.DataOutput;
/**
* Represents the outputs for an FST, providing the basic
* algebra needed for the FST.
*
* @lucene.experimental
*/

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -24,10 +24,10 @@ import org.apache.lucene.store.DataOutput;
/**
* Pairs up two outputs into one.
*
* @lucene.experimental
*/
public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
private final Pair<A,B> NO_OUTPUT;

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -27,6 +27,7 @@ import org.apache.lucene.store.DataOutput;
* resulting FST is not guaranteed to be minimal! See
* {@link Builder}. You cannot store 0 output with this
* (that's reserved to mean "no output")!
*
* @lucene.experimental
*/

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -23,7 +23,9 @@ import java.util.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/** Static helper methods */
/** Static helper methods
*
* @lucene.experimental */
public final class Util {
private Util() {
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
@ -97,8 +98,43 @@ public class RandomIndexWriter implements Closeable {
* Adds a Document.
* @see IndexWriter#addDocument(Document)
*/
public void addDocument(Document doc) throws IOException {
w.addDocument(doc);
public void addDocument(final Document doc) throws IOException {
if (r.nextInt(5) == 3) {
// TODO: maybe, we should simply buffer up added docs
// (but we need to clone them), and only when
// getReader, commit, etc. are called, we do an
// addDocuments? Would be better testing.
w.addDocuments(new Iterable<Document>() {
// @Override -- not until Java 1.6
public Iterator<Document> iterator() {
return new Iterator<Document>() {
boolean done;
// @Override -- not until Java 1.6
public boolean hasNext() {
return !done;
}
// @Override -- not until Java 1.6
public void remove() {
throw new UnsupportedOperationException();
}
// @Override -- not until Java 1.6
public Document next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
});
} else {
w.addDocument(doc);
}
maybeCommit();
}
@ -116,12 +152,53 @@ public class RandomIndexWriter implements Closeable {
}
}
public void addDocuments(Iterable<Document> docs) throws IOException {
w.addDocuments(docs);
maybeCommit();
}
public void updateDocuments(Term delTerm, Iterable<Document> docs) throws IOException {
w.updateDocuments(delTerm, docs);
maybeCommit();
}
/**
* Updates a document.
* @see IndexWriter#updateDocument(Term, Document)
*/
public void updateDocument(Term t, Document doc) throws IOException {
w.updateDocument(t, doc);
public void updateDocument(Term t, final Document doc) throws IOException {
if (r.nextInt(5) == 3) {
w.updateDocuments(t, new Iterable<Document>() {
// @Override -- not until Java 1.6
public Iterator<Document> iterator() {
return new Iterator<Document>() {
boolean done;
// @Override -- not until Java 1.6
public boolean hasNext() {
return !done;
}
// @Override -- not until Java 1.6
public void remove() {
throw new UnsupportedOperationException();
}
// @Override -- not until Java 1.6
public Document next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
});
} else {
w.updateDocument(t, doc);
}
maybeCommit();
}
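
With this change roughly one in five addDocument()/updateDocument() calls is silently routed through the new addDocuments()/updateDocuments() bulk APIs, so existing tests exercise them without modification. A typical caller, sketched as a LuceneTestCase subclass; newDirectory() and the shared random field come from the test framework at this revision:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

// Sketch of a test using RandomIndexWriter; whether addDocument() takes the
// single-doc or the bulk path is now decided randomly per call.
public class TestRandomIndexWriterSketch extends LuceneTestCase {
  public void testAddAndRead() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir);
    Document doc = new Document();
    doc.add(new Field("body", "hello world", Field.Store.NO, Field.Index.ANALYZED));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    assertEquals(1, r.numDocs());
    r.close();
    w.close();
    dir.close();
  }
}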

View File

@ -44,6 +44,7 @@ import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* A silly test codec to verify core support for fixed
@ -97,15 +98,25 @@ public class MockFixedIntBlockCodec extends Codec {
@Override
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
return new FixedIntBlockIndexOutput(dir.createOutput(fileName), blockSize) {
@Override
protected void flushBlock() throws IOException {
for(int i=0;i<buffer.length;i++) {
assert buffer[i] >= 0;
out.writeVInt(buffer[i]);
IndexOutput out = dir.createOutput(fileName);
boolean success = false;
try {
FixedIntBlockIndexOutput ret = new FixedIntBlockIndexOutput(out, blockSize) {
@Override
protected void flushBlock() throws IOException {
for(int i=0;i<buffer.length;i++) {
assert buffer[i] >= 0;
out.writeVInt(buffer[i]);
}
}
};
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeSafely(true, out);
}
};
}
}
}

View File

@ -46,6 +46,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/**
* A silly test codec to verify core support for variable
@ -102,34 +103,42 @@ public class MockVariableIntBlockCodec extends Codec {
@Override
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
final IndexOutput out = dir.createOutput(fileName);
out.writeInt(baseBlockSize);
return new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
boolean success = false;
try {
out.writeInt(baseBlockSize);
VariableIntBlockIndexOutput ret = new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
int pendingCount;
final int[] buffer = new int[2+2*baseBlockSize];
int pendingCount;
final int[] buffer = new int[2+2*baseBlockSize];
@Override
protected int add(int value) throws IOException {
assert value >= 0;
buffer[pendingCount++] = value;
// silly variable block length int encoder: if
// first value <= 3, we write N vints at once;
// else, 2*N
final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
@Override
protected int add(int value) throws IOException {
assert value >= 0;
buffer[pendingCount++] = value;
// silly variable block length int encoder: if
// first value <= 3, we write N vints at once;
// else, 2*N
final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
// intentionally be non-causal here:
if (pendingCount == flushAt+1) {
for(int i=0;i<flushAt;i++) {
out.writeVInt(buffer[i]);
// intentionally be non-causal here:
if (pendingCount == flushAt+1) {
for(int i=0;i<flushAt;i++) {
out.writeVInt(buffer[i]);
}
buffer[0] = buffer[flushAt];
pendingCount = 1;
return flushAt;
} else {
return 0;
}
buffer[0] = buffer[flushAt];
pendingCount = 1;
return flushAt;
} else {
return 0;
}
};
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeSafely(true, out);
}
};
}
}
}

View File

@ -136,8 +136,11 @@ public class MockRandomCodec extends Codec {
final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SEED_EXT);
final IndexOutput out = state.directory.createOutput(seedFileName);
out.writeLong(seed);
out.close();
try {
out.writeLong(seed);
} finally {
out.close();
}
final Random random = new Random(seed);

Some files were not shown because too many files have changed in this diff.