mirror of https://github.com/apache/lucene.git
SOLR-2452: Merged with trunk up to r1129202; standardized solr/contrib/* layouts.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1129205 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
31c83c9d6f
|
@ -73,6 +73,7 @@
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="eclipse" description="Setup Eclipse configuration">
|
<target name="eclipse" description="Setup Eclipse configuration">
|
||||||
|
<copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false"/>
|
||||||
<copy file="dev-tools/eclipse/dot.classpath" tofile=".classpath" overwrite="true"/>
|
<copy file="dev-tools/eclipse/dot.classpath" tofile=".classpath" overwrite="true"/>
|
||||||
<mkdir dir=".settings"/>
|
<mkdir dir=".settings"/>
|
||||||
<copy file="dev-tools/eclipse/resources.prefs"
|
<copy file="dev-tools/eclipse/resources.prefs"
|
||||||
|
|
|
@ -20,8 +20,6 @@
|
||||||
<classpathentry kind="src" path="lucene/contrib/queryparser/src/test"/>
|
<classpathentry kind="src" path="lucene/contrib/queryparser/src/test"/>
|
||||||
<classpathentry kind="src" path="lucene/contrib/spatial/src/java"/>
|
<classpathentry kind="src" path="lucene/contrib/spatial/src/java"/>
|
||||||
<classpathentry kind="src" path="lucene/contrib/spatial/src/test"/>
|
<classpathentry kind="src" path="lucene/contrib/spatial/src/test"/>
|
||||||
<classpathentry kind="src" path="lucene/contrib/spellchecker/src/java"/>
|
|
||||||
<classpathentry kind="src" path="lucene/contrib/spellchecker/src/test"/>
|
|
||||||
<classpathentry kind="src" path="lucene/contrib/wordnet/src/java"/>
|
<classpathentry kind="src" path="lucene/contrib/wordnet/src/java"/>
|
||||||
<classpathentry kind="src" path="lucene/contrib/wordnet/src/test"/>
|
<classpathentry kind="src" path="lucene/contrib/wordnet/src/test"/>
|
||||||
<classpathentry kind="src" path="lucene/contrib/xml-query-parser/src/java"/>
|
<classpathentry kind="src" path="lucene/contrib/xml-query-parser/src/java"/>
|
||||||
|
@ -44,6 +42,8 @@
|
||||||
<classpathentry kind="src" path="modules/benchmark/src/test"/>
|
<classpathentry kind="src" path="modules/benchmark/src/test"/>
|
||||||
<classpathentry kind="src" path="modules/grouping/src/java"/>
|
<classpathentry kind="src" path="modules/grouping/src/java"/>
|
||||||
<classpathentry kind="src" path="modules/grouping/src/test"/>
|
<classpathentry kind="src" path="modules/grouping/src/test"/>
|
||||||
|
<classpathentry kind="src" path="modules/suggest/src/java"/>
|
||||||
|
<classpathentry kind="src" path="modules/suggest/src/test"/>
|
||||||
<classpathentry kind="src" path="solr/src/java"/>
|
<classpathentry kind="src" path="solr/src/java"/>
|
||||||
<classpathentry kind="src" path="solr/src/webapp/src"/>
|
<classpathentry kind="src" path="solr/src/webapp/src"/>
|
||||||
<classpathentry kind="src" path="solr/src/common"/>
|
<classpathentry kind="src" path="solr/src/common"/>
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<projectDescription>
|
||||||
|
<name>lucene_solr</name>
|
||||||
|
<comment></comment>
|
||||||
|
<projects>
|
||||||
|
</projects>
|
||||||
|
<buildSpec>
|
||||||
|
<buildCommand>
|
||||||
|
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||||
|
<arguments>
|
||||||
|
</arguments>
|
||||||
|
</buildCommand>
|
||||||
|
</buildSpec>
|
||||||
|
<natures>
|
||||||
|
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||||
|
</natures>
|
||||||
|
</projectDescription>
|
|
@ -11,7 +11,6 @@
|
||||||
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queries/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queries/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queryparser/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queryparser/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spatial/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spatial/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spellchecker/build.xml" />
|
|
||||||
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/wordnet/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/wordnet/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/xml-query-parser/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/xml-query-parser/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/common/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/analysis/common/build.xml" />
|
||||||
|
@ -21,6 +20,7 @@
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
|
||||||
|
<buildFile url="file://$PROJECT_DIR$/modules/suggest/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
|
||||||
<buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />
|
<buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />
|
||||||
|
|
|
@ -12,7 +12,6 @@
|
||||||
<module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries.iml" />
|
<module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/lucene/contrib/queryparser/queryparser.iml" />
|
<module filepath="$PROJECT_DIR$/lucene/contrib/queryparser/queryparser.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/lucene/contrib/spatial/spatial.iml" />
|
<module filepath="$PROJECT_DIR$/lucene/contrib/spatial/spatial.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/lucene/contrib/spellchecker/spellchecker.iml" />
|
|
||||||
<module filepath="$PROJECT_DIR$/lucene/contrib/wordnet/wordnet.iml" />
|
<module filepath="$PROJECT_DIR$/lucene/contrib/wordnet/wordnet.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/lucene/contrib/xml-query-parser/xml-query-parser.iml" />
|
<module filepath="$PROJECT_DIR$/lucene/contrib/xml-query-parser/xml-query-parser.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/analysis/common/common.iml" />
|
<module filepath="$PROJECT_DIR$/modules/analysis/common/common.iml" />
|
||||||
|
@ -22,6 +21,7 @@
|
||||||
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
|
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
|
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
|
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
|
||||||
|
<module filepath="$PROJECT_DIR$/modules/suggest/suggest.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/solr/solr.iml" />
|
<module filepath="$PROJECT_DIR$/solr/solr.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
|
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
|
||||||
<module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />
|
<module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />
|
||||||
|
|
|
@ -141,13 +141,6 @@
|
||||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||||
</configuration>
|
</configuration>
|
||||||
<configuration default="false" name="spellchecker contrib" type="JUnit" factoryName="JUnit">
|
|
||||||
<module name="spellchecker" />
|
|
||||||
<option name="TEST_OBJECT" value="package" />
|
|
||||||
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/lucene/build/contrib/spellchecker" />
|
|
||||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
|
||||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
|
||||||
</configuration>
|
|
||||||
<configuration default="false" name="stempel analysis module" type="JUnit" factoryName="JUnit">
|
<configuration default="false" name="stempel analysis module" type="JUnit" factoryName="JUnit">
|
||||||
<module name="stempel" />
|
<module name="stempel" />
|
||||||
<option name="TEST_OBJECT" value="package" />
|
<option name="TEST_OBJECT" value="package" />
|
||||||
|
@ -155,6 +148,13 @@
|
||||||
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||||
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||||
</configuration>
|
</configuration>
|
||||||
|
<configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
|
||||||
|
<module name="suggest" />
|
||||||
|
<option name="TEST_OBJECT" value="package" />
|
||||||
|
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/suggest/build" />
|
||||||
|
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
|
||||||
|
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
|
||||||
|
</configuration>
|
||||||
<configuration default="false" name="uima contrib" type="JUnit" factoryName="JUnit">
|
<configuration default="false" name="uima contrib" type="JUnit" factoryName="JUnit">
|
||||||
<module name="uima" />
|
<module name="uima" />
|
||||||
<option name="TEST_OBJECT" value="package" />
|
<option name="TEST_OBJECT" value="package" />
|
||||||
|
@ -197,8 +197,8 @@
|
||||||
<item index="17" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
|
<item index="17" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
|
||||||
<item index="18" class="java.lang.String" itemvalue="JUnit.solr" />
|
<item index="18" class="java.lang.String" itemvalue="JUnit.solr" />
|
||||||
<item index="19" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
|
<item index="19" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
|
||||||
<item index="20" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
|
<item index="20" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
|
||||||
<item index="21" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
|
<item index="21" class="java.lang.String" itemvalue="JUnit.suggest module" />
|
||||||
<item index="22" class="java.lang.String" itemvalue="JUnit.uima contrib" />
|
<item index="22" class="java.lang.String" itemvalue="JUnit.uima contrib" />
|
||||||
<item index="23" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
|
<item index="23" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
|
||||||
<item index="24" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
|
<item index="24" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
|
||||||
|
|
|
@ -1,18 +1,16 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<module type="JAVA_MODULE" version="4">
|
<module type="JAVA_MODULE" version="4">
|
||||||
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
<component name="NewModuleRootManager" inherit-compiler-output="false">
|
||||||
<output url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/java" />
|
<output url="file://$MODULE_DIR$/build/classes/java" />
|
||||||
<output-test url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/test" />
|
<output-test url="file://$MODULE_DIR$/build/classes/test" />
|
||||||
<exclude-output />
|
<exclude-output />
|
||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
|
||||||
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="inheritedJdk" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
|
||||||
<orderEntry type="module" module-name="queries" />
|
|
||||||
<orderEntry type="module" module-name="misc" />
|
|
||||||
<orderEntry type="module" module-name="common" />
|
<orderEntry type="module" module-name="common" />
|
||||||
<orderEntry type="module" module-name="lucene" />
|
<orderEntry type="module" module-name="lucene" />
|
||||||
</component>
|
</component>
|
|
@ -39,7 +39,6 @@
|
||||||
<module>queries</module>
|
<module>queries</module>
|
||||||
<module>queryparser</module>
|
<module>queryparser</module>
|
||||||
<module>spatial</module>
|
<module>spatial</module>
|
||||||
<module>spellchecker</module>
|
|
||||||
<module>wordnet</module>
|
<module>wordnet</module>
|
||||||
<module>xml-query-parser</module>
|
<module>xml-query-parser</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
<module>analysis</module>
|
<module>analysis</module>
|
||||||
<module>benchmark</module>
|
<module>benchmark</module>
|
||||||
<module>grouping</module>
|
<module>grouping</module>
|
||||||
|
<module>suggest</module>
|
||||||
</modules>
|
</modules>
|
||||||
<build>
|
<build>
|
||||||
<directory>build/lucene-modules-aggregator</directory>
|
<directory>build/lucene-modules-aggregator</directory>
|
||||||
|
|
|
@ -24,16 +24,16 @@
|
||||||
<groupId>org.apache.lucene</groupId>
|
<groupId>org.apache.lucene</groupId>
|
||||||
<artifactId>lucene-parent</artifactId>
|
<artifactId>lucene-parent</artifactId>
|
||||||
<version>@version@</version>
|
<version>@version@</version>
|
||||||
<relativePath>../../pom.xml</relativePath>
|
<relativePath>../../lucene/pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
<groupId>org.apache.lucene</groupId>
|
<groupId>org.apache.lucene</groupId>
|
||||||
<artifactId>lucene-spellchecker</artifactId>
|
<artifactId>lucene-suggest</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<name>Lucene Spellchecker</name>
|
<name>Lucene Suggest</name>
|
||||||
<description>Spell Checker</description>
|
<description>Lucene Suggest Module</description>
|
||||||
<properties>
|
<properties>
|
||||||
<module-directory>lucene/contrib/spellchecker</module-directory>
|
<module-directory>modules/suggest</module-directory>
|
||||||
<build-directory>../../build/contrib/spellchecker</build-directory>
|
<build-directory>build</build-directory>
|
||||||
</properties>
|
</properties>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -43,14 +43,14 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${project.groupId}</groupId>
|
<groupId>${project.groupId}</groupId>
|
||||||
<artifactId>lucene-test-framework</artifactId>
|
<artifactId>lucene-analyzers-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${project.groupId}</groupId>
|
<groupId>${project.groupId}</groupId>
|
||||||
<artifactId>lucene-analyzers-common</artifactId>
|
<artifactId>lucene-test-framework</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
|
@ -89,7 +89,7 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene</groupId>
|
<groupId>org.apache.lucene</groupId>
|
||||||
<artifactId>lucene-spellchecker</artifactId>
|
<artifactId>lucene-suggest</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
|
@ -427,7 +427,32 @@ Bug fixes
|
||||||
with more document deletions is requested before a reader with fewer
|
with more document deletions is requested before a reader with fewer
|
||||||
deletions, provided they share some segments. (yonik)
|
deletions, provided they share some segments. (yonik)
|
||||||
|
|
||||||
======================= Lucene 3.x (not yet released) =======================
|
* LUCENE-3147,LUCENE-3152: Fixed open file handles leaks in many places in the
|
||||||
|
code. Now MockDirectoryWrapper (in test-framework) tracks all open files,
|
||||||
|
including locks, and fails if the test fails to release all of them.
|
||||||
|
(Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)
|
||||||
|
|
||||||
|
======================= Lucene 3.x (not yet released) ================
|
||||||
|
|
||||||
|
Changes in backwards compatibility policy
|
||||||
|
|
||||||
|
* LUCENE-3140: IndexOutput.copyBytes now takes a DataInput (superclass
|
||||||
|
of IndexInput) as its first argument. (Robert Muir, Dawid Weiss,
|
||||||
|
Mike McCandless)
|
||||||
|
|
||||||
|
Changes in runtime behavior
|
||||||
|
|
||||||
|
* LUCENE-2834: the hash used to compute the lock file name when the
|
||||||
|
lock file is not stored in the index has changed. This means you
|
||||||
|
will see a different lucene-XXX-write.lock in your lock directory.
|
||||||
|
(Robert Muir, Uwe Schindler, Mike McCandless)
|
||||||
|
|
||||||
|
New Features
|
||||||
|
|
||||||
|
* LUCENE-3140: Added experimental FST implementation to Lucene.
|
||||||
|
(Robert Muir, Dawid Weiss, Mike McCandless)
|
||||||
|
|
||||||
|
======================= Lucene 3.2.0 =======================
|
||||||
|
|
||||||
Changes in backwards compatibility policy
|
Changes in backwards compatibility policy
|
||||||
|
|
||||||
|
@ -486,6 +511,10 @@ New features
|
||||||
document IDs and scores encountered during the search, and "replay" them to
|
document IDs and scores encountered during the search, and "replay" them to
|
||||||
another Collector. (Mike McCandless, Shai Erera)
|
another Collector. (Mike McCandless, Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-3112: Added experimental IndexWriter.add/updateDocuments,
|
||||||
|
enabling a block of documents to be indexed, atomically, with
|
||||||
|
guaranteed sequential docIDs. (Mike McCandless)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
|
* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
|
||||||
|
@ -507,6 +536,9 @@ Optimizations
|
||||||
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
|
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
|
||||||
buffer deleted terms to later apply to past segments. (Mike McCandless)
|
buffer deleted terms to later apply to past segments. (Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-3126: IndexWriter.addIndexes copies incoming segments into CFS if they
|
||||||
|
aren't already and MergePolicy allows that. (Shai Erera)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
|
* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
|
||||||
|
@ -541,6 +573,9 @@ Build
|
||||||
* LUCENE-3006: Building javadocs will fail on warnings by default.
|
* LUCENE-3006: Building javadocs will fail on warnings by default.
|
||||||
Override with -Dfailonjavadocwarning=false (sarowe, gsingers)
|
Override with -Dfailonjavadocwarning=false (sarowe, gsingers)
|
||||||
|
|
||||||
|
* LUCENE-3128: "ant eclipse" creates a .project file for easier Eclipse
|
||||||
|
integration (unless one already exists). (Daniel Serodio via Shai Erera)
|
||||||
|
|
||||||
Test Cases
|
Test Cases
|
||||||
|
|
||||||
* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
|
* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
|
||||||
|
|
|
@ -227,7 +227,6 @@
|
||||||
<packageset dir="contrib/misc/src/java"/>
|
<packageset dir="contrib/misc/src/java"/>
|
||||||
<packageset dir="contrib/queries/src/java"/>
|
<packageset dir="contrib/queries/src/java"/>
|
||||||
<packageset dir="contrib/spatial/src/java"/>
|
<packageset dir="contrib/spatial/src/java"/>
|
||||||
<packageset dir="contrib/spellchecker/src/java"/>
|
|
||||||
<packageset dir="contrib/wordnet/src/java"/>
|
<packageset dir="contrib/wordnet/src/java"/>
|
||||||
<packageset dir="contrib/xml-query-parser/src/java"/>
|
<packageset dir="contrib/xml-query-parser/src/java"/>
|
||||||
<packageset dir="contrib/queryparser/src/java"/>
|
<packageset dir="contrib/queryparser/src/java"/>
|
||||||
|
@ -248,7 +247,6 @@
|
||||||
<group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.regex*:org.apache.regexp*"/>
|
<group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.regex*:org.apache.regexp*"/>
|
||||||
<group title="contrib: Query Parser" packages="org.apache.lucene.queryParser.*"/>
|
<group title="contrib: Query Parser" packages="org.apache.lucene.queryParser.*"/>
|
||||||
<group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
|
<group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
|
||||||
<group title="contrib: SpellChecker" packages="org.apache.lucene.search.spell*"/>
|
|
||||||
<group title="contrib: WordNet" packages="org.apache.lucene.wordnet*"/>
|
<group title="contrib: WordNet" packages="org.apache.lucene.wordnet*"/>
|
||||||
<group title="contrib: XML Query Parser" packages="org.apache.lucene.xmlparser*"/>
|
<group title="contrib: XML Query Parser" packages="org.apache.lucene.xmlparser*"/>
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,8 @@ Build
|
||||||
|
|
||||||
* LUCENE-2845: Moved contrib/benchmark to modules.
|
* LUCENE-2845: Moved contrib/benchmark to modules.
|
||||||
|
|
||||||
|
* LUCENE-2995: Moved contrib/spellchecker into modules/suggest.
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
|
* LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
|
||||||
|
@ -48,7 +50,14 @@ Bug Fixes
|
||||||
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
|
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
|
||||||
not lowercasing the key before checking for the tag (Adriano Crestani)
|
not lowercasing the key before checking for the tag (Adriano Crestani)
|
||||||
|
|
||||||
======================= Lucene 3.x (not yet released) =======================
|
======================= Lucene 3.x (not yet released) ================
|
||||||
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
|
||||||
|
(Sujit Pal via Koji Sekiguchi)
|
||||||
|
|
||||||
|
======================= Lucene 3.2.0 =======================
|
||||||
|
|
||||||
Changes in backwards compatibility policy
|
Changes in backwards compatibility policy
|
||||||
|
|
||||||
|
|
|
@ -93,7 +93,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
|
||||||
if( maxNumFragments < 0 )
|
if( maxNumFragments < 0 )
|
||||||
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
|
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
|
||||||
|
|
||||||
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
|
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );
|
||||||
|
|
||||||
List<String> fragments = new ArrayList<String>( maxNumFragments );
|
List<String> fragments = new ArrayList<String>( maxNumFragments );
|
||||||
Field[] values = getFields( reader, docId, fieldName );
|
Field[] values = getFields( reader, docId, fieldName );
|
||||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseIn
|
||||||
*/
|
*/
|
||||||
public class FieldFragList {
|
public class FieldFragList {
|
||||||
|
|
||||||
List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
|
private List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* a constructor.
|
* a constructor.
|
||||||
|
@ -50,6 +50,15 @@ public class FieldFragList {
|
||||||
fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
|
fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return the list of WeightedFragInfos.
|
||||||
|
*
|
||||||
|
* @return fragInfos.
|
||||||
|
*/
|
||||||
|
public List<WeightedFragInfo> getFragInfos() {
|
||||||
|
return fragInfos;
|
||||||
|
}
|
||||||
|
|
||||||
public static class WeightedFragInfo {
|
public static class WeightedFragInfo {
|
||||||
|
|
||||||
List<SubInfo> subInfos;
|
List<SubInfo> subInfos;
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.index.TermFreqVector;
|
||||||
import org.apache.lucene.index.TermPositionVector;
|
import org.apache.lucene.index.TermPositionVector;
|
||||||
import org.apache.lucene.index.TermVectorOffsetInfo;
|
import org.apache.lucene.index.TermVectorOffsetInfo;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
|
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
|
||||||
|
@ -80,16 +81,16 @@ public class FieldTermStack {
|
||||||
Set<String> termSet = fieldQuery.getTermSet( fieldName );
|
Set<String> termSet = fieldQuery.getTermSet( fieldName );
|
||||||
// just return to make null snippet if un-matched fieldName specified when fieldMatch == true
|
// just return to make null snippet if un-matched fieldName specified when fieldMatch == true
|
||||||
if( termSet == null ) return;
|
if( termSet == null ) return;
|
||||||
|
final CharsRef spare = new CharsRef();
|
||||||
for( BytesRef term : tpv.getTerms() ){
|
for( BytesRef term : tpv.getTerms() ){
|
||||||
if( !termSet.contains( term.utf8ToString() ) ) continue;
|
if( !termSet.contains( term.utf8ToChars(spare).toString() ) ) continue;
|
||||||
int index = tpv.indexOf( term );
|
int index = tpv.indexOf( term );
|
||||||
TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
|
TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
|
||||||
if( tvois == null ) return; // just return to make null snippets
|
if( tvois == null ) return; // just return to make null snippets
|
||||||
int[] poss = tpv.getTermPositions( index );
|
int[] poss = tpv.getTermPositions( index );
|
||||||
if( poss == null ) return; // just return to make null snippets
|
if( poss == null ) return; // just return to make null snippets
|
||||||
for( int i = 0; i < tvois.length; i++ )
|
for( int i = 0; i < tvois.length; i++ )
|
||||||
termList.add( new TermInfo( term.utf8ToString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
|
termList.add( new TermInfo( term.utf8ToChars(spare).toString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort by position
|
// sort by position
|
||||||
|
|
|
@ -24,7 +24,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
public void testNullFieldFragList() throws Exception {
|
public void testNullFieldFragList() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
||||||
assertEquals( 0, ffl.fragInfos.size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTooSmallFragSize() throws Exception {
|
public void testTooSmallFragSize() throws Exception {
|
||||||
|
@ -40,90 +40,90 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
public void testSmallerFragSizeThanTermQuery() throws Exception {
|
public void testSmallerFragSizeThanTermQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
|
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
if (VERBOSE) System.out.println( ffl.fragInfos.get( 0 ).toString() );
|
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test1TermIndex() throws Exception {
|
public void test1TermIndex() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test2TermsIndex1Frag() throws Exception {
|
public void test2TermsIndex1Frag() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test2TermsIndex2Frags() throws Exception {
|
public void test2TermsIndex2Frags() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 2, ffl.fragInfos.size() );
|
assertEquals( 2, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.fragInfos.get( 1 ).toString() );
|
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 2, ffl.fragInfos.size() );
|
assertEquals( 2, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
|
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
|
||||||
assertEquals( 2, ffl.fragInfos.size() );
|
assertEquals( 2, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
|
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test2TermsQuery() throws Exception {
|
public void test2TermsQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
|
||||||
assertEquals( 0, ffl.fragInfos.size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPhraseQuery() throws Exception {
|
public void testPhraseQuery() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
|
||||||
assertEquals( 0, ffl.fragInfos.size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
|
||||||
assertEquals( 0, ffl.fragInfos.size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
|
|
||||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
|
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPhraseQuerySlop() throws Exception {
|
public void testPhraseQuerySlop() throws Exception {
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
|
private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
|
||||||
|
@ -142,8 +142,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test1PhraseLongMV() throws Exception {
|
public void test1PhraseLongMV() throws Exception {
|
||||||
|
@ -154,8 +154,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void test1PhraseLongMVB() throws Exception {
|
public void test1PhraseLongMVB() throws Exception {
|
||||||
|
@ -166,7 +166,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
||||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,21 +24,21 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
|
||||||
public void testNullFieldFragList() throws Exception {
|
public void testNullFieldFragList() throws Exception {
|
||||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
|
||||||
assertEquals( 0, ffl.fragInfos.size() );
|
assertEquals( 0, ffl.getFragInfos().size() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testShortFieldFragList() throws Exception {
|
public void testShortFieldFragList() throws Exception {
|
||||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testLongFieldFragList() throws Exception {
|
public void testLongFieldFragList() throws Exception {
|
||||||
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
SingleFragListBuilder sflb = new SingleFragListBuilder();
|
||||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
|
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
|
||||||
assertEquals( 1, ffl.fragInfos.size() );
|
assertEquals( 1, ffl.getFragInfos().size() );
|
||||||
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
|
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {
|
private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {
|
||||||
|
|
|
@ -41,6 +41,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.util.BitVector;
|
import org.apache.lucene.util.BitVector;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represented as a coupled graph of class instances, this
|
* Represented as a coupled graph of class instances, this
|
||||||
|
@ -228,12 +229,13 @@ public class InstantiatedIndex
|
||||||
if (fieldsC != null) {
|
if (fieldsC != null) {
|
||||||
FieldsEnum fieldsEnum = fieldsC.iterator();
|
FieldsEnum fieldsEnum = fieldsC.iterator();
|
||||||
String field;
|
String field;
|
||||||
|
final CharsRef spare = new CharsRef();
|
||||||
while((field = fieldsEnum.next()) != null) {
|
while((field = fieldsEnum.next()) != null) {
|
||||||
if (fields == null || fields.contains(field)) {
|
if (fields == null || fields.contains(field)) {
|
||||||
TermsEnum termsEnum = fieldsEnum.terms();
|
TermsEnum termsEnum = fieldsEnum.terms();
|
||||||
BytesRef text;
|
BytesRef text;
|
||||||
while((text = termsEnum.next()) != null) {
|
while((text = termsEnum.next()) != null) {
|
||||||
String termText = text.utf8ToString();
|
String termText = text.utf8ToChars(spare).toString();
|
||||||
InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
|
InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
|
||||||
final long totalTermFreq = termsEnum.totalTermFreq();
|
final long totalTermFreq = termsEnum.totalTermFreq();
|
||||||
if (totalTermFreq != -1) {
|
if (totalTermFreq != -1) {
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[b9c8c8a170881dfe9c33adc87c26348904510954] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,202 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
|
@ -42,4 +42,26 @@
|
||||||
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
|
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
|
||||||
</subant>
|
</subant>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="build-native-unix" >
|
||||||
|
<mkdir dir="${common.build.dir}/native"/>
|
||||||
|
|
||||||
|
<taskdef resource="cpptasks.tasks">
|
||||||
|
<classpath>
|
||||||
|
<pathelement location="ant_lib/cpptasks-1.0b5.jar"/>
|
||||||
|
</classpath>
|
||||||
|
</taskdef>
|
||||||
|
|
||||||
|
<cc outtype="shared" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" >
|
||||||
|
<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
|
||||||
|
<includepath>
|
||||||
|
<pathelement location="${java.home}/../include"/>
|
||||||
|
<pathelement location="${java.home}/../include/linux"/>
|
||||||
|
<pathelement location="${java.home}/../include/solaris"/>
|
||||||
|
</includepath>
|
||||||
|
|
||||||
|
<compilerarg value="-fPIC" />
|
||||||
|
</cc>
|
||||||
|
</target>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.text.DecimalFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexWriter; // Required for javadocs
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
|
||||||
|
@ -45,6 +46,11 @@ import org.apache.lucene.store.FSDirectory;
|
||||||
* @lucene.experimental You can easily
|
* @lucene.experimental You can easily
|
||||||
* accidentally remove segments from your index so be
|
* accidentally remove segments from your index so be
|
||||||
* careful!
|
* careful!
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: this tool is unaware of documents added
|
||||||
|
* atomically via {@link IndexWriter#addDocuments} or {@link
|
||||||
|
* IndexWriter#updateDocuments}, which means it can easily
|
||||||
|
* break up such document groups.
|
||||||
*/
|
*/
|
||||||
public class IndexSplitter {
|
public class IndexSplitter {
|
||||||
public SegmentInfos infos;
|
public SegmentInfos infos;
|
||||||
|
|
|
@ -40,6 +40,11 @@ import org.apache.lucene.util.Version;
|
||||||
* <p>Note 2: the disadvantage of this tool is that source index needs to be
|
* <p>Note 2: the disadvantage of this tool is that source index needs to be
|
||||||
* read as many times as there are parts to be created, hence the name of this
|
* read as many times as there are parts to be created, hence the name of this
|
||||||
* tool.
|
* tool.
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: this tool is unaware of documents added
|
||||||
|
* atomically via {@link IndexWriter#addDocuments} or {@link
|
||||||
|
* IndexWriter#updateDocuments}, which means it can easily
|
||||||
|
* break up such document groups.
|
||||||
*/
|
*/
|
||||||
public class MultiPassIndexSplitter {
|
public class MultiPassIndexSplitter {
|
||||||
|
|
||||||
|
|
|
@ -269,7 +269,7 @@ public class NRTCachingDirectory extends Directory {
|
||||||
in = cache.openInput(fileName);
|
in = cache.openInput(fileName);
|
||||||
in.copyBytes(out, in.length());
|
in.copyBytes(out, in.length());
|
||||||
} finally {
|
} finally {
|
||||||
IOUtils.closeSafely(in, out);
|
IOUtils.closeSafely(false, in, out);
|
||||||
}
|
}
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
cache.deleteFile(fileName);
|
cache.deleteFile(fileName);
|
||||||
|
|
|
@ -51,9 +51,11 @@ for details.
|
||||||
|
|
||||||
Steps to build:
|
Steps to build:
|
||||||
<ul>
|
<ul>
|
||||||
<li> <tt>cd lucene/contrib/misc/src/java/org/apache/lucene/store</tt>
|
<li> <tt>cd lucene/contrib/misc/</tt>
|
||||||
|
|
||||||
<li> Compile NativePosixUtil.cpp -> libNativePosixUtil.so. On linux, something like <tt>gcc -fPIC -o libNativePosixUtil.so -shared -Wl,-soname,libNativePosixUtil.so -I$JAVA_HOME/include -I$JAVA_HOME/include/linux NativePosixUtil.cpp -lc -lstdc++</tt>. Add <tt>-m64</tt> if you want to compile 64bit (and java must be run with -d64 so it knows to load a 64bit dynamic lib).
|
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux run<tt> ant build-native-unix</tt>.
|
||||||
|
|
||||||
|
<li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder
|
||||||
|
|
||||||
<li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <tt>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</tt>, where /path/to/dir contains libNativePosixUtil.so)
|
<li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <tt>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</tt>, where /path/to/dir contains libNativePosixUtil.so)
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.regex;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.regexp.CharacterIterator;
|
import org.apache.regexp.CharacterIterator;
|
||||||
import org.apache.regexp.RE;
|
import org.apache.regexp.RE;
|
||||||
|
@ -104,11 +105,11 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
|
||||||
|
|
||||||
class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher {
|
class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher {
|
||||||
private RE regexp;
|
private RE regexp;
|
||||||
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
|
private final CharsRef utf16 = new CharsRef(10);
|
||||||
private final CharacterIterator utf16wrapper = new CharacterIterator() {
|
private final CharacterIterator utf16wrapper = new CharacterIterator() {
|
||||||
|
|
||||||
public char charAt(int pos) {
|
public char charAt(int pos) {
|
||||||
return utf16.result[pos];
|
return utf16.chars[pos];
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isEnd(int pos) {
|
public boolean isEnd(int pos) {
|
||||||
|
@ -120,7 +121,7 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String substring(int beginIndex, int endIndex) {
|
public String substring(int beginIndex, int endIndex) {
|
||||||
return new String(utf16.result, beginIndex, endIndex - beginIndex);
|
return new String(utf16.chars, beginIndex, endIndex - beginIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -95,25 +96,11 @@ public class JavaUtilRegexCapabilities implements RegexCapabilities {
|
||||||
class JavaUtilRegexMatcher implements RegexCapabilities.RegexMatcher {
|
class JavaUtilRegexMatcher implements RegexCapabilities.RegexMatcher {
|
||||||
private final Pattern pattern;
|
private final Pattern pattern;
|
||||||
private final Matcher matcher;
|
private final Matcher matcher;
|
||||||
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
|
private final CharsRef utf16 = new CharsRef(10);
|
||||||
private final CharSequence utf16wrapper = new CharSequence() {
|
|
||||||
|
|
||||||
public int length() {
|
|
||||||
return utf16.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
public char charAt(int index) {
|
|
||||||
return utf16.result[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
public CharSequence subSequence(int start, int end) {
|
|
||||||
return new String(utf16.result, start, end - start);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
public JavaUtilRegexMatcher(String regex, int flags) {
|
public JavaUtilRegexMatcher(String regex, int flags) {
|
||||||
this.pattern = Pattern.compile(regex, flags);
|
this.pattern = Pattern.compile(regex, flags);
|
||||||
this.matcher = this.pattern.matcher(utf16wrapper);
|
this.matcher = this.pattern.matcher(utf16);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean match(BytesRef term) {
|
public boolean match(BytesRef term) {
|
||||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
|
|
||||||
|
@ -850,8 +851,9 @@ public final class MoreLikeThis {
|
||||||
{
|
{
|
||||||
BytesRef[] terms = vector.getTerms();
|
BytesRef[] terms = vector.getTerms();
|
||||||
int freqs[]=vector.getTermFrequencies();
|
int freqs[]=vector.getTermFrequencies();
|
||||||
|
final CharsRef spare = new CharsRef();
|
||||||
for (int j = 0; j < terms.length; j++) {
|
for (int j = 0; j < terms.length; j++) {
|
||||||
String term = terms[j].utf8ToString();
|
final String term = terms[j].utf8ToChars(spare).toString();
|
||||||
|
|
||||||
if(isNoiseWord(term)){
|
if(isNoiseWord(term)){
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -1,5 +1,22 @@
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
<project name="DTDDocAnt" default="main">
|
<project name="DTDDocAnt" default="main">
|
||||||
|
|
||||||
<import file="../contrib-build.xml"/>
|
<import file="../contrib-build.xml"/>
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.zip.DataFormatException;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
|
|
||||||
/** Simple utility class providing static methods to
|
/** Simple utility class providing static methods to
|
||||||
|
@ -118,9 +119,9 @@ public class CompressionTools {
|
||||||
/** Decompress the byte array previously returned by
|
/** Decompress the byte array previously returned by
|
||||||
* compressString back into a String */
|
* compressString back into a String */
|
||||||
public static String decompressString(byte[] value) throws DataFormatException {
|
public static String decompressString(byte[] value) throws DataFormatException {
|
||||||
UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
|
|
||||||
final byte[] bytes = decompress(value);
|
final byte[] bytes = decompress(value);
|
||||||
|
CharsRef result = new CharsRef(bytes.length);
|
||||||
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
|
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
|
||||||
return new String(result.result, 0, result.length);
|
return new String(result.chars, 0, result.length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,6 +60,9 @@ public final class CompoundFileWriter {
|
||||||
|
|
||||||
/** temporary holder for the start of this file's data section */
|
/** temporary holder for the start of this file's data section */
|
||||||
long dataOffset;
|
long dataOffset;
|
||||||
|
|
||||||
|
/** the directory which contains the file. */
|
||||||
|
Directory dir;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Before versioning started.
|
// Before versioning started.
|
||||||
|
@ -119,6 +122,14 @@ public final class CompoundFileWriter {
|
||||||
* has been added already
|
* has been added already
|
||||||
*/
|
*/
|
||||||
public void addFile(String file) {
|
public void addFile(String file) {
|
||||||
|
addFile(file, directory);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as {@link #addFile(String)}, only for files that are found in an
|
||||||
|
* external {@link Directory}.
|
||||||
|
*/
|
||||||
|
public void addFile(String file, Directory dir) {
|
||||||
if (merged)
|
if (merged)
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
"Can't add extensions after merge has been called");
|
"Can't add extensions after merge has been called");
|
||||||
|
@ -133,6 +144,7 @@ public final class CompoundFileWriter {
|
||||||
|
|
||||||
FileEntry entry = new FileEntry();
|
FileEntry entry = new FileEntry();
|
||||||
entry.file = file;
|
entry.file = file;
|
||||||
|
entry.dir = dir;
|
||||||
entries.add(entry);
|
entries.add(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,7 +182,7 @@ public final class CompoundFileWriter {
|
||||||
fe.directoryOffset = os.getFilePointer();
|
fe.directoryOffset = os.getFilePointer();
|
||||||
os.writeLong(0); // for now
|
os.writeLong(0); // for now
|
||||||
os.writeString(IndexFileNames.stripSegmentName(fe.file));
|
os.writeString(IndexFileNames.stripSegmentName(fe.file));
|
||||||
totalSize += directory.fileLength(fe.file);
|
totalSize += fe.dir.fileLength(fe.file);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pre-allocate size of file as optimization --
|
// Pre-allocate size of file as optimization --
|
||||||
|
@ -216,7 +228,7 @@ public final class CompoundFileWriter {
|
||||||
* output stream.
|
* output stream.
|
||||||
*/
|
*/
|
||||||
private void copyFile(FileEntry source, IndexOutput os) throws IOException {
|
private void copyFile(FileEntry source, IndexOutput os) throws IOException {
|
||||||
IndexInput is = directory.openInput(source.file);
|
IndexInput is = source.dir.openInput(source.file);
|
||||||
try {
|
try {
|
||||||
long startPtr = os.getFilePointer();
|
long startPtr = os.getFilePointer();
|
||||||
long length = is.length();
|
long length = is.length();
|
||||||
|
|
|
@ -84,19 +84,44 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void abort() {
|
public void abort() {
|
||||||
for(int i=0;i<fieldHash.length;i++) {
|
Throwable th = null;
|
||||||
DocFieldProcessorPerField field = fieldHash[i];
|
|
||||||
while(field != null) {
|
for (DocFieldProcessorPerField field : fieldHash) {
|
||||||
|
while (field != null) {
|
||||||
final DocFieldProcessorPerField next = field.next;
|
final DocFieldProcessorPerField next = field.next;
|
||||||
field.abort();
|
try {
|
||||||
|
field.abort();
|
||||||
|
} catch (Throwable t) {
|
||||||
|
if (th == null) {
|
||||||
|
th = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
field = next;
|
field = next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
fieldsWriter.abort();
|
fieldsWriter.abort();
|
||||||
} finally {
|
} catch (Throwable t) {
|
||||||
|
if (th == null) {
|
||||||
|
th = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
consumer.abort();
|
consumer.abort();
|
||||||
|
} catch (Throwable t) {
|
||||||
|
if (th == null) {
|
||||||
|
th = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If any errors occurred, rethrow the first one that was caught.
|
||||||
|
if (th != null) {
|
||||||
|
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||||
|
if (th instanceof Error) throw (Error) th;
|
||||||
|
// defensive code - we should not hit checked exceptions here
|
||||||
|
throw new RuntimeException(th);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -87,6 +87,7 @@ final class DocInverter extends DocFieldConsumer {
|
||||||
endConsumer.startDocument();
|
endConsumer.startDocument();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public void finishDocument() throws IOException {
|
public void finishDocument() throws IOException {
|
||||||
// TODO: allow endConsumer.finishDocument to also return
|
// TODO: allow endConsumer.finishDocument to also return
|
||||||
// a DocWriter
|
// a DocWriter
|
||||||
|
|
|
@ -53,8 +53,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void abort() {
|
void abort() {
|
||||||
consumer.abort();
|
try {
|
||||||
endConsumer.abort();
|
consumer.abort();
|
||||||
|
} finally {
|
||||||
|
endConsumer.abort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -228,14 +228,19 @@ final class DocumentsWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
final Iterator<ThreadState> threadsIterator = perThreadPool.getActivePerThreadsIterator();
|
final Iterator<ThreadState> threadsIterator = perThreadPool.getActivePerThreadsIterator();
|
||||||
|
|
||||||
while (threadsIterator.hasNext()) {
|
while (threadsIterator.hasNext()) {
|
||||||
ThreadState perThread = threadsIterator.next();
|
final ThreadState perThread = threadsIterator.next();
|
||||||
perThread.lock();
|
perThread.lock();
|
||||||
try {
|
try {
|
||||||
if (perThread.isActive()) { // we might be closed
|
if (perThread.isActive()) { // we might be closed
|
||||||
perThread.perThread.abort();
|
try {
|
||||||
perThread.perThread.checkAndResetHasAborted();
|
perThread.perThread.abort();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
// continue
|
||||||
|
} finally {
|
||||||
|
perThread.perThread.checkAndResetHasAborted();
|
||||||
|
flushControl.doOnAbort(perThread);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
assert closed;
|
assert closed;
|
||||||
}
|
}
|
||||||
|
@ -243,7 +248,6 @@ final class DocumentsWriter {
|
||||||
perThread.unlock();
|
perThread.unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (infoStream != null) {
|
if (infoStream != null) {
|
||||||
|
@ -274,11 +278,9 @@ final class DocumentsWriter {
|
||||||
flushControl.setClosed();
|
flushControl.setClosed();
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean updateDocument(final Document doc, final Analyzer analyzer,
|
private boolean preUpdate() throws CorruptIndexException, IOException {
|
||||||
final Term delTerm) throws CorruptIndexException, IOException {
|
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
boolean maybeMerge = false;
|
boolean maybeMerge = false;
|
||||||
final boolean isUpdate = delTerm != null;
|
|
||||||
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
|
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
|
||||||
// Help out flushing any queued DWPTs so we can un-stall:
|
// Help out flushing any queued DWPTs so we can un-stall:
|
||||||
if (infoStream != null) {
|
if (infoStream != null) {
|
||||||
|
@ -303,9 +305,59 @@ final class DocumentsWriter {
|
||||||
message("continue indexing after helpling out flushing DocumentsWriter is healthy");
|
message("continue indexing after helpling out flushing DocumentsWriter is healthy");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return maybeMerge;
|
||||||
|
}
|
||||||
|
|
||||||
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(),
|
private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
|
||||||
this, doc);
|
if (flushingDWPT != null) {
|
||||||
|
maybeMerge |= doFlush(flushingDWPT);
|
||||||
|
} else {
|
||||||
|
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
|
||||||
|
if (nextPendingFlush != null) {
|
||||||
|
maybeMerge |= doFlush(nextPendingFlush);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return maybeMerge;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean updateDocuments(final Iterable<Document> docs, final Analyzer analyzer,
|
||||||
|
final Term delTerm) throws CorruptIndexException, IOException {
|
||||||
|
boolean maybeMerge = preUpdate();
|
||||||
|
|
||||||
|
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
|
||||||
|
final DocumentsWriterPerThread flushingDWPT;
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!perThread.isActive()) {
|
||||||
|
ensureOpen();
|
||||||
|
assert false: "perThread is not active but we are still open";
|
||||||
|
}
|
||||||
|
|
||||||
|
final DocumentsWriterPerThread dwpt = perThread.perThread;
|
||||||
|
try {
|
||||||
|
final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
|
||||||
|
numDocsInRAM.addAndGet(docCount);
|
||||||
|
} finally {
|
||||||
|
if (dwpt.checkAndResetHasAborted()) {
|
||||||
|
flushControl.doOnAbort(perThread);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
final boolean isUpdate = delTerm != null;
|
||||||
|
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
|
||||||
|
} finally {
|
||||||
|
perThread.unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
return postUpdate(flushingDWPT, maybeMerge);
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean updateDocument(final Document doc, final Analyzer analyzer,
|
||||||
|
final Term delTerm) throws CorruptIndexException, IOException {
|
||||||
|
|
||||||
|
boolean maybeMerge = preUpdate();
|
||||||
|
|
||||||
|
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
|
||||||
final DocumentsWriterPerThread flushingDWPT;
|
final DocumentsWriterPerThread flushingDWPT;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -324,20 +376,13 @@ final class DocumentsWriter {
|
||||||
flushControl.doOnAbort(perThread);
|
flushControl.doOnAbort(perThread);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
final boolean isUpdate = delTerm != null;
|
||||||
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
|
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
|
||||||
} finally {
|
} finally {
|
||||||
perThread.unlock();
|
perThread.unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flushingDWPT != null) {
|
return postUpdate(flushingDWPT, maybeMerge);
|
||||||
maybeMerge |= doFlush(flushingDWPT);
|
|
||||||
} else {
|
|
||||||
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
|
|
||||||
if (nextPendingFlush != null) {
|
|
||||||
maybeMerge |= doFlush(nextPendingFlush);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return maybeMerge;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
|
private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
|
||||||
|
@ -541,4 +586,20 @@ final class DocumentsWriter {
|
||||||
return (!isSegmentFlush || segment != null);
|
return (!isSegmentFlush || segment != null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// used by IW during close to assert that all DWPTs are inactive after the final flush
|
||||||
|
boolean assertNoActiveDWPT() {
|
||||||
|
Iterator<ThreadState> activePerThreadsIterator = perThreadPool.getAllPerThreadsIterator();
|
||||||
|
while(activePerThreadsIterator.hasNext()) {
|
||||||
|
ThreadState next = activePerThreadsIterator.next();
|
||||||
|
next.lock();
|
||||||
|
try {
|
||||||
|
assert !next.isActive();
|
||||||
|
} finally {
|
||||||
|
next.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ package org.apache.lucene.index;
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
@ -68,7 +69,7 @@ public final class DocumentsWriterFlushControl {
|
||||||
this.stallControl = new DocumentsWriterStallControl();
|
this.stallControl = new DocumentsWriterStallControl();
|
||||||
this.perThreadPool = documentsWriter.perThreadPool;
|
this.perThreadPool = documentsWriter.perThreadPool;
|
||||||
this.flushPolicy = documentsWriter.flushPolicy;
|
this.flushPolicy = documentsWriter.flushPolicy;
|
||||||
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;;
|
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.documentsWriter = documentsWriter;
|
this.documentsWriter = documentsWriter;
|
||||||
}
|
}
|
||||||
|
@ -162,8 +163,6 @@ public final class DocumentsWriterFlushControl {
|
||||||
stallControl.updateStalled(this);
|
stallControl.updateStalled(this);
|
||||||
assert assertMemory();
|
assert assertMemory();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
|
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
|
||||||
|
@ -217,7 +216,7 @@ public final class DocumentsWriterFlushControl {
|
||||||
assert assertMemory();
|
assert assertMemory();
|
||||||
// Take it out of the loop this DWPT is stale
|
// Take it out of the loop this DWPT is stale
|
||||||
perThreadPool.replaceForFlush(state, closed);
|
perThreadPool.replaceForFlush(state, closed);
|
||||||
}finally {
|
} finally {
|
||||||
stallControl.updateStalled(this);
|
stallControl.updateStalled(this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -305,6 +304,7 @@ public final class DocumentsWriterFlushControl {
|
||||||
synchronized void setClosed() {
|
synchronized void setClosed() {
|
||||||
// set by DW to signal that we should not release new DWPT after close
|
// set by DW to signal that we should not release new DWPT after close
|
||||||
this.closed = true;
|
this.closed = true;
|
||||||
|
perThreadPool.deactivateUnreleasedStates();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -387,8 +387,12 @@ public final class DocumentsWriterFlushControl {
|
||||||
toFlush.add(flushingDWPT);
|
toFlush.add(flushingDWPT);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// get the new delete queue from DW
|
if (closed) {
|
||||||
next.perThread.initialize();
|
next.resetWriter(null); // make this state inactive
|
||||||
|
} else {
|
||||||
|
// get the new delete queue from DW
|
||||||
|
next.perThread.initialize();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
next.unlock();
|
next.unlock();
|
||||||
|
@ -451,10 +455,21 @@ public final class DocumentsWriterFlushControl {
|
||||||
try {
|
try {
|
||||||
for (DocumentsWriterPerThread dwpt : flushQueue) {
|
for (DocumentsWriterPerThread dwpt : flushQueue) {
|
||||||
doAfterFlush(dwpt);
|
doAfterFlush(dwpt);
|
||||||
|
try {
|
||||||
|
dwpt.abort();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
// continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (BlockedFlush blockedFlush : blockedFlushes) {
|
for (BlockedFlush blockedFlush : blockedFlushes) {
|
||||||
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
flushingWriters
|
||||||
|
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||||
doAfterFlush(blockedFlush.dwpt);
|
doAfterFlush(blockedFlush.dwpt);
|
||||||
|
try {
|
||||||
|
blockedFlush.dwpt.abort();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
// continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
fullFlush = false;
|
fullFlush = false;
|
||||||
|
@ -512,5 +527,4 @@ public final class DocumentsWriterFlushControl {
|
||||||
boolean anyStalledThreads() {
|
boolean anyStalledThreads() {
|
||||||
return stallControl.anyStalledThreads();
|
return stallControl.anyStalledThreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -104,7 +104,7 @@ public class DocumentsWriterPerThread {
|
||||||
// largish:
|
// largish:
|
||||||
doc = null;
|
doc = null;
|
||||||
analyzer = null;
|
analyzer = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class FlushedSegment {
|
static class FlushedSegment {
|
||||||
|
@ -177,7 +177,7 @@ public class DocumentsWriterPerThread {
|
||||||
this.parent = parent;
|
this.parent = parent;
|
||||||
this.fieldInfos = fieldInfos;
|
this.fieldInfos = fieldInfos;
|
||||||
this.writer = parent.indexWriter;
|
this.writer = parent.indexWriter;
|
||||||
this.infoStream = parent.indexWriter.getInfoStream();
|
this.infoStream = parent.infoStream;
|
||||||
this.docState = new DocState(this);
|
this.docState = new DocState(this);
|
||||||
this.docState.similarityProvider = parent.indexWriter.getConfig()
|
this.docState.similarityProvider = parent.indexWriter.getConfig()
|
||||||
.getSimilarityProvider();
|
.getSimilarityProvider();
|
||||||
|
@ -253,6 +253,82 @@ public class DocumentsWriterPerThread {
|
||||||
finishDocument(delTerm);
|
finishDocument(delTerm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int updateDocuments(Iterable<Document> docs, Analyzer analyzer, Term delTerm) throws IOException {
|
||||||
|
assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
|
||||||
|
assert deleteQueue != null;
|
||||||
|
docState.analyzer = analyzer;
|
||||||
|
if (segment == null) {
|
||||||
|
// this call is synchronized on IndexWriter.segmentInfos
|
||||||
|
segment = writer.newSegmentName();
|
||||||
|
assert numDocsInRAM == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int docCount = 0;
|
||||||
|
try {
|
||||||
|
for(Document doc : docs) {
|
||||||
|
docState.doc = doc;
|
||||||
|
docState.docID = numDocsInRAM;
|
||||||
|
docCount++;
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
consumer.processDocument(fieldInfos);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
// An exc is being thrown...
|
||||||
|
|
||||||
|
if (!aborting) {
|
||||||
|
// One of the documents hit a non-aborting
|
||||||
|
// exception (eg something happened during
|
||||||
|
// analysis). We now go and mark any docs
|
||||||
|
// from this batch that we had already indexed
|
||||||
|
// as deleted:
|
||||||
|
int docID = docState.docID;
|
||||||
|
final int endDocID = docID - docCount;
|
||||||
|
while (docID > endDocID) {
|
||||||
|
deleteDocID(docID);
|
||||||
|
docID--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Incr here because finishDocument will not
|
||||||
|
// be called (because an exc is being thrown):
|
||||||
|
numDocsInRAM++;
|
||||||
|
fieldInfos.revertUncommitted();
|
||||||
|
} else {
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
success = false;
|
||||||
|
try {
|
||||||
|
consumer.finishDocument();
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
finishDocument(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply delTerm only after all indexing has
|
||||||
|
// succeeded, but apply it only to docs prior to when
|
||||||
|
// this batch started:
|
||||||
|
if (delTerm != null) {
|
||||||
|
deleteQueue.add(delTerm, deleteSlice);
|
||||||
|
assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
|
||||||
|
deleteSlice.apply(pendingDeletes, numDocsInRAM-docCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
docState.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
return docCount;
|
||||||
|
}
|
||||||
|
|
||||||
private void finishDocument(Term delTerm) throws IOException {
|
private void finishDocument(Term delTerm) throws IOException {
|
||||||
/*
|
/*
|
||||||
* here we actually finish the document in two steps 1. push the delete into
|
* here we actually finish the document in two steps 1. push the delete into
|
||||||
|
@ -474,6 +550,7 @@ public class DocumentsWriterPerThread {
|
||||||
super(blockSize);
|
super(blockSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public byte[] getByteBlock() {
|
public byte[] getByteBlock() {
|
||||||
bytesUsed.addAndGet(blockSize);
|
bytesUsed.addAndGet(blockSize);
|
||||||
return new byte[blockSize];
|
return new byte[blockSize];
|
||||||
|
@ -486,7 +563,7 @@ public class DocumentsWriterPerThread {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
}
|
||||||
|
|
||||||
void setInfoStream(PrintStream infoStream) {
|
void setInfoStream(PrintStream infoStream) {
|
||||||
this.infoStream = infoStream;
|
this.infoStream = infoStream;
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
|
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
|
||||||
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
|
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
import org.apache.lucene.index.codecs.CodecProvider;
|
||||||
|
@ -194,6 +193,21 @@ public abstract class DocumentsWriterPerThreadPool {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deactivate all unreleased threadstates
|
||||||
|
*/
|
||||||
|
protected synchronized void deactivateUnreleasedStates() {
|
||||||
|
for (int i = numThreadStatesActive; i < perThreads.length; i++) {
|
||||||
|
final ThreadState threadState = perThreads[i];
|
||||||
|
threadState.lock();
|
||||||
|
try {
|
||||||
|
threadState.resetWriter(null);
|
||||||
|
} finally {
|
||||||
|
threadState.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) {
|
protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) {
|
||||||
assert threadState.isHeldByCurrentThread();
|
assert threadState.isHeldByCurrentThread();
|
||||||
final DocumentsWriterPerThread dwpt = threadState.perThread;
|
final DocumentsWriterPerThread dwpt = threadState.perThread;
|
||||||
|
@ -212,7 +226,7 @@ public abstract class DocumentsWriterPerThreadPool {
|
||||||
// don't recycle DWPT by default
|
// don't recycle DWPT by default
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc);
|
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an iterator providing access to all {@link ThreadState}
|
* Returns an iterator providing access to all {@link ThreadState}
|
||||||
|
|
|
@ -113,7 +113,7 @@ final class FieldsWriter {
|
||||||
void close() throws IOException {
|
void close() throws IOException {
|
||||||
if (directory != null) {
|
if (directory != null) {
|
||||||
try {
|
try {
|
||||||
IOUtils.closeSafely(fieldsStream, indexStream);
|
IOUtils.closeSafely(false, fieldsStream, indexStream);
|
||||||
} finally {
|
} finally {
|
||||||
fieldsStream = indexStream = null;
|
fieldsStream = indexStream = null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,9 +57,10 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
|
||||||
|
|
||||||
final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
|
final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
|
||||||
|
|
||||||
TermsHash termsHash = null;
|
try {
|
||||||
|
TermsHash termsHash = null;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Current writer chain:
|
Current writer chain:
|
||||||
FieldsConsumer
|
FieldsConsumer
|
||||||
-> IMPL: FormatPostingsTermsDictWriter
|
-> IMPL: FormatPostingsTermsDictWriter
|
||||||
|
@ -69,36 +70,38 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
|
||||||
-> IMPL: FormatPostingsDocsWriter
|
-> IMPL: FormatPostingsDocsWriter
|
||||||
-> PositionsConsumer
|
-> PositionsConsumer
|
||||||
-> IMPL: FormatPostingsPositionsWriter
|
-> IMPL: FormatPostingsPositionsWriter
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
|
for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
|
||||||
final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
|
final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
|
||||||
|
|
||||||
final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
|
final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
|
||||||
|
|
||||||
// Aggregate the storePayload as seen by the same
|
// Aggregate the storePayload as seen by the same
|
||||||
// field across multiple threads
|
// field across multiple threads
|
||||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||||
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
|
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this field has postings then add them to the
|
||||||
|
// segment
|
||||||
|
fieldWriter.flush(fieldInfo.name, consumer, state);
|
||||||
|
|
||||||
|
TermsHashPerField perField = fieldWriter.termsHashPerField;
|
||||||
|
assert termsHash == null || termsHash == perField.termsHash;
|
||||||
|
termsHash = perField.termsHash;
|
||||||
|
int numPostings = perField.bytesHash.size();
|
||||||
|
perField.reset();
|
||||||
|
perField.shrinkHash(numPostings);
|
||||||
|
fieldWriter.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this field has postings then add them to the
|
if (termsHash != null) {
|
||||||
// segment
|
termsHash.reset();
|
||||||
fieldWriter.flush(fieldInfo.name, consumer, state);
|
}
|
||||||
|
} finally {
|
||||||
TermsHashPerField perField = fieldWriter.termsHashPerField;
|
consumer.close();
|
||||||
assert termsHash == null || termsHash == perField.termsHash;
|
|
||||||
termsHash = perField.termsHash;
|
|
||||||
int numPostings = perField.bytesHash.size();
|
|
||||||
perField.reset();
|
|
||||||
perField.shrinkHash(numPostings);
|
|
||||||
fieldWriter.reset();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (termsHash != null) {
|
|
||||||
termsHash.reset();
|
|
||||||
}
|
|
||||||
consumer.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BytesRef payload;
|
BytesRef payload;
|
||||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.index;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.index.codecs.Codec; // for javadocs
|
import org.apache.lucene.index.codecs.Codec; // for javadocs
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -239,4 +241,15 @@ public final class IndexFileNames {
|
||||||
return filename;
|
return filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the given filename ends with the separate norms file
|
||||||
|
* pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}.
|
||||||
|
*/
|
||||||
|
public static boolean isSeparateNormsFile(String filename) {
|
||||||
|
int idx = filename.lastIndexOf('.');
|
||||||
|
if (idx == -1) return false;
|
||||||
|
String ext = filename.substring(idx + 1);
|
||||||
|
return Pattern.matches(SEPARATE_NORMS_EXTENSION + "[0-9]+", ext);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -51,6 +52,7 @@ import org.apache.lucene.store.LockObtainFailedException;
|
||||||
import org.apache.lucene.util.BitVector;
|
import org.apache.lucene.util.BitVector;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.Constants;
|
import org.apache.lucene.util.Constants;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.ThreadInterruptedException;
|
import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
import org.apache.lucene.util.MapBackedSet;
|
import org.apache.lucene.util.MapBackedSet;
|
||||||
|
|
||||||
|
@ -1071,7 +1073,8 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
if (infoStream != null)
|
if (infoStream != null)
|
||||||
message("at close: " + segString());
|
message("at close: " + segString());
|
||||||
|
// used by assert below
|
||||||
|
final DocumentsWriter oldWriter = docWriter;
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
readerPool.close();
|
readerPool.close();
|
||||||
docWriter = null;
|
docWriter = null;
|
||||||
|
@ -1085,6 +1088,7 @@ public class IndexWriter implements Closeable {
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
closed = true;
|
closed = true;
|
||||||
}
|
}
|
||||||
|
assert oldWriter.assertNoActiveDWPT();
|
||||||
} catch (OutOfMemoryError oom) {
|
} catch (OutOfMemoryError oom) {
|
||||||
handleOOM(oom, "closeInternal");
|
handleOOM(oom, "closeInternal");
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -1099,6 +1103,8 @@ public class IndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** Returns the Directory used by this index. */
|
/** Returns the Directory used by this index. */
|
||||||
public Directory getDirectory() {
|
public Directory getDirectory() {
|
||||||
// Pass false because the flush during closing calls getDirectory
|
// Pass false because the flush during closing calls getDirectory
|
||||||
|
@ -1227,6 +1233,111 @@ public class IndexWriter implements Closeable {
|
||||||
updateDocument(null, doc, analyzer);
|
updateDocument(null, doc, analyzer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically adds a block of documents with sequentially
|
||||||
|
* assigned document IDs, such that an external reader
|
||||||
|
* will see all or none of the documents.
|
||||||
|
*
|
||||||
|
* <p><b>WARNING</b>: the index does not currently record
|
||||||
|
* which documents were added as a block. Today this is
|
||||||
|
* fine, because merging will preserve the block (as long
|
||||||
|
* as none of them were deleted). But it's possible in the
|
||||||
|
* future that Lucene may more aggressively re-order
|
||||||
|
* documents (for example, perhaps to obtain better index
|
||||||
|
* compression), in which case you may need to fully
|
||||||
|
* re-index your documents at that time.
|
||||||
|
*
|
||||||
|
* <p>See {@link #addDocument(Document)} for details on
|
||||||
|
* index and IndexWriter state after an Exception, and
|
||||||
|
* flushing/merging temporary free space requirements.</p>
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: tools that do offline splitting of an index
|
||||||
|
* (for example, IndexSplitter in contrib) or
|
||||||
|
* re-sorting of documents (for example, IndexSorter in
|
||||||
|
* contrib) are not aware of these atomically added documents
|
||||||
|
* and will likely break them up. Use such tools at your
|
||||||
|
* own risk!
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
|
||||||
|
* you should immediately close the writer. See <a
|
||||||
|
* href="#OOME">above</a> for details.</p>
|
||||||
|
*
|
||||||
|
* @throws CorruptIndexException if the index is corrupt
|
||||||
|
* @throws IOException if there is a low-level IO error
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public void addDocuments(Iterable<Document> docs) throws CorruptIndexException, IOException {
|
||||||
|
addDocuments(docs, analyzer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically adds a block of documents, analyzed using the
|
||||||
|
* provided analyzer, with sequentially assigned document
|
||||||
|
* IDs, such that an external reader will see all or none
|
||||||
|
* of the documents.
|
||||||
|
*
|
||||||
|
* @throws CorruptIndexException if the index is corrupt
|
||||||
|
* @throws IOException if there is a low-level IO error
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public void addDocuments(Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
|
||||||
|
updateDocuments(null, docs, analyzer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically deletes documents matching the provided
|
||||||
|
* delTerm and adds a block of documents with sequentially
|
||||||
|
* assigned document IDs, such that an external reader
|
||||||
|
* will see all or none of the documents.
|
||||||
|
*
|
||||||
|
* See {@link #addDocuments(Iterable)}.
|
||||||
|
*
|
||||||
|
* @throws CorruptIndexException if the index is corrupt
|
||||||
|
* @throws IOException if there is a low-level IO error
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public void updateDocuments(Term delTerm, Iterable<Document> docs) throws CorruptIndexException, IOException {
|
||||||
|
updateDocuments(delTerm, docs, analyzer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically deletes documents matching the provided
|
||||||
|
* delTerm and adds a block of documents, analyzed using
|
||||||
|
* the provided analyzer, with sequentially
|
||||||
|
* assigned document IDs, such that an external reader
|
||||||
|
* will see all or none of the documents.
|
||||||
|
*
|
||||||
|
* See {@link #addDocuments(Iterable)}.
|
||||||
|
*
|
||||||
|
* @throws CorruptIndexException if the index is corrupt
|
||||||
|
* @throws IOException if there is a low-level IO error
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public void updateDocuments(Term delTerm, Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
|
||||||
|
ensureOpen();
|
||||||
|
try {
|
||||||
|
boolean success = false;
|
||||||
|
boolean anySegmentFlushed = false;
|
||||||
|
try {
|
||||||
|
anySegmentFlushed = docWriter.updateDocuments(docs, analyzer, delTerm);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success && infoStream != null) {
|
||||||
|
message("hit exception updating document");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (anySegmentFlushed) {
|
||||||
|
maybeMerge();
|
||||||
|
}
|
||||||
|
} catch (OutOfMemoryError oom) {
|
||||||
|
handleOOM(oom, "updateDocuments");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes the document(s) containing <code>term</code>.
|
* Deletes the document(s) containing <code>term</code>.
|
||||||
*
|
*
|
||||||
|
@ -2217,10 +2328,10 @@ public class IndexWriter implements Closeable {
|
||||||
* <p>
|
* <p>
|
||||||
* <b>NOTE:</b> this method only copies the segments of the incoming indexes
|
* <b>NOTE:</b> this method only copies the segments of the incoming indexes
|
||||||
* and does not merge them. Therefore deleted documents are not removed and
|
* and does not merge them. Therefore deleted documents are not removed and
|
||||||
* the new segments are not merged with the existing ones. Also, the segments
|
* the new segments are not merged with the existing ones. Also, if the merge
|
||||||
* are copied as-is, meaning they are not converted to CFS if they aren't,
|
* policy allows compound files, then any segment that is not compound is
|
||||||
* and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
|
* converted to such. However, if the segment is compound, it is copied as-is
|
||||||
* or {@link #optimize} afterwards.
|
* even if the merge policy does not allow compound files.
|
||||||
*
|
*
|
||||||
* <p>This requires this index not be among those to be added.
|
* <p>This requires this index not be among those to be added.
|
||||||
*
|
*
|
||||||
|
@ -2244,6 +2355,7 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
int docCount = 0;
|
int docCount = 0;
|
||||||
List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
|
List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
|
||||||
|
Comparator<String> versionComparator = StringHelper.getVersionComparator();
|
||||||
for (Directory dir : dirs) {
|
for (Directory dir : dirs) {
|
||||||
if (infoStream != null) {
|
if (infoStream != null) {
|
||||||
message("addIndexes: process directory " + dir);
|
message("addIndexes: process directory " + dir);
|
||||||
|
@ -2263,46 +2375,22 @@ public class IndexWriter implements Closeable {
|
||||||
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
|
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine if the doc store of this segment needs to be copied. It's
|
// create CFS only if the source segment is not CFS, and MP agrees it
|
||||||
// only relevant for segments who share doc store with others, because
|
// should be CFS.
|
||||||
// the DS might have been copied already, in which case we just want
|
boolean createCFS;
|
||||||
// to update the DS name of this SegmentInfo.
|
synchronized (this) { // Guard segmentInfos
|
||||||
// NOTE: pre-3x segments include a null DSName if they don't share doc
|
createCFS = !info.getUseCompoundFile()
|
||||||
// store. So the following code ensures we don't accidentally insert
|
&& mergePolicy.useCompoundFile(segmentInfos, info)
|
||||||
// 'null' to the map.
|
// optimize case only for segments that don't share doc stores
|
||||||
final String newDsName;
|
&& versionComparator.compare(info.getVersion(), "3.1") >= 0;
|
||||||
if (dsName != null) {
|
}
|
||||||
if (dsNames.containsKey(dsName)) {
|
|
||||||
newDsName = dsNames.get(dsName);
|
if (createCFS) {
|
||||||
} else {
|
copySegmentIntoCFS(info, newSegName);
|
||||||
dsNames.put(dsName, newSegName);
|
|
||||||
newDsName = newSegName;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
newDsName = newSegName;
|
copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy the segment files
|
|
||||||
for (String file: info.files()) {
|
|
||||||
final String newFileName;
|
|
||||||
if (IndexFileNames.isDocStoreFile(file)) {
|
|
||||||
newFileName = newDsName + IndexFileNames.stripSegmentName(file);
|
|
||||||
if (dsFilesCopied.contains(newFileName)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
dsFilesCopied.add(newFileName);
|
|
||||||
} else {
|
|
||||||
newFileName = newSegName + IndexFileNames.stripSegmentName(file);
|
|
||||||
}
|
|
||||||
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
|
|
||||||
dir.copy(directory, file, newFileName);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update SI appropriately
|
|
||||||
info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
|
|
||||||
info.dir = directory;
|
|
||||||
info.name = newSegName;
|
|
||||||
|
|
||||||
infos.add(info);
|
infos.add(info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2391,6 +2479,76 @@ public class IndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Copies the segment into the IndexWriter's directory, as a compound segment. */
|
||||||
|
private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
|
||||||
|
String segFileName = IndexFileNames.segmentFileName(segName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
|
||||||
|
Collection<String> files = info.files();
|
||||||
|
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
|
||||||
|
for (String file : files) {
|
||||||
|
String newFileName = segName + IndexFileNames.stripSegmentName(file);
|
||||||
|
if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
|
||||||
|
&& !IndexFileNames.isSeparateNormsFile(file)) {
|
||||||
|
cfsWriter.addFile(file, info.dir);
|
||||||
|
} else {
|
||||||
|
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
|
||||||
|
info.dir.copy(directory, file, newFileName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the .cfs
|
||||||
|
cfsWriter.close();
|
||||||
|
|
||||||
|
info.dir = directory;
|
||||||
|
info.name = segName;
|
||||||
|
info.setUseCompoundFile(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Copies the segment files as-is into the IndexWriter's directory. */
|
||||||
|
private void copySegmentAsIs(SegmentInfo info, String segName,
|
||||||
|
Map<String, String> dsNames, Set<String> dsFilesCopied)
|
||||||
|
throws IOException {
|
||||||
|
// Determine if the doc store of this segment needs to be copied. It's
|
||||||
|
// only relevant for segments that share doc store with others,
|
||||||
|
// because the DS might have been copied already, in which case we
|
||||||
|
// just want to update the DS name of this SegmentInfo.
|
||||||
|
// NOTE: pre-3x segments include a null DSName if they don't share doc
|
||||||
|
// store. The following code ensures we don't accidentally insert
|
||||||
|
// 'null' to the map.
|
||||||
|
String dsName = info.getDocStoreSegment();
|
||||||
|
final String newDsName;
|
||||||
|
if (dsName != null) {
|
||||||
|
if (dsNames.containsKey(dsName)) {
|
||||||
|
newDsName = dsNames.get(dsName);
|
||||||
|
} else {
|
||||||
|
dsNames.put(dsName, segName);
|
||||||
|
newDsName = segName;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
newDsName = segName;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the segment files
|
||||||
|
for (String file: info.files()) {
|
||||||
|
final String newFileName;
|
||||||
|
if (IndexFileNames.isDocStoreFile(file)) {
|
||||||
|
newFileName = newDsName + IndexFileNames.stripSegmentName(file);
|
||||||
|
if (dsFilesCopied.contains(newFileName)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
dsFilesCopied.add(newFileName);
|
||||||
|
} else {
|
||||||
|
newFileName = segName + IndexFileNames.stripSegmentName(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
|
||||||
|
info.dir.copy(directory, file, newFileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
|
||||||
|
info.dir = directory;
|
||||||
|
info.name = segName;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A hook for extending classes to execute operations after pending added and
|
* A hook for extending classes to execute operations after pending added and
|
||||||
* deleted documents have been flushed to the Directory but before the change
|
* deleted documents have been flushed to the Directory but before the change
|
||||||
|
@ -3176,50 +3334,50 @@ public class IndexWriter implements Closeable {
|
||||||
runningMerges.remove(merge);
|
runningMerges.remove(merge);
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
|
private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
|
||||||
final int numSegments = merge.readers.size();
|
final int numSegments = merge.readers.size();
|
||||||
if (suppressExceptions) {
|
Throwable th = null;
|
||||||
// Suppress any new exceptions so we throw the
|
|
||||||
// original cause
|
|
||||||
boolean anyChanges = false;
|
|
||||||
for (int i=0;i<numSegments;i++) {
|
|
||||||
if (merge.readers.get(i) != null) {
|
|
||||||
try {
|
|
||||||
anyChanges |= readerPool.release(merge.readers.get(i), false);
|
|
||||||
} catch (Throwable t) {
|
|
||||||
}
|
|
||||||
merge.readers.set(i, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
|
boolean anyChanges = false;
|
||||||
try {
|
boolean drop = !suppressExceptions;
|
||||||
merge.readerClones.get(i).close();
|
for (int i = 0; i < numSegments; i++) {
|
||||||
} catch (Throwable t) {
|
if (merge.readers.get(i) != null) {
|
||||||
|
try {
|
||||||
|
anyChanges |= readerPool.release(merge.readers.get(i), drop);
|
||||||
|
} catch (Throwable t) {
|
||||||
|
if (th == null) {
|
||||||
|
th = t;
|
||||||
}
|
}
|
||||||
// This was a private clone and we had the
|
|
||||||
// only reference
|
|
||||||
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
|
|
||||||
merge.readerClones.set(i, null);
|
|
||||||
}
|
}
|
||||||
|
merge.readers.set(i, null);
|
||||||
}
|
}
|
||||||
if (anyChanges) {
|
|
||||||
checkpoint();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i=0;i<numSegments;i++) {
|
|
||||||
if (merge.readers.get(i) != null) {
|
|
||||||
readerPool.release(merge.readers.get(i), true);
|
|
||||||
merge.readers.set(i, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
|
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
|
||||||
|
try {
|
||||||
merge.readerClones.get(i).close();
|
merge.readerClones.get(i).close();
|
||||||
// This was a private clone and we had the only reference
|
} catch (Throwable t) {
|
||||||
assert merge.readerClones.get(i).getRefCount() == 0;
|
if (th == null) {
|
||||||
merge.readerClones.set(i, null);
|
th = t;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// This was a private clone and we had the
|
||||||
|
// only reference
|
||||||
|
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
|
||||||
|
merge.readerClones.set(i, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (suppressExceptions && anyChanges) {
|
||||||
|
checkpoint();
|
||||||
|
}
|
||||||
|
|
||||||
|
// If any error occurred, throw it.
|
||||||
|
if (!suppressExceptions && th != null) {
|
||||||
|
if (th instanceof IOException) throw (IOException) th;
|
||||||
|
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||||
|
if (th instanceof Error) throw (Error) th;
|
||||||
|
throw new RuntimeException(th);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Does the actual (time-consuming) work of the merge,
|
/** Does the actual (time-consuming) work of the merge,
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Collection;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
// TODO FI: norms could actually be stored as doc store
|
// TODO FI: norms could actually be stored as doc store
|
||||||
|
|
||||||
|
@ -49,9 +50,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
|
||||||
|
|
||||||
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
|
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
|
||||||
IndexOutput normsOut = state.directory.createOutput(normsFileName);
|
IndexOutput normsOut = state.directory.createOutput(normsFileName);
|
||||||
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
|
normsOut.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
|
||||||
|
|
||||||
int normCount = 0;
|
int normCount = 0;
|
||||||
|
|
||||||
|
@ -84,9 +85,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
|
||||||
|
|
||||||
assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
|
assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
|
||||||
}
|
}
|
||||||
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
normsOut.close();
|
IOUtils.closeSafely(!success, normsOut);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||||
import org.apache.lucene.index.codecs.TermsConsumer;
|
import org.apache.lucene.index.codecs.TermsConsumer;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enables native per field codec support. This class selects the codec used to
|
* Enables native per field codec support. This class selects the codec used to
|
||||||
|
@ -61,7 +62,15 @@ final class PerFieldCodecWrapper extends Codec {
|
||||||
assert segmentCodecs == state.segmentCodecs;
|
assert segmentCodecs == state.segmentCodecs;
|
||||||
final Codec[] codecs = segmentCodecs.codecs;
|
final Codec[] codecs = segmentCodecs.codecs;
|
||||||
for (int i = 0; i < codecs.length; i++) {
|
for (int i = 0; i < codecs.length; i++) {
|
||||||
consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, consumers);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,22 +83,7 @@ final class PerFieldCodecWrapper extends Codec {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
Iterator<FieldsConsumer> it = consumers.iterator();
|
IOUtils.closeSafely(false, consumers);
|
||||||
IOException err = null;
|
|
||||||
while (it.hasNext()) {
|
|
||||||
try {
|
|
||||||
it.next().close();
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
// keep first IOException we hit but keep
|
|
||||||
// closing the rest
|
|
||||||
if (err == null) {
|
|
||||||
err = ioe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (err != null) {
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -122,14 +116,7 @@ final class PerFieldCodecWrapper extends Codec {
|
||||||
// If we hit exception (eg, IOE because writer was
|
// If we hit exception (eg, IOE because writer was
|
||||||
// committing, or, for any other reason) we must
|
// committing, or, for any other reason) we must
|
||||||
// go back and close all FieldsProducers we opened:
|
// go back and close all FieldsProducers we opened:
|
||||||
for(FieldsProducer fp : producers.values()) {
|
IOUtils.closeSafely(true, producers.values());
|
||||||
try {
|
|
||||||
fp.close();
|
|
||||||
} catch (Throwable t) {
|
|
||||||
// Suppress all exceptions here so we continue
|
|
||||||
// to throw the original one
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -177,22 +164,7 @@ final class PerFieldCodecWrapper extends Codec {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
Iterator<FieldsProducer> it = codecs.values().iterator();
|
IOUtils.closeSafely(false, codecs.values());
|
||||||
IOException err = null;
|
|
||||||
while (it.hasNext()) {
|
|
||||||
try {
|
|
||||||
it.next().close();
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
// keep first IOException we hit but keep
|
|
||||||
// closing the rest
|
|
||||||
if (err == null) {
|
|
||||||
err = ioe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (err != null) {
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -59,7 +59,7 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads the snapshots information from the given {@link Directory}. This
|
* Reads the snapshots information from the given {@link Directory}. This
|
||||||
* method does can be used if the snapshots information is needed, however you
|
* method can be used if the snapshots information is needed, however you
|
||||||
* cannot instantiate the deletion policy (because e.g., some other process
|
* cannot instantiate the deletion policy (because e.g., some other process
|
||||||
* keeps a lock on the snapshots directory).
|
* keeps a lock on the snapshots directory).
|
||||||
*/
|
*/
|
||||||
|
@ -122,11 +122,19 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
|
||||||
writer.commit();
|
writer.commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initializes the snapshots information. This code should basically run
|
try {
|
||||||
// only if mode != CREATE, but if it is, it's no harm as we only open the
|
// Initializes the snapshots information. This code should basically run
|
||||||
// reader once and immediately close it.
|
// only if mode != CREATE, but if it is, it's no harm as we only open the
|
||||||
for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
|
// reader once and immediately close it.
|
||||||
registerSnapshotInfo(e.getKey(), e.getValue(), null);
|
for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
|
||||||
|
registerSnapshotInfo(e.getKey(), e.getValue(), null);
|
||||||
|
}
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
writer.close(); // don't leave any open file handles
|
||||||
|
throw e;
|
||||||
|
} catch (IOException e) {
|
||||||
|
writer.close(); // don't leave any open file handles
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -436,7 +436,7 @@ public final class SegmentInfo implements Cloneable {
|
||||||
*/
|
*/
|
||||||
public String getNormFileName(int number) {
|
public String getNormFileName(int number) {
|
||||||
if (hasSeparateNorms(number)) {
|
if (hasSeparateNorms(number)) {
|
||||||
return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
|
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + number, normGen.get(number));
|
||||||
} else {
|
} else {
|
||||||
// single file for all norms
|
// single file for all norms
|
||||||
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
|
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
|
||||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.NoSuchDirectoryException;
|
import org.apache.lucene.store.NoSuchDirectoryException;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.ThreadInterruptedException;
|
import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -323,17 +324,13 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
|
||||||
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
|
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
|
||||||
segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
|
segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
|
||||||
infosWriter.prepareCommit(segnOutput);
|
infosWriter.prepareCommit(segnOutput);
|
||||||
success = true;
|
|
||||||
pendingSegnOutput = segnOutput;
|
pendingSegnOutput = segnOutput;
|
||||||
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
// We hit an exception above; try to close the file
|
// We hit an exception above; try to close the file
|
||||||
// but suppress any exception:
|
// but suppress any exception:
|
||||||
try {
|
IOUtils.closeSafely(true, segnOutput);
|
||||||
segnOutput.close();
|
|
||||||
} catch (Throwable t) {
|
|
||||||
// Suppress so we keep throwing the original exception
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
// Try not to leave a truncated segments_N file in
|
// Try not to leave a truncated segments_N file in
|
||||||
// the index:
|
// the index:
|
||||||
|
@ -945,6 +942,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
|
||||||
} finally {
|
} finally {
|
||||||
genOutput.close();
|
genOutput.close();
|
||||||
}
|
}
|
||||||
|
} catch (ThreadInterruptedException t) {
|
||||||
|
throw t;
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
// It's OK if we fail to write this file since it's
|
// It's OK if we fail to write this file since it's
|
||||||
// used only as one of the retry fallbacks.
|
// used only as one of the retry fallbacks.
|
||||||
|
@ -963,7 +962,6 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
|
||||||
finishCommit(dir);
|
finishCommit(dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String toString(Directory directory) {
|
public String toString(Directory directory) {
|
||||||
StringBuilder buffer = new StringBuilder();
|
StringBuilder buffer = new StringBuilder();
|
||||||
buffer.append(getCurrentSegmentFileName()).append(": ");
|
buffer.append(getCurrentSegmentFileName()).append(": ");
|
||||||
|
|
|
@ -27,13 +27,13 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.index.IndexReader.FieldOption;
|
import org.apache.lucene.index.IndexReader.FieldOption;
|
||||||
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
|
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
|
||||||
import org.apache.lucene.index.codecs.Codec;
|
import org.apache.lucene.index.codecs.Codec;
|
||||||
import org.apache.lucene.index.codecs.CodecProvider;
|
|
||||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.index.codecs.MergeState;
|
import org.apache.lucene.index.codecs.MergeState;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.MultiBits;
|
import org.apache.lucene.util.MultiBits;
|
||||||
import org.apache.lucene.util.ReaderUtil;
|
import org.apache.lucene.util.ReaderUtil;
|
||||||
|
|
||||||
|
@ -46,10 +46,6 @@ import org.apache.lucene.util.ReaderUtil;
|
||||||
* @see #add
|
* @see #add
|
||||||
*/
|
*/
|
||||||
final class SegmentMerger {
|
final class SegmentMerger {
|
||||||
|
|
||||||
/** norms header placeholder */
|
|
||||||
static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
|
|
||||||
|
|
||||||
private Directory directory;
|
private Directory directory;
|
||||||
private String segment;
|
private String segment;
|
||||||
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
|
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
|
||||||
|
@ -124,6 +120,12 @@ final class SegmentMerger {
|
||||||
return mergedDocs;
|
return mergedDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NOTE: this method creates a compound file for all files returned by
|
||||||
|
* info.files(). While, generally, this may include separate norms and
|
||||||
|
* deletion files, this SegmentInfo must not reference such files when this
|
||||||
|
* method is called, because they are not allowed within a compound file.
|
||||||
|
*/
|
||||||
final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
|
final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
|
@ -131,6 +133,10 @@ final class SegmentMerger {
|
||||||
Collection<String> files = info.files();
|
Collection<String> files = info.files();
|
||||||
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
|
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
|
||||||
for (String file : files) {
|
for (String file : files) {
|
||||||
|
assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
|
||||||
|
: ".del file is not allowed in .cfs: " + file;
|
||||||
|
assert !IndexFileNames.isSeparateNormsFile(file)
|
||||||
|
: "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file;
|
||||||
cfsWriter.addFile(file);
|
cfsWriter.addFile(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -540,14 +546,13 @@ final class SegmentMerger {
|
||||||
}
|
}
|
||||||
codec = segmentWriteState.segmentCodecs.codec();
|
codec = segmentWriteState.segmentCodecs.codec();
|
||||||
final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);
|
final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);
|
||||||
|
|
||||||
// NOTE: this is silly, yet, necessary -- we create a
|
|
||||||
// MultiBits as our skip docs only to have it broken
|
|
||||||
// apart when we step through the docs enums in
|
|
||||||
// MultiDocsEnum.
|
|
||||||
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// NOTE: this is silly, yet, necessary -- we create a
|
||||||
|
// MultiBits as our skip docs only to have it broken
|
||||||
|
// apart when we step through the docs enums in
|
||||||
|
// MultiDocsEnum.
|
||||||
|
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
|
||||||
|
|
||||||
consumer.merge(mergeState,
|
consumer.merge(mergeState,
|
||||||
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
|
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
|
||||||
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
|
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
|
||||||
|
@ -573,12 +578,13 @@ final class SegmentMerger {
|
||||||
|
|
||||||
private void mergeNorms() throws IOException {
|
private void mergeNorms() throws IOException {
|
||||||
IndexOutput output = null;
|
IndexOutput output = null;
|
||||||
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
for (FieldInfo fi : fieldInfos) {
|
for (FieldInfo fi : fieldInfos) {
|
||||||
if (fi.isIndexed && !fi.omitNorms) {
|
if (fi.isIndexed && !fi.omitNorms) {
|
||||||
if (output == null) {
|
if (output == null) {
|
||||||
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
|
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
|
||||||
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
|
output.writeBytes(SegmentNorms.NORMS_HEADER, SegmentNorms.NORMS_HEADER.length);
|
||||||
}
|
}
|
||||||
for (IndexReader reader : readers) {
|
for (IndexReader reader : readers) {
|
||||||
final int maxDoc = reader.maxDoc();
|
final int maxDoc = reader.maxDoc();
|
||||||
|
@ -606,10 +612,9 @@ final class SegmentMerger {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (output != null) {
|
IOUtils.closeSafely(!success, output);
|
||||||
output.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,10 @@ import org.apache.lucene.store.IndexOutput;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
final class SegmentNorms implements Cloneable {
|
final class SegmentNorms implements Cloneable {
|
||||||
|
|
||||||
|
/** norms header placeholder */
|
||||||
|
static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
|
||||||
|
|
||||||
int refCount = 1;
|
int refCount = 1;
|
||||||
|
|
||||||
// If this instance is a clone, the originalNorm
|
// If this instance is a clone, the originalNorm
|
||||||
|
@ -219,7 +223,7 @@ final class SegmentNorms implements Cloneable {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
try {
|
try {
|
||||||
out.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
|
out.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
|
||||||
out.writeBytes(bytes, owner.maxDoc());
|
out.writeBytes(bytes, owner.maxDoc());
|
||||||
} finally {
|
} finally {
|
||||||
out.close();
|
out.close();
|
||||||
|
|
|
@ -574,7 +574,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
|
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
|
||||||
long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
|
long nextNormSeek = SegmentNorms.NORMS_HEADER.length; //skip header (header unused for now)
|
||||||
int maxDoc = maxDoc();
|
int maxDoc = maxDoc();
|
||||||
for (FieldInfo fi : core.fieldInfos) {
|
for (FieldInfo fi : core.fieldInfos) {
|
||||||
if (norms.containsKey(fi.name)) {
|
if (norms.containsKey(fi.name)) {
|
||||||
|
@ -619,7 +619,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
||||||
if (isUnversioned) {
|
if (isUnversioned) {
|
||||||
normSeek = 0;
|
normSeek = 0;
|
||||||
} else {
|
} else {
|
||||||
normSeek = SegmentMerger.NORMS_HEADER.length;
|
normSeek = SegmentNorms.NORMS_HEADER.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -54,9 +54,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
||||||
fill(state.numDocs);
|
fill(state.numDocs);
|
||||||
assert state.segmentName != null;
|
assert state.segmentName != null;
|
||||||
String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
|
String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
|
||||||
tvx.close();
|
IOUtils.closeSafely(false, tvx, tvf, tvd);
|
||||||
tvf.close();
|
|
||||||
tvd.close();
|
|
||||||
tvx = tvd = tvf = null;
|
tvx = tvd = tvf = null;
|
||||||
if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
|
if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
|
||||||
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
|
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
|
||||||
|
@ -89,18 +87,25 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
||||||
|
|
||||||
private final void initTermVectorsWriter() throws IOException {
|
private final void initTermVectorsWriter() throws IOException {
|
||||||
if (tvx == null) {
|
if (tvx == null) {
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
// If we hit an exception while init'ing the term
|
||||||
|
// vector output files, we must abort this segment
|
||||||
|
// because those files will be in an unknown
|
||||||
|
// state:
|
||||||
|
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||||
|
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||||
|
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||||
|
|
||||||
// If we hit an exception while init'ing the term
|
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||||
// vector output files, we must abort this segment
|
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||||
// because those files will be in an unknown
|
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||||
// state:
|
success = true;
|
||||||
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
} finally {
|
||||||
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
if (!success) {
|
||||||
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
IOUtils.closeSafely(true, tvx, tvd, tvf);
|
||||||
|
}
|
||||||
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
}
|
||||||
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
|
||||||
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
|
||||||
|
|
||||||
lastDocID = 0;
|
lastDocID = 0;
|
||||||
}
|
}
|
||||||
|
@ -139,7 +144,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert lastDocID == docState.docID;
|
assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
|
||||||
|
|
||||||
lastDocID++;
|
lastDocID++;
|
||||||
|
|
||||||
|
@ -152,21 +157,27 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
||||||
public void abort() {
|
public void abort() {
|
||||||
hasVectors = false;
|
hasVectors = false;
|
||||||
try {
|
try {
|
||||||
IOUtils.closeSafely(tvx, tvd, tvf);
|
IOUtils.closeSafely(true, tvx, tvd, tvf);
|
||||||
} catch (IOException ignored) {
|
} catch (IOException e) {
|
||||||
|
// cannot happen since we suppress exceptions
|
||||||
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||||
} catch (IOException ignored) {
|
} catch (IOException ignored) {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||||
} catch (IOException ignored) {
|
} catch (IOException ignored) {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||||
} catch (IOException ignored) {
|
} catch (IOException ignored) {
|
||||||
}
|
}
|
||||||
|
|
||||||
tvx = tvd = tvf = null;
|
tvx = tvd = tvf = null;
|
||||||
lastDocID = 0;
|
lastDocID = 0;
|
||||||
|
|
||||||
|
|
|
@ -31,15 +31,22 @@ final class TermVectorsWriter {
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
|
|
||||||
public TermVectorsWriter(Directory directory, String segment,
|
public TermVectorsWriter(Directory directory, String segment,
|
||||||
FieldInfos fieldInfos)
|
FieldInfos fieldInfos) throws IOException {
|
||||||
throws IOException {
|
boolean success = false;
|
||||||
// Open files for TermVector storage
|
try {
|
||||||
tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
// Open files for TermVector storage
|
||||||
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||||
tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||||
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||||
tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||||
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||||
|
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, tvx, tvd, tvf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
this.fieldInfos = fieldInfos;
|
this.fieldInfos = fieldInfos;
|
||||||
}
|
}
|
||||||
|
@ -51,8 +58,7 @@ final class TermVectorsWriter {
|
||||||
* @param vectors
|
* @param vectors
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public final void addAllDocVectors(TermFreqVector[] vectors)
|
public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {
|
||||||
throws IOException {
|
|
||||||
|
|
||||||
tvx.writeLong(tvd.getFilePointer());
|
tvx.writeLong(tvd.getFilePointer());
|
||||||
tvx.writeLong(tvf.getFilePointer());
|
tvx.writeLong(tvf.getFilePointer());
|
||||||
|
@ -187,6 +193,6 @@ final class TermVectorsWriter {
|
||||||
final void close() throws IOException {
|
final void close() throws IOException {
|
||||||
// make an effort to close all streams we can but remember and re-throw
|
// make an effort to close all streams we can but remember and re-throw
|
||||||
// the first exception encountered in this process
|
// the first exception encountered in this process
|
||||||
IOUtils.closeSafely(tvx, tvd, tvf);
|
IOUtils.closeSafely(false, tvx, tvd, tvf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,7 +54,6 @@ final class TermsHash extends InvertedDocConsumer {
|
||||||
|
|
||||||
final boolean trackAllocations;
|
final boolean trackAllocations;
|
||||||
|
|
||||||
|
|
||||||
public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
|
public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
|
||||||
this.docState = docWriter.docState;
|
this.docState = docWriter.docState;
|
||||||
this.docWriter = docWriter;
|
this.docWriter = docWriter;
|
||||||
|
@ -108,11 +107,11 @@ final class TermsHash extends InvertedDocConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
|
for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
|
||||||
TermsHashPerField perField = (TermsHashPerField) entry.getValue();
|
TermsHashPerField perField = (TermsHashPerField) entry.getValue();
|
||||||
childFields.put(entry.getKey(), perField.consumer);
|
childFields.put(entry.getKey(), perField.consumer);
|
||||||
if (nextTermsHash != null) {
|
if (nextTermsHash != null) {
|
||||||
nextChildFields.put(entry.getKey(), perField.nextPerField);
|
nextChildFields.put(entry.getKey(), perField.nextPerField);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
consumer.flush(childFields, state);
|
consumer.flush(childFields, state);
|
||||||
|
@ -134,12 +133,9 @@ final class TermsHash extends InvertedDocConsumer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void finishDocument() throws IOException {
|
void finishDocument() throws IOException {
|
||||||
try {
|
consumer.finishDocument(this);
|
||||||
consumer.finishDocument(this);
|
if (nextTermsHash != null) {
|
||||||
} finally {
|
nextTermsHash.consumer.finishDocument(nextTermsHash);
|
||||||
if (nextTermsHash != null) {
|
|
||||||
nextTermsHash.consumer.finishDocument(nextTermsHash);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; //javadoc
|
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; //javadoc
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -48,12 +47,10 @@ public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerT
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) {
|
public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter) {
|
||||||
ThreadState threadState = threadBindings.get(requestingThread);
|
ThreadState threadState = threadBindings.get(requestingThread);
|
||||||
if (threadState != null) {
|
if (threadState != null && threadState.tryLock()) {
|
||||||
if (threadState.tryLock()) {
|
return threadState;
|
||||||
return threadState;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ThreadState minThreadState = null;
|
ThreadState minThreadState = null;
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.store.RAMOutputStream;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
// TODO: currently we encode all terms between two indexed
|
// TODO: currently we encode all terms between two indexed
|
||||||
|
@ -66,24 +67,29 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
|
|
||||||
//private final String segment;
|
//private final String segment;
|
||||||
|
|
||||||
public BlockTermsWriter(
|
public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
|
||||||
TermsIndexWriterBase termsIndexWriter,
|
SegmentWriteState state, PostingsWriterBase postingsWriter)
|
||||||
SegmentWriteState state,
|
throws IOException {
|
||||||
PostingsWriterBase postingsWriter)
|
|
||||||
throws IOException
|
|
||||||
{
|
|
||||||
final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
|
final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
|
||||||
this.termsIndexWriter = termsIndexWriter;
|
this.termsIndexWriter = termsIndexWriter;
|
||||||
out = state.directory.createOutput(termsFileName);
|
out = state.directory.createOutput(termsFileName);
|
||||||
fieldInfos = state.fieldInfos;
|
boolean success = false;
|
||||||
writeHeader(out);
|
try {
|
||||||
currentField = null;
|
fieldInfos = state.fieldInfos;
|
||||||
this.postingsWriter = postingsWriter;
|
writeHeader(out);
|
||||||
//segment = state.segmentName;
|
currentField = null;
|
||||||
|
this.postingsWriter = postingsWriter;
|
||||||
|
//segment = state.segmentName;
|
||||||
|
|
||||||
//System.out.println("BTW.init seg=" + state.segmentName);
|
//System.out.println("BTW.init seg=" + state.segmentName);
|
||||||
|
|
||||||
postingsWriter.start(out); // have consumer write its format/header
|
postingsWriter.start(out); // have consumer write its format/header
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void writeHeader(IndexOutput out) throws IOException {
|
protected void writeHeader(IndexOutput out) throws IOException {
|
||||||
|
@ -130,20 +136,11 @@ public class BlockTermsWriter extends FieldsConsumer {
|
||||||
}
|
}
|
||||||
writeTrailer(dirStart);
|
writeTrailer(dirStart);
|
||||||
} finally {
|
} finally {
|
||||||
try {
|
IOUtils.closeSafely(false, out, postingsWriter, termsIndexWriter);
|
||||||
out.close();
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
postingsWriter.close();
|
|
||||||
} finally {
|
|
||||||
termsIndexWriter.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void writeTrailer(long dirStart) throws IOException {
|
protected void writeTrailer(long dirStart) throws IOException {
|
||||||
// TODO Auto-generated method stub
|
|
||||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||||
out.writeLong(dirStart);
|
out.writeLong(dirStart);
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,6 +88,15 @@ public class CodecProvider {
|
||||||
return codec;
|
return codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns <code>true</code> iff a codec with the given name is registered
|
||||||
|
* @param name codec name
|
||||||
|
* @return <code>true</code> iff a codec with the given name is registered, otherwise <code>false</code>.
|
||||||
|
*/
|
||||||
|
public synchronized boolean isCodecRegistered(String name) {
|
||||||
|
return codecs.containsKey(name);
|
||||||
|
}
|
||||||
|
|
||||||
public SegmentInfosWriter getSegmentInfosWriter() {
|
public SegmentInfosWriter getSegmentInfosWriter() {
|
||||||
return infosWriter;
|
return infosWriter;
|
||||||
}
|
}
|
||||||
|
@ -145,6 +154,14 @@ public class CodecProvider {
|
||||||
return codec;
|
return codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns <code>true</code> if this provider has a Codec registered for this
|
||||||
|
* field.
|
||||||
|
*/
|
||||||
|
public synchronized boolean hasFieldCodec(String name) {
|
||||||
|
return perFieldMap.containsKey(name);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the default {@link Codec} for this {@link CodecProvider}
|
* Returns the default {@link Codec} for this {@link CodecProvider}
|
||||||
*
|
*
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentInfos;
|
||||||
import org.apache.lucene.store.ChecksumIndexOutput;
|
import org.apache.lucene.store.ChecksumIndexOutput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default implementation of {@link SegmentInfosWriter}.
|
* Default implementation of {@link SegmentInfosWriter}.
|
||||||
|
@ -56,16 +57,24 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
|
||||||
public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
|
public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
IndexOutput out = createOutput(dir, segmentFileName);
|
IndexOutput out = createOutput(dir, segmentFileName);
|
||||||
out.writeInt(FORMAT_CURRENT); // write FORMAT
|
boolean success = false;
|
||||||
out.writeLong(infos.version);
|
try {
|
||||||
out.writeInt(infos.counter); // write counter
|
out.writeInt(FORMAT_CURRENT); // write FORMAT
|
||||||
out.writeLong(infos.getGlobalFieldMapVersion());
|
out.writeLong(infos.version);
|
||||||
out.writeInt(infos.size()); // write infos
|
out.writeInt(infos.counter); // write counter
|
||||||
for (SegmentInfo si : infos) {
|
out.writeLong(infos.getGlobalFieldMapVersion());
|
||||||
si.write(out);
|
out.writeInt(infos.size()); // write infos
|
||||||
|
for (SegmentInfo si : infos) {
|
||||||
|
si.write(out);
|
||||||
|
}
|
||||||
|
out.writeStringStringMap(infos.getUserData());
|
||||||
|
success = true;
|
||||||
|
return out;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, out);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
out.writeStringStringMap(infos.getUserData());
|
|
||||||
return out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected IndexOutput createOutput(Directory dir, String segmentFileName)
|
protected IndexOutput createOutput(Directory dir, String segmentFileName)
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.PagedBytes;
|
import org.apache.lucene.util.PagedBytes;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
@ -108,6 +109,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
|
||||||
}
|
}
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
if (!success) IOUtils.closeSafely(true, in);
|
||||||
if (indexDivisor > 0) {
|
if (indexDivisor > 0) {
|
||||||
in.close();
|
in.close();
|
||||||
in = null;
|
in = null;
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -58,9 +59,17 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
|
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
|
||||||
termIndexInterval = state.termIndexInterval;
|
termIndexInterval = state.termIndexInterval;
|
||||||
out = state.directory.createOutput(indexFileName);
|
out = state.directory.createOutput(indexFileName);
|
||||||
fieldInfos = state.fieldInfos;
|
boolean success = false;
|
||||||
writeHeader(out);
|
try {
|
||||||
out.writeInt(termIndexInterval);
|
fieldInfos = state.fieldInfos;
|
||||||
|
writeHeader(out);
|
||||||
|
out.writeInt(termIndexInterval);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void writeHeader(IndexOutput out) throws IOException {
|
protected void writeHeader(IndexOutput out) throws IOException {
|
||||||
|
@ -202,33 +211,37 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
final long dirStart = out.getFilePointer();
|
boolean success = false;
|
||||||
final int fieldCount = fields.size();
|
try {
|
||||||
|
final long dirStart = out.getFilePointer();
|
||||||
|
final int fieldCount = fields.size();
|
||||||
|
|
||||||
int nonNullFieldCount = 0;
|
int nonNullFieldCount = 0;
|
||||||
for(int i=0;i<fieldCount;i++) {
|
for(int i=0;i<fieldCount;i++) {
|
||||||
SimpleFieldWriter field = fields.get(i);
|
SimpleFieldWriter field = fields.get(i);
|
||||||
if (field.numIndexTerms > 0) {
|
if (field.numIndexTerms > 0) {
|
||||||
nonNullFieldCount++;
|
nonNullFieldCount++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
out.writeVInt(nonNullFieldCount);
|
out.writeVInt(nonNullFieldCount);
|
||||||
for(int i=0;i<fieldCount;i++) {
|
for(int i=0;i<fieldCount;i++) {
|
||||||
SimpleFieldWriter field = fields.get(i);
|
SimpleFieldWriter field = fields.get(i);
|
||||||
if (field.numIndexTerms > 0) {
|
if (field.numIndexTerms > 0) {
|
||||||
out.writeVInt(field.fieldInfo.number);
|
out.writeVInt(field.fieldInfo.number);
|
||||||
out.writeVInt(field.numIndexTerms);
|
out.writeVInt(field.numIndexTerms);
|
||||||
out.writeVLong(field.termsStart);
|
out.writeVLong(field.termsStart);
|
||||||
out.writeVLong(field.indexStart);
|
out.writeVLong(field.indexStart);
|
||||||
out.writeVLong(field.packedIndexStart);
|
out.writeVLong(field.packedIndexStart);
|
||||||
out.writeVLong(field.packedOffsetsStart);
|
out.writeVLong(field.packedOffsetsStart);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
writeTrailer(dirStart);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
IOUtils.closeSafely(!success, out);
|
||||||
}
|
}
|
||||||
writeTrailer(dirStart);
|
|
||||||
out.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void writeTrailer(long dirStart) throws IOException {
|
protected void writeTrailer(long dirStart) throws IOException {
|
||||||
|
|
|
@ -19,10 +19,12 @@ package org.apache.lucene.index.codecs;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/** @lucene.experimental */
|
/** @lucene.experimental */
|
||||||
public abstract class TermsIndexWriterBase {
|
public abstract class TermsIndexWriterBase implements Closeable {
|
||||||
|
|
||||||
public abstract class FieldWriter {
|
public abstract class FieldWriter {
|
||||||
public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
|
public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
|
||||||
|
@ -31,6 +33,4 @@ public abstract class TermsIndexWriterBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;
|
public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;
|
||||||
|
|
||||||
public abstract void close() throws IOException;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,11 +33,11 @@ import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
import org.apache.lucene.util.automaton.fst.Builder;
|
import org.apache.lucene.util.fst.Builder;
|
||||||
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
|
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||||
import org.apache.lucene.util.automaton.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
|
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||||
import org.apache.lucene.util.automaton.fst.Util; // for toDot
|
import org.apache.lucene.util.fst.Util; // for toDot
|
||||||
|
|
||||||
/** See {@link VariableGapTermsIndexWriter}
|
/** See {@link VariableGapTermsIndexWriter}
|
||||||
*
|
*
|
||||||
|
|
|
@ -28,9 +28,10 @@ import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
import org.apache.lucene.util.automaton.fst.Builder;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.automaton.fst.FST;
|
import org.apache.lucene.util.fst.Builder;
|
||||||
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
|
import org.apache.lucene.util.fst.FST;
|
||||||
|
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Selects index terms according to provided pluggable
|
* Selects index terms according to provided pluggable
|
||||||
|
@ -159,9 +160,17 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException {
|
public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException {
|
||||||
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
|
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
|
||||||
out = state.directory.createOutput(indexFileName);
|
out = state.directory.createOutput(indexFileName);
|
||||||
fieldInfos = state.fieldInfos;
|
boolean success = false;
|
||||||
this.policy = policy;
|
try {
|
||||||
writeHeader(out);
|
fieldInfos = state.fieldInfos;
|
||||||
|
this.policy = policy;
|
||||||
|
writeHeader(out);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void writeHeader(IndexOutput out) throws IOException {
|
protected void writeHeader(IndexOutput out) throws IOException {
|
||||||
|
@ -265,8 +274,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
try {
|
||||||
final long dirStart = out.getFilePointer();
|
final long dirStart = out.getFilePointer();
|
||||||
final int fieldCount = fields.size();
|
final int fieldCount = fields.size();
|
||||||
|
|
||||||
|
@ -287,8 +296,10 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writeTrailer(dirStart);
|
writeTrailer(dirStart);
|
||||||
|
} finally {
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected void writeTrailer(long dirStart) throws IOException {
|
protected void writeTrailer(long dirStart) throws IOException {
|
||||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||||
|
|
|
@ -41,6 +41,7 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
|
||||||
protected final IndexOutput out;
|
protected final IndexOutput out;
|
||||||
|
|
||||||
private int upto;
|
private int upto;
|
||||||
|
private boolean hitExcDuringWrite;
|
||||||
|
|
||||||
// TODO what Var-Var codecs exist in practice... and what are there blocksizes like?
|
// TODO what Var-Var codecs exist in practice... and what are there blocksizes like?
|
||||||
// if its less than 128 we should set that as max and use byte?
|
// if its less than 128 we should set that as max and use byte?
|
||||||
|
@ -105,19 +106,23 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void write(int v) throws IOException {
|
public void write(int v) throws IOException {
|
||||||
|
hitExcDuringWrite = true;
|
||||||
upto -= add(v)-1;
|
upto -= add(v)-1;
|
||||||
|
hitExcDuringWrite = false;
|
||||||
assert upto >= 0;
|
assert upto >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
try {
|
try {
|
||||||
// stuff 0s in until the "real" data is flushed:
|
if (!hitExcDuringWrite) {
|
||||||
int stuffed = 0;
|
// stuff 0s in until the "real" data is flushed:
|
||||||
while(upto > stuffed) {
|
int stuffed = 0;
|
||||||
upto -= add(0)-1;
|
while(upto > stuffed) {
|
||||||
assert upto >= 0;
|
upto -= add(0)-1;
|
||||||
stuffed += 1;
|
assert upto >= 0;
|
||||||
|
stuffed += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
out.close();
|
out.close();
|
||||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.index.codecs.TermsIndexReaderBase;
|
||||||
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
|
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
|
||||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/** This codec "inlines" the postings for terms that have
|
/** This codec "inlines" the postings for terms that have
|
||||||
* low docFreq. It wraps another codec, which is used for
|
* low docFreq. It wraps another codec, which is used for
|
||||||
|
@ -81,7 +82,7 @@ public class PulsingCodec extends Codec {
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
pulsingWriter.close();
|
IOUtils.closeSafely(true, pulsingWriter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,11 +94,7 @@ public class PulsingCodec extends Codec {
|
||||||
return ret;
|
return ret;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
try {
|
IOUtils.closeSafely(true, pulsingWriter, indexWriter);
|
||||||
pulsingWriter.close();
|
|
||||||
} finally {
|
|
||||||
indexWriter.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,8 +71,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
||||||
* for this term) is <= maxPositions, then the postings are
|
* for this term) is <= maxPositions, then the postings are
|
||||||
* inlined into terms dict */
|
* inlined into terms dict */
|
||||||
public PulsingPostingsWriterImpl(int maxPositions, PostingsWriterBase wrappedPostingsWriter) throws IOException {
|
public PulsingPostingsWriterImpl(int maxPositions, PostingsWriterBase wrappedPostingsWriter) throws IOException {
|
||||||
super();
|
|
||||||
|
|
||||||
pending = new Position[maxPositions];
|
pending = new Position[maxPositions];
|
||||||
for(int i=0;i<maxPositions;i++) {
|
for(int i=0;i<maxPositions;i++) {
|
||||||
pending[i] = new Position();
|
pending[i] = new Position();
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.RAMOutputStream;
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
|
/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
|
||||||
* to .pyl, skip data to .skp
|
* to .pyl, skip data to .skp
|
||||||
|
@ -49,18 +50,18 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
final static int VERSION_START = 0;
|
final static int VERSION_START = 0;
|
||||||
final static int VERSION_CURRENT = VERSION_START;
|
final static int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
final IntIndexOutput freqOut;
|
IntIndexOutput freqOut;
|
||||||
final IntIndexOutput.Index freqIndex;
|
IntIndexOutput.Index freqIndex;
|
||||||
|
|
||||||
final IntIndexOutput posOut;
|
IntIndexOutput posOut;
|
||||||
final IntIndexOutput.Index posIndex;
|
IntIndexOutput.Index posIndex;
|
||||||
|
|
||||||
final IntIndexOutput docOut;
|
IntIndexOutput docOut;
|
||||||
final IntIndexOutput.Index docIndex;
|
IntIndexOutput.Index docIndex;
|
||||||
|
|
||||||
final IndexOutput payloadOut;
|
IndexOutput payloadOut;
|
||||||
|
|
||||||
final IndexOutput skipOut;
|
IndexOutput skipOut;
|
||||||
IndexOutput termsOut;
|
IndexOutput termsOut;
|
||||||
|
|
||||||
final SepSkipListWriter skipListWriter;
|
final SepSkipListWriter skipListWriter;
|
||||||
|
@ -107,44 +108,51 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException {
|
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException {
|
||||||
super();
|
freqOut = null;
|
||||||
this.skipInterval = skipInterval;
|
freqIndex = null;
|
||||||
this.skipMinimum = skipInterval; /* set to the same for now */
|
posOut = null;
|
||||||
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
|
posIndex = null;
|
||||||
docOut = factory.createOutput(state.directory, docFileName);
|
payloadOut = null;
|
||||||
docIndex = docOut.index();
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
this.skipInterval = skipInterval;
|
||||||
|
this.skipMinimum = skipInterval; /* set to the same for now */
|
||||||
|
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
|
||||||
|
docOut = factory.createOutput(state.directory, docFileName);
|
||||||
|
docIndex = docOut.index();
|
||||||
|
|
||||||
if (state.fieldInfos.hasProx()) {
|
if (state.fieldInfos.hasProx()) {
|
||||||
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
|
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
|
||||||
freqOut = factory.createOutput(state.directory, frqFileName);
|
freqOut = factory.createOutput(state.directory, frqFileName);
|
||||||
freqIndex = freqOut.index();
|
freqIndex = freqOut.index();
|
||||||
|
|
||||||
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
|
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
|
||||||
posOut = factory.createOutput(state.directory, posFileName);
|
posOut = factory.createOutput(state.directory, posFileName);
|
||||||
posIndex = posOut.index();
|
posIndex = posOut.index();
|
||||||
|
|
||||||
// TODO: -- only if at least one field stores payloads?
|
// TODO: -- only if at least one field stores payloads?
|
||||||
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
|
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
|
||||||
payloadOut = state.directory.createOutput(payloadFileName);
|
payloadOut = state.directory.createOutput(payloadFileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
|
||||||
|
skipOut = state.directory.createOutput(skipFileName);
|
||||||
|
|
||||||
|
totalNumDocs = state.numDocs;
|
||||||
|
|
||||||
|
skipListWriter = new SepSkipListWriter(skipInterval,
|
||||||
|
maxSkipLevels,
|
||||||
|
state.numDocs,
|
||||||
|
freqOut, docOut,
|
||||||
|
posOut, payloadOut);
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, docOut, skipOut, freqOut, posOut, payloadOut);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
|
||||||
freqOut = null;
|
|
||||||
freqIndex = null;
|
|
||||||
posOut = null;
|
|
||||||
posIndex = null;
|
|
||||||
payloadOut = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
|
|
||||||
skipOut = state.directory.createOutput(skipFileName);
|
|
||||||
|
|
||||||
totalNumDocs = state.numDocs;
|
|
||||||
|
|
||||||
skipListWriter = new SepSkipListWriter(skipInterval,
|
|
||||||
maxSkipLevels,
|
|
||||||
state.numDocs,
|
|
||||||
freqOut, docOut,
|
|
||||||
posOut, payloadOut);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -306,25 +314,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
try {
|
IOUtils.closeSafely(false, docOut, skipOut, freqOut, posOut, payloadOut);
|
||||||
docOut.close();
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
skipOut.close();
|
|
||||||
} finally {
|
|
||||||
if (freqOut != null) {
|
|
||||||
try {
|
|
||||||
freqOut.close();
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
posOut.close();
|
|
||||||
} finally {
|
|
||||||
payloadOut.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void getExtensions(Set<String> extensions) {
|
public static void getExtensions(Set<String> extensions) {
|
||||||
|
|
|
@ -29,13 +29,14 @@ import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.CharsRef;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util.automaton.fst.Builder;
|
import org.apache.lucene.util.fst.Builder;
|
||||||
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
|
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||||
import org.apache.lucene.util.automaton.fst.FST;
|
import org.apache.lucene.util.fst.FST;
|
||||||
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
|
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||||
import org.apache.lucene.util.automaton.fst.PairOutputs;
|
import org.apache.lucene.util.fst.PairOutputs;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
@ -236,7 +237,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
private int tf;
|
private int tf;
|
||||||
private Bits skipDocs;
|
private Bits skipDocs;
|
||||||
private final BytesRef scratch = new BytesRef(10);
|
private final BytesRef scratch = new BytesRef(10);
|
||||||
private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
|
private final CharsRef scratchUTF16 = new CharsRef(10);
|
||||||
|
|
||||||
public SimpleTextDocsEnum() {
|
public SimpleTextDocsEnum() {
|
||||||
this.inStart = SimpleTextFieldsReader.this.in;
|
this.inStart = SimpleTextFieldsReader.this.in;
|
||||||
|
@ -286,7 +287,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
||||||
docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
|
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||||
termFreq = 0;
|
termFreq = 0;
|
||||||
first = false;
|
first = false;
|
||||||
} else if (scratch.startsWith(POS)) {
|
} else if (scratch.startsWith(POS)) {
|
||||||
|
@ -323,8 +324,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
private Bits skipDocs;
|
private Bits skipDocs;
|
||||||
private final BytesRef scratch = new BytesRef(10);
|
private final BytesRef scratch = new BytesRef(10);
|
||||||
private final BytesRef scratch2 = new BytesRef(10);
|
private final BytesRef scratch2 = new BytesRef(10);
|
||||||
private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
|
private final CharsRef scratchUTF16 = new CharsRef(10);
|
||||||
private final UnicodeUtil.UTF16Result scratchUTF16_2 = new UnicodeUtil.UTF16Result();
|
private final CharsRef scratchUTF16_2 = new CharsRef(10);
|
||||||
private BytesRef payload;
|
private BytesRef payload;
|
||||||
private long nextDocStart;
|
private long nextDocStart;
|
||||||
|
|
||||||
|
@ -368,7 +369,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
||||||
docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
|
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||||
tf = 0;
|
tf = 0;
|
||||||
posStart = in.getFilePointer();
|
posStart = in.getFilePointer();
|
||||||
first = false;
|
first = false;
|
||||||
|
@ -400,7 +401,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
readLine(in, scratch);
|
readLine(in, scratch);
|
||||||
assert scratch.startsWith(POS): "got line=" + scratch.utf8ToString();
|
assert scratch.startsWith(POS): "got line=" + scratch.utf8ToString();
|
||||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
|
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
|
||||||
final int pos = ArrayUtil.parseInt(scratchUTF16_2.result, 0, scratchUTF16_2.length);
|
final int pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
|
||||||
final long fp = in.getFilePointer();
|
final long fp = in.getFilePointer();
|
||||||
readLine(in, scratch);
|
readLine(in, scratch);
|
||||||
if (scratch.startsWith(PAYLOAD)) {
|
if (scratch.startsWith(PAYLOAD)) {
|
||||||
|
|
|
@ -143,8 +143,11 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
write(END);
|
try {
|
||||||
newline();
|
write(END);
|
||||||
out.close();
|
newline();
|
||||||
|
} finally {
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.RAMOutputStream;
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/** @lucene.experimental */
|
/** @lucene.experimental */
|
||||||
public final class StandardPostingsWriter extends PostingsWriterBase {
|
public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
|
@ -42,8 +43,8 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
final static int VERSION_START = 0;
|
final static int VERSION_START = 0;
|
||||||
final static int VERSION_CURRENT = VERSION_START;
|
final static int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
final IndexOutput freqOut;
|
IndexOutput freqOut;
|
||||||
final IndexOutput proxOut;
|
IndexOutput proxOut;
|
||||||
final DefaultSkipListWriter skipListWriter;
|
final DefaultSkipListWriter skipListWriter;
|
||||||
/** Expert: The fraction of TermDocs entries stored in skip tables,
|
/** Expert: The fraction of TermDocs entries stored in skip tables,
|
||||||
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
|
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
|
||||||
|
@ -85,31 +86,35 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
|
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
|
||||||
this(state, DEFAULT_SKIP_INTERVAL);
|
this(state, DEFAULT_SKIP_INTERVAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
public StandardPostingsWriter(SegmentWriteState state, int skipInterval) throws IOException {
|
public StandardPostingsWriter(SegmentWriteState state, int skipInterval) throws IOException {
|
||||||
super();
|
|
||||||
this.skipInterval = skipInterval;
|
this.skipInterval = skipInterval;
|
||||||
this.skipMinimum = skipInterval; /* set to the same for now */
|
this.skipMinimum = skipInterval; /* set to the same for now */
|
||||||
//this.segment = state.segmentName;
|
//this.segment = state.segmentName;
|
||||||
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
|
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
|
||||||
freqOut = state.directory.createOutput(fileName);
|
freqOut = state.directory.createOutput(fileName);
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
if (state.fieldInfos.hasProx()) {
|
||||||
|
// At least one field does not omit TF, so create the
|
||||||
|
// prox file
|
||||||
|
fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
|
||||||
|
proxOut = state.directory.createOutput(fileName);
|
||||||
|
} else {
|
||||||
|
// Every field omits TF so we will write no prox file
|
||||||
|
proxOut = null;
|
||||||
|
}
|
||||||
|
|
||||||
if (state.fieldInfos.hasProx()) {
|
totalNumDocs = state.numDocs;
|
||||||
// At least one field does not omit TF, so create the
|
|
||||||
// prox file
|
skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels,
|
||||||
fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
|
state.numDocs, freqOut, proxOut);
|
||||||
proxOut = state.directory.createOutput(fileName);
|
success = true;
|
||||||
} else {
|
} finally {
|
||||||
// Every field omits TF so we will write no prox file
|
if (!success) {
|
||||||
proxOut = null;
|
IOUtils.closeSafely(true, freqOut, proxOut);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
totalNumDocs = state.numDocs;
|
|
||||||
|
|
||||||
skipListWriter = new DefaultSkipListWriter(skipInterval,
|
|
||||||
maxSkipLevels,
|
|
||||||
state.numDocs,
|
|
||||||
freqOut,
|
|
||||||
proxOut);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -267,12 +272,6 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
try {
|
IOUtils.closeSafely(false, freqOut, proxOut);
|
||||||
freqOut.close();
|
|
||||||
} finally {
|
|
||||||
if (proxOut != null) {
|
|
||||||
proxOut.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -181,6 +181,7 @@ public abstract class CachingCollector extends Collector {
|
||||||
curUpto = 0;
|
curUpto = 0;
|
||||||
}
|
}
|
||||||
cachedScorer.score = curScores[curUpto];
|
cachedScorer.score = curScores[curUpto];
|
||||||
|
cachedScorer.doc = curDocs[curUpto];
|
||||||
other.collect(curDocs[curUpto++]);
|
other.collect(curDocs[curUpto++]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,8 +22,6 @@ import java.io.FileNotFoundException;
|
||||||
import java.io.FilenameFilter;
|
import java.io.FilenameFilter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.RandomAccessFile;
|
import java.io.RandomAccessFile;
|
||||||
import java.security.MessageDigest;
|
|
||||||
import java.security.NoSuchAlgorithmException;
|
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import static java.util.Collections.synchronizedSet;
|
import static java.util.Collections.synchronizedSet;
|
||||||
|
@ -111,15 +109,6 @@ import org.apache.lucene.util.Constants;
|
||||||
* @see Directory
|
* @see Directory
|
||||||
*/
|
*/
|
||||||
public abstract class FSDirectory extends Directory {
|
public abstract class FSDirectory extends Directory {
|
||||||
private final static MessageDigest DIGESTER;
|
|
||||||
|
|
||||||
static {
|
|
||||||
try {
|
|
||||||
DIGESTER = MessageDigest.getInstance("MD5");
|
|
||||||
} catch (NoSuchAlgorithmException e) {
|
|
||||||
throw new RuntimeException(e.toString(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default read chunk size. This is a conditional default: on 32bit JVMs, it defaults to 100 MB. On 64bit JVMs, it's
|
* Default read chunk size. This is a conditional default: on 32bit JVMs, it defaults to 100 MB. On 64bit JVMs, it's
|
||||||
|
@ -337,12 +326,6 @@ public abstract class FSDirectory extends Directory {
|
||||||
return openInput(name, BufferedIndexInput.BUFFER_SIZE);
|
return openInput(name, BufferedIndexInput.BUFFER_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* So we can do some byte-to-hexchar conversion below
|
|
||||||
*/
|
|
||||||
private static final char[] HEX_DIGITS =
|
|
||||||
{'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getLockID() {
|
public String getLockID() {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
|
@ -353,19 +336,12 @@ public abstract class FSDirectory extends Directory {
|
||||||
throw new RuntimeException(e.toString(), e);
|
throw new RuntimeException(e.toString(), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
byte digest[];
|
int digest = 0;
|
||||||
synchronized (DIGESTER) {
|
for(int charIDX=0;charIDX<dirName.length();charIDX++) {
|
||||||
digest = DIGESTER.digest(dirName.getBytes());
|
final char ch = dirName.charAt(charIDX);
|
||||||
|
digest = 31 * digest + ch;
|
||||||
}
|
}
|
||||||
StringBuilder buf = new StringBuilder();
|
return "lucene-" + Integer.toHexString(digest);
|
||||||
buf.append("lucene-");
|
|
||||||
for (int i = 0; i < digest.length; i++) {
|
|
||||||
int b = digest[i];
|
|
||||||
buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
|
|
||||||
buf.append(HEX_DIGITS[b & 0xf]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Closes the store to future operations. */
|
/** Closes the store to future operations. */
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.util;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
|
|
||||||
/** Represents byte[], as a slice (offset + length) into an
|
/** Represents byte[], as a slice (offset + length) into an
|
||||||
* existing byte[].
|
* existing byte[].
|
||||||
|
@ -122,6 +121,7 @@ public final class BytesRef implements Comparable<BytesRef> {
|
||||||
public void copy(char text[], int offset, int length) {
|
public void copy(char text[], int offset, int length) {
|
||||||
UnicodeUtil.UTF16toUTF8(text, offset, length, this);
|
UnicodeUtil.UTF16toUTF8(text, offset, length, this);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean bytesEquals(BytesRef other) {
|
public boolean bytesEquals(BytesRef other) {
|
||||||
if (length == other.length) {
|
if (length == other.length) {
|
||||||
int otherUpto = other.offset;
|
int otherUpto = other.offset;
|
||||||
|
@ -198,13 +198,15 @@ public final class BytesRef implements Comparable<BytesRef> {
|
||||||
/** Interprets stored bytes as UTF8 bytes, returning the
|
/** Interprets stored bytes as UTF8 bytes, returning the
|
||||||
* resulting string */
|
* resulting string */
|
||||||
public String utf8ToString() {
|
public String utf8ToString() {
|
||||||
try {
|
final CharsRef ref = new CharsRef(length);
|
||||||
return new String(bytes, offset, length, "UTF-8");
|
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
|
||||||
} catch (UnsupportedEncodingException uee) {
|
return ref.toString();
|
||||||
// should not happen -- UTF8 is presumably supported
|
}
|
||||||
// by all JREs
|
|
||||||
throw new RuntimeException(uee);
|
/** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */
|
||||||
}
|
public CharsRef utf8ToChars(CharsRef ref) {
|
||||||
|
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
|
||||||
|
return ref;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
|
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
|
||||||
|
|
|
@ -0,0 +1,215 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents char[], as a slice (offset + length) into an existing char[].
|
||||||
|
*
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public final class CharsRef implements Comparable<CharsRef>, CharSequence {
|
||||||
|
private static final char[] EMPTY_ARRAY = new char[0];
|
||||||
|
public char[] chars;
|
||||||
|
public int offset;
|
||||||
|
public int length;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link CharsRef} initialized an empty array zero-length
|
||||||
|
*/
|
||||||
|
public CharsRef() {
|
||||||
|
this(EMPTY_ARRAY, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link CharsRef} initialized with an array of the given
|
||||||
|
* capacity
|
||||||
|
*/
|
||||||
|
public CharsRef(int capacity) {
|
||||||
|
chars = new char[capacity];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link CharsRef} initialized with the given array, offset and
|
||||||
|
* length
|
||||||
|
*/
|
||||||
|
public CharsRef(char[] chars, int offset, int length) {
|
||||||
|
assert chars != null;
|
||||||
|
assert chars.length >= offset + length;
|
||||||
|
this.chars = chars;
|
||||||
|
this.offset = offset;
|
||||||
|
this.length = length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link CharsRef} initialized with the given Strings character
|
||||||
|
* array
|
||||||
|
*/
|
||||||
|
public CharsRef(String string) {
|
||||||
|
this.chars = string.toCharArray();
|
||||||
|
this.offset = 0;
|
||||||
|
this.length = chars.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link CharsRef} and copies the contents of the source into
|
||||||
|
* the new instance.
|
||||||
|
* @see #copy(CharsRef)
|
||||||
|
*/
|
||||||
|
public CharsRef(CharsRef other) {
|
||||||
|
copy(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object clone() {
|
||||||
|
return new CharsRef(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
final int prime = 31;
|
||||||
|
int result = 0;
|
||||||
|
final int end = offset + length;
|
||||||
|
for (int i = offset; i < end; i++) {
|
||||||
|
result = prime * result + chars[i];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (this == other) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (other instanceof CharsRef) {
|
||||||
|
return charsEquals((CharsRef) other);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (other instanceof CharSequence) {
|
||||||
|
final CharSequence seq = (CharSequence) other;
|
||||||
|
if (length == seq.length()) {
|
||||||
|
int n = length;
|
||||||
|
int i = offset;
|
||||||
|
int j = 0;
|
||||||
|
while (n-- != 0) {
|
||||||
|
if (chars[i++] != seq.charAt(j++))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean charsEquals(CharsRef other) {
|
||||||
|
if (length == other.length) {
|
||||||
|
int otherUpto = other.offset;
|
||||||
|
final char[] otherChars = other.chars;
|
||||||
|
final int end = offset + length;
|
||||||
|
for (int upto = offset; upto < end; upto++, otherUpto++) {
|
||||||
|
if (chars[upto] != otherChars[otherUpto]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Signed int order comparison */
|
||||||
|
public int compareTo(CharsRef other) {
|
||||||
|
if (this == other)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
final char[] aChars = this.chars;
|
||||||
|
int aUpto = this.offset;
|
||||||
|
final char[] bChars = other.chars;
|
||||||
|
int bUpto = other.offset;
|
||||||
|
|
||||||
|
final int aStop = aUpto + Math.min(this.length, other.length);
|
||||||
|
|
||||||
|
while (aUpto < aStop) {
|
||||||
|
int aInt = aChars[aUpto++];
|
||||||
|
int bInt = bChars[bUpto++];
|
||||||
|
if (aInt > bInt) {
|
||||||
|
return 1;
|
||||||
|
} else if (aInt < bInt) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// One is a prefix of the other, or, they are equal:
|
||||||
|
return this.length - other.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies the given {@link CharsRef} referenced content into this instance
|
||||||
|
* starting at offset 0.
|
||||||
|
*
|
||||||
|
* @param other
|
||||||
|
* the {@link CharsRef} to copy
|
||||||
|
*/
|
||||||
|
public void copy(CharsRef other) {
|
||||||
|
chars = ArrayUtil.grow(chars, other.length);
|
||||||
|
System.arraycopy(other.chars, other.offset, chars, 0, other.length);
|
||||||
|
length = other.length;
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void grow(int newLength) {
|
||||||
|
if (chars.length < newLength) {
|
||||||
|
chars = ArrayUtil.grow(chars, newLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies the given array into this CharsRef starting at offset 0
|
||||||
|
*/
|
||||||
|
public void copy(char[] otherChars, int otherOffset, int otherLength) {
|
||||||
|
this.offset = 0;
|
||||||
|
append(otherChars, otherOffset, otherLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Appends the given array to this CharsRef starting at the current offset
|
||||||
|
*/
|
||||||
|
public void append(char[] otherChars, int otherOffset, int otherLength) {
|
||||||
|
grow(this.offset + otherLength);
|
||||||
|
System.arraycopy(otherChars, otherOffset, this.chars, this.offset,
|
||||||
|
otherLength);
|
||||||
|
this.length = otherLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new String(chars, offset, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int length() {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public char charAt(int index) {
|
||||||
|
return chars[offset + index];
|
||||||
|
}
|
||||||
|
|
||||||
|
public CharSequence subSequence(int start, int end) {
|
||||||
|
return new CharsRef(chars, offset + start, offset + end - 1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -43,6 +43,8 @@ public final class Constants {
|
||||||
public static final boolean WINDOWS = OS_NAME.startsWith("Windows");
|
public static final boolean WINDOWS = OS_NAME.startsWith("Windows");
|
||||||
/** True iff running on SunOS. */
|
/** True iff running on SunOS. */
|
||||||
public static final boolean SUN_OS = OS_NAME.startsWith("SunOS");
|
public static final boolean SUN_OS = OS_NAME.startsWith("SunOS");
|
||||||
|
/** True iff running on Mac OS X */
|
||||||
|
public static final boolean MAC_OS_X = OS_NAME.startsWith("Mac OS X");
|
||||||
|
|
||||||
public static final String OS_ARCH = System.getProperty("os.arch");
|
public static final String OS_ARCH = System.getProperty("os.arch");
|
||||||
public static final String OS_VERSION = System.getProperty("os.version");
|
public static final String OS_VERSION = System.getProperty("os.version");
|
||||||
|
|
|
@ -47,44 +47,113 @@ public final class IOUtils {
|
||||||
* @param objects objects to call <tt>close()</tt> on
|
* @param objects objects to call <tt>close()</tt> on
|
||||||
*/
|
*/
|
||||||
public static <E extends Exception> void closeSafely(E priorException, Closeable... objects) throws E, IOException {
|
public static <E extends Exception> void closeSafely(E priorException, Closeable... objects) throws E, IOException {
|
||||||
IOException firstIOE = null;
|
Throwable th = null;
|
||||||
|
|
||||||
for (Closeable object : objects) {
|
for (Closeable object : objects) {
|
||||||
try {
|
try {
|
||||||
if (object != null)
|
if (object != null) {
|
||||||
object.close();
|
object.close();
|
||||||
} catch (IOException ioe) {
|
}
|
||||||
if (firstIOE == null)
|
} catch (Throwable t) {
|
||||||
firstIOE = ioe;
|
if (th == null) {
|
||||||
|
th = t;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (priorException != null)
|
if (priorException != null) {
|
||||||
throw priorException;
|
throw priorException;
|
||||||
else if (firstIOE != null)
|
} else if (th != null) {
|
||||||
throw firstIOE;
|
if (th instanceof IOException) throw (IOException) th;
|
||||||
|
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||||
|
if (th instanceof Error) throw (Error) th;
|
||||||
|
throw new RuntimeException(th);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @see #closeSafely(Exception, Closeable...) */
|
||||||
|
public static <E extends Exception> void closeSafely(E priorException, Iterable<Closeable> objects) throws E, IOException {
|
||||||
|
Throwable th = null;
|
||||||
|
|
||||||
|
for (Closeable object : objects) {
|
||||||
|
try {
|
||||||
|
if (object != null) {
|
||||||
|
object.close();
|
||||||
|
}
|
||||||
|
} catch (Throwable t) {
|
||||||
|
if (th == null) {
|
||||||
|
th = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (priorException != null) {
|
||||||
|
throw priorException;
|
||||||
|
} else if (th != null) {
|
||||||
|
if (th instanceof IOException) throw (IOException) th;
|
||||||
|
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||||
|
if (th instanceof Error) throw (Error) th;
|
||||||
|
throw new RuntimeException(th);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions. Some of the <tt>Closeable</tt>s
|
* Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions.
|
||||||
* may be null, they are ignored. After everything is closed, method either throws the first of suppressed exceptions,
|
* Some of the <tt>Closeable</tt>s may be null, they are ignored. After
|
||||||
* or completes normally.</p>
|
* everything is closed, and if {@code suppressExceptions} is {@code false},
|
||||||
* @param objects objects to call <tt>close()</tt> on
|
* method either throws the first of suppressed exceptions, or completes
|
||||||
|
* normally.
|
||||||
|
*
|
||||||
|
* @param suppressExceptions
|
||||||
|
* if true then exceptions that occur during close() are suppressed
|
||||||
|
* @param objects
|
||||||
|
* objects to call <tt>close()</tt> on
|
||||||
*/
|
*/
|
||||||
public static void closeSafely(Closeable... objects) throws IOException {
|
public static void closeSafely(boolean suppressExceptions, Closeable... objects) throws IOException {
|
||||||
IOException firstIOE = null;
|
Throwable th = null;
|
||||||
|
|
||||||
for (Closeable object : objects) {
|
for (Closeable object : objects) {
|
||||||
try {
|
try {
|
||||||
if (object != null)
|
if (object != null) {
|
||||||
object.close();
|
object.close();
|
||||||
} catch (IOException ioe) {
|
}
|
||||||
if (firstIOE == null)
|
} catch (Throwable t) {
|
||||||
firstIOE = ioe;
|
if (th == null)
|
||||||
|
th = t;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (firstIOE != null)
|
if (th != null && !suppressExceptions) {
|
||||||
throw firstIOE;
|
if (th instanceof IOException) throw (IOException) th;
|
||||||
|
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||||
|
if (th instanceof Error) throw (Error) th;
|
||||||
|
throw new RuntimeException(th);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see #closeSafely(boolean, Closeable...)
|
||||||
|
*/
|
||||||
|
public static void closeSafely(boolean suppressExceptions, Iterable<? extends Closeable> objects) throws IOException {
|
||||||
|
Throwable th = null;
|
||||||
|
|
||||||
|
for (Closeable object : objects) {
|
||||||
|
try {
|
||||||
|
if (object != null) {
|
||||||
|
object.close();
|
||||||
|
}
|
||||||
|
} catch (Throwable t) {
|
||||||
|
if (th == null)
|
||||||
|
th = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (th != null && !suppressExceptions) {
|
||||||
|
if (th instanceof IOException) throw (IOException) th;
|
||||||
|
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||||
|
if (th instanceof Error) throw (Error) th;
|
||||||
|
throw new RuntimeException(th);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -78,11 +78,15 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
protected long[] bits;
|
protected long[] bits;
|
||||||
protected int wlen; // number of words (elements) used in the array
|
protected int wlen; // number of words (elements) used in the array
|
||||||
|
|
||||||
|
// Used only for assert:
|
||||||
|
private long numBits;
|
||||||
|
|
||||||
/** Constructs an OpenBitSet large enough to hold numBits.
|
/** Constructs an OpenBitSet large enough to hold numBits.
|
||||||
*
|
*
|
||||||
* @param numBits
|
* @param numBits
|
||||||
*/
|
*/
|
||||||
public OpenBitSet(long numBits) {
|
public OpenBitSet(long numBits) {
|
||||||
|
this.numBits = numBits;
|
||||||
bits = new long[bits2words(numBits)];
|
bits = new long[bits2words(numBits)];
|
||||||
wlen = bits.length;
|
wlen = bits.length;
|
||||||
}
|
}
|
||||||
|
@ -107,6 +111,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
public OpenBitSet(long[] bits, int numWords) {
  // The given array is used directly (not copied); numWords is the number of
  // leading words of it that are considered in use.
  this.bits = bits;
  this.wlen = numWords;
  // Assert-only bound: 64 bits per word. Multiply as long so that word counts
  // of 2^25 and above do not overflow int before being stored in the long field.
  this.numBits = wlen * 64L;
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -170,6 +175,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size
|
* The index should be less than the OpenBitSet size
|
||||||
*/
|
*/
|
||||||
public boolean fastGet(int index) {
|
public boolean fastGet(int index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int i = index >> 6; // div 64
|
int i = index >> 6; // div 64
|
||||||
// signed shift will keep a negative index and force an
|
// signed shift will keep a negative index and force an
|
||||||
// array-index-out-of-bounds-exception, removing the need for an explicit check.
|
// array-index-out-of-bounds-exception, removing the need for an explicit check.
|
||||||
|
@ -194,6 +200,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public boolean fastGet(long index) {
|
public boolean fastGet(long index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int i = (int)(index >> 6); // div 64
|
int i = (int)(index >> 6); // div 64
|
||||||
int bit = (int)index & 0x3f; // mod 64
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -217,6 +224,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size
|
* The index should be less than the OpenBitSet size
|
||||||
*/
|
*/
|
||||||
public int getBit(int index) {
|
public int getBit(int index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int i = index >> 6; // div 64
|
int i = index >> 6; // div 64
|
||||||
int bit = index & 0x3f; // mod 64
|
int bit = index & 0x3f; // mod 64
|
||||||
return ((int)(bits[i]>>>bit)) & 0x01;
|
return ((int)(bits[i]>>>bit)) & 0x01;
|
||||||
|
@ -245,6 +253,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public void fastSet(int index) {
|
public void fastSet(int index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = index >> 6; // div 64
|
int wordNum = index >> 6; // div 64
|
||||||
int bit = index & 0x3f; // mod 64
|
int bit = index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -255,6 +264,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public void fastSet(long index) {
|
public void fastSet(long index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = (int)(index >> 6);
|
int wordNum = (int)(index >> 6);
|
||||||
int bit = (int)index & 0x3f;
|
int bit = (int)index & 0x3f;
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -296,6 +306,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
ensureCapacity(index+1);
|
ensureCapacity(index+1);
|
||||||
wlen = wordNum+1;
|
wlen = wordNum+1;
|
||||||
}
|
}
|
||||||
|
assert (numBits = Math.max(numBits, index+1)) >= 0;
|
||||||
return wordNum;
|
return wordNum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,6 +315,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public void fastClear(int index) {
|
public void fastClear(int index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = index >> 6;
|
int wordNum = index >> 6;
|
||||||
int bit = index & 0x03f;
|
int bit = index & 0x03f;
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -321,6 +333,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public void fastClear(long index) {
|
public void fastClear(long index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = (int)(index >> 6); // div 64
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
int bit = (int)index & 0x3f; // mod 64
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -415,6 +428,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public boolean getAndSet(int index) {
|
public boolean getAndSet(int index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = index >> 6; // div 64
|
int wordNum = index >> 6; // div 64
|
||||||
int bit = index & 0x3f; // mod 64
|
int bit = index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -427,6 +441,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public boolean getAndSet(long index) {
|
public boolean getAndSet(long index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = (int)(index >> 6); // div 64
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
int bit = (int)index & 0x3f; // mod 64
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -439,6 +454,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public void fastFlip(int index) {
|
public void fastFlip(int index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = index >> 6; // div 64
|
int wordNum = index >> 6; // div 64
|
||||||
int bit = index & 0x3f; // mod 64
|
int bit = index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -449,6 +465,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public void fastFlip(long index) {
|
public void fastFlip(long index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = (int)(index >> 6); // div 64
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
int bit = (int)index & 0x3f; // mod 64
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -467,6 +484,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public boolean flipAndGet(int index) {
|
public boolean flipAndGet(int index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = index >> 6; // div 64
|
int wordNum = index >> 6; // div 64
|
||||||
int bit = index & 0x3f; // mod 64
|
int bit = index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -478,6 +496,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
* The index should be less than the OpenBitSet size.
|
* The index should be less than the OpenBitSet size.
|
||||||
*/
|
*/
|
||||||
public boolean flipAndGet(long index) {
|
public boolean flipAndGet(long index) {
|
||||||
|
assert index >= 0 && index < numBits;
|
||||||
int wordNum = (int)(index >> 6); // div 64
|
int wordNum = (int)(index >> 6); // div 64
|
||||||
int bit = (int)index & 0x3f; // mod 64
|
int bit = (int)index & 0x3f; // mod 64
|
||||||
long bitmask = 1L << bit;
|
long bitmask = 1L << bit;
|
||||||
|
@ -674,6 +693,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
public void union(OpenBitSet other) {
|
public void union(OpenBitSet other) {
|
||||||
int newLen = Math.max(wlen,other.wlen);
|
int newLen = Math.max(wlen,other.wlen);
|
||||||
ensureCapacityWords(newLen);
|
ensureCapacityWords(newLen);
|
||||||
|
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
|
||||||
|
|
||||||
long[] thisArr = this.bits;
|
long[] thisArr = this.bits;
|
||||||
long[] otherArr = other.bits;
|
long[] otherArr = other.bits;
|
||||||
|
@ -702,6 +722,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
||||||
public void xor(OpenBitSet other) {
|
public void xor(OpenBitSet other) {
|
||||||
int newLen = Math.max(wlen,other.wlen);
|
int newLen = Math.max(wlen,other.wlen);
|
||||||
ensureCapacityWords(newLen);
|
ensureCapacityWords(newLen);
|
||||||
|
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
|
||||||
|
|
||||||
long[] thisArr = this.bits;
|
long[] thisArr = this.bits;
|
||||||
long[] otherArr = other.bits;
|
long[] otherArr = other.bits;
|
||||||
|
|
|
@ -95,6 +95,19 @@ package org.apache.lucene.util;
|
||||||
|
|
||||||
public final class UnicodeUtil {
|
public final class UnicodeUtil {
|
||||||
|
|
||||||
|
/** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
|
||||||
|
* one would normally encounter, and definitely bigger than any UTF-8 terms.
|
||||||
|
* <p>
|
||||||
|
* WARNING: This is not a valid UTF8 Term
|
||||||
|
**/
|
||||||
|
public static final BytesRef BIG_TERM = new BytesRef(
|
||||||
|
new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
|
||||||
|
); // TODO this is unrelated here find a better place for it
|
||||||
|
|
||||||
|
// Ad-hoc command-line entry point: prints the result of Character.toChars(0x10FFFF + 1).
// NOTE(review): 0x10FFFF is the largest Unicode code point, so the argument (0x110000) is
// out of range and Character.toChars is documented to throw IllegalArgumentException for
// invalid code points. This looks like leftover debugging code -- confirm whether it
// should be removed from this utility class.
public static void main(String[] args) {
|
||||||
|
System.out.println(Character.toChars(0x10FFFF + 1));
|
||||||
|
}
|
||||||
|
|
||||||
private UnicodeUtil() {} // no instance
|
private UnicodeUtil() {} // no instance
|
||||||
|
|
||||||
public static final int UNI_SUR_HIGH_START = 0xD800;
|
public static final int UNI_SUR_HIGH_START = 0xD800;
|
||||||
|
@ -112,33 +125,6 @@ public final class UnicodeUtil {
|
||||||
Character.MIN_SUPPLEMENTARY_CODE_POINT -
|
Character.MIN_SUPPLEMENTARY_CODE_POINT -
|
||||||
(UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;
|
(UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;
|
||||||
|
|
||||||
/**
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
public static final class UTF16Result {
|
|
||||||
public char[] result = new char[10];
|
|
||||||
public int[] offsets = new int[10];
|
|
||||||
public int length;
|
|
||||||
|
|
||||||
public void setLength(int newLength) {
|
|
||||||
if (result.length < newLength)
|
|
||||||
result = ArrayUtil.grow(result, newLength);
|
|
||||||
length = newLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void copyText(UTF16Result other) {
|
|
||||||
setLength(other.length);
|
|
||||||
System.arraycopy(other.result, 0, result, 0, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void copyText(String other) {
|
|
||||||
final int otherLength = other.length();
|
|
||||||
setLength(otherLength);
|
|
||||||
other.getChars(0, otherLength, result, 0);
|
|
||||||
length = otherLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Encode characters from a char[] source, starting at
|
/** Encode characters from a char[] source, starting at
|
||||||
* offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */
|
* offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */
|
||||||
public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
|
public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
|
||||||
|
@ -302,135 +288,6 @@ public final class UnicodeUtil {
|
||||||
result.length = upto;
|
result.length = upto;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Convert UTF8 bytes into UTF16 characters. If offset
|
|
||||||
* is non-zero, conversion starts at that starting point
|
|
||||||
* in utf8, re-using the results from the previous call
|
|
||||||
* up until offset. */
|
|
||||||
public static void UTF8toUTF16(final byte[] utf8, final int offset, final int length, final UTF16Result result) {
|
|
||||||
|
|
||||||
final int end = offset + length;
|
|
||||||
char[] out = result.result;
|
|
||||||
if (result.offsets.length <= end) {
|
|
||||||
result.offsets = ArrayUtil.grow(result.offsets, end+1);
|
|
||||||
}
|
|
||||||
final int[] offsets = result.offsets;
|
|
||||||
|
|
||||||
// If incremental decoding fell in the middle of a
|
|
||||||
// single unicode character, rollback to its start:
|
|
||||||
int upto = offset;
|
|
||||||
while(offsets[upto] == -1)
|
|
||||||
upto--;
|
|
||||||
|
|
||||||
int outUpto = offsets[upto];
|
|
||||||
|
|
||||||
// Pre-allocate for worst case 1-for-1
|
|
||||||
if (outUpto+length >= out.length) {
|
|
||||||
out = result.result = ArrayUtil.grow(out, outUpto+length+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (upto < end) {
|
|
||||||
|
|
||||||
final int b = utf8[upto]&0xff;
|
|
||||||
final int ch;
|
|
||||||
|
|
||||||
offsets[upto++] = outUpto;
|
|
||||||
|
|
||||||
if (b < 0xc0) {
|
|
||||||
assert b < 0x80;
|
|
||||||
ch = b;
|
|
||||||
} else if (b < 0xe0) {
|
|
||||||
ch = ((b&0x1f)<<6) + (utf8[upto]&0x3f);
|
|
||||||
offsets[upto++] = -1;
|
|
||||||
} else if (b < 0xf0) {
|
|
||||||
ch = ((b&0xf)<<12) + ((utf8[upto]&0x3f)<<6) + (utf8[upto+1]&0x3f);
|
|
||||||
offsets[upto++] = -1;
|
|
||||||
offsets[upto++] = -1;
|
|
||||||
} else {
|
|
||||||
assert b < 0xf8;
|
|
||||||
ch = ((b&0x7)<<18) + ((utf8[upto]&0x3f)<<12) + ((utf8[upto+1]&0x3f)<<6) + (utf8[upto+2]&0x3f);
|
|
||||||
offsets[upto++] = -1;
|
|
||||||
offsets[upto++] = -1;
|
|
||||||
offsets[upto++] = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ch <= UNI_MAX_BMP) {
|
|
||||||
// target is a character <= 0xFFFF
|
|
||||||
out[outUpto++] = (char) ch;
|
|
||||||
} else {
|
|
||||||
// target is a character in range 0xFFFF - 0x10FFFF
|
|
||||||
out[outUpto++] = (char) ((ch >> HALF_SHIFT) + 0xD7C0 /* UNI_SUR_HIGH_START - 64 */);
|
|
||||||
out[outUpto++] = (char) ((ch & HALF_MASK) + UNI_SUR_LOW_START);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
offsets[upto] = outUpto;
|
|
||||||
result.length = outUpto;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the next valid UTF-16 String in UTF-16 order.
|
|
||||||
* <p>
|
|
||||||
* If the input String is already valid, it is returned.
|
|
||||||
* Otherwise the next String in code unit order is returned.
|
|
||||||
* </p>
|
|
||||||
* @param s input String (possibly with unpaired surrogates)
|
|
||||||
* @return next valid UTF-16 String in UTF-16 order
|
|
||||||
*/
|
|
||||||
public static String nextValidUTF16String(String s) {
|
|
||||||
if (validUTF16String(s))
|
|
||||||
return s;
|
|
||||||
else {
|
|
||||||
UTF16Result chars = new UTF16Result();
|
|
||||||
chars.copyText(s);
|
|
||||||
nextValidUTF16String(chars);
|
|
||||||
return new String(chars.result, 0, chars.length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void nextValidUTF16String(UTF16Result s) {
|
|
||||||
final int size = s.length;
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
char ch = s.result[i];
|
|
||||||
if (ch >= UnicodeUtil.UNI_SUR_HIGH_START
|
|
||||||
&& ch <= UnicodeUtil.UNI_SUR_HIGH_END) {
|
|
||||||
if (i < size - 1) {
|
|
||||||
i++;
|
|
||||||
char nextCH = s.result[i];
|
|
||||||
if (nextCH >= UnicodeUtil.UNI_SUR_LOW_START
|
|
||||||
&& nextCH <= UnicodeUtil.UNI_SUR_LOW_END) {
|
|
||||||
// Valid surrogate pair
|
|
||||||
} else
|
|
||||||
// Unmatched high surrogate
|
|
||||||
if (nextCH < UnicodeUtil.UNI_SUR_LOW_START) { // SMP not enumerated
|
|
||||||
s.setLength(i + 1);
|
|
||||||
s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
|
|
||||||
return;
|
|
||||||
} else { // SMP already enumerated
|
|
||||||
if (s.result[i - 1] == UnicodeUtil.UNI_SUR_HIGH_END) {
|
|
||||||
s.result[i - 1] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
|
|
||||||
s.setLength(i);
|
|
||||||
} else {
|
|
||||||
s.result[i - 1]++;
|
|
||||||
s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
|
|
||||||
s.setLength(i + 1);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Unmatched high surrogate in final position, SMP not yet enumerated
|
|
||||||
s.setLength(i + 2);
|
|
||||||
s.result[i + 1] = (char) UnicodeUtil.UNI_SUR_LOW_START;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else if (ch >= UnicodeUtil.UNI_SUR_LOW_START
|
|
||||||
&& ch <= UnicodeUtil.UNI_SUR_LOW_END) {
|
|
||||||
// Unmatched low surrogate, SMP already enumerated
|
|
||||||
s.setLength(i + 1);
|
|
||||||
s.result[i] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only called from assert
|
// Only called from assert
|
||||||
/*
|
/*
|
||||||
private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
|
private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {
|
||||||
|
@ -705,4 +562,51 @@ public final class UnicodeUtil {
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interprets the given byte array as UTF-8 and converts to UTF-16. The {@link CharsRef} will be extended if
|
||||||
|
* it doesn't provide enough space to hold the worst case of each byte becoming a UTF-16 codepoint.
|
||||||
|
* <p>
|
||||||
|
* NOTE: Full characters are read, even if this reads past the length passed (and
|
||||||
|
* can result in an ArrayOutOfBoundsException if invalid UTF-8 is passed).
|
||||||
|
* Explicit checks for valid UTF-8 are not performed.
|
||||||
|
*/
|
||||||
|
public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
|
||||||
|
int out_offset = chars.offset = 0;
|
||||||
|
final char[] out = chars.chars = ArrayUtil.grow(chars.chars, length);
|
||||||
|
final int limit = offset + length;
|
||||||
|
while (offset < limit) {
|
||||||
|
int b = utf8[offset++]&0xff;
|
||||||
|
if (b < 0xc0) {
|
||||||
|
assert b < 0x80;
|
||||||
|
out[out_offset++] = (char)b;
|
||||||
|
} else if (b < 0xe0) {
|
||||||
|
out[out_offset++] = (char)(((b&0x1f)<<6) + (utf8[offset++]&0x3f));
|
||||||
|
} else if (b < 0xf0) {
|
||||||
|
out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f));
|
||||||
|
offset += 2;
|
||||||
|
} else {
|
||||||
|
assert b < 0xf8;
|
||||||
|
int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f);
|
||||||
|
offset += 3;
|
||||||
|
if (ch < UNI_MAX_BMP) {
|
||||||
|
out[out_offset++] = (char)ch;
|
||||||
|
} else {
|
||||||
|
int chHalf = ch - 0x0010000;
|
||||||
|
out[out_offset++] = (char) ((chHalf >> 10) + 0xD800);
|
||||||
|
out[out_offset++] = (char) ((chHalf & HALF_MASK) + 0xDC00);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
chars.length = out_offset - chars.offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method for {@link #UTF8toUTF16(byte[], int, int, CharsRef)}
|
||||||
|
* @see #UTF8toUTF16(byte[], int, int, CharsRef)
|
||||||
|
*/
|
||||||
|
public static void UTF8toUTF16(BytesRef bytesRef, CharsRef chars) {
|
||||||
|
UTF8toUTF16(bytesRef.bytes, bytesRef.offset, bytesRef.length, chars);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,13 @@ public enum Version {
|
||||||
@Deprecated
|
@Deprecated
|
||||||
LUCENE_32,
|
LUCENE_32,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Match settings and bugs in Lucene's 3.3 release.
|
||||||
|
* @deprecated (4.0) Use latest
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
LUCENE_33,
|
||||||
|
|
||||||
/** Match settings and bugs in Lucene's 4.0 release.
|
/** Match settings and bugs in Lucene's 4.0 release.
|
||||||
* <p>
|
* <p>
|
||||||
* Use this to get the latest & greatest settings, bug
|
* Use this to get the latest & greatest settings, bug
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Output is a sequence of bytes, for each input term.
|
* Output is a sequence of bytes, for each input term.
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/** Can next() and advance() through the terms in an FST
|
/** Can next() and advance() through the terms in an FST
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -23,7 +23,7 @@ import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.CodecUtil;
|
import org.apache.lucene.util.CodecUtil;
|
||||||
import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
|
import org.apache.lucene.util.fst.Builder.UnCompiledNode;
|
||||||
|
|
||||||
// NOTE: while the FST is able to represent a non-final
|
// NOTE: while the FST is able to represent a non-final
|
||||||
// dead-end state (NON_FINAL_END_NODE=0), the layres above
|
// dead-end state (NON_FINAL_END_NODE=0), the layres above
|
||||||
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
|
||||||
/** Represents an FST using a compact byte[] format.
|
/** Represents an FST using a compact byte[] format.
|
||||||
* <p> The format is similar to what's used by Morfologik
|
* <p> The format is similar to what's used by Morfologik
|
||||||
* (http://sourceforge.net/projects/morfologik).
|
* (http://sourceforge.net/projects/morfologik).
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class FST<T> {
|
public class FST<T> {
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -23,6 +23,7 @@ import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/** Can next() and advance() through the terms in an FST
|
/** Can next() and advance() through the terms in an FST
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -25,6 +25,7 @@ import org.apache.lucene.util.IntsRef;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Output is a sequence of ints, for each input term.
|
* Output is a sequence of ints, for each input term.
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -22,6 +22,7 @@ import org.apache.lucene.util.IntsRef;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/** Can next() and advance() through the terms in an FST
|
/** Can next() and advance() through the terms in an FST
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -22,6 +22,8 @@ import org.apache.lucene.store.DataOutput;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use this if you just want to build an FSA.
|
* Use this if you just want to build an FSA.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public final class NoOutputs extends Outputs<Object> {
|
public final class NoOutputs extends Outputs<Object> {
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -25,6 +25,7 @@ import org.apache.lucene.store.DataOutput;
|
||||||
/**
|
/**
|
||||||
* Represents the outputs for an FST, providing the basic
|
* Represents the outputs for an FST, providing the basic
|
||||||
* algebra needed for the FST.
|
* algebra needed for the FST.
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -24,10 +24,10 @@ import org.apache.lucene.store.DataOutput;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pairs up two outputs into one.
|
* Pairs up two outputs into one.
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
|
public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
|
||||||
|
|
||||||
private final Pair<A,B> NO_OUTPUT;
|
private final Pair<A,B> NO_OUTPUT;
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -27,6 +27,7 @@ import org.apache.lucene.store.DataOutput;
|
||||||
* resulting FST is not guaranteed to be minimal! See
|
* resulting FST is not guaranteed to be minimal! See
|
||||||
* {@link Builder}. You cannot store 0 output with this
|
* {@link Builder}. You cannot store 0 output with this
|
||||||
* (that's reserved to mean "no output")!
|
* (that's reserved to mean "no output")!
|
||||||
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.util.automaton.fst;
|
package org.apache.lucene.util.fst;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -23,7 +23,9 @@ import java.util.*;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
|
||||||
/** Static helper methods */
|
/** Static helper methods
|
||||||
|
*
|
||||||
|
* @lucene.experimental */
|
||||||
public final class Util {
|
public final class Util {
|
||||||
private Util() {
|
private Util() {
|
||||||
}
|
}
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
@ -97,8 +98,43 @@ public class RandomIndexWriter implements Closeable {
|
||||||
* Adds a Document.
|
* Adds a Document.
|
||||||
* @see IndexWriter#addDocument(Document)
|
* @see IndexWriter#addDocument(Document)
|
||||||
*/
|
*/
|
||||||
public void addDocument(Document doc) throws IOException {
|
public void addDocument(final Document doc) throws IOException {
|
||||||
w.addDocument(doc);
|
if (r.nextInt(5) == 3) {
|
||||||
|
// TODO: maybe, we should simply buffer up added docs
|
||||||
|
// (but we need to clone them), and only when
|
||||||
|
// getReader, commit, etc. are called, we do an
|
||||||
|
// addDocuments? Would be better testing.
|
||||||
|
w.addDocuments(new Iterable<Document>() {
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public Iterator<Document> iterator() {
|
||||||
|
return new Iterator<Document>() {
|
||||||
|
boolean done;
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public boolean hasNext() {
|
||||||
|
return !done;
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public Document next() {
|
||||||
|
if (done) {
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
done = true;
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
maybeCommit();
|
maybeCommit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -116,12 +152,53 @@ public class RandomIndexWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addDocuments(Iterable<Document> docs) throws IOException {
|
||||||
|
w.addDocuments(docs);
|
||||||
|
maybeCommit();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void updateDocuments(Term delTerm, Iterable<Document> docs) throws IOException {
|
||||||
|
w.updateDocuments(delTerm, docs);
|
||||||
|
maybeCommit();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates a document.
|
* Updates a document.
|
||||||
* @see IndexWriter#updateDocument(Term, Document)
|
* @see IndexWriter#updateDocument(Term, Document)
|
||||||
*/
|
*/
|
||||||
public void updateDocument(Term t, Document doc) throws IOException {
|
public void updateDocument(Term t, final Document doc) throws IOException {
|
||||||
w.updateDocument(t, doc);
|
if (r.nextInt(5) == 3) {
|
||||||
|
w.updateDocuments(t, new Iterable<Document>() {
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public Iterator<Document> iterator() {
|
||||||
|
return new Iterator<Document>() {
|
||||||
|
boolean done;
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public boolean hasNext() {
|
||||||
|
return !done;
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Override -- not until Java 1.6
|
||||||
|
public Document next() {
|
||||||
|
if (done) {
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
done = true;
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
w.updateDocument(t, doc);
|
||||||
|
}
|
||||||
maybeCommit();
|
maybeCommit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.lucene.index.codecs.TermsIndexWriterBase;
|
||||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||||
import org.apache.lucene.store.*;
|
import org.apache.lucene.store.*;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A silly test codec to verify core support for fixed
|
* A silly test codec to verify core support for fixed
|
||||||
|
@ -97,15 +98,25 @@ public class MockFixedIntBlockCodec extends Codec {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
|
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
|
||||||
return new FixedIntBlockIndexOutput(dir.createOutput(fileName), blockSize) {
|
IndexOutput out = dir.createOutput(fileName);
|
||||||
@Override
|
boolean success = false;
|
||||||
protected void flushBlock() throws IOException {
|
try {
|
||||||
for(int i=0;i<buffer.length;i++) {
|
FixedIntBlockIndexOutput ret = new FixedIntBlockIndexOutput(out, blockSize) {
|
||||||
assert buffer[i] >= 0;
|
@Override
|
||||||
out.writeVInt(buffer[i]);
|
protected void flushBlock() throws IOException {
|
||||||
|
for(int i=0;i<buffer.length;i++) {
|
||||||
|
assert buffer[i] >= 0;
|
||||||
|
out.writeVInt(buffer[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, out);
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A silly test codec to verify core support for variable
|
* A silly test codec to verify core support for variable
|
||||||
|
@ -102,34 +103,42 @@ public class MockVariableIntBlockCodec extends Codec {
|
||||||
@Override
|
@Override
|
||||||
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
|
public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
|
||||||
final IndexOutput out = dir.createOutput(fileName);
|
final IndexOutput out = dir.createOutput(fileName);
|
||||||
out.writeInt(baseBlockSize);
|
boolean success = false;
|
||||||
return new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
|
try {
|
||||||
|
out.writeInt(baseBlockSize);
|
||||||
|
VariableIntBlockIndexOutput ret = new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
|
||||||
|
int pendingCount;
|
||||||
|
final int[] buffer = new int[2+2*baseBlockSize];
|
||||||
|
|
||||||
int pendingCount;
|
@Override
|
||||||
final int[] buffer = new int[2+2*baseBlockSize];
|
protected int add(int value) throws IOException {
|
||||||
|
assert value >= 0;
|
||||||
|
buffer[pendingCount++] = value;
|
||||||
|
// silly variable block length int encoder: if
|
||||||
|
// first value <= 3, we write N vints at once;
|
||||||
|
// else, 2*N
|
||||||
|
final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
|
||||||
|
|
||||||
@Override
|
// intentionally be non-causal here:
|
||||||
protected int add(int value) throws IOException {
|
if (pendingCount == flushAt+1) {
|
||||||
assert value >= 0;
|
for(int i=0;i<flushAt;i++) {
|
||||||
buffer[pendingCount++] = value;
|
out.writeVInt(buffer[i]);
|
||||||
// silly variable block length int encoder: if
|
}
|
||||||
// first value <= 3, we write N vints at once;
|
buffer[0] = buffer[flushAt];
|
||||||
// else, 2*N
|
pendingCount = 1;
|
||||||
final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
|
return flushAt;
|
||||||
|
} else {
|
||||||
// intentionally be non-causal here:
|
return 0;
|
||||||
if (pendingCount == flushAt+1) {
|
|
||||||
for(int i=0;i<flushAt;i++) {
|
|
||||||
out.writeVInt(buffer[i]);
|
|
||||||
}
|
}
|
||||||
buffer[0] = buffer[flushAt];
|
|
||||||
pendingCount = 1;
|
|
||||||
return flushAt;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeSafely(true, out);
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -136,8 +136,11 @@ public class MockRandomCodec extends Codec {
|
||||||
|
|
||||||
final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SEED_EXT);
|
final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SEED_EXT);
|
||||||
final IndexOutput out = state.directory.createOutput(seedFileName);
|
final IndexOutput out = state.directory.createOutput(seedFileName);
|
||||||
out.writeLong(seed);
|
try {
|
||||||
out.close();
|
out.writeLong(seed);
|
||||||
|
} finally {
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
|
||||||
final Random random = new Random(seed);
|
final Random random = new Random(seed);
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue