mirror of https://github.com/apache/lucene.git
SOLR-2452: Merged with trunk up to r1129202; standardized solr/contrib/* layouts.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1129205 13f79535-47bb-0310-9956-ffa450edef68
This commit: 31c83c9d6f
@@ -73,6 +73,7 @@
</target>
<target name="eclipse" description="Setup Eclipse configuration">
<copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false"/>
<copy file="dev-tools/eclipse/dot.classpath" tofile=".classpath" overwrite="true"/>
<mkdir dir=".settings"/>
<copy file="dev-tools/eclipse/resources.prefs"

@@ -20,8 +20,6 @@
<classpathentry kind="src" path="lucene/contrib/queryparser/src/test"/>
<classpathentry kind="src" path="lucene/contrib/spatial/src/java"/>
<classpathentry kind="src" path="lucene/contrib/spatial/src/test"/>
<classpathentry kind="src" path="lucene/contrib/spellchecker/src/java"/>
<classpathentry kind="src" path="lucene/contrib/spellchecker/src/test"/>
<classpathentry kind="src" path="lucene/contrib/wordnet/src/java"/>
<classpathentry kind="src" path="lucene/contrib/wordnet/src/test"/>
<classpathentry kind="src" path="lucene/contrib/xml-query-parser/src/java"/>

@@ -44,6 +42,8 @@
<classpathentry kind="src" path="modules/benchmark/src/test"/>
<classpathentry kind="src" path="modules/grouping/src/java"/>
<classpathentry kind="src" path="modules/grouping/src/test"/>
<classpathentry kind="src" path="modules/suggest/src/java"/>
<classpathentry kind="src" path="modules/suggest/src/test"/>
<classpathentry kind="src" path="solr/src/java"/>
<classpathentry kind="src" path="solr/src/webapp/src"/>
<classpathentry kind="src" path="solr/src/common"/>

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>lucene_solr</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
@@ -11,7 +11,6 @@
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queries/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/queryparser/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spatial/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/spellchecker/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/wordnet/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/contrib/xml-query-parser/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/analysis/common/build.xml" />

@@ -21,6 +20,7 @@
<buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
<buildFile url="file://$PROJECT_DIR$/modules/suggest/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
<buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />

@@ -12,7 +12,6 @@
<module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/queryparser/queryparser.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/spatial/spatial.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/spellchecker/spellchecker.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/wordnet/wordnet.iml" />
<module filepath="$PROJECT_DIR$/lucene/contrib/xml-query-parser/xml-query-parser.iml" />
<module filepath="$PROJECT_DIR$/modules/analysis/common/common.iml" />

@@ -22,6 +21,7 @@
<module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
<module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
<module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
<module filepath="$PROJECT_DIR$/modules/suggest/suggest.iml" />
<module filepath="$PROJECT_DIR$/solr/solr.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
<module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />

@@ -141,13 +141,6 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="spellchecker contrib" type="JUnit" factoryName="JUnit">
<module name="spellchecker" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/lucene/build/contrib/spellchecker" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="stempel analysis module" type="JUnit" factoryName="JUnit">
<module name="stempel" />
<option name="TEST_OBJECT" value="package" />

@@ -155,6 +148,13 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
<module name="suggest" />
<option name="TEST_OBJECT" value="package" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/suggest/build" />
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
<configuration default="false" name="uima contrib" type="JUnit" factoryName="JUnit">
<module name="uima" />
<option name="TEST_OBJECT" value="package" />

@@ -197,8 +197,8 @@
<item index="17" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
<item index="18" class="java.lang.String" itemvalue="JUnit.solr" />
<item index="19" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
<item index="20" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
<item index="21" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="20" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
<item index="21" class="java.lang.String" itemvalue="JUnit.suggest module" />
<item index="22" class="java.lang.String" itemvalue="JUnit.uima contrib" />
<item index="23" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
<item index="24" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />
@@ -1,19 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/java" />
<output-test url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="queries" />
<orderEntry type="module" module-name="misc" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/build/classes/java" />
<output-test url="file://$MODULE_DIR$/build/classes/test" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="TEST" name="JUnit" level="project" />
<orderEntry type="module" module-name="common" />
<orderEntry type="module" module-name="lucene" />
</component>
</module>

@@ -39,7 +39,6 @@
<module>queries</module>
<module>queryparser</module>
<module>spatial</module>
<module>spellchecker</module>
<module>wordnet</module>
<module>xml-query-parser</module>
</modules>

@@ -34,6 +34,7 @@
<module>analysis</module>
<module>benchmark</module>
<module>grouping</module>
<module>suggest</module>
</modules>
<build>
<directory>build/lucene-modules-aggregator</directory>
@ -1,76 +1,76 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-spellchecker</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Lucene Spellchecker</name>
|
||||
<description>Spell Checker</description>
|
||||
<properties>
|
||||
<module-directory>lucene/contrib/spellchecker</module-directory>
|
||||
<build-directory>../../build/contrib/spellchecker</build-directory>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-analyzers-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<directory>${build-directory}</directory>
|
||||
<outputDirectory>${build-directory}/classes/java</outputDirectory>
|
||||
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
|
||||
<sourceDirectory>src/java</sourceDirectory>
|
||||
<testSourceDirectory>src/test</testSourceDirectory>
|
||||
<testResources>
|
||||
<testResource>
|
||||
<directory>${project.build.testSourceDirectory}</directory>
|
||||
<excludes>
|
||||
<exclude>**/*.java</exclude>
|
||||
</excludes>
|
||||
</testResource>
|
||||
</testResources>
|
||||
</build>
|
||||
</project>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-parent</artifactId>
|
||||
<version>@version@</version>
|
||||
<relativePath>../../lucene/pom.xml</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-suggest</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Lucene Suggest</name>
|
||||
<description>Lucene Suggest Module</description>
|
||||
<properties>
|
||||
<module-directory>modules/suggest</module-directory>
|
||||
<build-directory>build</build-directory>
|
||||
</properties>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-analyzers-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<directory>${build-directory}</directory>
|
||||
<outputDirectory>${build-directory}/classes/java</outputDirectory>
|
||||
<testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
|
||||
<sourceDirectory>src/java</sourceDirectory>
|
||||
<testSourceDirectory>src/test</testSourceDirectory>
|
||||
<testResources>
|
||||
<testResource>
|
||||
<directory>${project.build.testSourceDirectory}</directory>
|
||||
<excludes>
|
||||
<exclude>**/*.java</exclude>
|
||||
</excludes>
|
||||
</testResource>
|
||||
</testResources>
|
||||
</build>
|
||||
</project>
|
|
@@ -89,7 +89,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-spellchecker</artifactId>
<artifactId>lucene-suggest</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
@@ -427,7 +427,32 @@ Bug fixes
with more document deletions is requested before a reader with fewer
deletions, provided they share some segments. (yonik)

======================= Lucene 3.x (not yet released) =======================
* LUCENE-3147,LUCENE-3152: Fixed open file handles leaks in many places in the
code. Now MockDirectoryWrapper (in test-framework) tracks all open files,
including locks, and fails if the test fails to release all of them.
(Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)

======================= Lucene 3.x (not yet released) ================

Changes in backwards compatibility policy

* LUCENE-3140: IndexOutput.copyBytes now takes a DataInput (superclass
of IndexInput) as its first argument. (Robert Muir, Dawid Weiss,
Mike McCandless)
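For orientation, a minimal sketch of the call shape this LUCENE-3140 entry describes. It is not part of the commit; the directory path and file names are hypothetical, and it assumes the trunk API of this era (String-based openInput/createOutput, IndexInput extending DataInput).

```java
import java.io.File;
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

public class CopyBytesSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("/tmp/copybytes-demo")); // hypothetical path
    IndexOutput seed = dir.createOutput("source.bin");                 // hypothetical file name
    seed.writeLong(42L);
    seed.close();

    IndexInput in = dir.openInput("source.bin");   // IndexInput is a DataInput subclass
    IndexOutput out = dir.createOutput("copy.bin");
    try {
      // Post-LUCENE-3140: the first parameter is typed as DataInput rather than IndexInput.
      out.copyBytes(in, in.length());
    } finally {
      in.close();
      out.close();
    }
  }
}
```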

Changes in runtime behavior

* LUCENE-2834: the hash used to compute the lock file name when the
lock file is not stored in the index has changed. This means you
will see a different lucene-XXX-write.lock in your lock directory.
(Robert Muir, Uwe Schindler, Mike McCandless)

New Features

* LUCENE-3140: Added experimental FST implementation to Lucene.
(Robert Muir, Dawid Weiss, Mike McCandless)

======================= Lucene 3.2.0 =======================

Changes in backwards compatibility policy
@@ -486,6 +511,10 @@ New features
document IDs and scores encountered during the search, and "replay" them to
another Collector. (Mike McCandless, Shai Erera)

* LUCENE-3112: Added experimental IndexWriter.add/updateDocuments,
enabling a block of documents to be indexed, atomically, with
guaranteed sequential docIDs. (Mike McCandless)
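A hedged sketch of how the block-indexing API in the LUCENE-3112 entry above might be invoked. It is not taken from this commit; the writer setup, version constant, and field values are hypothetical and assume the 3.2-era IndexWriter/Field API.

```java
import java.util.Arrays;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class AddDocumentsSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_32, new StandardAnalyzer(Version.LUCENE_32)));

    Document parent = new Document();
    parent.add(new Field("type", "parent", Field.Store.YES, Field.Index.NOT_ANALYZED));
    Document child = new Document();
    child.add(new Field("type", "child", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // The whole collection is indexed atomically and receives consecutive docIDs.
    writer.addDocuments(Arrays.asList(parent, child));
    writer.close();
  }
}
```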

API Changes

* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
@@ -507,6 +536,9 @@ Optimizations
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
buffer deleted terms to later apply to past segments. (Mike McCandless)

* LUCENE-3126: IndexWriter.addIndexes copies incoming segments into CFS if they
aren't already and MergePolicy allows that. (Shai Erera)

Bug fixes

* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new

@@ -541,6 +573,9 @@ Build
* LUCENE-3006: Building javadocs will fail on warnings by default.
Override with -Dfailonjavadocwarning=false (sarowe, gsingers)

* LUCENE-3128: "ant eclipse" creates a .project file for easier Eclipse
integration (unless one already exists). (Daniel Serodio via Shai Erera)

Test Cases

* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
@@ -227,7 +227,6 @@
<packageset dir="contrib/misc/src/java"/>
<packageset dir="contrib/queries/src/java"/>
<packageset dir="contrib/spatial/src/java"/>
<packageset dir="contrib/spellchecker/src/java"/>
<packageset dir="contrib/wordnet/src/java"/>
<packageset dir="contrib/xml-query-parser/src/java"/>
<packageset dir="contrib/queryparser/src/java"/>

@@ -248,7 +247,6 @@
<group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.regex*:org.apache.regexp*"/>
<group title="contrib: Query Parser" packages="org.apache.lucene.queryParser.*"/>
<group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
<group title="contrib: SpellChecker" packages="org.apache.lucene.search.spell*"/>
<group title="contrib: WordNet" packages="org.apache.lucene.wordnet*"/>
<group title="contrib: XML Query Parser" packages="org.apache.lucene.xmlparser*"/>
@@ -6,6 +6,8 @@ Build

* LUCENE-2845: Moved contrib/benchmark to modules.

* LUCENE-2995: Moved contrib/spellchecker into modules/suggest.

New Features

* LUCENE-2604: Added RegexpQuery support to contrib/queryparser.

@@ -48,7 +50,14 @@ Bug Fixes
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
not lowercasing the key before checking for the tag (Adriano Crestani)

======================= Lucene 3.x (not yet released) =======================
======================= Lucene 3.x (not yet released) ================

API Changes

* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
(Sujit Pal via Koji Sekiguchi)

======================= Lucene 3.2.0 =======================

Changes in backwards compatibility policy
@@ -93,7 +93,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
if( maxNumFragments < 0 )
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );

List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );

List<String> fragments = new ArrayList<String>( maxNumFragments );
Field[] values = getFields( reader, docId, fieldName );

@@ -29,7 +29,7 @@ import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseIn
*/
public class FieldFragList {

List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
private List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();

/**
* a constructor.
@@ -50,6 +50,15 @@ public class FieldFragList {
fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
}

/**
* return the list of WeightedFragInfos.
*
* @return fragInfos.
*/
public List<WeightedFragInfo> getFragInfos() {
return fragInfos;
}

public static class WeightedFragInfo {

List<SubInfo> subInfos;
@@ -26,6 +26,7 @@ import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

/**
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@@ -80,16 +81,16 @@ public class FieldTermStack {
Set<String> termSet = fieldQuery.getTermSet( fieldName );
// just return to make null snippet if un-matched fieldName specified when fieldMatch == true
if( termSet == null ) return;

final CharsRef spare = new CharsRef();
for( BytesRef term : tpv.getTerms() ){
if( !termSet.contains( term.utf8ToString() ) ) continue;
if( !termSet.contains( term.utf8ToChars(spare).toString() ) ) continue;
int index = tpv.indexOf( term );
TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
if( tvois == null ) return; // just return to make null snippets
int[] poss = tpv.getTermPositions( index );
if( poss == null ) return; // just return to make null snippets
for( int i = 0; i < tvois.length; i++ )
termList.add( new TermInfo( term.utf8ToString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
termList.add( new TermInfo( term.utf8ToChars(spare).toString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
}

// sort by position
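The hunk above replaces per-term utf8ToString() calls with a single reusable CharsRef. A small stand-alone sketch of that reuse pattern, assuming only the BytesRef/CharsRef API the change itself uses; the term values are invented.

```java
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

public class SpareCharsRefSketch {
  public static void main(String[] args) {
    BytesRef[] terms = { new BytesRef("lucene"), new BytesRef("solr") }; // invented terms
    final CharsRef spare = new CharsRef();
    for (BytesRef term : terms) {
      // utf8ToChars fills the shared buffer instead of allocating a fresh char[] per term;
      // toString() is only paid when the String form is actually needed.
      String text = term.utf8ToChars(spare).toString();
      System.out.println(text);
    }
  }
}
```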
@@ -24,7 +24,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
public void testNullFieldFragList() throws Exception {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
assertEquals( 0, ffl.fragInfos.size() );
assertEquals( 0, ffl.getFragInfos().size() );
}

public void testTooSmallFragSize() throws Exception {
@ -40,90 +40,90 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
public void testSmallerFragSizeThanTermQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
if (VERBOSE) System.out.println( ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test1TermIndex() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test2TermsIndex1Frag() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test2TermsIndex2Frags() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.fragInfos.get( 1 ).toString() );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
}
|
||||
|
||||
public void test2TermsQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
|
||||
assertEquals( 0, ffl.fragInfos.size() );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testPhraseQuery() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
|
||||
assertEquals( 0, ffl.fragInfos.size() );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
|
||||
assertEquals( 0, ffl.fragInfos.size() );
|
||||
assertEquals( 0, ffl.getFragInfos().size() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testPhraseQuerySlop() throws Exception {
|
||||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {
|
||||
|
@@ -142,8 +142,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
}

public void test1PhraseLongMV() throws Exception {

@@ -154,8 +154,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
}

public void test1PhraseLongMVB() throws Exception {

@@ -166,7 +166,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
}
}
@@ -24,21 +24,21 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
public void testNullFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
assertEquals( 0, ffl.fragInfos.size() );
assertEquals( 0, ffl.getFragInfos().size() );
}

public void testShortFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
}

public void testLongFieldFragList() throws Exception {
SingleFragListBuilder sflb = new SingleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
assertEquals( 1, ffl.fragInfos.size() );
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
}

private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {
@@ -41,6 +41,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

/**
* Represented as a coupled graph of class instances, this

@@ -228,12 +229,13 @@ public class InstantiatedIndex
if (fieldsC != null) {
FieldsEnum fieldsEnum = fieldsC.iterator();
String field;
final CharsRef spare = new CharsRef();
while((field = fieldsEnum.next()) != null) {
if (fields == null || fields.contains(field)) {
TermsEnum termsEnum = fieldsEnum.terms();
BytesRef text;
while((text = termsEnum.next()) != null) {
String termText = text.utf8ToString();
String termText = text.utf8ToChars(spare).toString();
InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
final long totalTermFreq = termsEnum.totalTermFreq();
if (totalTermFreq != -1) {
@@ -0,0 +1,2 @@
AnyObjectId[b9c8c8a170881dfe9c33adc87c26348904510954] was removed in git history.
Apache SVN contains full history.
@@ -0,0 +1,202 @@
[new file: the standard Apache License, Version 2.0 text, 202 lines, omitted here]
@@ -42,4 +42,26 @@
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>

<target name="build-native-unix" >
<mkdir dir="${common.build.dir}/native"/>

<taskdef resource="cpptasks.tasks">
<classpath>
<pathelement location="ant_lib/cpptasks-1.0b5.jar"/>
</classpath>
</taskdef>

<cc outtype="shared" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" >
<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
<includepath>
<pathelement location="${java.home}/../include"/>
<pathelement location="${java.home}/../include/linux"/>
<pathelement location="${java.home}/../include/solaris"/>
</includepath>

<compilerarg value="-fPIC" />
</cc>
</target>

</project>
@@ -26,6 +26,7 @@ import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexWriter; // Required for javadocs
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.FSDirectory;
@@ -45,6 +46,11 @@ import org.apache.lucene.store.FSDirectory;
* @lucene.experimental You can easily
* accidentally remove segments from your index so be
* careful!
*
* <p><b>NOTE</b>: this tool is unaware of documents added
* atomically via {@link IndexWriter#addDocuments} or {@link
* IndexWriter#updateDocuments}, which means it can easily
* break up such document groups.
*/
public class IndexSplitter {
public SegmentInfos infos;
@@ -40,6 +40,11 @@ import org.apache.lucene.util.Version;
* <p>Note 2: the disadvantage of this tool is that source index needs to be
* read as many times as there are parts to be created, hence the name of this
* tool.
*
* <p><b>NOTE</b>: this tool is unaware of documents added
* atomically via {@link IndexWriter#addDocuments} or {@link
* IndexWriter#updateDocuments}, which means it can easily
* break up such document groups.
*/
public class MultiPassIndexSplitter {
@@ -269,7 +269,7 @@ public class NRTCachingDirectory extends Directory {
in = cache.openInput(fileName);
in.copyBytes(out, in.length());
} finally {
IOUtils.closeSafely(in, out);
IOUtils.closeSafely(false, in, out);
}
synchronized(this) {
cache.deleteFile(fileName);
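A hedged sketch of the adjusted closeSafely call shown in the hunk above, not part of the commit. The file names are hypothetical, and it assumes the IOUtils.closeSafely(boolean, Closeable...) overload and the String-based Directory methods that this change itself uses.

```java
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.IOUtils;

public class CloseSafelySketch {
  static void copy(Directory dir, String src, String dst) throws IOException {
    IndexInput in = null;
    IndexOutput out = null;
    try {
      in = dir.openInput(src);
      out = dir.createOutput(dst);
      in.copyBytes(out, in.length());
    } finally {
      // Same shape as the change above: the leading boolean (false here) asks
      // closeSafely not to suppress exceptions thrown while closing.
      IOUtils.closeSafely(false, in, out);
    }
  }

  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("a.bin"); // hypothetical file names
    out.writeInt(42);
    out.close();
    copy(dir, "a.bin", "b.bin");
  }
}
```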
@@ -51,9 +51,11 @@ for details.

Steps to build:
<ul>
<li> <tt>cd lucene/contrib/misc/src/java/org/apache/lucene/store</tt>
<li> <tt>cd lucene/contrib/misc/</tt>

<li> Compile NativePosixUtil.cpp -> libNativePosixUtil.so. On linux, something like <tt>gcc -fPIC -o libNativePosixUtil.so -shared -Wl,-soname,libNativePosixUtil.so -I$JAVA_HOME/include -I$JAVA_HOME/include/linux NativePosixUtil.cpp -lc -lstdc++</tt>. Add <tt>-m64</tt> if you want to compile 64bit (and java must be run with -d64 so it knows to load a 64bit dynamic lib).
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux run<tt> ant build-native-unix</tt>.

<li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder

<li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <tt>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</tt>, where /path/to/dir contains libNativePosixUtil.so)
@@ -18,6 +18,7 @@ package org.apache.lucene.search.regex;
*/

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.regexp.CharacterIterator;
import org.apache.regexp.RE;

@@ -104,11 +105,11 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {

class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher {
private RE regexp;
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
private final CharsRef utf16 = new CharsRef(10);
private final CharacterIterator utf16wrapper = new CharacterIterator() {

public char charAt(int pos) {
return utf16.result[pos];
return utf16.chars[pos];
}

public boolean isEnd(int pos) {

@@ -120,7 +121,7 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
}

public String substring(int beginIndex, int endIndex) {
return new String(utf16.result, beginIndex, endIndex - beginIndex);
return new String(utf16.chars, beginIndex, endIndex - beginIndex);
}

};
@@ -21,6 +21,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

/**
@@ -95,25 +96,11 @@ public class JavaUtilRegexCapabilities implements RegexCapabilities {
class JavaUtilRegexMatcher implements RegexCapabilities.RegexMatcher {
private final Pattern pattern;
private final Matcher matcher;
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
private final CharSequence utf16wrapper = new CharSequence() {

public int length() {
return utf16.length;
}

public char charAt(int index) {
return utf16.result[index];
}

public CharSequence subSequence(int start, int end) {
return new String(utf16.result, start, end - start);
}
};
private final CharsRef utf16 = new CharsRef(10);

public JavaUtilRegexMatcher(String regex, int flags) {
this.pattern = Pattern.compile(regex, flags);
this.matcher = this.pattern.matcher(utf16wrapper);
this.matcher = this.pattern.matcher(utf16);
}

public boolean match(BytesRef term) {
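The removal of the hand-written CharSequence wrapper above relies on CharsRef itself being usable as a CharSequence. A small hedged sketch of that idea, separate from the commit; the pattern and input term are invented, and only the CharsRef/UnicodeUtil calls that the hunk itself uses are assumed.

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

public class CharsRefMatcherSketch {
  public static void main(String[] args) {
    BytesRef utf8 = new BytesRef("abc123");   // invented input term
    CharsRef utf16 = new CharsRef(10);        // reusable UTF-16 buffer
    UnicodeUtil.UTF8toUTF16(utf8.bytes, utf8.offset, utf8.length, utf16);

    // Because CharsRef can stand in for a CharSequence, it is handed to the Matcher directly.
    Matcher matcher = Pattern.compile("[a-z]+\\d+").matcher(utf16);
    System.out.println(matcher.matches());
  }
}
```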
@@ -48,6 +48,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
@@ -850,8 +851,9 @@ public final class MoreLikeThis {
{
BytesRef[] terms = vector.getTerms();
int freqs[]=vector.getTermFrequencies();
final CharsRef spare = new CharsRef();
for (int j = 0; j < terms.length; j++) {
String term = terms[j].utf8ToString();
final String term = terms[j].utf8ToChars(spare).toString();

if(isNoiseWord(term)){
continue;
@@ -1,5 +1,22 @@
<?xml version="1.0"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements.  See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License.  You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<project name="DTDDocAnt" default="main">

<import file="../contrib-build.xml"/>
@ -23,6 +23,7 @@ import java.util.zip.DataFormatException;
import java.io.ByteArrayOutputStream;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

/** Simple utility class providing static methods to

@ -118,9 +119,9 @@ public class CompressionTools {
/** Decompress the byte array previously returned by
* compressString back into a String */
public static String decompressString(byte[] value) throws DataFormatException {
UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
final byte[] bytes = decompress(value);
CharsRef result = new CharsRef(bytes.length);
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
return new String(result.result, 0, result.length);
return new String(result.chars, 0, result.length);
}
}

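For context, decompressString now decodes the inflated UTF-8 bytes into a CharsRef instead of the removed UnicodeUtil.UTF16Result. A small round-trip sketch of the public API touched here; nothing beyond compressString/decompressString is assumed:

```java
import java.util.zip.DataFormatException;

import org.apache.lucene.document.CompressionTools;

public class CompressionRoundTrip {
  public static void main(String[] args) throws DataFormatException {
    String original = "compress me, then bring me back";
    // compressString encodes the string as UTF-8 and deflates it
    byte[] packed = CompressionTools.compressString(original);
    // decompressString inflates the bytes and converts UTF-8 back to a String
    String restored = CompressionTools.decompressString(packed);
    System.out.println(original.equals(restored)); // prints true
  }
}
```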
@ -60,6 +60,9 @@ public final class CompoundFileWriter {

/** temporary holder for the start of this file's data section */
long dataOffset;

/** the directory which contains the file. */
Directory dir;
}

// Before versioning started.

@ -119,6 +122,14 @@ public final class CompoundFileWriter {
* has been added already
*/
public void addFile(String file) {
addFile(file, directory);
}

/**
* Same as {@link #addFile(String)}, only for files that are found in an
* external {@link Directory}.
*/
public void addFile(String file, Directory dir) {
if (merged)
throw new IllegalStateException(
"Can't add extensions after merge has been called");

@ -133,6 +144,7 @@ public final class CompoundFileWriter {

FileEntry entry = new FileEntry();
entry.file = file;
entry.dir = dir;
entries.add(entry);
}

@ -170,7 +182,7 @@ public final class CompoundFileWriter {
fe.directoryOffset = os.getFilePointer();
os.writeLong(0); // for now
os.writeString(IndexFileNames.stripSegmentName(fe.file));
totalSize += directory.fileLength(fe.file);
totalSize += fe.dir.fileLength(fe.file);
}

// Pre-allocate size of file as optimization --

@ -216,7 +228,7 @@ public final class CompoundFileWriter {
* output stream.
*/
private void copyFile(FileEntry source, IndexOutput os) throws IOException {
IndexInput is = directory.openInput(source.file);
IndexInput is = source.dir.openInput(source.file);
try {
long startPtr = os.getFilePointer();
long length = is.length();

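The new addFile(String, Directory) overload lets a compound file pull entries from a directory other than the writer's own, which is what copySegmentIntoCFS relies on later in this commit. A rough sketch of the call pattern; CompoundFileWriter is package-private, so this assumes code living in org.apache.lucene.index, and the file names are made up:

```java
package org.apache.lucene.index; // CompoundFileWriter is package-private

import java.io.IOException;

import org.apache.lucene.store.Directory;

class ExternalCfsSketch {
  static void buildCfs(Directory dest, Directory source) throws IOException {
    // register entries now; they are copied when close() builds the compound file
    CompoundFileWriter cfsWriter = new CompoundFileWriter(dest, "_ext.cfs");
    cfsWriter.addFile("_0.fdt", source); // resolved against the external directory
    cfsWriter.addFile("_0.fdx", source);
    cfsWriter.close();
  }
}
```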
@ -84,19 +84,44 @@ final class DocFieldProcessor extends DocConsumer {

@Override
public void abort() {
for(int i=0;i<fieldHash.length;i++) {
DocFieldProcessorPerField field = fieldHash[i];
while(field != null) {
Throwable th = null;

for (DocFieldProcessorPerField field : fieldHash) {
while (field != null) {
final DocFieldProcessorPerField next = field.next;
field.abort();
try {
field.abort();
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
field = next;
}
}

try {
fieldsWriter.abort();
} finally {
} catch (Throwable t) {
if (th == null) {
th = t;
}
}

try {
consumer.abort();
} catch (Throwable t) {
if (th == null) {
th = t;
}
}

// If any errors occured, throw it.
if (th != null) {
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
// defensive code - we should not hit unchecked exceptions
throw new RuntimeException(th);
}
}

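The rewritten abort() records the first Throwable from each sub-abort, keeps tearing the rest down, and only rethrows at the end. The same "first exception wins" idiom, extracted into a standalone sketch (the Closeable-based helper is illustrative, not part of this commit):

```java
import java.io.Closeable;

public class FirstExceptionWins {
  // close everything, remember only the first failure, rethrow it afterwards
  static void closeAll(Closeable... resources) {
    Throwable th = null;
    for (Closeable c : resources) {
      try {
        if (c != null) c.close();
      } catch (Throwable t) {
        if (th == null) {
          th = t; // keep the first failure, suppress the rest
        }
      }
    }
    if (th != null) {
      if (th instanceof RuntimeException) throw (RuntimeException) th;
      if (th instanceof Error) throw (Error) th;
      throw new RuntimeException(th); // wrap checked exceptions
    }
  }
}
```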
@ -87,6 +87,7 @@ final class DocInverter extends DocFieldConsumer {
endConsumer.startDocument();
}

@Override
public void finishDocument() throws IOException {
// TODO: allow endConsumer.finishDocument to also return
// a DocWriter

@ -53,8 +53,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField {

@Override
void abort() {
consumer.abort();
endConsumer.abort();
try {
consumer.abort();
} finally {
endConsumer.abort();
}
}

@Override

@ -228,14 +228,19 @@ final class DocumentsWriter {
|
|||
}
|
||||
|
||||
final Iterator<ThreadState> threadsIterator = perThreadPool.getActivePerThreadsIterator();
|
||||
|
||||
while (threadsIterator.hasNext()) {
|
||||
ThreadState perThread = threadsIterator.next();
|
||||
final ThreadState perThread = threadsIterator.next();
|
||||
perThread.lock();
|
||||
try {
|
||||
if (perThread.isActive()) { // we might be closed
|
||||
perThread.perThread.abort();
|
||||
perThread.perThread.checkAndResetHasAborted();
|
||||
try {
|
||||
perThread.perThread.abort();
|
||||
} catch (IOException ex) {
|
||||
// continue
|
||||
} finally {
|
||||
perThread.perThread.checkAndResetHasAborted();
|
||||
flushControl.doOnAbort(perThread);
|
||||
}
|
||||
} else {
|
||||
assert closed;
|
||||
}
|
||||
|
@ -243,7 +248,6 @@ final class DocumentsWriter {
|
|||
perThread.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
if (infoStream != null) {
|
||||
|
@ -274,11 +278,9 @@ final class DocumentsWriter {
|
|||
flushControl.setClosed();
|
||||
}
|
||||
|
||||
boolean updateDocument(final Document doc, final Analyzer analyzer,
|
||||
final Term delTerm) throws CorruptIndexException, IOException {
|
||||
private boolean preUpdate() throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
boolean maybeMerge = false;
|
||||
final boolean isUpdate = delTerm != null;
|
||||
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
|
||||
// Help out flushing any queued DWPTs so we can un-stall:
|
||||
if (infoStream != null) {
|
||||
|
@ -303,9 +305,59 @@ final class DocumentsWriter {
|
|||
message("continue indexing after helping out flushing DocumentsWriter is healthy");
|
||||
}
|
||||
}
|
||||
return maybeMerge;
|
||||
}
|
||||
|
||||
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(),
|
||||
this, doc);
|
||||
private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
|
||||
if (flushingDWPT != null) {
|
||||
maybeMerge |= doFlush(flushingDWPT);
|
||||
} else {
|
||||
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
|
||||
if (nextPendingFlush != null) {
|
||||
maybeMerge |= doFlush(nextPendingFlush);
|
||||
}
|
||||
}
|
||||
|
||||
return maybeMerge;
|
||||
}
|
||||
|
||||
boolean updateDocuments(final Iterable<Document> docs, final Analyzer analyzer,
|
||||
final Term delTerm) throws CorruptIndexException, IOException {
|
||||
boolean maybeMerge = preUpdate();
|
||||
|
||||
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
|
||||
final DocumentsWriterPerThread flushingDWPT;
|
||||
|
||||
try {
|
||||
if (!perThread.isActive()) {
|
||||
ensureOpen();
|
||||
assert false: "perThread is not active but we are still open";
|
||||
}
|
||||
|
||||
final DocumentsWriterPerThread dwpt = perThread.perThread;
|
||||
try {
|
||||
final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
|
||||
numDocsInRAM.addAndGet(docCount);
|
||||
} finally {
|
||||
if (dwpt.checkAndResetHasAborted()) {
|
||||
flushControl.doOnAbort(perThread);
|
||||
}
|
||||
}
|
||||
final boolean isUpdate = delTerm != null;
|
||||
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
|
||||
} finally {
|
||||
perThread.unlock();
|
||||
}
|
||||
|
||||
return postUpdate(flushingDWPT, maybeMerge);
|
||||
}
|
||||
|
||||
boolean updateDocument(final Document doc, final Analyzer analyzer,
|
||||
final Term delTerm) throws CorruptIndexException, IOException {
|
||||
|
||||
boolean maybeMerge = preUpdate();
|
||||
|
||||
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
|
||||
final DocumentsWriterPerThread flushingDWPT;
|
||||
|
||||
try {
|
||||
|
@ -324,20 +376,13 @@ final class DocumentsWriter {
|
|||
flushControl.doOnAbort(perThread);
|
||||
}
|
||||
}
|
||||
final boolean isUpdate = delTerm != null;
|
||||
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
|
||||
} finally {
|
||||
perThread.unlock();
|
||||
}
|
||||
|
||||
if (flushingDWPT != null) {
|
||||
maybeMerge |= doFlush(flushingDWPT);
|
||||
} else {
|
||||
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
|
||||
if (nextPendingFlush != null) {
|
||||
maybeMerge |= doFlush(nextPendingFlush);
|
||||
}
|
||||
}
|
||||
return maybeMerge;
|
||||
|
||||
return postUpdate(flushingDWPT, maybeMerge);
|
||||
}
|
||||
|
||||
private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
|
||||
|
@ -541,4 +586,20 @@ final class DocumentsWriter {
|
|||
return (!isSegmentFlush || segment != null);
|
||||
}
|
||||
}
|
||||
|
||||
// used by IW during close to assert all DWPT are inactive after final flush
|
||||
boolean assertNoActiveDWPT() {
|
||||
Iterator<ThreadState> activePerThreadsIterator = perThreadPool.getAllPerThreadsIterator();
|
||||
while(activePerThreadsIterator.hasNext()) {
|
||||
ThreadState next = activePerThreadsIterator.next();
|
||||
next.lock();
|
||||
try {
|
||||
assert !next.isActive();
|
||||
} finally {
|
||||
next.unlock();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ package org.apache.lucene.index;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
@ -68,7 +69,7 @@ public final class DocumentsWriterFlushControl {
|
|||
this.stallControl = new DocumentsWriterStallControl();
|
||||
this.perThreadPool = documentsWriter.perThreadPool;
|
||||
this.flushPolicy = documentsWriter.flushPolicy;
|
||||
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;;
|
||||
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
|
||||
this.config = config;
|
||||
this.documentsWriter = documentsWriter;
|
||||
}
|
||||
|
@ -162,8 +163,6 @@ public final class DocumentsWriterFlushControl {
|
|||
stallControl.updateStalled(this);
|
||||
assert assertMemory();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
|
||||
|
@ -206,7 +205,7 @@ public final class DocumentsWriterFlushControl {
|
|||
} // don't assert on numDocs since we could hit an abort excp. while selecting that dwpt for flushing
|
||||
|
||||
}
|
||||
|
||||
|
||||
synchronized void doOnAbort(ThreadState state) {
|
||||
try {
|
||||
if (state.flushPending) {
|
||||
|
@ -217,7 +216,7 @@ public final class DocumentsWriterFlushControl {
|
|||
assert assertMemory();
|
||||
// Take it out of the loop this DWPT is stale
|
||||
perThreadPool.replaceForFlush(state, closed);
|
||||
}finally {
|
||||
} finally {
|
||||
stallControl.updateStalled(this);
|
||||
}
|
||||
}
|
||||
|
@ -305,6 +304,7 @@ public final class DocumentsWriterFlushControl {
|
|||
synchronized void setClosed() {
|
||||
// set by DW to signal that we should not release new DWPT after close
|
||||
this.closed = true;
|
||||
perThreadPool.deactivateUnreleasedStates();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -387,8 +387,12 @@ public final class DocumentsWriterFlushControl {
|
|||
toFlush.add(flushingDWPT);
|
||||
}
|
||||
} else {
|
||||
// get the new delete queue from DW
|
||||
next.perThread.initialize();
|
||||
if (closed) {
|
||||
next.resetWriter(null); // make this state inactive
|
||||
} else {
|
||||
// get the new delete queue from DW
|
||||
next.perThread.initialize();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
next.unlock();
|
||||
|
@ -451,10 +455,21 @@ public final class DocumentsWriterFlushControl {
|
|||
try {
|
||||
for (DocumentsWriterPerThread dwpt : flushQueue) {
|
||||
doAfterFlush(dwpt);
|
||||
try {
|
||||
dwpt.abort();
|
||||
} catch (IOException ex) {
|
||||
// continue
|
||||
}
|
||||
}
|
||||
for (BlockedFlush blockedFlush : blockedFlushes) {
|
||||
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||
flushingWriters
|
||||
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||
doAfterFlush(blockedFlush.dwpt);
|
||||
try {
|
||||
blockedFlush.dwpt.abort();
|
||||
} catch (IOException ex) {
|
||||
// continue
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
fullFlush = false;
|
||||
|
@ -512,5 +527,4 @@ public final class DocumentsWriterFlushControl {
|
|||
boolean anyStalledThreads() {
|
||||
return stallControl.anyStalledThreads();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ public class DocumentsWriterPerThread {
|
|||
// largish:
|
||||
doc = null;
|
||||
analyzer = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class FlushedSegment {
|
||||
|
@ -177,7 +177,7 @@ public class DocumentsWriterPerThread {
|
|||
this.parent = parent;
|
||||
this.fieldInfos = fieldInfos;
|
||||
this.writer = parent.indexWriter;
|
||||
this.infoStream = parent.indexWriter.getInfoStream();
|
||||
this.infoStream = parent.infoStream;
|
||||
this.docState = new DocState(this);
|
||||
this.docState.similarityProvider = parent.indexWriter.getConfig()
|
||||
.getSimilarityProvider();
|
||||
|
@ -253,6 +253,82 @@ public class DocumentsWriterPerThread {
|
|||
finishDocument(delTerm);
|
||||
}
|
||||
|
||||
public int updateDocuments(Iterable<Document> docs, Analyzer analyzer, Term delTerm) throws IOException {
|
||||
assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
|
||||
assert deleteQueue != null;
|
||||
docState.analyzer = analyzer;
|
||||
if (segment == null) {
|
||||
// this call is synchronized on IndexWriter.segmentInfos
|
||||
segment = writer.newSegmentName();
|
||||
assert numDocsInRAM == 0;
|
||||
}
|
||||
|
||||
int docCount = 0;
|
||||
try {
|
||||
for(Document doc : docs) {
|
||||
docState.doc = doc;
|
||||
docState.docID = numDocsInRAM;
|
||||
docCount++;
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
consumer.processDocument(fieldInfos);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
// An exc is being thrown...
|
||||
|
||||
if (!aborting) {
|
||||
// One of the documents hit a non-aborting
|
||||
// exception (eg something happened during
|
||||
// analysis). We now go and mark any docs
|
||||
// from this batch that we had already indexed
|
||||
// as deleted:
|
||||
int docID = docState.docID;
|
||||
final int endDocID = docID - docCount;
|
||||
while (docID > endDocID) {
|
||||
deleteDocID(docID);
|
||||
docID--;
|
||||
}
|
||||
|
||||
// Incr here because finishDocument will not
|
||||
// be called (because an exc is being thrown):
|
||||
numDocsInRAM++;
|
||||
fieldInfos.revertUncommitted();
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
success = false;
|
||||
try {
|
||||
consumer.finishDocument();
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
finishDocument(null);
|
||||
}
|
||||
|
||||
// Apply delTerm only after all indexing has
|
||||
// succeeded, but apply it only to docs prior to when
|
||||
// this batch started:
|
||||
if (delTerm != null) {
|
||||
deleteQueue.add(delTerm, deleteSlice);
|
||||
assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
|
||||
deleteSlice.apply(pendingDeletes, numDocsInRAM-docCount);
|
||||
}
|
||||
|
||||
} finally {
|
||||
docState.clear();
|
||||
}
|
||||
|
||||
return docCount;
|
||||
}
|
||||
|
||||
private void finishDocument(Term delTerm) throws IOException {
|
||||
/*
|
||||
* here we actually finish the document in two steps 1. push the delete into
|
||||
|
@ -474,6 +550,7 @@ public class DocumentsWriterPerThread {
|
|||
super(blockSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getByteBlock() {
|
||||
bytesUsed.addAndGet(blockSize);
|
||||
return new byte[blockSize];
|
||||
|
@ -486,7 +563,7 @@ public class DocumentsWriterPerThread {
|
|||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
void setInfoStream(PrintStream infoStream) {
|
||||
this.infoStream = infoStream;
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index;
|
|||
import java.util.Iterator;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
|
||||
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
|
@ -194,6 +193,21 @@ public abstract class DocumentsWriterPerThreadPool {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deactivate all unreleased threadstates
|
||||
*/
|
||||
protected synchronized void deactivateUnreleasedStates() {
|
||||
for (int i = numThreadStatesActive; i < perThreads.length; i++) {
|
||||
final ThreadState threadState = perThreads[i];
|
||||
threadState.lock();
|
||||
try {
|
||||
threadState.resetWriter(null);
|
||||
} finally {
|
||||
threadState.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) {
|
||||
assert threadState.isHeldByCurrentThread();
|
||||
final DocumentsWriterPerThread dwpt = threadState.perThread;
|
||||
|
@ -212,7 +226,7 @@ public abstract class DocumentsWriterPerThreadPool {
|
|||
// don't recycle DWPT by default
|
||||
}
|
||||
|
||||
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc);
|
||||
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter);
|
||||
|
||||
/**
|
||||
* Returns an iterator providing access to all {@link ThreadState}
|
||||
|
|
|
@ -113,7 +113,7 @@ final class FieldsWriter {
void close() throws IOException {
if (directory != null) {
try {
IOUtils.closeSafely(fieldsStream, indexStream);
IOUtils.closeSafely(false, fieldsStream, indexStream);
} finally {
fieldsStream = indexStream = null;
}

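This and several later hunks switch to the IOUtils.closeSafely(boolean, ...) overload: the leading flag says whether close failures should be suppressed (true while already unwinding from another exception) or rethrown (false on the normal path). A sketch of the intended calling pattern; the directory and file names are illustrative:

```java
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.IOUtils;

public class CloseSafelySketch {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    IndexOutput a = null, b = null;
    boolean success = false;
    try {
      a = dir.createOutput("a.tmp");
      b = dir.createOutput("b.tmp");
      success = true;
    } finally {
      // on failure close quietly so the original exception propagates;
      // on success close normally and surface any close() failure
      IOUtils.closeSafely(!success, a, b);
    }
  }
}
```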
@ -57,9 +57,10 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
|
|||
|
||||
final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
|
||||
|
||||
TermsHash termsHash = null;
|
||||
|
||||
/*
|
||||
try {
|
||||
TermsHash termsHash = null;
|
||||
|
||||
/*
|
||||
Current writer chain:
|
||||
FieldsConsumer
|
||||
-> IMPL: FormatPostingsTermsDictWriter
|
||||
|
@ -69,36 +70,38 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
|
|||
-> IMPL: FormatPostingsDocsWriter
|
||||
-> PositionsConsumer
|
||||
-> IMPL: FormatPostingsPositionsWriter
|
||||
*/
|
||||
|
||||
for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
|
||||
final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
|
||||
|
||||
final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
|
||||
|
||||
// Aggregate the storePayload as seen by the same
|
||||
// field across multiple threads
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
|
||||
*/
|
||||
|
||||
for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
|
||||
final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
|
||||
|
||||
final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
|
||||
|
||||
// Aggregate the storePayload as seen by the same
|
||||
// field across multiple threads
|
||||
if (!fieldInfo.omitTermFreqAndPositions) {
|
||||
fieldInfo.storePayloads |= fieldWriter.hasPayloads;
|
||||
}
|
||||
|
||||
// If this field has postings then add them to the
|
||||
// segment
|
||||
fieldWriter.flush(fieldInfo.name, consumer, state);
|
||||
|
||||
TermsHashPerField perField = fieldWriter.termsHashPerField;
|
||||
assert termsHash == null || termsHash == perField.termsHash;
|
||||
termsHash = perField.termsHash;
|
||||
int numPostings = perField.bytesHash.size();
|
||||
perField.reset();
|
||||
perField.shrinkHash(numPostings);
|
||||
fieldWriter.reset();
|
||||
}
|
||||
|
||||
// If this field has postings then add them to the
|
||||
// segment
|
||||
fieldWriter.flush(fieldInfo.name, consumer, state);
|
||||
|
||||
TermsHashPerField perField = fieldWriter.termsHashPerField;
|
||||
assert termsHash == null || termsHash == perField.termsHash;
|
||||
termsHash = perField.termsHash;
|
||||
int numPostings = perField.bytesHash.size();
|
||||
perField.reset();
|
||||
perField.shrinkHash(numPostings);
|
||||
fieldWriter.reset();
|
||||
|
||||
if (termsHash != null) {
|
||||
termsHash.reset();
|
||||
}
|
||||
} finally {
|
||||
consumer.close();
|
||||
}
|
||||
|
||||
if (termsHash != null) {
|
||||
termsHash.reset();
|
||||
}
|
||||
consumer.close();
|
||||
}
|
||||
|
||||
BytesRef payload;
|
||||
|
|
|
@ -17,6 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import java.util.regex.Pattern;

import org.apache.lucene.index.codecs.Codec; // for javadocs

/**

@ -238,5 +240,16 @@ public final class IndexFileNames {
}
return filename;
}

/**
* Returns true if the given filename ends with the separate norms file
* pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}.
*/
public static boolean isSeparateNormsFile(String filename) {
int idx = filename.lastIndexOf('.');
if (idx == -1) return false;
String ext = filename.substring(idx + 1);
return Pattern.matches(SEPARATE_NORMS_EXTENSION + "[0-9]+", ext);
}

}

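The new isSeparateNormsFile helper only inspects the extension, accepting names whose last extension is SEPARATE_NORMS_EXTENSION ("s" at this revision) followed by a field number. A quick illustration; the file names are made up:

```java
import org.apache.lucene.index.IndexFileNames;

public class SeparateNormsCheck {
  public static void main(String[] args) {
    System.out.println(IndexFileNames.isSeparateNormsFile("_3_1.s0"));    // true: separate norms, field 0
    System.out.println(IndexFileNames.isSeparateNormsFile("_3_1.s12"));   // true: separate norms, field 12
    System.out.println(IndexFileNames.isSeparateNormsFile("_3.nrm"));     // false: shared norms file
    System.out.println(IndexFileNames.isSeparateNormsFile("segments_2")); // false: no extension
  }
}
```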
@ -23,6 +23,7 @@ import java.io.PrintStream;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -51,6 +52,7 @@ import org.apache.lucene.store.LockObtainFailedException;
|
|||
import org.apache.lucene.util.BitVector;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util.MapBackedSet;
|
||||
|
||||
|
@ -1071,7 +1073,8 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
if (infoStream != null)
|
||||
message("at close: " + segString());
|
||||
|
||||
// used by assert below
|
||||
final DocumentsWriter oldWriter = docWriter;
|
||||
synchronized(this) {
|
||||
readerPool.close();
|
||||
docWriter = null;
|
||||
|
@ -1085,6 +1088,7 @@ public class IndexWriter implements Closeable {
|
|||
synchronized(this) {
|
||||
closed = true;
|
||||
}
|
||||
assert oldWriter.assertNoActiveDWPT();
|
||||
} catch (OutOfMemoryError oom) {
|
||||
handleOOM(oom, "closeInternal");
|
||||
} finally {
|
||||
|
@ -1098,6 +1102,8 @@ public class IndexWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Returns the Directory used by this index. */
|
||||
public Directory getDirectory() {
|
||||
|
@ -1227,6 +1233,111 @@ public class IndexWriter implements Closeable {
|
|||
updateDocument(null, doc, analyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically adds a block of documents with sequentially
|
||||
* assigned document IDs, such that an external reader
|
||||
* will see all or none of the documents.
|
||||
*
|
||||
* <p><b>WARNING</b>: the index does not currently record
|
||||
* which documents were added as a block. Today this is
|
||||
* fine, because merging will preserve the block (as long
|
||||
* as none of them were deleted). But it's possible in the
|
||||
* future that Lucene may more aggressively re-order
|
||||
* documents (for example, perhaps to obtain better index
|
||||
* compression), in which case you may need to fully
|
||||
* re-index your documents at that time.
|
||||
*
|
||||
* <p>See {@link #addDocument(Document)} for details on
|
||||
* index and IndexWriter state after an Exception, and
|
||||
* flushing/merging temporary free space requirements.</p>
|
||||
*
|
||||
* <p><b>NOTE</b>: tools that do offline splitting of an index
|
||||
* (for example, IndexSplitter in contrib) or
|
||||
* re-sorting of documents (for example, IndexSorter in
|
||||
* contrib) are not aware of these atomically added documents
|
||||
* and will likely break them up. Use such tools at your
|
||||
* own risk!
|
||||
*
|
||||
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
|
||||
* you should immediately close the writer. See <a
|
||||
* href="#OOME">above</a> for details.</p>
|
||||
*
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public void addDocuments(Iterable<Document> docs) throws CorruptIndexException, IOException {
|
||||
addDocuments(docs, analyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically adds a block of documents, analyzed using the
|
||||
* provided analyzer, with sequentially assigned document
|
||||
* IDs, such that an external reader will see all or none
|
||||
* of the documents.
|
||||
*
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public void addDocuments(Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
|
||||
updateDocuments(null, docs, analyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically deletes documents matching the provided
|
||||
* delTerm and adds a block of documents with sequentially
|
||||
* assigned document IDs, such that an external reader
|
||||
* will see all or none of the documents.
|
||||
*
|
||||
* See {@link #addDocuments(Iterable)}.
|
||||
*
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public void updateDocuments(Term delTerm, Iterable<Document> docs) throws CorruptIndexException, IOException {
|
||||
updateDocuments(delTerm, docs, analyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically deletes documents matching the provided
|
||||
* delTerm and adds a block of documents, analyzed using
|
||||
* the provided analyzer, with sequentially
|
||||
* assigned document IDs, such that an external reader
|
||||
* will see all or none of the documents.
|
||||
*
|
||||
* See {@link #addDocuments(Iterable)}.
|
||||
*
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public void updateDocuments(Term delTerm, Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
try {
|
||||
boolean success = false;
|
||||
boolean anySegmentFlushed = false;
|
||||
try {
|
||||
anySegmentFlushed = docWriter.updateDocuments(docs, analyzer, delTerm);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success && infoStream != null) {
|
||||
message("hit exception updating document");
|
||||
}
|
||||
}
|
||||
if (anySegmentFlushed) {
|
||||
maybeMerge();
|
||||
}
|
||||
} catch (OutOfMemoryError oom) {
|
||||
handleOOM(oom, "updateDocuments");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes the document(s) containing <code>term</code>.
|
||||
*
|
||||
|
@ -2217,10 +2328,10 @@ public class IndexWriter implements Closeable {
|
|||
* <p>
|
||||
* <b>NOTE:</b> this method only copies the segments of the incoming indexes
|
||||
* and does not merge them. Therefore deleted documents are not removed and
|
||||
* the new segments are not merged with the existing ones. Also, the segments
|
||||
* are copied as-is, meaning they are not converted to CFS if they aren't,
|
||||
* and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
|
||||
* or {@link #optimize} afterwards.
|
||||
* the new segments are not merged with the existing ones. Also, if the merge
|
||||
* policy allows compound files, then any segment that is not compound is
|
||||
* converted to such. However, if the segment is compound, it is copied as-is
|
||||
* even if the merge policy does not allow compound files.
|
||||
*
|
||||
* <p>This requires this index not be among those to be added.
|
||||
*
|
||||
|
@ -2244,6 +2355,7 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
int docCount = 0;
|
||||
List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
|
||||
Comparator<String> versionComparator = StringHelper.getVersionComparator();
|
||||
for (Directory dir : dirs) {
|
||||
if (infoStream != null) {
|
||||
message("addIndexes: process directory " + dir);
|
||||
|
@ -2263,46 +2375,22 @@ public class IndexWriter implements Closeable {
|
|||
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
|
||||
}
|
||||
|
||||
// Determine if the doc store of this segment needs to be copied. It's
|
||||
// only relevant for segments who share doc store with others, because
|
||||
// the DS might have been copied already, in which case we just want
|
||||
// to update the DS name of this SegmentInfo.
|
||||
// NOTE: pre-3x segments include a null DSName if they don't share doc
|
||||
// store. So the following code ensures we don't accidentally insert
|
||||
// 'null' to the map.
|
||||
final String newDsName;
|
||||
if (dsName != null) {
|
||||
if (dsNames.containsKey(dsName)) {
|
||||
newDsName = dsNames.get(dsName);
|
||||
} else {
|
||||
dsNames.put(dsName, newSegName);
|
||||
newDsName = newSegName;
|
||||
}
|
||||
// create CFS only if the source segment is not CFS, and MP agrees it
|
||||
// should be CFS.
|
||||
boolean createCFS;
|
||||
synchronized (this) { // Guard segmentInfos
|
||||
createCFS = !info.getUseCompoundFile()
|
||||
&& mergePolicy.useCompoundFile(segmentInfos, info)
|
||||
// optimize case only for segments that don't share doc stores
|
||||
&& versionComparator.compare(info.getVersion(), "3.1") >= 0;
|
||||
}
|
||||
|
||||
if (createCFS) {
|
||||
copySegmentIntoCFS(info, newSegName);
|
||||
} else {
|
||||
newDsName = newSegName;
|
||||
copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
|
||||
}
|
||||
|
||||
// Copy the segment files
|
||||
for (String file: info.files()) {
|
||||
final String newFileName;
|
||||
if (IndexFileNames.isDocStoreFile(file)) {
|
||||
newFileName = newDsName + IndexFileNames.stripSegmentName(file);
|
||||
if (dsFilesCopied.contains(newFileName)) {
|
||||
continue;
|
||||
}
|
||||
dsFilesCopied.add(newFileName);
|
||||
} else {
|
||||
newFileName = newSegName + IndexFileNames.stripSegmentName(file);
|
||||
}
|
||||
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
|
||||
dir.copy(directory, file, newFileName);
|
||||
}
|
||||
|
||||
// Update SI appropriately
|
||||
info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
|
||||
info.dir = directory;
|
||||
info.name = newSegName;
|
||||
|
||||
infos.add(info);
|
||||
}
|
||||
}
|
||||
|
@ -2391,6 +2479,76 @@ public class IndexWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Copies the segment into the IndexWriter's directory, as a compound segment. */
|
||||
private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
|
||||
String segFileName = IndexFileNames.segmentFileName(segName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
|
||||
Collection<String> files = info.files();
|
||||
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
|
||||
for (String file : files) {
|
||||
String newFileName = segName + IndexFileNames.stripSegmentName(file);
|
||||
if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
|
||||
&& !IndexFileNames.isSeparateNormsFile(file)) {
|
||||
cfsWriter.addFile(file, info.dir);
|
||||
} else {
|
||||
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
|
||||
info.dir.copy(directory, file, newFileName);
|
||||
}
|
||||
}
|
||||
|
||||
// Create the .cfs
|
||||
cfsWriter.close();
|
||||
|
||||
info.dir = directory;
|
||||
info.name = segName;
|
||||
info.setUseCompoundFile(true);
|
||||
}
|
||||
|
||||
/** Copies the segment files as-is into the IndexWriter's directory. */
|
||||
private void copySegmentAsIs(SegmentInfo info, String segName,
|
||||
Map<String, String> dsNames, Set<String> dsFilesCopied)
|
||||
throws IOException {
|
||||
// Determine if the doc store of this segment needs to be copied. It's
|
||||
// only relevant for segments that share doc store with others,
|
||||
// because the DS might have been copied already, in which case we
|
||||
// just want to update the DS name of this SegmentInfo.
|
||||
// NOTE: pre-3x segments include a null DSName if they don't share doc
|
||||
// store. The following code ensures we don't accidentally insert
|
||||
// 'null' to the map.
|
||||
String dsName = info.getDocStoreSegment();
|
||||
final String newDsName;
|
||||
if (dsName != null) {
|
||||
if (dsNames.containsKey(dsName)) {
|
||||
newDsName = dsNames.get(dsName);
|
||||
} else {
|
||||
dsNames.put(dsName, segName);
|
||||
newDsName = segName;
|
||||
}
|
||||
} else {
|
||||
newDsName = segName;
|
||||
}
|
||||
|
||||
// Copy the segment files
|
||||
for (String file: info.files()) {
|
||||
final String newFileName;
|
||||
if (IndexFileNames.isDocStoreFile(file)) {
|
||||
newFileName = newDsName + IndexFileNames.stripSegmentName(file);
|
||||
if (dsFilesCopied.contains(newFileName)) {
|
||||
continue;
|
||||
}
|
||||
dsFilesCopied.add(newFileName);
|
||||
} else {
|
||||
newFileName = segName + IndexFileNames.stripSegmentName(file);
|
||||
}
|
||||
|
||||
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
|
||||
info.dir.copy(directory, file, newFileName);
|
||||
}
|
||||
|
||||
info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
|
||||
info.dir = directory;
|
||||
info.name = segName;
|
||||
}
|
||||
|
||||
/**
|
||||
* A hook for extending classes to execute operations after pending added and
|
||||
* deleted documents have been flushed to the Directory but before the change
|
||||
|
@ -3176,50 +3334,50 @@ public class IndexWriter implements Closeable {
|
|||
runningMerges.remove(merge);
|
||||
}
|
||||
|
||||
private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
|
||||
private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
|
||||
final int numSegments = merge.readers.size();
|
||||
if (suppressExceptions) {
|
||||
// Suppress any new exceptions so we throw the
|
||||
// original cause
|
||||
boolean anyChanges = false;
|
||||
for (int i=0;i<numSegments;i++) {
|
||||
if (merge.readers.get(i) != null) {
|
||||
try {
|
||||
anyChanges |= readerPool.release(merge.readers.get(i), false);
|
||||
} catch (Throwable t) {
|
||||
Throwable th = null;
|
||||
|
||||
boolean anyChanges = false;
|
||||
boolean drop = !suppressExceptions;
|
||||
for (int i = 0; i < numSegments; i++) {
|
||||
if (merge.readers.get(i) != null) {
|
||||
try {
|
||||
anyChanges |= readerPool.release(merge.readers.get(i), drop);
|
||||
} catch (Throwable t) {
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
merge.readers.set(i, null);
|
||||
}
|
||||
|
||||
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
|
||||
try {
|
||||
merge.readerClones.get(i).close();
|
||||
} catch (Throwable t) {
|
||||
}
|
||||
// This was a private clone and we had the
|
||||
// only reference
|
||||
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
|
||||
merge.readerClones.set(i, null);
|
||||
}
|
||||
merge.readers.set(i, null);
|
||||
}
|
||||
if (anyChanges) {
|
||||
checkpoint();
|
||||
}
|
||||
} else {
|
||||
for (int i=0;i<numSegments;i++) {
|
||||
if (merge.readers.get(i) != null) {
|
||||
readerPool.release(merge.readers.get(i), true);
|
||||
merge.readers.set(i, null);
|
||||
}
|
||||
|
||||
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
|
||||
|
||||
if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
|
||||
try {
|
||||
merge.readerClones.get(i).close();
|
||||
// This was a private clone and we had the only reference
|
||||
assert merge.readerClones.get(i).getRefCount() == 0;
|
||||
merge.readerClones.set(i, null);
|
||||
} catch (Throwable t) {
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
// This was a private clone and we had the
|
||||
// only reference
|
||||
assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
|
||||
merge.readerClones.set(i, null);
|
||||
}
|
||||
}
|
||||
|
||||
if (suppressExceptions && anyChanges) {
|
||||
checkpoint();
|
||||
}
|
||||
|
||||
// If any error occured, throw it.
|
||||
if (!suppressExceptions && th != null) {
|
||||
if (th instanceof IOException) throw (IOException) th;
|
||||
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||
if (th instanceof Error) throw (Error) th;
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
|
||||
/** Does the actual (time-consuming) work of the merge,
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Collection;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
// TODO FI: norms could actually be stored as doc store
|
||||
|
||||
|
@ -49,9 +50,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
|
|||
|
||||
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
|
||||
IndexOutput normsOut = state.directory.createOutput(normsFileName);
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
|
||||
normsOut.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
|
||||
|
||||
int normCount = 0;
|
||||
|
||||
|
@ -84,9 +85,9 @@ final class NormsWriter extends InvertedDocEndConsumer {
|
|||
|
||||
assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
|
||||
}
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
normsOut.close();
|
||||
IOUtils.closeSafely(!success, normsOut);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.codecs.FieldsConsumer;
|
|||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
import org.apache.lucene.index.codecs.TermsConsumer;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Enables native per field codec support. This class selects the codec used to
|
||||
|
@ -61,7 +62,15 @@ final class PerFieldCodecWrapper extends Codec {
|
|||
assert segmentCodecs == state.segmentCodecs;
|
||||
final Codec[] codecs = segmentCodecs.codecs;
|
||||
for (int i = 0; i < codecs.length; i++) {
|
||||
consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
|
||||
boolean success = false;
|
||||
try {
|
||||
consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, consumers);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -74,22 +83,7 @@ final class PerFieldCodecWrapper extends Codec {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
Iterator<FieldsConsumer> it = consumers.iterator();
|
||||
IOException err = null;
|
||||
while (it.hasNext()) {
|
||||
try {
|
||||
it.next().close();
|
||||
} catch (IOException ioe) {
|
||||
// keep first IOException we hit but keep
|
||||
// closing the rest
|
||||
if (err == null) {
|
||||
err = ioe;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (err != null) {
|
||||
throw err;
|
||||
}
|
||||
IOUtils.closeSafely(false, consumers);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,14 +116,7 @@ final class PerFieldCodecWrapper extends Codec {
|
|||
// If we hit exception (eg, IOE because writer was
|
||||
// committing, or, for any other reason) we must
|
||||
// go back and close all FieldsProducers we opened:
|
||||
for(FieldsProducer fp : producers.values()) {
|
||||
try {
|
||||
fp.close();
|
||||
} catch (Throwable t) {
|
||||
// Suppress all exceptions here so we continue
|
||||
// to throw the original one
|
||||
}
|
||||
}
|
||||
IOUtils.closeSafely(true, producers.values());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -177,22 +164,7 @@ final class PerFieldCodecWrapper extends Codec {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
Iterator<FieldsProducer> it = codecs.values().iterator();
|
||||
IOException err = null;
|
||||
while (it.hasNext()) {
|
||||
try {
|
||||
it.next().close();
|
||||
} catch (IOException ioe) {
|
||||
// keep first IOException we hit but keep
|
||||
// closing the rest
|
||||
if (err == null) {
|
||||
err = ioe;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (err != null) {
|
||||
throw err;
|
||||
}
|
||||
IOUtils.closeSafely(false, codecs.values());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -59,7 +59,7 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
|
|||
|
||||
/**
|
||||
* Reads the snapshots information from the given {@link Directory}. This
|
||||
* method does can be used if the snapshots information is needed, however you
|
||||
* method can be used if the snapshots information is needed, however you
|
||||
* cannot instantiate the deletion policy (because e.g., some other process
|
||||
* keeps a lock on the snapshots directory).
|
||||
*/
|
||||
|
@ -122,11 +122,19 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
|
|||
writer.commit();
|
||||
}
|
||||
|
||||
// Initializes the snapshots information. This code should basically run
|
||||
// only if mode != CREATE, but if it is, it's no harm as we only open the
|
||||
// reader once and immediately close it.
|
||||
for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
|
||||
registerSnapshotInfo(e.getKey(), e.getValue(), null);
|
||||
try {
|
||||
// Initializes the snapshots information. This code should basically run
|
||||
// only if mode != CREATE, but if it is, it's no harm as we only open the
|
||||
// reader once and immediately close it.
|
||||
for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
|
||||
registerSnapshotInfo(e.getKey(), e.getValue(), null);
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
writer.close(); // don't leave any open file handles
|
||||
throw e;
|
||||
} catch (IOException e) {
|
||||
writer.close(); // don't leave any open file handles
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -436,7 +436,7 @@ public final class SegmentInfo implements Cloneable {
|
|||
*/
|
||||
public String getNormFileName(int number) {
|
||||
if (hasSeparateNorms(number)) {
|
||||
return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
|
||||
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + number, normGen.get(number));
|
||||
} else {
|
||||
// single file for all norms
|
||||
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.NoSuchDirectoryException;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
/**
|
||||
|
@ -323,17 +324,13 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
|
|||
SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
|
||||
segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
|
||||
infosWriter.prepareCommit(segnOutput);
|
||||
success = true;
|
||||
pendingSegnOutput = segnOutput;
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
// We hit an exception above; try to close the file
|
||||
// but suppress any exception:
|
||||
try {
|
||||
segnOutput.close();
|
||||
} catch (Throwable t) {
|
||||
// Suppress so we keep throwing the original exception
|
||||
}
|
||||
IOUtils.closeSafely(true, segnOutput);
|
||||
try {
|
||||
// Try not to leave a truncated segments_N file in
|
||||
// the index:
|
||||
|
@ -945,6 +942,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
|
|||
} finally {
|
||||
genOutput.close();
|
||||
}
|
||||
} catch (ThreadInterruptedException t) {
|
||||
throw t;
|
||||
} catch (Throwable t) {
|
||||
// It's OK if we fail to write this file since it's
|
||||
// used only as one of the retry fallbacks.
|
||||
|
@ -962,7 +961,6 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
|
|||
prepareCommit(dir);
|
||||
finishCommit(dir);
|
||||
}
|
||||
|
||||
|
||||
public String toString(Directory directory) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
|
|
|
@ -27,13 +27,13 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.index.IndexReader.FieldOption;
|
||||
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.codecs.MergeState;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.MultiBits;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
|
||||
|
@ -46,10 +46,6 @@ import org.apache.lucene.util.ReaderUtil;
|
|||
* @see #add
|
||||
*/
|
||||
final class SegmentMerger {
|
||||
|
||||
/** norms header placeholder */
|
||||
static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
|
||||
|
||||
private Directory directory;
|
||||
private String segment;
|
||||
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
|
||||
|
@ -124,6 +120,12 @@ final class SegmentMerger {
|
|||
return mergedDocs;
|
||||
}
|
||||
|
||||
/**
|
||||
* NOTE: this method creates a compound file for all files returned by
|
||||
* info.files(). While, generally, this may include separate norms and
|
||||
* deletion files, this SegmentInfo must not reference such files when this
|
||||
* method is called, because they are not allowed within a compound file.
|
||||
*/
|
||||
final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
|
||||
throws IOException {
|
||||
|
||||
|
@ -131,6 +133,10 @@ final class SegmentMerger {
|
|||
Collection<String> files = info.files();
|
||||
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
|
||||
for (String file : files) {
|
||||
assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
|
||||
: ".del file is not allowed in .cfs: " + file;
|
||||
assert !IndexFileNames.isSeparateNormsFile(file)
|
||||
: "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file;
|
||||
cfsWriter.addFile(file);
|
||||
}
|
||||
|
||||
|
@ -139,7 +145,7 @@ final class SegmentMerger {
|
|||
|
||||
return files;
|
||||
}
|
||||
|
||||
|
||||
private static void addIndexed(IndexReader reader, FieldInfos fInfos,
|
||||
Collection<String> names, boolean storeTermVectors,
|
||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
|
||||
|
@ -540,14 +546,13 @@ final class SegmentMerger {
|
|||
}
|
||||
codec = segmentWriteState.segmentCodecs.codec();
|
||||
final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);
|
||||
|
||||
// NOTE: this is silly, yet, necessary -- we create a
|
||||
// MultiBits as our skip docs only to have it broken
|
||||
// apart when we step through the docs enums in
|
||||
// MultiDocsEnum.
|
||||
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
|
||||
|
||||
try {
|
||||
// NOTE: this is silly, yet, necessary -- we create a
|
||||
// MultiBits as our skip docs only to have it broken
|
||||
// apart when we step through the docs enums in
|
||||
// MultiDocsEnum.
|
||||
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
|
||||
|
||||
consumer.merge(mergeState,
|
||||
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
|
||||
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
|
||||
|
@ -573,12 +578,13 @@ final class SegmentMerger {
|
|||
|
||||
private void mergeNorms() throws IOException {
|
||||
IndexOutput output = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
for (FieldInfo fi : fieldInfos) {
|
||||
if (fi.isIndexed && !fi.omitNorms) {
|
||||
if (output == null) {
|
||||
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
|
||||
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
|
||||
output.writeBytes(SegmentNorms.NORMS_HEADER, SegmentNorms.NORMS_HEADER.length);
|
||||
}
|
||||
for (IndexReader reader : readers) {
|
||||
final int maxDoc = reader.maxDoc();
|
||||
|
@ -606,10 +612,9 @@ final class SegmentMerger {
|
|||
}
|
||||
}
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (output != null) {
|
||||
output.close();
|
||||
}
|
||||
IOUtils.closeSafely(!success, output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,10 @@ import org.apache.lucene.store.IndexOutput;
|
|||
*/
|
||||
|
||||
final class SegmentNorms implements Cloneable {
|
||||
|
||||
/** norms header placeholder */
|
||||
static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
|
||||
|
||||
int refCount = 1;
|
||||
|
||||
// If this instance is a clone, the originalNorm
|
||||
|
@ -219,7 +223,7 @@ final class SegmentNorms implements Cloneable {
|
|||
boolean success = false;
|
||||
try {
|
||||
try {
|
||||
out.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
|
||||
out.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
|
||||
out.writeBytes(bytes, owner.maxDoc());
|
||||
} finally {
|
||||
out.close();
|
||||
|
|
|
@ -574,7 +574,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
|||
}
|
||||
|
||||
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
|
||||
long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
|
||||
long nextNormSeek = SegmentNorms.NORMS_HEADER.length; //skip header (header unused for now)
|
||||
int maxDoc = maxDoc();
|
||||
for (FieldInfo fi : core.fieldInfos) {
|
||||
if (norms.containsKey(fi.name)) {
|
||||
|
@ -619,7 +619,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
|||
if (isUnversioned) {
|
||||
normSeek = 0;
|
||||
} else {
|
||||
normSeek = SegmentMerger.NORMS_HEADER.length;
|
||||
normSeek = SegmentNorms.NORMS_HEADER.length;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -54,9 +54,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
|||
fill(state.numDocs);
|
||||
assert state.segmentName != null;
|
||||
String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
|
||||
tvx.close();
|
||||
tvf.close();
|
||||
tvd.close();
|
||||
IOUtils.closeSafely(false, tvx, tvf, tvd);
|
||||
tvx = tvd = tvf = null;
|
||||
if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
|
||||
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
|
||||
|
@ -89,18 +87,25 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
|||
|
||||
private final void initTermVectorsWriter() throws IOException {
|
||||
if (tvx == null) {
|
||||
boolean success = false;
|
||||
try {
|
||||
// If we hit an exception while init'ing the term
|
||||
// vector output files, we must abort this segment
|
||||
// because those files will be in an unknown
|
||||
// state:
|
||||
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||
|
||||
// If we hit an exception while init'ing the term
|
||||
// vector output files, we must abort this segment
|
||||
// because those files will be in an unknown
|
||||
// state:
|
||||
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||
|
||||
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, tvx, tvd, tvf);
|
||||
}
|
||||
}
|
||||
|
||||
lastDocID = 0;
|
||||
}
|
||||
|
@ -139,7 +144,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
|||
}
|
||||
}
|
||||
|
||||
assert lastDocID == docState.docID;
|
||||
assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
|
||||
|
||||
lastDocID++;
|
||||
|
||||
|
@ -152,21 +157,27 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
|||
public void abort() {
|
||||
hasVectors = false;
|
||||
try {
|
||||
IOUtils.closeSafely(tvx, tvd, tvf);
|
||||
} catch (IOException ignored) {
|
||||
IOUtils.closeSafely(true, tvx, tvd, tvf);
|
||||
} catch (IOException e) {
|
||||
// cannot happen since we suppress exceptions
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
try {
|
||||
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
} catch (IOException ignored) {
|
||||
}
|
||||
|
||||
try {
|
||||
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||
} catch (IOException ignored) {
|
||||
}
|
||||
|
||||
try {
|
||||
docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||
} catch (IOException ignored) {
|
||||
}
|
||||
|
||||
tvx = tvd = tvf = null;
|
||||
lastDocID = 0;
|
||||
|
||||
|
|
|
@ -31,15 +31,22 @@ final class TermVectorsWriter {
|
|||
private FieldInfos fieldInfos;
|
||||
|
||||
public TermVectorsWriter(Directory directory, String segment,
|
||||
FieldInfos fieldInfos)
|
||||
throws IOException {
|
||||
// Open files for TermVector storage
|
||||
tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
FieldInfos fieldInfos) throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
// Open files for TermVector storage
|
||||
tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, tvx, tvd, tvf);
|
||||
}
|
||||
}
|
||||
|
||||
this.fieldInfos = fieldInfos;
|
||||
}
|
||||
|
@ -51,8 +58,7 @@ final class TermVectorsWriter {
|
|||
* @param vectors
|
||||
* @throws IOException
|
||||
*/
|
||||
public final void addAllDocVectors(TermFreqVector[] vectors)
|
||||
throws IOException {
|
||||
public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {
|
||||
|
||||
tvx.writeLong(tvd.getFilePointer());
|
||||
tvx.writeLong(tvf.getFilePointer());
|
||||
|
@ -187,6 +193,6 @@ final class TermVectorsWriter {
|
|||
final void close() throws IOException {
|
||||
// make an effort to close all streams we can but remember and re-throw
|
||||
// the first exception encountered in this process
|
||||
IOUtils.closeSafely(tvx, tvd, tvf);
|
||||
IOUtils.closeSafely(false, tvx, tvd, tvf);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,7 +54,6 @@ final class TermsHash extends InvertedDocConsumer {
|
|||
|
||||
final boolean trackAllocations;
|
||||
|
||||
|
||||
public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
|
||||
this.docState = docWriter.docState;
|
||||
this.docWriter = docWriter;
|
||||
|
@ -108,11 +107,11 @@ final class TermsHash extends InvertedDocConsumer {
|
|||
}
|
||||
|
||||
for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
|
||||
TermsHashPerField perField = (TermsHashPerField) entry.getValue();
|
||||
childFields.put(entry.getKey(), perField.consumer);
|
||||
if (nextTermsHash != null) {
|
||||
nextChildFields.put(entry.getKey(), perField.nextPerField);
|
||||
}
|
||||
TermsHashPerField perField = (TermsHashPerField) entry.getValue();
|
||||
childFields.put(entry.getKey(), perField.consumer);
|
||||
if (nextTermsHash != null) {
|
||||
nextChildFields.put(entry.getKey(), perField.nextPerField);
|
||||
}
|
||||
}
|
||||
|
||||
consumer.flush(childFields, state);
|
||||
|
@ -134,12 +133,9 @@ final class TermsHash extends InvertedDocConsumer {
|
|||
|
||||
@Override
|
||||
void finishDocument() throws IOException {
|
||||
try {
|
||||
consumer.finishDocument(this);
|
||||
} finally {
|
||||
if (nextTermsHash != null) {
|
||||
nextTermsHash.consumer.finishDocument(nextTermsHash);
|
||||
}
|
||||
consumer.finishDocument(this);
|
||||
if (nextTermsHash != null) {
|
||||
nextTermsHash.consumer.finishDocument(nextTermsHash);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
|||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; //javadoc
|
||||
|
||||
/**
|
||||
|
@ -48,12 +47,10 @@ public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerT
|
|||
}
|
||||
|
||||
@Override
|
||||
public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) {
|
||||
public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter) {
|
||||
ThreadState threadState = threadBindings.get(requestingThread);
|
||||
if (threadState != null) {
|
||||
if (threadState.tryLock()) {
|
||||
return threadState;
|
||||
}
|
||||
if (threadState != null && threadState.tryLock()) {
|
||||
return threadState;
|
||||
}
|
||||
ThreadState minThreadState = null;
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.store.RAMOutputStream;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
// TODO: currently we encode all terms between two indexed
|
||||
|
@ -66,24 +67,29 @@ public class BlockTermsWriter extends FieldsConsumer {
|
|||
|
||||
//private final String segment;
|
||||
|
||||
public BlockTermsWriter(
|
||||
TermsIndexWriterBase termsIndexWriter,
|
||||
SegmentWriteState state,
|
||||
PostingsWriterBase postingsWriter)
|
||||
throws IOException
|
||||
{
|
||||
public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
|
||||
SegmentWriteState state, PostingsWriterBase postingsWriter)
|
||||
throws IOException {
|
||||
final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
|
||||
this.termsIndexWriter = termsIndexWriter;
|
||||
out = state.directory.createOutput(termsFileName);
|
||||
fieldInfos = state.fieldInfos;
|
||||
writeHeader(out);
|
||||
currentField = null;
|
||||
this.postingsWriter = postingsWriter;
|
||||
//segment = state.segmentName;
|
||||
|
||||
//System.out.println("BTW.init seg=" + state.segmentName);
|
||||
|
||||
postingsWriter.start(out); // have consumer write its format/header
|
||||
boolean success = false;
|
||||
try {
|
||||
fieldInfos = state.fieldInfos;
|
||||
writeHeader(out);
|
||||
currentField = null;
|
||||
this.postingsWriter = postingsWriter;
|
||||
//segment = state.segmentName;
|
||||
|
||||
//System.out.println("BTW.init seg=" + state.segmentName);
|
||||
|
||||
postingsWriter.start(out); // have consumer write its format/header
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeHeader(IndexOutput out) throws IOException {
|
||||
|
@ -130,20 +136,11 @@ public class BlockTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
writeTrailer(dirStart);
|
||||
} finally {
|
||||
try {
|
||||
out.close();
|
||||
} finally {
|
||||
try {
|
||||
postingsWriter.close();
|
||||
} finally {
|
||||
termsIndexWriter.close();
|
||||
}
|
||||
}
|
||||
IOUtils.closeSafely(false, out, postingsWriter, termsIndexWriter);
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeTrailer(long dirStart) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
out.writeLong(dirStart);
|
||||
}
|
||||
|
|
|
@ -88,6 +88,15 @@ public class CodecProvider {
|
|||
return codec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> iff a codec with the given name is registered
|
||||
* @param name codec name
|
||||
* @return <code>true</code> iff a codec with the given name is registered, otherwise <code>false</code>.
|
||||
*/
|
||||
public synchronized boolean isCodecRegistered(String name) {
|
||||
return codecs.containsKey(name);
|
||||
}
|
||||
|
||||
public SegmentInfosWriter getSegmentInfosWriter() {
|
||||
return infosWriter;
|
||||
}
|
||||
|
@ -145,6 +154,14 @@ public class CodecProvider {
|
|||
return codec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> if this provider has a Codec registered for this
|
||||
* field.
|
||||
*/
|
||||
public synchronized boolean hasFieldCodec(String name) {
|
||||
return perFieldMap.containsKey(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default {@link Codec} for this {@link CodecProvider}
|
||||
*
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentInfos;
|
|||
import org.apache.lucene.store.ChecksumIndexOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Default implementation of {@link SegmentInfosWriter}.
|
||||
|
@ -56,16 +57,24 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
|
|||
public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
|
||||
throws IOException {
|
||||
IndexOutput out = createOutput(dir, segmentFileName);
|
||||
out.writeInt(FORMAT_CURRENT); // write FORMAT
|
||||
out.writeLong(infos.version);
|
||||
out.writeInt(infos.counter); // write counter
|
||||
out.writeLong(infos.getGlobalFieldMapVersion());
|
||||
out.writeInt(infos.size()); // write infos
|
||||
for (SegmentInfo si : infos) {
|
||||
si.write(out);
|
||||
boolean success = false;
|
||||
try {
|
||||
out.writeInt(FORMAT_CURRENT); // write FORMAT
|
||||
out.writeLong(infos.version);
|
||||
out.writeInt(infos.counter); // write counter
|
||||
out.writeLong(infos.getGlobalFieldMapVersion());
|
||||
out.writeInt(infos.size()); // write infos
|
||||
for (SegmentInfo si : infos) {
|
||||
si.write(out);
|
||||
}
|
||||
out.writeStringStringMap(infos.getUserData());
|
||||
success = true;
|
||||
return out;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, out);
|
||||
}
|
||||
}
|
||||
out.writeStringStringMap(infos.getUserData());
|
||||
return out;
|
||||
}
|
||||
|
||||
protected IndexOutput createOutput(Directory dir, String segmentFileName)
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldInfo;
|
|||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
|
@ -108,6 +109,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
|
|||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) IOUtils.closeSafely(true, in);
|
||||
if (indexDivisor > 0) {
|
||||
in.close();
|
||||
in = null;
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -58,9 +59,17 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
|
||||
termIndexInterval = state.termIndexInterval;
|
||||
out = state.directory.createOutput(indexFileName);
|
||||
fieldInfos = state.fieldInfos;
|
||||
writeHeader(out);
|
||||
out.writeInt(termIndexInterval);
|
||||
boolean success = false;
|
||||
try {
|
||||
fieldInfos = state.fieldInfos;
|
||||
writeHeader(out);
|
||||
out.writeInt(termIndexInterval);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeHeader(IndexOutput out) throws IOException {
|
||||
|
@ -202,33 +211,37 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
final long dirStart = out.getFilePointer();
|
||||
final int fieldCount = fields.size();
|
||||
|
||||
int nonNullFieldCount = 0;
|
||||
for(int i=0;i<fieldCount;i++) {
|
||||
SimpleFieldWriter field = fields.get(i);
|
||||
if (field.numIndexTerms > 0) {
|
||||
nonNullFieldCount++;
|
||||
boolean success = false;
|
||||
try {
|
||||
final long dirStart = out.getFilePointer();
|
||||
final int fieldCount = fields.size();
|
||||
|
||||
int nonNullFieldCount = 0;
|
||||
for(int i=0;i<fieldCount;i++) {
|
||||
SimpleFieldWriter field = fields.get(i);
|
||||
if (field.numIndexTerms > 0) {
|
||||
nonNullFieldCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out.writeVInt(nonNullFieldCount);
|
||||
for(int i=0;i<fieldCount;i++) {
|
||||
SimpleFieldWriter field = fields.get(i);
|
||||
if (field.numIndexTerms > 0) {
|
||||
out.writeVInt(field.fieldInfo.number);
|
||||
out.writeVInt(field.numIndexTerms);
|
||||
out.writeVLong(field.termsStart);
|
||||
out.writeVLong(field.indexStart);
|
||||
out.writeVLong(field.packedIndexStart);
|
||||
out.writeVLong(field.packedOffsetsStart);
|
||||
|
||||
out.writeVInt(nonNullFieldCount);
|
||||
for(int i=0;i<fieldCount;i++) {
|
||||
SimpleFieldWriter field = fields.get(i);
|
||||
if (field.numIndexTerms > 0) {
|
||||
out.writeVInt(field.fieldInfo.number);
|
||||
out.writeVInt(field.numIndexTerms);
|
||||
out.writeVLong(field.termsStart);
|
||||
out.writeVLong(field.indexStart);
|
||||
out.writeVLong(field.packedIndexStart);
|
||||
out.writeVLong(field.packedOffsetsStart);
|
||||
}
|
||||
}
|
||||
writeTrailer(dirStart);
|
||||
success = true;
|
||||
} finally {
|
||||
IOUtils.closeSafely(!success, out);
|
||||
}
|
||||
writeTrailer(dirStart);
|
||||
out.close();
|
||||
}
|
||||
|
||||
protected void writeTrailer(long dirStart) throws IOException {
|
||||
|
|
|
@ -19,10 +19,12 @@ package org.apache.lucene.index.codecs;
|
|||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
/** @lucene.experimental */
|
||||
public abstract class TermsIndexWriterBase {
|
||||
public abstract class TermsIndexWriterBase implements Closeable {
|
||||
|
||||
public abstract class FieldWriter {
|
||||
public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
|
||||
|
@ -31,6 +33,4 @@ public abstract class TermsIndexWriterBase {
|
|||
}
|
||||
|
||||
public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;
|
||||
|
||||
public abstract void close() throws IOException;
|
||||
}
|
||||
|
|
|
@ -33,11 +33,11 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.automaton.fst.Builder;
|
||||
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.automaton.fst.FST;
|
||||
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.automaton.fst.Util; // for toDot
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util; // for toDot
|
||||
|
||||
/** See {@link VariableGapTermsIndexWriter}
|
||||
*
|
||||
|
|
|
@ -28,9 +28,10 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.automaton.fst.Builder;
|
||||
import org.apache.lucene.util.automaton.fst.FST;
|
||||
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
|
||||
/**
|
||||
* Selects index terms according to provided pluggable
|
||||
|
@ -159,9 +160,17 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException {
|
||||
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
|
||||
out = state.directory.createOutput(indexFileName);
|
||||
fieldInfos = state.fieldInfos;
|
||||
this.policy = policy;
|
||||
writeHeader(out);
|
||||
boolean success = false;
|
||||
try {
|
||||
fieldInfos = state.fieldInfos;
|
||||
this.policy = policy;
|
||||
writeHeader(out);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeHeader(IndexOutput out) throws IOException {
|
||||
|
@ -265,8 +274,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
final long dirStart = out.getFilePointer();
|
||||
final int fieldCount = fields.size();
|
||||
|
||||
|
@ -287,8 +296,10 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
}
|
||||
}
|
||||
writeTrailer(dirStart);
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeTrailer(long dirStart) throws IOException {
|
||||
out.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
|
|
|
@ -41,6 +41,7 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
|
|||
protected final IndexOutput out;
|
||||
|
||||
private int upto;
|
||||
private boolean hitExcDuringWrite;
|
||||
|
||||
// TODO what Var-Var codecs exist in practice... and what are there blocksizes like?
|
||||
// if its less than 128 we should set that as max and use byte?
|
||||
|
@ -105,19 +106,23 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
|
|||
|
||||
@Override
|
||||
public void write(int v) throws IOException {
|
||||
hitExcDuringWrite = true;
|
||||
upto -= add(v)-1;
|
||||
hitExcDuringWrite = false;
|
||||
assert upto >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
// stuff 0s in until the "real" data is flushed:
|
||||
int stuffed = 0;
|
||||
while(upto > stuffed) {
|
||||
upto -= add(0)-1;
|
||||
assert upto >= 0;
|
||||
stuffed += 1;
|
||||
if (!hitExcDuringWrite) {
|
||||
// stuff 0s in until the "real" data is flushed:
|
||||
int stuffed = 0;
|
||||
while(upto > stuffed) {
|
||||
upto -= add(0)-1;
|
||||
assert upto >= 0;
|
||||
stuffed += 1;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
out.close();
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.index.codecs.TermsIndexReaderBase;
|
|||
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
|
||||
import org.apache.lucene.index.codecs.standard.StandardCodec;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/** This codec "inlines" the postings for terms that have
|
||||
* low docFreq. It wraps another codec, which is used for
|
||||
|
@ -81,7 +82,7 @@ public class PulsingCodec extends Codec {
|
|||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
pulsingWriter.close();
|
||||
IOUtils.closeSafely(true, pulsingWriter);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,11 +94,7 @@ public class PulsingCodec extends Codec {
|
|||
return ret;
|
||||
} finally {
|
||||
if (!success) {
|
||||
try {
|
||||
pulsingWriter.close();
|
||||
} finally {
|
||||
indexWriter.close();
|
||||
}
|
||||
IOUtils.closeSafely(true, pulsingWriter, indexWriter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,8 +71,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
* for this term) is <= maxPositions, then the postings are
|
||||
* inlined into terms dict */
|
||||
public PulsingPostingsWriterImpl(int maxPositions, PostingsWriterBase wrappedPostingsWriter) throws IOException {
|
||||
super();
|
||||
|
||||
pending = new Position[maxPositions];
|
||||
for(int i=0;i<maxPositions;i++) {
|
||||
pending[i] = new Position();
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.store.RAMOutputStream;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
|
||||
* to .pyl, skip data to .skp
|
||||
|
@ -49,18 +50,18 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
final static int VERSION_START = 0;
|
||||
final static int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
final IntIndexOutput freqOut;
|
||||
final IntIndexOutput.Index freqIndex;
|
||||
IntIndexOutput freqOut;
|
||||
IntIndexOutput.Index freqIndex;
|
||||
|
||||
final IntIndexOutput posOut;
|
||||
final IntIndexOutput.Index posIndex;
|
||||
IntIndexOutput posOut;
|
||||
IntIndexOutput.Index posIndex;
|
||||
|
||||
final IntIndexOutput docOut;
|
||||
final IntIndexOutput.Index docIndex;
|
||||
IntIndexOutput docOut;
|
||||
IntIndexOutput.Index docIndex;
|
||||
|
||||
final IndexOutput payloadOut;
|
||||
IndexOutput payloadOut;
|
||||
|
||||
final IndexOutput skipOut;
|
||||
IndexOutput skipOut;
|
||||
IndexOutput termsOut;
|
||||
|
||||
final SepSkipListWriter skipListWriter;
|
||||
|
@ -107,44 +108,51 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
}
|
||||
|
||||
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException {
|
||||
super();
|
||||
this.skipInterval = skipInterval;
|
||||
this.skipMinimum = skipInterval; /* set to the same for now */
|
||||
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
|
||||
docOut = factory.createOutput(state.directory, docFileName);
|
||||
docIndex = docOut.index();
|
||||
freqOut = null;
|
||||
freqIndex = null;
|
||||
posOut = null;
|
||||
posIndex = null;
|
||||
payloadOut = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
this.skipInterval = skipInterval;
|
||||
this.skipMinimum = skipInterval; /* set to the same for now */
|
||||
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
|
||||
docOut = factory.createOutput(state.directory, docFileName);
|
||||
docIndex = docOut.index();
|
||||
|
||||
if (state.fieldInfos.hasProx()) {
|
||||
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
|
||||
freqOut = factory.createOutput(state.directory, frqFileName);
|
||||
freqIndex = freqOut.index();
|
||||
|
||||
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
|
||||
posOut = factory.createOutput(state.directory, posFileName);
|
||||
posIndex = posOut.index();
|
||||
|
||||
// TODO: -- only if at least one field stores payloads?
|
||||
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
|
||||
payloadOut = state.directory.createOutput(payloadFileName);
|
||||
}
|
||||
|
||||
final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
|
||||
skipOut = state.directory.createOutput(skipFileName);
|
||||
|
||||
totalNumDocs = state.numDocs;
|
||||
|
||||
skipListWriter = new SepSkipListWriter(skipInterval,
|
||||
maxSkipLevels,
|
||||
state.numDocs,
|
||||
freqOut, docOut,
|
||||
posOut, payloadOut);
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, docOut, skipOut, freqOut, posOut, payloadOut);
|
||||
}
|
||||
|
||||
if (state.fieldInfos.hasProx()) {
|
||||
final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, FREQ_EXTENSION);
|
||||
freqOut = factory.createOutput(state.directory, frqFileName);
|
||||
freqIndex = freqOut.index();
|
||||
|
||||
final String posFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, POS_EXTENSION);
|
||||
posOut = factory.createOutput(state.directory, posFileName);
|
||||
posIndex = posOut.index();
|
||||
|
||||
// TODO: -- only if at least one field stores payloads?
|
||||
final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, PAYLOAD_EXTENSION);
|
||||
payloadOut = state.directory.createOutput(payloadFileName);
|
||||
|
||||
} else {
|
||||
freqOut = null;
|
||||
freqIndex = null;
|
||||
posOut = null;
|
||||
posIndex = null;
|
||||
payloadOut = null;
|
||||
}
|
||||
|
||||
final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SKIP_EXTENSION);
|
||||
skipOut = state.directory.createOutput(skipFileName);
|
||||
|
||||
totalNumDocs = state.numDocs;
|
||||
|
||||
skipListWriter = new SepSkipListWriter(skipInterval,
|
||||
maxSkipLevels,
|
||||
state.numDocs,
|
||||
freqOut, docOut,
|
||||
posOut, payloadOut);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -306,25 +314,7 @@ public final class SepPostingsWriterImpl extends PostingsWriterBase {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
docOut.close();
|
||||
} finally {
|
||||
try {
|
||||
skipOut.close();
|
||||
} finally {
|
||||
if (freqOut != null) {
|
||||
try {
|
||||
freqOut.close();
|
||||
} finally {
|
||||
try {
|
||||
posOut.close();
|
||||
} finally {
|
||||
payloadOut.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
IOUtils.closeSafely(false, docOut, skipOut, freqOut, posOut, payloadOut);
|
||||
}
|
||||
|
||||
public static void getExtensions(Set<String> extensions) {
|
||||
|
|
|
@ -29,13 +29,14 @@ import org.apache.lucene.index.FieldInfos;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.fst.Builder;
|
||||
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.automaton.fst.FST;
|
||||
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.automaton.fst.PairOutputs;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
@ -236,7 +237,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
private int tf;
|
||||
private Bits skipDocs;
|
||||
private final BytesRef scratch = new BytesRef(10);
|
||||
private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
|
||||
private final CharsRef scratchUTF16 = new CharsRef(10);
|
||||
|
||||
public SimpleTextDocsEnum() {
|
||||
this.inStart = SimpleTextFieldsReader.this.in;
|
||||
|
@ -286,7 +287,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
return docID;
|
||||
}
|
||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
||||
docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
|
||||
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||
termFreq = 0;
|
||||
first = false;
|
||||
} else if (scratch.startsWith(POS)) {
|
||||
|
@ -323,8 +324,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
private Bits skipDocs;
|
||||
private final BytesRef scratch = new BytesRef(10);
|
||||
private final BytesRef scratch2 = new BytesRef(10);
|
||||
private final UnicodeUtil.UTF16Result scratchUTF16 = new UnicodeUtil.UTF16Result();
|
||||
private final UnicodeUtil.UTF16Result scratchUTF16_2 = new UnicodeUtil.UTF16Result();
|
||||
private final CharsRef scratchUTF16 = new CharsRef(10);
|
||||
private final CharsRef scratchUTF16_2 = new CharsRef(10);
|
||||
private BytesRef payload;
|
||||
private long nextDocStart;
|
||||
|
||||
|
@ -368,7 +369,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
return docID;
|
||||
}
|
||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
|
||||
docID = ArrayUtil.parseInt(scratchUTF16.result, 0, scratchUTF16.length);
|
||||
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
|
||||
tf = 0;
|
||||
posStart = in.getFilePointer();
|
||||
first = false;
|
||||
|
@ -400,7 +401,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
readLine(in, scratch);
|
||||
assert scratch.startsWith(POS): "got line=" + scratch.utf8ToString();
|
||||
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
|
||||
final int pos = ArrayUtil.parseInt(scratchUTF16_2.result, 0, scratchUTF16_2.length);
|
||||
final int pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
|
||||
final long fp = in.getFilePointer();
|
||||
readLine(in, scratch);
|
||||
if (scratch.startsWith(PAYLOAD)) {
|
||||
|
|
|
@ -143,8 +143,11 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
write(END);
|
||||
newline();
|
||||
out.close();
|
||||
try {
|
||||
write(END);
|
||||
newline();
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.store.RAMOutputStream;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/** @lucene.experimental */
|
||||
public final class StandardPostingsWriter extends PostingsWriterBase {
|
||||
|
@ -42,8 +43,8 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
final static int VERSION_START = 0;
|
||||
final static int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
final IndexOutput freqOut;
|
||||
final IndexOutput proxOut;
|
||||
IndexOutput freqOut;
|
||||
IndexOutput proxOut;
|
||||
final DefaultSkipListWriter skipListWriter;
|
||||
/** Expert: The fraction of TermDocs entries stored in skip tables,
|
||||
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
|
||||
|
@ -85,31 +86,35 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
|
||||
this(state, DEFAULT_SKIP_INTERVAL);
|
||||
}
|
||||
|
||||
public StandardPostingsWriter(SegmentWriteState state, int skipInterval) throws IOException {
|
||||
super();
|
||||
this.skipInterval = skipInterval;
|
||||
this.skipMinimum = skipInterval; /* set to the same for now */
|
||||
//this.segment = state.segmentName;
|
||||
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
|
||||
freqOut = state.directory.createOutput(fileName);
|
||||
|
||||
if (state.fieldInfos.hasProx()) {
|
||||
// At least one field does not omit TF, so create the
|
||||
// prox file
|
||||
fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
|
||||
proxOut = state.directory.createOutput(fileName);
|
||||
} else {
|
||||
// Every field omits TF so we will write no prox file
|
||||
proxOut = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
if (state.fieldInfos.hasProx()) {
|
||||
// At least one field does not omit TF, so create the
|
||||
// prox file
|
||||
fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.PROX_EXTENSION);
|
||||
proxOut = state.directory.createOutput(fileName);
|
||||
} else {
|
||||
// Every field omits TF so we will write no prox file
|
||||
proxOut = null;
|
||||
}
|
||||
|
||||
totalNumDocs = state.numDocs;
|
||||
|
||||
skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels,
|
||||
state.numDocs, freqOut, proxOut);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeSafely(true, freqOut, proxOut);
|
||||
}
|
||||
}
|
||||
|
||||
totalNumDocs = state.numDocs;
|
||||
|
||||
skipListWriter = new DefaultSkipListWriter(skipInterval,
|
||||
maxSkipLevels,
|
||||
state.numDocs,
|
||||
freqOut,
|
||||
proxOut);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -267,12 +272,6 @@ public final class StandardPostingsWriter extends PostingsWriterBase {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
freqOut.close();
|
||||
} finally {
|
||||
if (proxOut != null) {
|
||||
proxOut.close();
|
||||
}
|
||||
}
|
||||
IOUtils.closeSafely(false, freqOut, proxOut);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -181,6 +181,7 @@ public abstract class CachingCollector extends Collector {
|
|||
curUpto = 0;
|
||||
}
|
||||
cachedScorer.score = curScores[curUpto];
|
||||
cachedScorer.doc = curDocs[curUpto];
|
||||
other.collect(curDocs[curUpto++]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,8 +22,6 @@ import java.io.FileNotFoundException;
|
|||
import java.io.FilenameFilter;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
import java.util.Collection;
|
||||
import static java.util.Collections.synchronizedSet;
|
||||
|
@ -111,15 +109,6 @@ import org.apache.lucene.util.Constants;
|
|||
* @see Directory
|
||||
*/
|
||||
public abstract class FSDirectory extends Directory {
|
||||
private final static MessageDigest DIGESTER;
|
||||
|
||||
static {
|
||||
try {
|
||||
DIGESTER = MessageDigest.getInstance("MD5");
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new RuntimeException(e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default read chunk size. This is a conditional default: on 32bit JVMs, it defaults to 100 MB. On 64bit JVMs, it's
|
||||
|
@ -337,12 +326,6 @@ public abstract class FSDirectory extends Directory {
|
|||
return openInput(name, BufferedIndexInput.BUFFER_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* So we can do some byte-to-hexchar conversion below
|
||||
*/
|
||||
private static final char[] HEX_DIGITS =
|
||||
{'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
|
||||
|
||||
@Override
|
||||
public String getLockID() {
|
||||
ensureOpen();
|
||||
|
@ -353,19 +336,12 @@ public abstract class FSDirectory extends Directory {
|
|||
throw new RuntimeException(e.toString(), e);
|
||||
}
|
||||
|
||||
byte digest[];
|
||||
synchronized (DIGESTER) {
|
||||
digest = DIGESTER.digest(dirName.getBytes());
|
||||
int digest = 0;
|
||||
for(int charIDX=0;charIDX<dirName.length();charIDX++) {
|
||||
final char ch = dirName.charAt(charIDX);
|
||||
digest = 31 * digest + ch;
|
||||
}
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append("lucene-");
|
||||
for (int i = 0; i < digest.length; i++) {
|
||||
int b = digest[i];
|
||||
buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
|
||||
buf.append(HEX_DIGITS[b & 0xf]);
|
||||
}
|
||||
|
||||
return buf.toString();
|
||||
return "lucene-" + Integer.toHexString(digest);
|
||||
}
|
||||
|
||||
/** Closes the store to future operations. */
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
/** Represents byte[], as a slice (offset + length) into an
|
||||
* existing byte[].
|
||||
|
@ -122,6 +121,7 @@ public final class BytesRef implements Comparable<BytesRef> {
|
|||
public void copy(char text[], int offset, int length) {
|
||||
UnicodeUtil.UTF16toUTF8(text, offset, length, this);
|
||||
}
|
||||
|
||||
public boolean bytesEquals(BytesRef other) {
|
||||
if (length == other.length) {
|
||||
int otherUpto = other.offset;
|
||||
|
@ -198,13 +198,15 @@ public final class BytesRef implements Comparable<BytesRef> {
|
|||
/** Interprets stored bytes as UTF8 bytes, returning the
|
||||
* resulting string */
|
||||
public String utf8ToString() {
|
||||
try {
|
||||
return new String(bytes, offset, length, "UTF-8");
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
// should not happen -- UTF8 is presumably supported
|
||||
// by all JREs
|
||||
throw new RuntimeException(uee);
|
||||
}
|
||||
final CharsRef ref = new CharsRef(length);
|
||||
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
|
||||
return ref.toString();
|
||||
}
|
||||
|
||||
/** Interprets stored bytes as UTF8 bytes into the given {@link CharsRef} */
|
||||
public CharsRef utf8ToChars(CharsRef ref) {
|
||||
UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref);
|
||||
return ref;
|
||||
}
|
||||
|
||||
/** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */
|
||||
|
|
|
@ -0,0 +1,215 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Represents char[], as a slice (offset + length) into an existing char[].
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class CharsRef implements Comparable<CharsRef>, CharSequence {
|
||||
private static final char[] EMPTY_ARRAY = new char[0];
|
||||
public char[] chars;
|
||||
public int offset;
|
||||
public int length;
|
||||
|
||||
/**
|
||||
* Creates a new {@link CharsRef} initialized an empty array zero-length
|
||||
*/
|
||||
public CharsRef() {
|
||||
this(EMPTY_ARRAY, 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link CharsRef} initialized with an array of the given
|
||||
* capacity
|
||||
*/
|
||||
public CharsRef(int capacity) {
|
||||
chars = new char[capacity];
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link CharsRef} initialized with the given array, offset and
|
||||
* length
|
||||
*/
|
||||
public CharsRef(char[] chars, int offset, int length) {
|
||||
assert chars != null;
|
||||
assert chars.length >= offset + length;
|
||||
this.chars = chars;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link CharsRef} initialized with the given Strings character
|
||||
* array
|
||||
*/
|
||||
public CharsRef(String string) {
|
||||
this.chars = string.toCharArray();
|
||||
this.offset = 0;
|
||||
this.length = chars.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link CharsRef} and copies the contents of the source into
|
||||
* the new instance.
|
||||
* @see #copy(CharsRef)
|
||||
*/
|
||||
public CharsRef(CharsRef other) {
|
||||
copy(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() {
|
||||
return new CharsRef(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 0;
|
||||
final int end = offset + length;
|
||||
for (int i = offset; i < end; i++) {
|
||||
result = prime * result + chars[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other instanceof CharsRef) {
|
||||
return charsEquals((CharsRef) other);
|
||||
}
|
||||
|
||||
if (other instanceof CharSequence) {
|
||||
final CharSequence seq = (CharSequence) other;
|
||||
if (length == seq.length()) {
|
||||
int n = length;
|
||||
int i = offset;
|
||||
int j = 0;
|
||||
while (n-- != 0) {
|
||||
if (chars[i++] != seq.charAt(j++))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean charsEquals(CharsRef other) {
|
||||
if (length == other.length) {
|
||||
int otherUpto = other.offset;
|
||||
final char[] otherChars = other.chars;
|
||||
final int end = offset + length;
|
||||
for (int upto = offset; upto < end; upto++, otherUpto++) {
|
||||
if (chars[upto] != otherChars[otherUpto]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Signed int order comparison */
|
||||
public int compareTo(CharsRef other) {
|
||||
if (this == other)
|
||||
return 0;
|
||||
|
||||
final char[] aChars = this.chars;
|
||||
int aUpto = this.offset;
|
||||
final char[] bChars = other.chars;
|
||||
int bUpto = other.offset;
|
||||
|
||||
final int aStop = aUpto + Math.min(this.length, other.length);
|
||||
|
||||
while (aUpto < aStop) {
|
||||
int aInt = aChars[aUpto++];
|
||||
int bInt = bChars[bUpto++];
|
||||
if (aInt > bInt) {
|
||||
return 1;
|
||||
} else if (aInt < bInt) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// One is a prefix of the other, or, they are equal:
|
||||
return this.length - other.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the given {@link CharsRef} referenced content into this instance
|
||||
* starting at offset 0.
|
||||
*
|
||||
* @param other
|
||||
* the {@link CharsRef} to copy
|
||||
*/
|
||||
public void copy(CharsRef other) {
|
||||
chars = ArrayUtil.grow(chars, other.length);
|
||||
System.arraycopy(other.chars, other.offset, chars, 0, other.length);
|
||||
length = other.length;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
public void grow(int newLength) {
|
||||
if (chars.length < newLength) {
|
||||
chars = ArrayUtil.grow(chars, newLength);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the given array into this CharsRef starting at offset 0
|
||||
*/
|
||||
public void copy(char[] otherChars, int otherOffset, int otherLength) {
|
||||
this.offset = 0;
|
||||
append(otherChars, otherOffset, otherLength);
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends the given array to this CharsRef starting at the current offset
|
||||
*/
|
||||
public void append(char[] otherChars, int otherOffset, int otherLength) {
|
||||
grow(this.offset + otherLength);
|
||||
System.arraycopy(otherChars, otherOffset, this.chars, this.offset,
|
||||
otherLength);
|
||||
this.length = otherLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new String(chars, offset, length);
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return length;
|
||||
}
|
||||
|
||||
public char charAt(int index) {
|
||||
return chars[offset + index];
|
||||
}
|
||||
|
||||
public CharSequence subSequence(int start, int end) {
|
||||
return new CharsRef(chars, offset + start, offset + end - 1);
|
||||
}
|
||||
}
|
|
@ -43,6 +43,8 @@ public final class Constants {
|
|||
public static final boolean WINDOWS = OS_NAME.startsWith("Windows");
|
||||
/** True iff running on SunOS. */
|
||||
public static final boolean SUN_OS = OS_NAME.startsWith("SunOS");
|
||||
/** True iff running on Mac OS X */
|
||||
public static final boolean MAC_OS_X = OS_NAME.startsWith("Mac OS X");
|
||||
|
||||
public static final String OS_ARCH = System.getProperty("os.arch");
|
||||
public static final String OS_VERSION = System.getProperty("os.version");
|
||||
|
|
|
@ -47,44 +47,113 @@ public final class IOUtils {
|
|||
* @param objects objects to call <tt>close()</tt> on
|
||||
*/
|
||||
public static <E extends Exception> void closeSafely(E priorException, Closeable... objects) throws E, IOException {
|
||||
IOException firstIOE = null;
|
||||
Throwable th = null;
|
||||
|
||||
for (Closeable object : objects) {
|
||||
try {
|
||||
if (object != null)
|
||||
if (object != null) {
|
||||
object.close();
|
||||
} catch (IOException ioe) {
|
||||
if (firstIOE == null)
|
||||
firstIOE = ioe;
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (priorException != null)
|
||||
if (priorException != null) {
|
||||
throw priorException;
|
||||
else if (firstIOE != null)
|
||||
throw firstIOE;
|
||||
} else if (th != null) {
|
||||
if (th instanceof IOException) throw (IOException) th;
|
||||
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||
if (th instanceof Error) throw (Error) th;
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
|
||||
/** @see #closeSafely(Exception, Closeable...) */
|
||||
public static <E extends Exception> void closeSafely(E priorException, Iterable<Closeable> objects) throws E, IOException {
|
||||
Throwable th = null;
|
||||
|
||||
for (Closeable object : objects) {
|
||||
try {
|
||||
if (object != null) {
|
||||
object.close();
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (priorException != null) {
|
||||
throw priorException;
|
||||
} else if (th != null) {
|
||||
if (th instanceof IOException) throw (IOException) th;
|
||||
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||
if (th instanceof Error) throw (Error) th;
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions. Some of the <tt>Closeable</tt>s
|
||||
* may be null, they are ignored. After everything is closed, method either throws the first of suppressed exceptions,
|
||||
* or completes normally.</p>
|
||||
* @param objects objects to call <tt>close()</tt> on
|
||||
* Closes all given <tt>Closeable</tt>s, suppressing all thrown exceptions.
|
||||
* Some of the <tt>Closeable</tt>s may be null, they are ignored. After
|
||||
* everything is closed, and if {@code suppressExceptions} is {@code false},
|
||||
* method either throws the first of suppressed exceptions, or completes
|
||||
* normally.
|
||||
*
|
||||
* @param suppressExceptions
|
||||
* if true then exceptions that occur during close() are suppressed
|
||||
* @param objects
|
||||
* objects to call <tt>close()</tt> on
|
||||
*/
|
||||
public static void closeSafely(Closeable... objects) throws IOException {
|
||||
IOException firstIOE = null;
|
||||
public static void closeSafely(boolean suppressExceptions, Closeable... objects) throws IOException {
|
||||
Throwable th = null;
|
||||
|
||||
for (Closeable object : objects) {
|
||||
try {
|
||||
if (object != null)
|
||||
if (object != null) {
|
||||
object.close();
|
||||
} catch (IOException ioe) {
|
||||
if (firstIOE == null)
|
||||
firstIOE = ioe;
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
if (th == null)
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
|
||||
if (firstIOE != null)
|
||||
throw firstIOE;
|
||||
if (th != null && !suppressExceptions) {
|
||||
if (th instanceof IOException) throw (IOException) th;
|
||||
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||
if (th instanceof Error) throw (Error) th;
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #closeSafely(boolean, Closeable...)
|
||||
*/
|
||||
public static void closeSafely(boolean suppressExceptions, Iterable<? extends Closeable> objects) throws IOException {
|
||||
Throwable th = null;
|
||||
|
||||
for (Closeable object : objects) {
|
||||
try {
|
||||
if (object != null) {
|
||||
object.close();
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
if (th == null)
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
|
||||
if (th != null && !suppressExceptions) {
|
||||
if (th instanceof IOException) throw (IOException) th;
|
||||
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||
if (th instanceof Error) throw (Error) th;
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -78,11 +78,15 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
protected long[] bits;
|
||||
protected int wlen; // number of words (elements) used in the array
|
||||
|
||||
// Used only for assert:
|
||||
private long numBits;
|
||||
|
||||
/** Constructs an OpenBitSet large enough to hold numBits.
|
||||
*
|
||||
* @param numBits
|
||||
*/
|
||||
public OpenBitSet(long numBits) {
|
||||
this.numBits = numBits;
|
||||
bits = new long[bits2words(numBits)];
|
||||
wlen = bits.length;
|
||||
}
|
||||
|
@ -107,6 +111,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
public OpenBitSet(long[] bits, int numWords) {
|
||||
this.bits = bits;
|
||||
this.wlen = numWords;
|
||||
this.numBits = wlen * 64;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -170,6 +175,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size
|
||||
*/
|
||||
public boolean fastGet(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int i = index >> 6; // div 64
|
||||
// signed shift will keep a negative index and force an
|
||||
// array-index-out-of-bounds-exception, removing the need for an explicit check.
|
||||
|
@ -194,6 +200,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public boolean fastGet(long index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int i = (int)(index >> 6); // div 64
|
||||
int bit = (int)index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -217,6 +224,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size
|
||||
*/
|
||||
public int getBit(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int i = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
return ((int)(bits[i]>>>bit)) & 0x01;
|
||||
|
@ -245,6 +253,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public void fastSet(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -255,6 +264,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public void fastSet(long index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = (int)(index >> 6);
|
||||
int bit = (int)index & 0x3f;
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -296,6 +306,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
ensureCapacity(index+1);
|
||||
wlen = wordNum+1;
|
||||
}
|
||||
assert (numBits = Math.max(numBits, index+1)) >= 0;
|
||||
return wordNum;
|
||||
}
|
||||
|
||||
|
@ -304,6 +315,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public void fastClear(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6;
|
||||
int bit = index & 0x03f;
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -321,6 +333,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public void fastClear(long index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = (int)(index >> 6); // div 64
|
||||
int bit = (int)index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -415,6 +428,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public boolean getAndSet(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -427,6 +441,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public boolean getAndSet(long index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = (int)(index >> 6); // div 64
|
||||
int bit = (int)index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -439,6 +454,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public void fastFlip(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -449,6 +465,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public void fastFlip(long index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = (int)(index >> 6); // div 64
|
||||
int bit = (int)index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -467,6 +484,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public boolean flipAndGet(int index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = index >> 6; // div 64
|
||||
int bit = index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -478,6 +496,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
* The index should be less than the OpenBitSet size.
|
||||
*/
|
||||
public boolean flipAndGet(long index) {
|
||||
assert index >= 0 && index < numBits;
|
||||
int wordNum = (int)(index >> 6); // div 64
|
||||
int bit = (int)index & 0x3f; // mod 64
|
||||
long bitmask = 1L << bit;
|
||||
|
@ -674,6 +693,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
public void union(OpenBitSet other) {
|
||||
int newLen = Math.max(wlen,other.wlen);
|
||||
ensureCapacityWords(newLen);
|
||||
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
|
||||
|
||||
long[] thisArr = this.bits;
|
||||
long[] otherArr = other.bits;
|
||||
|
@ -702,6 +722,7 @@ public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
|
|||
public void xor(OpenBitSet other) {
|
||||
int newLen = Math.max(wlen,other.wlen);
|
||||
ensureCapacityWords(newLen);
|
||||
assert (numBits = Math.max(other.numBits, numBits)) >= 0;
|
||||
|
||||
long[] thisArr = this.bits;
|
||||
long[] otherArr = other.bits;
|
||||
|
|
|
@ -94,6 +94,19 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
|
||||
public final class UnicodeUtil {
|
||||
|
||||
/** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms
|
||||
* one would normally encounter, and definitely bigger than any UTF-8 terms.
|
||||
* <p>
|
||||
* WARNING: This is not a valid UTF8 Term
|
||||
**/
|
||||
public static final BytesRef BIG_TERM = new BytesRef(
|
||||
new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}
|
||||
); // TODO this is unrelated here find a better place for it
|
||||
|
||||
public static void main(String[] args) {
|
||||
System.out.println(Character.toChars(0x10FFFF + 1));
|
||||
}
|
||||
|
||||
private UnicodeUtil() {} // no instance
|
||||
|
||||
|
@@ -112,33 +125,6 @@ public final class UnicodeUtil {
      Character.MIN_SUPPLEMENTARY_CODE_POINT -
      (UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START;

  /**
   * @lucene.internal
   */
  public static final class UTF16Result {
    public char[] result = new char[10];
    public int[] offsets = new int[10];
    public int length;

    public void setLength(int newLength) {
      if (result.length < newLength)
        result = ArrayUtil.grow(result, newLength);
      length = newLength;
    }

    public void copyText(UTF16Result other) {
      setLength(other.length);
      System.arraycopy(other.result, 0, result, 0, length);
    }

    public void copyText(String other) {
      final int otherLength = other.length();
      setLength(otherLength);
      other.getChars(0, otherLength, result, 0);
      length = otherLength;
    }
  }

  /** Encode characters from a char[] source, starting at
   *  offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */
  public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {

@@ -302,135 +288,6 @@ public final class UnicodeUtil {
    result.length = upto;
  }

  /** Convert UTF8 bytes into UTF16 characters. If offset
   *  is non-zero, conversion starts at that starting point
   *  in utf8, re-using the results from the previous call
   *  up until offset. */
  public static void UTF8toUTF16(final byte[] utf8, final int offset, final int length, final UTF16Result result) {

    final int end = offset + length;
    char[] out = result.result;
    if (result.offsets.length <= end) {
      result.offsets = ArrayUtil.grow(result.offsets, end+1);
    }
    final int[] offsets = result.offsets;

    // If incremental decoding fell in the middle of a
    // single unicode character, rollback to its start:
    int upto = offset;
    while(offsets[upto] == -1)
      upto--;

    int outUpto = offsets[upto];

    // Pre-allocate for worst case 1-for-1
    if (outUpto+length >= out.length) {
      out = result.result = ArrayUtil.grow(out, outUpto+length+1);
    }

    while (upto < end) {

      final int b = utf8[upto]&0xff;
      final int ch;

      offsets[upto++] = outUpto;

      if (b < 0xc0) {
        assert b < 0x80;
        ch = b;
      } else if (b < 0xe0) {
        ch = ((b&0x1f)<<6) + (utf8[upto]&0x3f);
        offsets[upto++] = -1;
      } else if (b < 0xf0) {
        ch = ((b&0xf)<<12) + ((utf8[upto]&0x3f)<<6) + (utf8[upto+1]&0x3f);
        offsets[upto++] = -1;
        offsets[upto++] = -1;
      } else {
        assert b < 0xf8;
        ch = ((b&0x7)<<18) + ((utf8[upto]&0x3f)<<12) + ((utf8[upto+1]&0x3f)<<6) + (utf8[upto+2]&0x3f);
        offsets[upto++] = -1;
        offsets[upto++] = -1;
        offsets[upto++] = -1;
      }

      if (ch <= UNI_MAX_BMP) {
        // target is a character <= 0xFFFF
        out[outUpto++] = (char) ch;
      } else {
        // target is a character in range 0xFFFF - 0x10FFFF
        out[outUpto++] = (char) ((ch >> HALF_SHIFT) + 0xD7C0 /* UNI_SUR_HIGH_START - 64 */);
        out[outUpto++] = (char) ((ch & HALF_MASK) + UNI_SUR_LOW_START);
      }
    }
    offsets[upto] = outUpto;
    result.length = outUpto;
  }

  /**
   * Get the next valid UTF-16 String in UTF-16 order.
   * <p>
   * If the input String is already valid, it is returned.
   * Otherwise the next String in code unit order is returned.
   * </p>
   * @param s input String (possibly with unpaired surrogates)
   * @return next valid UTF-16 String in UTF-16 order
   */
  public static String nextValidUTF16String(String s) {
    if (validUTF16String(s))
      return s;
    else {
      UTF16Result chars = new UTF16Result();
      chars.copyText(s);
      nextValidUTF16String(chars);
      return new String(chars.result, 0, chars.length);
    }
  }

  public static void nextValidUTF16String(UTF16Result s) {
    final int size = s.length;
    for (int i = 0; i < size; i++) {
      char ch = s.result[i];
      if (ch >= UnicodeUtil.UNI_SUR_HIGH_START
          && ch <= UnicodeUtil.UNI_SUR_HIGH_END) {
        if (i < size - 1) {
          i++;
          char nextCH = s.result[i];
          if (nextCH >= UnicodeUtil.UNI_SUR_LOW_START
              && nextCH <= UnicodeUtil.UNI_SUR_LOW_END) {
            // Valid surrogate pair
          } else
            // Unmatched high surrogate
            if (nextCH < UnicodeUtil.UNI_SUR_LOW_START) { // SMP not enumerated
              s.setLength(i + 1);
              s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
              return;
            } else { // SMP already enumerated
              if (s.result[i - 1] == UnicodeUtil.UNI_SUR_HIGH_END) {
                s.result[i - 1] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
                s.setLength(i);
              } else {
                s.result[i - 1]++;
                s.result[i] = (char) UnicodeUtil.UNI_SUR_LOW_START;
                s.setLength(i + 1);
              }
              return;
            }
        } else {
          // Unmatched high surrogate in final position, SMP not yet enumerated
          s.setLength(i + 2);
          s.result[i + 1] = (char) UnicodeUtil.UNI_SUR_LOW_START;
          return;
        }
      } else if (ch >= UnicodeUtil.UNI_SUR_LOW_START
          && ch <= UnicodeUtil.UNI_SUR_LOW_END) {
        // Unmatched low surrogate, SMP already enumerated
        s.setLength(i + 1);
        s.result[i] = (char) (UnicodeUtil.UNI_SUR_LOW_END + 1);
        return;
      }
    }
  }

  // Only called from assert
  /*
  private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) {

@@ -705,4 +562,51 @@ public final class UnicodeUtil {
    }
    return sb.toString();
  }

  /**
   * Interprets the given byte array as UTF-8 and converts to UTF-16. The {@link CharsRef} will be extended if
   * it doesn't provide enough space to hold the worst case of each byte becoming a UTF-16 codepoint.
   * <p>
   * NOTE: Full characters are read, even if this reads past the length passed (and
   * can result in an ArrayOutOfBoundsException if invalid UTF-8 is passed).
   * Explicit checks for valid UTF-8 are not performed.
   */
  public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
    int out_offset = chars.offset = 0;
    final char[] out = chars.chars = ArrayUtil.grow(chars.chars, length);
    final int limit = offset + length;
    while (offset < limit) {
      int b = utf8[offset++]&0xff;
      if (b < 0xc0) {
        assert b < 0x80;
        out[out_offset++] = (char)b;
      } else if (b < 0xe0) {
        out[out_offset++] = (char)(((b&0x1f)<<6) + (utf8[offset++]&0x3f));
      } else if (b < 0xf0) {
        out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f));
        offset += 2;
      } else {
        assert b < 0xf8;
        int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f);
        offset += 3;
        if (ch < UNI_MAX_BMP) {
          out[out_offset++] = (char)ch;
        } else {
          int chHalf = ch - 0x0010000;
          out[out_offset++] = (char) ((chHalf >> 10) + 0xD800);
          out[out_offset++] = (char) ((chHalf & HALF_MASK) + 0xDC00);
        }
      }
    }
    chars.length = out_offset - chars.offset;
  }

  /**
   * Utility method for {@link #UTF8toUTF16(byte[], int, int, CharsRef)}
   * @see #UTF8toUTF16(byte[], int, int, CharsRef)
   */
  public static void UTF8toUTF16(BytesRef bytesRef, CharsRef chars) {
    UTF8toUTF16(bytesRef.bytes, bytesRef.offset, bytesRef.length, chars);
  }

}

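The new CharsRef overloads give a buffer-reusing decode path for code that already holds a BytesRef. A hedged usage sketch (assumes BytesRef's CharSequence constructor and CharsRef's capacity constructor; names here are illustrative):

  // Decode a UTF-8 BytesRef into a reusable CharsRef buffer.
  BytesRef utf8 = new BytesRef("smörgåsbord");   // encodes the string as UTF-8
  CharsRef chars = new CharsRef(16);             // grown automatically if too small
  UnicodeUtil.UTF8toUTF16(utf8, chars);
  String decoded = chars.toString();             // should equal the original string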
@@ -48,6 +48,13 @@ public enum Version {
   */
  @Deprecated
  LUCENE_32,

  /**
   * Match settings and bugs in Lucene's 3.3 release.
   * @deprecated (4.0) Use latest
   */
  @Deprecated
  LUCENE_33,

  /** Match settings and bugs in Lucene's 4.0 release.
   *  <p>

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef;

/**
 * Output is a sequence of bytes, for each input term.
 *
 * @lucene.experimental
 */

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.util.BytesRef;

/** Can next() and advance() through the terms in an FST
 *
 * @lucene.experimental
 */

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -23,7 +23,7 @@ import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
import org.apache.lucene.util.fst.Builder.UnCompiledNode;

// NOTE: while the FST is able to represent a non-final
// dead-end state (NON_FINAL_END_NODE=0), the layres above

@@ -32,6 +32,7 @@ import org.apache.lucene.util.automaton.fst.Builder.UnCompiledNode;
/** Represents an FST using a compact byte[] format.
 * <p> The format is similar to what's used by Morfologik
 * (http://sourceforge.net/projects/morfologik).
 *
 * @lucene.experimental
 */
public class FST<T> {

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -23,6 +23,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;

/** Can next() and advance() through the terms in an FST
 *
 * @lucene.experimental
 */

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -25,6 +25,7 @@ import org.apache.lucene.util.IntsRef;

/**
 * Output is a sequence of ints, for each input term.
 *
 * @lucene.experimental
 */

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -22,6 +22,7 @@ import org.apache.lucene.util.IntsRef;
import java.io.IOException;

/** Can next() and advance() through the terms in an FST
 *
 * @lucene.experimental
 */

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -22,6 +22,8 @@ import org.apache.lucene.store.DataOutput;

/**
 * Use this if you just want to build an FSA.
 *
 * @lucene.experimental
 */

public final class NoOutputs extends Outputs<Object> {

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -25,6 +25,7 @@ import org.apache.lucene.store.DataOutput;
/**
 * Represents the outputs for an FST, providing the basic
 * algebra needed for the FST.
 *
 * @lucene.experimental
 */

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -24,10 +24,10 @@ import org.apache.lucene.store.DataOutput;

/**
 * Pairs up two outputs into one.
 *
 * @lucene.experimental
 */

public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {

  private final Pair<A,B> NO_OUTPUT;

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -27,6 +27,7 @@ import org.apache.lucene.store.DataOutput;
 * resulting FST is not guaranteed to be minimal! See
 * {@link Builder}. You cannot store 0 output with this
 * (that's reserved to mean "no output")!
 *
 * @lucene.experimental
 */

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -1,4 +1,4 @@
package org.apache.lucene.util.automaton.fst;
package org.apache.lucene.util.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -23,7 +23,9 @@ import java.util.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/** Static helper methods */
/** Static helper methods
 *
 * @lucene.experimental */
public final class Util {
  private Util() {
  }

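Each of the files above moves unchanged from the org.apache.lucene.util.automaton.fst package to org.apache.lucene.util.fst, so downstream code only needs its imports adjusted. A hedged before/after sketch for a caller (class names taken from the hunks above):

  // before this commit
  import org.apache.lucene.util.automaton.fst.Builder;
  import org.apache.lucene.util.automaton.fst.FST;

  // after this commit
  import org.apache.lucene.util.fst.Builder;
  import org.apache.lucene.util.fst.FST;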
@@ -19,6 +19,7 @@ package org.apache.lucene.index;

import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;

@@ -97,8 +98,43 @@ public class RandomIndexWriter implements Closeable {
   * Adds a Document.
   * @see IndexWriter#addDocument(Document)
   */
  public void addDocument(Document doc) throws IOException {
    w.addDocument(doc);
  public void addDocument(final Document doc) throws IOException {
    if (r.nextInt(5) == 3) {
      // TODO: maybe, we should simply buffer up added docs
      // (but we need to clone them), and only when
      // getReader, commit, etc. are called, we do an
      // addDocuments? Would be better testing.
      w.addDocuments(new Iterable<Document>() {

        // @Override -- not until Java 1.6
        public Iterator<Document> iterator() {
          return new Iterator<Document>() {
            boolean done;

            // @Override -- not until Java 1.6
            public boolean hasNext() {
              return !done;
            }

            // @Override -- not until Java 1.6
            public void remove() {
              throw new UnsupportedOperationException();
            }

            // @Override -- not until Java 1.6
            public Document next() {
              if (done) {
                throw new IllegalStateException();
              }
              done = true;
              return doc;
            }
          };
        }
      });
    } else {
      w.addDocument(doc);
    }
    maybeCommit();
  }

@@ -116,12 +152,53 @@ public class RandomIndexWriter implements Closeable {
    }
  }

  public void addDocuments(Iterable<Document> docs) throws IOException {
    w.addDocuments(docs);
    maybeCommit();
  }

  public void updateDocuments(Term delTerm, Iterable<Document> docs) throws IOException {
    w.updateDocuments(delTerm, docs);
    maybeCommit();
  }

  /**
   * Updates a document.
   * @see IndexWriter#updateDocument(Term, Document)
   */
  public void updateDocument(Term t, Document doc) throws IOException {
    w.updateDocument(t, doc);
  public void updateDocument(Term t, final Document doc) throws IOException {
    if (r.nextInt(5) == 3) {
      w.updateDocuments(t, new Iterable<Document>() {

        // @Override -- not until Java 1.6
        public Iterator<Document> iterator() {
          return new Iterator<Document>() {
            boolean done;

            // @Override -- not until Java 1.6
            public boolean hasNext() {
              return !done;
            }

            // @Override -- not until Java 1.6
            public void remove() {
              throw new UnsupportedOperationException();
            }

            // @Override -- not until Java 1.6
            public Document next() {
              if (done) {
                throw new IllegalStateException();
              }
              done = true;
              return doc;
            }
          };
        }
      });
    } else {
      w.updateDocument(t, doc);
    }
    maybeCommit();
  }

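The new addDocument/updateDocument bodies wrap the single Document in a hand-rolled one-shot Iterable so that the addDocuments/updateDocuments code paths are exercised on roughly one call in five. A functionally equivalent sketch (not the committed code) using java.util.Collections instead of the anonymous iterator:

  // Equivalent randomized dispatch, assuming an extra
  // 'import java.util.Collections;' in the class.
  if (r.nextInt(5) == 3) {
    w.addDocuments(Collections.singletonList(doc));   // single-element Iterable
  } else {
    w.addDocument(doc);
  }
  maybeCommit();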
@@ -44,6 +44,7 @@ import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

/**
 * A silly test codec to verify core support for fixed

@@ -97,15 +98,25 @@ public class MockFixedIntBlockCodec extends Codec {

    @Override
    public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
      return new FixedIntBlockIndexOutput(dir.createOutput(fileName), blockSize) {
        @Override
        protected void flushBlock() throws IOException {
          for(int i=0;i<buffer.length;i++) {
            assert buffer[i] >= 0;
            out.writeVInt(buffer[i]);
      IndexOutput out = dir.createOutput(fileName);
      boolean success = false;
      try {
        FixedIntBlockIndexOutput ret = new FixedIntBlockIndexOutput(out, blockSize) {
          @Override
          protected void flushBlock() throws IOException {
            for(int i=0;i<buffer.length;i++) {
              assert buffer[i] >= 0;
              out.writeVInt(buffer[i]);
            }
          }
        };
        success = true;
        return ret;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, out);
        }
      };
    }
  }
}

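The reworked createOutput follows a common Lucene resource-guard idiom: create the underlying IndexOutput first, and close it if anything fails before the wrapper is successfully handed back, so no file handle leaks. A generic hedged sketch of the idiom (hypothetical wrapper type, not this codec's code):

  IndexOutput out = dir.createOutput(fileName);   // resource we now own
  boolean success = false;
  try {
    MyBlockWriter ret = new MyBlockWriter(out);   // hypothetical wrapper; may throw
    success = true;
    return ret;                                   // ownership passes to the caller
  } finally {
    if (!success) {
      IOUtils.closeSafely(true, out);             // close quietly on the failure path
    }
  }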
@@ -46,6 +46,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

/**
 * A silly test codec to verify core support for variable

@@ -102,34 +103,42 @@ public class MockVariableIntBlockCodec extends Codec {
    @Override
    public IntIndexOutput createOutput(Directory dir, String fileName) throws IOException {
      final IndexOutput out = dir.createOutput(fileName);
      out.writeInt(baseBlockSize);
      return new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {

        int pendingCount;
        final int[] buffer = new int[2+2*baseBlockSize];

        @Override
        protected int add(int value) throws IOException {
          assert value >= 0;
          buffer[pendingCount++] = value;
          // silly variable block length int encoder: if
          // first value <= 3, we write N vints at once;
          // else, 2*N
          final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;

          // intentionally be non-causal here:
          if (pendingCount == flushAt+1) {
            for(int i=0;i<flushAt;i++) {
              out.writeVInt(buffer[i]);
      boolean success = false;
      try {
        out.writeInt(baseBlockSize);
        VariableIntBlockIndexOutput ret = new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
          int pendingCount;
          final int[] buffer = new int[2+2*baseBlockSize];

          @Override
          protected int add(int value) throws IOException {
            assert value >= 0;
            buffer[pendingCount++] = value;
            // silly variable block length int encoder: if
            // first value <= 3, we write N vints at once;
            // else, 2*N
            final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;

            // intentionally be non-causal here:
            if (pendingCount == flushAt+1) {
              for(int i=0;i<flushAt;i++) {
                out.writeVInt(buffer[i]);
              }
              buffer[0] = buffer[flushAt];
              pendingCount = 1;
              return flushAt;
            } else {
              return 0;
            }
          buffer[0] = buffer[flushAt];
          pendingCount = 1;
          return flushAt;
        } else {
          return 0;
        }
        };
        success = true;
        return ret;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, out);
        }
      };
    }
  }
}

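A worked example of the block policy implemented by add() above, assuming a freshly created writer and baseBlockSize = 2 (so flushAt is 2 while the first buffered value is <= 3, otherwise 4):

  // add(1), add(7), add(9): on the third add, pendingCount == flushAt+1 == 3,
  // so the vints 1 and 7 are flushed and 9 is carried over as the new buffer[0].
  // add(5), add(6), add(8), add(4), add(2): the first value 5 > 3 makes flushAt 4,
  // so the flush happens on the fifth add, writing 5, 6, 8, 4 and carrying 2.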
@@ -136,8 +136,11 @@ public class MockRandomCodec extends Codec {

    final String seedFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, SEED_EXT);
    final IndexOutput out = state.directory.createOutput(seedFileName);
    out.writeLong(seed);
    out.close();
    try {
      out.writeLong(seed);
    } finally {
      out.close();
    }

    final Random random = new Random(seed);

Some files were not shown because too many files have changed in this diff.