merged with trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1129631 13f79535-47bb-0310-9956-ffa450edef68

@@ -18,7 +18,6 @@
-->

<project name="lucene-solr" default="test" basedir=".">

  <import file="common-build.xml"/>

  <target name="test" description="Test both Lucene and Solr" depends="validate">
    <sequential>
      <subant target="test" inheritall="false" failonerror="true">

@@ -73,6 +72,7 @@
  </target>

  <target name="eclipse" description="Setup Eclipse configuration">
    <copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false"/>
    <copy file="dev-tools/eclipse/dot.classpath" tofile=".classpath" overwrite="true"/>
    <mkdir dir=".settings"/>
    <copy file="dev-tools/eclipse/resources.prefs"

@@ -1,32 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<project name="all-common" basedir="."
         xmlns:artifact="antlib:org.apache.maven.artifact.ant">

  <dirname file="${ant.file.all-common}" property="all.common.dir"/>

  <path id="tools.runtime.classpath">
    <pathelement location="${all.common.dir}/lucene/build/classes/tools"/>
  </path>

  <target name="compile-tools" description="Compile the Test Framework and Validation tools">
    <sequential>
      <subant target="compile-tools" inheritall="false" failonerror="true">
        <fileset dir="${all.common.dir}/lucene" includes="build.xml" />
      </subant>
    </sequential>
  </target>
</project>

@@ -20,8 +20,6 @@
  <classpathentry kind="src" path="lucene/contrib/queryparser/src/test"/>
  <classpathentry kind="src" path="lucene/contrib/spatial/src/java"/>
  <classpathentry kind="src" path="lucene/contrib/spatial/src/test"/>
  <classpathentry kind="src" path="lucene/contrib/spellchecker/src/java"/>
  <classpathentry kind="src" path="lucene/contrib/spellchecker/src/test"/>
  <classpathentry kind="src" path="lucene/contrib/wordnet/src/java"/>
  <classpathentry kind="src" path="lucene/contrib/wordnet/src/test"/>
  <classpathentry kind="src" path="lucene/contrib/xml-query-parser/src/java"/>

@@ -44,6 +42,8 @@
  <classpathentry kind="src" path="modules/benchmark/src/test"/>
  <classpathentry kind="src" path="modules/grouping/src/java"/>
  <classpathentry kind="src" path="modules/grouping/src/test"/>
  <classpathentry kind="src" path="modules/suggest/src/java"/>
  <classpathentry kind="src" path="modules/suggest/src/test"/>
  <classpathentry kind="src" path="solr/src/java"/>
  <classpathentry kind="src" path="solr/src/webapp/src"/>
  <classpathentry kind="src" path="solr/src/common"/>

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
  <name>lucene_solr</name>
  <comment></comment>
  <projects>
  </projects>
  <buildSpec>
    <buildCommand>
      <name>org.eclipse.jdt.core.javabuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
  </buildSpec>
  <natures>
    <nature>org.eclipse.jdt.core.javanature</nature>
  </natures>
</projectDescription>

@@ -11,7 +11,6 @@
    <buildFile url="file://$PROJECT_DIR$/lucene/contrib/queries/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/lucene/contrib/queryparser/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/lucene/contrib/spatial/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/lucene/contrib/spellchecker/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/lucene/contrib/wordnet/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/lucene/contrib/xml-query-parser/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/modules/analysis/common/build.xml" />

@@ -21,6 +20,7 @@
    <buildFile url="file://$PROJECT_DIR$/modules/analysis/stempel/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/modules/benchmark/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/modules/grouping/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/modules/suggest/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/solr/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/solr/contrib/analysis-extras/build.xml" />
    <buildFile url="file://$PROJECT_DIR$/solr/contrib/clustering/build.xml" />

@@ -12,7 +12,6 @@
    <module filepath="$PROJECT_DIR$/lucene/contrib/queries/queries.iml" />
    <module filepath="$PROJECT_DIR$/lucene/contrib/queryparser/queryparser.iml" />
    <module filepath="$PROJECT_DIR$/lucene/contrib/spatial/spatial.iml" />
    <module filepath="$PROJECT_DIR$/lucene/contrib/spellchecker/spellchecker.iml" />
    <module filepath="$PROJECT_DIR$/lucene/contrib/wordnet/wordnet.iml" />
    <module filepath="$PROJECT_DIR$/lucene/contrib/xml-query-parser/xml-query-parser.iml" />
    <module filepath="$PROJECT_DIR$/modules/analysis/common/common.iml" />

@@ -22,6 +21,7 @@
    <module filepath="$PROJECT_DIR$/modules/analysis/stempel/stempel.iml" />
    <module filepath="$PROJECT_DIR$/modules/benchmark/benchmark.iml" />
    <module filepath="$PROJECT_DIR$/modules/grouping/grouping.iml" />
    <module filepath="$PROJECT_DIR$/modules/suggest/suggest.iml" />
    <module filepath="$PROJECT_DIR$/solr/solr.iml" />
    <module filepath="$PROJECT_DIR$/solr/contrib/analysis-extras/analysis-extras.iml" />
    <module filepath="$PROJECT_DIR$/solr/contrib/clustering/clustering.iml" />

@@ -141,13 +141,6 @@
        <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
        <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
      </configuration>
      <configuration default="false" name="spellchecker contrib" type="JUnit" factoryName="JUnit">
        <module name="spellchecker" />
        <option name="TEST_OBJECT" value="package" />
        <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/lucene/build/contrib/spellchecker" />
        <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
        <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
      </configuration>
      <configuration default="false" name="stempel analysis module" type="JUnit" factoryName="JUnit">
        <module name="stempel" />
        <option name="TEST_OBJECT" value="package" />

@@ -155,6 +148,13 @@
        <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
        <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
      </configuration>
      <configuration default="false" name="suggest module" type="JUnit" factoryName="JUnit">
        <module name="suggest" />
        <option name="TEST_OBJECT" value="package" />
        <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/modules/suggest/build" />
        <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
        <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
      </configuration>
      <configuration default="false" name="uima contrib" type="JUnit" factoryName="JUnit">
        <module name="uima" />
        <option name="TEST_OBJECT" value="package" />

@@ -197,8 +197,8 @@
        <item index="17" class="java.lang.String" itemvalue="JUnit.smartcn analysis module" />
        <item index="18" class="java.lang.String" itemvalue="JUnit.solr" />
        <item index="19" class="java.lang.String" itemvalue="JUnit.spatial contrib" />
        <item index="20" class="java.lang.String" itemvalue="JUnit.spellchecker contrib" />
        <item index="21" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
        <item index="20" class="java.lang.String" itemvalue="JUnit.stempel analysis module" />
        <item index="21" class="java.lang.String" itemvalue="JUnit.suggest module" />
        <item index="22" class="java.lang.String" itemvalue="JUnit.uima contrib" />
        <item index="23" class="java.lang.String" itemvalue="JUnit.wordnet contrib" />
        <item index="24" class="java.lang.String" itemvalue="JUnit.xml-query-parser contrib" />

@@ -1,18 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="false">
    <output url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/java" />
    <output-test url="file://$MODULE_DIR$/../../build/contrib/spellchecker/classes/test" />
    <output url="file://$MODULE_DIR$/build/classes/java" />
    <output-test url="file://$MODULE_DIR$/build/classes/test" />
    <exclude-output />
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
      <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
    <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
    <orderEntry type="module" module-name="queries" />
    <orderEntry type="module" module-name="misc" />
    <orderEntry type="module" module-name="common" />
    <orderEntry type="module" module-name="lucene" />
  </component>

@@ -39,7 +39,6 @@
    <module>queries</module>
    <module>queryparser</module>
    <module>spatial</module>
    <module>spellchecker</module>
    <module>wordnet</module>
    <module>xml-query-parser</module>
  </modules>

@@ -34,6 +34,7 @@
    <module>analysis</module>
    <module>benchmark</module>
    <module>grouping</module>
    <module>suggest</module>
  </modules>
  <build>
    <directory>build/lucene-modules-aggregator</directory>

@@ -24,16 +24,16 @@
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-parent</artifactId>
    <version>@version@</version>
    <relativePath>../../pom.xml</relativePath>
    <relativePath>../../lucene/pom.xml</relativePath>
  </parent>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-spellchecker</artifactId>
  <artifactId>lucene-suggest</artifactId>
  <packaging>jar</packaging>
  <name>Lucene Spellchecker</name>
  <description>Spell Checker</description>
  <name>Lucene Suggest</name>
  <description>Lucene Suggest Module</description>
  <properties>
    <module-directory>lucene/contrib/spellchecker</module-directory>
    <build-directory>../../build/contrib/spellchecker</build-directory>
    <module-directory>modules/suggest</module-directory>
    <build-directory>build</build-directory>
  </properties>
  <dependencies>
    <dependency>

@@ -43,14 +43,14 @@
    </dependency>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>lucene-test-framework</artifactId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${project.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <artifactId>lucene-test-framework</artifactId>
      <version>${project.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>junit</groupId>

@@ -89,7 +89,7 @@
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-spellchecker</artifactId>
      <artifactId>lucene-suggest</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>

@@ -427,7 +427,32 @@ Bug fixes
  with more document deletions is requested before a reader with fewer
  deletions, provided they share some segments. (yonik)

======================= Lucene 3.x (not yet released) =======================

* LUCENE-3147,LUCENE-3152: Fixed open file handles leaks in many places in the
  code. Now MockDirectoryWrapper (in test-framework) tracks all open files,
  including locks, and fails if the test fails to release all of them.
  (Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)
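
  For illustration, a minimal sketch of the check this adds (not part of the
  commit; it assumes the test-framework store API of this period, and the
  file name is hypothetical):

    import java.util.Random;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.store.MockDirectoryWrapper;
    import org.apache.lucene.store.RAMDirectory;

    public class LeakSketch {
      public static void main(String[] args) throws Exception {
        MockDirectoryWrapper dir =
            new MockDirectoryWrapper(new Random(), new RAMDirectory());
        IndexOutput out = dir.createOutput("leaked.bin"); // opened, never closed
        dir.close(); // now fails, reporting that "leaked.bin" is still open
      }
    }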

======================= Lucene 3.x (not yet released) ================

Changes in backwards compatibility policy

* LUCENE-3140: IndexOutput.copyBytes now takes a DataInput (superclass
  of IndexInput) as its first argument. (Robert Muir, Dawid Weiss,
  Mike McCandless)
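
  For illustration, a sketch of the relaxed signature in use (assumes the
  store API of this period; the file names are hypothetical):

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.store.RAMDirectory;

    public class CopyBytesSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexOutput src = dir.createOutput("src.bin");
        src.writeLong(42L);
        src.close();

        IndexInput in = dir.openInput("src.bin"); // IndexInput extends DataInput
        IndexOutput dst = dir.createOutput("dst.bin");
        dst.copyBytes(in, dir.fileLength("src.bin")); // any DataInput now works
        dst.close();
        in.close();
      }
    }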

Changes in runtime behavior

* LUCENE-2834: the hash used to compute the lock file name when the
  lock file is not stored in the index has changed. This means you
  will see a different lucene-XXX-write.lock in your lock directory.
  (Robert Muir, Uwe Schindler, Mike McCandless)

New Features

* LUCENE-3140: Added experimental FST implementation to Lucene.
  (Robert Muir, Dawid Weiss, Mike McCandless)

======================= Lucene 3.2.0 =======================

Changes in backwards compatibility policy

@@ -486,6 +511,10 @@ New features
  document IDs and scores encountered during the search, and "replay" them to
  another Collector. (Mike McCandless, Shai Erera)

* LUCENE-3112: Added experimental IndexWriter.add/updateDocuments,
  enabling a block of documents to be indexed, atomically, with
  guaranteed sequential docIDs. (Mike McCandless)
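
  For illustration, a sketch of indexing one block atomically (assumes a
  release that includes LUCENE-3112 and the Field/IndexWriterConfig API of
  this period; the field name is hypothetical):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class BlockIndexSketch {
      public static void main(String[] args) throws Exception {
        IndexWriter writer = new IndexWriter(new RAMDirectory(),
            new IndexWriterConfig(Version.LUCENE_CURRENT,
                new StandardAnalyzer(Version.LUCENE_CURRENT)));
        List<Document> block = new ArrayList<Document>();
        for (int i = 0; i < 3; i++) {
          Document doc = new Document();
          doc.add(new Field("page", Integer.toString(i),
              Field.Store.YES, Field.Index.NOT_ANALYZED));
          block.add(doc);
        }
        writer.addDocuments(block); // all three get adjacent docIDs, atomically
        writer.close();
      }
    }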

API Changes

* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public

@@ -507,6 +536,9 @@ Optimizations
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
  buffer deleted terms to later apply to past segments. (Mike McCandless)

* LUCENE-3126: IndexWriter.addIndexes copies incoming segments into CFS if they
  aren't already and MergePolicy allows that. (Shai Erera)

Bug fixes

* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new

@@ -541,6 +573,9 @@ Build
* LUCENE-3006: Building javadocs will fail on warnings by default.
  Override with -Dfailonjavadocwarning=false (sarowe, gsingers)

* LUCENE-3128: "ant eclipse" creates a .project file for easier Eclipse
  integration (unless one already exists). (Daniel Serodio via Shai Erera)

Test Cases

* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to

@@ -1280,7 +1315,7 @@ Documentation
* LUCENE-2894: Use google-code-prettify for syntax highlighting in javadoc.
  (Shinichiro Abe, Koji Sekiguchi)

================== Release 2.9.4 / 3.0.3 2010-12-03 ====================
================== Release 2.9.4 / 3.0.3 ====================

Changes in runtime behavior

@@ -1433,7 +1468,7 @@ Documentation
  Java NIO behavior when a Thread is interrupted while blocking on IO.
  (Simon Willnauer, Robert Muir)

================== Release 2.9.3 / 3.0.2 2010-06-18 ====================
================== Release 2.9.3 / 3.0.2 ====================

Changes in backwards compatibility policy

@@ -1578,7 +1613,7 @@ Build
  compiling/testing/packaging. This marks the benchmark contrib also
  as Java 1.5, as it depends on fast-vector-highlighter. (Uwe Schindler)

================== Release 2.9.2 / 3.0.1 2010-02-26 ====================
================== Release 2.9.2 / 3.0.1 ====================

Changes in backwards compatibility policy

@@ -1681,7 +1716,7 @@ Documentation
  provided reader is per-segment (Simon Willnauer via Mike
  McCandless)

======================= Release 3.0.0 2009-11-25 =======================
======================= Release 3.0.0 =======================

Changes in backwards compatibility policy

@@ -1882,7 +1917,7 @@ Build
* LUCENE-2024: Raise build requirements to Java 1.5 and ANT 1.7.0
  (Uwe Schindler, Mike McCandless)

======================= Release 2.9.1 2009-11-06 =======================
======================= Release 2.9.1 =======================

Changes in backwards compatibility policy

@@ -1952,7 +1987,7 @@ Documentation
* LUCENE-2008: Javadoc improvements for TokenStream/Tokenizer/Token
  (Luke Nezda via Mike McCandless)

======================= Release 2.9.0 2009-09-23 =======================
======================= Release 2.9.0 =======================

Changes in backwards compatibility policy

@@ -2866,7 +2901,7 @@ Test Cases
* LUCENE-1836: Added a new LocalizedTestCase as base class for localization
  junit tests. (Robert Muir, Uwe Schindler via Michael Busch)

======================= Release 2.4.1 2009-03-09 =======================
======================= Release 2.4.1 =======================

API Changes

@@ -2934,7 +2969,7 @@ Bug fixes
  rollback IndexWriter's internal state on hitting an
  exception. (Scott Garland via Mike McCandless)

======================= Release 2.4.0 2008-10-06 =======================
======================= Release 2.4.0 =======================

Changes in backwards compatibility policy

@@ -3299,7 +3334,7 @@ Test Cases
 2. LUCENE-1348: relax TestTimeLimitedCollector to not fail due to
    timeout exceeded (just because test machine is very busy).

======================= Release 2.3.2 2008-05-05 =======================
======================= Release 2.3.2 =======================

Bug fixes

@@ -3349,7 +3384,7 @@ Build
 1. LUCENE-1230: Include *pom.xml* in source release files. (Michael Busch)


======================= Release 2.3.1 2008-02-22 =======================
======================= Release 2.3.1 =======================

Bug fixes

@@ -3378,7 +3413,7 @@ Bug fixes
    preventing Fields with empty-string field name from working.
    (Sergey Kabashnyuk via Mike McCandless)

======================= Release 2.3.0 2008-01-21 =======================
======================= Release 2.3.0 =======================

Changes in runtime behavior

@@ -3770,7 +3805,7 @@ Test Cases
 1. LUCENE-766: Test adding two fields with the same name but different
    term vector setting. (Nicolas Lalevée via Doron Cohen)

======================= Release 2.2.0 2007-06-19 =======================
======================= Release 2.2.0 =======================

Changes in runtime behavior

@@ -4079,7 +4114,7 @@ Build
    of the Lucene core and the contrib modules.
    (Sami Siren, Karl Wettin, Michael Busch)

======================= Release 2.1.0 2007-02-14 =======================
======================= Release 2.1.0 =======================

Changes in runtime behavior

@@ -4567,7 +4602,7 @@ Build
    distribution of both binary and source distributions. Called by package
    and package-*-src

======================= Release 2.0.0 2006-05-26 =======================
======================= Release 2.0.0 =======================

API Changes

@@ -5671,7 +5706,7 @@ Infrastructure
11. Fixed bugs in GermanAnalyzer (gschwarz)


1.2 RC2, 19 October 2001:
1.2 RC2:
 - added sources to distribution
 - removed broken build scripts and libraries from distribution
 - SegmentsReader: fixed potential race condition

@@ -5686,7 +5721,7 @@ Infrastructure
 - JDK 1.1 compatibility fix: disabled lock files for JDK 1.1,
   since they rely on a feature added in JDK 1.2.

1.2 RC1 (first Apache release), 2 October 2001:
1.2 RC1 (first Apache release):
 - packages renamed from com.lucene to org.apache.lucene
 - license switched from LGPL to Apache
 - ant-only build -- no more makefiles

@@ -5697,18 +5732,18 @@ Infrastructure
 - Analyzers can choose tokenizer based on field name
 - misc bug fixes.

1.01b (last Sourceforge release), 2 July 2001
1.01b (last Sourceforge release)
 . a few bug fixes
 . new Query Parser
 . new prefix query (search for "foo*" matches "food")

1.0, 2000-10-04
1.0

This release fixes a few serious bugs and also includes some
performance optimizations, a stemmer, and a few other minor
enhancements.

0.04 2000-04-19
0.04

Lucene now includes a grammar-based tokenizer, StandardTokenizer.

@@ -5733,7 +5768,7 @@ StandardTokenizer serves two purposes:
  the new tokenizer into the application and modifying it
  accordingly.

0.01, 2000-03-30
0.01

First open source release.

@@ -50,8 +50,7 @@
              excludes="*-src.jar"
  />
  <patternset id="binary.root.dist.patterns"
              includes="docs/,*.txt,contrib/**/README*,**/CHANGES.txt,contrib/**/*.sh contrib/**/docs/ contrib/xml-query-parser/*.dtd"
              excludes="BUILD.txt"
              includes="docs/,CHANGES.txt,LICENSE.txt,NOTICE.txt,README.txt,MIGRATE.txt,JRE_VERSION_MIGRATION.txt,contrib/**/README*,**/CHANGES.txt,contrib/**/*.sh contrib/**/docs/ contrib/xml-query-parser/*.dtd,lib/*.jar,lib/*LICENSE*.txt,lib/*NOTICE*.txt,contrib/*/lib/*.jar,contrib/*/lib/*LICENSE*.txt,contrib/*/lib/*NOTICE*.txt"
  />

@@ -227,7 +226,6 @@
    <packageset dir="contrib/misc/src/java"/>
    <packageset dir="contrib/queries/src/java"/>
    <packageset dir="contrib/spatial/src/java"/>
    <packageset dir="contrib/spellchecker/src/java"/>
    <packageset dir="contrib/wordnet/src/java"/>
    <packageset dir="contrib/xml-query-parser/src/java"/>
    <packageset dir="contrib/queryparser/src/java"/>

@@ -248,7 +246,6 @@
    <group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.regex*:org.apache.regexp*"/>
    <group title="contrib: Query Parser" packages="org.apache.lucene.queryParser.*"/>
    <group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
    <group title="contrib: SpellChecker" packages="org.apache.lucene.search.spell*"/>
    <group title="contrib: WordNet" packages="org.apache.lucene.wordnet*"/>
    <group title="contrib: XML Query Parser" packages="org.apache.lucene.xmlparser*"/>

@@ -295,10 +292,10 @@
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="package-tgz" depends="package"
    description="--> Generates the lucene distribution as .tar.gz">
    description="--> Generates the lucene distribution as .tgz">

    <delete file="${build.dir}/lucene-${version}.tar"/>
    <delete file="${dist.dir}/lucene-${version}.tar.gz"/>
    <delete file="${dist.dir}/lucene-${version}.tgz"/>
    <tar tarfile="${build.dir}/lucene-${version}.tar" longfile="gnu">
      <tarfileset prefix="lucene-${version}" dir=".">
        <patternset refid="binary.root.dist.patterns"/>

@@ -308,10 +305,10 @@
      </tarfileset>
    </tar>

    <gzip zipfile="${dist.dir}/lucene-${version}.tar.gz"
    <gzip zipfile="${dist.dir}/lucene-${version}.tgz"
          src="${build.dir}/lucene-${version}.tar"
    />
    <lucene-checksum file="${dist.dir}/lucene-${version}.tar.gz"/>
    <lucene-checksum file="${dist.dir}/lucene-${version}.tgz"/>
  </target>

  <!-- ================================================================== -->

@@ -320,7 +317,7 @@
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="package-all-binary" depends="package-zip, package-tgz"
          description="--> Generates the .tar.gz and .zip distributions"/>
          description="--> Generates the .tgz and .zip distributions"/>

  <!-- ================================================================== -->
  <!-- same as package-all. it is just here for compatibility. -->

@@ -347,9 +344,9 @@
  <!-- Packages the sources from "svn export" with tar-gzip -->
  <!-- ================================================================== -->
  <target name="package-tgz-src" depends="init, init-dist, svn-export-source"
          description="--> Generates the Lucene distribution as .tar.gz">
          description="--> Generates the Lucene distribution as .tgz">
    <property name="source.package.file"
              value="${dist.dir}/lucene-${version}-src.tar.gz"/>
              value="${dist.dir}/lucene-${version}-src.tgz"/>
    <delete file="${source.package.file}"/>
    <tar tarfile="${source.package.file}" compression="gzip" longfile="gnu">
      <tarfileset prefix="lucene-${version}" dir="${svn.export.dir}"/>

@@ -496,7 +493,7 @@
      <fileset dir="${dist.dir}">
        <include name="**/*.jar"/>
        <include name="**/*.zip"/>
        <include name="**/*.tar.gz"/>
        <include name="**/*.tgz"/>
        <include name="**/*.pom"/>
      </fileset>
      <globmapper from="*" to="*.asc"/>

@@ -24,7 +24,6 @@
  </description>

  <dirname file="${ant.file.common}" property="common.dir"/>
  <import file="${common.dir}/../common-build.xml"/>
  <property name="dev-tools.dir" value="${common.dir}/../dev-tools"/>
  <property name="prettify.dir" value="${common.dir}/src/tools/prettify"/>
  <property name="maven.build.dir" value="${common.dir}/build/maven"/>

@@ -61,6 +60,11 @@
    <fileset dir="${common.dir}/lib" includes="ant-*.jar"/>
  </path>

  <path id="tools.runtime.classpath">
    <pathelement location="${common.dir}/build/classes/tools"/>
  </path>

  <!-- default arguments to pass to JVM executing tests -->
  <property name="testmethod" value=""/>
  <property name="args" value=""/>

@@ -6,6 +6,8 @@ Build

* LUCENE-2845: Moved contrib/benchmark to modules.

* LUCENE-2995: Moved contrib/spellchecker into modules/suggest.

New Features

* LUCENE-2604: Added RegexpQuery support to contrib/queryparser.

@@ -48,7 +50,14 @@ Bug Fixes
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
  not lowercasing the key before checking for the tag (Adriano Crestani)

======================= Lucene 3.x (not yet released) =======================
======================= Lucene 3.x (not yet released) ================

API Changes

* LUCENE-3141: add getter method to access fragInfos in FieldFragList.
  (Sujit Pal via Koji Sekiguchi)
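
  For illustration, the new accessor in use (a sketch only; the caller is
  assumed to have built the FieldFragList elsewhere via a FragListBuilder):

    import java.util.List;
    import org.apache.lucene.search.vectorhighlight.FieldFragList;
    import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;

    class FragInfoSketch {
      static void dump(FieldFragList ffl) {
        List<WeightedFragInfo> infos = ffl.getFragInfos(); // replaces direct field access
        for (WeightedFragInfo info : infos) {
          System.out.println(info);
        }
      }
    }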

======================= Lucene 3.2.0 =======================

Changes in backwards compatibility policy

@@ -433,7 +442,7 @@ Other
* LUCENE-2415: Use reflection instead of a shim class to access Jakarta
  Regex prefix. (Uwe Schindler)

================== Release 2.9.4 / 3.0.3 2010-12-03 ====================
================== Release 2.9.4 / 3.0.3 ====================

Bug Fixes

@@ -468,11 +477,11 @@ Documentation
  as expected on all JRE's. For example, on an IBM JRE, it does nothing.
  (Robert Muir)

================== Release 2.9.3 / 3.0.2 2010-06-18 ====================
================== Release 2.9.3 / 3.0.2 ====================

No changes.

================== Release 2.9.2 / 3.0.1 2010-02-26 ====================
================== Release 2.9.2 / 3.0.1 ====================

New features

@@ -511,7 +520,7 @@ API Changes
* LUCENE-2165: Add a constructor to SnowballAnalyzer that takes a Set of
  stopwords, and deprecate the String[] one. (Nick Burch via Robert Muir)

======================= Release 3.0.0 2009-11-25 =======================
======================= Release 3.0.0 =======================

Changes in backwards compatibility policy

@@ -588,7 +597,7 @@ Build
* LUCENE-2031: Moved PatternAnalyzer from contrib/memory into
  contrib/analyzers/common, under miscellaneous. (Robert Muir)

======================= Release 2.9.1 2009-11-06 =======================
======================= Release 2.9.1 =======================

Changes in backwards compatibility policy

@@ -631,7 +640,7 @@ Bug fixes
* LUCENE-2013: SpanRegexQuery does not work with QueryScorer.
  (Benjamin Keil via Mark Miller)

======================= Release 2.9.0 2009-09-23 =======================
======================= Release 2.9.0 =======================

Changes in runtime behavior

@@ -820,7 +829,7 @@ Build
Test Cases


======================= Release 2.4.0 2008-10-06 =======================
======================= Release 2.4.0 =======================

Changes in runtime behavior

@@ -93,7 +93,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
    if( maxNumFragments < 0 )
      throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );

    List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
    List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );

    List<String> fragments = new ArrayList<String>( maxNumFragments );
    Field[] values = getFields( reader, docId, fieldName );

@@ -29,7 +29,7 @@ import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
 */
public class FieldFragList {

  List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();
  private List<WeightedFragInfo> fragInfos = new ArrayList<WeightedFragInfo>();

  /**
   * a constructor.

@@ -50,6 +50,15 @@ public class FieldFragList {
    fragInfos.add( new WeightedFragInfo( startOffset, endOffset, phraseInfoList ) );
  }

  /**
   * return the list of WeightedFragInfos.
   *
   * @return fragInfos.
   */
  public List<WeightedFragInfo> getFragInfos() {
    return fragInfos;
  }

  public static class WeightedFragInfo {

    List<SubInfo> subInfos;

@@ -26,6 +26,7 @@ import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

/**
 * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field

@@ -80,16 +81,16 @@ public class FieldTermStack {
    Set<String> termSet = fieldQuery.getTermSet( fieldName );
    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if( termSet == null ) return;

    final CharsRef spare = new CharsRef();
    for( BytesRef term : tpv.getTerms() ){
      if( !termSet.contains( term.utf8ToString() ) ) continue;
      if( !termSet.contains( term.utf8ToChars(spare).toString() ) ) continue;
      int index = tpv.indexOf( term );
      TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
      if( tvois == null ) return; // just return to make null snippets
      int[] poss = tpv.getTermPositions( index );
      if( poss == null ) return; // just return to make null snippets
      for( int i = 0; i < tvois.length; i++ )
        termList.add( new TermInfo( term.utf8ToString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
        termList.add( new TermInfo( term.utf8ToChars(spare).toString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
    }

    // sort by position

@@ -24,7 +24,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
  public void testNullFieldFragList() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
    assertEquals( 0, ffl.fragInfos.size() );
    assertEquals( 0, ffl.getFragInfos().size() );
  }

  public void testTooSmallFragSize() throws Exception {

@@ -40,90 +40,90 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
  public void testSmallerFragSizeThanTermQuery() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "abcdefghijklmnopqrs", "abcdefghijklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void testSmallerFragSizeThanPhraseQuery() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "\"abcdefgh jklmnopqrs\"", "abcdefgh jklmnopqrs" ), SimpleFragListBuilder.MIN_FRAG_CHAR_SIZE );
    assertEquals( 1, ffl.fragInfos.size() );
    if (VERBOSE) System.out.println( ffl.fragInfos.get( 0 ).toString() );
    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
    assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void test1TermIndex() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a" ), 100 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void test2TermsIndex1Frag() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a a" ), 100 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1))a((2,3)))/2.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );

    ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b a" ), 20 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1))a((18,19)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );

    ffl = sflb.createFieldFragList( fpl( "a", "b b b b a b b b b a" ), 20 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void test2TermsIndex2Frags() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b b a" ), 20 );
    assertEquals( 2, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.fragInfos.get( 1 ).toString() );
    assertEquals( 2, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
    assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );

    ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b b b b a" ), 20 );
    assertEquals( 2, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
    assertEquals( 2, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
    assertEquals( "subInfos=(a((26,27)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );

    ffl = sflb.createFieldFragList( fpl( "a", "a b b b b b b b b b a" ), 20 );
    assertEquals( 2, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.fragInfos.get( 1 ).toString() );
    assertEquals( 2, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
    assertEquals( "subInfos=(a((20,21)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
  }

  public void test2TermsQuery() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a b", "c d e" ), 20 );
    assertEquals( 0, ffl.fragInfos.size() );
    assertEquals( 0, ffl.getFragInfos().size() );

    ffl = sflb.createFieldFragList( fpl( "a b", "d b c" ), 20 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(b((2,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );

    ffl = sflb.createFieldFragList( fpl( "a b", "a b c" ), 20 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1))b((2,3)))/2.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void testPhraseQuery() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"", "c d e" ), 20 );
    assertEquals( 0, ffl.fragInfos.size() );
    assertEquals( 0, ffl.getFragInfos().size() );

    ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a c b" ), 20 );
    assertEquals( 0, ffl.fragInfos.size() );
    assertEquals( 0, ffl.getFragInfos().size() );

    ffl = sflb.createFieldFragList( fpl( "\"a b\"", "a b c" ), 20 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(ab((0,3)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void testPhraseQuerySlop() throws Exception {
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "\"a b\"~1", "a c b" ), 20 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(ab((0,1)(4,5)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
  }

  private FieldPhraseList fpl( String queryValue, String indexValue ) throws Exception {

@@ -142,8 +142,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
    FieldPhraseList fpl = new FieldPhraseList( stack, fq );
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void test1PhraseLongMV() throws Exception {

@@ -154,8 +154,8 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
    FieldPhraseList fpl = new FieldPhraseList( stack, fq );
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void test1PhraseLongMVB() throws Exception {

@@ -166,7 +166,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
    FieldPhraseList fpl = new FieldPhraseList( stack, fq );
    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
  }
}

@@ -24,21 +24,21 @@ public class SingleFragListBuilderTest extends AbstractTestCase {
  public void testNullFieldFragList() throws Exception {
    SingleFragListBuilder sflb = new SingleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "b c d" ), 100 );
    assertEquals( 0, ffl.fragInfos.size() );
    assertEquals( 0, ffl.getFragInfos().size() );
  }

  public void testShortFieldFragList() throws Exception {
    SingleFragListBuilder sflb = new SingleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d" ), 100 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1)))/1.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
  }

  public void testLongFieldFragList() throws Exception {
    SingleFragListBuilder sflb = new SingleFragListBuilder();
    FieldFragList ffl = sflb.createFieldFragList( fpl( "a", "a b c d", "a b c d e f g h i", "j k l m n o p q r s t u v w x y z a b c", "d e f g" ), 100 );
    assertEquals( 1, ffl.fragInfos.size() );
    assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.fragInfos.get( 0 ).toString() );
    assertEquals( 1, ffl.getFragInfos().size() );
    assertEquals( "subInfos=(a((0,1))a((8,9))a((60,61)))/3.0(0,2147483647)", ffl.getFragInfos().get( 0 ).toString() );
  }

  private FieldPhraseList fpl( String queryValue, String... indexValues ) throws Exception {

@@ -41,6 +41,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

/**
 * Represented as a coupled graph of class instances, this

@@ -228,12 +229,13 @@ public class InstantiatedIndex
    if (fieldsC != null) {
      FieldsEnum fieldsEnum = fieldsC.iterator();
      String field;
      final CharsRef spare = new CharsRef();
      while((field = fieldsEnum.next()) != null) {
        if (fields == null || fields.contains(field)) {
          TermsEnum termsEnum = fieldsEnum.terms();
          BytesRef text;
          while((text = termsEnum.next()) != null) {
            String termText = text.utf8ToString();
            String termText = text.utf8ToChars(spare).toString();
            InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
            final long totalTermFreq = termsEnum.totalTermFreq();
            if (totalTermFreq != -1) {

@@ -0,0 +1,2 @@
AnyObjectId[b9c8c8a170881dfe9c33adc87c26348904510954] was removed in git history.
Apache SVN contains full history.

@@ -0,0 +1,202 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@ -42,4 +42,26 @@
|
|||
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="build-native-unix" >
|
||||
<mkdir dir="${common.build.dir}/native"/>
|
||||
|
||||
<taskdef resource="cpptasks.tasks">
|
||||
<classpath>
|
||||
<pathelement location="ant_lib/cpptasks-1.0b5.jar"/>
|
||||
</classpath>
|
||||
</taskdef>
|
||||
|
||||
<cc outtype="shared" subsystem="console" outfile="${common.build.dir}/native/NativePosixUtil" >
|
||||
<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
|
||||
<includepath>
|
||||
<pathelement location="${java.home}/../include"/>
|
||||
<pathelement location="${java.home}/../include/linux"/>
|
||||
<pathelement location="${java.home}/../include/solaris"/>
|
||||
</includepath>
|
||||
|
||||
<compilerarg value="-fPIC" />
|
||||
</cc>
|
||||
</target>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.text.DecimalFormat;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexWriter; // Required for javadocs
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
|
||||
|
@ -45,6 +46,11 @@ import org.apache.lucene.store.FSDirectory;
|
|||
* @lucene.experimental You can easily
|
||||
* accidentally remove segments from your index, so be
|
||||
* careful!
|
||||
*
|
||||
* <p><b>NOTE</b>: this tool is unaware of documents added
|
||||
* atomically via {@link IndexWriter#addDocuments} or {@link
|
||||
* IndexWriter#updateDocuments}, which means it can easily
|
||||
* break up such document groups.
|
||||
*/
|
||||
public class IndexSplitter {
|
||||
public SegmentInfos infos;
|
||||
|
|
|
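For reference, a minimal sketch of driving IndexSplitter from code; the constructor and the listSegments/split signatures below are assumptions based on the contrib tool as of this branch, not a verbatim copy of its API.

// Hypothetical usage sketch for the contrib IndexSplitter tool.
import java.io.File;
import org.apache.lucene.index.IndexSplitter;

public class SplitExample {
  public static void main(String[] args) throws Exception {
    IndexSplitter splitter = new IndexSplitter(new File("/path/to/index"));
    splitter.listSegments(); // prints segment names and sizes
    // Copy two segments into a brand-new index directory:
    splitter.split(new File("/path/to/dest"), new String[] {"_0", "_1"});
    // Caveat from the javadoc above: segment-level splitting can separate
    // documents that were added atomically via addDocuments/updateDocuments.
  }
}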
@ -40,6 +40,11 @@ import org.apache.lucene.util.Version;
|
|||
* <p>Note 2: the disadvantage of this tool is that the source index needs to be
|
||||
* read as many times as there are parts to be created, hence the name of this
|
||||
* tool.
|
||||
*
|
||||
* <p><b>NOTE</b>: this tool is unaware of documents added
|
||||
* atomically via {@link IndexWriter#addDocuments} or {@link
|
||||
* IndexWriter#updateDocuments}, which means it can easily
|
||||
* break up such document groups.
|
||||
*/
|
||||
public class MultiPassIndexSplitter {
|
||||
|
||||
|
|
|
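A comparable sketch for the multi-pass variant, which re-reads the source once per output part; the split(IndexReader, Directory[], boolean) signature is assumed from this class's API at the time.

// Hypothetical sketch: split one source index into three parts.
import java.io.File;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiPassIndexSplitter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MultiPassExample {
  public static void main(String[] args) throws Exception {
    IndexReader input = IndexReader.open(FSDirectory.open(new File("src-index")));
    Directory[] outputs = new Directory[] {
      FSDirectory.open(new File("part-0")),
      FSDirectory.open(new File("part-1")),
      FSDirectory.open(new File("part-2")),
    };
    // seq=true gives each part a contiguous docID range; seq=false deals
    // documents out round-robin. The source index is read once per entry
    // in outputs, hence "multi-pass".
    new MultiPassIndexSplitter().split(input, outputs, true);
    input.close();
  }
}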
@ -269,7 +269,7 @@ public class NRTCachingDirectory extends Directory {
|
|||
in = cache.openInput(fileName);
|
||||
in.copyBytes(out, in.length());
|
||||
} finally {
|
||||
IOUtils.closeSafely(in, out);
|
||||
IOUtils.closeSafely(false, in, out);
|
||||
}
|
||||
synchronized(this) {
|
||||
cache.deleteFile(fileName);
|
||||
|
|
|
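The closeSafely change above switches to the overload whose leading boolean selects the error-handling mode. A small sketch of the pattern, assuming the boolean is suppressExceptions (false rethrows the first close() failure after all resources have still been closed):

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;

final class CopyFileSketch {
  static void copy(Directory src, Directory dst, String name) throws IOException {
    IndexInput in = null;
    IndexOutput out = null;
    try {
      in = src.openInput(name);
      out = dst.createOutput(name);
      in.copyBytes(out, in.length());
    } finally {
      // Close both streams even if copyBytes threw; with 'false' the
      // first close() failure is propagated instead of swallowed.
      IOUtils.closeSafely(false, in, out);
    }
  }
}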
@ -51,9 +51,11 @@ for details.
|
|||
|
||||
Steps to build:
|
||||
<ul>
|
||||
<li> <tt>cd lucene/contrib/misc/src/java/org/apache/lucene/store</tt>
|
||||
<li> <tt>cd lucene/contrib/misc/</tt>
|
||||
|
||||
<li> Compile NativePosixUtil.cpp -> libNativePosixUtil.so. On linux, something like <tt>gcc -fPIC -o libNativePosixUtil.so -shared -Wl,-soname,libNativePosixUtil.so -I$JAVA_HOME/include -I$JAVA_HOME/include/linux NativePosixUtil.cpp -lc -lstdc++</tt>. Add <tt>-m64</tt> if you want to compile 64bit (and java must be run with -d64 so it knows to load a 64bit dynamic lib).
|
||||
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux, run <tt>ant build-native-unix</tt>.
|
||||
|
||||
<li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder
|
||||
|
||||
<li> Make sure libNativePosixUtil.so is on your LD_LIBRARY_PATH so java can find it (something like <tt>export LD_LIBRARY_PATH=/path/to/dir:$LD_LIBRARY_PATH</tt>, where /path/to/dir contains libNativePosixUtil.so)
|
||||
|
||||
|
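Once built, a quick way to confirm the JVM can resolve the library is a load check like the one below (illustrative only; the library name matches the outfile of the build-native-unix target, and LD_LIBRARY_PATH must include lucene/build/native/):

public class NativeLibCheck {
  public static void main(String[] args) {
    try {
      // Resolves libNativePosixUtil.so on Linux via java.library.path,
      // which picks up LD_LIBRARY_PATH entries.
      System.loadLibrary("NativePosixUtil");
      System.out.println("native library loaded");
    } catch (UnsatisfiedLinkError e) {
      System.err.println("library not found: " + e.getMessage());
    }
  }
}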
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.regex;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.regexp.CharacterIterator;
|
||||
import org.apache.regexp.RE;
|
||||
|
@ -104,11 +105,11 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
|
|||
|
||||
class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher {
|
||||
private RE regexp;
|
||||
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
|
||||
private final CharsRef utf16 = new CharsRef(10);
|
||||
private final CharacterIterator utf16wrapper = new CharacterIterator() {
|
||||
|
||||
public char charAt(int pos) {
|
||||
return utf16.result[pos];
|
||||
return utf16.chars[pos];
|
||||
}
|
||||
|
||||
public boolean isEnd(int pos) {
|
||||
|
@ -120,7 +121,7 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
|
|||
}
|
||||
|
||||
public String substring(int beginIndex, int endIndex) {
|
||||
return new String(utf16.result, beginIndex, endIndex - beginIndex);
|
||||
return new String(utf16.chars, beginIndex, endIndex - beginIndex);
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
/**
|
||||
|
@ -95,25 +96,11 @@ public class JavaUtilRegexCapabilities implements RegexCapabilities {
|
|||
class JavaUtilRegexMatcher implements RegexCapabilities.RegexMatcher {
|
||||
private final Pattern pattern;
|
||||
private final Matcher matcher;
|
||||
private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
|
||||
private final CharSequence utf16wrapper = new CharSequence() {
|
||||
|
||||
public int length() {
|
||||
return utf16.length;
|
||||
}
|
||||
|
||||
public char charAt(int index) {
|
||||
return utf16.result[index];
|
||||
}
|
||||
|
||||
public CharSequence subSequence(int start, int end) {
|
||||
return new String(utf16.result, start, end - start);
|
||||
}
|
||||
};
|
||||
private final CharsRef utf16 = new CharsRef(10);
|
||||
|
||||
public JavaUtilRegexMatcher(String regex, int flags) {
|
||||
this.pattern = Pattern.compile(regex, flags);
|
||||
this.matcher = this.pattern.matcher(utf16wrapper);
|
||||
this.matcher = this.pattern.matcher(utf16);
|
||||
}
|
||||
|
||||
public boolean match(BytesRef term) {
|
||||
|
|
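Both regex capability classes can drop their adapter code because CharsRef itself implements CharSequence, so it plugs directly into java.util.regex. A minimal illustration (the CharsRef(String) convenience constructor is an assumption):

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.util.CharsRef;

public class CharsRefRegex {
  public static void main(String[] args) {
    CharsRef ref = new CharsRef("foobar");              // assumed ctor
    Matcher m = Pattern.compile("foo.*").matcher(ref);  // no wrapper needed
    System.out.println(m.matches());                    // prints: true
  }
}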
|
@ -48,6 +48,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
|
||||
|
@ -850,8 +851,9 @@ public final class MoreLikeThis {
|
|||
{
|
||||
BytesRef[] terms = vector.getTerms();
|
||||
int freqs[]=vector.getTermFrequencies();
|
||||
final CharsRef spare = new CharsRef();
|
||||
for (int j = 0; j < terms.length; j++) {
|
||||
String term = terms[j].utf8ToString();
|
||||
final String term = terms[j].utf8ToChars(spare).toString();
|
||||
|
||||
if(isNoiseWord(term)){
|
||||
continue;
|
||||
|
|
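The utf8ToChars(spare) change above is a reuse idiom: one scratch CharsRef is decoded into in place for every term, instead of allocating a fresh char[] per utf8ToString() call. The same idiom in isolation:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

final class TermDecodeSketch {
  static void dump(BytesRef[] terms) {
    final CharsRef spare = new CharsRef();
    for (BytesRef term : terms) {
      // utf8ToChars decodes the term's UTF-8 bytes into 'spare' and
      // returns it; only the final toString() allocates.
      System.out.println(term.utf8ToChars(spare).toString());
    }
  }
}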
|
@ -1,5 +1,22 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project name="DTDDocAnt" default="main">
|
||||
|
||||
<import file="../contrib-build.xml"/>
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -356,7 +341,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div class="section">
|
||||
<p>
|
||||
This document defines the index file formats used
|
||||
in Lucene version 4.0. If you are using a different
|
||||
in this version of Lucene. If you are using a different
|
||||
version of Lucene, please consult the copy of
|
||||
<span class="codefrag">docs/fileformats.html</span>
|
||||
that was distributed
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
|
|
@ -133,15 +133,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -154,9 +145,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -178,9 +166,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -241,7 +226,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
<h1>Lucene Java Documentation</h1>
|
||||
|
||||
<p>
|
||||
This is the official documentation for Lucene Java 4.0<BR>
|
||||
This is the official documentation for Lucene Java<BR>
|
||||
Please use the menu on the left to access the Javadocs and different documents.
|
||||
</p>
|
||||
|
||||
|
|
|
@ -133,15 +133,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -154,9 +145,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -178,9 +166,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -311,24 +296,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
</li>
|
||||
<ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-ant/index.html">Ant</a> ___________________ <em>javadoc-contrib-ant</em>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a> ___________________ <em>javadoc-contrib-bdb</em>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a> ___________________ <em>javadoc-contrib-bdb-je</em>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a> ___________________ <em>javadoc-contrib-benchmark</em>
|
||||
|
@ -353,12 +320,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
</li>
|
||||
</ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a> ___________________ <em>javadoc-contrib-lucli</em>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-memory/index.html">Memory</a> ___________________ <em>javadoc-contrib-memory</em>
|
||||
|
@ -402,12 +363,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
</li>
|
||||
</ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-swing/index.html">Swing</a> ___________________ <em>javadoc-contrib-swing</em>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a> ___________________ <em>javadoc-contrib-wordnet</em>
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', '../skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="../api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="../api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="../api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -249,27 +234,18 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="#Contrib">Lucene Contrib</a>
|
||||
<ul class="minitoc">
|
||||
<li>
|
||||
<a href="#ant">ant</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#benchmark">benchmark</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#demo">demo</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#db">db</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#highlighter">highlighter</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#instantiated">instantiated</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#lucli">lucli</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#memory">memory</a>
|
||||
</li>
|
||||
<li>
|
||||
|
@ -291,9 +267,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="#spellchecker">spellchecker</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#swing">swing</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#wordnet">wordnet</a>
|
||||
</li>
|
||||
<li>
|
||||
|
@ -350,88 +323,68 @@ document.write("Last Published: " + document.lastModified);
|
|||
You can access the current trunk Contrib repository at
|
||||
<a href="http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/contrib/">http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/contrib/</a>.
|
||||
</p>
|
||||
<a name="N10035"></a><a name="ant"></a>
|
||||
<h3 class="boxed">ant</h3>
|
||||
<p>Ant task to create Lucene indexes.</p>
|
||||
<p>See <a href="../api/contrib-ant/index.html">ant javadoc</a>
|
||||
</p>
|
||||
<a name="N10044"></a><a name="benchmark"></a>
|
||||
<a name="N10035"></a><a name="benchmark"></a>
|
||||
<h3 class="boxed">benchmark</h3>
|
||||
<p>The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora.</p>
|
||||
<p>See <a href="../api/contrib-benchmark/index.html">benchmark javadoc</a>
|
||||
</p>
|
||||
<a name="N10053"></a><a name="demo"></a>
|
||||
<a name="N10044"></a><a name="demo"></a>
|
||||
<h3 class="boxed">demo</h3>
|
||||
<p>The demo contrib contains the Lucene demo: IndexFiles and SearchFiles, described under
|
||||
<a href="../gettingstarted.html">Getting Started</a>.</p>
|
||||
<p>See <a href="../api/contrib-demo/index.html">demo javadoc</a>
|
||||
</p>
|
||||
<a name="N10066"></a><a name="db"></a>
|
||||
<h3 class="boxed">db</h3>
|
||||
<p>Provides integration with Berkley DB.</p>
|
||||
<p>See <a href="../api/contrib-db/index.html">db javadoc</a>
|
||||
</p>
|
||||
<a name="N10075"></a><a name="highlighter"></a>
|
||||
<a name="N10057"></a><a name="highlighter"></a>
|
||||
<h3 class="boxed">highlighter</h3>
|
||||
<p>A set of classes for highlighting matching terms in search results.</p>
|
||||
<p>See <a href="../api/contrib-highlighter/index.html">highlighter javadoc</a>
|
||||
</p>
|
||||
<a name="N10084"></a><a name="instantiated"></a>
|
||||
<a name="N10066"></a><a name="instantiated"></a>
|
||||
<h3 class="boxed">instantiated</h3>
|
||||
<p>RAM-based index that enables much faster searching than RAMDirectory in certain situations.</p>
|
||||
<p>See <a href="../api/contrib-instantiated/index.html">instantiated javadoc</a>
|
||||
</p>
|
||||
<a name="N10093"></a><a name="lucli"></a>
|
||||
<h3 class="boxed">lucli</h3>
|
||||
<p>An application that allows Lucene index manipulation from the command-line.</p>
|
||||
<p>See <a href="../api/contrib-lucli/index.html">lucli javadoc</a>
|
||||
</p>
|
||||
<a name="N100A2"></a><a name="memory"></a>
|
||||
<a name="N10075"></a><a name="memory"></a>
|
||||
<h3 class="boxed">memory</h3>
|
||||
<p>High-performance single-document main memory index.</p>
|
||||
<p>See <a href="../api/contrib-memory/index.html">memory javadoc</a>
|
||||
</p>
|
||||
<a name="N100B1"></a><a name="misc"></a>
|
||||
<a name="N10084"></a><a name="misc"></a>
|
||||
<h3 class="boxed">misc</h3>
|
||||
<p>A variety of miscellaneous files, including QueryParsers, and other alternate Lucene class implementations and tools.</p>
|
||||
<p>See <a href="../api/contrib-misc/index.html">misc javadoc</a>
|
||||
</p>
|
||||
<a name="N100C0"></a><a name="queryparser"></a>
|
||||
<a name="N10093"></a><a name="queryparser"></a>
|
||||
<h3 class="boxed">queryparser</h3>
|
||||
<p>A new Lucene query parser implementation, which matches the syntax of the core QueryParser but offers a more modular architecture to enable customization.</p>
|
||||
<p>See <a href="../api/contrib-queryparser/index.html">queryparser javadoc</a>
|
||||
</p>
|
||||
<a name="N100CF"></a><a name="queries"></a>
|
||||
<a name="N100A2"></a><a name="queries"></a>
|
||||
<h3 class="boxed">queries</h3>
|
||||
<p>Additional queries for Lucene.</p>
|
||||
<p>See <a href="../api/contrib-queries/index.html">queries javadoc</a>
|
||||
</p>
|
||||
<a name="N100DE"></a><a name="remote"></a>
|
||||
<a name="N100B1"></a><a name="remote"></a>
|
||||
<h3 class="boxed">remote</h3>
|
||||
<p>Classes to help use Lucene with RMI.</p>
|
||||
<p>See <a href="../api/contrib-remote/index.html">remote javadoc</a>
|
||||
</p>
|
||||
<a name="N100ED"></a><a name="spatial"></a>
|
||||
<a name="N100C0"></a><a name="spatial"></a>
|
||||
<h3 class="boxed">spatial</h3>
|
||||
<p>Classes to help with efficient distance-based sorting.</p>
|
||||
<p>See <a href="../api/contrib-spatial/index.html">spatial javadoc</a>
|
||||
</p>
|
||||
<a name="N100FC"></a><a name="spellchecker"></a>
|
||||
<a name="N100CF"></a><a name="spellchecker"></a>
|
||||
<h3 class="boxed">spellchecker</h3>
|
||||
<p>Provides tools for spellchecking and suggestions with Lucene.</p>
|
||||
<p>See <a href="../api/contrib-spellchecker/index.html">spellchecker javadoc</a>
|
||||
</p>
|
||||
<a name="N1010B"></a><a name="swing"></a>
|
||||
<h3 class="boxed">swing</h3>
|
||||
<p>Swing components designed to integrate with Lucene.</p>
|
||||
<p>See <a href="../api/contrib-swing/index.html">swing javadoc</a>
|
||||
</p>
|
||||
<a name="N1011A"></a><a name="wordnet"></a>
|
||||
<a name="N100DE"></a><a name="wordnet"></a>
|
||||
<h3 class="boxed">wordnet</h3>
|
||||
<p>Tools to help utilize WordNet synonyms with Lucene.</p>
|
||||
<p>See <a href="../api/contrib-wordnet/index.html">wordnet javadoc</a>
|
||||
</p>
|
||||
<a name="N10129"></a><a name="xml-query-parser"></a>
|
||||
<a name="N100ED"></a><a name="xml-query-parser"></a>
|
||||
<h3 class="boxed">xml-query-parser</h3>
|
||||
<p>A QueryParser that can read queries written in an XML format.</p>
|
||||
<p>See <a href="../api/contrib-xml-query-parser/index.html">xml-query-parser javadoc</a>
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
|
|
@ -135,15 +135,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -156,9 +147,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -180,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
|
(Ten binary image assets in the generated documentation were regenerated; each file's size changed by a few bytes.)
|
@ -133,15 +133,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<div onclick="SwitchMenu('menu_1.1.3.4', 'skin/')" id="menu_1.1.3.4Title" class="menutitle">Contrib</div>
|
||||
<div id="menu_1.1.3.4" class="menuitemgroup">
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-ant/index.html">Ant</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb/index.html">Bdb</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-bdb-je/index.html">Bdb-je</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-benchmark/index.html">Benchmark</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -154,9 +145,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-instantiated/index.html">Instantiated</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-lucli/index.html">Lucli</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-memory/index.html">Memory</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -178,9 +166,6 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-swing/index.html">Swing</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
<a href="api/contrib-wordnet/index.html">Wordnet</a>
|
||||
</div>
|
||||
<div class="menuitem">
|
||||
|
@ -252,7 +237,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
<h2 class="boxed">System Requirements</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
Lucene Java 4.0 has the following minimum requirements:
|
||||
Lucene Java has the following minimum requirements:
|
||||
<ul>
|
||||
|
||||
<li>Java 1.5.x or greater.</li>
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.zip.DataFormatException;
|
|||
import java.io.ByteArrayOutputStream;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
/** Simple utility class providing static methods to
|
||||
|
@ -118,9 +119,9 @@ public class CompressionTools {
|
|||
/** Decompress the byte array previously returned by
|
||||
* compressString back into a String */
|
||||
public static String decompressString(byte[] value) throws DataFormatException {
|
||||
UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
|
||||
final byte[] bytes = decompress(value);
|
||||
CharsRef result = new CharsRef(bytes.length);
|
||||
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
|
||||
return new String(result.result, 0, result.length);
|
||||
return new String(result.chars, 0, result.length);
|
||||
}
|
||||
}
|
||||
|
|
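The rewritten decompressString decodes UTF-8 bytes through a CharsRef scratch buffer. Sizing the buffer to bytes.length is always sufficient, because a UTF-8 sequence never decodes to more UTF-16 code units than it has bytes. The decode step on its own:

import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

final class Utf8DecodeSketch {
  static String decode(byte[] bytes) {
    CharsRef result = new CharsRef(bytes.length); // safe upper bound
    UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
    return new String(result.chars, 0, result.length);
  }
}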
|
@ -51,19 +51,18 @@ import org.apache.lucene.util.IOUtils;
|
|||
*/
|
||||
public final class CompoundFileWriter {
|
||||
|
||||
static final class FileEntry {
|
||||
|
||||
FileEntry(String file) {
|
||||
this.file = file;
|
||||
}
|
||||
private static final class FileEntry {
|
||||
/** source file */
|
||||
final String file;
|
||||
String file;
|
||||
|
||||
/** temporary holder for the start of directory entry for this file */
|
||||
long directoryOffset;
|
||||
|
||||
/** temporary holder for the start of this file's data section */
|
||||
long dataOffset;
|
||||
|
||||
/** the directory which contains the file. */
|
||||
Directory dir;
|
||||
}
|
||||
|
||||
// Before versioning started.
|
||||
|
@ -123,6 +122,14 @@ public final class CompoundFileWriter {
|
|||
* has been added already
|
||||
*/
|
||||
public void addFile(String file) {
|
||||
addFile(file, directory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as {@link #addFile(String)}, only for files that are found in an
|
||||
* external {@link Directory}.
|
||||
*/
|
||||
public void addFile(String file, Directory dir) {
|
||||
if (merged)
|
||||
throw new IllegalStateException(
|
||||
"Can't add extensions after merge has been called");
|
||||
|
@ -134,7 +141,11 @@ public final class CompoundFileWriter {
|
|||
if (! ids.add(file))
|
||||
throw new IllegalArgumentException(
|
||||
"File " + file + " already added");
|
||||
entries.add(new FileEntry(file));
|
||||
|
||||
FileEntry entry = new FileEntry();
|
||||
entry.file = file;
|
||||
entry.dir = dir;
|
||||
entries.add(entry);
|
||||
}
|
||||
|
||||
/** Merge files with the extensions added up to now.
|
||||
|
@ -171,7 +182,7 @@ public final class CompoundFileWriter {
|
|||
fe.directoryOffset = os.getFilePointer();
|
||||
os.writeLong(0); // for now
|
||||
os.writeString(IndexFileNames.stripSegmentName(fe.file));
|
||||
totalSize += directory.fileLength(fe.file);
|
||||
totalSize += fe.dir.fileLength(fe.file);
|
||||
}
|
||||
|
||||
// Pre-allocate size of file as optimization --
|
||||
|
@ -217,7 +228,7 @@ public final class CompoundFileWriter {
|
|||
* output stream.
|
||||
*/
|
||||
private void copyFile(FileEntry source, IndexOutput os) throws IOException {
|
||||
IndexInput is = directory.openInput(source.file);
|
||||
IndexInput is = source.dir.openInput(source.file);
|
||||
try {
|
||||
long startPtr = os.getFilePointer();
|
||||
long length = is.length();
|
||||
|
|
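With the new FileEntry.dir field, a compound file can pull inputs from more than one Directory. A hypothetical sketch of the two addFile variants (the constructor and the close()-performs-the-merge behavior are assumptions about this internal class):

import java.io.IOException;
import org.apache.lucene.index.CompoundFileWriter;
import org.apache.lucene.store.Directory;

final class CfsSketch {
  static void build(Directory main, Directory external) throws IOException {
    CompoundFileWriter cfw = new CompoundFileWriter(main, "_1.cfs");
    cfw.addFile("_1.fdt");           // sourced from 'main' (the default)
    cfw.addFile("_1.fdx", external); // sourced from a different Directory
    cfw.close();                     // merges the added files into _1.cfs
  }
}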
|
@ -91,11 +91,18 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
|
||||
@Override
|
||||
public void abort() {
|
||||
for(int i=0;i<fieldHash.length;i++) {
|
||||
DocFieldProcessorPerField field = fieldHash[i];
|
||||
while(field != null) {
|
||||
Throwable th = null;
|
||||
|
||||
for (DocFieldProcessorPerField field : fieldHash) {
|
||||
while (field != null) {
|
||||
final DocFieldProcessorPerField next = field.next;
|
||||
field.abort();
|
||||
try {
|
||||
field.abort();
|
||||
} catch (Throwable t) {
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
field = next;
|
||||
}
|
||||
}
|
||||
|
@ -110,8 +117,26 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
|
||||
try {
|
||||
fieldsWriter.abort();
|
||||
} finally {
|
||||
} catch (Throwable t) {
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
consumer.abort();
|
||||
} catch (Throwable t) {
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
|
||||
// If any errors occurred, rethrow the first one.
|
||||
if (th != null) {
|
||||
if (th instanceof RuntimeException) throw (RuntimeException) th;
|
||||
if (th instanceof Error) throw (Error) th;
|
||||
// defensive code - we should not hit unchecked exceptions
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
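The abort() rewrite follows a keep-going cleanup discipline: every step runs even when earlier ones fail, only the first Throwable is remembered, and it is rethrown once all cleanup is done. The same pattern in generic form:

final class FirstThrowableSketch {
  static void runAll(Runnable... steps) {
    Throwable th = null;
    for (Runnable step : steps) {
      try {
        step.run();
      } catch (Throwable t) {
        if (th == null) {
          th = t; // remember only the first failure
        }
      }
    }
    if (th != null) {
      if (th instanceof RuntimeException) throw (RuntimeException) th;
      if (th instanceof Error) throw (Error) th;
      // defensive: cleanup steps should not throw checked exceptions
      throw new RuntimeException(th);
    }
  }
}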
|
@ -87,6 +87,7 @@ final class DocInverter extends DocFieldConsumer {
|
|||
endConsumer.startDocument();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishDocument() throws IOException {
|
||||
// TODO: allow endConsumer.finishDocument to also return
|
||||
// a DocWriter
|
||||
|
|
|
@ -53,8 +53,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
|
||||
@Override
|
||||
void abort() {
|
||||
consumer.abort();
|
||||
endConsumer.abort();
|
||||
try {
|
||||
consumer.abort();
|
||||
} finally {
|
||||
endConsumer.abort();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -228,14 +228,19 @@ final class DocumentsWriter {
|
|||
}
|
||||
|
||||
final Iterator<ThreadState> threadsIterator = perThreadPool.getActivePerThreadsIterator();
|
||||
|
||||
while (threadsIterator.hasNext()) {
|
||||
ThreadState perThread = threadsIterator.next();
|
||||
final ThreadState perThread = threadsIterator.next();
|
||||
perThread.lock();
|
||||
try {
|
||||
if (perThread.isActive()) { // we might be closed
|
||||
perThread.perThread.abort();
|
||||
perThread.perThread.checkAndResetHasAborted();
|
||||
try {
|
||||
perThread.perThread.abort();
|
||||
} catch (IOException ex) {
|
||||
// continue
|
||||
} finally {
|
||||
perThread.perThread.checkAndResetHasAborted();
|
||||
flushControl.doOnAbort(perThread);
|
||||
}
|
||||
} else {
|
||||
assert closed;
|
||||
}
|
||||
|
@ -243,7 +248,6 @@ final class DocumentsWriter {
|
|||
perThread.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
if (infoStream != null) {
|
||||
|
@ -274,11 +278,9 @@ final class DocumentsWriter {
|
|||
flushControl.setClosed();
|
||||
}
|
||||
|
||||
boolean updateDocument(final Document doc, final Analyzer analyzer,
|
||||
final Term delTerm) throws CorruptIndexException, IOException {
|
||||
private boolean preUpdate() throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
boolean maybeMerge = false;
|
||||
final boolean isUpdate = delTerm != null;
|
||||
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
|
||||
// Help out flushing any queued DWPTs so we can un-stall:
|
||||
if (infoStream != null) {
|
||||
|
@ -303,9 +305,59 @@ final class DocumentsWriter {
|
|||
message("continue indexing after helpling out flushing DocumentsWriter is healthy");
|
||||
}
|
||||
}
|
||||
return maybeMerge;
|
||||
}
|
||||
|
||||
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(),
|
||||
this, doc);
|
||||
private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
|
||||
if (flushingDWPT != null) {
|
||||
maybeMerge |= doFlush(flushingDWPT);
|
||||
} else {
|
||||
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
|
||||
if (nextPendingFlush != null) {
|
||||
maybeMerge |= doFlush(nextPendingFlush);
|
||||
}
|
||||
}
|
||||
|
||||
return maybeMerge;
|
||||
}
|
||||
|
||||
boolean updateDocuments(final Iterable<Document> docs, final Analyzer analyzer,
|
||||
final Term delTerm) throws CorruptIndexException, IOException {
|
||||
boolean maybeMerge = preUpdate();
|
||||
|
||||
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
|
||||
final DocumentsWriterPerThread flushingDWPT;
|
||||
|
||||
try {
|
||||
if (!perThread.isActive()) {
|
||||
ensureOpen();
|
||||
assert false: "perThread is not active but we are still open";
|
||||
}
|
||||
|
||||
final DocumentsWriterPerThread dwpt = perThread.perThread;
|
||||
try {
|
||||
final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
|
||||
numDocsInRAM.addAndGet(docCount);
|
||||
} finally {
|
||||
if (dwpt.checkAndResetHasAborted()) {
|
||||
flushControl.doOnAbort(perThread);
|
||||
}
|
||||
}
|
||||
final boolean isUpdate = delTerm != null;
|
||||
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
|
||||
} finally {
|
||||
perThread.unlock();
|
||||
}
|
||||
|
||||
return postUpdate(flushingDWPT, maybeMerge);
|
||||
}
|
||||
|
||||
boolean updateDocument(final Document doc, final Analyzer analyzer,
|
||||
final Term delTerm) throws CorruptIndexException, IOException {
|
||||
|
||||
boolean maybeMerge = preUpdate();
|
||||
|
||||
final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), this);
|
||||
final DocumentsWriterPerThread flushingDWPT;
|
||||
|
||||
try {
|
||||
|
@ -324,20 +376,13 @@ final class DocumentsWriter {
|
|||
flushControl.doOnAbort(perThread);
|
||||
}
|
||||
}
|
||||
final boolean isUpdate = delTerm != null;
|
||||
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
|
||||
} finally {
|
||||
perThread.unlock();
|
||||
}
|
||||
|
||||
if (flushingDWPT != null) {
|
||||
maybeMerge |= doFlush(flushingDWPT);
|
||||
} else {
|
||||
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
|
||||
if (nextPendingFlush != null) {
|
||||
maybeMerge |= doFlush(nextPendingFlush);
|
||||
}
|
||||
}
|
||||
return maybeMerge;
|
||||
return postUpdate(flushingDWPT, maybeMerge);
|
||||
}
|
||||
|
||||
private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
|
||||
|
@ -541,4 +586,20 @@ final class DocumentsWriter {
|
|||
return (!isSegmentFlush || segment != null);
|
||||
}
|
||||
}
|
||||
|
||||
// used by IW during close to assert all DWPTs are inactive after the final flush
|
||||
boolean assertNoActiveDWPT() {
|
||||
Iterator<ThreadState> activePerThreadsIterator = perThreadPool.getAllPerThreadsIterator();
|
||||
while(activePerThreadsIterator.hasNext()) {
|
||||
ThreadState next = activePerThreadsIterator.next();
|
||||
next.lock();
|
||||
try {
|
||||
assert !next.isActive();
|
||||
} finally {
|
||||
next.unlock();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
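The preUpdate/postUpdate split factors out the flush bookkeeping that updateDocument and the new updateDocuments share. From application code the entry points are on IndexWriter; a sketch using the signatures this change introduces (treat the exact parameter order as an assumption):

import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

final class BlockIndexingSketch {
  // Index a block of documents with contiguous docIDs; an external
  // reader sees all of them or none of them.
  static void indexBlock(IndexWriter writer, List<Document> block) throws Exception {
    writer.addDocuments(block);
    // Atomically replace a previously added block: the delete term is
    // applied only to documents indexed before this batch started.
    writer.updateDocuments(new Term("groupId", "g1"), block);
  }
}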
|
@ -16,6 +16,7 @@ package org.apache.lucene.index;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
@ -68,7 +69,7 @@ public final class DocumentsWriterFlushControl {
|
|||
this.stallControl = new DocumentsWriterStallControl();
|
||||
this.perThreadPool = documentsWriter.perThreadPool;
|
||||
this.flushPolicy = documentsWriter.flushPolicy;
|
||||
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;;
|
||||
this.hardMaxBytesPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
|
||||
this.config = config;
|
||||
this.documentsWriter = documentsWriter;
|
||||
}
|
||||
|
@ -162,8 +163,6 @@ public final class DocumentsWriterFlushControl {
|
|||
stallControl.updateStalled(this);
|
||||
assert assertMemory();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
|
||||
|
@ -217,7 +216,7 @@ public final class DocumentsWriterFlushControl {
|
|||
assert assertMemory();
|
||||
// Take it out of the loop; this DWPT is stale
|
||||
perThreadPool.replaceForFlush(state, closed);
|
||||
}finally {
|
||||
} finally {
|
||||
stallControl.updateStalled(this);
|
||||
}
|
||||
}
|
||||
|
@ -305,6 +304,7 @@ public final class DocumentsWriterFlushControl {
|
|||
synchronized void setClosed() {
|
||||
// set by DW to signal that we should not release new DWPT after close
|
||||
this.closed = true;
|
||||
perThreadPool.deactivateUnreleasedStates();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -387,8 +387,12 @@ public final class DocumentsWriterFlushControl {
|
|||
toFlush.add(flushingDWPT);
|
||||
}
|
||||
} else {
|
||||
// get the new delete queue from DW
|
||||
next.perThread.initialize();
|
||||
if (closed) {
|
||||
next.resetWriter(null); // make this state inactive
|
||||
} else {
|
||||
// get the new delete queue from DW
|
||||
next.perThread.initialize();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
next.unlock();
|
||||
|
@ -451,10 +455,21 @@ public final class DocumentsWriterFlushControl {
|
|||
try {
|
||||
for (DocumentsWriterPerThread dwpt : flushQueue) {
|
||||
doAfterFlush(dwpt);
|
||||
try {
|
||||
dwpt.abort();
|
||||
} catch (IOException ex) {
|
||||
// continue
|
||||
}
|
||||
}
|
||||
for (BlockedFlush blockedFlush : blockedFlushes) {
|
||||
flushingWriters.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||
flushingWriters
|
||||
.put(blockedFlush.dwpt, Long.valueOf(blockedFlush.bytes));
|
||||
doAfterFlush(blockedFlush.dwpt);
|
||||
try {
|
||||
blockedFlush.dwpt.abort();
|
||||
} catch (IOException ex) {
|
||||
// continue
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
fullFlush = false;
|
||||
|
@ -512,5 +527,4 @@ public final class DocumentsWriterFlushControl {
|
|||
boolean anyStalledThreads() {
|
||||
return stallControl.anyStalledThreads();
|
||||
}
|
||||
|
||||
}
|
|
@ -105,7 +105,7 @@ public class DocumentsWriterPerThread {
|
|||
// largish:
|
||||
doc = null;
|
||||
analyzer = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class FlushedSegment {
|
||||
|
@ -179,7 +179,7 @@ public class DocumentsWriterPerThread {
|
|||
this.parent = parent;
|
||||
this.fieldInfos = fieldInfos;
|
||||
this.writer = parent.indexWriter;
|
||||
this.infoStream = parent.indexWriter.getInfoStream();
|
||||
this.infoStream = parent.infoStream;
|
||||
this.docState = new DocState(this);
|
||||
this.docState.similarityProvider = parent.indexWriter.getConfig()
|
||||
.getSimilarityProvider();
|
||||
|
@ -255,6 +255,82 @@ public class DocumentsWriterPerThread {
|
|||
finishDocument(delTerm);
|
||||
}
|
||||
|
||||
public int updateDocuments(Iterable<Document> docs, Analyzer analyzer, Term delTerm) throws IOException {
|
||||
assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
|
||||
assert deleteQueue != null;
|
||||
docState.analyzer = analyzer;
|
||||
if (segment == null) {
|
||||
// this call is synchronized on IndexWriter.segmentInfos
|
||||
segment = writer.newSegmentName();
|
||||
assert numDocsInRAM == 0;
|
||||
}
|
||||
|
||||
int docCount = 0;
|
||||
try {
|
||||
for(Document doc : docs) {
|
||||
docState.doc = doc;
|
||||
docState.docID = numDocsInRAM;
|
||||
docCount++;
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
consumer.processDocument(fieldInfos);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
// An exc is being thrown...
|
||||
|
||||
if (!aborting) {
|
||||
// One of the documents hit a non-aborting
|
||||
// exception (eg something happened during
|
||||
// analysis). We now go and mark any docs
|
||||
// from this batch that we had already indexed
|
||||
// as deleted:
|
||||
int docID = docState.docID;
|
||||
final int endDocID = docID - docCount;
|
||||
while (docID > endDocID) {
|
||||
deleteDocID(docID);
|
||||
docID--;
|
||||
}
|
||||
|
||||
// Incr here because finishDocument will not
|
||||
// be called (because an exc is being thrown):
|
||||
numDocsInRAM++;
|
||||
fieldInfos.revertUncommitted();
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
success = false;
|
||||
try {
|
||||
consumer.finishDocument();
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
finishDocument(null);
|
||||
}
|
||||
|
||||
// Apply delTerm only after all indexing has
|
||||
// succeeded, but apply it only to docs prior to when
|
||||
// this batch started:
|
||||
if (delTerm != null) {
|
||||
deleteQueue.add(delTerm, deleteSlice);
|
||||
assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
|
||||
deleteSlice.apply(pendingDeletes, numDocsInRAM-docCount);
|
||||
}
|
||||
|
||||
} finally {
|
||||
docState.clear();
|
||||
}
|
||||
|
||||
return docCount;
|
||||
}
|
||||
|
||||
private void finishDocument(Term delTerm) throws IOException {
|
||||
/*
|
||||
* here we actually finish the document in two steps 1. push the delete into
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index;
|
|||
import java.util.Iterator;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
|
||||
import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
|
@ -194,6 +193,21 @@ public abstract class DocumentsWriterPerThreadPool {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deactivates all unreleased thread states.
|
||||
*/
|
||||
protected synchronized void deactivateUnreleasedStates() {
|
||||
for (int i = numThreadStatesActive; i < perThreads.length; i++) {
|
||||
final ThreadState threadState = perThreads[i];
|
||||
threadState.lock();
|
||||
try {
|
||||
threadState.resetWriter(null);
|
||||
} finally {
|
||||
threadState.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) {
|
||||
assert threadState.isHeldByCurrentThread();
|
||||
final DocumentsWriterPerThread dwpt = threadState.perThread;
|
||||
|
@ -212,7 +226,7 @@ public abstract class DocumentsWriterPerThreadPool {
|
|||
// don't recycle DWPT by default
|
||||
}
|
||||
|
||||
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc);
|
||||
public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter);
|
||||
|
||||
/**
|
||||
* Returns an iterator providing access to all {@link ThreadState}
|
||||
|
|
|
@ -113,7 +113,7 @@ final class FieldsWriter {
|
|||
void close() throws IOException {
|
||||
if (directory != null) {
|
||||
try {
|
||||
IOUtils.closeSafely(fieldsStream, indexStream);
|
||||
IOUtils.closeSafely(false, fieldsStream, indexStream);
|
||||
} finally {
|
||||
fieldsStream = indexStream = null;
|
||||
}
|
||||
|
|
|
@@ -57,9 +57,10 @@ final class FreqProxTermsWriter extends TermsHashConsumer {

    final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);

    TermsHash termsHash = null;
    try {
      TermsHash termsHash = null;

      /*
      /*
      Current writer chain:
        FieldsConsumer
          -> IMPL: FormatPostingsTermsDictWriter

@@ -69,36 +70,38 @@ final class FreqProxTermsWriter extends TermsHashConsumer {
            -> IMPL: FormatPostingsDocsWriter
              -> PositionsConsumer
                -> IMPL: FormatPostingsPositionsWriter
      */
      */

      for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
        final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
      for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
        final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;

        final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
        final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);

        // Aggregate the storePayload as seen by the same
        // field across multiple threads
        if (!fieldInfo.omitTermFreqAndPositions) {
          fieldInfo.storePayloads |= fieldWriter.hasPayloads;
        // Aggregate the storePayload as seen by the same
        // field across multiple threads
        if (!fieldInfo.omitTermFreqAndPositions) {
          fieldInfo.storePayloads |= fieldWriter.hasPayloads;
        }

        // If this field has postings then add them to the
        // segment
        fieldWriter.flush(fieldInfo.name, consumer, state);

        TermsHashPerField perField = fieldWriter.termsHashPerField;
        assert termsHash == null || termsHash == perField.termsHash;
        termsHash = perField.termsHash;
        int numPostings = perField.bytesHash.size();
        perField.reset();
        perField.shrinkHash(numPostings);
        fieldWriter.reset();
      }

        // If this field has postings then add them to the
        // segment
        fieldWriter.flush(fieldInfo.name, consumer, state);

        TermsHashPerField perField = fieldWriter.termsHashPerField;
        assert termsHash == null || termsHash == perField.termsHash;
        termsHash = perField.termsHash;
        int numPostings = perField.bytesHash.size();
        perField.reset();
        perField.shrinkHash(numPostings);
        fieldWriter.reset();
        if (termsHash != null) {
          termsHash.reset();
        }
      } finally {
        consumer.close();
      }

    if (termsHash != null) {
      termsHash.reset();
    }
    consumer.close();
  }

    BytesRef payload;

@@ -17,6 +17,8 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import java.util.regex.Pattern;

import org.apache.lucene.index.codecs.Codec; // for javadocs

/**

@@ -238,4 +240,15 @@ public final class IndexFileNames {
    return filename;
  }

  /**
   * Returns true if the given filename ends with the separate norms file
   * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}.
   */
  public static boolean isSeparateNormsFile(String filename) {
    int idx = filename.lastIndexOf('.');
    if (idx == -1) return false;
    String ext = filename.substring(idx + 1);
    return Pattern.matches(SEPARATE_NORMS_EXTENSION + "[0-9]+", ext);
  }

}

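A quick sanity check of the new helper. The file names below are hypothetical, and SEPARATE_NORMS_EXTENSION is assumed to be "s", consistent with the SegmentInfo hunk later in this change that replaces a hard-coded "s" with the constant:

  IndexFileNames.isSeparateNormsFile("_0.s0");   // true: separate norms, norm number 0
  IndexFileNames.isSeparateNormsFile("_0.s12");  // true
  IndexFileNames.isSeparateNormsFile("_0.nrm");  // false: the shared norms file
  IndexFileNames.isSeparateNormsFile("_0.cfs");  // false: no digits after the extension
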
@@ -23,6 +23,7 @@ import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;

@@ -51,6 +52,7 @@ import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.MapBackedSet;

@@ -1071,7 +1073,8 @@ public class IndexWriter implements Closeable {

      if (infoStream != null)
        message("at close: " + segString());

      // used by assert below
      final DocumentsWriter oldWriter = docWriter;
      synchronized(this) {
        readerPool.close();
        docWriter = null;

@@ -1085,6 +1088,7 @@ public class IndexWriter implements Closeable {
      synchronized(this) {
        closed = true;
      }
      assert oldWriter.assertNoActiveDWPT();
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "closeInternal");
    } finally {

@@ -1099,6 +1103,8 @@ public class IndexWriter implements Closeable {
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {
    // Pass false because the flush during closing calls getDirectory

@@ -1227,6 +1233,111 @@ public class IndexWriter implements Closeable {
    updateDocument(null, doc, analyzer);
  }

  /**
   * Atomically adds a block of documents with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * <p><b>WARNING</b>: the index does not currently record
   * which documents were added as a block. Today this is
   * fine, because merging will preserve the block (as long
   * as none of them were deleted). But it's possible in the
   * future that Lucene may more aggressively re-order
   * documents (for example, perhaps to obtain better index
   * compression), in which case you may need to fully
   * re-index your documents at that time.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * <p><b>NOTE</b>: tools that do offline splitting of an index
   * (for example, IndexSplitter in contrib) or
   * re-sorting of documents (for example, IndexSorter in
   * contrib) are not aware of these atomically added documents
   * and will likely break them up. Use such tools at your
   * own risk!
   *
   * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
   * you should immediately close the writer. See <a
   * href="#OOME">above</a> for details.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void addDocuments(Iterable<Document> docs) throws CorruptIndexException, IOException {
    addDocuments(docs, analyzer);
  }

  /**
   * Atomically adds a block of documents, analyzed using the
   * provided analyzer, with sequentially assigned document
   * IDs, such that an external reader will see all or none
   * of the documents.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void addDocuments(Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    updateDocuments(null, docs, analyzer);
  }

  /**
   * Atomically deletes documents matching the provided
   * delTerm and adds a block of documents with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * See {@link #addDocuments(Iterable)}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Iterable<Document> docs) throws CorruptIndexException, IOException {
    updateDocuments(delTerm, docs, analyzer);
  }

  /**
   * Atomically deletes documents matching the provided
   * delTerm and adds a block of documents, analyzed using
   * the provided analyzer, with sequentially
   * assigned document IDs, such that an external reader
   * will see all or none of the documents.
   *
   * See {@link #addDocuments(Iterable)}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean success = false;
      boolean anySegmentFlushed = false;
      try {
        anySegmentFlushed = docWriter.updateDocuments(docs, analyzer, delTerm);
        success = true;
      } finally {
        if (!success && infoStream != null) {
          message("hit exception updating document");
        }
      }
      if (anySegmentFlushed) {
        maybeMerge();
      }
    } catch (OutOfMemoryError oom) {
      handleOOM(oom, "updateDocuments");
    }
  }

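A minimal usage sketch for the new block-add API above. The field names, values, and 3.x-era Field constructor here are hypothetical illustration; writer construction is elided:

  // Hypothetical parent/child block: an external reader sees all of these
  // documents or none of them, and their doc IDs are assigned sequentially.
  List<Document> block = new ArrayList<Document>();
  for (String sku : skus) {                       // hypothetical child data
    Document child = new Document();
    child.add(new Field("sku", sku, Field.Store.YES, Field.Index.NOT_ANALYZED));
    block.add(child);
  }
  Document parent = new Document();
  parent.add(new Field("productId", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
  block.add(parent);
  writer.addDocuments(block);
  // To replace a previously added block atomically:
  // writer.updateDocuments(new Term("productId", "42"), block);
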
  /**
   * Deletes the document(s) containing <code>term</code>.
   *

@@ -2217,10 +2328,10 @@ public class IndexWriter implements Closeable {
   * <p>
   * <b>NOTE:</b> this method only copies the segments of the incoming indexes
   * and does not merge them. Therefore deleted documents are not removed and
   * the new segments are not merged with the existing ones. Also, the segments
   * are copied as-is, meaning they are not converted to CFS if they aren't,
   * and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
   * or {@link #optimize} afterwards.
   * the new segments are not merged with the existing ones. Also, if the merge
   * policy allows compound files, then any segment that is not compound is
   * converted to such. However, if the segment is compound, it is copied as-is
   * even if the merge policy does not allow compound files.
   *
   * <p>This requires this index not be among those to be added.
   *

@@ -2244,6 +2355,7 @@ public class IndexWriter implements Closeable {

      int docCount = 0;
      List<SegmentInfo> infos = new ArrayList<SegmentInfo>();
      Comparator<String> versionComparator = StringHelper.getVersionComparator();
      for (Directory dir : dirs) {
        if (infoStream != null) {
          message("addIndexes: process directory " + dir);

@@ -2263,46 +2375,22 @@ public class IndexWriter implements Closeable {
            message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
          }

          // Determine if the doc store of this segment needs to be copied. It's
          // only relevant for segments who share doc store with others, because
          // the DS might have been copied already, in which case we just want
          // to update the DS name of this SegmentInfo.
          // NOTE: pre-3x segments include a null DSName if they don't share doc
          // store. So the following code ensures we don't accidentally insert
          // 'null' to the map.
          final String newDsName;
          if (dsName != null) {
            if (dsNames.containsKey(dsName)) {
              newDsName = dsNames.get(dsName);
            } else {
              dsNames.put(dsName, newSegName);
              newDsName = newSegName;
            }
          // create CFS only if the source segment is not CFS, and MP agrees it
          // should be CFS.
          boolean createCFS;
          synchronized (this) { // Guard segmentInfos
            createCFS = !info.getUseCompoundFile()
                && mergePolicy.useCompoundFile(segmentInfos, info)
                // optimize case only for segments that don't share doc stores
                && versionComparator.compare(info.getVersion(), "3.1") >= 0;
          }

          if (createCFS) {
            copySegmentIntoCFS(info, newSegName);
          } else {
            newDsName = newSegName;
            copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied);
          }

          // Copy the segment files
          for (String file: info.files()) {
            final String newFileName;
            if (IndexFileNames.isDocStoreFile(file)) {
              newFileName = newDsName + IndexFileNames.stripSegmentName(file);
              if (dsFilesCopied.contains(newFileName)) {
                continue;
              }
              dsFilesCopied.add(newFileName);
            } else {
              newFileName = newSegName + IndexFileNames.stripSegmentName(file);
            }
            assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
            dir.copy(directory, file, newFileName);
          }

          // Update SI appropriately
          info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
          info.dir = directory;
          info.name = newSegName;

          infos.add(info);
        }
      }

@@ -2391,6 +2479,76 @@ public class IndexWriter implements Closeable {
    }
  }

  /** Copies the segment into the IndexWriter's directory, as a compound segment. */
  private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
    String segFileName = IndexFileNames.segmentFileName(segName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
    Collection<String> files = info.files();
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
    for (String file : files) {
      String newFileName = segName + IndexFileNames.stripSegmentName(file);
      if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
          && !IndexFileNames.isSeparateNormsFile(file)) {
        cfsWriter.addFile(file, info.dir);
      } else {
        assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
        info.dir.copy(directory, file, newFileName);
      }
    }

    // Create the .cfs
    cfsWriter.close();

    info.dir = directory;
    info.name = segName;
    info.setUseCompoundFile(true);
  }

  /** Copies the segment files as-is into the IndexWriter's directory. */
  private void copySegmentAsIs(SegmentInfo info, String segName,
      Map<String, String> dsNames, Set<String> dsFilesCopied)
      throws IOException {
    // Determine if the doc store of this segment needs to be copied. It's
    // only relevant for segments that share doc store with others,
    // because the DS might have been copied already, in which case we
    // just want to update the DS name of this SegmentInfo.
    // NOTE: pre-3x segments include a null DSName if they don't share doc
    // store. The following code ensures we don't accidentally insert
    // 'null' to the map.
    String dsName = info.getDocStoreSegment();
    final String newDsName;
    if (dsName != null) {
      if (dsNames.containsKey(dsName)) {
        newDsName = dsNames.get(dsName);
      } else {
        dsNames.put(dsName, segName);
        newDsName = segName;
      }
    } else {
      newDsName = segName;
    }

    // Copy the segment files
    for (String file: info.files()) {
      final String newFileName;
      if (IndexFileNames.isDocStoreFile(file)) {
        newFileName = newDsName + IndexFileNames.stripSegmentName(file);
        if (dsFilesCopied.contains(newFileName)) {
          continue;
        }
        dsFilesCopied.add(newFileName);
      } else {
        newFileName = segName + IndexFileNames.stripSegmentName(file);
      }

      assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
      info.dir.copy(directory, file, newFileName);
    }

    info.setDocStore(info.getDocStoreOffset(), newDsName, info.getDocStoreIsCompoundFile());
    info.dir = directory;
    info.name = segName;
  }

  /**
   * A hook for extending classes to execute operations after pending added and
   * deleted documents have been flushed to the Directory but before the change

@@ -3176,50 +3334,50 @@ public class IndexWriter implements Closeable {
    runningMerges.remove(merge);
  }

  private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
  private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
    final int numSegments = merge.readers.size();
    if (suppressExceptions) {
      // Suppress any new exceptions so we throw the
      // original cause
      boolean anyChanges = false;
      for (int i=0;i<numSegments;i++) {
        if (merge.readers.get(i) != null) {
          try {
            anyChanges |= readerPool.release(merge.readers.get(i), false);
          } catch (Throwable t) {
          }
          merge.readers.set(i, null);
        }
    Throwable th = null;

        if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
          try {
            merge.readerClones.get(i).close();
          } catch (Throwable t) {
    boolean anyChanges = false;
    boolean drop = !suppressExceptions;
    for (int i = 0; i < numSegments; i++) {
      if (merge.readers.get(i) != null) {
        try {
          anyChanges |= readerPool.release(merge.readers.get(i), drop);
        } catch (Throwable t) {
          if (th == null) {
            th = t;
          }
          // This was a private clone and we had the
          // only reference
          assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
          merge.readerClones.set(i, null);
        }
        merge.readers.set(i, null);
      }
      if (anyChanges) {
        checkpoint();
      }
    } else {
      for (int i=0;i<numSegments;i++) {
        if (merge.readers.get(i) != null) {
          readerPool.release(merge.readers.get(i), true);
          merge.readers.set(i, null);
        }

      if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
      if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
        try {
          merge.readerClones.get(i).close();
          // This was a private clone and we had the only reference
          assert merge.readerClones.get(i).getRefCount() == 0;
          merge.readerClones.set(i, null);
        } catch (Throwable t) {
          if (th == null) {
            th = t;
          }
        }
        // This was a private clone and we had the
        // only reference
        assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
        merge.readerClones.set(i, null);
      }
    }

    if (suppressExceptions && anyChanges) {
      checkpoint();
    }

    // If any error occurred, throw it.
    if (!suppressExceptions && th != null) {
      if (th instanceof IOException) throw (IOException) th;
      if (th instanceof RuntimeException) throw (RuntimeException) th;
      if (th instanceof Error) throw (Error) th;
      throw new RuntimeException(th);
    }
  }

  /** Does the actual (time-consuming) work of the merge,

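The reworked closeMergeReaders above collapses the two previous code paths into one loop built on a "remember the first Throwable, rethrow it last" idiom, visible verbatim in the hunk. Reduced to its essentials (the resource list here is a placeholder):

  // Close everything, remembering only the first failure; rethrow it at the
  // end with its original type where possible.
  Throwable th = null;
  for (Closeable resource : resources) {    // placeholder collection
    try {
      resource.close();
    } catch (Throwable t) {
      if (th == null) {
        th = t;
      }
    }
  }
  if (!suppressExceptions && th != null) {
    if (th instanceof IOException) throw (IOException) th;
    if (th instanceof RuntimeException) throw (RuntimeException) th;
    if (th instanceof Error) throw (Error) th;
    throw new RuntimeException(th);
  }
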
@@ -22,6 +22,7 @@ import java.util.Collection;
import java.util.Map;

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;

// TODO FI: norms could actually be stored as doc store

@@ -49,9 +50,9 @@ final class NormsWriter extends InvertedDocEndConsumer {

    final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
    IndexOutput normsOut = state.directory.createOutput(normsFileName);

    boolean success = false;
    try {
      normsOut.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
      normsOut.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);

      int normCount = 0;

@@ -84,9 +85,9 @@ final class NormsWriter extends InvertedDocEndConsumer {

        assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
      }

      success = true;
    } finally {
      normsOut.close();
      IOUtils.closeSafely(!success, normsOut);
    }
  }

@@ -36,6 +36,7 @@ import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;

/**
 * Enables native per field codec support. This class selects the codec used to

@@ -67,7 +68,15 @@ final class PerFieldCodecWrapper extends Codec {
      assert segmentCodecs == state.segmentCodecs;
      final Codec[] codecs = segmentCodecs.codecs;
      for (int i = 0; i < codecs.length; i++) {
        consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, i)));
        boolean success = false;
        try {
          consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, i)));
          success = true;
        } finally {
          if (!success) {
            IOUtils.closeSafely(true, consumers);
          }
        }
      }
    }

@@ -80,22 +89,7 @@ final class PerFieldCodecWrapper extends Codec {

    @Override
    public void close() throws IOException {
      Iterator<FieldsConsumer> it = consumers.iterator();
      IOException err = null;
      while (it.hasNext()) {
        try {
          it.next().close();
        } catch (IOException ioe) {
          // keep first IOException we hit but keep
          // closing the rest
          if (err == null) {
            err = ioe;
          }
        }
      }
      if (err != null) {
        throw err;
      }
      IOUtils.closeSafely(false, consumers);
    }
  }

@@ -128,14 +122,7 @@ final class PerFieldCodecWrapper extends Codec {
          // If we hit exception (eg, IOE because writer was
          // committing, or, for any other reason) we must
          // go back and close all FieldsProducers we opened:
          for(FieldsProducer fp : producers.values()) {
            try {
              fp.close();
            } catch (Throwable t) {
              // Suppress all exceptions here so we continue
              // to throw the original one
            }
          }
          IOUtils.closeSafely(true, producers.values());
        }
      }
    }

@@ -184,22 +171,7 @@ final class PerFieldCodecWrapper extends Codec {

    @Override
    public void close() throws IOException {
      Iterator<FieldsProducer> it = codecs.values().iterator();
      IOException err = null;
      while (it.hasNext()) {
        try {
          it.next().close();
        } catch (IOException ioe) {
          // keep first IOException we hit but keep
          // closing the rest
          if (err == null) {
            err = ioe;
          }
        }
      }
      if (err != null) {
        throw err;
      }
      IOUtils.closeSafely(false, codecs.values());
    }

    @Override

@@ -59,7 +59,7 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {

  /**
   * Reads the snapshots information from the given {@link Directory}. This
   * method does can be used if the snapshots information is needed, however you
   * method can be used if the snapshots information is needed, however you
   * cannot instantiate the deletion policy (because e.g., some other process
   * keeps a lock on the snapshots directory).
   */

@@ -122,11 +122,19 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
      writer.commit();
    }

    // Initializes the snapshots information. This code should basically run
    // only if mode != CREATE, but if it is, it's no harm as we only open the
    // reader once and immediately close it.
    for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
      registerSnapshotInfo(e.getKey(), e.getValue(), null);
    try {
      // Initializes the snapshots information. This code should basically run
      // only if mode != CREATE, but if it is, it's no harm as we only open the
      // reader once and immediately close it.
      for (Entry<String, String> e : readSnapshotsInfo(dir).entrySet()) {
        registerSnapshotInfo(e.getKey(), e.getValue(), null);
      }
    } catch (RuntimeException e) {
      writer.close(); // don't leave any open file handles
      throw e;
    } catch (IOException e) {
      writer.close(); // don't leave any open file handles
      throw e;
    }
  }

@@ -438,7 +438,7 @@ public final class SegmentInfo implements Cloneable {
   */
  public String getNormFileName(int number) {
    if (hasSeparateNorms(number)) {
      return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + number, normGen.get(number));
    } else {
      // single file for all norms
      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);

@@ -40,6 +40,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ThreadInterruptedException;

/**

@@ -323,17 +324,13 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
      SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
      segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
      infosWriter.prepareCommit(segnOutput);
      success = true;
      pendingSegnOutput = segnOutput;
      success = true;
    } finally {
      if (!success) {
        // We hit an exception above; try to close the file
        // but suppress any exception:
        try {
          segnOutput.close();
        } catch (Throwable t) {
          // Suppress so we keep throwing the original exception
        }
        IOUtils.closeSafely(true, segnOutput);
        try {
          // Try not to leave a truncated segments_N file in
          // the index:

@@ -945,6 +942,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
        } finally {
          genOutput.close();
        }
      } catch (ThreadInterruptedException t) {
        throw t;
      } catch (Throwable t) {
        // It's OK if we fail to write this file since it's
        // used only as one of the retry fallbacks.

@@ -963,7 +962,6 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfo> {
    finishCommit(dir);
  }

  public String toString(Directory directory) {
    StringBuilder buffer = new StringBuilder();
    buffer.append(getCurrentSegmentFileName()).append(": ");

@@ -27,7 +27,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.index.codecs.PerDocConsumer;

@@ -36,6 +35,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.MultiBits;
import org.apache.lucene.util.ReaderUtil;

@@ -48,10 +48,6 @@ import org.apache.lucene.util.ReaderUtil;
 * @see #add
 */
final class SegmentMerger {

  /** norms header placeholder */
  static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};

  private Directory directory;
  private String segment;
  private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;

@@ -125,6 +121,12 @@ final class SegmentMerger {
    return mergedDocs;
  }

  /**
   * NOTE: this method creates a compound file for all files returned by
   * info.files(). While, generally, this may include separate norms and
   * deletion files, this SegmentInfo must not reference such files when this
   * method is called, because they are not allowed within a compound file.
   */
  final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
      throws IOException {

@@ -132,6 +134,10 @@ final class SegmentMerger {
    Collection<String> files = info.files();
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
    for (String file : files) {
      assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
          : ".del file is not allowed in .cfs: " + file;
      assert !IndexFileNames.isSeparateNormsFile(file)
          : "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file;
      cfsWriter.addFile(file);
    }

@@ -557,14 +563,13 @@ final class SegmentMerger {
    }
    codec = segmentWriteState.segmentCodecs.codec();
    final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);

    // NOTE: this is silly, yet, necessary -- we create a
    // MultiBits as our skip docs only to have it broken
    // apart when we step through the docs enums in
    // MultiDocsEnum.
    mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);

    try {
      // NOTE: this is silly, yet, necessary -- we create a
      // MultiBits as our skip docs only to have it broken
      // apart when we step through the docs enums in
      // MultiDocsEnum.
      mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);

      consumer.merge(mergeState,
          new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
              slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));

@@ -604,12 +609,13 @@ final class SegmentMerger {

  private void mergeNorms() throws IOException {
    IndexOutput output = null;
    boolean success = false;
    try {
      for (FieldInfo fi : fieldInfos) {
        if (fi.isIndexed && !fi.omitNorms) {
          if (output == null) {
            output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
            output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
            output.writeBytes(SegmentNorms.NORMS_HEADER, SegmentNorms.NORMS_HEADER.length);
          }
          for (IndexReader reader : readers) {
            final int maxDoc = reader.maxDoc();

@@ -637,10 +643,9 @@ final class SegmentMerger {
          }
        }
      }
      success = true;
    } finally {
      if (output != null) {
        output.close();
      }
      IOUtils.closeSafely(!success, output);
    }
  }
}

@@ -33,6 +33,10 @@ import org.apache.lucene.store.IndexOutput;
 */

final class SegmentNorms implements Cloneable {

  /** norms header placeholder */
  static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};

  int refCount = 1;

  // If this instance is a clone, the originalNorm

@@ -219,7 +223,7 @@ final class SegmentNorms implements Cloneable {
      boolean success = false;
      try {
        try {
          out.writeBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.length);
          out.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
          out.writeBytes(bytes, owner.maxDoc());
        } finally {
          out.close();

@@ -576,7 +576,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
  }

  private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
    long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
    long nextNormSeek = SegmentNorms.NORMS_HEADER.length; //skip header (header unused for now)
    int maxDoc = maxDoc();
    for (FieldInfo fi : core.fieldInfos) {
      if (norms.containsKey(fi.name)) {

@@ -621,7 +621,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
        if (isUnversioned) {
          normSeek = 0;
        } else {
          normSeek = SegmentMerger.NORMS_HEADER.length;
          normSeek = SegmentNorms.NORMS_HEADER.length;
        }
      }

@@ -54,9 +54,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
      fill(state.numDocs);
      assert state.segmentName != null;
      String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
      tvx.close();
      tvf.close();
      tvd.close();
      IOUtils.closeSafely(false, tvx, tvf, tvd);
      tvx = tvd = tvf = null;
      if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
        throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));

@@ -89,18 +87,25 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {

  private final void initTermVectorsWriter() throws IOException {
    if (tvx == null) {
      boolean success = false;
      try {
        // If we hit an exception while init'ing the term
        // vector output files, we must abort this segment
        // because those files will be in an unknown
        // state:
        tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
        tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
        tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));

      // If we hit an exception while init'ing the term
      // vector output files, we must abort this segment
      // because those files will be in an unknown
      // state:
      tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
      tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
      tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));

        tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
      tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
      tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
      tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, tvx, tvd, tvf);
        }
      }

      lastDocID = 0;
    }

@@ -139,7 +144,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
      }
    }

    assert lastDocID == docState.docID;
    assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;

    lastDocID++;

@@ -152,21 +157,27 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
  public void abort() {
    hasVectors = false;
    try {
      IOUtils.closeSafely(tvx, tvd, tvf);
    } catch (IOException ignored) {
      IOUtils.closeSafely(true, tvx, tvd, tvf);
    } catch (IOException e) {
      // cannot happen since we suppress exceptions
      throw new RuntimeException(e);
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
    } catch (IOException ignored) {
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
    } catch (IOException ignored) {
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
    } catch (IOException ignored) {
    }

    tvx = tvd = tvf = null;
    lastDocID = 0;

@@ -31,15 +31,22 @@ final class TermVectorsWriter {
  private FieldInfos fieldInfos;

  public TermVectorsWriter(Directory directory, String segment,
      FieldInfos fieldInfos)
      throws IOException {
    // Open files for TermVector storage
    tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
    tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
    tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
    tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
    tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
    tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
      FieldInfos fieldInfos) throws IOException {
    boolean success = false;
    try {
      // Open files for TermVector storage
      tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
      tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
      tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
      tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
      tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
      tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeSafely(true, tvx, tvd, tvf);
      }
    }

    this.fieldInfos = fieldInfos;
  }

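The constructor above follows the success-flag pattern that recurs throughout this change whenever a writer opens several outputs: nothing is considered safely owned until the whole initialization succeeds. Reduced to a template — the directory, file names, and header write below are placeholders, not the actual term-vector code:

  IndexOutput a = null, b = null;
  boolean success = false;
  try {
    a = dir.createOutput("_0.aaa");   // hypothetical file names
    b = dir.createOutput("_0.bbb");
    a.writeInt(42);                   // stand-in for header writing
    success = true;                   // reached only if every step worked
  } finally {
    if (!success) {
      // partial init: close whatever was opened, suppressing secondary exceptions
      IOUtils.closeSafely(true, a, b);
    }
  }
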
@@ -51,8 +58,7 @@ final class TermVectorsWriter {
   * @param vectors
   * @throws IOException
   */
  public final void addAllDocVectors(TermFreqVector[] vectors)
      throws IOException {
  public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {

    tvx.writeLong(tvd.getFilePointer());
    tvx.writeLong(tvf.getFilePointer());

@@ -187,6 +193,6 @@ final class TermVectorsWriter {
  final void close() throws IOException {
    // make an effort to close all streams we can but remember and re-throw
    // the first exception encountered in this process
    IOUtils.closeSafely(tvx, tvd, tvf);
    IOUtils.closeSafely(false, tvx, tvd, tvf);
  }
}

@@ -54,7 +54,6 @@ final class TermsHash extends InvertedDocConsumer {

  final boolean trackAllocations;

  public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
    this.docState = docWriter.docState;
    this.docWriter = docWriter;

@@ -108,11 +107,11 @@ final class TermsHash extends InvertedDocConsumer {
    }

    for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
      TermsHashPerField perField = (TermsHashPerField) entry.getValue();
      childFields.put(entry.getKey(), perField.consumer);
      if (nextTermsHash != null) {
        nextChildFields.put(entry.getKey(), perField.nextPerField);
      }
      TermsHashPerField perField = (TermsHashPerField) entry.getValue();
      childFields.put(entry.getKey(), perField.consumer);
      if (nextTermsHash != null) {
        nextChildFields.put(entry.getKey(), perField.nextPerField);
      }
    }

    consumer.flush(childFields, state);

@@ -134,12 +133,9 @@ final class TermsHash extends InvertedDocConsumer {

  @Override
  void finishDocument() throws IOException {
    try {
      consumer.finishDocument(this);
    } finally {
      if (nextTermsHash != null) {
        nextTermsHash.consumer.finishDocument(nextTermsHash);
      }
    consumer.finishDocument(this);
    if (nextTermsHash != null) {
      nextTermsHash.consumer.finishDocument(nextTermsHash);
    }
  }

@@ -18,7 +18,6 @@ package org.apache.lucene.index;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; //javadoc

/**

@@ -48,12 +47,10 @@ public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerT
  }

  @Override
  public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) {
  public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter) {
    ThreadState threadState = threadBindings.get(requestingThread);
    if (threadState != null) {
      if (threadState.tryLock()) {
        return threadState;
      }
    if (threadState != null && threadState.tryLock()) {
      return threadState;
    }
    ThreadState minThreadState = null;

@@ -31,6 +31,7 @@ import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

// TODO: currently we encode all terms between two indexed

@@ -66,24 +67,29 @@ public class BlockTermsWriter extends FieldsConsumer {

  //private final String segment;

  public BlockTermsWriter(
      TermsIndexWriterBase termsIndexWriter,
      SegmentWriteState state,
      PostingsWriterBase postingsWriter)
      throws IOException
  {
  public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
      SegmentWriteState state, PostingsWriterBase postingsWriter)
      throws IOException {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), TERMS_EXTENSION);
    this.termsIndexWriter = termsIndexWriter;
    out = state.directory.createOutput(termsFileName);
    fieldInfos = state.fieldInfos;
    writeHeader(out);
    currentField = null;
    this.postingsWriter = postingsWriter;
    //segment = state.segmentName;
    boolean success = false;
    try {
      fieldInfos = state.fieldInfos;
      writeHeader(out);
      currentField = null;
      this.postingsWriter = postingsWriter;
      //segment = state.segmentName;

      //System.out.println("BTW.init seg=" + state.segmentName);
      //System.out.println("BTW.init seg=" + state.segmentName);

      postingsWriter.start(out); // have consumer write its format/header
      postingsWriter.start(out); // have consumer write its format/header
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeSafely(true, out);
      }
    }
  }

  protected void writeHeader(IndexOutput out) throws IOException {

@@ -130,20 +136,11 @@ public class BlockTermsWriter extends FieldsConsumer {
      }
      writeTrailer(dirStart);
    } finally {
      try {
        out.close();
      } finally {
        try {
          postingsWriter.close();
        } finally {
          termsIndexWriter.close();
        }
      }
      IOUtils.closeSafely(false, out, postingsWriter, termsIndexWriter);
    }
  }

  protected void writeTrailer(long dirStart) throws IOException {
    // TODO Auto-generated method stub
    out.seek(CodecUtil.headerLength(CODEC_NAME));
    out.writeLong(dirStart);
  }

@@ -89,6 +89,15 @@ public class CodecProvider {
    return codec;
  }

  /**
   * Returns <code>true</code> iff a codec with the given name is registered
   * @param name codec name
   * @return <code>true</code> iff a codec with the given name is registered, otherwise <code>false</code>.
   */
  public synchronized boolean isCodecRegistered(String name) {
    return codecs.containsKey(name);
  }

  public SegmentInfosWriter getSegmentInfosWriter() {
    return infosWriter;
  }

@@ -146,6 +155,14 @@ public class CodecProvider {
    return codec;
  }

  /**
   * Returns <code>true</code> if this provider has a Codec registered for this
   * field.
   */
  public synchronized boolean hasFieldCodec(String name) {
    return perFieldMap.containsKey(name);
  }

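A short, hypothetical sketch of how a caller might consult the two new query methods before registering anything. The way the provider is obtained, the codec constructor arguments, and the register/setFieldCodec calls are assumptions about the surrounding CodecProvider API, not part of this hunk:

  CodecProvider provider = CodecProvider.getDefault();  // assumed accessor
  if (!provider.isCodecRegistered("Pulsing")) {
    provider.register(new PulsingCodec());              // assumed registration method; ctor args elided
  }
  if (!provider.hasFieldCodec("body")) {
    provider.setFieldCodec("body", "Pulsing");          // assumed per-field setter backing perFieldMap
  }
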
  /**
   * Returns the default {@link Codec} for this {@link CodecProvider}
   *

@@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.ChecksumIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;

/**
 * Default implementation of {@link SegmentInfosWriter}.

@@ -56,16 +57,24 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
  public IndexOutput writeInfos(Directory dir, String segmentFileName, SegmentInfos infos)
      throws IOException {
    IndexOutput out = createOutput(dir, segmentFileName);
    out.writeInt(FORMAT_CURRENT); // write FORMAT
    out.writeLong(infos.version);
    out.writeInt(infos.counter); // write counter
    out.writeLong(infos.getGlobalFieldMapVersion());
    out.writeInt(infos.size()); // write infos
    for (SegmentInfo si : infos) {
      si.write(out);
    boolean success = false;
    try {
      out.writeInt(FORMAT_CURRENT); // write FORMAT
      out.writeLong(infos.version);
      out.writeInt(infos.counter); // write counter
      out.writeLong(infos.getGlobalFieldMapVersion());
      out.writeInt(infos.size()); // write infos
      for (SegmentInfo si : infos) {
        si.write(out);
      }
      out.writeStringStringMap(infos.getUserData());
      success = true;
      return out;
    } finally {
      if (!success) {
        IOUtils.closeSafely(true, out);
      }
    }
    out.writeStringStringMap(infos.getUserData());
    return out;
  }

  protected IndexOutput createOutput(Directory dir, String segmentFileName)

@@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;

@@ -108,6 +109,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
      }
      success = true;
    } finally {
      if (!success) IOUtils.closeSafely(true, in);
      if (indexDivisor > 0) {
        in.close();
        in = null;

@@ -25,6 +25,7 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;

import java.util.List;

@@ -58,9 +59,17 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
    final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), TERMS_INDEX_EXTENSION);
    termIndexInterval = state.termIndexInterval;
    out = state.directory.createOutput(indexFileName);
    fieldInfos = state.fieldInfos;
    writeHeader(out);
    out.writeInt(termIndexInterval);
    boolean success = false;
    try {
      fieldInfos = state.fieldInfos;
      writeHeader(out);
      out.writeInt(termIndexInterval);
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeSafely(true, out);
      }
    }
  }

  protected void writeHeader(IndexOutput out) throws IOException {

@@ -202,33 +211,37 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
    }
  }

  @Override
  public void close() throws IOException {
    final long dirStart = out.getFilePointer();
    final int fieldCount = fields.size();
    boolean success = false;
    try {
      final long dirStart = out.getFilePointer();
      final int fieldCount = fields.size();

      int nonNullFieldCount = 0;
      for(int i=0;i<fieldCount;i++) {
        SimpleFieldWriter field = fields.get(i);
        if (field.numIndexTerms > 0) {
          nonNullFieldCount++;
      int nonNullFieldCount = 0;
      for(int i=0;i<fieldCount;i++) {
        SimpleFieldWriter field = fields.get(i);
        if (field.numIndexTerms > 0) {
          nonNullFieldCount++;
        }
      }
      }

      out.writeVInt(nonNullFieldCount);
      for(int i=0;i<fieldCount;i++) {
        SimpleFieldWriter field = fields.get(i);
        if (field.numIndexTerms > 0) {
          out.writeVInt(field.fieldInfo.number);
          out.writeVInt(field.numIndexTerms);
          out.writeVLong(field.termsStart);
          out.writeVLong(field.indexStart);
          out.writeVLong(field.packedIndexStart);
          out.writeVLong(field.packedOffsetsStart);
      out.writeVInt(nonNullFieldCount);
      for(int i=0;i<fieldCount;i++) {
        SimpleFieldWriter field = fields.get(i);
        if (field.numIndexTerms > 0) {
          out.writeVInt(field.fieldInfo.number);
          out.writeVInt(field.numIndexTerms);
          out.writeVLong(field.termsStart);
          out.writeVLong(field.indexStart);
          out.writeVLong(field.packedIndexStart);
          out.writeVLong(field.packedOffsetsStart);
        }
      }
      writeTrailer(dirStart);
      success = true;
    } finally {
      IOUtils.closeSafely(!success, out);
    }
    writeTrailer(dirStart);
    out.close();
  }

  protected void writeTrailer(long dirStart) throws IOException {

@@ -19,10 +19,12 @@ package org.apache.lucene.index.codecs;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.util.BytesRef;

import java.io.Closeable;
import java.io.IOException;

/** @lucene.experimental */
public abstract class TermsIndexWriterBase {
public abstract class TermsIndexWriterBase implements Closeable {

  public abstract class FieldWriter {
    public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;

@@ -31,6 +33,4 @@ public abstract class TermsIndexWriterBase {
  }

  public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;

  public abstract void close() throws IOException;
}

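Making TermsIndexWriterBase implement Closeable (its abstract close() becomes the interface method) is what allows BlockTermsWriter earlier in this change to hand the index writer straight to the varargs close helper alongside plain streams, collapsing three nested try/finally blocks into one line:

  // From the BlockTermsWriter hunk above: one call closes the terms output,
  // the postings writer, and the terms index writer, rethrowing the first failure.
  IOUtils.closeSafely(false, out, postingsWriter, termsIndexWriter);
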
@@ -33,11 +33,11 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.automaton.fst.Builder;
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
import org.apache.lucene.util.automaton.fst.Util; // for toDot
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util; // for toDot

/** See {@link VariableGapTermsIndexWriter}
 *

@@ -28,9 +28,10 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.automaton.fst.Builder;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;

/**
 * Selects index terms according to provided pluggable

@@ -159,9 +160,17 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
  public VariableGapTermsIndexWriter(SegmentWriteState state, IndexTermSelector policy) throws IOException {
    final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecIdAsString(), TERMS_INDEX_EXTENSION);
    out = state.directory.createOutput(indexFileName);
    fieldInfos = state.fieldInfos;
    this.policy = policy;
    writeHeader(out);
    boolean success = false;
    try {
      fieldInfos = state.fieldInfos;
      this.policy = policy;
      writeHeader(out);
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeSafely(true, out);
      }
    }
  }

  protected void writeHeader(IndexOutput out) throws IOException {

@@ -265,8 +274,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
    }
  }

  @Override
  public void close() throws IOException {
    try {
      final long dirStart = out.getFilePointer();
      final int fieldCount = fields.size();

@@ -287,8 +296,10 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
        }
      }
      writeTrailer(dirStart);
    } finally {
      out.close();
    }
  }

  protected void writeTrailer(long dirStart) throws IOException {
    out.seek(CodecUtil.headerLength(CODEC_NAME));

@@ -41,6 +41,7 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
  protected final IndexOutput out;

  private int upto;
  private boolean hitExcDuringWrite;

  // TODO what Var-Var codecs exist in practice... and what are their blocksizes like?
  // if its less than 128 we should set that as max and use byte?

@@ -105,19 +106,23 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {

  @Override
  public void write(int v) throws IOException {
    hitExcDuringWrite = true;
    upto -= add(v)-1;
    hitExcDuringWrite = false;
    assert upto >= 0;
  }

  @Override
  public void close() throws IOException {
    try {
      // stuff 0s in until the "real" data is flushed:
      int stuffed = 0;
      while(upto > stuffed) {
        upto -= add(0)-1;
        assert upto >= 0;
        stuffed += 1;
      if (!hitExcDuringWrite) {
        // stuff 0s in until the "real" data is flushed:
        int stuffed = 0;
        while(upto > stuffed) {
          upto -= add(0)-1;
          assert upto >= 0;
          stuffed += 1;
        }
      }
    } finally {
      out.close();

@@ -29,7 +29,6 @@ import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;

@@ -45,6 +44,7 @@ import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

/** This codec "inlines" the postings for terms that have
 * low docFreq. It wraps another codec, which is used for

@@ -88,7 +88,7 @@ public class PulsingCodec extends Codec {
      success = true;
    } finally {
      if (!success) {
        pulsingWriter.close();
        IOUtils.closeSafely(true, pulsingWriter);
      }
    }

@@ -100,11 +100,7 @@ public class PulsingCodec extends Codec {
      return ret;
    } finally {
      if (!success) {
        try {
          pulsingWriter.close();
        } finally {
          indexWriter.close();
        }
        IOUtils.closeSafely(true, pulsingWriter, indexWriter);
      }
    }
  }