mirror of https://github.com/apache/lucene.git
merged with trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1102677 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
54a2d7aab4
|
@ -7,6 +7,7 @@ modules/ is shared code
|
||||||
To compile the sources run 'ant compile'
|
To compile the sources run 'ant compile'
|
||||||
To run all the tests run 'ant test'
|
To run all the tests run 'ant test'
|
||||||
To setup your ide run 'ant idea' or 'ant eclipse'
|
To setup your ide run 'ant idea' or 'ant eclipse'
|
||||||
|
For Maven info, see dev-tools/maven/README.maven.
|
||||||
|
|
||||||
For more information on how to contribute see:
|
For more information on how to contribute see:
|
||||||
http://wiki.apache.org/lucene-java/HowToContribute
|
http://wiki.apache.org/lucene-java/HowToContribute
|
||||||
|
|
|
@ -95,7 +95,7 @@
|
||||||
<classpathentry kind="lib" path="modules/benchmark/lib/commons-digester-1.7.jar"/>
|
<classpathentry kind="lib" path="modules/benchmark/lib/commons-digester-1.7.jar"/>
|
||||||
<classpathentry kind="lib" path="modules/benchmark/lib/commons-logging-1.0.4.jar"/>
|
<classpathentry kind="lib" path="modules/benchmark/lib/commons-logging-1.0.4.jar"/>
|
||||||
<classpathentry kind="lib" path="modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>
|
<classpathentry kind="lib" path="modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>
|
||||||
<classpathentry kind="lib" path="solr/lib/apache-solr-noggit-r944541.jar"/>
|
<classpathentry kind="lib" path="solr/lib/apache-solr-noggit-r1099557.jar"/>
|
||||||
<classpathentry kind="lib" path="solr/lib/commons-beanutils-1.7.0.jar"/>
|
<classpathentry kind="lib" path="solr/lib/commons-beanutils-1.7.0.jar"/>
|
||||||
<classpathentry kind="lib" path="solr/lib/commons-codec-1.4.jar"/>
|
<classpathentry kind="lib" path="solr/lib/commons-codec-1.4.jar"/>
|
||||||
<classpathentry kind="lib" path="solr/lib/commons-collections-3.2.1.jar"/>
|
<classpathentry kind="lib" path="solr/lib/commons-collections-3.2.1.jar"/>
|
||||||
|
|
|
@ -0,0 +1,131 @@
|
||||||
|
====================================
|
||||||
|
Lucene/Solr Maven build instructions
|
||||||
|
====================================
|
||||||
|
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts
|
||||||
|
B. How to generate Lucene Maven artifacts
|
||||||
|
C. How to generate Solr Maven artifacts
|
||||||
|
D. How to use Maven to build Lucene/Solr
|
||||||
|
|
||||||
|
-----
|
||||||
|
|
||||||
|
A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts
|
||||||
|
|
||||||
|
The most recently produced nightly Jenkins-built Lucene and Solr Maven
|
||||||
|
artifacts are available in Maven repository layout here:
|
||||||
|
|
||||||
|
<https://builds.apache.org/hudson/job/Lucene-Solr-Maven-trunk/lastSuccessfulBuild/artifact/maven_artifacts/>
|
||||||
|
|
||||||
|
|
||||||
|
B. How to generate Lucene Maven artifacts
|
||||||
|
|
||||||
|
1. Prerequisites: JDK 1.5+, Ant 1.7.X, and maven-ant-tasks-2.1.1.jar
|
||||||
|
|
||||||
|
In order to generate Maven artifacts for Lucene/Solr, you must first
|
||||||
|
download the Maven ant tasks JAR (maven-ant-tasks-2.1.1.jar), e.g.
|
||||||
|
from <http://maven.apache.org/ant-tasks/download.html>, and add it
|
||||||
|
to any one of the following:
|
||||||
|
|
||||||
|
a. Your $HOME/.ant/lib/ directory (C:\Users\username\.ant\lib\ under
|
||||||
|
Windows Vista/7); or
|
||||||
|
b. Your $ANT_HOME/lib/ directory (%ANT_HOME%\lib\ under Windows); or
|
||||||
|
c. Your $CLASSPATH (%CLASSPATH% under Windows); or
|
||||||
|
d. Your ant commond line: "-lib /path/to/maven-ant-tasks-2.1.1.jar".
|
||||||
|
|
||||||
|
2. Run the following command from the lucene/ directory:
|
||||||
|
|
||||||
|
ant generate-maven-artifacts
|
||||||
|
|
||||||
|
The above command will create an internal Maven repository under
|
||||||
|
lucene/dist/maven/, including POMs, binary .jars, source .jars,
|
||||||
|
and javadoc .jars, for Lucene Core, for the Lucene test framework,
|
||||||
|
for each contrib, and for each module under the top-level modules/
|
||||||
|
directory.
|
||||||
|
|
||||||
|
|
||||||
|
C. How to generate Solr Maven artifacts
|
||||||
|
|
||||||
|
1. Prerequisites: JDK 1.6+; Ant 1.7.X; and maven-ant-tasks-2.1.1.jar
|
||||||
|
(see item A.1. above for where to put the Maven ant tasks jar).
|
||||||
|
|
||||||
|
2. Run the following from the solr/ directory:
|
||||||
|
|
||||||
|
ant generate-maven-artifacts
|
||||||
|
|
||||||
|
The above command will create an internal Maven repository under
|
||||||
|
solr/package/maven/, including POMs, binary .jars, source .jars,
|
||||||
|
and javadoc .jars, for Solr Core, for the Solr test framework,
|
||||||
|
for each contrib, and for the Solr .war (for which there are no
|
||||||
|
source or javadoc .jars).
|
||||||
|
|
||||||
|
|
||||||
|
D. How to use Maven to build Lucene/Solr
|
||||||
|
|
||||||
|
In summary, to enable Maven builds, perform the following:
|
||||||
|
|
||||||
|
svn update
|
||||||
|
ant get-maven-poms
|
||||||
|
mvn -N -Pbootstrap install
|
||||||
|
|
||||||
|
The details, followed by some example Maven commands:
|
||||||
|
|
||||||
|
1. Prerequisites: JDK 1.5+ (for Lucene); JDK 1.6+ (for Solr);
|
||||||
|
Maven 2.2.1 or 3.0.X
|
||||||
|
|
||||||
|
2. Make sure your sources are up to date. If you checked your sources out
|
||||||
|
from the Apache Subversion repository, run "svn update" from the top
|
||||||
|
level.
|
||||||
|
|
||||||
|
3. Copy the Maven POM templates from under dev-tools/maven/ to where they
|
||||||
|
they need to go in order to drive the Maven build, using the following
|
||||||
|
command from the top-level directory:
|
||||||
|
|
||||||
|
ant get-maven-poms
|
||||||
|
|
||||||
|
Note that you will need to do this whenever changes to the POM
|
||||||
|
templates are committed. It's a good idea to follow every "svn update"
|
||||||
|
with "ant get-maven-poms" for this reason.
|
||||||
|
|
||||||
|
The above command copies all of the POM templates from dev-tools/maven/,
|
||||||
|
filling in the project version with the default "X.X-SNAPSHOT". If you
|
||||||
|
want the POMs and the Maven-built artifacts to have a version other than
|
||||||
|
the default, you can supply an alternate version on the command line
|
||||||
|
with the above command, e.g.:
|
||||||
|
|
||||||
|
ant -Dversion=4.0-my-special-version get-maven-poms
|
||||||
|
|
||||||
|
4. Populate your local repository with .jars & POMs for dependencies that
|
||||||
|
are not available from public Maven repositories (a.k.a. "non-mavenized
|
||||||
|
dependencies"):
|
||||||
|
|
||||||
|
mvn -N -Pbootstrap install
|
||||||
|
|
||||||
|
Note that you will need to do this whenever changes to the non-Mavenized
|
||||||
|
dependencies are committed. It's a good idea to follow every
|
||||||
|
"svn update" with "ant get-maven-poms" and "mvn -N -Pbootstrap install"
|
||||||
|
for this reason.
|
||||||
|
|
||||||
|
|
||||||
|
Some example Maven commands you can use after you perform the above
|
||||||
|
preparatory steps:
|
||||||
|
|
||||||
|
- Compile, package, and install all artifacts to your local repository:
|
||||||
|
|
||||||
|
mvn install
|
||||||
|
|
||||||
|
After compiling and packaging, but before installing each module's
|
||||||
|
artifact, the above command will also run all the module's tests.
|
||||||
|
|
||||||
|
To compile, package and install all artifacts without running any tests:
|
||||||
|
|
||||||
|
mvn -DskipTests install
|
||||||
|
|
||||||
|
- Run tests:
|
||||||
|
|
||||||
|
mvn test
|
||||||
|
|
||||||
|
To run all test methods defined in a test class:
|
||||||
|
|
||||||
|
mvn -Dtest=TestClassName test
|
|
@ -699,7 +699,7 @@
|
||||||
<artifactId>solr-noggit</artifactId>
|
<artifactId>solr-noggit</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<file>solr/lib/apache-solr-noggit-r944541.jar</file>
|
<file>solr/lib/apache-solr-noggit-r1099557.jar</file>
|
||||||
</configuration>
|
</configuration>
|
||||||
</execution>
|
</execution>
|
||||||
<execution>
|
<execution>
|
||||||
|
|
|
@ -103,8 +103,8 @@
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
<directory>${build-directory}</directory>
|
<directory>${build-directory}</directory>
|
||||||
<outputDirectory>${build-directory}/extras/classes</outputDirectory>
|
<outputDirectory>${build-directory}/classes</outputDirectory>
|
||||||
<testOutputDirectory>${build-directory}/extras/test-classes</testOutputDirectory>
|
<testOutputDirectory>${build-directory}/test-classes</testOutputDirectory>
|
||||||
<sourceDirectory>main/java</sourceDirectory>
|
<sourceDirectory>main/java</sourceDirectory>
|
||||||
<testSourceDirectory>test/java</testSourceDirectory>
|
<testSourceDirectory>test/java</testSourceDirectory>
|
||||||
<testResources>
|
<testResources>
|
||||||
|
|
|
@ -159,7 +159,6 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
|
|
|
@ -85,7 +85,7 @@
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<build>
|
<build>
|
||||||
<directory>${build-directory}</directory>
|
<directory>${build-directory}</directory>
|
||||||
<outputDirectory>${build-directory}</outputDirectory>
|
<outputDirectory>${build-directory}/classes</outputDirectory>
|
||||||
<sourceDirectory>.</sourceDirectory>
|
<sourceDirectory>.</sourceDirectory>
|
||||||
<testResources/>
|
<testResources/>
|
||||||
<plugins>
|
<plugins>
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# recursive, unified output format, treat missing files as present but empty
|
||||||
|
DIFF_FLAGS = '-ruN'
|
||||||
|
|
||||||
|
if '-skipWhitespace' in sys.argv:
|
||||||
|
sys.argv.remove('-skipWhitespace')
|
||||||
|
# ignores only whitespace changes
|
||||||
|
DIFF_FLAGS += 'bBw'
|
||||||
|
|
||||||
|
if len(sys.argv) != 3:
|
||||||
|
print
|
||||||
|
print 'Usage: python -u diffSources.py <dir1> <dir2> [-skipWhitespace]'
|
||||||
|
print
|
||||||
|
print '''This tool creates an applying patch between two directories.
|
||||||
|
|
||||||
|
While you could use this to make a committable patch from a branch, that approach loses
|
||||||
|
the svn history from the branch (better to use "svn merge --reintegrate", for example). This
|
||||||
|
diff output should not be considered "authoritative" from a merging standpoint as it does
|
||||||
|
not reflect what svn will do on merge.
|
||||||
|
'''
|
||||||
|
print
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
p = subprocess.Popen(['diff', DIFF_FLAGS, '-x', '.svn', '-x', 'build', sys.argv[1], sys.argv[2]], shell=False, stdout=subprocess.PIPE)
|
||||||
|
|
||||||
|
keep = False
|
||||||
|
while True:
|
||||||
|
l = p.stdout.readline()
|
||||||
|
if l == '':
|
||||||
|
break
|
||||||
|
if l.endswith('\r\n'):
|
||||||
|
l = l[:-2]
|
||||||
|
elif l.endswith('\n'):
|
||||||
|
l = l[:-1]
|
||||||
|
if l.startswith('diff ') or l.startswith('Binary files '):
|
||||||
|
keep = l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml')) and l.find('/.svn/') == -1))
|
||||||
|
if keep:
|
||||||
|
print
|
||||||
|
print
|
||||||
|
print l.strip()
|
||||||
|
elif keep:
|
||||||
|
print l
|
||||||
|
elif l.startswith('Only in'):
|
||||||
|
print l.strip()
|
|
@ -472,13 +472,63 @@ Changes in backwards compatibility policy
|
||||||
a method getHeapArray() was added to retrieve the internal heap array as a
|
a method getHeapArray() was added to retrieve the internal heap array as a
|
||||||
non-generic Object[]. (Uwe Schindler, Yonik Seeley)
|
non-generic Object[]. (Uwe Schindler, Yonik Seeley)
|
||||||
|
|
||||||
|
* LUCENE-1076: IndexWriter.setInfoStream now throws IOException
|
||||||
|
(Mike McCandless, Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-3084: MergePolicy.OneMerge.segments was changed from
|
||||||
|
SegmentInfos to a List<SegmentInfo>; this is actually a minor change
|
||||||
|
because SegmentInfos itself extends Vector<SegmentInfo>. (Uwe
|
||||||
|
Schindler, Mike McCandless)
|
||||||
|
|
||||||
|
Changes in runtime behavior
|
||||||
|
|
||||||
|
* LUCENE-3065: When a NumericField is retrieved from a Document loaded
|
||||||
|
from IndexReader (or IndexSearcher), it will now come back as
|
||||||
|
NumericField not as a Field with a string-ified version of the
|
||||||
|
numeric value you had indexed. Note that this only applies for
|
||||||
|
newly-indexed Documents; older indices will still return Field
|
||||||
|
with the string-ified numeric value. If you call Document.get(),
|
||||||
|
the value comes still back as String, but Document.getFieldable()
|
||||||
|
returns NumericField instances. (Uwe Schindler, Ryan McKinley,
|
||||||
|
Mike McCandless)
|
||||||
|
|
||||||
|
New features
|
||||||
|
|
||||||
|
* LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader
|
||||||
|
that allows to upgrade all segments to last recent supported index
|
||||||
|
format without fully optimizing. (Uwe Schindler, Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-1076: Added TieredMergePolicy which is able to merge non-contiguous
|
||||||
|
segments, which means docIDs no longer necessarily stay "in order".
|
||||||
|
(Mike McCandless, Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-3071: Adding ReversePathHierarchyTokenizer, added skip parameter to
|
||||||
|
PathHierarchyTokenizer (Olivier Favre via ryan)
|
||||||
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
|
||||||
|
(though @lucene.experimental), allowing for custom MergeScheduler
|
||||||
|
implementations. (Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-3065: Document.getField() was deprecated, as it throws
|
||||||
|
ClassCastException when loading lazy fields or NumericFields.
|
||||||
|
(Uwe Schindler, Ryan McKinley, Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early
|
* LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early
|
||||||
on empty or one-element lists/arrays. (Uwe Schindler)
|
on empty or one-element lists/arrays. (Uwe Schindler)
|
||||||
|
|
||||||
|
* LUCENE-2897: Apply deleted terms while flushing a segment. We still
|
||||||
|
buffer deleted terms to later apply to past segments. (Mike McCandless)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
|
* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
|
||||||
|
indexes, causing existing deletions to be applied on the incoming indexes as
|
||||||
|
well. (Shai Erera, Mike McCandless)
|
||||||
|
|
||||||
* LUCENE-3024: Index with more than 2.1B terms was hitting AIOOBE when
|
* LUCENE-3024: Index with more than 2.1B terms was hitting AIOOBE when
|
||||||
seeking TermEnum (eg used by Solr's faceting) (Tom Burton-West, Mike
|
seeking TermEnum (eg used by Solr's faceting) (Tom Burton-West, Mike
|
||||||
McCandless)
|
McCandless)
|
||||||
|
@ -491,6 +541,17 @@ Bug fixes
|
||||||
very special use cases of the TokenStream-API, most users would not
|
very special use cases of the TokenStream-API, most users would not
|
||||||
have recognized it. (Uwe Schindler, Robert Muir)
|
have recognized it. (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-3054: PhraseQuery can in some cases stack overflow in
|
||||||
|
SorterTemplate.quickSort(). This fix also adds an optimization to
|
||||||
|
PhraseQuery as term with lower doc freq will also have less positions.
|
||||||
|
(Uwe Schindler, Robert Muir, Otis Gospodnetic)
|
||||||
|
|
||||||
|
Test Cases
|
||||||
|
|
||||||
|
* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
|
||||||
|
stop iterating if at least 'tests.iter.min' ran and a failure occured.
|
||||||
|
(Shai Erera, Chris Hostetter)
|
||||||
|
|
||||||
======================= Lucene 3.1.0 =======================
|
======================= Lucene 3.1.0 =======================
|
||||||
|
|
||||||
Changes in backwards compatibility policy
|
Changes in backwards compatibility policy
|
||||||
|
@ -1472,6 +1533,10 @@ Bug fixes
|
||||||
that warming is free to do whatever it needs to. (Earwin Burrfoot
|
that warming is free to do whatever it needs to. (Earwin Burrfoot
|
||||||
via Mike McCandless)
|
via Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-3029: Fix corner case when MultiPhraseQuery is used with zero
|
||||||
|
position-increment tokens that would sometimes assign different
|
||||||
|
scores to identical docs. (Mike McCandless)
|
||||||
|
|
||||||
* LUCENE-2486: Fixed intermittent FileNotFoundException on doc store
|
* LUCENE-2486: Fixed intermittent FileNotFoundException on doc store
|
||||||
files when a mergedSegmentWarmer is set on IndexWriter. (Mike
|
files when a mergedSegmentWarmer is set on IndexWriter. (Mike
|
||||||
McCandless)
|
McCandless)
|
||||||
|
|
|
@ -312,6 +312,8 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
|
||||||
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
|
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
|
||||||
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
|
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
|
||||||
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
|
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
|
||||||
|
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
|
||||||
|
- o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
|
||||||
|
|
||||||
* LUCENE-2514: The option to use a Collator's order (instead of binary order) for
|
* LUCENE-2514: The option to use a Collator's order (instead of binary order) for
|
||||||
sorting and range queries has been moved to contrib/queries.
|
sorting and range queries has been moved to contrib/queries.
|
||||||
|
|
|
@ -73,6 +73,7 @@
|
||||||
</condition>
|
</condition>
|
||||||
<property name="tests.multiplier" value="1" />
|
<property name="tests.multiplier" value="1" />
|
||||||
<property name="tests.codec" value="randomPerField" />
|
<property name="tests.codec" value="randomPerField" />
|
||||||
|
<property name="tests.codecprovider" value="random" />
|
||||||
<property name="tests.locale" value="random" />
|
<property name="tests.locale" value="random" />
|
||||||
<property name="tests.timezone" value="random" />
|
<property name="tests.timezone" value="random" />
|
||||||
<property name="tests.directory" value="random" />
|
<property name="tests.directory" value="random" />
|
||||||
|
@ -499,6 +500,8 @@
|
||||||
<sysproperty key="tests.verbose" value="${tests.verbose}"/>
|
<sysproperty key="tests.verbose" value="${tests.verbose}"/>
|
||||||
<!-- set the codec tests should run with -->
|
<!-- set the codec tests should run with -->
|
||||||
<sysproperty key="tests.codec" value="${tests.codec}"/>
|
<sysproperty key="tests.codec" value="${tests.codec}"/>
|
||||||
|
<!-- set the codec provider tests should run with -->
|
||||||
|
<sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
|
||||||
<!-- set the locale tests should run with -->
|
<!-- set the locale tests should run with -->
|
||||||
<sysproperty key="tests.locale" value="${tests.locale}"/>
|
<sysproperty key="tests.locale" value="${tests.locale}"/>
|
||||||
<!-- set the timezone tests should run with -->
|
<!-- set the timezone tests should run with -->
|
||||||
|
|
|
@ -50,6 +50,11 @@ Bug Fixes
|
||||||
|
|
||||||
======================= Lucene 3.x (not yet released) =======================
|
======================= Lucene 3.x (not yet released) =======================
|
||||||
|
|
||||||
|
Changes in runtime behavior
|
||||||
|
|
||||||
|
* LUCENE-3086: ItalianAnalyzer now uses ElisionFilter with a set of Italian
|
||||||
|
contractions by default. (Robert Muir)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
|
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
|
||||||
|
@ -183,6 +188,10 @@ Bug fixes
|
||||||
* LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter.
|
* LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter.
|
||||||
(Robert Muir)
|
(Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-3087: Highlighter: fix case that was preventing highlighting
|
||||||
|
of exact phrase when tokens overlap. (Pierre Gossé via Mike
|
||||||
|
McCandless)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-2867: Some contrib queryparser methods that receives CharSequence as
|
* LUCENE-2867: Some contrib queryparser methods that receives CharSequence as
|
||||||
|
|
|
@ -355,6 +355,7 @@ public class Highlighter
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
tokenStream.end();
|
||||||
tokenStream.close();
|
tokenStream.close();
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.TermFreqVector;
|
import org.apache.lucene.index.TermFreqVector;
|
||||||
|
@ -158,10 +159,13 @@ public class TokenSources {
|
||||||
|
|
||||||
OffsetAttribute offsetAtt;
|
OffsetAttribute offsetAtt;
|
||||||
|
|
||||||
|
PositionIncrementAttribute posincAtt;
|
||||||
|
|
||||||
StoredTokenStream(Token tokens[]) {
|
StoredTokenStream(Token tokens[]) {
|
||||||
this.tokens = tokens;
|
this.tokens = tokens;
|
||||||
termAtt = addAttribute(CharTermAttribute.class);
|
termAtt = addAttribute(CharTermAttribute.class);
|
||||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||||
|
posincAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -173,6 +177,10 @@ public class TokenSources {
|
||||||
clearAttributes();
|
clearAttributes();
|
||||||
termAtt.setEmpty().append(token);
|
termAtt.setEmpty().append(token);
|
||||||
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
||||||
|
posincAtt
|
||||||
|
.setPositionIncrement(currentToken <= 1
|
||||||
|
|| tokens[currentToken - 1].startOffset() > tokens[currentToken - 2]
|
||||||
|
.startOffset() ? 1 : 0);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -180,7 +188,6 @@ public class TokenSources {
|
||||||
BytesRef[] terms = tpv.getTerms();
|
BytesRef[] terms = tpv.getTerms();
|
||||||
int[] freq = tpv.getTermFrequencies();
|
int[] freq = tpv.getTermFrequencies();
|
||||||
int totalTokens = 0;
|
int totalTokens = 0;
|
||||||
|
|
||||||
for (int t = 0; t < freq.length; t++) {
|
for (int t = 0; t < freq.length; t++) {
|
||||||
totalTokens += freq[t];
|
totalTokens += freq[t];
|
||||||
}
|
}
|
||||||
|
@ -189,7 +196,8 @@ public class TokenSources {
|
||||||
for (int t = 0; t < freq.length; t++) {
|
for (int t = 0; t < freq.length; t++) {
|
||||||
TermVectorOffsetInfo[] offsets = tpv.getOffsets(t);
|
TermVectorOffsetInfo[] offsets = tpv.getOffsets(t);
|
||||||
if (offsets == null) {
|
if (offsets == null) {
|
||||||
throw new IllegalArgumentException("Required TermVector Offset information was not found");
|
throw new IllegalArgumentException(
|
||||||
|
"Required TermVector Offset information was not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] pos = null;
|
int[] pos = null;
|
||||||
|
@ -205,8 +213,8 @@ public class TokenSources {
|
||||||
unsortedTokens = new ArrayList<Token>();
|
unsortedTokens = new ArrayList<Token>();
|
||||||
}
|
}
|
||||||
for (int tp = 0; tp < offsets.length; tp++) {
|
for (int tp = 0; tp < offsets.length; tp++) {
|
||||||
Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(), offsets[tp]
|
Token token = new Token(terms[t].utf8ToString(),
|
||||||
.getEndOffset());
|
offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
|
||||||
unsortedTokens.add(token);
|
unsortedTokens.add(token);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -221,8 +229,8 @@ public class TokenSources {
|
||||||
// tokens stored with positions - can use this to index straight into
|
// tokens stored with positions - can use this to index straight into
|
||||||
// sorted array
|
// sorted array
|
||||||
for (int tp = 0; tp < pos.length; tp++) {
|
for (int tp = 0; tp < pos.length; tp++) {
|
||||||
Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(),
|
Token token = new Token(terms[t].utf8ToString(),
|
||||||
offsets[tp].getEndOffset());
|
offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
|
||||||
tokensInOriginalOrder[pos[tp]] = token;
|
tokensInOriginalOrder[pos[tp]] = token;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -231,12 +239,11 @@ public class TokenSources {
|
||||||
if (unsortedTokens != null) {
|
if (unsortedTokens != null) {
|
||||||
tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens
|
tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens
|
||||||
.size()]);
|
.size()]);
|
||||||
ArrayUtil.quickSort(tokensInOriginalOrder, new Comparator<Token>() {
|
ArrayUtil.mergeSort(tokensInOriginalOrder, new Comparator<Token>() {
|
||||||
public int compare(Token t1, Token t2) {
|
public int compare(Token t1, Token t2) {
|
||||||
if (t1.startOffset() == t2.startOffset())
|
if (t1.startOffset() == t2.startOffset()) return t1.endOffset()
|
||||||
return t1.endOffset() - t2.endOffset();
|
- t2.endOffset();
|
||||||
else
|
else return t1.startOffset() - t2.startOffset();
|
||||||
return t1.startOffset() - t2.startOffset();
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -1093,6 +1093,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMaxSizeHighlight() throws Exception {
|
public void testMaxSizeHighlight() throws Exception {
|
||||||
|
final MockAnalyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
|
||||||
|
// we disable MockTokenizer checks because we will forcefully limit the
|
||||||
|
// tokenstream and call end() before incrementToken() returns false.
|
||||||
|
analyzer.setEnableChecks(false);
|
||||||
TestHighlightRunner helper = new TestHighlightRunner() {
|
TestHighlightRunner helper = new TestHighlightRunner() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1122,7 +1126,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
String goodWord = "goodtoken";
|
String goodWord = "goodtoken";
|
||||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
|
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
|
||||||
|
// we disable MockTokenizer checks because we will forcefully limit the
|
||||||
|
// tokenstream and call end() before incrementToken() returns false.
|
||||||
|
final MockAnalyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true);
|
||||||
|
analyzer.setEnableChecks(false);
|
||||||
TermQuery query = new TermQuery(new Term("data", goodWord));
|
TermQuery query = new TermQuery(new Term("data", goodWord));
|
||||||
|
|
||||||
String match;
|
String match;
|
||||||
|
@ -1134,13 +1141,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
sb.append("stoppedtoken");
|
sb.append("stoppedtoken");
|
||||||
}
|
}
|
||||||
SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
|
SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
|
||||||
Highlighter hg = getHighlighter(query, "data", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true).tokenStream(
|
Highlighter hg = getHighlighter(query, "data", analyzer.tokenStream(
|
||||||
"data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
|
"data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
|
||||||
// new
|
// new
|
||||||
// QueryTermScorer(query));
|
// QueryTermScorer(query));
|
||||||
hg.setTextFragmenter(new NullFragmenter());
|
hg.setTextFragmenter(new NullFragmenter());
|
||||||
hg.setMaxDocCharsToAnalyze(100);
|
hg.setMaxDocCharsToAnalyze(100);
|
||||||
match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString());
|
match = hg.getBestFragment(analyzer, "data", sb.toString());
|
||||||
assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
|
assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
|
||||||
.getMaxDocCharsToAnalyze());
|
.getMaxDocCharsToAnalyze());
|
||||||
|
|
||||||
|
@ -1151,7 +1158,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
||||||
// + whitespace)
|
// + whitespace)
|
||||||
sb.append(" ");
|
sb.append(" ");
|
||||||
sb.append(goodWord);
|
sb.append(goodWord);
|
||||||
match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString());
|
match = hg.getBestFragment(analyzer, "data", sb.toString());
|
||||||
assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
|
assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
|
||||||
.getMaxDocCharsToAnalyze());
|
.getMaxDocCharsToAnalyze());
|
||||||
}
|
}
|
||||||
|
@ -1726,6 +1733,11 @@ final class SynonymAnalyzer extends Analyzer {
|
||||||
stream.addAttribute(CharTermAttribute.class);
|
stream.addAttribute(CharTermAttribute.class);
|
||||||
stream.addAttribute(PositionIncrementAttribute.class);
|
stream.addAttribute(PositionIncrementAttribute.class);
|
||||||
stream.addAttribute(OffsetAttribute.class);
|
stream.addAttribute(OffsetAttribute.class);
|
||||||
|
try {
|
||||||
|
stream.reset();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
return new SynonymTokenizer(stream, synonyms);
|
return new SynonymTokenizer(stream, synonyms);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,32 +28,38 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
public void testFilter() throws Exception {
|
public void testFilter() throws Exception {
|
||||||
TokenStream stream = new MockTokenizer(new StringReader(
|
// we disable MockTokenizer checks because we will forcefully limit the
|
||||||
|
// tokenstream and call end() before incrementToken() returns false.
|
||||||
|
MockTokenizer stream = new MockTokenizer(new StringReader(
|
||||||
"short toolong evenmuchlongertext a ab toolong foo"),
|
"short toolong evenmuchlongertext a ab toolong foo"),
|
||||||
MockTokenizer.WHITESPACE, false);
|
MockTokenizer.WHITESPACE, false);
|
||||||
|
stream.setEnableChecks(false);
|
||||||
OffsetLimitTokenFilter filter = new OffsetLimitTokenFilter(stream, 10);
|
OffsetLimitTokenFilter filter = new OffsetLimitTokenFilter(stream, 10);
|
||||||
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
|
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
|
||||||
|
|
||||||
stream = new MockTokenizer(new StringReader(
|
stream = new MockTokenizer(new StringReader(
|
||||||
"short toolong evenmuchlongertext a ab toolong foo"),
|
"short toolong evenmuchlongertext a ab toolong foo"),
|
||||||
MockTokenizer.WHITESPACE, false);
|
MockTokenizer.WHITESPACE, false);
|
||||||
|
stream.setEnableChecks(false);
|
||||||
filter = new OffsetLimitTokenFilter(stream, 12);
|
filter = new OffsetLimitTokenFilter(stream, 12);
|
||||||
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
|
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
|
||||||
|
|
||||||
stream = new MockTokenizer(new StringReader(
|
stream = new MockTokenizer(new StringReader(
|
||||||
"short toolong evenmuchlongertext a ab toolong foo"),
|
"short toolong evenmuchlongertext a ab toolong foo"),
|
||||||
MockTokenizer.WHITESPACE, false);
|
MockTokenizer.WHITESPACE, false);
|
||||||
|
stream.setEnableChecks(false);
|
||||||
filter = new OffsetLimitTokenFilter(stream, 30);
|
filter = new OffsetLimitTokenFilter(stream, 30);
|
||||||
assertTokenStreamContents(filter, new String[] {"short", "toolong",
|
assertTokenStreamContents(filter, new String[] {"short", "toolong",
|
||||||
"evenmuchlongertext"});
|
"evenmuchlongertext"});
|
||||||
|
|
||||||
|
// TODO: This is not actually testing reuse! (reusableTokenStream is not implemented)
|
||||||
checkOneTermReuse(new Analyzer() {
|
checkOneTermReuse(new Analyzer() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||||
return new OffsetLimitTokenFilter(new MockTokenizer(reader,
|
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||||
MockTokenizer.WHITESPACE, false), 10);
|
tokenizer.setEnableChecks(false);
|
||||||
|
return new OffsetLimitTokenFilter(tokenizer, 10);
|
||||||
}
|
}
|
||||||
}, "llenges", "llenges");
|
}, "llenges", "llenges");
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,7 +36,10 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermPositionVector;
|
import org.apache.lucene.index.TermPositionVector;
|
||||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.LockObtainFailedException;
|
import org.apache.lucene.store.LockObtainFailedException;
|
||||||
|
@ -86,12 +89,12 @@ public class TokenSourcesTest extends LuceneTestCase {
|
||||||
public void reset() {
|
public void reset() {
|
||||||
this.i = -1;
|
this.i = -1;
|
||||||
this.tokens = new Token[] {
|
this.tokens = new Token[] {
|
||||||
new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
|
new Token(new char[] {'t', 'h', 'e'}, 0, 3, 0, 3),
|
||||||
new Token(new char[] { '{', 'f', 'o', 'x', '}' }, 0, 5, 0, 7),
|
new Token(new char[] {'{', 'f', 'o', 'x', '}'}, 0, 5, 0, 7),
|
||||||
new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
|
new Token(new char[] {'f', 'o', 'x'}, 0, 3, 4, 7),
|
||||||
new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11),
|
new Token(new char[] {'d', 'i', 'd'}, 0, 3, 8, 11),
|
||||||
new Token(new char[] { 'n', 'o', 't' }, 0, 3, 12, 15),
|
new Token(new char[] {'n', 'o', 't'}, 0, 3, 12, 15),
|
||||||
new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20) };
|
new Token(new char[] {'j', 'u', 'm', 'p'}, 0, 4, 16, 20)};
|
||||||
this.tokens[1].setPositionIncrement(0);
|
this.tokens[1].setPositionIncrement(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -188,4 +191,97 @@ public class TokenSourcesTest extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testOverlapWithOffsetExactPhrase() throws CorruptIndexException,
|
||||||
|
LockObtainFailedException, IOException, InvalidTokenOffsetsException {
|
||||||
|
final String TEXT = "the fox did not jump";
|
||||||
|
final Directory directory = newDirectory();
|
||||||
|
final IndexWriter indexWriter = new IndexWriter(directory,
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
|
||||||
|
try {
|
||||||
|
final Document document = new Document();
|
||||||
|
document.add(new Field(FIELD, new TokenStreamOverlap(),
|
||||||
|
TermVector.WITH_OFFSETS));
|
||||||
|
indexWriter.addDocument(document);
|
||||||
|
} finally {
|
||||||
|
indexWriter.close();
|
||||||
|
}
|
||||||
|
final IndexReader indexReader = IndexReader.open(directory, true);
|
||||||
|
try {
|
||||||
|
assertEquals(1, indexReader.numDocs());
|
||||||
|
final IndexSearcher indexSearcher = newSearcher(indexReader);
|
||||||
|
try {
|
||||||
|
// final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
|
||||||
|
// query.add(new SpanTermQuery(new Term(FIELD, "{fox}")));
|
||||||
|
// query.add(new SpanTermQuery(new Term(FIELD, "fox")));
|
||||||
|
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
|
||||||
|
new SpanTermQuery(new Term(FIELD, "the")),
|
||||||
|
new SpanTermQuery(new Term(FIELD, "fox"))}, 0, true);
|
||||||
|
|
||||||
|
TopDocs hits = indexSearcher.search(phraseQuery, 1);
|
||||||
|
assertEquals(1, hits.totalHits);
|
||||||
|
final Highlighter highlighter = new Highlighter(
|
||||||
|
new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
|
||||||
|
new QueryScorer(phraseQuery));
|
||||||
|
final TokenStream tokenStream = TokenSources
|
||||||
|
.getTokenStream(
|
||||||
|
(TermPositionVector) indexReader.getTermFreqVector(0, FIELD),
|
||||||
|
false);
|
||||||
|
assertEquals("<B>the fox</B> did not jump",
|
||||||
|
highlighter.getBestFragment(tokenStream, TEXT));
|
||||||
|
} finally {
|
||||||
|
indexSearcher.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
indexReader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testOverlapWithPositionsAndOffsetExactPhrase()
|
||||||
|
throws CorruptIndexException, LockObtainFailedException, IOException,
|
||||||
|
InvalidTokenOffsetsException {
|
||||||
|
final String TEXT = "the fox did not jump";
|
||||||
|
final Directory directory = newDirectory();
|
||||||
|
final IndexWriter indexWriter = new IndexWriter(directory,
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
|
||||||
|
try {
|
||||||
|
final Document document = new Document();
|
||||||
|
document.add(new Field(FIELD, new TokenStreamOverlap(),
|
||||||
|
TermVector.WITH_POSITIONS_OFFSETS));
|
||||||
|
indexWriter.addDocument(document);
|
||||||
|
} finally {
|
||||||
|
indexWriter.close();
|
||||||
|
}
|
||||||
|
final IndexReader indexReader = IndexReader.open(directory, true);
|
||||||
|
try {
|
||||||
|
assertEquals(1, indexReader.numDocs());
|
||||||
|
final IndexSearcher indexSearcher = newSearcher(indexReader);
|
||||||
|
try {
|
||||||
|
// final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
|
||||||
|
// query.add(new SpanTermQuery(new Term(FIELD, "the")));
|
||||||
|
// query.add(new SpanTermQuery(new Term(FIELD, "fox")));
|
||||||
|
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
|
||||||
|
new SpanTermQuery(new Term(FIELD, "the")),
|
||||||
|
new SpanTermQuery(new Term(FIELD, "fox"))}, 0, true);
|
||||||
|
|
||||||
|
TopDocs hits = indexSearcher.search(phraseQuery, 1);
|
||||||
|
assertEquals(1, hits.totalHits);
|
||||||
|
final Highlighter highlighter = new Highlighter(
|
||||||
|
new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
|
||||||
|
new QueryScorer(phraseQuery));
|
||||||
|
final TokenStream tokenStream = TokenSources
|
||||||
|
.getTokenStream(
|
||||||
|
(TermPositionVector) indexReader.getTermFreqVector(0, FIELD),
|
||||||
|
false);
|
||||||
|
assertEquals("<B>the fox</B> did not jump",
|
||||||
|
highlighter.getBestFragment(tokenStream, TEXT));
|
||||||
|
} finally {
|
||||||
|
indexSearcher.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
indexReader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -192,6 +192,7 @@ public class FuzzyLikeThisQuery extends Query
|
||||||
int corpusNumDocs=reader.numDocs();
|
int corpusNumDocs=reader.numDocs();
|
||||||
Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
|
Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
|
||||||
HashSet<String> processedTerms=new HashSet<String>();
|
HashSet<String> processedTerms=new HashSet<String>();
|
||||||
|
ts.reset();
|
||||||
while (ts.incrementToken())
|
while (ts.incrementToken())
|
||||||
{
|
{
|
||||||
String term = termAtt.toString();
|
String term = termAtt.toString();
|
||||||
|
@ -213,17 +214,15 @@ public class FuzzyLikeThisQuery extends Query
|
||||||
BoostAttribute boostAtt =
|
BoostAttribute boostAtt =
|
||||||
fe.attributes().addAttribute(BoostAttribute.class);
|
fe.attributes().addAttribute(BoostAttribute.class);
|
||||||
while ((possibleMatch = fe.next()) != null) {
|
while ((possibleMatch = fe.next()) != null) {
|
||||||
if (possibleMatch!=null) {
|
numVariants++;
|
||||||
numVariants++;
|
totalVariantDocFreqs+=fe.docFreq();
|
||||||
totalVariantDocFreqs+=fe.docFreq();
|
float score=boostAtt.getBoost();
|
||||||
float score=boostAtt.getBoost();
|
if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
|
||||||
if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
|
ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)),score,startTerm);
|
||||||
ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)),score,startTerm);
|
variantsQ.insertWithOverflow(st);
|
||||||
variantsQ.insertWithOverflow(st);
|
minScore = variantsQ.top().score; // maintain minScore
|
||||||
minScore = variantsQ.top().score; // maintain minScore
|
|
||||||
}
|
|
||||||
maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
|
|
||||||
}
|
}
|
||||||
|
maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(numVariants>0)
|
if(numVariants>0)
|
||||||
|
@ -247,6 +246,8 @@ public class FuzzyLikeThisQuery extends Query
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
ts.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -885,7 +885,7 @@ public final class MoreLikeThis {
|
||||||
int tokenCount=0;
|
int tokenCount=0;
|
||||||
// for every token
|
// for every token
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
String word = termAtt.toString();
|
String word = termAtt.toString();
|
||||||
tokenCount++;
|
tokenCount++;
|
||||||
|
@ -906,6 +906,8 @@ public final class MoreLikeThis {
|
||||||
cnt.x++;
|
cnt.x++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
ts.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -110,6 +110,11 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
int countTokens = 0;
|
int countTokens = 0;
|
||||||
|
try {
|
||||||
|
source.reset();
|
||||||
|
} catch (IOException e1) {
|
||||||
|
throw new RuntimeException(e1);
|
||||||
|
}
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
if (!source.incrementToken()) break;
|
if (!source.incrementToken()) break;
|
||||||
|
@ -126,6 +131,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
|
@ -191,7 +197,11 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
||||||
List<String> tlist = new ArrayList<String>();
|
List<String> tlist = new ArrayList<String>();
|
||||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||||
|
try {
|
||||||
|
source.reset();
|
||||||
|
} catch (IOException e1) {
|
||||||
|
throw new RuntimeException(e1);
|
||||||
|
}
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
if (!source.incrementToken()) break;
|
if (!source.incrementToken()) break;
|
||||||
|
@ -202,6 +212,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
|
@ -242,6 +253,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
boolean multipleTokens = false;
|
boolean multipleTokens = false;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
source.reset();
|
||||||
if (source.incrementToken()) {
|
if (source.incrementToken()) {
|
||||||
nextToken = termAtt.toString();
|
nextToken = termAtt.toString();
|
||||||
}
|
}
|
||||||
|
@ -251,6 +263,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
|
@ -281,6 +294,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
try {
|
try {
|
||||||
source = getAnalyzer().tokenStream(field, new StringReader(part1));
|
source = getAnalyzer().tokenStream(field, new StringReader(part1));
|
||||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||||
|
source.reset();
|
||||||
multipleTokens = false;
|
multipleTokens = false;
|
||||||
|
|
||||||
|
|
||||||
|
@ -292,6 +306,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
|
@ -308,6 +323,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
source.reset();
|
||||||
if (source.incrementToken()) {
|
if (source.incrementToken()) {
|
||||||
part2 = termAtt.toString();
|
part2 = termAtt.toString();
|
||||||
}
|
}
|
||||||
|
@ -316,6 +332,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
|
|
|
@ -123,6 +123,11 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
|
||||||
|
|
||||||
TokenStream source = this.analyzer.tokenStream(field, new StringReader(
|
TokenStream source = this.analyzer.tokenStream(field, new StringReader(
|
||||||
text));
|
text));
|
||||||
|
try {
|
||||||
|
source.reset();
|
||||||
|
} catch (IOException e1) {
|
||||||
|
throw new RuntimeException(e1);
|
||||||
|
}
|
||||||
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
||||||
|
|
||||||
PositionIncrementAttribute posIncrAtt = null;
|
PositionIncrementAttribute posIncrAtt = null;
|
||||||
|
|
|
@ -118,12 +118,14 @@ public final class SynExpand {
|
||||||
// [1] Parse query into separate words so that when we expand we can avoid dups
|
// [1] Parse query into separate words so that when we expand we can avoid dups
|
||||||
TokenStream ts = a.tokenStream( field, new StringReader( query));
|
TokenStream ts = a.tokenStream( field, new StringReader( query));
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
String word = termAtt.toString();
|
String word = termAtt.toString();
|
||||||
if ( already.add( word))
|
if ( already.add( word))
|
||||||
top.add( word);
|
top.add( word);
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
ts.close();
|
||||||
final BooleanQuery tmp = new BooleanQuery();
|
final BooleanQuery tmp = new BooleanQuery();
|
||||||
|
|
||||||
// [2] form query
|
// [2] form query
|
||||||
|
|
|
@ -111,7 +111,6 @@ public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
|
||||||
setPreviousTokenStream(streams);
|
setPreviousTokenStream(streams);
|
||||||
} else {
|
} else {
|
||||||
streams.source.reset(reader);
|
streams.source.reset(reader);
|
||||||
streams.result.reset(); // reset the SynonymTokenFilter
|
|
||||||
}
|
}
|
||||||
return streams.result;
|
return streams.result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -80,9 +80,12 @@ public class LikeThisQueryBuilder implements QueryBuilder {
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
ts.reset();
|
||||||
while(ts.incrementToken()) {
|
while(ts.incrementToken()) {
|
||||||
stopWordsSet.add(termAtt.toString());
|
stopWordsSet.add(termAtt.toString());
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
ts.close();
|
||||||
}
|
}
|
||||||
catch(IOException ioe)
|
catch(IOException ioe)
|
||||||
{
|
{
|
||||||
|
|
|
@ -59,11 +59,14 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
|
||||||
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
|
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, new BytesRef(bytes)));
|
SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, new BytesRef(bytes)));
|
||||||
clausesList.add(stq);
|
clausesList.add(stq);
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
ts.close();
|
||||||
SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
|
SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
|
||||||
soq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
|
soq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
|
||||||
return soq;
|
return soq;
|
||||||
|
|
|
@ -64,6 +64,7 @@ public class TermsFilterBuilder implements FilterBuilder
|
||||||
{
|
{
|
||||||
Term term = null;
|
Term term = null;
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
if (term == null)
|
if (term == null)
|
||||||
|
@ -76,6 +77,8 @@ public class TermsFilterBuilder implements FilterBuilder
|
||||||
}
|
}
|
||||||
tf.addTerm(term);
|
tf.addTerm(term);
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
ts.close();
|
||||||
}
|
}
|
||||||
catch (IOException ioe)
|
catch (IOException ioe)
|
||||||
{
|
{
|
||||||
|
|
|
@ -61,6 +61,7 @@ public class TermsQueryBuilder implements QueryBuilder {
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
Term term = null;
|
Term term = null;
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
if (term == null)
|
if (term == null)
|
||||||
|
@ -73,6 +74,8 @@ public class TermsQueryBuilder implements QueryBuilder {
|
||||||
}
|
}
|
||||||
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
|
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
ts.close();
|
||||||
}
|
}
|
||||||
catch (IOException ioe)
|
catch (IOException ioe)
|
||||||
{
|
{
|
||||||
|
|
|
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Contributions
@@ -275,7 +275,7 @@ document.write("Last Published: " + document.lastModified);
<a href="#PDFTextStream -- PDF text and metadata extraction">PDFTextStream -- PDF text and metadata extraction</a>
</li>
<li>
<a href="#PJ Classic & PJ Professional - PDF Document Conversion">PJ Classic & PJ Professional - PDF Document Conversion</a>
</li>
</ul>
</li>
@@ -403,7 +403,7 @@ document.write("Last Published: " + document.lastModified);
URL
</th>
<td>
<a href="http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2">
http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2
</a>
</td>
@@ -538,7 +538,7 @@ document.write("Last Published: " + document.lastModified);
</tr>

</table>
<a name="N10124"></a><a name="PJ Classic & PJ Professional - PDF Document Conversion"></a>
<h3 class="boxed">PJ Classic & PJ Professional - PDF Document Conversion</h3>
<table class="ForrestTable" cellspacing="1" cellpadding="4">

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Building and Installing the Basic Demo

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Basic Demo Sources Walk-through

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Index File Formats
@@ -425,11 +425,19 @@ document.write("Last Published: " + document.lastModified);
<p>
In version 3.1, segments records the code version
that created them. See LUCENE-2720 for details.

+Additionally segments track explicitly whether or
+not they have term vectors. See LUCENE-2811 for details.
+</p>
+<p>
+In version 3.2, numeric fields are written as natively
+to stored fields file, previously they were stored in
+text format only.
</p>
</div>


-<a name="N10037"></a><a name="Definitions"></a>
+<a name="N1003A"></a><a name="Definitions"></a>
<h2 class="boxed">Definitions</h2>
<div class="section">
<p>
@@ -470,7 +478,7 @@ document.write("Last Published: " + document.lastModified);
strings, the first naming the field, and the second naming text
within the field.
</p>
-<a name="N10057"></a><a name="Inverted Indexing"></a>
+<a name="N1005A"></a><a name="Inverted Indexing"></a>
<h3 class="boxed">Inverted Indexing</h3>
<p>
The index stores statistics about terms in order
@@ -480,7 +488,7 @@ document.write("Last Published: " + document.lastModified);
it. This is the inverse of the natural relationship, in which
documents list terms.
</p>
-<a name="N10063"></a><a name="Types of Fields"></a>
+<a name="N10066"></a><a name="Types of Fields"></a>
<h3 class="boxed">Types of Fields</h3>
<p>
In Lucene, fields may be <i>stored</i>, in which
@@ -494,7 +502,7 @@ document.write("Last Published: " + document.lastModified);
to be indexed literally.
</p>
<p>See the <a href="api/core/org/apache/lucene/document/Field.html">Field</a> java docs for more information on Fields.</p>
-<a name="N10080"></a><a name="Segments"></a>
+<a name="N10083"></a><a name="Segments"></a>
<h3 class="boxed">Segments</h3>
<p>
Lucene indexes may be composed of multiple sub-indexes, or
@@ -520,7 +528,7 @@ document.write("Last Published: " + document.lastModified);
Searches may involve multiple segments and/or multiple indexes, each
index potentially composed of a set of segments.
</p>
-<a name="N1009E"></a><a name="Document Numbers"></a>
+<a name="N100A1"></a><a name="Document Numbers"></a>
<h3 class="boxed">Document Numbers</h3>
<p>
Internally, Lucene refers to documents by an integer <i>document
@@ -575,7 +583,7 @@ document.write("Last Published: " + document.lastModified);
</div>


-<a name="N100C5"></a><a name="Overview"></a>
+<a name="N100C8"></a><a name="Overview"></a>
<h2 class="boxed">Overview</h2>
<div class="section">
<p>
@@ -674,7 +682,7 @@ document.write("Last Published: " + document.lastModified);
</div>


-<a name="N10108"></a><a name="File Naming"></a>
+<a name="N1010B"></a><a name="File Naming"></a>
<h2 class="boxed">File Naming</h2>
<div class="section">
<p>
@@ -701,7 +709,7 @@ document.write("Last Published: " + document.lastModified);
</p>
</div>

-<a name="N10117"></a><a name="file-names"></a>
+<a name="N1011A"></a><a name="file-names"></a>
<h2 class="boxed">Summary of File Extensions</h2>
<div class="section">
<p>The following table summarizes the names and extensions of the files in Lucene:
@@ -843,10 +851,10 @@ document.write("Last Published: " + document.lastModified);
</div>


-<a name="N10201"></a><a name="Primitive Types"></a>
+<a name="N10204"></a><a name="Primitive Types"></a>
<h2 class="boxed">Primitive Types</h2>
<div class="section">
-<a name="N10206"></a><a name="Byte"></a>
+<a name="N10209"></a><a name="Byte"></a>
<h3 class="boxed">Byte</h3>
<p>
The most primitive type
@@ -854,7 +862,7 @@ document.write("Last Published: " + document.lastModified);
other data types are defined as sequences
of bytes, so file formats are byte-order independent.
</p>
-<a name="N1020F"></a><a name="UInt32"></a>
+<a name="N10212"></a><a name="UInt32"></a>
<h3 class="boxed">UInt32</h3>
<p>
32-bit unsigned integers are written as four
@@ -864,7 +872,7 @@ document.write("Last Published: " + document.lastModified);
UInt32 --> <Byte><sup>4</sup>

</p>
-<a name="N1021E"></a><a name="Uint64"></a>
+<a name="N10221"></a><a name="Uint64"></a>
<h3 class="boxed">Uint64</h3>
<p>
64-bit unsigned integers are written as eight
@@ -873,7 +881,7 @@ document.write("Last Published: " + document.lastModified);
<p>UInt64 --> <Byte><sup>8</sup>

</p>
-<a name="N1022D"></a><a name="VInt"></a>
+<a name="N10230"></a><a name="VInt"></a>
<h3 class="boxed">VInt</h3>
<p>
A variable-length format for positive integers is
@@ -1423,13 +1431,13 @@ document.write("Last Published: " + document.lastModified);
This provides compression while still being
efficient to decode.
</p>
-<a name="N10512"></a><a name="Chars"></a>
+<a name="N10515"></a><a name="Chars"></a>
<h3 class="boxed">Chars</h3>
<p>
Lucene writes unicode
character sequences as UTF-8 encoded bytes.
</p>
-<a name="N1051B"></a><a name="String"></a>
+<a name="N1051E"></a><a name="String"></a>
<h3 class="boxed">String</h3>
<p>
Lucene writes strings as UTF-8 encoded bytes.
@@ -1442,10 +1450,10 @@ document.write("Last Published: " + document.lastModified);
</div>


-<a name="N10528"></a><a name="Compound Types"></a>
+<a name="N1052B"></a><a name="Compound Types"></a>
<h2 class="boxed">Compound Types</h2>
<div class="section">
-<a name="N1052D"></a><a name="MapStringString"></a>
+<a name="N10530"></a><a name="MapStringString"></a>
<h3 class="boxed">Map<String,String></h3>
<p>
In a couple places Lucene stores a Map
@@ -1458,13 +1466,13 @@ document.write("Last Published: " + document.lastModified);
</div>


-<a name="N1053D"></a><a name="Per-Index Files"></a>
+<a name="N10540"></a><a name="Per-Index Files"></a>
<h2 class="boxed">Per-Index Files</h2>
<div class="section">
<p>
The files in this section exist one-per-index.
</p>
-<a name="N10545"></a><a name="Segments File"></a>
+<a name="N10548"></a><a name="Segments File"></a>
<h3 class="boxed">Segments File</h3>
<p>
The active segments in the index are stored in the
@@ -1508,7 +1516,7 @@ document.write("Last Published: " + document.lastModified);
<b>3.1</b>
Segments --> Format, Version, NameCounter, SegCount, <SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
-IsCompoundFile, DeletionCount, HasProx, Diagnostics><sup>SegCount</sup>, CommitUserData, Checksum
+IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors><sup>SegCount</sup>, CommitUserData, Checksum
</p>
<p>
Format, NameCounter, SegCount, SegSize, NumField,
@@ -1525,7 +1533,7 @@ document.write("Last Published: " + document.lastModified);
</p>
<p>
IsCompoundFile, HasSingleNormFile,
-DocStoreIsCompoundFile, HasProx --> Int8
+DocStoreIsCompoundFile, HasProx, HasVectors --> Int8
</p>
<p>
CommitUserData --> Map<String,String>
@@ -1634,7 +1642,10 @@ document.write("Last Published: " + document.lastModified);
Lucene version, OS, Java version, why the segment
was created (merge, flush, addIndexes), etc.
</p>
-<a name="N105CD"></a><a name="Lock File"></a>
+<p> HasVectors is 1 if this segment stores term vectors,
+else it's 0.
+</p>
+<a name="N105D3"></a><a name="Lock File"></a>
<h3 class="boxed">Lock File</h3>
<p>
The write lock, which is stored in the index
@@ -1648,14 +1659,14 @@ document.write("Last Published: " + document.lastModified);
documents). This lock file ensures that only one
writer is modifying the index at a time.
</p>
-<a name="N105D6"></a><a name="Deletable File"></a>
+<a name="N105DC"></a><a name="Deletable File"></a>
<h3 class="boxed">Deletable File</h3>
<p>
A writer dynamically computes
the files that are deletable, instead, so no file
is written.
</p>
-<a name="N105DF"></a><a name="Compound Files"></a>
+<a name="N105E5"></a><a name="Compound Files"></a>
<h3 class="boxed">Compound Files</h3>
<p>Starting with Lucene 1.4 the compound file format became default. This
is simply a container for all files described in the next section
@@ -1682,14 +1693,14 @@ document.write("Last Published: " + document.lastModified);
</div>


-<a name="N10607"></a><a name="Per-Segment Files"></a>
+<a name="N1060D"></a><a name="Per-Segment Files"></a>
<h2 class="boxed">Per-Segment Files</h2>
<div class="section">
<p>
The remaining files are all per-segment, and are
thus defined by suffix.
</p>
-<a name="N1060F"></a><a name="Fields"></a>
+<a name="N10615"></a><a name="Fields"></a>
<h3 class="boxed">Fields</h3>
<p>

@@ -1863,12 +1874,28 @@ document.write("Last Published: " + document.lastModified);
(if compression is enabled, the algorithm used is ZLIB),
only available for indexes until Lucene version 2.9.x</li>

+<li>4th to 6th bits (mask: 0x7<<3) define the type of a
+numeric field: <ul>
+
+<li>all bits in mask are cleared if no numeric field at all</li>
+
+<li>1<<3: Value is Int</li>
+
+<li>2<<3: Value is Long</li>
+
+<li>3<<3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
+
+<li>4<<3: Value is Long as Double (as of Double.longBitsToDouble)</li>
+
+</ul>
+</li>
+
</ul>

</p>

<p>Value -->
-String | BinaryValue (depending on Bits)
+String | BinaryValue | Int | Long (depending on Bits)
</p>

<p>BinaryValue -->
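To make the new bit layout concrete, here is a small, hypothetical Java sketch of how a reader could decode the numeric-type bits from the stored-field status byte described above; the constant and method names are illustrative, not the actual Lucene identifiers:

    // Bits 3-5 (mask 0x7 << 3) select the numeric type of a stored field value.
    static final int NUMERIC_MASK = 0x7 << 3; // 0b111000

    static String numericType(byte bits) {
      switch ((bits & NUMERIC_MASK) >>> 3) {
        case 0:  return "not numeric";                      // all mask bits cleared
        case 1:  return "Int";                              // 1<<3
        case 2:  return "Long";                             // 2<<3
        case 3:  return "Float (Integer.intBitsToFloat)";   // 3<<3
        case 4:  return "Double (Double.longBitsToDouble)"; // 4<<3
        default: return "reserved";
      }
    }
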
@@ -1883,7 +1910,7 @@ document.write("Last Published: " + document.lastModified);
</li>

</ol>
-<a name="N106B6"></a><a name="Term Dictionary"></a>
+<a name="N106D0"></a><a name="Term Dictionary"></a>
<h3 class="boxed">Term Dictionary</h3>
<p>
The term dictionary is represented as two files:
@@ -2075,7 +2102,7 @@ document.write("Last Published: " + document.lastModified);
</li>

</ol>
-<a name="N1073A"></a><a name="Frequencies"></a>
+<a name="N10754"></a><a name="Frequencies"></a>
<h3 class="boxed">Frequencies</h3>
<p>
The .frq file contains the lists of documents
@@ -2203,7 +2230,7 @@ document.write("Last Published: " + document.lastModified);
entry in level-1. In the example has entry 15 on level 1 a pointer to entry 15 on level 0 and entry 31 on level 1 a pointer
to entry 31 on level 0.
</p>
-<a name="N107C2"></a><a name="Positions"></a>
+<a name="N107DC"></a><a name="Positions"></a>
<h3 class="boxed">Positions</h3>
<p>
The .prx file contains the lists of positions that
@@ -2273,7 +2300,7 @@ document.write("Last Published: " + document.lastModified);
Payload. If PayloadLength is not stored, then this Payload has the same
length as the Payload at the previous position.
</p>
-<a name="N107FE"></a><a name="Normalization Factors"></a>
+<a name="N10818"></a><a name="Normalization Factors"></a>
<h3 class="boxed">Normalization Factors</h3>
<p>There's a single .nrm file containing all norms:
</p>
@@ -2353,7 +2380,7 @@ document.write("Last Published: " + document.lastModified);
</p>
<p>Separate norm files are created (when adequate) for both compound and non compound segments.
</p>
-<a name="N1084F"></a><a name="Term Vectors"></a>
+<a name="N10869"></a><a name="Term Vectors"></a>
<h3 class="boxed">Term Vectors</h3>
<p>
Term Vector support is an optional on a field by
@@ -2489,7 +2516,7 @@ document.write("Last Published: " + document.lastModified);
</li>

</ol>
-<a name="N108EB"></a><a name="Deleted Documents"></a>
+<a name="N10905"></a><a name="Deleted Documents"></a>
<h3 class="boxed">Deleted Documents</h3>
<p>The .del file is
optional, and only exists when a segment contains deletions.
@@ -2553,7 +2580,7 @@ document.write("Last Published: " + document.lastModified);
</div>


-<a name="N10925"></a><a name="Limitations"></a>
+<a name="N1093F"></a><a name="Limitations"></a>
<h2 class="boxed">Limitations</h2>
<div class="section">
<p>

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Getting Started Guide
@@ -269,14 +269,12 @@ may wish to skip sections.
<li>
<a href="demo.html">About the command-line Lucene demo and its usage</a>. This section
is intended for anyone who wants to use the command-line Lucene demo.</li>
-<p></p>


<li>
<a href="demo2.html">About the sources and implementation for the command-line Lucene
demo</a>. This section walks through the implementation details (sources) of the
command-line Lucene demo. This section is intended for developers.</li>
-<p></p>


</ul>
</div>

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Lucene Java Documentation</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Site Linkmap Table of Contents</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Lucene Contrib

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Query Parser Syntax

Binary file not shown.

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Scoring

Binary file not shown.
Binary file not shown. (new image, 4.7 KiB)
Binary file not shown. (new image, 2.2 KiB)

@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Apache Lucene - System Requirements</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">

Binary file not shown.

@@ -131,8 +131,13 @@ public final class Document {
  /** Returns a field with the given name if any exist in this document, or
   * null. If multiple fields exists with this name, this method returns the
   * first value added.
-  * Do not use this method with lazy loaded fields.
+  * Do not use this method with lazy loaded fields or {@link NumericField}.
+  * @deprecated use {@link #getFieldable} instead and cast depending on
+  * data type.
+  * @throws ClassCastException if you try to retrieve a numerical or
+  * lazy loaded field.
   */
+  @Deprecated
  public final Field getField(String name) {
    return (Field) getFieldable(name);
  }
@@ -154,6 +159,8 @@ public final class Document {
   * this document, or null. If multiple fields exist with this name, this
   * method returns the first value added. If only binary fields with this name
   * exist, returns null.
+  * For {@link NumericField} it returns the string value of the number. If you want
+  * the actual {@code NumericField} instance back, use {@link #getFieldable}.
   */
  public final String get(String name) {
    for (Fieldable field : fields) {
@@ -177,13 +184,18 @@ public final class Document {

  /**
   * Returns an array of {@link Field}s with the given name.
-  * Do not use with lazy loaded fields.
   * This method returns an empty array when there are no
   * matching fields. It never returns null.
+  * Do not use this method with lazy loaded fields or {@link NumericField}.
   *
   * @param name the name of the field
   * @return a <code>Field[]</code> array
+  * @deprecated use {@link #getFieldable} instead and cast depending on
+  * data type.
+  * @throws ClassCastException if you try to retrieve a numerical or
+  * lazy loaded field.
   */
+  @Deprecated
  public final Field[] getFields(String name) {
    List<Field> result = new ArrayList<Field>();
    for (Fieldable field : fields) {
@@ -230,6 +242,8 @@ public final class Document {
   * Returns an array of values of the field specified as the method parameter.
   * This method returns an empty array when there are no
   * matching fields. It never returns null.
+  * For {@link NumericField}s it returns the string value of the number. If you want
+  * the actual {@code NumericField} instances back, use {@link #getFieldables}.
   * @param name the name of the field
   * @return a <code>String[]</code> of field values
   */

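The deprecation notes above point callers at getFieldable rather than getField for numeric values. A hedged sketch of what that looks like on the consuming side; the field name and searcher setup are hypothetical:

    Document doc = searcher.doc(scoreDoc.doc);        // an assumed search result
    Fieldable f = doc.getFieldable("price");          // "price" is an illustrative field name
    if (f instanceof NumericField) {
      Number value = ((NumericField) f).getNumericValue(); // typed value, e.g. Long or Double
      // doc.get("price") would only return the number's string form
    } else if (f != null) {
      String text = f.stringValue();
    }
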
@@ -127,18 +127,18 @@ import org.apache.lucene.search.FieldCache; // javadocs
 * class is a wrapper around this token stream type for
 * easier, more intuitive usage.</p>
 *
-* <p><b>NOTE:</b> This class is only used during
-* indexing. When retrieving the stored field value from a
-* {@link Document} instance after search, you will get a
-* conventional {@link Fieldable} instance where the numeric
-* values are returned as {@link String}s (according to
-* <code>toString(value)</code> of the used data type).
-*
 * @since 2.9
 */
public final class NumericField extends AbstractField {

-  private final NumericTokenStream numericTS;
+  /** Data type of the value in {@link NumericField}.
+   * @since 3.2
+   */
+  public static enum DataType { INT, LONG, FLOAT, DOUBLE }
+
+  private transient NumericTokenStream numericTS;
+  private DataType type;
+  private final int precisionStep;

  /**
   * Creates a field for numeric values using the default <code>precisionStep</code>
@@ -158,8 +158,8 @@ public final class NumericField extends AbstractField {
   * a numeric value, before indexing a document containing this field,
   * set a value using the various set<em>???</em>Value() methods.
   * @param name the field name
-  * @param store if the field should be stored in plain text form
-  * (according to <code>toString(value)</code> of the used data type)
+  * @param store if the field should be stored, {@link Document#getFieldable}
+  * then returns {@code NumericField} instances on search results.
   * @param index if the field should be indexed using {@link NumericTokenStream}
   */
  public NumericField(String name, Field.Store store, boolean index) {
@@ -186,19 +186,43 @@ public final class NumericField extends AbstractField {
   * set a value using the various set<em>???</em>Value() methods.
   * @param name the field name
   * @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
-  * @param store if the field should be stored in plain text form
-  * (according to <code>toString(value)</code> of the used data type)
+  * @param store if the field should be stored, {@link Document#getFieldable}
+  * then returns {@code NumericField} instances on search results.
   * @param index if the field should be indexed using {@link NumericTokenStream}
   */
  public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
    super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
+    this.precisionStep = precisionStep;
    setOmitTermFreqAndPositions(true);
-    numericTS = new NumericTokenStream(precisionStep);
  }

  /** Returns a {@link NumericTokenStream} for indexing the numeric value. */
  public TokenStream tokenStreamValue() {
-    return isIndexed() ? numericTS : null;
+    if (!isIndexed())
+      return null;
+    if (numericTS == null) {
+      // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
+      // if not needed (stored field loading)
+      numericTS = new NumericTokenStream(precisionStep);
+      // initialize value in TokenStream
+      if (fieldsData != null) {
+        assert type != null;
+        final Number val = (Number) fieldsData;
+        switch (type) {
+          case INT:
+            numericTS.setIntValue(val.intValue()); break;
+          case LONG:
+            numericTS.setLongValue(val.longValue()); break;
+          case FLOAT:
+            numericTS.setFloatValue(val.floatValue()); break;
+          case DOUBLE:
+            numericTS.setDoubleValue(val.doubleValue()); break;
+          default:
+            assert false : "Should never get here";
+        }
+      }
+    }
+    return numericTS;
  }

  /** Returns always <code>null</code> for numeric fields */
@@ -212,7 +236,10 @@ public final class NumericField extends AbstractField {
    return null;
  }

-  /** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
+  /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
+   * on search results. It is recommended to use {@link Document#getFieldable} instead
+   * that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
+   * to return the stored value. */
  public String stringValue() {
    return (fieldsData == null) ? null : fieldsData.toString();
  }
@@ -224,7 +251,14 @@ public final class NumericField extends AbstractField {

  /** Returns the precision step. */
  public int getPrecisionStep() {
-    return numericTS.getPrecisionStep();
+    return precisionStep;
+  }
+
+  /** Returns the data type of the current value, {@code null} if not yet set.
+   * @since 3.2
+   */
+  public DataType getDataType() {
+    return type;
  }

  /**
@@ -234,8 +268,9 @@ public final class NumericField extends AbstractField {
   * <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
   */
  public NumericField setLongValue(final long value) {
-    numericTS.setLongValue(value);
+    if (numericTS != null) numericTS.setLongValue(value);
    fieldsData = Long.valueOf(value);
+    type = DataType.LONG;
    return this;
  }

@@ -246,8 +281,9 @@ public final class NumericField extends AbstractField {
   * <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
   */
  public NumericField setIntValue(final int value) {
-    numericTS.setIntValue(value);
+    if (numericTS != null) numericTS.setIntValue(value);
    fieldsData = Integer.valueOf(value);
+    type = DataType.INT;
    return this;
  }

@@ -258,8 +294,9 @@ public final class NumericField extends AbstractField {
   * <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
   */
  public NumericField setDoubleValue(final double value) {
-    numericTS.setDoubleValue(value);
+    if (numericTS != null) numericTS.setDoubleValue(value);
    fieldsData = Double.valueOf(value);
+    type = DataType.DOUBLE;
    return this;
  }

@@ -270,8 +307,9 @@ public final class NumericField extends AbstractField {
   * <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
   */
  public NumericField setFloatValue(final float value) {
-    numericTS.setFloatValue(value);
+    if (numericTS != null) numericTS.setFloatValue(value);
    fieldsData = Float.valueOf(value);
+    type = DataType.FLOAT;
    return this;
  }

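For context, the indexing-side usage these changes preserve is the pattern already cited in the setter javadocs above; a minimal, hypothetical sketch (field name and writer are assumptions, not part of this commit):

    Document doc = new Document();
    // With Field.Store.YES the value can later be retrieved from search results
    // as a NumericField via Document#getFieldable.
    doc.add(new NumericField("price", Field.Store.YES, true).setLongValue(42L)); // "price" is illustrative
    writer.addDocument(doc); // writer is an assumed IndexWriter
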
@@ -132,9 +132,9 @@ class BufferedDeletesStream {
    public final long gen;

    // If non-null, contains segments that are 100% deleted
-    public final SegmentInfos allDeleted;
+    public final List<SegmentInfo> allDeleted;

-    ApplyDeletesResult(boolean anyDeletes, long gen, SegmentInfos allDeleted) {
+    ApplyDeletesResult(boolean anyDeletes, long gen, List<SegmentInfo> allDeleted) {
      this.anyDeletes = anyDeletes;
      this.gen = gen;
      this.allDeleted = allDeleted;
@@ -164,7 +164,7 @@ class BufferedDeletesStream {
  /** Resolves the buffered deleted Term/Query/docIDs, into
   * actual deleted docIDs in the deletedDocs BitVector for
   * each SegmentReader. */
-  public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos infos) throws IOException {
+  public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List<SegmentInfo> infos) throws IOException {
    final long t0 = System.currentTimeMillis();

    if (infos.size() == 0) {
@@ -182,7 +182,7 @@ class BufferedDeletesStream {
      message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size());
    }

-    SegmentInfos infos2 = new SegmentInfos();
+    List<SegmentInfo> infos2 = new ArrayList<SegmentInfo>();
    infos2.addAll(infos);
    Collections.sort(infos2, sortSegInfoByDelGen);

@@ -192,7 +192,7 @@ class BufferedDeletesStream {
    int infosIDX = infos2.size()-1;
    int delIDX = deletes.size()-1;

-    SegmentInfos allDeleted = null;
+    List<SegmentInfo> allDeleted = null;

    while (infosIDX >= 0) {
      //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
@@ -245,7 +245,7 @@ class BufferedDeletesStream {

        if (segAllDeletes) {
          if (allDeleted == null) {
-            allDeleted = new SegmentInfos();
+            allDeleted = new ArrayList<SegmentInfo>();
          }
          allDeleted.add(info);
        }
@@ -287,7 +287,7 @@ class BufferedDeletesStream {

        if (segAllDeletes) {
          if (allDeleted == null) {
-            allDeleted = new SegmentInfos();
+            allDeleted = new ArrayList<SegmentInfo>();
          }
          allDeleted.add(info);
        }

@@ -46,8 +46,10 @@ import org.apache.lucene.util.IOUtils;
 * file. The {directory} that follows has that many entries. Each directory entry
 * contains a long pointer to the start of this file's data section, and a String
 * with that file's name.
+ *
+ * @lucene.internal
 */
-final class CompoundFileWriter {
+public final class CompoundFileWriter {

  static final class FileEntry {

@@ -137,8 +139,7 @@ final class CompoundFileWriter {

  /** Merge files with the extensions added up to now.
   * All files with these extensions are combined sequentially into the
-  * compound stream. After successful merge, the source files
-  * are deleted.
+  * compound stream.
   * @throws IllegalStateException if close() had been called before or
   * if no file has been added to this object
   */

@@ -135,8 +135,8 @@ public class ConcurrentMergeScheduler extends MergeScheduler {
      final MergePolicy.OneMerge m1 = t1.getCurrentMerge();
      final MergePolicy.OneMerge m2 = t2.getCurrentMerge();

-      final int c1 = m1 == null ? Integer.MAX_VALUE : m1.segments.totalDocCount();
-      final int c2 = m2 == null ? Integer.MAX_VALUE : m2.segments.totalDocCount();
+      final int c1 = m1 == null ? Integer.MAX_VALUE : m1.totalDocCount;
+      final int c2 = m2 == null ? Integer.MAX_VALUE : m2.totalDocCount;

      return c2 - c1;
    }

@@ -263,9 +263,10 @@ final class DocFieldProcessor extends DocConsumer {
    // enabled; we could save [small amount of] CPU
    // here.
    ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
-    for(int i=0;i<fieldCount;i++)
-      fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
+    for(int i=0;i<fieldCount;i++) {
+      final DocFieldProcessorPerField perField = fields[i];
+      perField.consumer.processFields(perField.fields, perField.fieldCount);
+    }

    if (docState.maxTermPrefix != null && docState.infoStream != null) {
      docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");

@@ -188,7 +188,7 @@ final class DocumentsWriter {
    this.infoStream = infoStream;
    final Iterator<ThreadState> it = perThreadPool.getAllPerThreadsIterator();
    while (it.hasNext()) {
-      it.next().perThread.docState.infoStream = infoStream;
+      it.next().perThread.setInfoStream(infoStream);
    }
  }

@ -63,9 +63,10 @@ import org.apache.lucene.search.Query;
|
||||||
*/
|
*/
|
||||||
final class DocumentsWriterDeleteQueue {
|
final class DocumentsWriterDeleteQueue {
|
||||||
|
|
||||||
private volatile Node tail;
|
private volatile Node<?> tail;
|
||||||
|
|
||||||
private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue, Node> tailUpdater = AtomicReferenceFieldUpdater
|
@SuppressWarnings("rawtypes")
|
||||||
|
private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue,Node> tailUpdater = AtomicReferenceFieldUpdater
|
||||||
.newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail");
|
.newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail");
|
||||||
|
|
||||||
private final DeleteSlice globalSlice;
|
private final DeleteSlice globalSlice;
|
||||||
|
@ -90,7 +91,7 @@ final class DocumentsWriterDeleteQueue {
|
||||||
* we use a sentinel instance as our initial tail. No slice will ever try to
|
* we use a sentinel instance as our initial tail. No slice will ever try to
|
||||||
* apply this tail since the head is always omitted.
|
* apply this tail since the head is always omitted.
|
||||||
*/
|
*/
|
||||||
tail = new Node(null); // sentinel
|
tail = new Node<Object>(null); // sentinel
|
||||||
globalSlice = new DeleteSlice(tail);
|
globalSlice = new DeleteSlice(tail);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,14 +127,14 @@ final class DocumentsWriterDeleteQueue {
|
||||||
// we can do it just every n times or so?
|
// we can do it just every n times or so?
|
||||||
}
|
}
|
||||||
|
|
||||||
void add(Node item) {
|
void add(Node<?> item) {
|
||||||
/*
|
/*
|
||||||
* this non-blocking / 'wait-free' linked list add was inspired by Apache
|
* this non-blocking / 'wait-free' linked list add was inspired by Apache
|
||||||
* Harmony's ConcurrentLinkedQueue Implementation.
|
* Harmony's ConcurrentLinkedQueue Implementation.
|
||||||
*/
|
*/
|
||||||
while (true) {
|
while (true) {
|
||||||
final Node currentTail = this.tail;
|
final Node<?> currentTail = this.tail;
|
||||||
final Node tailNext = currentTail.next;
|
final Node<?> tailNext = currentTail.next;
|
||||||
if (tail == currentTail) {
|
if (tail == currentTail) {
|
||||||
if (tailNext != null) {
|
if (tailNext != null) {
|
||||||
/*
|
/*
|
||||||
|
@ -196,7 +197,7 @@ final class DocumentsWriterDeleteQueue {
|
||||||
* deletes in the queue and reset the global slice to let the GC prune the
|
* deletes in the queue and reset the global slice to let the GC prune the
|
||||||
* queue.
|
* queue.
|
||||||
*/
|
*/
|
||||||
final Node currentTail = tail; // take the current tail make this local any
|
final Node<?> currentTail = tail; // take the current tail make this local any
|
||||||
// Changes after this call are applied later
|
// Changes after this call are applied later
|
||||||
// and not relevant here
|
// and not relevant here
|
||||||
if (callerSlice != null) {
|
if (callerSlice != null) {
|
||||||
|
@ -232,10 +233,10 @@ final class DocumentsWriterDeleteQueue {
|
||||||
|
|
||||||
static class DeleteSlice {
|
static class DeleteSlice {
|
||||||
// No need to be volatile, slices are thread captive (only accessed by one thread)!
|
// No need to be volatile, slices are thread captive (only accessed by one thread)!
|
||||||
Node sliceHead; // we don't apply this one
|
Node<?> sliceHead; // we don't apply this one
|
||||||
Node sliceTail;
|
Node<?> sliceTail;
|
-  DeleteSlice(Node currentTail) {
+  DeleteSlice(Node<?> currentTail) {
     assert currentTail != null;
     /*
      * Initially this is a 0 length slice pointing to the 'current' tail of
@@ -256,7 +257,7 @@ final class DocumentsWriterDeleteQueue {
      * tail in this slice are not equal then there will be at least one more
      * non-null node in the slice!
      */
-    Node current = sliceHead;
+    Node<?> current = sliceHead;
     do {
       current = current.next;
       assert current != null : "slice property violated between the head on the tail must not be a null node";
@@ -290,7 +291,7 @@ final class DocumentsWriterDeleteQueue {
   void clear() {
     globalBufferLock.lock();
     try {
-      final Node currentTail = tail;
+      final Node<?> currentTail = tail;
       globalSlice.sliceHead = globalSlice.sliceTail = currentTail;
       globalBufferedDeletes.clear();
     } finally {
@@ -298,27 +299,28 @@ final class DocumentsWriterDeleteQueue {
     }
   }
 
-  private static class Node {
-    volatile Node next;
-    final Object item;
+  private static class Node<T> {
+    volatile Node<?> next;
+    final T item;
 
-    private Node(Object item) {
+    Node(T item) {
       this.item = item;
     }
 
-    static final AtomicReferenceFieldUpdater<Node, Node> nextUpdater = AtomicReferenceFieldUpdater
+    @SuppressWarnings("rawtypes")
+    static final AtomicReferenceFieldUpdater<Node,Node> nextUpdater = AtomicReferenceFieldUpdater
         .newUpdater(Node.class, Node.class, "next");
 
     void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
       assert false : "sentinel item must never be applied";
     }
 
-    boolean casNext(Node cmp, Node val) {
+    boolean casNext(Node<?> cmp, Node<?> val) {
       return nextUpdater.compareAndSet(this, cmp, val);
     }
   }
 
-  private static final class TermNode extends Node {
+  private static final class TermNode extends Node<Term> {
 
     TermNode(Term term) {
       super(term);
@@ -326,33 +328,31 @@ final class DocumentsWriterDeleteQueue {
 
     @Override
     void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
-      bufferedDeletes.addTerm((Term) item, docIDUpto);
+      bufferedDeletes.addTerm(item, docIDUpto);
     }
   }
 
-  private static final class QueryArrayNode extends Node {
+  private static final class QueryArrayNode extends Node<Query[]> {
     QueryArrayNode(Query[] query) {
       super(query);
     }
 
     @Override
     void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
-      final Query[] queries = (Query[]) item;
-      for (Query query : queries) {
+      for (Query query : item) {
         bufferedDeletes.addQuery(query, docIDUpto);
       }
     }
   }
 
-  private static final class TermArrayNode extends Node {
+  private static final class TermArrayNode extends Node<Term[]> {
     TermArrayNode(Term[] term) {
       super(term);
     }
 
     @Override
     void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
-      final Term[] terms = (Term[]) item;
-      for (Term term : terms) {
+      for (Term term : item) {
         bufferedDeletes.addTerm(term, docIDUpto);
       }
     }
@@ -361,7 +361,7 @@ final class DocumentsWriterDeleteQueue {
 
   private boolean forceApplyGlobalSlice() {
     globalBufferLock.lock();
-    final Node currentTail = tail;
+    final Node<?> currentTail = tail;
     try {
       if (globalSlice.sliceTail != currentTail) {
         globalSlice.sliceTail = currentTail;
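For context on the @SuppressWarnings("rawtypes") line above: AtomicReferenceFieldUpdater.newUpdater takes class literals, which cannot carry the Node<T> type parameter, so the updater field has to be declared against the raw class. A minimal self-contained sketch of that idiom, with a hypothetical DemoNode class standing in for the queue's Node<T>:

import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

// Hypothetical stand-in for the delete queue's generic Node; illustrates only the updater idiom.
class DemoNode<T> {
  final T item;
  volatile DemoNode<?> next;

  DemoNode(T item) {
    this.item = item;
  }

  // Class literals are raw, so the updater is declared on the raw type (hence the suppression).
  @SuppressWarnings("rawtypes")
  static final AtomicReferenceFieldUpdater<DemoNode, DemoNode> NEXT_UPDATER =
      AtomicReferenceFieldUpdater.newUpdater(DemoNode.class, DemoNode.class, "next");

  boolean casNext(DemoNode<?> cmp, DemoNode<?> val) {
    // Atomically link 'val' only if 'next' is still 'cmp', as in the lock-free append path.
    return NEXT_UPDATER.compareAndSet(this, cmp, val);
  }
}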
@@ -122,13 +122,13 @@ public final class DocumentsWriterFlushControl {
         // is super important since we can not address more than 2048 MB per DWPT
         setFlushPending(perThread);
         if (fullFlush) {
-          DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread, false);
+          DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
           assert toBlock != null;
           blockedFlushes.add(toBlock);
         }
       }
     }
-    final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread, false);
+    final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
     healthiness.updateStalled(this);
     return flushingDWPT;
   }
@@ -189,18 +189,15 @@ public final class DocumentsWriterFlushControl {
   }
 
   synchronized DocumentsWriterPerThread tryCheckoutForFlush(
-      ThreadState perThread, boolean setPending) {
+      ThreadState perThread) {
     if (fullFlush) {
       return null;
     }
-    return internalTryCheckOutForFlush(perThread, setPending);
+    return internalTryCheckOutForFlush(perThread);
   }
 
   private DocumentsWriterPerThread internalTryCheckOutForFlush(
-      ThreadState perThread, boolean setPending) {
-    if (setPending && !perThread.flushPending) {
-      setFlushPending(perThread);
-    }
+      ThreadState perThread) {
     if (perThread.flushPending) {
       // We are pending so all memory is already moved to flushBytes
       if (perThread.tryLock()) {
@@ -245,7 +242,7 @@ public final class DocumentsWriterFlushControl {
     while (allActiveThreads.hasNext() && numPending > 0) {
       ThreadState next = allActiveThreads.next();
       if (next.flushPending) {
-        final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next, false);
+        final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next);
         if (dwpt != null) {
           return dwpt;
         }
@@ -330,7 +327,12 @@ public final class DocumentsWriterFlushControl {
       }
       if (next.perThread.getNumDocsInRAM() > 0 ) {
         final DocumentsWriterPerThread dwpt = next.perThread; // just for assert
-        final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next, true);
+        synchronized (this) {
+          if (!next.flushPending) {
+            setFlushPending(next);
+          }
+        }
+        final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
         assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
         assert dwpt == flushingDWPT : "flushControl returned different DWPT";
         toFlush.add(flushingDWPT);
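In the flush-control hunks above, the boolean setPending parameter disappears: the full-flush path now raises the pending flag itself, under the monitor, before checking the per-thread state out. A loose, self-contained model of that ordering (hypothetical names, not the actual DocumentsWriterFlushControl API):

// Rough model of "mark pending under the lock, then check out" used in the full-flush path.
final class FlushControlSketch {
  static final class State {
    volatile boolean flushPending;
    int docsInRam;
  }

  private final Object lock = new Object();

  State checkoutForFullFlush(State next) {
    if (next.docsInRam > 0) {
      synchronized (lock) {
        if (!next.flushPending) {
          next.flushPending = true;   // corresponds to setFlushPending(next)
        }
      }
      return tryCheckout(next);       // checkout no longer needs a setPending flag
    }
    return null;
  }

  private State tryCheckout(State s) {
    return s.flushPending ? s : null;
  }
}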
@@ -163,7 +163,7 @@ public class DocumentsWriterPerThread {
   boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting
 
   private FieldInfos fieldInfos;
-  private final PrintStream infoStream;
+  private PrintStream infoStream;
   private int numDocsInRAM;
   private int flushedDocCount;
   DocumentsWriterDeleteQueue deleteQueue;
@@ -235,6 +235,7 @@ public class DocumentsWriterPerThread {
         // mark document as deleted
         deleteDocID(docState.docID);
         numDocsInRAM++;
+        fieldInfos.revertUncommitted();
       } else {
         abort();
       }
@@ -377,15 +378,12 @@ public class DocumentsWriterPerThread {
     boolean success = false;
 
     try {
 
-      SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
       consumer.flush(flushState);
       pendingDeletes.terms.clear();
-      newSegment.setHasVectors(flushState.hasVectors);
+      final SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, flushState.segmentCodecs, fieldInfos.asReadOnly());
 
       if (infoStream != null) {
         message("new segment has " + (flushState.deletedDocs == null ? 0 : flushState.deletedDocs.count()) + " deleted docs");
-        message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
+        message("new segment has " + (newSegment.getHasVectors() ? "vectors" : "no vectors"));
         message("flushedFiles=" + newSegment.files());
         message("flushed codecs=" + newSegment.getSegmentCodecs());
       }
@@ -435,10 +433,6 @@ public class DocumentsWriterPerThread {
     return bytesUsed.get() + pendingDeletes.bytesUsed.get();
   }
 
-  FieldInfos getFieldInfos() {
-    return fieldInfos;
-  }
-
   void message(String message) {
     writer.message("DWPT: " + message);
   }
@@ -498,4 +492,9 @@ public class DocumentsWriterPerThread {
     assert segment != null;
     return new PerDocWriteState(infoStream, directory, segment, fieldInfos, bytesUsed, codecId);
   }
 
+  void setInfoStream(PrintStream infoStream) {
+    this.infoStream = infoStream;
+    docState.infoStream = infoStream;
+  }
 }
@@ -22,7 +22,6 @@ import org.apache.lucene.index.values.Type;
 /** @lucene.experimental */
 public final class FieldInfo {
   public static final int UNASSIGNED_CODEC_ID = -1;
 
   public final String name;
   public final int number;
 
@@ -113,7 +112,6 @@ public final class FieldInfo {
     }
     assert !this.omitTermFreqAndPositions || !this.storePayloads;
   }
 
   void setDocValues(Type v) {
     if (docValues == null) {
       docValues = v;
@@ -127,4 +125,29 @@ public final class FieldInfo {
   public Type getDocValues() {
     return docValues;
   }
 
+  private boolean vectorsCommitted;
+
+  /**
+   * Reverts all uncommitted changes on this {@link FieldInfo}
+   * @see #commitVectors()
+   */
+  void revertUncommitted() {
+    if (storeTermVector && !vectorsCommitted) {
+      storeOffsetWithTermVector = false;
+      storePositionWithTermVector = false;
+      storeTermVector = false;
+    }
+  }
+
+  /**
+   * Commits term vector modifications. Changes to term-vectors must be
+   * explicitly committed once the necessary files are created. If those changes
+   * are not committed subsequent {@link #revertUncommitted()} will reset the
+   * all term-vector flags before the next document.
+   */
+  void commitVectors() {
+    assert storeTermVector;
+    vectorsCommitted = true;
+  }
 }
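The commitVectors()/revertUncommitted() pair added above gives the per-field term-vector flags commit/rollback semantics: flags raised while processing a document only stick once the vector files were actually written, otherwise they are rolled back before the next document. A rough self-contained illustration of that pattern, with a hypothetical FieldFlags class rather than the real FieldInfo:

// Illustration only; FieldFlags is a made-up stand-in for the committed/uncommitted flag handling.
final class FieldFlags {
  boolean storeTermVector;
  private boolean vectorsCommitted;

  void startVectors()  { storeTermVector = true; }
  void commitVectors() { assert storeTermVector; vectorsCommitted = true; }

  void revertUncommitted() {
    if (storeTermVector && !vectorsCommitted) {
      storeTermVector = false;   // the document aborted before vector files were created
    }
  }

  public static void main(String[] args) {
    FieldFlags f = new FieldFlags();
    f.startVectors();
    f.revertUncommitted();                    // simulate an aborted document
    System.out.println(f.storeTermVector);    // false: the uncommitted flag did not stick
  }
}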
@ -220,6 +220,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
||||||
|
|
||||||
private int format;
|
private int format;
|
||||||
|
private boolean hasProx; // only set if readonly
|
||||||
|
private boolean hasVectors; // only set if readonly
|
||||||
|
private long version; // internal use to track changes
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new {@link FieldInfos} instance with a private
|
* Creates a new {@link FieldInfos} instance with a private
|
||||||
|
@ -267,7 +271,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
*/
|
*/
|
||||||
public FieldInfos(Directory d, String name) throws IOException {
|
public FieldInfos(Directory d, String name) throws IOException {
|
||||||
this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
|
this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
|
||||||
IndexInput input = d.openInput(name);
|
final IndexInput input = d.openInput(name);
|
||||||
try {
|
try {
|
||||||
read(input, name);
|
read(input, name);
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -303,6 +307,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
@Override
|
@Override
|
||||||
synchronized public Object clone() {
|
synchronized public Object clone() {
|
||||||
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
|
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
|
||||||
|
fis.format = format;
|
||||||
|
fis.hasProx = hasProx;
|
||||||
|
fis.hasVectors = hasVectors;
|
||||||
for (FieldInfo fi : this) {
|
for (FieldInfo fi : this) {
|
||||||
FieldInfo clone = (FieldInfo) (fi).clone();
|
FieldInfo clone = (FieldInfo) (fi).clone();
|
||||||
fis.putInternal(clone);
|
fis.putInternal(clone);
|
||||||
|
@ -312,6 +319,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
|
|
||||||
/** Returns true if any fields do not omitTermFreqAndPositions */
|
/** Returns true if any fields do not omitTermFreqAndPositions */
|
||||||
public boolean hasProx() {
|
public boolean hasProx() {
|
||||||
|
if (isReadOnly()) {
|
||||||
|
return hasProx;
|
||||||
|
}
|
||||||
|
// mutable FIs must check!
|
||||||
for (FieldInfo fi : this) {
|
for (FieldInfo fi : this) {
|
||||||
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
|
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -445,6 +456,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
|
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
|
||||||
segmentCodecsBuilder.tryAddAndSet(fi);
|
segmentCodecsBuilder.tryAddAndSet(fi);
|
||||||
}
|
}
|
||||||
|
version++;
|
||||||
return fi;
|
return fi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -514,6 +526,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasVectors() {
|
public boolean hasVectors() {
|
||||||
|
if (isReadOnly()) {
|
||||||
|
return hasVectors;
|
||||||
|
}
|
||||||
|
// mutable FIs must check
|
||||||
for (FieldInfo fi : this) {
|
for (FieldInfo fi : this) {
|
||||||
if (fi.storeTermVector) {
|
if (fi.storeTermVector) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -567,6 +583,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
return globalFieldNumbers == null;
|
return globalFieldNumbers == null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
synchronized final long getVersion() {
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
public void write(IndexOutput output) throws IOException {
|
public void write(IndexOutput output) throws IOException {
|
||||||
output.writeVInt(FORMAT_CURRENT);
|
output.writeVInt(FORMAT_CURRENT);
|
||||||
output.writeVInt(size());
|
output.writeVInt(size());
|
||||||
|
@ -658,7 +678,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
if (omitTermFreqAndPositions) {
|
if (omitTermFreqAndPositions) {
|
||||||
storePayloads = false;
|
storePayloads = false;
|
||||||
}
|
}
|
||||||
|
hasVectors |= storeTermVector;
|
||||||
|
hasProx |= isIndexed && !omitTermFreqAndPositions;
|
||||||
Type docValuesType = null;
|
Type docValuesType = null;
|
||||||
if (format <= FORMAT_INDEX_VALUES) {
|
if (format <= FORMAT_INDEX_VALUES) {
|
||||||
final byte b = input.readByte();
|
final byte b = input.readByte();
|
||||||
|
@ -706,4 +727,28 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverts all uncommitted changes
|
||||||
|
* @see FieldInfo#revertUncommitted()
|
||||||
|
*/
|
||||||
|
void revertUncommitted() {
|
||||||
|
for (FieldInfo fieldInfo : this) {
|
||||||
|
fieldInfo.revertUncommitted();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final FieldInfos asReadOnly() {
|
||||||
|
if (isReadOnly()) {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
final FieldInfos roFis = new FieldInfos((FieldNumberBiMap)null, null);
|
||||||
|
for (FieldInfo fieldInfo : this) {
|
||||||
|
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
|
||||||
|
roFis.putInternal(clone);
|
||||||
|
roFis.hasVectors |= clone.storeTermVector;
|
||||||
|
roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions;
|
||||||
|
}
|
||||||
|
return roFis;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,10 +24,11 @@ import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldSelector;
|
import org.apache.lucene.document.FieldSelector;
|
||||||
import org.apache.lucene.document.FieldSelectorResult;
|
import org.apache.lucene.document.FieldSelectorResult;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.document.NumericField;
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
import org.apache.lucene.store.BufferedIndexInput;
|
import org.apache.lucene.store.BufferedIndexInput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.util.CloseableThreadLocal;
|
import org.apache.lucene.util.CloseableThreadLocal;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -212,40 +213,39 @@ public final class FieldsReader implements Cloneable {
|
||||||
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
int numFields = fieldsStream.readVInt();
|
int numFields = fieldsStream.readVInt();
|
||||||
for (int i = 0; i < numFields; i++) {
|
out: for (int i = 0; i < numFields; i++) {
|
||||||
int fieldNumber = fieldsStream.readVInt();
|
int fieldNumber = fieldsStream.readVInt();
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
|
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
|
||||||
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
|
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
|
||||||
|
|
||||||
byte bits = fieldsStream.readByte();
|
int bits = fieldsStream.readByte() & 0xFF;
|
||||||
assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
|
assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
|
||||||
|
|
||||||
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
|
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
|
||||||
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
|
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
|
||||||
//TODO: Find an alternative approach here if this list continues to grow beyond the
|
final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;
|
||||||
//list of 5 or 6 currently here. See Lucene 762 for discussion
|
|
||||||
if (acceptField.equals(FieldSelectorResult.LOAD)) {
|
switch (acceptField) {
|
||||||
addField(doc, fi, binary, tokenize);
|
case LOAD:
|
||||||
}
|
addField(doc, fi, binary, tokenize, numeric);
|
||||||
else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
|
break;
|
||||||
addField(doc, fi, binary, tokenize);
|
case LOAD_AND_BREAK:
|
||||||
break;//Get out of this loop
|
addField(doc, fi, binary, tokenize, numeric);
|
||||||
}
|
break out; //Get out of this loop
|
||||||
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
|
case LAZY_LOAD:
|
||||||
addFieldLazy(doc, fi, binary, tokenize, true);
|
addFieldLazy(doc, fi, binary, tokenize, true, numeric);
|
||||||
}
|
break;
|
||||||
else if (acceptField.equals(FieldSelectorResult.LATENT)) {
|
case LATENT:
|
||||||
addFieldLazy(doc, fi, binary, tokenize, false);
|
addFieldLazy(doc, fi, binary, tokenize, false, numeric);
|
||||||
}
|
break;
|
||||||
else if (acceptField.equals(FieldSelectorResult.SIZE)){
|
case SIZE:
|
||||||
skipField(addFieldSize(doc, fi, binary));
|
skipFieldBytes(addFieldSize(doc, fi, binary, numeric));
|
||||||
}
|
break;
|
||||||
else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
|
case SIZE_AND_BREAK:
|
||||||
addFieldSize(doc, fi, binary);
|
addFieldSize(doc, fi, binary, numeric);
|
||||||
break;
|
break out; //Get out of this loop
|
||||||
}
|
default:
|
||||||
else {
|
skipField(numeric);
|
||||||
skipField();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -282,72 +282,121 @@ public final class FieldsReader implements Cloneable {
|
||||||
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
|
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
|
||||||
* This will have the most payoff on large fields.
|
* This will have the most payoff on large fields.
|
||||||
*/
|
*/
|
||||||
private void skipField() throws IOException {
|
private void skipField(int numeric) throws IOException {
|
||||||
skipField(fieldsStream.readVInt());
|
final int numBytes;
|
||||||
|
switch(numeric) {
|
||||||
|
case 0:
|
||||||
|
numBytes = fieldsStream.readVInt();
|
||||||
|
break;
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_INT:
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
|
||||||
|
numBytes = 4;
|
||||||
|
break;
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
|
||||||
|
numBytes = 8;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||||
|
}
|
||||||
|
|
||||||
|
skipFieldBytes(numBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void skipField(int toRead) throws IOException {
|
private void skipFieldBytes(int toRead) throws IOException {
|
||||||
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
|
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult) throws IOException {
|
private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
|
||||||
|
assert numeric != 0;
|
||||||
|
switch(numeric) {
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_INT:
|
||||||
|
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
|
||||||
|
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
|
||||||
|
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
|
||||||
|
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
|
||||||
|
default:
|
||||||
|
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
|
||||||
|
final AbstractField f;
|
||||||
if (binary) {
|
if (binary) {
|
||||||
int toRead = fieldsStream.readVInt();
|
int toRead = fieldsStream.readVInt();
|
||||||
long pointer = fieldsStream.getFilePointer();
|
long pointer = fieldsStream.getFilePointer();
|
||||||
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
|
f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult);
|
||||||
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult));
|
|
||||||
//Need to move the pointer ahead by toRead positions
|
//Need to move the pointer ahead by toRead positions
|
||||||
fieldsStream.seek(pointer + toRead);
|
fieldsStream.seek(pointer + toRead);
|
||||||
|
} else if (numeric != 0) {
|
||||||
|
f = loadNumericField(fi, numeric);
|
||||||
} else {
|
} else {
|
||||||
Field.Store store = Field.Store.YES;
|
Field.Store store = Field.Store.YES;
|
||||||
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
|
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
|
||||||
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
|
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
|
||||||
|
|
||||||
AbstractField f;
|
|
||||||
int length = fieldsStream.readVInt();
|
int length = fieldsStream.readVInt();
|
||||||
long pointer = fieldsStream.getFilePointer();
|
long pointer = fieldsStream.getFilePointer();
|
||||||
//Skip ahead of where we are by the length of what is stored
|
//Skip ahead of where we are by the length of what is stored
|
||||||
fieldsStream.seek(pointer+length);
|
fieldsStream.seek(pointer+length);
|
||||||
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
|
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
|
||||||
f.setOmitNorms(fi.omitNorms);
|
|
||||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
|
||||||
|
|
||||||
doc.add(f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
f.setOmitNorms(fi.omitNorms);
|
||||||
|
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||||
|
doc.add(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
|
private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
|
||||||
|
final AbstractField f;
|
||||||
|
|
||||||
if (binary) {
|
if (binary) {
|
||||||
int toRead = fieldsStream.readVInt();
|
int toRead = fieldsStream.readVInt();
|
||||||
final byte[] b = new byte[toRead];
|
final byte[] b = new byte[toRead];
|
||||||
fieldsStream.readBytes(b, 0, b.length);
|
fieldsStream.readBytes(b, 0, b.length);
|
||||||
doc.add(new Field(fi.name, b));
|
f = new Field(fi.name, b);
|
||||||
|
} else if (numeric != 0) {
|
||||||
|
f = loadNumericField(fi, numeric);
|
||||||
} else {
|
} else {
|
||||||
Field.Store store = Field.Store.YES;
|
|
||||||
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
|
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
|
||||||
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
|
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
|
||||||
|
|
||||||
AbstractField f;
|
|
||||||
f = new Field(fi.name, // name
|
f = new Field(fi.name, // name
|
||||||
false,
|
false,
|
||||||
fieldsStream.readString(), // read value
|
fieldsStream.readString(), // read value
|
||||||
store,
|
Field.Store.YES,
|
||||||
index,
|
index,
|
||||||
termVector);
|
termVector);
|
||||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
|
||||||
f.setOmitNorms(fi.omitNorms);
|
|
||||||
|
|
||||||
doc.add(f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||||
|
f.setOmitNorms(fi.omitNorms);
|
||||||
|
doc.add(f);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
|
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
|
||||||
// Read just the size -- caller must skip the field content to continue reading fields
|
// Read just the size -- caller must skip the field content to continue reading fields
|
||||||
// Return the size in bytes or chars, depending on field type
|
// Return the size in bytes or chars, depending on field type
|
||||||
private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
|
private int addFieldSize(Document doc, FieldInfo fi, boolean binary, int numeric) throws IOException {
|
||||||
int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
|
final int bytesize, size;
|
||||||
|
switch(numeric) {
|
||||||
|
case 0:
|
||||||
|
size = fieldsStream.readVInt();
|
||||||
|
bytesize = binary ? size : 2*size;
|
||||||
|
break;
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_INT:
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
|
||||||
|
size = bytesize = 4;
|
||||||
|
break;
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
|
||||||
|
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
|
||||||
|
size = bytesize = 8;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||||
|
}
|
||||||
byte[] sizebytes = new byte[4];
|
byte[] sizebytes = new byte[4];
|
||||||
sizebytes[0] = (byte) (bytesize>>>24);
|
sizebytes[0] = (byte) (bytesize>>>24);
|
||||||
sizebytes[1] = (byte) (bytesize>>>16);
|
sizebytes[1] = (byte) (bytesize>>>16);
|
||||||
|
@ -358,7 +407,7 @@ public final class FieldsReader implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
|
* A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
|
||||||
* loaded.
|
* loaded.
|
||||||
*/
|
*/
|
||||||
private class LazyField extends AbstractField implements Fieldable {
|
private class LazyField extends AbstractField implements Fieldable {
|
||||||
|
|
|
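The FieldsReader changes above introduce a loadNumericField helper: for numeric stored fields the reader pulls the raw 4- or 8-byte payload and rebuilds the value with Float.intBitsToFloat or Double.longBitsToDouble instead of parsing a string. A small stand-alone round trip of the float case, using plain java.io streams as a stand-in for Lucene's IndexInput (illustration only):

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

// Round-trips the float encoding used by the numeric stored-field format:
// the writer stores Float.floatToIntBits(value) as a 4-byte big-endian int, the reader reverses it.
public class NumericFieldBits {
  public static void main(String[] args) throws IOException {
    int stored = Float.floatToIntBits(3.14f);                      // what the writer emits
    byte[] onDisk = {(byte) (stored >>> 24), (byte) (stored >>> 16),
                     (byte) (stored >>> 8), (byte) stored};
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(onDisk));
    float restored = Float.intBitsToFloat(in.readInt());           // what loadNumericField does
    System.out.println(restored);                                  // 3.14
  }
}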
@@ -21,22 +21,40 @@ import java.util.List;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.IOUtils;
 
 final class FieldsWriter {
-  static final byte FIELD_IS_TOKENIZED = 0x1;
-  static final byte FIELD_IS_BINARY = 0x2;
+  static final int FIELD_IS_TOKENIZED = 1 << 0;
+  static final int FIELD_IS_BINARY = 1 << 1;
+
+  // the old bit 1 << 2 was compressed, is now left out
+
+  private static final int _NUMERIC_BIT_SHIFT = 3;
+  static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
+
+  static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
+  static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
+  static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
+  static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
+  // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
+  // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
+
+  // the next possible bits are: 1 << 6; 1 << 7
+
   // Lucene 3.0: Removal of compressed fields
   static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
 
+  // Lucene 3.2: NumericFields are stored in binary format
+  static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
+
   // NOTE: if you introduce a new format, make it 1 higher
   // than the current one, and always change this if you
   // switch to a new format!
-  static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+  static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
 
   // when removing support for old versions, leave the last supported version here
   static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
@@ -121,13 +139,26 @@ final class FieldsWriter {
 
   final void writeField(int fieldNumber, Fieldable field) throws IOException {
     fieldsStream.writeVInt(fieldNumber);
-    byte bits = 0;
+    int bits = 0;
     if (field.isTokenized())
-      bits |= FieldsWriter.FIELD_IS_TOKENIZED;
+      bits |= FIELD_IS_TOKENIZED;
     if (field.isBinary())
-      bits |= FieldsWriter.FIELD_IS_BINARY;
-    fieldsStream.writeByte(bits);
+      bits |= FIELD_IS_BINARY;
+    if (field instanceof NumericField) {
+      switch (((NumericField) field).getDataType()) {
+        case INT:
+          bits |= FIELD_IS_NUMERIC_INT; break;
+        case LONG:
+          bits |= FIELD_IS_NUMERIC_LONG; break;
+        case FLOAT:
+          bits |= FIELD_IS_NUMERIC_FLOAT; break;
+        case DOUBLE:
+          bits |= FIELD_IS_NUMERIC_DOUBLE; break;
+        default:
+          assert false : "Should never get here";
+      }
+    }
+    fieldsStream.writeByte((byte) bits);
 
     if (field.isBinary()) {
       final byte[] data;
@@ -139,8 +170,22 @@ final class FieldsWriter {
 
       fieldsStream.writeVInt(len);
       fieldsStream.writeBytes(data, offset, len);
-    }
-    else {
+    } else if (field instanceof NumericField) {
+      final NumericField nf = (NumericField) field;
+      final Number n = nf.getNumericValue();
+      switch (nf.getDataType()) {
+        case INT:
+          fieldsStream.writeInt(n.intValue()); break;
+        case LONG:
+          fieldsStream.writeLong(n.longValue()); break;
+        case FLOAT:
+          fieldsStream.writeInt(Float.floatToIntBits(n.floatValue())); break;
+        case DOUBLE:
+          fieldsStream.writeLong(Double.doubleToLongBits(n.doubleValue())); break;
+        default:
+          assert false : "Should never get here";
+      }
+    } else {
       fieldsStream.writeString(field.stringValue());
     }
   }
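With the constants introduced above, the per-field flags byte now packs three things: bit 0 = tokenized, bit 1 = binary, and bits 3-5 = the numeric type (the old compression bit 2 is retired). For example, a tokenized NumericField holding a float is written as 0x01 | (3 << 3) = 0x19. A quick self-contained check of that arithmetic, recomputing the constants locally for illustration:

// Recomputes the flags byte for a tokenized float NumericField using the bit layout above.
public class FieldBitsExample {
  static final int FIELD_IS_TOKENIZED = 1 << 0;
  static final int _NUMERIC_BIT_SHIFT = 3;
  static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
  static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;

  public static void main(String[] args) {
    int bits = FIELD_IS_TOKENIZED | FIELD_IS_NUMERIC_FLOAT;
    System.out.println(Integer.toHexString(bits));                               // 19
    System.out.println((bits & FIELD_IS_NUMERIC_MASK) == FIELD_IS_NUMERIC_FLOAT); // true
  }
}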
@@ -22,6 +22,7 @@ import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
@@ -196,7 +197,31 @@ final class IndexFileDeleter {
         }
       }
       if (sis != null) {
-        CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
+        final SegmentInfos infos = sis;
+        for (SegmentInfo segmentInfo : infos) {
+          try {
+            /*
+             * Force FI to load for each segment since we could see a
+             * segments file and load successfully above if the files are
+             * still referenced when they are deleted and the os doesn't let
+             * you delete them. Yet its likely that fnm files are removed
+             * while seg file is still around Since LUCENE-2984 we need FI
+             * to find out if a seg has vectors and prox so we need those
+             * files to be opened for a commit point.
+             */
+            segmentInfo.getFieldInfos();
+          } catch (FileNotFoundException e) {
+            refresh(segmentInfo.name);
+            sis = null;
+            if (infoStream != null) {
+              message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+            }
+          }
+        }
+
+      }
+      if (sis != null) {
+        final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
         if (sis.getGeneration() == segmentInfos.getGeneration()) {
           currentCommitPoint = commitPoint;
         }
@@ -1428,7 +1428,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
         cfr = new CompoundFileReader(dir, filename);
 
         String [] files = cfr.listAll();
-        ArrayUtil.quickSort(files);   // sort the array of filename so that the output is more readable
+        ArrayUtil.mergeSort(files);   // sort the array of filename so that the output is more readable
 
         for (int i = 0; i < files.length; ++i) {
           long len = cfr.fileLength(files[i]);
@@ -0,0 +1,129 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.Version;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collection;
+
+/**
+ * This is an easy-to-use tool that upgrades all segments of an index from previous Lucene versions
+ * to the current segment file format. It can be used from command line:
+ * <pre>
+ *   java -cp lucene-core.jar org.apache.lucene.index.IndexUpgrader [-delete-prior-commits] [-verbose] indexDir
+ * </pre>
+ * Alternatively this class can be instantiated and {@link #upgrade} invoked. It uses {@link UpgradeIndexMergePolicy}
+ * and triggers the upgrade via an optimize request to {@link IndexWriter}.
+ * <p>This tool keeps only the last commit in an index; for this
+ * reason, if the incoming index has more than one commit, the tool
+ * refuses to run by default. Specify {@code -delete-prior-commits}
+ * to override this, allowing the tool to delete all but the last commit.
+ * From Java code this can be enabled by passing {@code true} to
+ * {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
+ */
+public final class IndexUpgrader {
+
+  private static void printUsage() {
+    System.err.println("Upgrades an index so all segments created with a previous Lucene version are rewritten.");
+    System.err.println("Usage:");
+    System.err.println("  java " + IndexUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] indexDir");
+    System.err.println("This tool keeps only the last commit in an index; for this");
+    System.err.println("reason, if the incoming index has more than one commit, the tool");
+    System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
+    System.err.println("this, allowing the tool to delete all but the last commit.");
+    System.exit(1);
+  }
+
+  public static void main(String[] args) throws IOException {
+    String dir = null;
+    boolean deletePriorCommits = false;
+    PrintStream out = null;
+    for (String arg : args) {
+      if ("-delete-prior-commits".equals(arg)) {
+        deletePriorCommits = true;
+      } else if ("-verbose".equals(arg)) {
+        out = System.out;
+      } else if (dir == null) {
+        dir = arg;
+      } else {
+        printUsage();
+      }
+    }
+    if (dir == null) {
+      printUsage();
+    }
+
+    new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
+  }
+
+  private final Directory dir;
+  private final PrintStream infoStream;
+  private final IndexWriterConfig iwc;
+  private final boolean deletePriorCommits;
+
+  @SuppressWarnings("deprecation")
+  public IndexUpgrader(Directory dir) {
+    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
+  }
+
+  @SuppressWarnings("deprecation")
+  public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
+    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
+  }
+
+  public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
+    this.dir = dir;
+    this.iwc = iwc;
+    this.infoStream = infoStream;
+    this.deletePriorCommits = deletePriorCommits;
+  }
+
+  public void upgrade() throws IOException {
+    if (!IndexReader.indexExists(dir)) {
+      throw new IndexNotFoundException(dir.toString());
+    }
+
+    if (!deletePriorCommits) {
+      final Collection<IndexCommit> commits = IndexReader.listCommits(dir);
+      if (commits.size() > 1) {
+        throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
+      }
+    }
+
+    final IndexWriterConfig c = (IndexWriterConfig) iwc.clone();
+    c.setMergePolicy(new UpgradeIndexMergePolicy(c.getMergePolicy()));
+    c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+
+    final IndexWriter w = new IndexWriter(dir, c);
+    try {
+      w.setInfoStream(infoStream);
+      w.message("Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
+      w.optimize();
+      w.message("All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
+    } finally {
+      w.close();
+    }
+  }
+
+}
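Based only on the constructors and main() shown in the new IndexUpgrader source above, a typical programmatic invocation looks like the following; the index path is a placeholder:

import java.io.File;
import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class UpgradeExample {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index"));   // placeholder path
    // verbose output to System.out; 'false' keeps prior commits (fails if more than one exists)
    new IndexUpgrader(dir, System.out, false).upgrade();
  }
}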
@ -421,7 +421,7 @@ public class IndexWriter implements Closeable {
|
||||||
private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
|
private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
|
||||||
|
|
||||||
/** Forcefully clear changes for the specified segments. This is called on successful merge. */
|
/** Forcefully clear changes for the specified segments. This is called on successful merge. */
|
||||||
synchronized void clear(SegmentInfos infos) throws IOException {
|
synchronized void clear(List<SegmentInfo> infos) throws IOException {
|
||||||
if (infos == null) {
|
if (infos == null) {
|
||||||
for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
|
for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
|
||||||
ent.getValue().hasChanges = false;
|
ent.getValue().hasChanges = false;
|
||||||
|
@ -511,7 +511,7 @@ public class IndexWriter implements Closeable {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void drop(SegmentInfos infos) throws IOException {
|
public synchronized void drop(List<SegmentInfo> infos) throws IOException {
|
||||||
for(SegmentInfo info : infos) {
|
for(SegmentInfo info : infos) {
|
||||||
drop(info);
|
drop(info);
|
||||||
}
|
}
|
||||||
|
@ -2355,7 +2355,7 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
String mergedName = newSegmentName();
|
String mergedName = newSegmentName();
|
||||||
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
|
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
|
||||||
mergedName, null, codecs, payloadProcessorProvider,
|
mergedName, null, payloadProcessorProvider,
|
||||||
globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
|
globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
|
||||||
|
|
||||||
for (IndexReader reader : readers) // add new indexes
|
for (IndexReader reader : readers) // add new indexes
|
||||||
|
@ -2365,8 +2365,7 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
final FieldInfos fieldInfos = merger.fieldInfos();
|
final FieldInfos fieldInfos = merger.fieldInfos();
|
||||||
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
|
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
|
||||||
false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
|
false, merger.getSegmentCodecs(),
|
||||||
fieldInfos.hasVectors(),
|
|
||||||
fieldInfos);
|
fieldInfos);
|
||||||
setDiagnostics(info, "addIndexes(IndexReader...)");
|
setDiagnostics(info, "addIndexes(IndexReader...)");
|
||||||
|
|
||||||
|
@ -2729,7 +2728,7 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
assert testPoint("startCommitMergeDeletes");
|
assert testPoint("startCommitMergeDeletes");
|
||||||
|
|
||||||
final SegmentInfos sourceSegments = merge.segments;
|
final List<SegmentInfo> sourceSegments = merge.segments;
|
||||||
|
|
||||||
if (infoStream != null)
|
if (infoStream != null)
|
||||||
message("commitMergeDeletes " + merge.segString(directory));
|
message("commitMergeDeletes " + merge.segString(directory));
|
||||||
|
@ -2741,7 +2740,7 @@ public class IndexWriter implements Closeable {
|
||||||
long minGen = Long.MAX_VALUE;
|
long minGen = Long.MAX_VALUE;
|
||||||
|
|
||||||
for(int i=0; i < sourceSegments.size(); i++) {
|
for(int i=0; i < sourceSegments.size(); i++) {
|
||||||
SegmentInfo info = sourceSegments.info(i);
|
SegmentInfo info = sourceSegments.get(i);
|
||||||
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
|
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
|
||||||
int docCount = info.docCount;
|
int docCount = info.docCount;
|
||||||
final SegmentReader previousReader = merge.readerClones.get(i);
|
final SegmentReader previousReader = merge.readerClones.get(i);
|
||||||
|
@ -3041,7 +3040,16 @@ public class IndexWriter implements Closeable {
|
||||||
// is running (while synchronized) to avoid race
|
// is running (while synchronized) to avoid race
|
||||||
// condition where two conflicting merges from different
|
// condition where two conflicting merges from different
|
||||||
// threads, start
|
// threads, start
|
||||||
message("registerMerge merging=" + mergingSegments);
|
if (infoStream != null) {
|
||||||
|
StringBuilder builder = new StringBuilder("registerMerge merging= [");
|
||||||
|
for (SegmentInfo info : mergingSegments) {
|
||||||
|
builder.append(info.name).append(", ");
|
||||||
|
}
|
||||||
|
builder.append("]");
|
||||||
|
// don't call mergingSegments.toString() could lead to ConcurrentModException
|
||||||
|
// since merge updates the segments FieldInfos
|
||||||
|
message(builder.toString());
|
||||||
|
}
|
||||||
for(SegmentInfo info : merge.segments) {
|
for(SegmentInfo info : merge.segments) {
|
||||||
message("registerMerge info=" + info);
|
message("registerMerge info=" + info);
|
||||||
mergingSegments.add(info);
|
mergingSegments.add(info);
|
||||||
|
@ -3094,7 +3102,7 @@ public class IndexWriter implements Closeable {
|
||||||
// Bind a new segment name here so even with
|
// Bind a new segment name here so even with
|
||||||
// ConcurrentMergePolicy we keep deterministic segment
|
// ConcurrentMergePolicy we keep deterministic segment
|
||||||
// names.
|
// names.
|
||||||
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
|
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, null, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
|
||||||
|
|
||||||
// Lock order: IW -> BD
|
// Lock order: IW -> BD
|
||||||
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
|
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
|
||||||
|
@ -3133,6 +3141,16 @@ public class IndexWriter implements Closeable {
|
||||||
message("merge seg=" + merge.info.name);
|
message("merge seg=" + merge.info.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert merge.estimatedMergeBytes == 0;
|
||||||
|
for(SegmentInfo info : merge.segments) {
|
||||||
|
if (info.docCount > 0) {
|
||||||
|
final int delCount = numDeletedDocs(info);
|
||||||
|
assert delCount <= info.docCount;
|
||||||
|
final double delRatio = ((double) delCount)/info.docCount;
|
||||||
|
merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: I think this should no longer be needed (we
|
// TODO: I think this should no longer be needed (we
|
||||||
// now build CFS before adding segment to the infos);
|
// now build CFS before adding segment to the infos);
|
||||||
// however, on removing it, tests fail for some reason!
|
// however, on removing it, tests fail for some reason!
|
||||||
|
@ -3174,7 +3192,7 @@ public class IndexWriter implements Closeable {
|
||||||
// It's possible we are called twice, eg if there was an
|
// It's possible we are called twice, eg if there was an
|
||||||
// exception inside mergeInit
|
// exception inside mergeInit
|
||||||
if (merge.registerDone) {
|
if (merge.registerDone) {
|
||||||
final SegmentInfos sourceSegments = merge.segments;
|
final List<SegmentInfo> sourceSegments = merge.segments;
|
||||||
for(SegmentInfo info : sourceSegments) {
|
for(SegmentInfo info : sourceSegments) {
|
||||||
mergingSegments.remove(info);
|
mergingSegments.remove(info);
|
||||||
}
|
}
|
||||||
|
@ -3245,21 +3263,17 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
int mergedDocCount = 0;
|
int mergedDocCount = 0;
|
||||||
|
|
||||||
SegmentInfos sourceSegments = merge.segments;
|
List<SegmentInfo> sourceSegments = merge.segments;
|
||||||
|
|
||||||
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
|
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
|
||||||
codecs, payloadProcessorProvider,
|
payloadProcessorProvider, merge.info.getFieldInfos());
|
||||||
merge.info.getFieldInfos());
|
|
||||||
|
|
||||||
if (infoStream != null) {
|
if (infoStream != null) {
|
||||||
message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
|
message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getFieldInfos().hasVectors());
|
||||||
}
|
}
|
||||||
|
|
||||||
merge.readers = new ArrayList<SegmentReader>();
|
merge.readers = new ArrayList<SegmentReader>();
|
||||||
merge.readerClones = new ArrayList<SegmentReader>();
|
merge.readerClones = new ArrayList<SegmentReader>();
|
||||||
|
|
||||||
merge.estimatedMergeBytes = 0;
|
|
||||||
|
|
||||||
// This is try/finally to make sure merger's readers are
|
// This is try/finally to make sure merger's readers are
|
||||||
// closed:
|
// closed:
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
|
@ -3268,7 +3282,7 @@ public class IndexWriter implements Closeable {
|
||||||
int segUpto = 0;
|
int segUpto = 0;
|
||||||
while(segUpto < sourceSegments.size()) {
|
while(segUpto < sourceSegments.size()) {
|
||||||
|
|
||||||
final SegmentInfo info = sourceSegments.info(segUpto);
|
final SegmentInfo info = sourceSegments.get(segUpto);
|
||||||
|
|
||||||
// Hold onto the "live" reader; we will use this to
|
// Hold onto the "live" reader; we will use this to
|
||||||
// commit merged deletes
|
// commit merged deletes
|
||||||
|
@ -3277,13 +3291,6 @@ public class IndexWriter implements Closeable {
|
||||||
-config.getReaderTermsIndexDivisor());
|
-config.getReaderTermsIndexDivisor());
|
||||||
merge.readers.add(reader);
|
merge.readers.add(reader);
|
||||||
|
|
||||||
final int readerMaxDoc = reader.maxDoc();
|
|
||||||
if (readerMaxDoc > 0) {
|
|
||||||
final int delCount = reader.numDeletedDocs();
|
|
||||||
final double delRatio = ((double) delCount)/readerMaxDoc;
|
|
||||||
merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We clone the segment readers because other
|
// We clone the segment readers because other
|
||||||
// deletes may come in while we're merging so we
|
// deletes may come in while we're merging so we
|
||||||
// need readers that will not change
|
// need readers that will not change
|
||||||
|
@ -3308,8 +3315,6 @@ public class IndexWriter implements Closeable {
|
||||||
|
|
||||||
// Record which codec was used to write the segment
|
// Record which codec was used to write the segment
|
||||||
merge.info.setSegmentCodecs(merger.getSegmentCodecs());
|
merge.info.setSegmentCodecs(merger.getSegmentCodecs());
|
||||||
// Record if we have merged vectors
|
|
||||||
merge.info.setHasVectors(merger.fieldInfos().hasVectors());
|
|
||||||
|
|
||||||
if (infoStream != null) {
|
if (infoStream != null) {
|
||||||
message("merge segmentCodecs=" + merger.getSegmentCodecs());
|
message("merge segmentCodecs=" + merger.getSegmentCodecs());
|
||||||
@@ -3323,8 +3328,6 @@ public class IndexWriter implements Closeable {
       // because codec must know if prox was written for
       // this segment:
       //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
-      merge.info.setHasProx(merger.fieldInfos().hasProx());

       boolean useCompoundFile;
       synchronized (this) { // Guard segmentInfos
         useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
@@ -3469,14 +3472,14 @@ public class IndexWriter implements Closeable {
   }

   /** @lucene.internal */
-  public synchronized String segString(SegmentInfos infos) throws IOException {
+  public synchronized String segString(List<SegmentInfo> infos) throws IOException {
     StringBuilder buffer = new StringBuilder();
     final int count = infos.size();
     for(int i = 0; i < count; i++) {
       if (i > 0) {
         buffer.append(' ');
       }
-      buffer.append(segString(infos.info(i)));
+      buffer.append(segString(infos.get(i)));
     }

     return buffer.toString();
@@ -3531,6 +3534,7 @@ public class IndexWriter implements Closeable {

   // called only from assert
   private boolean filesExist(SegmentInfos toSync) throws IOException {

     Collection<String> files = toSync.files(directory, false);
     for(final String fileName: files) {
       assert directory.fileExists(fileName): "file " + fileName + " does not exist";
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Comparator;
 import java.util.List;
 import java.util.Set;

@@ -595,7 +594,7 @@ public abstract class LogMergePolicy extends MergePolicy {
       } else if (!anyTooLarge) {
         if (spec == null)
           spec = new MergeSpecification();
-        final SegmentInfos mergeInfos = new SegmentInfos();
+        final List<SegmentInfo> mergeInfos = new ArrayList<SegmentInfo>();
         for(int i=start;i<end;i++) {
           mergeInfos.add(levels.get(i).info);
           assert infos.contains(levels.get(i).info);
@ -32,7 +32,7 @@ final class MergeDocIDRemapper {
|
||||||
|
|
||||||
public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) {
|
public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) {
|
||||||
this.docMaps = docMaps;
|
this.docMaps = docMaps;
|
||||||
SegmentInfo firstSegment = merge.segments.info(0);
|
SegmentInfo firstSegment = merge.segments.get(0);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while(true) {
|
while(true) {
|
||||||
SegmentInfo info = infos.info(i);
|
SegmentInfo info = infos.info(i);
|
||||||
@@ -45,7 +45,7 @@ final class MergeDocIDRemapper {
     int numDocs = 0;
     for(int j=0;j<docMaps.length;i++,j++) {
       numDocs += infos.info(i).docCount;
-      assert infos.info(i).equals(merge.segments.info(j));
+      assert infos.info(i).equals(merge.segments.get(j));
     }
     maxDocID = minDocID + numDocs;

@@ -55,7 +55,7 @@ final class MergeDocIDRemapper {
     starts[0] = minDocID;
     newStarts[0] = minDocID;
     for(i=1;i<docMaps.length;i++) {
-      final int lastDocCount = merge.segments.info(i-1).docCount;
+      final int lastDocCount = merge.segments.get(i-1).docCount;
       starts[i] = starts[i-1] + lastDocCount;
       newStarts[i] = newStarts[i-1] + lastDocCount - delCounts[i-1];
     }
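To illustrate the doc-ID bookkeeping in the MergeDocIDRemapper hunks above, here is a small, self-contained sketch (a hypothetical helper class, not part of the patch) that fills the old per-segment starts and the new starts after deleted docs are squeezed out, using the same recurrences as starts[] and newStarts[].

import java.util.Arrays;

public class DocStartsSketch {
  /** oldStarts[i] = first docID of segment i before the merge. */
  static int[] oldStarts(int minDocID, int[] docCounts) {
    int[] starts = new int[docCounts.length];
    starts[0] = minDocID;
    for (int i = 1; i < docCounts.length; i++) {
      starts[i] = starts[i - 1] + docCounts[i - 1];
    }
    return starts;
  }

  /** newStarts[i] = first docID of segment i once deletes are collapsed by the merge. */
  static int[] newStarts(int minDocID, int[] docCounts, int[] delCounts) {
    int[] newStarts = new int[docCounts.length];
    newStarts[0] = minDocID;
    for (int i = 1; i < docCounts.length; i++) {
      newStarts[i] = newStarts[i - 1] + docCounts[i - 1] - delCounts[i - 1];
    }
    return newStarts;
  }

  public static void main(String[] args) {
    int[] docCounts = {100, 50, 30};   // hypothetical per-segment maxDoc
    int[] delCounts = {10, 5, 0};      // hypothetical deletes per segment
    System.out.println(Arrays.toString(oldStarts(0, docCounts)));              // [0, 100, 150]
    System.out.println(Arrays.toString(newStarts(0, docCounts, delCounts)));   // [0, 90, 135]
  }
}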
|
@ -69,7 +69,7 @@ final class MergeDocIDRemapper {
|
||||||
// assert docShift > 0;
|
// assert docShift > 0;
|
||||||
|
|
||||||
// Make sure it all adds up:
|
// Make sure it all adds up:
|
||||||
assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.info(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
|
assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.get(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int remap(int oldDocID) {
|
public int remap(int oldDocID) {
|
||||||
|
|
|
@@ -75,15 +75,21 @@ public abstract class MergePolicy implements java.io.Closeable {
     long estimatedMergeBytes;           // used by IndexWriter
     List<SegmentReader> readers;        // used by IndexWriter
     List<SegmentReader> readerClones;   // used by IndexWriter
-    public final SegmentInfos segments;
+    public final List<SegmentInfo> segments;
+    public final int totalDocCount;
     boolean aborted;
     Throwable error;
     boolean paused;

-    public OneMerge(SegmentInfos segments) {
+    public OneMerge(List<SegmentInfo> segments) {
       if (0 == segments.size())
         throw new RuntimeException("segments must include at least one segment");
       this.segments = segments;
+      int count = 0;
+      for(SegmentInfo info : segments) {
+        count += info.docCount;
+      }
+      totalDocCount = count;
     }

     /** Record that an exception occurred while executing
@@ -147,7 +153,7 @@ public abstract class MergePolicy implements java.io.Closeable {
       final int numSegments = segments.size();
       for(int i=0;i<numSegments;i++) {
         if (i > 0) b.append(' ');
-        b.append(segments.info(i).toString(dir, 0));
+        b.append(segments.get(i).toString(dir, 0));
       }
       if (info != null)
         b.append(" into ").append(info.name);
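A minimal sketch of the new OneMerge bookkeeping, assuming a stripped-down stand-in for SegmentInfo (only docCount matters here; names and types are hypothetical, not the real org.apache.lucene.index classes). It mirrors how the constructor above now takes a List and sums totalDocCount over it.

import java.util.Arrays;
import java.util.List;

public class OneMergeSketch {
  static class SegInfo {                      // hypothetical stand-in for SegmentInfo
    final String name;
    final int docCount;
    SegInfo(String name, int docCount) { this.name = name; this.docCount = docCount; }
  }

  final List<SegInfo> segments;
  final int totalDocCount;

  OneMergeSketch(List<SegInfo> segments) {
    if (segments.isEmpty()) {
      throw new IllegalArgumentException("segments must include at least one segment");
    }
    this.segments = segments;
    int count = 0;
    for (SegInfo info : segments) {           // same accumulation as in the patch
      count += info.docCount;
    }
    totalDocCount = count;
  }

  public static void main(String[] args) {
    List<SegInfo> segs = Arrays.asList(new SegInfo("_0", 100), new SegInfo("_1", 250));
    System.out.println(new OneMergeSketch(segs).totalDocCount);   // 350
  }
}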
@ -43,7 +43,8 @@ import org.apache.lucene.util.Constants;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class SegmentInfo {
|
public final class SegmentInfo {
|
||||||
|
// TODO: remove with hasVector and hasProx
|
||||||
|
private static final int CHECK_FIELDINFO = -2;
|
||||||
static final int NO = -1; // e.g. no norms; no deletes;
|
static final int NO = -1; // e.g. no norms; no deletes;
|
||||||
static final int YES = 1; // e.g. have norms; have deletes;
|
static final int YES = 1; // e.g. have norms; have deletes;
|
||||||
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
|
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
|
||||||
|
@ -86,9 +87,11 @@ public final class SegmentInfo {
|
||||||
|
|
||||||
private int delCount; // How many deleted docs in this segment
|
private int delCount; // How many deleted docs in this segment
|
||||||
|
|
||||||
private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
|
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||||
|
private int hasVectors = CHECK_FIELDINFO;
|
||||||
|
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||||
|
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
|
||||||
|
|
||||||
private boolean hasVectors; // True if this segment wrote term vectors
|
|
||||||
|
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
|
|
||||||
|
@ -107,8 +110,11 @@ public final class SegmentInfo {
|
||||||
// this is never written to/read from the Directory
|
// this is never written to/read from the Directory
|
||||||
private long bufferedDeletesGen;
|
private long bufferedDeletesGen;
|
||||||
|
|
||||||
|
// holds the fieldInfos Version to refresh files() cache if FI has changed
|
||||||
|
private long fieldInfosVersion;
|
||||||
|
|
||||||
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
|
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
|
||||||
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
|
SegmentCodecs segmentCodecs, FieldInfos fieldInfos) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.docCount = docCount;
|
this.docCount = docCount;
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
|
@ -116,9 +122,7 @@ public final class SegmentInfo {
|
||||||
this.isCompoundFile = isCompoundFile;
|
this.isCompoundFile = isCompoundFile;
|
||||||
this.docStoreOffset = -1;
|
this.docStoreOffset = -1;
|
||||||
this.docStoreSegment = name;
|
this.docStoreSegment = name;
|
||||||
this.hasProx = hasProx;
|
|
||||||
this.segmentCodecs = segmentCodecs;
|
this.segmentCodecs = segmentCodecs;
|
||||||
this.hasVectors = hasVectors;
|
|
||||||
delCount = 0;
|
delCount = 0;
|
||||||
version = Constants.LUCENE_MAIN_VERSION;
|
version = Constants.LUCENE_MAIN_VERSION;
|
||||||
this.fieldInfos = fieldInfos;
|
this.fieldInfos = fieldInfos;
|
||||||
|
@ -213,7 +217,7 @@ public final class SegmentInfo {
|
||||||
delCount = input.readInt();
|
delCount = input.readInt();
|
||||||
assert delCount <= docCount;
|
assert delCount <= docCount;
|
||||||
|
|
||||||
hasProx = input.readByte() == YES;
|
hasProx = input.readByte();
|
||||||
|
|
||||||
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
|
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
|
||||||
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
|
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
|
||||||
|
@ -226,7 +230,7 @@ public final class SegmentInfo {
|
||||||
diagnostics = input.readStringStringMap();
|
diagnostics = input.readStringStringMap();
|
||||||
|
|
||||||
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
|
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
|
||||||
hasVectors = input.readByte() == 1;
|
hasVectors = input.readByte();
|
||||||
} else {
|
} else {
|
||||||
final String storesSegment;
|
final String storesSegment;
|
||||||
final String ext;
|
final String ext;
|
||||||
|
@ -247,7 +251,7 @@ public final class SegmentInfo {
|
||||||
dirToTest = dir;
|
dirToTest = dir;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
|
||||||
} finally {
|
} finally {
|
||||||
if (isCompoundFile) {
|
if (isCompoundFile) {
|
||||||
dirToTest.close();
|
dirToTest.close();
|
||||||
|
@ -311,12 +315,7 @@ public final class SegmentInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getHasVectors() throws IOException {
|
public boolean getHasVectors() throws IOException {
|
||||||
return hasVectors;
|
return hasVectors == CHECK_FIELDINFO ? getFieldInfos().hasVectors() : hasVectors == YES;
|
||||||
}
|
|
||||||
|
|
||||||
public void setHasVectors(boolean v) {
|
|
||||||
hasVectors = v;
|
|
||||||
clearFilesCache();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public FieldInfos getFieldInfos() throws IOException {
|
public FieldInfos getFieldInfos() throws IOException {
|
||||||
|
@ -349,7 +348,7 @@ public final class SegmentInfo {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object clone() {
|
public Object clone() {
|
||||||
final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, hasVectors,
|
final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, segmentCodecs,
|
||||||
fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
|
fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
|
||||||
si.docStoreOffset = docStoreOffset;
|
si.docStoreOffset = docStoreOffset;
|
||||||
si.docStoreSegment = docStoreSegment;
|
si.docStoreSegment = docStoreSegment;
|
||||||
|
@ -364,6 +363,8 @@ public final class SegmentInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
si.version = version;
|
si.version = version;
|
||||||
|
si.hasProx = hasProx;
|
||||||
|
si.hasVectors = hasVectors;
|
||||||
return si;
|
return si;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -569,19 +570,14 @@ public final class SegmentInfo {

     output.writeByte((byte) (isCompoundFile ? YES : NO));
     output.writeInt(delCount);
-    output.writeByte((byte) (hasProx ? 1:0));
+    output.writeByte((byte) (hasProx));
     segmentCodecs.write(output);
     output.writeStringStringMap(diagnostics);
-    output.writeByte((byte) (hasVectors ? 1 : 0));
+    output.writeByte((byte) (hasVectors));
   }

-  void setHasProx(boolean hasProx) {
-    this.hasProx = hasProx;
-    clearFilesCache();
-  }
-
-  public boolean getHasProx() {
-    return hasProx;
+  public boolean getHasProx() throws IOException {
+    return hasProx == CHECK_FIELDINFO ? getFieldInfos().hasProx() : hasProx == YES;
   }

   /** Can only be called once. */
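The hasProx and hasVectors flags are now tri-state ints rather than booleans: YES, NO, or CHECK_FIELDINFO, where the last value means "resolve lazily from the per-segment FieldInfos". A hedged sketch of that pattern follows; the class and the stand-in source method are hypothetical, not the Lucene code.

public class TriStateFlagSketch {
  static final int NO = -1;
  static final int YES = 1;
  static final int CHECK_SOURCE = -2;        // resolve from an authoritative source on demand

  private int hasVectors = CHECK_SOURCE;

  // Stand-in for FieldInfos.hasVectors(); in Lucene this consults field metadata.
  private boolean sourceSaysHasVectors() { return true; }

  public boolean getHasVectors() {
    // Old indexes recorded an explicit YES/NO byte; newer segments defer to the source.
    return hasVectors == CHECK_SOURCE ? sourceSaysHasVectors() : hasVectors == YES;
  }

  public void setFromLegacyByte(byte b) {    // e.g. a flag read from an old segments file
    hasVectors = (b == 1) ? YES : NO;
  }
}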
|
@ -609,13 +605,14 @@ public final class SegmentInfo {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public List<String> files() throws IOException {
|
public List<String> files() throws IOException {
|
||||||
|
final long fisVersion = fieldInfosVersion;
|
||||||
if (files != null) {
|
if (fisVersion != (fieldInfosVersion = getFieldInfos().getVersion())) {
|
||||||
|
clearFilesCache(); // FIS has modifications - need to recompute
|
||||||
|
} else if (files != null) {
|
||||||
// Already cached:
|
// Already cached:
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
final Set<String> fileSet = new HashSet<String>();
|
||||||
Set<String> fileSet = new HashSet<String>();
|
|
||||||
|
|
||||||
boolean useCompoundFile = getUseCompoundFile();
|
boolean useCompoundFile = getUseCompoundFile();
|
||||||
|
|
||||||
|
@ -637,7 +634,7 @@ public final class SegmentInfo {
|
||||||
} else {
|
} else {
|
||||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
|
||||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_EXTENSION));
|
||||||
if (hasVectors) {
|
if (getHasVectors()) {
|
||||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||||
|
@ -646,7 +643,7 @@ public final class SegmentInfo {
|
||||||
} else if (!useCompoundFile) {
|
} else if (!useCompoundFile) {
|
||||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
|
||||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_EXTENSION));
|
||||||
if (hasVectors) {
|
if (getHasVectors()) {
|
||||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||||
|
@ -709,8 +706,12 @@ public final class SegmentInfo {
|
||||||
if (this.dir != dir) {
|
if (this.dir != dir) {
|
||||||
s.append('x');
|
s.append('x');
|
||||||
}
|
}
|
||||||
if (hasVectors) {
|
try {
|
||||||
s.append('v');
|
if (getHasVectors()) {
|
||||||
|
s.append('v');
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
s.append(docCount);
|
s.append(docCount);
|
||||||
|
|
||||||
|
|
|
@ -72,7 +72,7 @@ final class SegmentMerger {
|
||||||
|
|
||||||
private PayloadProcessorProvider payloadProcessorProvider;
|
private PayloadProcessorProvider payloadProcessorProvider;
|
||||||
|
|
||||||
SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
|
SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
|
||||||
this.payloadProcessorProvider = payloadProcessorProvider;
|
this.payloadProcessorProvider = payloadProcessorProvider;
|
||||||
directory = dir;
|
directory = dir;
|
||||||
segment = name;
|
segment = name;
|
||||||
|
|
|
@ -32,7 +32,6 @@ public class SegmentWriteState {
|
||||||
public final String segmentName;
|
public final String segmentName;
|
||||||
public final FieldInfos fieldInfos;
|
public final FieldInfos fieldInfos;
|
||||||
public final int numDocs;
|
public final int numDocs;
|
||||||
public boolean hasVectors;
|
|
||||||
|
|
||||||
// Deletes to apply while we are flushing the segment. A
|
// Deletes to apply while we are flushing the segment. A
|
||||||
// Term is enrolled in here if it was deleted at one
|
// Term is enrolled in here if it was deleted at one
|
||||||
|
|
|
@ -63,7 +63,6 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
lastDocID = 0;
|
lastDocID = 0;
|
||||||
state.hasVectors = hasVectors;
|
|
||||||
hasVectors = false;
|
hasVectors = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,8 +120,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
||||||
fill(docState.docID);
|
fill(docState.docID);
|
||||||
|
|
||||||
// Append term vectors to the real outputs:
|
// Append term vectors to the real outputs:
|
||||||
long pointer = tvd.getFilePointer();
|
tvx.writeLong(tvd.getFilePointer());
|
||||||
tvx.writeLong(pointer);
|
|
||||||
tvx.writeLong(tvf.getFilePointer());
|
tvx.writeLong(tvf.getFilePointer());
|
||||||
tvd.writeVInt(numVectorFields);
|
tvd.writeVInt(numVectorFields);
|
||||||
if (numVectorFields > 0) {
|
if (numVectorFields > 0) {
|
||||||
|
@ -136,6 +134,8 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
||||||
tvd.writeVLong(pos-lastPos);
|
tvd.writeVLong(pos-lastPos);
|
||||||
lastPos = pos;
|
lastPos = pos;
|
||||||
perFields[i].finishDocument();
|
perFields[i].finishDocument();
|
||||||
|
// commit the termVectors once successful success - FI will otherwise reset them
|
||||||
|
perFields[i].fieldInfo.commitVectors();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,8 @@ import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges segments of approximately equal size, subject to
|
* Merges segments of approximately equal size, subject to
|
||||||
|
@ -249,7 +251,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||||
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
|
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
|
||||||
|
|
||||||
final SegmentInfos infosSorted = new SegmentInfos();
|
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
|
||||||
infosSorted.addAll(infos);
|
infosSorted.addAll(infos);
|
||||||
|
|
||||||
Collections.sort(infosSorted, segmentByteSizeDescending);
|
Collections.sort(infosSorted, segmentByteSizeDescending);
|
||||||
|
@ -277,7 +279,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
// If we have too-large segments, grace them out
|
// If we have too-large segments, grace them out
|
||||||
// of the maxSegmentCount:
|
// of the maxSegmentCount:
|
||||||
int tooBigCount = 0;
|
int tooBigCount = 0;
|
||||||
while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
|
while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
|
||||||
totIndexBytes -= size(infosSorted.get(tooBigCount));
|
totIndexBytes -= size(infosSorted.get(tooBigCount));
|
||||||
tooBigCount++;
|
tooBigCount++;
|
||||||
}
|
}
|
||||||
|
@ -310,7 +312,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
// Gather eligible segments for merging, ie segments
|
// Gather eligible segments for merging, ie segments
|
||||||
// not already being merged and not already picked (by
|
// not already being merged and not already picked (by
|
||||||
// prior iteration of this loop) for merging:
|
// prior iteration of this loop) for merging:
|
||||||
final SegmentInfos eligible = new SegmentInfos();
|
final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
|
||||||
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
|
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
|
||||||
final SegmentInfo info = infosSorted.get(idx);
|
final SegmentInfo info = infosSorted.get(idx);
|
||||||
if (merging.contains(info)) {
|
if (merging.contains(info)) {
|
||||||
|
@ -332,7 +334,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
|
|
||||||
// OK we are over budget -- find best merge!
|
// OK we are over budget -- find best merge!
|
||||||
MergeScore bestScore = null;
|
MergeScore bestScore = null;
|
||||||
SegmentInfos best = null;
|
List<SegmentInfo> best = null;
|
||||||
boolean bestTooLarge = false;
|
boolean bestTooLarge = false;
|
||||||
long bestMergeBytes = 0;
|
long bestMergeBytes = 0;
|
||||||
|
|
||||||
|
@ -341,10 +343,10 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
|
|
||||||
long totAfterMergeBytes = 0;
|
long totAfterMergeBytes = 0;
|
||||||
|
|
||||||
final SegmentInfos candidate = new SegmentInfos();
|
final List<SegmentInfo> candidate = new ArrayList<SegmentInfo>();
|
||||||
boolean hitTooLarge = false;
|
boolean hitTooLarge = false;
|
||||||
for(int idx = startIdx;idx<eligible.size() && candidate.size() < maxMergeAtOnce;idx++) {
|
for(int idx = startIdx;idx<eligible.size() && candidate.size() < maxMergeAtOnce;idx++) {
|
||||||
final SegmentInfo info = eligible.info(idx);
|
final SegmentInfo info = eligible.get(idx);
|
||||||
final long segBytes = size(info);
|
final long segBytes = size(info);
|
||||||
|
|
||||||
if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
|
if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
|
||||||
|
@ -398,7 +400,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Expert: scores one merge; subclasses can override. */
|
/** Expert: scores one merge; subclasses can override. */
|
||||||
protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
|
protected MergeScore score(List<SegmentInfo> candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
|
||||||
long totBeforeMergeBytes = 0;
|
long totBeforeMergeBytes = 0;
|
||||||
long totAfterMergeBytes = 0;
|
long totAfterMergeBytes = 0;
|
||||||
long totAfterMergeBytesFloored = 0;
|
long totAfterMergeBytesFloored = 0;
|
||||||
@@ -420,7 +422,7 @@ public class TieredMergePolicy extends MergePolicy {
         // over time:
         skew = 1.0/maxMergeAtOnce;
       } else {
-        skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored;
+        skew = ((double) floorSize(size(candidate.get(0))))/totAfterMergeBytesFloored;
       }

       // Strongly favor merges with less skew (smaller
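As a rough illustration of the skew term used by TieredMergePolicy.score above: skew is the floored size of the largest segment in the candidate divided by the total floored size of the candidate, so a merge of evenly sized segments scores better than one dominated by a single big segment. The sketch below is hedged and self-contained; the floor value and sizes are hypothetical.

public class MergeSkewSketch {
  static final long FLOOR_SEGMENT_BYTES = 2 * 1024 * 1024;   // hypothetical 2 MB floor

  static long floorSize(long bytes) {
    return Math.max(FLOOR_SEGMENT_BYTES, bytes);
  }

  /** candidateSizes must be sorted by descending size, as in the policy. */
  static double skew(long[] candidateSizes) {
    long totalFloored = 0;
    for (long size : candidateSizes) {
      totalFloored += floorSize(size);
    }
    return ((double) floorSize(candidateSizes[0])) / totalFloored;
  }

  public static void main(String[] args) {
    long mb = 1024 * 1024;
    // Ten equal 64 MB segments: skew = 0.1 (favored).
    System.out.println(skew(new long[] {64*mb, 64*mb, 64*mb, 64*mb, 64*mb, 64*mb, 64*mb, 64*mb, 64*mb, 64*mb}));
    // One 512 MB segment plus nine 8 MB segments: skew ~ 0.88 (penalized).
    System.out.println(skew(new long[] {512*mb, 8*mb, 8*mb, 8*mb, 8*mb, 8*mb, 8*mb, 8*mb, 8*mb, 8*mb}));
  }
}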
|
@ -458,7 +460,8 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
if (verbose()) {
|
if (verbose()) {
|
||||||
message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
|
message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
|
||||||
}
|
}
|
||||||
SegmentInfos eligible = new SegmentInfos();
|
|
||||||
|
List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
|
||||||
boolean optimizeMergeRunning = false;
|
boolean optimizeMergeRunning = false;
|
||||||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||||
for(SegmentInfo info : infos) {
|
for(SegmentInfo info : infos) {
|
||||||
|
@ -499,7 +502,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
if (spec == null) {
|
if (spec == null) {
|
||||||
spec = new MergeSpecification();
|
spec = new MergeSpecification();
|
||||||
}
|
}
|
||||||
final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end));
|
final OneMerge merge = new OneMerge(eligible.subList(end-maxMergeAtOnceExplicit, end));
|
||||||
if (verbose()) {
|
if (verbose()) {
|
||||||
message("add merge=" + writer.get().segString(merge.segments));
|
message("add merge=" + writer.get().segString(merge.segments));
|
||||||
}
|
}
|
||||||
|
@ -510,7 +513,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
if (spec == null && !optimizeMergeRunning) {
|
if (spec == null && !optimizeMergeRunning) {
|
||||||
// Do final merge
|
// Do final merge
|
||||||
final int numToMerge = end - maxSegmentCount + 1;
|
final int numToMerge = end - maxSegmentCount + 1;
|
||||||
final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end));
|
final OneMerge merge = new OneMerge(eligible.subList(end-numToMerge, end));
|
||||||
if (verbose()) {
|
if (verbose()) {
|
||||||
message("add final merge=" + merge.segString(writer.get().getDirectory()));
|
message("add final merge=" + merge.segString(writer.get().getDirectory()));
|
||||||
}
|
}
|
||||||
|
@ -527,7 +530,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
if (verbose()) {
|
if (verbose()) {
|
||||||
message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
|
message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
|
||||||
}
|
}
|
||||||
final SegmentInfos eligible = new SegmentInfos();
|
final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
|
||||||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||||
for(SegmentInfo info : infos) {
|
for(SegmentInfo info : infos) {
|
||||||
double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
|
double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
|
||||||
|
@ -580,7 +583,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
spec = new MergeSpecification();
|
spec = new MergeSpecification();
|
||||||
}
|
}
|
||||||
|
|
||||||
final OneMerge merge = new OneMerge(eligible.range(start, upto));
|
final OneMerge merge = new OneMerge(eligible.subList(start, upto));
|
||||||
if (verbose()) {
|
if (verbose()) {
|
||||||
message("add merge=" + writer.get().segString(merge.segments));
|
message("add merge=" + writer.get().segString(merge.segments));
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,152 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.Constants;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/** This {@link MergePolicy} is used for upgrading all existing segments of
|
||||||
|
* an index when calling {@link IndexWriter#optimize()}.
|
||||||
|
* All other methods delegate to the base {@code MergePolicy} given to the constructor.
|
||||||
|
* This allows for an as-cheap-as possible upgrade of an older index by only upgrading segments that
|
||||||
|
* are created by previous Lucene versions. Optimize does no longer really optimize
|
||||||
|
* it is just used to "optimize" older segment versions away.
|
||||||
|
* <p>In general one would use {@link IndexUpgrader}, but for a fully customizeable upgrade,
|
||||||
|
* you can use this like any other {@code MergePolicy} and call {@link IndexWriter#optimize()}:
|
||||||
|
* <pre class="prettyprint lang-java">
|
||||||
|
* IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_XX, new KeywordAnalyzer());
|
||||||
|
* iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
|
||||||
|
* IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
* w.optimize();
|
||||||
|
* w.close();
|
||||||
|
* </pre>
|
||||||
|
* @lucene.experimental
|
||||||
|
* @see IndexUpgrader
|
||||||
|
*/
|
||||||
|
public class UpgradeIndexMergePolicy extends MergePolicy {
|
||||||
|
|
||||||
|
protected final MergePolicy base;
|
||||||
|
|
||||||
|
/** Wrap the given {@link MergePolicy} and intercept optimize requests to
|
||||||
|
* only upgrade segments written with previous Lucene versions. */
|
||||||
|
public UpgradeIndexMergePolicy(MergePolicy base) {
|
||||||
|
this.base = base;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns if the given segment should be upgraded. The default implementation
|
||||||
|
* will return {@code !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion())},
|
||||||
|
* so all segments created with a different version number than this Lucene version will
|
||||||
|
* get upgraded.
|
||||||
|
*/
|
||||||
|
protected boolean shouldUpgradeSegment(SegmentInfo si) {
|
||||||
|
return !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setIndexWriter(IndexWriter writer) {
|
||||||
|
super.setIndexWriter(writer);
|
||||||
|
base.setIndexWriter(writer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
|
||||||
|
return base.findMerges(segmentInfos);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws CorruptIndexException, IOException {
|
||||||
|
// first find all old segments
|
||||||
|
final HashSet<SegmentInfo> oldSegments = new HashSet<SegmentInfo>();
|
||||||
|
for (final SegmentInfo si : segmentInfos) {
|
||||||
|
if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) {
|
||||||
|
oldSegments.add(si);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verbose()) message("findMergesForOptimize: segmentsToUpgrade=" + oldSegments);
|
||||||
|
|
||||||
|
if (oldSegments.isEmpty())
|
||||||
|
return null;
|
||||||
|
|
||||||
|
MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);
|
||||||
|
|
||||||
|
if (spec != null) {
|
||||||
|
// remove all segments that are in merge specification from oldSegments,
|
||||||
|
// the resulting set contains all segments that are left over
|
||||||
|
// and will be merged to one additional segment:
|
||||||
|
for (final OneMerge om : spec.merges) {
|
||||||
|
oldSegments.removeAll(om.segments);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!oldSegments.isEmpty()) {
|
||||||
|
if (verbose())
|
||||||
|
message("findMergesForOptimize: " + base.getClass().getSimpleName() +
|
||||||
|
" does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
|
||||||
|
final List<SegmentInfo> newInfos = new ArrayList<SegmentInfo>();
|
||||||
|
for (final SegmentInfo si : segmentInfos) {
|
||||||
|
if (oldSegments.contains(si)) {
|
||||||
|
newInfos.add(si);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// add the final merge
|
||||||
|
if (spec == null) {
|
||||||
|
spec = new MergeSpecification();
|
||||||
|
}
|
||||||
|
spec.add(new OneMerge(newInfos));
|
||||||
|
}
|
||||||
|
|
||||||
|
return spec;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
|
||||||
|
return base.findMergesToExpungeDeletes(segmentInfos);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException {
|
||||||
|
return base.useCompoundFile(segments, newSegment);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
base.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "[" + getClass().getSimpleName() + "->" + base + "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean verbose() {
|
||||||
|
IndexWriter w = writer.get();
|
||||||
|
return w != null && w.verbose();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void message(String message) {
|
||||||
|
if (verbose())
|
||||||
|
writer.get().message("UPGMP: " + message);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -73,6 +73,11 @@ public class CodecProvider {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @lucene.internal */
|
||||||
|
public synchronized Set<String> listAll() {
|
||||||
|
return codecs.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
public Collection<String> getAllExtensions() {
|
public Collection<String> getAllExtensions() {
|
||||||
return knownExtensions;
|
return knownExtensions;
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,15 +68,8 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object clone() {
|
public Object clone() {
|
||||||
PulsingTermState clone;
|
PulsingTermState clone = new PulsingTermState();
|
||||||
clone = (PulsingTermState) super.clone();
|
clone.copyFrom(this);
|
||||||
if (postingsSize != -1) {
|
|
||||||
clone.postings = new byte[postingsSize];
|
|
||||||
System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
|
|
||||||
} else {
|
|
||||||
assert wrappedTermState != null;
|
|
||||||
clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
|
|
||||||
}
|
|
||||||
return clone;
|
return clone;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,8 +83,10 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
||||||
postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
|
postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
|
||||||
}
|
}
|
||||||
System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
|
System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
|
||||||
} else {
|
} else if (wrappedTermState != null) {
|
||||||
wrappedTermState.copyFrom(other.wrappedTermState);
|
wrappedTermState.copyFrom(other.wrappedTermState);
|
||||||
|
} else {
|
||||||
|
wrappedTermState = (BlockTermState) other.wrappedTermState.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: we do not copy the
|
// NOTE: we do not copy the
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) {
|
public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) throws IOException {
|
||||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
|
||||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
|
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
|
||||||
|
|
||||||
|
@ -151,14 +151,8 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object clone() {
|
public Object clone() {
|
||||||
SepTermState other = (SepTermState) super.clone();
|
SepTermState other = new SepTermState();
|
||||||
other.docIndex = (IntIndexInput.Index) docIndex.clone();
|
other.copyFrom(this);
|
||||||
if (freqIndex != null) {
|
|
||||||
other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
|
|
||||||
}
|
|
||||||
if (posIndex != null) {
|
|
||||||
other.posIndex = (IntIndexInput.Index) posIndex.clone();
|
|
||||||
}
|
|
||||||
return other;
|
return other;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -166,12 +160,28 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
||||||
public void copyFrom(TermState _other) {
|
public void copyFrom(TermState _other) {
|
||||||
super.copyFrom(_other);
|
super.copyFrom(_other);
|
||||||
SepTermState other = (SepTermState) _other;
|
SepTermState other = (SepTermState) _other;
|
||||||
docIndex.set(other.docIndex);
|
if (docIndex == null) {
|
||||||
if (freqIndex != null && other.freqIndex != null) {
|
docIndex = (IntIndexInput.Index) other.docIndex.clone();
|
||||||
freqIndex.set(other.freqIndex);
|
} else {
|
||||||
|
docIndex.set(other.docIndex);
|
||||||
}
|
}
|
||||||
if (posIndex != null && other.posIndex != null) {
|
if (other.freqIndex != null) {
|
||||||
posIndex.set(other.posIndex);
|
if (freqIndex == null) {
|
||||||
|
freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
|
||||||
|
} else {
|
||||||
|
freqIndex.set(other.freqIndex);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
freqIndex = null;
|
||||||
|
}
|
||||||
|
if (other.posIndex != null) {
|
||||||
|
if (posIndex == null) {
|
||||||
|
posIndex = (IntIndexInput.Index) other.posIndex.clone();
|
||||||
|
} else {
|
||||||
|
posIndex.set(other.posIndex);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
posIndex = null;
|
||||||
}
|
}
|
||||||
payloadFP = other.payloadFP;
|
payloadFP = other.payloadFP;
|
||||||
skipFP = other.skipFP;
|
skipFP = other.skipFP;
|
||||||
|
|
|
@ -806,6 +806,7 @@ public abstract class QueryParserBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException ignored) {}
|
} catch (IOException ignored) {}
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,6 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
final class HitQueue extends PriorityQueue<ScoreDoc> {
|
final class HitQueue extends PriorityQueue<ScoreDoc> {
|
||||||
|
|
||||||
private boolean prePopulate;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new instance with <code>size</code> elements. If
|
* Creates a new instance with <code>size</code> elements. If
|
||||||
* <code>prePopulate</code> is set to true, the queue will pre-populate itself
|
* <code>prePopulate</code> is set to true, the queue will pre-populate itself
|
||||||
|
|
|
@@ -46,8 +46,18 @@ import org.apache.lucene.util.ThreadInterruptedException;
  *
  * <p>Applications usually need only call the inherited
  * {@link #search(Query,int)}
- * or {@link #search(Query,Filter,int)} methods. For performance reasons it is
- * recommended to open only one IndexSearcher and use it for all of your searches.
+ * or {@link #search(Query,Filter,int)} methods. For
+ * performance reasons, if your index is unchanging, you
+ * should share a single IndexSearcher instance across
+ * multiple searches instead of creating a new one
+ * per-search.  If your index has changed and you wish to
+ * see the changes reflected in searching, you should
+ * use {@link IndexReader#reopen} to obtain a new reader and
+ * then create a new IndexSearcher from that.  Also, for
+ * low-latency turnaround it's best to use a near-real-time
+ * reader ({@link IndexReader#open(IndexWriter,boolean)}).
+ * Once you have a new {@link IndexReader}, it's relatively
+ * cheap to create a new IndexSearcher from it.
  *
  * <a name="thread-safety"></a><p><b>NOTE</b>: <code>{@link
  * IndexSearcher}</code> instances are completely
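A hedged usage sketch of the sharing advice in the updated javadoc: keep one IndexSearcher per reader and reopen the reader only when the index has changed. The index path is hypothetical, and while the calls shown (FSDirectory.open, IndexReader.open, IndexReader.reopen, new IndexSearcher(reader)) match the API of this era, treat the sketch as an assumption rather than the authoritative pattern.

import java.io.File;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SharedSearcherSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index"));   // hypothetical path
    IndexReader reader = IndexReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);             // share across searches

    // ... many searches later, pick up any index changes cheaply:
    IndexReader newReader = reader.reopen();
    if (newReader != reader) {        // reopen() returns the same instance if nothing changed
      reader.close();
      reader = newReader;
      searcher = new IndexSearcher(reader);   // cheap once the new reader exists
    }

    searcher.close();
    reader.close();
    dir.close();
  }
}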
@ -214,12 +214,12 @@ public class MultiPhraseQuery extends Query {
|
||||||
docFreq = reader.docFreq(term.field(), term.bytes());
|
docFreq = reader.docFreq(term.field(), term.bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
|
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort by increasing docFreq order
|
// sort by increasing docFreq order
|
||||||
if (slop == 0) {
|
if (slop == 0) {
|
||||||
ArrayUtil.quickSort(postingsFreqs);
|
ArrayUtil.mergeSort(postingsFreqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slop == 0) {
|
if (slop == 0) {
|
||||||
|
|
|
@ -28,13 +28,15 @@ final class PhrasePositions {
|
||||||
int position; // position in doc
|
int position; // position in doc
|
||||||
int count; // remaining pos in this doc
|
int count; // remaining pos in this doc
|
||||||
int offset; // position in phrase
|
int offset; // position in phrase
|
||||||
|
final int ord; // unique across all PhrasePositions instances
|
||||||
final DocsAndPositionsEnum postings; // stream of docs & positions
|
final DocsAndPositionsEnum postings; // stream of docs & positions
|
||||||
PhrasePositions next; // used to make lists
|
PhrasePositions next; // used to make lists
|
||||||
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
|
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
|
||||||
|
|
||||||
PhrasePositions(DocsAndPositionsEnum postings, int o) {
|
PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
|
||||||
this.postings = postings;
|
this.postings = postings;
|
||||||
offset = o;
|
offset = o;
|
||||||
|
this.ord = ord;
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean next() throws IOException { // increments to next doc
|
final boolean next() throws IOException { // increments to next doc
|
||||||
|
|
|
@@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
     final DocsAndPositionsEnum postings;
     final int docFreq;
     final int position;
+    final Term term;

-    public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
+    public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
       this.postings = postings;
       this.docFreq = docFreq;
       this.position = position;
+      this.term = term;
     }

     public int compareTo(PostingsAndFreq other) {
+      if (docFreq == other.docFreq) {
+        if (position == other.position) {
+          return term.compareTo(other.term);
+        }
+        return position - other.position;
+      }
       return docFreq - other.docFreq;
     }
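The extra Term field above makes the ordering total: two entries with the same docFreq and position now compare by term instead of tying, which matters because the sort in this patch switches from quickSort to the stable mergeSort and callers expect a deterministic order. A hedged standalone sketch of the same three-level comparison (a hypothetical record-like class, not the Lucene one, with a String standing in for Term):

import java.util.Arrays;

public class TieBreakSketch {
  static class Posting implements Comparable<Posting> {
    final int docFreq;
    final int position;
    final String term;            // stands in for org.apache.lucene.index.Term
    Posting(int docFreq, int position, String term) {
      this.docFreq = docFreq; this.position = position; this.term = term;
    }
    @Override public int compareTo(Posting other) {
      if (docFreq == other.docFreq) {
        if (position == other.position) {
          return term.compareTo(other.term);   // final tie-break: the term itself
        }
        return position - other.position;
      }
      return docFreq - other.docFreq;          // rarest term sorts first
    }
    @Override public String toString() { return term; }
  }

  public static void main(String[] args) {
    Posting[] p = {
      new Posting(5, 1, "quick"), new Posting(5, 1, "brown"), new Posting(2, 0, "fox")
    };
    Arrays.sort(p);
    System.out.println(Arrays.toString(p));    // [fox, brown, quick] -- fully deterministic
  }
}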
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
final int prime = 31;
|
||||||
|
int result = 1;
|
||||||
|
result = prime * result + docFreq;
|
||||||
|
result = prime * result + position;
|
||||||
|
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (this == obj) return true;
|
||||||
|
if (obj == null) return false;
|
||||||
|
if (getClass() != obj.getClass()) return false;
|
||||||
|
PostingsAndFreq other = (PostingsAndFreq) obj;
|
||||||
|
if (docFreq != other.docFreq) return false;
|
||||||
|
if (position != other.position) return false;
|
||||||
|
if (term == null) {
|
||||||
|
if (other.term != null) return false;
|
||||||
|
} else if (!term.equals(other.term)) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class PhraseWeight extends Weight {
|
private class PhraseWeight extends Weight {
|
||||||
|
@ -197,12 +229,12 @@ public class PhraseQuery extends Query {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
|
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort by increasing docFreq order
|
// sort by increasing docFreq order
|
||||||
if (slop == 0) {
|
if (slop == 0) {
|
||||||
ArrayUtil.quickSort(postingsFreqs);
|
ArrayUtil.mergeSort(postingsFreqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slop == 0) { // optimize exact case
|
if (slop == 0) { // optimize exact case
|
||||||
|
|
|
@ -30,10 +30,16 @@ final class PhraseQueue extends PriorityQueue<PhrasePositions> {
|
||||||
if (pp1.position == pp2.position)
|
if (pp1.position == pp2.position)
|
||||||
// same doc and pp.position, so decide by actual term positions.
|
// same doc and pp.position, so decide by actual term positions.
|
||||||
// rely on: pp.position == tp.position - offset.
|
// rely on: pp.position == tp.position - offset.
|
||||||
return pp1.offset < pp2.offset;
|
if (pp1.offset == pp2.offset) {
|
||||||
else
|
return pp1.ord < pp2.ord;
|
||||||
|
} else {
|
||||||
|
return pp1.offset < pp2.offset;
|
||||||
|
}
|
||||||
|
else {
|
||||||
return pp1.position < pp2.position;
|
return pp1.position < pp2.position;
|
||||||
else
|
}
|
||||||
|
else {
|
||||||
return pp1.doc < pp2.doc;
|
return pp1.doc < pp2.doc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,7 +55,7 @@ abstract class PhraseScorer extends Scorer {
|
||||||
// this allows to easily identify a matching (exact) phrase
|
// this allows to easily identify a matching (exact) phrase
|
||||||
// when all PhrasePositions have exactly the same position.
|
// when all PhrasePositions have exactly the same position.
|
||||||
for (int i = 0; i < postings.length; i++) {
|
for (int i = 0; i < postings.length; i++) {
|
||||||
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position);
|
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
|
||||||
if (last != null) { // add next to end of list
|
if (last != null) { // add next to end of list
|
||||||
last.next = pp;
|
last.next = pp;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -134,7 +134,7 @@ public abstract class TopTermsRewrite<Q extends Query> extends TermCollectingRew
|
||||||
final Term placeholderTerm = new Term(query.field);
|
final Term placeholderTerm = new Term(query.field);
|
||||||
final Q q = getTopLevelQuery();
|
final Q q = getTopLevelQuery();
|
||||||
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
|
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
|
||||||
ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
|
ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
|
||||||
for (final ScoreTerm st : scoreTerms) {
|
for (final ScoreTerm st : scoreTerms) {
|
||||||
final Term term = placeholderTerm.createTerm(st.bytes);
|
final Term term = placeholderTerm.createTerm(st.bytes);
|
||||||
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
|
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
|
||||||
|
|
|
@ -190,7 +190,7 @@ public class NearSpansOrdered extends Spans {
|
||||||
|
|
||||||
/** Advance the subSpans to the same document */
|
/** Advance the subSpans to the same document */
|
||||||
private boolean toSameDoc() throws IOException {
|
private boolean toSameDoc() throws IOException {
|
||||||
ArrayUtil.quickSort(subSpansByDoc, spanDocComparator);
|
ArrayUtil.mergeSort(subSpansByDoc, spanDocComparator);
|
||||||
int firstIndex = 0;
|
int firstIndex = 0;
|
||||||
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
|
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
|
||||||
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
|
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
|
||||||
|
|
|
@@ -62,13 +62,26 @@ public abstract class SorterTemplate {

   /** Sorts via in-place, but unstable, QuickSort algorithm.
    * For small collections falls back to {@link #insertionSort(int,int)}. */
-  public final void quickSort(int lo, int hi) {
+  public final void quickSort(final int lo, final int hi) {
+    if (hi <= lo) return;
+    // from Integer's Javadocs: ceil(log2(x)) = 32 - numberOfLeadingZeros(x - 1)
+    quickSort(lo, hi, (Integer.SIZE - Integer.numberOfLeadingZeros(hi - lo)) << 1);
+  }
+
+  private void quickSort(int lo, int hi, int maxDepth) {
+    // fall back to insertion when array has short length
     final int diff = hi - lo;
     if (diff <= QUICKSORT_THRESHOLD) {
       insertionSort(lo, hi);
       return;
     }

+    // fall back to merge sort when recursion depth gets too big
+    if (--maxDepth == 0) {
+      mergeSort(lo, hi);
+      return;
+    }
+
     final int mid = lo + (diff >>> 1);

     if (compare(lo, mid) > 0) {
@@ -101,8 +114,8 @@ public abstract class SorterTemplate {
       }
     }

-    quickSort(lo, left);
-    quickSort(left + 1, hi);
+    quickSort(lo, left, maxDepth);
+    quickSort(left + 1, hi, maxDepth);
   }

   /** Sorts via stable in-place MergeSort algorithm
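The SorterTemplate change above turns plain quicksort into an introsort-style sort: a depth budget of roughly 2*ceil(log2(n)) is passed down the recursion, and once it is exhausted the range is handed to the stable merge sort, bounding the worst case at O(n log n). Below is a hedged, self-contained sketch of the same idea on an int[]; it is not the Lucene class, and Arrays.sort stands in for the merge-sort fallback.

import java.util.Arrays;

public class IntroSortSketch {
  static final int INSERTION_THRESHOLD = 7;

  public static void sort(int[] a) {
    if (a.length <= 1) return;
    // ceil(log2(n)) = 32 - numberOfLeadingZeros(n - 1), doubled for the depth budget
    int maxDepth = (Integer.SIZE - Integer.numberOfLeadingZeros(a.length - 1)) << 1;
    quickSort(a, 0, a.length - 1, maxDepth);
  }

  private static void quickSort(int[] a, int lo, int hi, int maxDepth) {
    if (hi - lo <= INSERTION_THRESHOLD) {    // tiny range: insertion sort
      insertionSort(a, lo, hi);
      return;
    }
    if (--maxDepth == 0) {                   // depth budget exhausted: bail out to a safe sort
      Arrays.sort(a, lo, hi + 1);
      return;
    }
    int p = partition(a, lo, hi);
    quickSort(a, lo, p, maxDepth);
    quickSort(a, p + 1, hi, maxDepth);
  }

  // Hoare partition around the middle element.
  private static int partition(int[] a, int lo, int hi) {
    int pivot = a[lo + ((hi - lo) >>> 1)];
    int i = lo - 1, j = hi + 1;
    while (true) {
      do { i++; } while (a[i] < pivot);
      do { j--; } while (a[j] > pivot);
      if (i >= j) return j;
      int tmp = a[i]; a[i] = a[j]; a[j] = tmp;
    }
  }

  private static void insertionSort(int[] a, int lo, int hi) {
    for (int i = lo + 1; i <= hi; i++) {
      int v = a[i], j = i - 1;
      while (j >= lo && a[j] > v) { a[j + 1] = a[j]; j--; }
      a[j + 1] = v;
    }
  }

  public static void main(String[] args) {
    int[] data = {9, 3, 7, 1, 8, 2, 6, 4, 5, 0, 9, 3};
    sort(data);
    System.out.println(Arrays.toString(data));
  }
}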
@ -261,9 +261,12 @@ public class Builder<T> {
|
||||||
add(scratchIntsRef, output);
|
add(scratchIntsRef, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** It's OK to add the same input twice in a row with
|
||||||
|
* different outputs, as long as outputs impls the merge
|
||||||
|
* method. */
|
||||||
public void add(IntsRef input, T output) throws IOException {
|
public void add(IntsRef input, T output) throws IOException {
|
||||||
//System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
|
//System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
|
||||||
assert lastInput.length == 0 || input.compareTo(lastInput) > 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
|
assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
|
||||||
assert validOutput(output);
|
assert validOutput(output);
|
||||||
|
|
||||||
//System.out.println("\nadd: " + input);
|
//System.out.println("\nadd: " + input);
|
||||||
|
@ -347,8 +350,15 @@ public class Builder<T> {
|
||||||
assert validOutput(output);
|
assert validOutput(output);
|
||||||
}
|
}
|
||||||
|
|
||||||
// push remaining output:
|
if (lastInput.length == input.length && prefixLenPlus1 == 1+input.length) {
|
||||||
frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
|
// same input more than 1 time in a row, mapping to
|
||||||
|
// multiple outputs
|
||||||
|
lastNode.output = fst.outputs.merge(lastNode.output, output);
|
||||||
|
} else {
|
||||||
|
// this new arc is private to this new input; set its
|
||||||
|
// arc output to the leftover output:
|
||||||
|
frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
|
||||||
|
}
|
||||||
|
|
||||||
// save last input
|
// save last input
|
||||||
lastInput.copy(input);
|
lastInput.copy(input);
|
||||||
|
|
|
@@ -231,10 +231,13 @@ public class FST<T> {
   }

   void setEmptyOutput(T v) throws IOException {
-    if (emptyOutput != null && !emptyOutput.equals(v)) {
-      throw new IllegalStateException("empty output is already set: " + outputs.outputToString(emptyOutput) + " vs " + outputs.outputToString(v));
+    if (emptyOutput != null) {
+      if (!emptyOutput.equals(v)) {
+        emptyOutput = outputs.merge(emptyOutput, v);
+      }
+    } else {
+      emptyOutput = v;
     }
-    emptyOutput = v;

     // TODO: this is messy -- replace with sillyBytesWriter; maybe make
     // bytes private

@@ -446,25 +449,17 @@ public class FST<T> {
     // reverse bytes in-place; we do this so that the
     // "BIT_TARGET_NEXT" opto can work, ie, it reads the
     // node just before the current one
-    final int endAddress = writer.posWrite;
-    final int stopAt = (endAddress - startAddress)/2;
-    int upto = 0;
-    while (upto < stopAt) {
-      final byte b = bytes[startAddress+upto];
-      bytes[startAddress+upto] = bytes[endAddress-upto-1];
-      bytes[endAddress-upto-1] = b;
-      upto++;
-    }
-
-    lastFrozenNode = endAddress - 1;
-    /*
-    System.out.println(" return node addr=" + (endAddress-1));
-    for(int i=endAddress-1;i>=startAddress;i--) {
-      System.out.println(" bytes[" + i + "]=" + bytes[i]);
-    }
-    */
-
-    return endAddress-1;
+    final int endAddress = lastFrozenNode = writer.posWrite - 1;
+    int left = startAddress;
+    int right = endAddress;
+    while (left < right) {
+      final byte b = bytes[left];
+      bytes[left++] = bytes[right];
+      bytes[right--] = b;
+    }
+
+    return endAddress;
   }

   /** Fills virtual 'start' arc, ie, an empty incoming arc to

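The rewritten freeze step above computes endAddress (and lastFrozenNode) up front and reverses the freshly written node bytes with two converging indices over an inclusive [left, right] range, replacing the old offset-counting loop. The reversal itself is the classic two-pointer swap; a small standalone sketch on a plain byte[], not the FST writer:

    /** Reverse bytes[left..right] (both bounds inclusive) in place. */
    static void reverseInPlace(byte[] bytes, int left, int right) {
      while (left < right) {
        final byte b = bytes[left];
        bytes[left++] = bytes[right];
        bytes[right--] = b;
      }
    }

    // e.g. reverseInPlace(new byte[]{1, 2, 3, 4}, 0, 3) yields {4, 3, 2, 1}
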
@@ -140,7 +140,7 @@ abstract class FSTEnum<T> {
       // Arcs are fixed array -- use binary search to find
       // the target.

-      final FST.BytesReader in = fst.getBytesReader(0);
+      final FST<T>.BytesReader in = fst.getBytesReader(0);
       int low = arc.arcIdx;
       int high = arc.numArcs-1;
       int mid = 0;

@@ -278,7 +278,7 @@ abstract class FSTEnum<T> {
       // Arcs are fixed array -- use binary search to find
       // the target.

-      final FST.BytesReader in = fst.getBytesReader(0);
+      final FST<T>.BytesReader in = fst.getBytesReader(0);
       int low = arc.arcIdx;
       int high = arc.numArcs-1;
       int mid = 0;

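Both FSTEnum hunks are only a generics fix on the BytesReader declaration; the surrounding context lines show the lookup they sit in, which binary-searches a fixed-size arc array between arc.arcIdx and arc.numArcs-1 for the target label. For reference, a generic form of that search over a sorted int array of labels (illustrative, not the FSTEnum code):

    /** Returns the index of targetLabel in labels[low..high], or -1 if absent. */
    static int findArc(int[] labels, int low, int high, int targetLabel) {
      while (low <= high) {
        final int mid = (low + high) >>> 1;   // unsigned shift avoids int overflow
        if (labels[mid] < targetLabel) {
          low = mid + 1;
        } else if (labels[mid] > targetLabel) {
          high = mid - 1;
        } else {
          return mid;
        }
      }
      return -1;
    }
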
@@ -40,7 +40,7 @@ final class NodeHash<T> {
       return false;
     }
     for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
-      final Builder.Arc arc = node.arcs[arcUpto];
+      final Builder.Arc<T> arc = node.arcs[arcUpto];
       if (arc.label != scratchArc.label ||
           !arc.output.equals(scratchArc.output) ||
           ((Builder.CompiledNode) arc.target).address != scratchArc.target ||

@@ -54,4 +54,8 @@ public abstract class Outputs<T> {
   public abstract T getNoOutput();

   public abstract String outputToString(T output);

+  public T merge(T first, T second) {
+    throw new UnsupportedOperationException();
+  }
 }

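The Outputs hunk adds a merge(first, second) hook with a throwing default, so only outputs implementations that actually support multiple outputs per input have to override it. A minimal sketch of that optional-operation pattern with hypothetical classes (not the Lucene Outputs hierarchy):

    import java.util.ArrayList;
    import java.util.List;

    abstract class OutputCombiner<T> {
      abstract T noOutput();

      /** Optional operation: only implementations that support
       *  multiple outputs per input need to override this. */
      T merge(T first, T second) {
        throw new UnsupportedOperationException();
      }
    }

    final class ListCombiner extends OutputCombiner<List<Long>> {
      @Override
      List<Long> noOutput() {
        return new ArrayList<Long>();
      }

      @Override
      List<Long> merge(List<Long> first, List<Long> second) {
        List<Long> merged = new ArrayList<Long>(first);
        merged.addAll(second);
        return merged;
      }
    }
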
@@ -43,7 +43,7 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
       this.output2 = output2;
     }

-    @Override @SuppressWarnings("unchecked")
+    @Override @SuppressWarnings("rawtypes")
     public boolean equals(Object other) {
       if (other == this) {
         return true;

@@ -22,14 +22,11 @@ import java.io.IOException;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;

-// TODO: make a sharing and non-sharing variant; eg if you
-// output docFreq per term the FST will be smaller if you
-// don't share since they are not "well shared"
-
 /**
  * Output is a long, for each input term. NOTE: the
  * resulting FST is not guaranteed to be minimal! See
- * {@link Builder}.
+ * {@link Builder}. You cannot store 0 output with this
+ * (that's reserved to mean "no output")!
  * @lucene.experimental
  */

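The updated javadoc above spells out that an output of 0 cannot be stored because 0 is reserved to mean "no output". In practice that convention is a shared sentinel object returned for 0 and checked by identity; a small hedged sketch of the idea (illustrative class, not PositiveIntOutputs itself):

    final class PositiveOutputExample {
      // 0 is reserved as the "no output" sentinel, so it can never be stored
      static final Long NO_OUTPUT = new Long(0);

      static Long get(long v) {
        if (v == 0) {
          return NO_OUTPUT;               // always hand back the shared sentinel
        } else if (v < 0) {
          throw new IllegalArgumentException("output must be positive: " + v);
        } else {
          return Long.valueOf(v);
        }
      }

      static boolean isNoOutput(Long output) {
        return output == NO_OUTPUT;       // identity check against the sentinel
      }
    }
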
@@ -0,0 +1,224 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+/**
+ * Holds one or two longs for each input term.  If it's a
+ * single output, Long is returned; else, TwoLongs.  Order
+ * is preseved in the TwoLongs case, ie .first is the first
+ * input/output added to Builder, and .second is the
+ * second.  You cannot store 0 output with this (that's
+ * reserved to mean "no output")!
+ *
+ * NOTE: the resulting FST is not guaranteed to be minimal!
+ * See {@link Builder}.
+ *
+ * @lucene.experimental
+ */
+
+public final class UpToTwoPositiveIntOutputs extends Outputs<Object> {
+
+  public final static class TwoLongs {
+    final long first;
+    final long second;
+
+    public TwoLongs(long first, long second) {
+      this.first = first;
+      this.second = second;
+      assert first >= 0;
+      assert second >= 0;
+    }
+
+    @Override
+    public String toString() {
+      return "TwoLongs:" + first + "," + second;
+    }
+
+    @Override
+    public boolean equals(Object _other) {
+      if (_other instanceof TwoLongs) {
+        final TwoLongs other = (TwoLongs) _other;
+        return first == other.first && second == other.second;
+      } else {
+        return false;
+      }
+    }
+
+    @Override
+    public int hashCode() {
+      return (int) ((first^(first>>>32)) ^ (second^(second>>32)));
+    }
+  }
+
+  private final static Long NO_OUTPUT = new Long(0);
+
+  private final boolean doShare;
+
+  private final static UpToTwoPositiveIntOutputs singletonShare = new UpToTwoPositiveIntOutputs(true);
+  private final static UpToTwoPositiveIntOutputs singletonNoShare = new UpToTwoPositiveIntOutputs(false);
+
+  private UpToTwoPositiveIntOutputs(boolean doShare) {
+    this.doShare = doShare;
+  }
+
+  public static UpToTwoPositiveIntOutputs getSingleton(boolean doShare) {
+    return doShare ? singletonShare : singletonNoShare;
+  }
+
+  public Long get(long v) {
+    if (v == 0) {
+      return NO_OUTPUT;
+    } else {
+      return Long.valueOf(v);
+    }
+  }
+
+  public TwoLongs get(long first, long second) {
+    return new TwoLongs(first, second);
+  }
+
+  @Override
+  public Long common(Object _output1, Object _output2) {
+    assert valid(_output1, false);
+    assert valid(_output2, false);
+    final Long output1 = (Long) _output1;
+    final Long output2 = (Long) _output2;
+    if (output1 == NO_OUTPUT || output2 == NO_OUTPUT) {
+      return NO_OUTPUT;
+    } else if (doShare) {
+      assert output1 > 0;
+      assert output2 > 0;
+      return Math.min(output1, output2);
+    } else if (output1.equals(output2)) {
+      return output1;
+    } else {
+      return NO_OUTPUT;
+    }
+  }
+
+  @Override
+  public Long subtract(Object _output, Object _inc) {
+    assert valid(_output, false);
+    assert valid(_inc, false);
+    final Long output = (Long) _output;
+    final Long inc = (Long) _inc;
+    assert output >= inc;
+
+    if (inc == NO_OUTPUT) {
+      return output;
+    } else if (output.equals(inc)) {
+      return NO_OUTPUT;
+    } else {
+      return output - inc;
+    }
+  }
+
+  @Override
+  public Object add(Object _prefix, Object _output) {
+    assert valid(_prefix, false);
+    assert valid(_output, true);
+    final Long prefix = (Long) _prefix;
+    if (_output instanceof Long) {
+      final Long output = (Long) _output;
+      if (prefix == NO_OUTPUT) {
+        return output;
+      } else if (output == NO_OUTPUT) {
+        return prefix;
+      } else {
+        return prefix + output;
+      }
+    } else {
+      final TwoLongs output = (TwoLongs) _output;
+      final long v = prefix;
+      return new TwoLongs(output.first + v, output.second + v);
+    }
+  }
+
+  @Override
+  public void write(Object _output, DataOutput out) throws IOException {
+    assert valid(_output, true);
+    if (_output instanceof Long) {
+      final Long output = (Long) _output;
+      out.writeVLong(output<<1);
+    } else {
+      final TwoLongs output = (TwoLongs) _output;
+      out.writeVLong((output.first<<1) | 1);
+      out.writeVLong(output.second);
+    }
+  }
+
+  @Override
+  public Object read(DataInput in) throws IOException {
+    final long code = in.readVLong();
+    if ((code & 1) == 0) {
+      // single long
+      final long v = code >>> 1;
+      if (v == 0) {
+        return NO_OUTPUT;
+      } else {
+        return Long.valueOf(v);
+      }
+    } else {
+      // two longs
+      final long first = code >>> 1;
+      final long second = in.readVLong();
+      return new TwoLongs(first, second);
+    }
+  }
+
+  private boolean valid(Long o) {
+    assert o != null;
+    assert o instanceof Long;
+    assert o == NO_OUTPUT || o > 0;
+    return true;
+  }
+
+  // Used only by assert
+  private boolean valid(Object _o, boolean allowDouble) {
+    if (!allowDouble) {
+      assert _o instanceof Long;
+      return valid((Long) _o);
+    } else if (_o instanceof TwoLongs) {
+      return true;
+    } else {
+      return valid((Long) _o);
+    }
+  }
+
+  @Override
+  public Object getNoOutput() {
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public String outputToString(Object output) {
+    return output.toString();
+  }
+
+  @Override
+  public Object merge(Object first, Object second) {
+    assert valid(first, false);
+    assert valid(second, false);
+    return new TwoLongs((Long) first, (Long) second);
+  }
+}

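UpToTwoPositiveIntOutputs serializes both shapes into one stream by stealing the low bit of the first vLong as a tag: a single value is written as value<<1 (tag bit 0), a pair as (first<<1)|1 followed by the second value. A tiny round-trip sketch of just that tagging scheme, using plain longs in place of DataInput/DataOutput (illustrative helper class, not the Lucene code):

    import java.util.ArrayList;
    import java.util.List;

    final class TagBitCodec {
      /** Encode a single positive value: low bit of the first word is 0. */
      static List<Long> encodeSingle(long v) {
        List<Long> out = new ArrayList<Long>();
        out.add(v << 1);
        return out;
      }

      /** Encode a pair of positive values: low bit of the first word is 1. */
      static List<Long> encodePair(long first, long second) {
        List<Long> out = new ArrayList<Long>();
        out.add((first << 1) | 1);
        out.add(second);
        return out;
      }

      /** Decode into either a single long or a long[2], mirroring Long vs TwoLongs. */
      static Object decode(List<Long> in) {
        long code = in.get(0);
        if ((code & 1) == 0) {
          return code >>> 1;                           // single value
        }
        return new long[] { code >>> 1, in.get(1) };   // pair
      }
    }
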
Some files were not shown because too many files have changed in this diff.