mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 03:25:15 +00:00
Merge branch 'master' into feature/autoscaling
# Conflicts: # solr/CHANGES.txt
This commit is contained in:
commit
fb7803d9a0
@ -66,6 +66,13 @@
|
|||||||
</foaf:Person>
|
</foaf:Person>
|
||||||
</maintainer>
|
</maintainer>
|
||||||
|
|
||||||
|
<release>
|
||||||
|
<Version>
|
||||||
|
<name>lucene-6.6.0</name>
|
||||||
|
<created>2017-06-06</created>
|
||||||
|
<revision>6.6.0</revision>
|
||||||
|
</Version>
|
||||||
|
</release>
|
||||||
<release>
|
<release>
|
||||||
<Version>
|
<Version>
|
||||||
<name>lucene-6.5.1</name>
|
<name>lucene-6.5.1</name>
|
||||||
|
@ -66,6 +66,13 @@
|
|||||||
</foaf:Person>
|
</foaf:Person>
|
||||||
</maintainer>
|
</maintainer>
|
||||||
|
|
||||||
|
<release>
|
||||||
|
<Version>
|
||||||
|
<name>solr-6.6.0</name>
|
||||||
|
<created>2017-06-06</created>
|
||||||
|
<revision>6.6.0</revision>
|
||||||
|
</Version>
|
||||||
|
</release>
|
||||||
<release>
|
<release>
|
||||||
<Version>
|
<Version>
|
||||||
<name>solr-6.5.1</name>
|
<name>solr-6.5.1</name>
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
|
||||||
</content>
|
</content>
|
||||||
|
<content url="file://$MODULE_DIR$/../resources">
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/../resources" type="java-resource" />
|
||||||
|
</content>
|
||||||
<orderEntry type="inheritedJdk" />
|
<orderEntry type="inheritedJdk" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
<orderEntry type="library" name="Solrj library" level="project" />
|
<orderEntry type="library" name="Solrj library" level="project" />
|
||||||
|
@ -14,6 +14,16 @@ New Features
|
|||||||
well as the oldest Lucene version that contributed to the segment.
|
well as the oldest Lucene version that contributed to the segment.
|
||||||
(Adrien Grand)
|
(Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-7854: The new TermFrequencyAttribute used during analysis
|
||||||
|
with a custom token stream allows indexing custom term frequencies
|
||||||
|
(Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-7866: Add a new DelimitedTermFrequencyTokenFilter that allows to
|
||||||
|
mark tokens with a custom term frequency (LUCENE-7854). It parses a numeric
|
||||||
|
value after a separator char ('|') at the end of each token and changes
|
||||||
|
the term frequency to this value. (Uwe Schindler, Robert Muir, Mike
|
||||||
|
McCandless)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.
|
* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.
|
||||||
@ -59,6 +69,11 @@ API Changes
|
|||||||
|
|
||||||
* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)
|
* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-7500: Removed abstract LeafReader.fields(); instead terms(fieldName)
|
||||||
|
has been made abstract; formerly it was final. Also, MultiFields.getTerms
|
||||||
|
was optimized to work directly instead of being implemented on getFields.
|
||||||
|
(David Smiley)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
|
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
|
||||||
@ -97,6 +112,8 @@ Optimizations
|
|||||||
query is a point (for 2D) or a is a simple date interval (e.g. 1 month). When
|
query is a point (for 2D) or a is a simple date interval (e.g. 1 month). When
|
||||||
the strategy is marked as pointsOnly, the results is a TermQuery. (David Smiley)
|
the strategy is marked as pointsOnly, the results is a TermQuery. (David Smiley)
|
||||||
|
|
||||||
|
* LUCENE-7874: DisjunctionMaxQuery rewrites to a BooleanQuery when tiebreaker is set to 1. (Jim Ferenczi)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
|
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
|
||||||
@ -113,18 +130,53 @@ Other
|
|||||||
* LUCENE-7852: Correct copyright year(s) in lucene/LICENSE.txt file.
|
* LUCENE-7852: Correct copyright year(s) in lucene/LICENSE.txt file.
|
||||||
(Christine Poerschke, Steve Rowe)
|
(Christine Poerschke, Steve Rowe)
|
||||||
|
|
||||||
|
* LUCENE-7719: Generalized the UnifiedHighlighter's support for AutomatonQuery
|
||||||
|
for character & binary automata. Added AutomatonQuery.isBinary. (David Smiley)
|
||||||
|
|
||||||
|
* LUCENE-7873: Due to serious problems with context class loaders in several
|
||||||
|
frameworks (OSGI, Java 9 Jigsaw), the lookup of Codecs, PostingsFormats,
|
||||||
|
DocValuesFormats and all analysis factories was changed to only inspect the
|
||||||
|
current classloader that defined the interface class (lucene-core.jar).
|
||||||
|
See MIGRATE.txt for more information! (Uwe Schindler, Dawid Weiss)
|
||||||
|
|
||||||
======================= Lucene 6.7.0 =======================
|
======================= Lucene 6.7.0 =======================
|
||||||
|
|
||||||
|
New Features
|
||||||
|
|
||||||
|
* LUCENE-7855: Added advanced options of the Wikipedia tokenizer to its factory.
|
||||||
|
(Juan Pedro via Adrien Grand)
|
||||||
|
|
||||||
|
Bug Fixes
|
||||||
|
|
||||||
|
* LUCENE-7864: IndexMergeTool is not using intermediate hard links (even
|
||||||
|
if possible). (Dawid Weiss)
|
||||||
|
|
||||||
|
* LUCENE-7869: Changed MemoryIndex to sort 1d points. In case of 1d points, the PointInSetQuery.MergePointVisitor expects
|
||||||
|
that these points are visited in ascending order. The memory index doesn't do this and this can result in documents
|
||||||
|
with multiple points that should match failing to match. (Martijn van Groningen)
|
||||||
|
|
||||||
|
* LUCENE-7878: Fix query builder to keep the SHOULD clause that wraps multi-word synonyms. (Jim Ferenczi)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-7800: Remove code that potentially rethrows checked exceptions
|
* LUCENE-7800: Remove code that potentially rethrows checked exceptions
|
||||||
from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
|
from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
|
||||||
Uwe Schindler, Dawid Weiss)
|
Uwe Schindler, Dawid Weiss)
|
||||||
|
|
||||||
|
* LUCENE-7876: Avoid calls to LeafReader.fields() and MultiFields.getFields()
|
||||||
|
that are trivially replaced by LeafReader.terms() and MultiFields.getTerms()
|
||||||
|
(David Smiley)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
|
|
||||||
* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)
|
* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)
|
||||||
|
|
||||||
|
Optimizations
|
||||||
|
|
||||||
|
* LUCENE-7828: Speed up range queries on range fields by improving how we
|
||||||
|
compute the relation between the query and inner nodes of the BKD tree.
|
||||||
|
(Adrien Grand)
|
||||||
|
|
||||||
======================= Lucene 6.6.0 =======================
|
======================= Lucene 6.6.0 =======================
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
@ -1,5 +1,46 @@
|
|||||||
# Apache Lucene Migration Guide
|
# Apache Lucene Migration Guide
|
||||||
|
|
||||||
|
## SPI lookups for codecs and analysis changed (LUCENE-7873) ##
|
||||||
|
|
||||||
|
Due to serious problems with context class loaders in several frameworks
|
||||||
|
(OSGI, Java 9 Jigsaw), the lookup of Codecs, PostingsFormats, DocValuesFormats
|
||||||
|
and all analysis factories was changed to only inspect the current classloader
|
||||||
|
that defined the interface class (`lucene-core.jar`). Normal applications
|
||||||
|
should not encounter any issues with that change, because the application
|
||||||
|
classloader (unnamed module in Java 9) can load all SPIs from all JARs
|
||||||
|
from classpath.
|
||||||
|
|
||||||
|
For any code that relies on the old behaviour (e.g., certain web applications
|
||||||
|
or components in application servers) one can manually instruct the Lucene
|
||||||
|
SPI implementation to also inspect the context classloader. To do this,
|
||||||
|
add this code to the early startup phase of your application before any
|
||||||
|
Apache Lucene component is used:
|
||||||
|
|
||||||
|
ClassLoader cl = Thread.currentThread().getContextClassLoader();
|
||||||
|
// Codecs:
|
||||||
|
PostingsFormat.reloadPostingsFormats(cl);
|
||||||
|
DocValuesFormat.reloadDocValuesFormats(cl);
|
||||||
|
Codec.reloadCodecs(cl);
|
||||||
|
// Analysis:
|
||||||
|
CharFilterFactory.reloadCharFilters(cl);
|
||||||
|
TokenFilterFactory.reloadTokenFilters(cl);
|
||||||
|
TokenizerFactory.reloadTokenizers(cl);
|
||||||
|
|
||||||
|
This code will reload all service providers from the given class loader
|
||||||
|
(in our case the context class loader). Of course, instead of specifying
|
||||||
|
the context class loader, it is recommended to use the application's main
|
||||||
|
class loader or the module class loader.
|
||||||
|
|
||||||
|
If you are migrating your project to Java 9 Jigsaw module system, keep in mind
|
||||||
|
that Lucene currently does not yet support `module-info.java` declarations of
|
||||||
|
service provider impls (`provides` statement). It is therefore recommended
|
||||||
|
to keep all of Lucene in one Uber-Module and not try to split Lucene into
|
||||||
|
several modules. As soon as Lucene migrates to Java 9 as its minimum requirement,
|
||||||
|
we will work on improving that.
|
||||||
|
|
||||||
|
For OSGI, the same applies. You have to create a bundle with all of Lucene for
|
||||||
|
SPI to work correctly.
|
||||||
|
|
||||||
## Query.hashCode and Query.equals are now abstract methods (LUCENE-7277)
|
## Query.hashCode and Query.equals are now abstract methods (LUCENE-7277)
|
||||||
|
|
||||||
Any custom query subclasses should redeclare equivalence relationship according
|
Any custom query subclasses should redeclare equivalence relationship according
|
||||||
|
@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||||
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Characters before the delimiter are the "token", the textual integer after is the term frequency.
|
||||||
|
* To use this {@code TokenFilter} the field must be indexed with
|
||||||
|
* {@link IndexOptions#DOCS_AND_FREQS} but no positions or offsets.
|
||||||
|
* <p>
|
||||||
|
* For example, if the delimiter is '|', then for the string "foo|5", "foo" is the token
|
||||||
|
* and "5" is a term frequency. If there is no delimiter, the TokenFilter does not modify
|
||||||
|
* the term frequency.
|
||||||
|
* <p>
|
||||||
|
* Note make sure your Tokenizer doesn't split on the delimiter, or this won't work
|
||||||
|
*/
|
||||||
|
public final class DelimitedTermFrequencyTokenFilter extends TokenFilter {
|
||||||
|
public static final char DEFAULT_DELIMITER = '|';
|
||||||
|
|
||||||
|
private final char delimiter;
|
||||||
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
|
private final TermFrequencyAttribute tfAtt = addAttribute(TermFrequencyAttribute.class);
|
||||||
|
|
||||||
|
|
||||||
|
public DelimitedTermFrequencyTokenFilter(TokenStream input) {
|
||||||
|
this(input, DEFAULT_DELIMITER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DelimitedTermFrequencyTokenFilter(TokenStream input, char delimiter) {
|
||||||
|
super(input);
|
||||||
|
this.delimiter = delimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean incrementToken() throws IOException {
|
||||||
|
if (input.incrementToken()) {
|
||||||
|
final char[] buffer = termAtt.buffer();
|
||||||
|
final int length = termAtt.length();
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
if (buffer[i] == delimiter) {
|
||||||
|
termAtt.setLength(i); // simply set a new length
|
||||||
|
i++;
|
||||||
|
tfAtt.setTermFrequency(ArrayUtil.parseInt(buffer, i, length - i));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link DelimitedTermFrequencyTokenFilter}. The field must have {@code omitPositions=true}.
|
||||||
|
* <pre class="prettyprint">
|
||||||
|
* <fieldType name="text_tfdl" class="solr.TextField" omitPositions="true">
|
||||||
|
* <analyzer>
|
||||||
|
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
* <filter class="solr.DelimitedTermFrequencyTokenFilterFactory" delimiter="|"/>
|
||||||
|
* </analyzer>
|
||||||
|
* </fieldType></pre>
|
||||||
|
*/
|
||||||
|
public class DelimitedTermFrequencyTokenFilterFactory extends TokenFilterFactory {
|
||||||
|
public static final String DELIMITER_ATTR = "delimiter";
|
||||||
|
|
||||||
|
private final char delimiter;
|
||||||
|
|
||||||
|
/** Creates a new DelimitedPayloadTokenFilterFactory */
|
||||||
|
public DelimitedTermFrequencyTokenFilterFactory(Map<String, String> args) {
|
||||||
|
super(args);
|
||||||
|
delimiter = getChar(args, DELIMITER_ATTR, DelimitedTermFrequencyTokenFilter.DEFAULT_DELIMITER);
|
||||||
|
if (!args.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DelimitedTermFrequencyTokenFilter create(TokenStream input) {
|
||||||
|
return new DelimitedTermFrequencyTokenFilter(input, delimiter);
|
||||||
|
}
|
||||||
|
}
|
@ -48,7 +48,7 @@ public final class AnalysisSPILoader<S extends AbstractAnalysisFactory> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public AnalysisSPILoader(Class<S> clazz, String[] suffixes) {
|
public AnalysisSPILoader(Class<S> clazz, String[] suffixes) {
|
||||||
this(clazz, suffixes, Thread.currentThread().getContextClassLoader());
|
this(clazz, suffixes, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public AnalysisSPILoader(Class<S> clazz, String[] suffixes, ClassLoader classloader) {
|
public AnalysisSPILoader(Class<S> clazz, String[] suffixes, ClassLoader classloader) {
|
||||||
|
@ -16,9 +16,9 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.wikipedia;
|
package org.apache.lucene.analysis.wikipedia;
|
||||||
|
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||||
import org.apache.lucene.util.AttributeFactory;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
@ -33,19 +33,28 @@ import org.apache.lucene.util.AttributeFactory;
|
|||||||
* </fieldType></pre>
|
* </fieldType></pre>
|
||||||
*/
|
*/
|
||||||
public class WikipediaTokenizerFactory extends TokenizerFactory {
|
public class WikipediaTokenizerFactory extends TokenizerFactory {
|
||||||
|
public static final String TOKEN_OUTPUT = "tokenOutput";
|
||||||
|
public static final String UNTOKENIZED_TYPES = "untokenizedTypes";
|
||||||
|
|
||||||
|
protected final int tokenOutput;
|
||||||
|
protected Set<String> untokenizedTypes;
|
||||||
|
|
||||||
/** Creates a new WikipediaTokenizerFactory */
|
/** Creates a new WikipediaTokenizerFactory */
|
||||||
public WikipediaTokenizerFactory(Map<String,String> args) {
|
public WikipediaTokenizerFactory(Map<String,String> args) {
|
||||||
super(args);
|
super(args);
|
||||||
|
tokenOutput = getInt(args, TOKEN_OUTPUT, WikipediaTokenizer.TOKENS_ONLY);
|
||||||
|
untokenizedTypes = getSet(args, UNTOKENIZED_TYPES);
|
||||||
|
|
||||||
|
if (untokenizedTypes == null) {
|
||||||
|
untokenizedTypes = Collections.emptySet();
|
||||||
|
}
|
||||||
if (!args.isEmpty()) {
|
if (!args.isEmpty()) {
|
||||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: add support for WikipediaTokenizer's advanced options.
|
|
||||||
@Override
|
@Override
|
||||||
public WikipediaTokenizer create(AttributeFactory factory) {
|
public WikipediaTokenizer create(AttributeFactory factory) {
|
||||||
return new WikipediaTokenizer(factory, WikipediaTokenizer.TOKENS_ONLY,
|
return new WikipediaTokenizer(factory, tokenOutput, untokenizedTypes);
|
||||||
Collections.<String>emptySet());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,6 +63,7 @@ org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory
|
|||||||
org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
|
org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
|
||||||
org.apache.lucene.analysis.miscellaneous.CodepointCountFilterFactory
|
org.apache.lucene.analysis.miscellaneous.CodepointCountFilterFactory
|
||||||
org.apache.lucene.analysis.miscellaneous.DateRecognizerFilterFactory
|
org.apache.lucene.analysis.miscellaneous.DateRecognizerFilterFactory
|
||||||
|
org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory
|
||||||
org.apache.lucene.analysis.miscellaneous.FingerprintFilterFactory
|
org.apache.lucene.analysis.miscellaneous.FingerprintFilterFactory
|
||||||
org.apache.lucene.analysis.miscellaneous.FixBrokenOffsetsFilterFactory
|
org.apache.lucene.analysis.miscellaneous.FixBrokenOffsetsFilterFactory
|
||||||
org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory
|
org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory
|
||||||
|
@ -21,13 +21,17 @@ import java.io.IOException;
|
|||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory;
|
||||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||||
@ -49,6 +53,12 @@ import org.apache.lucene.util.Version;
|
|||||||
|
|
||||||
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
|
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
|
||||||
public class TestFactories extends BaseTokenStreamTestCase {
|
public class TestFactories extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
/** Factories that are excluded from testing it with random data */
|
||||||
|
private static final Set<Class<? extends AbstractAnalysisFactory>> EXCLUDE_FACTORIES_RANDOM_DATA = new HashSet<>(Arrays.asList(
|
||||||
|
DelimitedTermFrequencyTokenFilterFactory.class
|
||||||
|
));
|
||||||
|
|
||||||
public void test() throws IOException {
|
public void test() throws IOException {
|
||||||
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
|
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
|
||||||
doTestTokenizer(tokenizer);
|
doTestTokenizer(tokenizer);
|
||||||
@ -77,11 +87,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||||||
assertFalse(mtc instanceof CharFilterFactory);
|
assertFalse(mtc instanceof CharFilterFactory);
|
||||||
}
|
}
|
||||||
|
|
||||||
// beast it just a little, it shouldnt throw exceptions:
|
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||||
// (it should have thrown them in initialize)
|
// beast it just a little, it shouldnt throw exceptions:
|
||||||
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
// (it should have thrown them in initialize)
|
||||||
checkRandomData(random(), a, 20, 20, false, false);
|
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
||||||
a.close();
|
checkRandomData(random(), a, 20, 20, false, false);
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -99,11 +111,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||||||
assertTrue(mtc instanceof TokenFilterFactory);
|
assertTrue(mtc instanceof TokenFilterFactory);
|
||||||
}
|
}
|
||||||
|
|
||||||
// beast it just a little, it shouldnt throw exceptions:
|
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||||
// (it should have thrown them in initialize)
|
// beast it just a little, it shouldnt throw exceptions:
|
||||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
// (it should have thrown them in initialize)
|
||||||
checkRandomData(random(), a, 20, 20, false, false);
|
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
||||||
a.close();
|
checkRandomData(random(), a, 20, 20, false, false);
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -121,11 +135,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||||||
assertTrue(mtc instanceof CharFilterFactory);
|
assertTrue(mtc instanceof CharFilterFactory);
|
||||||
}
|
}
|
||||||
|
|
||||||
// beast it just a little, it shouldnt throw exceptions:
|
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||||
// (it should have thrown them in initialize)
|
// beast it just a little, it shouldnt throw exceptions:
|
||||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
// (it should have thrown them in initialize)
|
||||||
checkRandomData(random(), a, 20, 20, false, false);
|
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
||||||
a.close();
|
checkRandomData(random(), a, 20, 20, false, false);
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,6 +73,7 @@ import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
|
|||||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||||
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
|
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
|
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
|
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
|
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
|
||||||
@ -159,6 +160,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||||||
WordDelimiterFilter.class,
|
WordDelimiterFilter.class,
|
||||||
// Cannot correct offsets when a char filter had changed them:
|
// Cannot correct offsets when a char filter had changed them:
|
||||||
WordDelimiterGraphFilter.class,
|
WordDelimiterGraphFilter.class,
|
||||||
|
// requires a special encoded token value, so it may fail with random data:
|
||||||
|
DelimitedTermFrequencyTokenFilter.class,
|
||||||
// clones of core's filters:
|
// clones of core's filters:
|
||||||
org.apache.lucene.analysis.core.StopFilter.class,
|
org.apache.lucene.analysis.core.StopFilter.class,
|
||||||
org.apache.lucene.analysis.core.LowerCaseFilter.class)) {
|
org.apache.lucene.analysis.core.LowerCaseFilter.class)) {
|
||||||
|
@ -0,0 +1,77 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||||
|
|
||||||
|
public class DelimitedTermFrequencyTokenFilterTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
public void testTermFrequency() throws Exception {
|
||||||
|
String test = "The quick|40 red|4 fox|06 jumped|1 over the lazy|2 brown|123 dogs|1024";
|
||||||
|
DelimitedTermFrequencyTokenFilter filter =
|
||||||
|
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
|
||||||
|
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
|
||||||
|
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
|
||||||
|
filter.reset();
|
||||||
|
assertTermEquals("The", filter, termAtt, tfAtt, 1);
|
||||||
|
assertTermEquals("quick", filter, termAtt, tfAtt, 40);
|
||||||
|
assertTermEquals("red", filter, termAtt, tfAtt, 4);
|
||||||
|
assertTermEquals("fox", filter, termAtt, tfAtt, 6);
|
||||||
|
assertTermEquals("jumped", filter, termAtt, tfAtt, 1);
|
||||||
|
assertTermEquals("over", filter, termAtt, tfAtt, 1);
|
||||||
|
assertTermEquals("the", filter, termAtt, tfAtt, 1);
|
||||||
|
assertTermEquals("lazy", filter, termAtt, tfAtt, 2);
|
||||||
|
assertTermEquals("brown", filter, termAtt, tfAtt, 123);
|
||||||
|
assertTermEquals("dogs", filter, termAtt, tfAtt, 1024);
|
||||||
|
assertFalse(filter.incrementToken());
|
||||||
|
filter.end();
|
||||||
|
filter.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testInvalidNegativeTf() throws Exception {
|
||||||
|
String test = "foo bar|-20";
|
||||||
|
DelimitedTermFrequencyTokenFilter filter =
|
||||||
|
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
|
||||||
|
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
|
||||||
|
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
|
||||||
|
filter.reset();
|
||||||
|
assertTermEquals("foo", filter, termAtt, tfAtt, 1);
|
||||||
|
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, filter::incrementToken);
|
||||||
|
assertEquals("Term frequency must be 1 or greater; got -20", iae.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testInvalidFloatTf() throws Exception {
|
||||||
|
String test = "foo bar|1.2";
|
||||||
|
DelimitedTermFrequencyTokenFilter filter =
|
||||||
|
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
|
||||||
|
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
|
||||||
|
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
|
||||||
|
filter.reset();
|
||||||
|
assertTermEquals("foo", filter, termAtt, tfAtt, 1);
|
||||||
|
expectThrows(NumberFormatException.class, filter::incrementToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, TermFrequencyAttribute tfAtt, int expectedTf) throws Exception {
|
||||||
|
assertTrue(stream.incrementToken());
|
||||||
|
assertEquals(expected, termAtt.toString());
|
||||||
|
assertEquals(expectedTf, tfAtt.getTermFrequency());
|
||||||
|
}
|
||||||
|
}
|
@ -17,34 +17,90 @@
|
|||||||
package org.apache.lucene.analysis.wikipedia;
|
package org.apache.lucene.analysis.wikipedia;
|
||||||
|
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple tests to ensure the wikipedia tokenizer is working.
|
* Simple tests to ensure the wikipedia tokenizer is working.
|
||||||
*/
|
*/
|
||||||
public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCase {
|
public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCase {
|
||||||
|
|
||||||
|
private final String WIKIPEDIA = "Wikipedia";
|
||||||
|
private final String TOKEN_OUTPUT = "tokenOutput";
|
||||||
|
private final String UNTOKENIZED_TYPES = "untokenizedTypes";
|
||||||
|
|
||||||
public void testTokenizer() throws Exception {
|
public void testTokenizer() throws Exception {
|
||||||
Reader reader = new StringReader("This is a [[Category:foo]]");
|
String text = "This is a [[Category:foo]]";
|
||||||
Tokenizer tokenizer = tokenizerFactory("Wikipedia").create(newAttributeFactory());
|
Tokenizer tf = tokenizerFactory(WIKIPEDIA).create(newAttributeFactory());
|
||||||
tokenizer.setReader(reader);
|
tf.setReader(new StringReader(text));
|
||||||
assertTokenStreamContents(tokenizer,
|
assertTokenStreamContents(tf,
|
||||||
new String[] { "This", "is", "a", "foo" },
|
new String[] { "This", "is", "a", "foo" },
|
||||||
new int[] { 0, 5, 8, 21 },
|
new int[] { 0, 5, 8, 21 },
|
||||||
new int[] { 4, 7, 9, 24 },
|
new int[] { 4, 7, 9, 24 },
|
||||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
|
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
|
||||||
new int[] { 1, 1, 1, 1, });
|
new int[] { 1, 1, 1, 1, },
|
||||||
|
text.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTokenizerTokensOnly() throws Exception {
|
||||||
|
String text = "This is a [[Category:foo]]";
|
||||||
|
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer( WikipediaTokenizer.TOKENS_ONLY).toString()).create(newAttributeFactory());
|
||||||
|
tf.setReader(new StringReader(text));
|
||||||
|
assertTokenStreamContents(tf,
|
||||||
|
new String[] { "This", "is", "a", "foo" },
|
||||||
|
new int[] { 0, 5, 8, 21 },
|
||||||
|
new int[] { 4, 7, 9, 24 },
|
||||||
|
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
|
||||||
|
new int[] { 1, 1, 1, 1, },
|
||||||
|
text.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTokenizerUntokenizedOnly() throws Exception {
|
||||||
|
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
|
||||||
|
Set<String> untoks = new HashSet<>();
|
||||||
|
untoks.add(WikipediaTokenizer.CATEGORY);
|
||||||
|
untoks.add(WikipediaTokenizer.ITALICS);
|
||||||
|
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.UNTOKENIZED_ONLY).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
|
||||||
|
tf.setReader(new StringReader(test));
|
||||||
|
assertTokenStreamContents(tf,
|
||||||
|
new String[] { "a b c d", "e f g", "link", "here", "link",
|
||||||
|
"there", "italics here", "something", "more italics", "h i j" },
|
||||||
|
new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
|
||||||
|
new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
|
||||||
|
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTokenizerBoth() throws Exception {
|
||||||
|
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
|
||||||
|
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.BOTH).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
|
||||||
|
tf.setReader(new StringReader(test));
|
||||||
|
assertTokenStreamContents(tf,
|
||||||
|
new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
|
||||||
|
"link", "here", "link", "there", "italics here", "italics", "here",
|
||||||
|
"something", "more italics", "more", "italics", "h i j", "h", "i", "j" },
|
||||||
|
new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 },
|
||||||
|
new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
|
||||||
|
new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/** Test that bogus arguments result in exception */
|
/** Test that bogus arguments result in exception */
|
||||||
public void testBogusArguments() throws Exception {
|
public void testBogusArguments() throws Exception {
|
||||||
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||||
tokenizerFactory("Wikipedia", "bogusArg", "bogusValue");
|
tokenizerFactory(WIKIPEDIA, "bogusArg", "bogusValue").create(newAttributeFactory());
|
||||||
});
|
});
|
||||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
public void testIllegalArguments() throws Exception {
|
||||||
|
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||||
|
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
|
||||||
|
});
|
||||||
|
assertTrue(expected.getMessage().contains("tokenOutput must be TOKENS_ONLY, UNTOKENIZED_ONLY or BOTH"));
|
||||||
|
}
|
||||||
|
}
|
@ -21,13 +21,17 @@ import java.io.IOException;
|
|||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory;
|
||||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||||
@ -46,6 +50,12 @@ import org.apache.lucene.util.Version;
|
|||||||
|
|
||||||
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
|
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
|
||||||
public class TestFactories extends BaseTokenStreamTestCase {
|
public class TestFactories extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
/** Factories that are excluded from testing it with random data */
|
||||||
|
private static final Set<Class<? extends AbstractAnalysisFactory>> EXCLUDE_FACTORIES_RANDOM_DATA = new HashSet<>(Arrays.asList(
|
||||||
|
DelimitedTermFrequencyTokenFilterFactory.class
|
||||||
|
));
|
||||||
|
|
||||||
public void test() throws IOException {
|
public void test() throws IOException {
|
||||||
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
|
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
|
||||||
doTestTokenizer(tokenizer);
|
doTestTokenizer(tokenizer);
|
||||||
@ -74,11 +84,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||||||
assertFalse(mtc instanceof CharFilterFactory);
|
assertFalse(mtc instanceof CharFilterFactory);
|
||||||
}
|
}
|
||||||
|
|
||||||
// beast it just a little, it shouldnt throw exceptions:
|
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||||
// (it should have thrown them in initialize)
|
// beast it just a little, it shouldnt throw exceptions:
|
||||||
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
// (it should have thrown them in initialize)
|
||||||
checkRandomData(random(), a, 20, 20, false, false);
|
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
||||||
a.close();
|
checkRandomData(random(), a, 20, 20, false, false);
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,11 +108,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||||||
assertTrue(mtc instanceof TokenFilterFactory);
|
assertTrue(mtc instanceof TokenFilterFactory);
|
||||||
}
|
}
|
||||||
|
|
||||||
// beast it just a little, it shouldnt throw exceptions:
|
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||||
// (it should have thrown them in initialize)
|
// beast it just a little, it shouldnt throw exceptions:
|
||||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
// (it should have thrown them in initialize)
|
||||||
checkRandomData(random(), a, 20, 20, false, false);
|
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
||||||
a.close();
|
checkRandomData(random(), a, 20, 20, false, false);
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,11 +132,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||||||
assertTrue(mtc instanceof CharFilterFactory);
|
assertTrue(mtc instanceof CharFilterFactory);
|
||||||
}
|
}
|
||||||
|
|
||||||
// beast it just a little, it shouldnt throw exceptions:
|
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||||
// (it should have thrown them in initialize)
|
// beast it just a little, it shouldnt throw exceptions:
|
||||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
// (it should have thrown them in initialize)
|
||||||
checkRandomData(random(), a, 20, 20, false, false);
|
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
||||||
a.close();
|
checkRandomData(random(), a, 20, 20, false, false);
|
||||||
|
a.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -297,7 +297,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||||||
"6.5.0-cfs",
|
"6.5.0-cfs",
|
||||||
"6.5.0-nocfs",
|
"6.5.0-nocfs",
|
||||||
"6.5.1-cfs",
|
"6.5.1-cfs",
|
||||||
"6.5.1-nocfs"
|
"6.5.1-nocfs",
|
||||||
|
"6.6.0-cfs",
|
||||||
|
"6.6.0-nocfs"
|
||||||
};
|
};
|
||||||
|
|
||||||
final String[] unsupportedNames = {
|
final String[] unsupportedNames = {
|
||||||
@ -1190,7 +1192,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||||||
for (String name : oldNames) {
|
for (String name : oldNames) {
|
||||||
Directory dir = oldIndexDirs.get(name);
|
Directory dir = oldIndexDirs.get(name);
|
||||||
IndexReader r = DirectoryReader.open(dir);
|
IndexReader r = DirectoryReader.open(dir);
|
||||||
TermsEnum terms = MultiFields.getFields(r).terms("content").iterator();
|
TermsEnum terms = MultiFields.getTerms(r, "content").iterator();
|
||||||
BytesRef t = terms.next();
|
BytesRef t = terms.next();
|
||||||
assertNotNull(t);
|
assertNotNull(t);
|
||||||
|
|
||||||
|
Binary file not shown.
Binary file not shown.
@ -26,15 +26,18 @@ import org.apache.lucene.util.AttributeReflector;
|
|||||||
* <li>{@link PositionIncrementAttribute}
|
* <li>{@link PositionIncrementAttribute}
|
||||||
* <li>{@link PositionLengthAttribute}
|
* <li>{@link PositionLengthAttribute}
|
||||||
* <li>{@link OffsetAttribute}
|
* <li>{@link OffsetAttribute}
|
||||||
|
* <li>{@link TermFrequencyAttribute}
|
||||||
* </ul>*/
|
* </ul>*/
|
||||||
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
||||||
implements TypeAttribute, PositionIncrementAttribute,
|
implements TypeAttribute, PositionIncrementAttribute,
|
||||||
PositionLengthAttribute, OffsetAttribute {
|
PositionLengthAttribute, OffsetAttribute,
|
||||||
|
TermFrequencyAttribute {
|
||||||
|
|
||||||
private int startOffset,endOffset;
|
private int startOffset,endOffset;
|
||||||
private String type = DEFAULT_TYPE;
|
private String type = DEFAULT_TYPE;
|
||||||
private int positionIncrement = 1;
|
private int positionIncrement = 1;
|
||||||
private int positionLength = 1;
|
private int positionLength = 1;
|
||||||
|
private int termFrequency = 1;
|
||||||
|
|
||||||
/** Constructs the attribute implementation. */
|
/** Constructs the attribute implementation. */
|
||||||
public PackedTokenAttributeImpl() {
|
public PackedTokenAttributeImpl() {
|
||||||
@ -132,12 +135,26 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final void setTermFrequency(int termFrequency) {
|
||||||
|
if (termFrequency < 1) {
|
||||||
|
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
|
||||||
|
}
|
||||||
|
this.termFrequency = termFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int getTermFrequency() {
|
||||||
|
return termFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
/** Resets the attributes
|
/** Resets the attributes
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
super.clear();
|
super.clear();
|
||||||
positionIncrement = positionLength = 1;
|
positionIncrement = positionLength = 1;
|
||||||
|
termFrequency = 1;
|
||||||
startOffset = endOffset = 0;
|
startOffset = endOffset = 0;
|
||||||
type = DEFAULT_TYPE;
|
type = DEFAULT_TYPE;
|
||||||
}
|
}
|
||||||
@ -147,10 +164,8 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||||||
@Override
|
@Override
|
||||||
public void end() {
|
public void end() {
|
||||||
super.end();
|
super.end();
|
||||||
|
// super.end already calls this.clear, so we only set values that are different from clear:
|
||||||
positionIncrement = 0;
|
positionIncrement = 0;
|
||||||
positionLength = 1;
|
|
||||||
startOffset = endOffset = 0;
|
|
||||||
type = DEFAULT_TYPE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -170,6 +185,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||||||
positionIncrement == other.positionIncrement &&
|
positionIncrement == other.positionIncrement &&
|
||||||
positionLength == other.positionLength &&
|
positionLength == other.positionLength &&
|
||||||
(type == null ? other.type == null : type.equals(other.type)) &&
|
(type == null ? other.type == null : type.equals(other.type)) &&
|
||||||
|
termFrequency == other.termFrequency &&
|
||||||
super.equals(obj)
|
super.equals(obj)
|
||||||
);
|
);
|
||||||
} else
|
} else
|
||||||
@ -185,6 +201,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||||||
code = code * 31 + positionLength;
|
code = code * 31 + positionLength;
|
||||||
if (type != null)
|
if (type != null)
|
||||||
code = code * 31 + type.hashCode();
|
code = code * 31 + type.hashCode();
|
||||||
|
code = code * 31 + termFrequency;;
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,12 +215,14 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||||||
to.startOffset = startOffset;
|
to.startOffset = startOffset;
|
||||||
to.endOffset = endOffset;
|
to.endOffset = endOffset;
|
||||||
to.type = type;
|
to.type = type;
|
||||||
|
to.termFrequency = termFrequency;
|
||||||
} else {
|
} else {
|
||||||
super.copyTo(target);
|
super.copyTo(target);
|
||||||
((OffsetAttribute) target).setOffset(startOffset, endOffset);
|
((OffsetAttribute) target).setOffset(startOffset, endOffset);
|
||||||
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
|
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
|
||||||
((PositionLengthAttribute) target).setPositionLength(positionLength);
|
((PositionLengthAttribute) target).setPositionLength(positionLength);
|
||||||
((TypeAttribute) target).setType(type);
|
((TypeAttribute) target).setType(type);
|
||||||
|
((TermFrequencyAttribute) target).setTermFrequency(termFrequency);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,6 +234,6 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||||||
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
|
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
|
||||||
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
|
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
|
||||||
reflector.reflect(TypeAttribute.class, "type", type);
|
reflector.reflect(TypeAttribute.class, "type", type);
|
||||||
|
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.analysis.tokenattributes;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.util.Attribute;
|
||||||
|
|
||||||
|
/** Sets the custom term frequency of a term within one document. If this attribute
|
||||||
|
* is present in your analysis chain for a given field, that field must be indexed with
|
||||||
|
* {@link IndexOptions#DOCS_AND_FREQS}. */
|
||||||
|
public interface TermFrequencyAttribute extends Attribute {
|
||||||
|
|
||||||
|
/** Set the custom term frequency of the current term within one document. */
|
||||||
|
public void setTermFrequency(int termFrequency);
|
||||||
|
|
||||||
|
/** Returns the custom term frequency. */
|
||||||
|
public int getTermFrequency();
|
||||||
|
}
|
@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.analysis.tokenattributes;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
|
import org.apache.lucene.util.AttributeReflector;
|
||||||
|
|
||||||
|
/** Default implementation of {@link TermFrequencyAttribute}. */
|
||||||
|
public class TermFrequencyAttributeImpl extends AttributeImpl implements TermFrequencyAttribute, Cloneable {
|
||||||
|
private int termFrequency = 1;
|
||||||
|
|
||||||
|
/** Initialize this attribute with term frequencey of 1 */
|
||||||
|
public TermFrequencyAttributeImpl() {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setTermFrequency(int termFrequency) {
|
||||||
|
if (termFrequency < 1) {
|
||||||
|
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
|
||||||
|
}
|
||||||
|
this.termFrequency = termFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getTermFrequency() {
|
||||||
|
return termFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
this.termFrequency = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void end() {
|
||||||
|
this.termFrequency = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
if (other == this) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (other instanceof TermFrequencyAttributeImpl) {
|
||||||
|
TermFrequencyAttributeImpl _other = (TermFrequencyAttributeImpl) other;
|
||||||
|
return termFrequency == _other.termFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Integer.hashCode(termFrequency);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void copyTo(AttributeImpl target) {
|
||||||
|
TermFrequencyAttribute t = (TermFrequencyAttribute) target;
|
||||||
|
t.setTermFrequency(termFrequency);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reflectWith(AttributeReflector reflector) {
|
||||||
|
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
|
||||||
|
}
|
||||||
|
}
|
@ -121,12 +121,6 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||||||
|
|
||||||
private final TreeMap<String,FieldReader> fields = new TreeMap<>();
|
private final TreeMap<String,FieldReader> fields = new TreeMap<>();
|
||||||
|
|
||||||
/** File offset where the directory starts in the terms file. */
|
|
||||||
private long dirOffset;
|
|
||||||
|
|
||||||
/** File offset where the directory starts in the index file. */
|
|
||||||
private long indexDirOffset;
|
|
||||||
|
|
||||||
final String segment;
|
final String segment;
|
||||||
|
|
||||||
final int version;
|
final int version;
|
||||||
@ -167,8 +161,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||||||
CodecUtil.retrieveChecksum(termsIn);
|
CodecUtil.retrieveChecksum(termsIn);
|
||||||
|
|
||||||
// Read per-field details
|
// Read per-field details
|
||||||
seekDir(termsIn, dirOffset);
|
seekDir(termsIn);
|
||||||
seekDir(indexIn, indexDirOffset);
|
seekDir(indexIn);
|
||||||
|
|
||||||
final int numFields = termsIn.readVInt();
|
final int numFields = termsIn.readVInt();
|
||||||
if (numFields < 0) {
|
if (numFields < 0) {
|
||||||
@ -181,13 +175,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||||||
if (numTerms <= 0) {
|
if (numTerms <= 0) {
|
||||||
throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
|
throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
|
||||||
}
|
}
|
||||||
final int numBytes = termsIn.readVInt();
|
final BytesRef rootCode = readBytesRef(termsIn);
|
||||||
if (numBytes < 0) {
|
|
||||||
throw new CorruptIndexException("invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
|
|
||||||
}
|
|
||||||
final BytesRef rootCode = new BytesRef(new byte[numBytes]);
|
|
||||||
termsIn.readBytes(rootCode.bytes, 0, numBytes);
|
|
||||||
rootCode.length = numBytes;
|
|
||||||
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
||||||
if (fieldInfo == null) {
|
if (fieldInfo == null) {
|
||||||
throw new CorruptIndexException("invalid field number: " + field, termsIn);
|
throw new CorruptIndexException("invalid field number: " + field, termsIn);
|
||||||
@ -230,19 +218,24 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static BytesRef readBytesRef(IndexInput in) throws IOException {
|
private static BytesRef readBytesRef(IndexInput in) throws IOException {
|
||||||
|
int numBytes = in.readVInt();
|
||||||
|
if (numBytes < 0) {
|
||||||
|
throw new CorruptIndexException("invalid bytes length: " + numBytes, in);
|
||||||
|
}
|
||||||
|
|
||||||
BytesRef bytes = new BytesRef();
|
BytesRef bytes = new BytesRef();
|
||||||
bytes.length = in.readVInt();
|
bytes.length = numBytes;
|
||||||
bytes.bytes = new byte[bytes.length];
|
bytes.bytes = new byte[numBytes];
|
||||||
in.readBytes(bytes.bytes, 0, bytes.length);
|
in.readBytes(bytes.bytes, 0, numBytes);
|
||||||
|
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Seek {@code input} to the directory offset. */
|
/** Seek {@code input} to the directory offset. */
|
||||||
private void seekDir(IndexInput input, long dirOffset)
|
private static void seekDir(IndexInput input) throws IOException {
|
||||||
throws IOException {
|
|
||||||
input.seek(input.length() - CodecUtil.footerLength() - 8);
|
input.seek(input.length() - CodecUtil.footerLength() - 8);
|
||||||
dirOffset = input.readLong();
|
long offset = input.readLong();
|
||||||
input.seek(dirOffset);
|
input.seek(offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// for debugging
|
// for debugging
|
||||||
|
@ -19,22 +19,20 @@ package org.apache.lucene.document;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.function.IntPredicate;
|
|
||||||
import java.util.function.Predicate;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PointValues;
|
import org.apache.lucene.index.PointValues;
|
||||||
import org.apache.lucene.index.PointValues.Relation;
|
|
||||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||||
|
import org.apache.lucene.index.PointValues.Relation;
|
||||||
import org.apache.lucene.search.ConstantScoreScorer;
|
import org.apache.lucene.search.ConstantScoreScorer;
|
||||||
import org.apache.lucene.search.ConstantScoreWeight;
|
import org.apache.lucene.search.ConstantScoreWeight;
|
||||||
import org.apache.lucene.search.DocIdSet;
|
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.ScorerSupplier;
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.util.DocIdSetBuilder;
|
import org.apache.lucene.util.DocIdSetBuilder;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
@ -60,13 +58,167 @@ abstract class RangeFieldQuery extends Query {
|
|||||||
/** Used by {@code RangeFieldQuery} to check how each internal or leaf node relates to the query. */
|
/** Used by {@code RangeFieldQuery} to check how each internal or leaf node relates to the query. */
|
||||||
enum QueryType {
|
enum QueryType {
|
||||||
/** Use this for intersects queries. */
|
/** Use this for intersects queries. */
|
||||||
INTERSECTS,
|
INTERSECTS {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||||
|
int numDims, int bytesPerDim, int dim) {
|
||||||
|
int minOffset = dim * bytesPerDim;
|
||||||
|
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, minOffset) < 0
|
||||||
|
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, maxOffset) > 0) {
|
||||||
|
// disjoint
|
||||||
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, minOffset) >= 0
|
||||||
|
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, maxOffset) <= 0) {
|
||||||
|
return Relation.CELL_INSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Relation.CELL_CROSSES_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||||
|
int minOffset = dim * bytesPerDim;
|
||||||
|
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||||
|
return StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, minOffset) >= 0
|
||||||
|
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, maxOffset) <= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
},
|
||||||
/** Use this for within queries. */
|
/** Use this for within queries. */
|
||||||
WITHIN,
|
WITHIN {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||||
|
int numDims, int bytesPerDim, int dim) {
|
||||||
|
int minOffset = dim * bytesPerDim;
|
||||||
|
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) < 0
|
||||||
|
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) > 0) {
|
||||||
|
// all ranges have at least one point outside of the query
|
||||||
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) >= 0
|
||||||
|
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) <= 0) {
|
||||||
|
return Relation.CELL_INSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Relation.CELL_CROSSES_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||||
|
int minOffset = dim * bytesPerDim;
|
||||||
|
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||||
|
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) <= 0
|
||||||
|
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
},
|
||||||
/** Use this for contains */
|
/** Use this for contains */
|
||||||
CONTAINS,
|
CONTAINS {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||||
|
int numDims, int bytesPerDim, int dim) {
|
||||||
|
int minOffset = dim * bytesPerDim;
|
||||||
|
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) > 0
|
||||||
|
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) < 0) {
|
||||||
|
// all ranges are either less than the query max or greater than the query min
|
||||||
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) <= 0
|
||||||
|
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) >= 0) {
|
||||||
|
return Relation.CELL_INSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Relation.CELL_CROSSES_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||||
|
int minOffset = dim * bytesPerDim;
|
||||||
|
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||||
|
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) >= 0
|
||||||
|
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) <= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
},
|
||||||
/** Use this for crosses queries */
|
/** Use this for crosses queries */
|
||||||
CROSSES
|
CROSSES {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||||
|
int numDims, int bytesPerDim, int dim) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||||
|
int numDims, int bytesPerDim) {
|
||||||
|
Relation intersectRelation = QueryType.INTERSECTS.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
|
||||||
|
if (intersectRelation == Relation.CELL_OUTSIDE_QUERY) {
|
||||||
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
Relation withinRelation = QueryType.WITHIN.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
|
||||||
|
if (withinRelation == Relation.CELL_INSIDE_QUERY) {
|
||||||
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intersectRelation == Relation.CELL_INSIDE_QUERY && withinRelation == Relation.CELL_OUTSIDE_QUERY) {
|
||||||
|
return Relation.CELL_INSIDE_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Relation.CELL_CROSSES_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
|
||||||
|
return INTERSECTS.matches(queryPackedValue, packedValue, numDims, bytesPerDim)
|
||||||
|
&& WITHIN.matches(queryPackedValue, packedValue, numDims, bytesPerDim) == false;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Relates the query range to a cell, described by {@code minPackedValue} and
 * {@code maxPackedValue}, for the single dimension {@code dim}. The all-dimension
 * {@code compare} calls this once per dimension and combines the results.
 */
abstract Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim, int dim);
|
||||||
|
|
||||||
|
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim) {
|
||||||
|
boolean inside = true;
|
||||||
|
for (int dim = 0; dim < numDims; ++dim) {
|
||||||
|
Relation relation = compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim, dim);
|
||||||
|
if (relation == Relation.CELL_OUTSIDE_QUERY) {
|
||||||
|
return Relation.CELL_OUTSIDE_QUERY;
|
||||||
|
} else if (relation != Relation.CELL_INSIDE_QUERY) {
|
||||||
|
inside = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return inside ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tests whether the indexed value {@code packedValue} satisfies this query type for the
 * single dimension {@code dim}. The all-dimension {@code matches} calls this once per
 * dimension and requires every call to return true.
 */
abstract boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim);
|
||||||
|
|
||||||
|
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
|
||||||
|
for (int dim = 0; dim < numDims; ++dim) {
|
||||||
|
if (matches(queryPackedValue, packedValue, numDims, bytesPerDim, dim) == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -111,54 +263,33 @@ abstract class RangeFieldQuery extends Query {
|
|||||||
@Override
|
@Override
|
||||||
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||||
return new ConstantScoreWeight(this, boost) {
|
return new ConstantScoreWeight(this, boost) {
|
||||||
final RangeFieldComparator target = new RangeFieldComparator();
|
|
||||||
|
|
||||||
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
|
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
|
||||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
return new IntersectVisitor() {
|
||||||
values.intersect(
|
DocIdSetBuilder.BulkAdder adder;
|
||||||
new IntersectVisitor() {
|
@Override
|
||||||
DocIdSetBuilder.BulkAdder adder;
|
public void grow(int count) {
|
||||||
@Override
|
adder = result.grow(count);
|
||||||
public void grow(int count) {
|
}
|
||||||
adder = result.grow(count);
|
@Override
|
||||||
}
|
public void visit(int docID) throws IOException {
|
||||||
@Override
|
adder.add(docID);
|
||||||
public void visit(int docID) throws IOException {
|
}
|
||||||
adder.add(docID);
|
@Override
|
||||||
}
|
public void visit(int docID, byte[] leaf) throws IOException {
|
||||||
@Override
|
if (queryType.matches(ranges, leaf, numDims, bytesPerDim)) {
|
||||||
public void visit(int docID, byte[] leaf) throws IOException {
|
adder.add(docID);
|
||||||
if (target.matches(leaf)) {
|
}
|
||||||
adder.add(docID);
|
}
|
||||||
}
|
@Override
|
||||||
}
|
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||||
@Override
|
return queryType.compare(ranges, minPackedValue, maxPackedValue, numDims, bytesPerDim);
|
||||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
}
|
||||||
return compareRange(minPackedValue, maxPackedValue);
|
};
|
||||||
}
|
|
||||||
});
|
|
||||||
return result.build();
|
|
||||||
}
|
|
||||||
|
|
||||||
private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
|
|
||||||
byte[] node = getInternalRange(minPackedValue, maxPackedValue);
|
|
||||||
// compute range relation for BKD traversal
|
|
||||||
if (target.intersects(node) == false) {
|
|
||||||
return Relation.CELL_OUTSIDE_QUERY;
|
|
||||||
} else if (target.within(node)) {
|
|
||||||
// target within cell; continue traversing:
|
|
||||||
return Relation.CELL_CROSSES_QUERY;
|
|
||||||
} else if (target.contains(node)) {
|
|
||||||
// target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
|
|
||||||
return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ?
|
|
||||||
Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
|
|
||||||
}
|
|
||||||
// target intersects cell; continue traversing:
|
|
||||||
return Relation.CELL_CROSSES_QUERY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||||
LeafReader reader = context.reader();
|
LeafReader reader = context.reader();
|
||||||
PointValues values = reader.getPointValues(field);
|
PointValues values = reader.getPointValues(field);
|
||||||
if (values == null) {
|
if (values == null) {
|
||||||
@ -173,115 +304,59 @@ abstract class RangeFieldQuery extends Query {
|
|||||||
checkFieldInfo(fieldInfo);
|
checkFieldInfo(fieldInfo);
|
||||||
boolean allDocsMatch = false;
|
boolean allDocsMatch = false;
|
||||||
if (values.getDocCount() == reader.maxDoc()
|
if (values.getDocCount() == reader.maxDoc()
|
||||||
&& compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
|
&& queryType.compare(ranges, values.getMinPackedValue(), values.getMaxPackedValue(), numDims, bytesPerDim) == Relation.CELL_INSIDE_QUERY) {
|
||||||
allDocsMatch = true;
|
allDocsMatch = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
DocIdSetIterator iterator = allDocsMatch == true ?
|
final Weight weight = this;
|
||||||
DocIdSetIterator.all(reader.maxDoc()) : buildMatchingDocIdSet(reader, values).iterator();
|
if (allDocsMatch) {
|
||||||
return new ConstantScoreScorer(this, score(), iterator);
|
return new ScorerSupplier() {
|
||||||
}
|
@Override
|
||||||
|
public Scorer get(boolean randomAccess) {
|
||||||
|
return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc()));
|
||||||
|
}
|
||||||
|
|
||||||
/** get an encoded byte representation of the internal node; this is
|
@Override
|
||||||
* the lower half of the min array and the upper half of the max array */
|
public long cost() {
|
||||||
private byte[] getInternalRange(byte[] min, byte[] max) {
|
return reader.maxDoc();
|
||||||
byte[] range = new byte[min.length];
|
}
|
||||||
final int dimSize = numDims * bytesPerDim;
|
};
|
||||||
System.arraycopy(min, 0, range, 0, dimSize);
|
} else {
|
||||||
System.arraycopy(max, dimSize, range, dimSize, dimSize);
|
return new ScorerSupplier() {
|
||||||
return range;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
||||||
* RangeFieldComparator class provides the core comparison logic for accepting or rejecting indexed
|
final IntersectVisitor visitor = getIntersectVisitor(result);
|
||||||
* {@code RangeField} types based on the defined query range and relation.
|
long cost = -1;
|
||||||
*/
|
|
||||||
class RangeFieldComparator {
|
|
||||||
final Predicate<byte[]> predicate;
|
|
||||||
|
|
||||||
/** constructs the comparator based on the query type */
|
@Override
|
||||||
RangeFieldComparator() {
|
public Scorer get(boolean randomAccess) throws IOException {
|
||||||
switch (queryType) {
|
values.intersect(visitor);
|
||||||
case INTERSECTS:
|
DocIdSetIterator iterator = result.build().iterator();
|
||||||
predicate = this::intersects;
|
return new ConstantScoreScorer(weight, score(), iterator);
|
||||||
break;
|
}
|
||||||
case WITHIN:
|
|
||||||
predicate = this::contains;
|
|
||||||
break;
|
|
||||||
case CONTAINS:
|
|
||||||
predicate = this::within;
|
|
||||||
break;
|
|
||||||
case CROSSES:
|
|
||||||
// crosses first checks intersection (disjoint automatic fails),
|
|
||||||
// then ensures the query doesn't wholly contain the leaf:
|
|
||||||
predicate = (byte[] leaf) -> this.intersects(leaf)
|
|
||||||
&& this.contains(leaf) == false;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("invalid queryType [" + queryType + "] found.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** determines if the candidate range matches the query request */
|
@Override
|
||||||
private boolean matches(final byte[] candidate) {
|
public long cost() {
|
||||||
return (Arrays.equals(ranges, candidate) && queryType != QueryType.CROSSES)
|
if (cost == -1) {
|
||||||
|| predicate.test(candidate);
|
// Computing the cost may be expensive, so only do it if necessary
|
||||||
}
|
cost = values.estimatePointCount(visitor);
|
||||||
|
assert cost >= 0;
|
||||||
/** check if query intersects candidate range */
|
}
|
||||||
private boolean intersects(final byte[] candidate) {
|
return cost;
|
||||||
return relate((int d) -> compareMinMax(candidate, d) > 0 || compareMaxMin(candidate, d) < 0);
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
/** check if query is within candidate range */
|
|
||||||
private boolean within(final byte[] candidate) {
|
|
||||||
return relate((int d) -> compareMinMin(candidate, d) < 0 || compareMaxMax(candidate, d) > 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** check if query contains candidate range */
|
|
||||||
private boolean contains(final byte[] candidate) {
|
|
||||||
return relate((int d) -> compareMinMin(candidate, d) > 0 || compareMaxMax(candidate, d) < 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** internal method used by each relation method to test range relation logic */
|
|
||||||
private boolean relate(IntPredicate predicate) {
|
|
||||||
for (int d=0; d<numDims; ++d) {
|
|
||||||
if (predicate.test(d)) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** compare the encoded min value (for the defined query dimension) with the encoded min value in the byte array */
|
@Override
|
||||||
private int compareMinMin(byte[] b, int dimension) {
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
// convert dimension to offset:
|
ScorerSupplier scorerSupplier = scorerSupplier(context);
|
||||||
dimension *= bytesPerDim;
|
if (scorerSupplier == null) {
|
||||||
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
|
return null;
|
||||||
}
|
}
|
||||||
|
return scorerSupplier.get(false);
|
||||||
/** compare the encoded min value (for the defined query dimension) with the encoded max value in the byte array */
|
}
|
||||||
private int compareMinMax(byte[] b, int dimension) {
|
};
|
||||||
// convert dimension to offset:
|
|
||||||
dimension *= bytesPerDim;
|
|
||||||
return StringHelper.compare(bytesPerDim, ranges, dimension, b, numDims * bytesPerDim + dimension);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** compare the encoded max value (for the defined query dimension) with the encoded min value in the byte array */
|
|
||||||
private int compareMaxMin(byte[] b, int dimension) {
|
|
||||||
// convert dimension to offset:
|
|
||||||
dimension *= bytesPerDim;
|
|
||||||
return StringHelper.compare(bytesPerDim, ranges, numDims * bytesPerDim + dimension, b, dimension);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** compare the encoded max value (for the defined query dimension) with the encoded max value in the byte array */
|
|
||||||
private int compareMaxMax(byte[] b, int dimension) {
|
|
||||||
// convert dimension to max offset:
|
|
||||||
dimension = numDims * bytesPerDim + dimension * bytesPerDim;
|
|
||||||
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -501,9 +501,8 @@ class BufferedUpdatesStream implements Accountable {
|
|||||||
queue = new SegmentQueue(numReaders);
|
queue = new SegmentQueue(numReaders);
|
||||||
|
|
||||||
long segTermCount = 0;
|
long segTermCount = 0;
|
||||||
for(int i=0;i<numReaders;i++) {
|
for (SegmentState state : segStates) {
|
||||||
SegmentState state = segStates[i];
|
Terms terms = state.reader.terms(field);
|
||||||
Terms terms = state.reader.fields().terms(field);
|
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
segTermCount += terms.size();
|
segTermCount += terms.size();
|
||||||
state.termsEnum = terms.iterator();
|
state.termsEnum = terms.iterator();
|
||||||
@ -617,7 +616,6 @@ class BufferedUpdatesStream implements Accountable {
|
|||||||
// DocValues updates
|
// DocValues updates
|
||||||
private synchronized void applyDocValuesUpdates(List<DocValuesUpdate> updates,
|
private synchronized void applyDocValuesUpdates(List<DocValuesUpdate> updates,
|
||||||
SegmentState segState, DocValuesFieldUpdates.Container dvUpdatesContainer) throws IOException {
|
SegmentState segState, DocValuesFieldUpdates.Container dvUpdatesContainer) throws IOException {
|
||||||
Fields fields = segState.reader.fields();
|
|
||||||
|
|
||||||
// TODO: we can process the updates per DV field, from last to first so that
|
// TODO: we can process the updates per DV field, from last to first so that
|
||||||
// if multiple terms affect same document for the same field, we add an update
|
// if multiple terms affect same document for the same field, we add an update
|
||||||
@ -651,7 +649,7 @@ class BufferedUpdatesStream implements Accountable {
|
|||||||
// if we change the code to process updates in terms order, enable this assert
|
// if we change the code to process updates in terms order, enable this assert
|
||||||
// assert currentField == null || currentField.compareTo(term.field()) < 0;
|
// assert currentField == null || currentField.compareTo(term.field()) < 0;
|
||||||
currentField = term.field();
|
currentField = term.field();
|
||||||
Terms terms = fields.terms(currentField);
|
Terms terms = segState.reader.terms(currentField);
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
termsEnum = terms.iterator();
|
termsEnum = terms.iterator();
|
||||||
} else {
|
} else {
|
||||||
|
@ -98,12 +98,15 @@ public abstract class CodecReader extends LeafReader implements Accountable {
|
|||||||
throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")");
|
throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final Fields fields() {
|
public final Terms terms(String field) throws IOException {
|
||||||
return getPostingsReader();
|
//ensureOpen(); no; getPostingsReader calls this
|
||||||
|
// We could check the FieldInfo IndexOptions but there's no point since
|
||||||
|
// PostingsReader will simply return null for fields that don't exist or that have no terms index.
|
||||||
|
return getPostingsReader().terms(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns the FieldInfo that corresponds to the given field and type, or
|
// returns the FieldInfo that corresponds to the given field and type, or
|
||||||
// null if the field does not exist, or not indexed as the requested
|
// null if the field does not exist, or not indexed as the requested
|
||||||
// DocValuesType.
|
// DocValuesType.
|
||||||
|
@ -770,10 +770,12 @@ final class DefaultIndexingChain extends DocConsumer {
|
|||||||
}
|
}
|
||||||
invertState.lastStartOffset = startOffset;
|
invertState.lastStartOffset = startOffset;
|
||||||
|
|
||||||
invertState.length++;
|
try {
|
||||||
if (invertState.length < 0) {
|
invertState.length = Math.addExact(invertState.length, invertState.termFreqAttribute.getTermFrequency());
|
||||||
throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
|
} catch (ArithmeticException ae) {
|
||||||
|
throw new IllegalArgumentException("too many tokens for field \"" + field.name() + "\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.println(" term=" + invertState.termAttribute);
|
//System.out.println(" term=" + invertState.termAttribute);
|
||||||
|
|
||||||
// If we hit an exception in here, we abort
|
// If we hit an exception in here, we abort
|
||||||
|
@ -17,14 +17,13 @@
|
|||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.index.FilterLeafReader.FilterFields;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.FilterLeafReader.FilterTerms;
|
import org.apache.lucene.index.FilterLeafReader.FilterTerms;
|
||||||
import org.apache.lucene.index.FilterLeafReader.FilterTermsEnum;
|
import org.apache.lucene.index.FilterLeafReader.FilterTermsEnum;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The {@link ExitableDirectoryReader} wraps a real index {@link DirectoryReader} and
|
* The {@link ExitableDirectoryReader} wraps a real index {@link DirectoryReader} and
|
||||||
@ -79,14 +78,12 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
Fields fields = super.fields();
|
Terms terms = in.terms(field);
|
||||||
if (queryTimeout.isTimeoutEnabled()) {
|
if (terms == null) {
|
||||||
return new ExitableFields(fields, queryTimeout);
|
return null;
|
||||||
}
|
|
||||||
else {
|
|
||||||
return fields; // break out of wrapper as soon as possible
|
|
||||||
}
|
}
|
||||||
|
return (queryTimeout.isTimeoutEnabled()) ? new ExitableTerms(terms, queryTimeout) : terms;
|
||||||
}
|
}
|
||||||
|
|
||||||
// this impl does not change deletes or data so we can delegate the
|
// this impl does not change deletes or data so we can delegate the
|
||||||
@ -103,29 +100,6 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Wrapper class for another Fields implementation that is used by the ExitableFilterAtomicReader.
|
|
||||||
*/
|
|
||||||
public static class ExitableFields extends FilterFields {
|
|
||||||
|
|
||||||
private QueryTimeout queryTimeout;
|
|
||||||
|
|
||||||
/** Constructor **/
|
|
||||||
public ExitableFields(Fields fields, QueryTimeout queryTimeout) {
|
|
||||||
super(fields);
|
|
||||||
this.queryTimeout = queryTimeout;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms(String field) throws IOException {
|
|
||||||
Terms terms = in.terms(field);
|
|
||||||
if (terms == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return new ExitableTerms(terms, queryTimeout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrapper class for another Terms implementation that is used by ExitableFields.
|
* Wrapper class for another Terms implementation that is used by ExitableFields.
|
||||||
*/
|
*/
|
||||||
|
@ -20,6 +20,7 @@ import org.apache.lucene.analysis.TokenStream; // javadocs
|
|||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
|
||||||
@ -48,6 +49,7 @@ public final class FieldInvertState {
|
|||||||
PositionIncrementAttribute posIncrAttribute;
|
PositionIncrementAttribute posIncrAttribute;
|
||||||
PayloadAttribute payloadAttribute;
|
PayloadAttribute payloadAttribute;
|
||||||
TermToBytesRefAttribute termAttribute;
|
TermToBytesRefAttribute termAttribute;
|
||||||
|
TermFrequencyAttribute termFreqAttribute;
|
||||||
|
|
||||||
/** Creates {@code FieldInvertState} for the specified
|
/** Creates {@code FieldInvertState} for the specified
|
||||||
* field name. */
|
* field name. */
|
||||||
@ -88,6 +90,7 @@ public final class FieldInvertState {
|
|||||||
if (this.attributeSource != attributeSource) {
|
if (this.attributeSource != attributeSource) {
|
||||||
this.attributeSource = attributeSource;
|
this.attributeSource = attributeSource;
|
||||||
termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
|
termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
|
||||||
|
termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
|
||||||
posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
|
posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
|
||||||
offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
|
offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
|
||||||
payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
|
payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
|
||||||
|
@ -20,9 +20,15 @@ package org.apache.lucene.index;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
/** Flex API for access to fields and terms
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
* @lucene.experimental */
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides a {@link Terms} index for fields that have it, and lists which fields do.
|
||||||
|
* This is primarily an internal/experimental API (see {@link FieldsProducer}),
|
||||||
|
* although it is also used to expose the set of term vectors per document.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
public abstract class Fields implements Iterable<String> {
|
public abstract class Fields implements Iterable<String> {
|
||||||
|
|
||||||
/** Sole constructor. (For invocation by subclass
|
/** Sole constructor. (For invocation by subclass
|
||||||
|
@ -345,11 +345,11 @@ public abstract class FilterLeafReader extends LeafReader {
|
|||||||
protected void doClose() throws IOException {
|
protected void doClose() throws IOException {
|
||||||
in.close();
|
in.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
return in.fields();
|
return in.terms(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -113,9 +113,10 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||||||
if (!hasFreq) {
|
if (!hasFreq) {
|
||||||
assert postings.termFreqs == null;
|
assert postings.termFreqs == null;
|
||||||
postings.lastDocCodes[termID] = docState.docID;
|
postings.lastDocCodes[termID] = docState.docID;
|
||||||
|
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||||
} else {
|
} else {
|
||||||
postings.lastDocCodes[termID] = docState.docID << 1;
|
postings.lastDocCodes[termID] = docState.docID << 1;
|
||||||
postings.termFreqs[termID] = 1;
|
postings.termFreqs[termID] = getTermFreq();
|
||||||
if (hasProx) {
|
if (hasProx) {
|
||||||
writeProx(termID, fieldState.position);
|
writeProx(termID, fieldState.position);
|
||||||
if (hasOffsets) {
|
if (hasOffsets) {
|
||||||
@ -124,19 +125,21 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||||||
} else {
|
} else {
|
||||||
assert !hasOffsets;
|
assert !hasOffsets;
|
||||||
}
|
}
|
||||||
|
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
|
||||||
}
|
}
|
||||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
|
||||||
fieldState.uniqueTermCount++;
|
fieldState.uniqueTermCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void addTerm(final int termID) {
|
void addTerm(final int termID) {
|
||||||
final FreqProxPostingsArray postings = freqProxPostingsArray;
|
final FreqProxPostingsArray postings = freqProxPostingsArray;
|
||||||
|
|
||||||
assert !hasFreq || postings.termFreqs[termID] > 0;
|
assert !hasFreq || postings.termFreqs[termID] > 0;
|
||||||
|
|
||||||
if (!hasFreq) {
|
if (!hasFreq) {
|
||||||
assert postings.termFreqs == null;
|
assert postings.termFreqs == null;
|
||||||
|
if (termFreqAtt.getTermFrequency() != 1) {
|
||||||
|
throw new IllegalStateException("field \"" + fieldInfo.name + "\": must index term freq while using custom TermFrequencyAttribute");
|
||||||
|
}
|
||||||
if (docState.docID != postings.lastDocIDs[termID]) {
|
if (docState.docID != postings.lastDocIDs[termID]) {
|
||||||
// New document; now encode docCode for previous doc:
|
// New document; now encode docCode for previous doc:
|
||||||
assert docState.docID > postings.lastDocIDs[termID];
|
assert docState.docID > postings.lastDocIDs[termID];
|
||||||
@ -160,8 +163,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init freq for the current document
|
// Init freq for the current document
|
||||||
postings.termFreqs[termID] = 1;
|
postings.termFreqs[termID] = getTermFreq();
|
||||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
|
||||||
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
||||||
postings.lastDocIDs[termID] = docState.docID;
|
postings.lastDocIDs[termID] = docState.docID;
|
||||||
if (hasProx) {
|
if (hasProx) {
|
||||||
@ -175,7 +178,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||||||
}
|
}
|
||||||
fieldState.uniqueTermCount++;
|
fieldState.uniqueTermCount++;
|
||||||
} else {
|
} else {
|
||||||
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
|
postings.termFreqs[termID] = Math.addExact(postings.termFreqs[termID], getTermFreq());
|
||||||
|
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, postings.termFreqs[termID]);
|
||||||
if (hasProx) {
|
if (hasProx) {
|
||||||
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
||||||
if (hasOffsets) {
|
if (hasOffsets) {
|
||||||
@ -185,6 +189,17 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int getTermFreq() {
|
||||||
|
int freq = termFreqAtt.getTermFrequency();
|
||||||
|
if (freq != 1) {
|
||||||
|
if (hasProx) {
|
||||||
|
throw new IllegalStateException("field \"" + fieldInfo.name + "\": cannot index positions while using custom TermFrequencyAttribute");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return freq;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void newPostingsArray() {
|
public void newPostingsArray() {
|
||||||
freqProxPostingsArray = (FreqProxPostingsArray) postingsArray;
|
freqProxPostingsArray = (FreqProxPostingsArray) postingsArray;
|
||||||
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader.CacheHelper;
|
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
|
||||||
/** {@code LeafReader} is an abstract class, providing an interface for accessing an
|
/** {@code LeafReader} is an abstract class, providing an interface for accessing an
|
||||||
@ -60,7 +59,7 @@ public abstract class LeafReader extends IndexReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Optional method: Return a {@link CacheHelper} that can be used to cache
|
* Optional method: Return a {@link IndexReader.CacheHelper} that can be used to cache
|
||||||
* based on the content of this leaf regardless of deletions. Two readers
|
* based on the content of this leaf regardless of deletions. Two readers
|
||||||
* that have the same data but different sets of deleted documents or doc
|
* that have the same data but different sets of deleted documents or doc
|
||||||
* values updates may be considered equal. Consider using
|
* values updates may be considered equal. Consider using
|
||||||
@ -73,12 +72,6 @@ public abstract class LeafReader extends IndexReader {
|
|||||||
*/
|
*/
|
||||||
public abstract CacheHelper getCoreCacheHelper();
|
public abstract CacheHelper getCoreCacheHelper();
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns {@link Fields} for this reader.
|
|
||||||
* This method will not return null.
|
|
||||||
*/
|
|
||||||
public abstract Fields fields() throws IOException;
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final int docFreq(Term term) throws IOException {
|
public final int docFreq(Term term) throws IOException {
|
||||||
final Terms terms = terms(term.field());
|
final Terms terms = terms(term.field());
|
||||||
@ -139,10 +132,8 @@ public abstract class LeafReader extends IndexReader {
|
|||||||
return terms.getSumTotalTermFreq();
|
return terms.getSumTotalTermFreq();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This may return null if the field does not exist.*/
|
/** Returns the {@link Terms} index for this field, or null if it has none. */
|
||||||
public final Terms terms(String field) throws IOException {
|
public abstract Terms terms(String field) throws IOException;
|
||||||
return fields().terms(field);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns {@link PostingsEnum} for the specified term.
|
/** Returns {@link PostingsEnum} for the specified term.
|
||||||
* This will return null if either the field or
|
* This will return null if either the field or
|
||||||
|
@ -70,8 +70,11 @@ class MergeReaderWrapper extends LeafReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return fields;
|
ensureOpen();
|
||||||
|
// We could check the FieldInfo IndexOptions but there's no point since
|
||||||
|
// PostingsReader will simply return null for fields that don't exist or that have no terms index.
|
||||||
|
return fields.terms(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -31,11 +32,12 @@ import org.apache.lucene.util.BytesRef;
|
|||||||
import org.apache.lucene.util.MergedIterator;
|
import org.apache.lucene.util.MergedIterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Exposes flex API, merged from flex API of sub-segments.
|
* Provides a single {@link Fields} term index view over an
|
||||||
|
* {@link IndexReader}.
|
||||||
* This is useful when you're interacting with an {@link
|
* This is useful when you're interacting with an {@link
|
||||||
* IndexReader} implementation that consists of sequential
|
* IndexReader} implementation that consists of sequential
|
||||||
* sub-readers (eg {@link DirectoryReader} or {@link
|
* sub-readers (eg {@link DirectoryReader} or {@link
|
||||||
* MultiReader}).
|
* MultiReader}) and you must treat it as a {@link LeafReader}.
|
||||||
*
|
*
|
||||||
* <p><b>NOTE</b>: for composite readers, you'll get better
|
* <p><b>NOTE</b>: for composite readers, you'll get better
|
||||||
* performance by gathering the sub readers using
|
* performance by gathering the sub readers using
|
||||||
@ -45,7 +47,6 @@ import org.apache.lucene.util.MergedIterator;
|
|||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public final class MultiFields extends Fields {
|
public final class MultiFields extends Fields {
|
||||||
private final Fields[] subs;
|
private final Fields[] subs;
|
||||||
private final ReaderSlice[] subSlices;
|
private final ReaderSlice[] subSlices;
|
||||||
@ -64,13 +65,13 @@ public final class MultiFields extends Fields {
|
|||||||
switch (leaves.size()) {
|
switch (leaves.size()) {
|
||||||
case 1:
|
case 1:
|
||||||
// already an atomic reader / reader with one leave
|
// already an atomic reader / reader with one leave
|
||||||
return leaves.get(0).reader().fields();
|
return new LeafReaderFields(leaves.get(0).reader());
|
||||||
default:
|
default:
|
||||||
final List<Fields> fields = new ArrayList<>(leaves.size());
|
final List<Fields> fields = new ArrayList<>(leaves.size());
|
||||||
final List<ReaderSlice> slices = new ArrayList<>(leaves.size());
|
final List<ReaderSlice> slices = new ArrayList<>(leaves.size());
|
||||||
for (final LeafReaderContext ctx : leaves) {
|
for (final LeafReaderContext ctx : leaves) {
|
||||||
final LeafReader r = ctx.reader();
|
final LeafReader r = ctx.reader();
|
||||||
final Fields f = r.fields();
|
final Fields f = new LeafReaderFields(r);
|
||||||
fields.add(f);
|
fields.add(f);
|
||||||
slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1));
|
slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1));
|
||||||
}
|
}
|
||||||
@ -115,9 +116,31 @@ public final class MultiFields extends Fields {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This method may return null if the field does not exist.*/
|
/** This method may return null if the field does not exist or if it has no terms. */
|
||||||
public static Terms getTerms(IndexReader r, String field) throws IOException {
|
public static Terms getTerms(IndexReader r, String field) throws IOException {
|
||||||
return getFields(r).terms(field);
|
final List<LeafReaderContext> leaves = r.leaves();
|
||||||
|
if (leaves.size() == 1) {
|
||||||
|
return leaves.get(0).reader().terms(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Terms> termsPerLeaf = new ArrayList<>(leaves.size());
|
||||||
|
final List<ReaderSlice> slicePerLeaf = new ArrayList<>(leaves.size());
|
||||||
|
|
||||||
|
for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) {
|
||||||
|
LeafReaderContext ctx = leaves.get(leafIdx);
|
||||||
|
Terms subTerms = ctx.reader().terms(field);
|
||||||
|
if (subTerms != null) {
|
||||||
|
termsPerLeaf.add(subTerms);
|
||||||
|
slicePerLeaf.add(new ReaderSlice(ctx.docBase, r.maxDoc(), leafIdx - 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (termsPerLeaf.size() == 0) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return new MultiTerms(termsPerLeaf.toArray(Terms.EMPTY_ARRAY),
|
||||||
|
slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns {@link PostingsEnum} for the specified field and
|
/** Returns {@link PostingsEnum} for the specified field and
|
||||||
@ -264,5 +287,37 @@ public final class MultiFields extends Fields {
|
|||||||
}
|
}
|
||||||
return fields;
|
return fields;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class LeafReaderFields extends Fields {
|
||||||
|
|
||||||
|
private final LeafReader leafReader;
|
||||||
|
private final List<String> indexedFields;
|
||||||
|
|
||||||
|
LeafReaderFields(LeafReader leafReader) {
|
||||||
|
this.leafReader = leafReader;
|
||||||
|
this.indexedFields = new ArrayList<>();
|
||||||
|
for (FieldInfo fieldInfo : leafReader.getFieldInfos()) {
|
||||||
|
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
||||||
|
indexedFields.add(fieldInfo.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Collections.sort(indexedFields);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<String> iterator() {
|
||||||
|
return Collections.unmodifiableList(indexedFields).iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
return indexedFields.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Terms terms(String field) throws IOException {
|
||||||
|
return leafReader.terms(field);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.IdentityHashMap;
|
import java.util.IdentityHashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@ -50,7 +51,6 @@ import org.apache.lucene.util.Version;
|
|||||||
*/
|
*/
|
||||||
public class ParallelLeafReader extends LeafReader {
|
public class ParallelLeafReader extends LeafReader {
|
||||||
private final FieldInfos fieldInfos;
|
private final FieldInfos fieldInfos;
|
||||||
private final ParallelFields fields = new ParallelFields();
|
|
||||||
private final LeafReader[] parallelReaders, storedFieldsReaders;
|
private final LeafReader[] parallelReaders, storedFieldsReaders;
|
||||||
private final Set<LeafReader> completeReaderSet =
|
private final Set<LeafReader> completeReaderSet =
|
||||||
Collections.newSetFromMap(new IdentityHashMap<LeafReader,Boolean>());
|
Collections.newSetFromMap(new IdentityHashMap<LeafReader,Boolean>());
|
||||||
@ -58,9 +58,10 @@ public class ParallelLeafReader extends LeafReader {
|
|||||||
private final int maxDoc, numDocs;
|
private final int maxDoc, numDocs;
|
||||||
private final boolean hasDeletions;
|
private final boolean hasDeletions;
|
||||||
private final LeafMetaData metaData;
|
private final LeafMetaData metaData;
|
||||||
private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();
|
|
||||||
private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>();
|
private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>();
|
||||||
|
private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();//TODO needn't sort?
|
||||||
|
private final Map<String,LeafReader> termsFieldToReader = new HashMap<>();
|
||||||
|
|
||||||
/** Create a ParallelLeafReader based on the provided
|
/** Create a ParallelLeafReader based on the provided
|
||||||
* readers; auto-closes the given readers on {@link #close()}. */
|
* readers; auto-closes the given readers on {@link #close()}. */
|
||||||
public ParallelLeafReader(LeafReader... readers) throws IOException {
|
public ParallelLeafReader(LeafReader... readers) throws IOException {
|
||||||
@ -130,9 +131,15 @@ public class ParallelLeafReader extends LeafReader {
|
|||||||
if (!fieldToReader.containsKey(fieldInfo.name)) {
|
if (!fieldToReader.containsKey(fieldInfo.name)) {
|
||||||
builder.add(fieldInfo);
|
builder.add(fieldInfo);
|
||||||
fieldToReader.put(fieldInfo.name, reader);
|
fieldToReader.put(fieldInfo.name, reader);
|
||||||
|
// only add these if the reader responsible for that field name is the current:
|
||||||
|
// TODO consider populating 1st leaf with vectors even if the field name has been seen on a previous leaf
|
||||||
if (fieldInfo.hasVectors()) {
|
if (fieldInfo.hasVectors()) {
|
||||||
tvFieldToReader.put(fieldInfo.name, reader);
|
tvFieldToReader.put(fieldInfo.name, reader);
|
||||||
}
|
}
|
||||||
|
// TODO consider populating 1st leaf with terms even if the field name has been seen on a previous leaf
|
||||||
|
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
||||||
|
termsFieldToReader.put(fieldInfo.name, reader);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -154,17 +161,6 @@ public class ParallelLeafReader extends LeafReader {
|
|||||||
|
|
||||||
fieldInfos = builder.finish();
|
fieldInfos = builder.finish();
|
||||||
this.metaData = new LeafMetaData(createdVersionMajor, minVersion, indexSort);
|
this.metaData = new LeafMetaData(createdVersionMajor, minVersion, indexSort);
|
||||||
|
|
||||||
// build Fields instance
|
|
||||||
for (final LeafReader reader : this.parallelReaders) {
|
|
||||||
final Fields readerFields = reader.fields();
|
|
||||||
for (String field : readerFields) {
|
|
||||||
// only add if the reader responsible for that field name is the current:
|
|
||||||
if (fieldToReader.get(field) == reader) {
|
|
||||||
this.fields.addField(field, readerFields.terms(field));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// do this finally so any Exceptions occurred before don't affect refcounts:
|
// do this finally so any Exceptions occurred before don't affect refcounts:
|
||||||
for (LeafReader reader : completeReaderSet) {
|
for (LeafReader reader : completeReaderSet) {
|
||||||
@ -230,13 +226,14 @@ public class ParallelLeafReader extends LeafReader {
|
|||||||
ensureOpen();
|
ensureOpen();
|
||||||
return hasDeletions ? parallelReaders[0].getLiveDocs() : null;
|
return hasDeletions ? parallelReaders[0].getLiveDocs() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() {
|
public Terms terms(String field) throws IOException {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
return fields;
|
LeafReader leafReader = termsFieldToReader.get(field);
|
||||||
|
return leafReader == null ? null : leafReader.terms(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int numDocs() {
|
public int numDocs() {
|
||||||
// Don't call ensureOpen() here (it could affect performance)
|
// Don't call ensureOpen() here (it could affect performance)
|
||||||
|
@ -18,6 +18,8 @@ package org.apache.lucene.index;
|
|||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
@ -284,21 +286,27 @@ public final class SlowCodecReaderWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static FieldsProducer readerToFieldsProducer(final LeafReader reader) throws IOException {
|
private static FieldsProducer readerToFieldsProducer(final LeafReader reader) throws IOException {
|
||||||
final Fields fields = reader.fields();
|
ArrayList<String> indexedFields = new ArrayList<>();
|
||||||
|
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
|
||||||
|
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
||||||
|
indexedFields.add(fieldInfo.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Collections.sort(indexedFields);
|
||||||
return new FieldsProducer() {
|
return new FieldsProducer() {
|
||||||
@Override
|
@Override
|
||||||
public Iterator<String> iterator() {
|
public Iterator<String> iterator() {
|
||||||
return fields.iterator();
|
return indexedFields.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Terms terms(String field) throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return fields.terms(field);
|
return reader.terms(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int size() {
|
public int size() {
|
||||||
return fields.size();
|
return indexedFields.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -49,6 +49,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
|||||||
*/
|
*/
|
||||||
class SortingLeafReader extends FilterLeafReader {
|
class SortingLeafReader extends FilterLeafReader {
|
||||||
|
|
||||||
|
//TODO remove from here; move to FreqProxTermsWriter or FreqProxFields?
|
||||||
static class SortingFields extends FilterFields {
|
static class SortingFields extends FilterFields {
|
||||||
|
|
||||||
private final Sorter.DocMap docMap;
|
private final Sorter.DocMap docMap;
|
||||||
@ -1042,8 +1043,9 @@ class SortingLeafReader extends FilterLeafReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new SortingFields(in.fields(), in.getFieldInfos(), docMap);
|
Terms terms = super.terms(field);
|
||||||
|
return terms==null ? null : new SortingTerms(terms, in.getFieldInfos().fieldInfo(field).getIndexOptions(), docMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -109,6 +109,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
boolean start(IndexableField field, boolean first) {
|
boolean start(IndexableField field, boolean first) {
|
||||||
|
super.start(field, first);
|
||||||
assert field.fieldType().indexOptions() != IndexOptions.NONE;
|
assert field.fieldType().indexOptions() != IndexOptions.NONE;
|
||||||
|
|
||||||
if (first) {
|
if (first) {
|
||||||
@ -224,7 +225,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
|
|||||||
void newTerm(final int termID) {
|
void newTerm(final int termID) {
|
||||||
TermVectorsPostingsArray postings = termVectorsPostingsArray;
|
TermVectorsPostingsArray postings = termVectorsPostingsArray;
|
||||||
|
|
||||||
postings.freqs[termID] = 1;
|
postings.freqs[termID] = getTermFreq();
|
||||||
postings.lastOffsets[termID] = 0;
|
postings.lastOffsets[termID] = 0;
|
||||||
postings.lastPositions[termID] = 0;
|
postings.lastPositions[termID] = 0;
|
||||||
|
|
||||||
@ -235,11 +236,25 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
|
|||||||
void addTerm(final int termID) {
|
void addTerm(final int termID) {
|
||||||
TermVectorsPostingsArray postings = termVectorsPostingsArray;
|
TermVectorsPostingsArray postings = termVectorsPostingsArray;
|
||||||
|
|
||||||
postings.freqs[termID]++;
|
postings.freqs[termID] += getTermFreq();
|
||||||
|
|
||||||
writeProx(postings, termID);
|
writeProx(postings, termID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int getTermFreq() {
|
||||||
|
int freq = termFreqAtt.getTermFrequency();
|
||||||
|
if (freq != 1) {
|
||||||
|
if (doVectorPositions) {
|
||||||
|
throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector positions while using custom TermFrequencyAttribute");
|
||||||
|
}
|
||||||
|
if (doVectorOffsets) {
|
||||||
|
throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector offsets while using custom TermFrequencyAttribute");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return freq;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void newPostingsArray() {
|
public void newPostingsArray() {
|
||||||
termVectorsPostingsArray = (TermVectorsPostingsArray) postingsArray;
|
termVectorsPostingsArray = (TermVectorsPostingsArray) postingsArray;
|
||||||
|
@ -19,12 +19,13 @@ package org.apache.lucene.index;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.util.ByteBlockPool;
|
import org.apache.lucene.util.ByteBlockPool;
|
||||||
|
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
|
||||||
import org.apache.lucene.util.BytesRefHash;
|
import org.apache.lucene.util.BytesRefHash;
|
||||||
import org.apache.lucene.util.Counter;
|
import org.apache.lucene.util.Counter;
|
||||||
import org.apache.lucene.util.IntBlockPool;
|
import org.apache.lucene.util.IntBlockPool;
|
||||||
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
|
|
||||||
|
|
||||||
abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
||||||
private static final int HASH_INIT_SIZE = 4;
|
private static final int HASH_INIT_SIZE = 4;
|
||||||
@ -35,6 +36,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
|||||||
protected final DocumentsWriterPerThread.DocState docState;
|
protected final DocumentsWriterPerThread.DocState docState;
|
||||||
protected final FieldInvertState fieldState;
|
protected final FieldInvertState fieldState;
|
||||||
TermToBytesRefAttribute termAtt;
|
TermToBytesRefAttribute termAtt;
|
||||||
|
protected TermFrequencyAttribute termFreqAtt;
|
||||||
|
|
||||||
// Copied from our perThread
|
// Copied from our perThread
|
||||||
final IntBlockPool intPool;
|
final IntBlockPool intPool;
|
||||||
@ -287,6 +289,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
|||||||
* document. */
|
* document. */
|
||||||
boolean start(IndexableField field, boolean first) {
|
boolean start(IndexableField field, boolean first) {
|
||||||
termAtt = fieldState.termAttribute;
|
termAtt = fieldState.termAttribute;
|
||||||
|
termFreqAtt = fieldState.termFreqAttribute;
|
||||||
if (nextPerField != null) {
|
if (nextPerField != null) {
|
||||||
doNextCall = nextPerField.start(field, first);
|
doNextCall = nextPerField.start(field, first);
|
||||||
}
|
}
|
||||||
|
@ -51,6 +51,7 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||||||
protected final CompiledAutomaton compiled;
|
protected final CompiledAutomaton compiled;
|
||||||
/** term containing the field, and possibly some pattern structure */
|
/** term containing the field, and possibly some pattern structure */
|
||||||
protected final Term term;
|
protected final Term term;
|
||||||
|
protected final boolean automatonIsBinary;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new AutomatonQuery from an {@link Automaton}.
|
* Create a new AutomatonQuery from an {@link Automaton}.
|
||||||
@ -98,6 +99,7 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||||||
super(term.field());
|
super(term.field());
|
||||||
this.term = term;
|
this.term = term;
|
||||||
this.automaton = automaton;
|
this.automaton = automaton;
|
||||||
|
this.automatonIsBinary = isBinary;
|
||||||
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
|
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
|
||||||
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
|
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
|
||||||
}
|
}
|
||||||
@ -154,4 +156,9 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||||||
public Automaton getAutomaton() {
|
public Automaton getAutomaton() {
|
||||||
return automaton;
|
return automaton;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Is this a binary (byte) oriented automaton. See the constructor. */
|
||||||
|
public boolean isAutomatonBinary() {
|
||||||
|
return automatonIsBinary;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -178,6 +178,14 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||||||
return disjuncts[0];
|
return disjuncts[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tieBreakerMultiplier == 1.0f) {
|
||||||
|
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||||
|
for (Query sub : disjuncts) {
|
||||||
|
builder.add(sub, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
return builder.build();
|
||||||
|
}
|
||||||
|
|
||||||
boolean actuallyRewritten = false;
|
boolean actuallyRewritten = false;
|
||||||
List<Query> rewrittenDisjuncts = new ArrayList<>();
|
List<Query> rewrittenDisjuncts = new ArrayList<>();
|
||||||
for (Query sub : disjuncts) {
|
for (Query sub : disjuncts) {
|
||||||
|
@ -26,7 +26,6 @@ import java.util.Objects;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.SortedSet;
|
import java.util.SortedSet;
|
||||||
|
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
@ -224,8 +223,7 @@ public class TermInSetQuery extends Query implements Accountable {
|
|||||||
private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
|
private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
|
||||||
final LeafReader reader = context.reader();
|
final LeafReader reader = context.reader();
|
||||||
|
|
||||||
final Fields fields = reader.fields();
|
Terms terms = reader.terms(field);
|
||||||
Terms terms = fields.terms(field);
|
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,7 @@ public final class NamedSPILoader<S extends NamedSPILoader.NamedSPI> implements
|
|||||||
private final Class<S> clazz;
|
private final Class<S> clazz;
|
||||||
|
|
||||||
public NamedSPILoader(Class<S> clazz) {
|
public NamedSPILoader(Class<S> clazz) {
|
||||||
this(clazz, Thread.currentThread().getContextClassLoader());
|
this(clazz, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public NamedSPILoader(Class<S> clazz, ClassLoader classloader) {
|
public NamedSPILoader(Class<S> clazz, ClassLoader classloader) {
|
||||||
|
@ -538,11 +538,7 @@ public class QueryBuilder {
|
|||||||
builder.add(queryPos, operator);
|
builder.add(queryPos, operator);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BooleanQuery bq = builder.build();
|
return builder.build();
|
||||||
if (bq.clauses().size() == 1) {
|
|
||||||
return bq.clauses().get(0).getQuery();
|
|
||||||
}
|
|
||||||
return bq;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -48,13 +48,11 @@ public final class SPIClassIterator<S> implements Iterator<Class<? extends S>> {
|
|||||||
private final Enumeration<URL> profilesEnum;
|
private final Enumeration<URL> profilesEnum;
|
||||||
private Iterator<String> linesIterator;
|
private Iterator<String> linesIterator;
|
||||||
|
|
||||||
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the context classloader. */
|
/** Creates a new SPI iterator to lookup services of type {@code clazz} using
|
||||||
|
* the same {@link ClassLoader} as the argument. */
|
||||||
public static <S> SPIClassIterator<S> get(Class<S> clazz) {
|
public static <S> SPIClassIterator<S> get(Class<S> clazz) {
|
||||||
ClassLoader cl = Thread.currentThread().getContextClassLoader();
|
return new SPIClassIterator<>(clazz,
|
||||||
if (cl == null) {
|
Objects.requireNonNull(clazz.getClassLoader(), () -> clazz + " has no classloader."));
|
||||||
cl = clazz.getClassLoader();
|
|
||||||
}
|
|
||||||
return new SPIClassIterator<>(clazz, cl);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the given classloader. */
|
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the given classloader. */
|
||||||
|
@ -125,6 +125,7 @@ public class TestToken extends LuceneTestCase {
|
|||||||
t.setFlags(8);
|
t.setFlags(8);
|
||||||
t.setPositionIncrement(3);
|
t.setPositionIncrement(3);
|
||||||
t.setPositionLength(11);
|
t.setPositionLength(11);
|
||||||
|
t.setTermFrequency(42);
|
||||||
TestUtil.assertAttributeReflection(t,
|
TestUtil.assertAttributeReflection(t,
|
||||||
new HashMap<String, Object>() {{
|
new HashMap<String, Object>() {{
|
||||||
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||||
@ -136,6 +137,7 @@ public class TestToken extends LuceneTestCase {
|
|||||||
put(PayloadAttribute.class.getName() + "#payload", null);
|
put(PayloadAttribute.class.getName() + "#payload", null);
|
||||||
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
|
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
|
||||||
put(FlagsAttribute.class.getName() + "#flags", 8);
|
put(FlagsAttribute.class.getName() + "#flags", 8);
|
||||||
|
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -82,6 +82,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
|
|||||||
t.setPositionIncrement(3);
|
t.setPositionIncrement(3);
|
||||||
t.setPositionLength(11);
|
t.setPositionLength(11);
|
||||||
t.setType("foobar");
|
t.setType("foobar");
|
||||||
|
t.setTermFrequency(42);
|
||||||
TestUtil.assertAttributeReflection(t,
|
TestUtil.assertAttributeReflection(t,
|
||||||
new HashMap<String, Object>() {{
|
new HashMap<String, Object>() {{
|
||||||
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||||
@ -91,6 +92,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
|
|||||||
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
|
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
|
||||||
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
|
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
|
||||||
put(TypeAttribute.class.getName() + "#type", "foobar");
|
put(TypeAttribute.class.getName() + "#type", "foobar");
|
||||||
|
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -55,7 +55,7 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
|
|||||||
|
|
||||||
DirectoryReader r = DirectoryReader.open(w);
|
DirectoryReader r = DirectoryReader.open(w);
|
||||||
assertEquals(1, r.leaves().size());
|
assertEquals(1, r.leaves().size());
|
||||||
FieldReader field = (FieldReader) r.leaves().get(0).reader().fields().terms("field");
|
FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
|
||||||
// We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
|
// We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
|
||||||
Stats stats = field.getStats();
|
Stats stats = field.getStats();
|
||||||
assertEquals(0, stats.floorBlockCount);
|
assertEquals(0, stats.floorBlockCount);
|
||||||
|
@ -91,7 +91,7 @@ public class Test2BDocs extends LuceneTestCase {
|
|||||||
LeafReader reader = context.reader();
|
LeafReader reader = context.reader();
|
||||||
int lim = context.reader().maxDoc();
|
int lim = context.reader().maxDoc();
|
||||||
|
|
||||||
Terms terms = reader.fields().terms("f1");
|
Terms terms = reader.terms("f1");
|
||||||
for (int i=0; i<10000; i++) {
|
for (int i=0; i<10000; i++) {
|
||||||
TermsEnum te = terms.iterator();
|
TermsEnum te = terms.iterator();
|
||||||
assertTrue( te.seekExact(term) );
|
assertTrue( te.seekExact(term) );
|
||||||
|
@ -0,0 +1,468 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.search.CollectionStatistics;
|
||||||
|
import org.apache.lucene.search.TermStatistics;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
import static org.apache.lucene.index.PostingsEnum.NO_MORE_DOCS;
|
||||||
|
|
||||||
|
public class TestCustomTermFreq extends LuceneTestCase {
|
||||||
|
|
||||||
|
private static final class CannedTermFreqs extends TokenStream {
|
||||||
|
private final String[] terms;
|
||||||
|
private final int[] termFreqs;
|
||||||
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
|
private final TermFrequencyAttribute termFreqAtt = addAttribute(TermFrequencyAttribute.class);
|
||||||
|
private int upto;
|
||||||
|
|
||||||
|
public CannedTermFreqs(String[] terms, int[] termFreqs) {
|
||||||
|
this.terms = terms;
|
||||||
|
this.termFreqs = termFreqs;
|
||||||
|
assert terms.length == termFreqs.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean incrementToken() {
|
||||||
|
if (upto == terms.length) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
clearAttributes();
|
||||||
|
|
||||||
|
termAtt.append(terms[upto]);
|
||||||
|
termFreqAtt.setTermFrequency(termFreqs[upto]);
|
||||||
|
|
||||||
|
upto++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void reset() {
|
||||||
|
upto = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingletonTermsOneDoc() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar"},
|
||||||
|
new int[] {42, 128}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(128, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(42, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
IOUtils.close(r, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSingletonTermsTwoDocs() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar"},
|
||||||
|
new int[] {42, 128}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar"},
|
||||||
|
new int[] {50, 50}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(128, postings.freq());
|
||||||
|
assertEquals(1, postings.nextDoc());
|
||||||
|
assertEquals(50, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(42, postings.freq());
|
||||||
|
assertEquals(1, postings.nextDoc());
|
||||||
|
assertEquals(50, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
IOUtils.close(r, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRepeatTermsOneDoc() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(228, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(59, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
IOUtils.close(r, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRepeatTermsTwoDocs() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {50, 60, 70, 80}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(228, postings.freq());
|
||||||
|
assertEquals(1, postings.nextDoc());
|
||||||
|
assertEquals(140, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(59, postings.freq());
|
||||||
|
assertEquals(1, postings.nextDoc());
|
||||||
|
assertEquals(120, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
IOUtils.close(r, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTotalTermFreq() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {50, 60, 70, 80}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
|
||||||
|
TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator();
|
||||||
|
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
|
||||||
|
assertEquals(179, termsEnum.totalTermFreq());
|
||||||
|
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
|
||||||
|
assertEquals(368, termsEnum.totalTermFreq());
|
||||||
|
|
||||||
|
IOUtils.close(r, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// you can't index proximity with custom term freqs:
|
||||||
|
public void testInvalidProx() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
|
||||||
|
assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
|
||||||
|
IOUtils.close(w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// you can't index DOCS_ONLY with custom term freq
|
||||||
|
public void testInvalidDocsOnly() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
|
||||||
|
assertEquals("field \"field\": must index term freq while using custom TermFrequencyAttribute", e.getMessage());
|
||||||
|
IOUtils.close(w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// sum of term freqs must fit in an int
|
||||||
|
public void testOverflowInt() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("field", "this field should be indexed", fieldType));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
Document doc2 = new Document();
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar"},
|
||||||
|
new int[] {3, Integer.MAX_VALUE}),
|
||||||
|
fieldType);
|
||||||
|
doc2.add(field);
|
||||||
|
expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc2);});
|
||||||
|
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
assertEquals(1, r.numDocs());
|
||||||
|
|
||||||
|
IOUtils.close(r, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testInvalidTermVectorPositions() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
fieldType.setStoreTermVectors(true);
|
||||||
|
fieldType.setStoreTermVectorPositions(true);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
|
||||||
|
assertEquals("field \"field\": cannot index term vector positions while using custom TermFrequencyAttribute", e.getMessage());
|
||||||
|
IOUtils.close(w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testInvalidTermVectorOffsets() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
fieldType.setStoreTermVectors(true);
|
||||||
|
fieldType.setStoreTermVectorOffsets(true);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
|
||||||
|
assertEquals("field \"field\": cannot index term vector offsets while using custom TermFrequencyAttribute", e.getMessage());
|
||||||
|
IOUtils.close(w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTermVectors() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
fieldType.setStoreTermVectors(true);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {50, 60, 70, 80}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
IndexReader r = DirectoryReader.open(w);
|
||||||
|
|
||||||
|
Fields fields = r.getTermVectors(0);
|
||||||
|
TermsEnum termsEnum = fields.terms("field").iterator();
|
||||||
|
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
|
||||||
|
assertEquals(228, termsEnum.totalTermFreq());
|
||||||
|
PostingsEnum postings = termsEnum.postings(null);
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(228, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
|
||||||
|
assertEquals(59, termsEnum.totalTermFreq());
|
||||||
|
postings = termsEnum.postings(null);
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(59, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
fields = r.getTermVectors(1);
|
||||||
|
termsEnum = fields.terms("field").iterator();
|
||||||
|
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
|
||||||
|
assertEquals(140, termsEnum.totalTermFreq());
|
||||||
|
postings = termsEnum.postings(null);
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(140, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
|
||||||
|
assertEquals(120, termsEnum.totalTermFreq());
|
||||||
|
postings = termsEnum.postings(null);
|
||||||
|
assertNotNull(postings);
|
||||||
|
assertEquals(0, postings.nextDoc());
|
||||||
|
assertEquals(120, postings.freq());
|
||||||
|
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||||
|
|
||||||
|
IOUtils.close(r, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Similarity holds onto the FieldInvertState for subsequent verification.
|
||||||
|
*/
|
||||||
|
private static class NeverForgetsSimilarity extends Similarity {
|
||||||
|
public FieldInvertState lastState;
|
||||||
|
private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();
|
||||||
|
|
||||||
|
private NeverForgetsSimilarity() {
|
||||||
|
// no
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long computeNorm(FieldInvertState state) {
|
||||||
|
this.lastState = state;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testFieldInvertState() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
|
||||||
|
IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
|
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||||
|
new int[] {42, 128, 17, 100}),
|
||||||
|
fieldType);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
|
||||||
|
assertEquals(228, fis.getMaxTermFrequency());
|
||||||
|
assertEquals(2, fis.getUniqueTermCount());
|
||||||
|
assertEquals(0, fis.getNumOverlap());
|
||||||
|
assertEquals(287, fis.getLength());
|
||||||
|
|
||||||
|
IOUtils.close(w, dir);
|
||||||
|
}
|
||||||
|
}
|
@ -249,14 +249,16 @@ public class TestDoc extends LuceneTestCase {
|
|||||||
for (int i = 0; i < reader.numDocs(); i++)
|
for (int i = 0; i < reader.numDocs(); i++)
|
||||||
out.println(reader.document(i));
|
out.println(reader.document(i));
|
||||||
|
|
||||||
Fields fields = reader.fields();
|
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
|
||||||
for (String field : fields) {
|
if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
|
||||||
Terms terms = fields.terms(field);
|
continue;
|
||||||
|
}
|
||||||
|
Terms terms = reader.terms(fieldInfo.name);
|
||||||
assertNotNull(terms);
|
assertNotNull(terms);
|
||||||
TermsEnum tis = terms.iterator();
|
TermsEnum tis = terms.iterator();
|
||||||
while(tis.next() != null) {
|
while(tis.next() != null) {
|
||||||
|
|
||||||
out.print(" term=" + field + ":" + tis.term());
|
out.print(" term=" + fieldInfo.name + ":" + tis.term());
|
||||||
out.println(" DF=" + tis.docFreq());
|
out.println(" DF=" + tis.docFreq());
|
||||||
|
|
||||||
PostingsEnum positions = tis.postings(null, PostingsEnum.POSITIONS);
|
PostingsEnum positions = tis.postings(null, PostingsEnum.POSITIONS);
|
||||||
|
@ -37,17 +37,6 @@ import org.junit.Ignore;
|
|||||||
public class TestExitableDirectoryReader extends LuceneTestCase {
|
public class TestExitableDirectoryReader extends LuceneTestCase {
|
||||||
private static class TestReader extends FilterLeafReader {
|
private static class TestReader extends FilterLeafReader {
|
||||||
|
|
||||||
private static class TestFields extends FilterFields {
|
|
||||||
TestFields(Fields in) {
|
|
||||||
super(in);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms(String field) throws IOException {
|
|
||||||
return new TestTerms(super.terms(field));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class TestTerms extends FilterTerms {
|
private static class TestTerms extends FilterTerms {
|
||||||
TestTerms(Terms in) {
|
TestTerms(Terms in) {
|
||||||
super(in);
|
super(in);
|
||||||
@ -83,8 +72,9 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new TestFields(super.fields());
|
Terms terms = super.terms(field);
|
||||||
|
return terms==null ? null : new TestTerms(terms);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -0,0 +1,139 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.CannedTokenStream;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.search.CollectionStatistics;
|
||||||
|
import org.apache.lucene.search.TermStatistics;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
public class TestFieldInvertState extends LuceneTestCase {
|
||||||
|
/**
|
||||||
|
* Similarity holds onto the FieldInvertState for subsequent verification.
|
||||||
|
*/
|
||||||
|
private static class NeverForgetsSimilarity extends Similarity {
|
||||||
|
public FieldInvertState lastState;
|
||||||
|
private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();
|
||||||
|
|
||||||
|
private NeverForgetsSimilarity() {
|
||||||
|
// no
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long computeNorm(FieldInvertState state) {
|
||||||
|
this.lastState = state;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasic() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
|
||||||
|
IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
Document doc = new Document();
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTokenStream(new Token("a", 0, 1),
|
||||||
|
new Token("b", 2, 3),
|
||||||
|
new Token("c", 4, 5)),
|
||||||
|
TextField.TYPE_NOT_STORED);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
|
||||||
|
assertEquals(1, fis.getMaxTermFrequency());
|
||||||
|
assertEquals(3, fis.getUniqueTermCount());
|
||||||
|
assertEquals(0, fis.getNumOverlap());
|
||||||
|
assertEquals(3, fis.getLength());
|
||||||
|
IOUtils.close(w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandom() throws Exception {
|
||||||
|
int numUniqueTokens = TestUtil.nextInt(random(), 1, 25);
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
|
||||||
|
IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
Document doc = new Document();
|
||||||
|
|
||||||
|
int numTokens = atLeast(10000);
|
||||||
|
Token[] tokens = new Token[numTokens];
|
||||||
|
Map<Character,Integer> counts = new HashMap<>();
|
||||||
|
int numStacked = 0;
|
||||||
|
int maxTermFreq = 0;
|
||||||
|
int pos = -1;
|
||||||
|
for (int i=0;i<numTokens;i++) {
|
||||||
|
char tokenChar = (char) ('a' + random().nextInt(numUniqueTokens));
|
||||||
|
Integer oldCount = counts.get(tokenChar);
|
||||||
|
int newCount;
|
||||||
|
if (oldCount == null) {
|
||||||
|
newCount = 1;
|
||||||
|
} else {
|
||||||
|
newCount = 1 + oldCount;
|
||||||
|
}
|
||||||
|
counts.put(tokenChar, newCount);
|
||||||
|
maxTermFreq = Math.max(maxTermFreq, newCount);
|
||||||
|
|
||||||
|
Token token = new Token(Character.toString(tokenChar), 2*i, 2*i+1);
|
||||||
|
|
||||||
|
if (i > 0 && random().nextInt(7) == 3) {
|
||||||
|
token.setPositionIncrement(0);
|
||||||
|
numStacked++;
|
||||||
|
} else {
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
tokens[i] = token;
|
||||||
|
}
|
||||||
|
|
||||||
|
Field field = new Field("field",
|
||||||
|
new CannedTokenStream(tokens),
|
||||||
|
TextField.TYPE_NOT_STORED);
|
||||||
|
doc.add(field);
|
||||||
|
w.addDocument(doc);
|
||||||
|
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
|
||||||
|
assertEquals(maxTermFreq, fis.getMaxTermFrequency());
|
||||||
|
assertEquals(counts.size(), fis.getUniqueTermCount());
|
||||||
|
assertEquals(numStacked, fis.getNumOverlap());
|
||||||
|
assertEquals(numTokens, fis.getLength());
|
||||||
|
assertEquals(pos, fis.getPosition());
|
||||||
|
|
||||||
|
IOUtils.close(w, dir);
|
||||||
|
}
|
||||||
|
}
|
@ -35,18 +35,6 @@ public class TestFilterLeafReader extends LuceneTestCase {
|
|||||||
|
|
||||||
private static class TestReader extends FilterLeafReader {
|
private static class TestReader extends FilterLeafReader {
|
||||||
|
|
||||||
/** Filter that only permits terms containing 'e'.*/
|
|
||||||
private static class TestFields extends FilterFields {
|
|
||||||
TestFields(Fields in) {
|
|
||||||
super(in);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms(String field) throws IOException {
|
|
||||||
return new TestTerms(super.terms(field));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class TestTerms extends FilterTerms {
|
private static class TestTerms extends FilterTerms {
|
||||||
TestTerms(Terms in) {
|
TestTerms(Terms in) {
|
||||||
super(in);
|
super(in);
|
||||||
@ -103,8 +91,9 @@ public class TestFilterLeafReader extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new TestFields(super.fields());
|
Terms terms = super.terms(field);
|
||||||
|
return terms==null ? null : new TestTerms(terms);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -17,10 +17,13 @@
|
|||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.store.*;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.*;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.*;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.util.*;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestFlex extends LuceneTestCase {
|
public class TestFlex extends LuceneTestCase {
|
||||||
|
|
||||||
@ -70,7 +73,7 @@ public class TestFlex extends LuceneTestCase {
|
|||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.forceMerge(1);
|
w.forceMerge(1);
|
||||||
DirectoryReader r = w.getReader();
|
DirectoryReader r = w.getReader();
|
||||||
TermsEnum terms = getOnlyLeafReader(r).fields().terms("f").iterator();
|
TermsEnum terms = getOnlyLeafReader(r).terms("f").iterator();
|
||||||
assertTrue(terms.next() != null);
|
assertTrue(terms.next() != null);
|
||||||
try {
|
try {
|
||||||
assertEquals(0, terms.ord());
|
assertEquals(0, terms.ord());
|
||||||
|
@ -80,9 +80,7 @@ public class TestIndexReaderClose extends LuceneTestCase {
|
|||||||
reader.getReaderCacheHelper().addClosedListener(new FaultyListener());
|
reader.getReaderCacheHelper().addClosedListener(new FaultyListener());
|
||||||
}
|
}
|
||||||
|
|
||||||
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
|
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> reader.close());
|
||||||
reader.close();
|
|
||||||
});
|
|
||||||
|
|
||||||
if (throwOnClose) {
|
if (throwOnClose) {
|
||||||
assertEquals("BOOM!", expected.getMessage());
|
assertEquals("BOOM!", expected.getMessage());
|
||||||
@ -90,9 +88,7 @@ public class TestIndexReaderClose extends LuceneTestCase {
|
|||||||
assertEquals("GRRRRRRRRRRRR!", expected.getMessage());
|
assertEquals("GRRRRRRRRRRRR!", expected.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
expectThrows(AlreadyClosedException.class, () -> {
|
expectThrows(AlreadyClosedException.class, () -> reader.terms("someField"));
|
||||||
reader.fields();
|
|
||||||
});
|
|
||||||
|
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
reader.close(); // call it again
|
reader.close(); // call it again
|
||||||
|
@ -694,7 +694,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||||||
writer.close();
|
writer.close();
|
||||||
DirectoryReader reader = DirectoryReader.open(dir);
|
DirectoryReader reader = DirectoryReader.open(dir);
|
||||||
LeafReader subreader = getOnlyLeafReader(reader);
|
LeafReader subreader = getOnlyLeafReader(reader);
|
||||||
TermsEnum te = subreader.fields().terms("").iterator();
|
TermsEnum te = subreader.terms("").iterator();
|
||||||
assertEquals(new BytesRef("a"), te.next());
|
assertEquals(new BytesRef("a"), te.next());
|
||||||
assertEquals(new BytesRef("b"), te.next());
|
assertEquals(new BytesRef("b"), te.next());
|
||||||
assertEquals(new BytesRef("c"), te.next());
|
assertEquals(new BytesRef("c"), te.next());
|
||||||
@ -715,7 +715,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||||||
writer.close();
|
writer.close();
|
||||||
DirectoryReader reader = DirectoryReader.open(dir);
|
DirectoryReader reader = DirectoryReader.open(dir);
|
||||||
LeafReader subreader = getOnlyLeafReader(reader);
|
LeafReader subreader = getOnlyLeafReader(reader);
|
||||||
TermsEnum te = subreader.fields().terms("").iterator();
|
TermsEnum te = subreader.terms("").iterator();
|
||||||
assertEquals(new BytesRef(""), te.next());
|
assertEquals(new BytesRef(""), te.next());
|
||||||
assertEquals(new BytesRef("a"), te.next());
|
assertEquals(new BytesRef("a"), te.next());
|
||||||
assertEquals(new BytesRef("b"), te.next());
|
assertEquals(new BytesRef("b"), te.next());
|
||||||
|
@ -136,7 +136,7 @@ public class TestIndexWriterUnicode extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void checkTermsOrder(IndexReader r, Set<String> allTerms, boolean isTop) throws IOException {
|
private void checkTermsOrder(IndexReader r, Set<String> allTerms, boolean isTop) throws IOException {
|
||||||
TermsEnum terms = MultiFields.getFields(r).terms("f").iterator();
|
TermsEnum terms = MultiFields.getTerms(r, "f").iterator();
|
||||||
|
|
||||||
BytesRefBuilder last = new BytesRefBuilder();
|
BytesRefBuilder last = new BytesRefBuilder();
|
||||||
|
|
||||||
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
|||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
@ -90,21 +89,10 @@ public class TestParallelTermEnum extends LuceneTestCase {
|
|||||||
public void test1() throws IOException {
|
public void test1() throws IOException {
|
||||||
ParallelLeafReader pr = new ParallelLeafReader(ir1, ir2);
|
ParallelLeafReader pr = new ParallelLeafReader(ir1, ir2);
|
||||||
|
|
||||||
Fields fields = pr.fields();
|
assertEquals(3, pr.getFieldInfos().size());
|
||||||
Iterator<String> fe = fields.iterator();
|
|
||||||
|
|
||||||
String f = fe.next();
|
checkTerms(pr.terms("field1"), "brown", "fox", "jumps", "quick", "the");
|
||||||
assertEquals("field1", f);
|
checkTerms(pr.terms("field2"), "brown", "fox", "jumps", "quick", "the");
|
||||||
checkTerms(fields.terms(f), "brown", "fox", "jumps", "quick", "the");
|
checkTerms(pr.terms("field3"), "dog", "fox", "jumps", "lazy", "over", "the");
|
||||||
|
|
||||||
f = fe.next();
|
|
||||||
assertEquals("field2", f);
|
|
||||||
checkTerms(fields.terms(f), "brown", "fox", "jumps", "quick", "the");
|
|
||||||
|
|
||||||
f = fe.next();
|
|
||||||
assertEquals("field3", f);
|
|
||||||
checkTerms(fields.terms(f), "dog", "fox", "jumps", "lazy", "over", "the");
|
|
||||||
|
|
||||||
assertFalse(fe.hasNext());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -479,7 +479,7 @@ public class TestPayloads extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexReader reader = DirectoryReader.open(dir);
|
IndexReader reader = DirectoryReader.open(dir);
|
||||||
TermsEnum terms = MultiFields.getFields(reader).terms(field).iterator();
|
TermsEnum terms = MultiFields.getTerms(reader, field).iterator();
|
||||||
PostingsEnum tp = null;
|
PostingsEnum tp = null;
|
||||||
while (terms.next() != null) {
|
while (terms.next() != null) {
|
||||||
String termText = terms.term().utf8ToString();
|
String termText = terms.term().utf8ToString();
|
||||||
@ -602,7 +602,7 @@ public class TestPayloads extends LuceneTestCase {
|
|||||||
field.setTokenStream(ts);
|
field.setTokenStream(ts);
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
DirectoryReader reader = writer.getReader();
|
DirectoryReader reader = writer.getReader();
|
||||||
TermsEnum te = MultiFields.getFields(reader).terms("field").iterator();
|
TermsEnum te = MultiFields.getTerms(reader, "field").iterator();
|
||||||
assertTrue(te.seekExact(new BytesRef("withPayload")));
|
assertTrue(te.seekExact(new BytesRef("withPayload")));
|
||||||
PostingsEnum de = te.postings(null, PostingsEnum.PAYLOADS);
|
PostingsEnum de = te.postings(null, PostingsEnum.PAYLOADS);
|
||||||
de.nextDoc();
|
de.nextDoc();
|
||||||
|
@ -221,9 +221,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
|
|||||||
|
|
||||||
public int[] toDocsArray(Term term, Bits bits, IndexReader reader)
|
public int[] toDocsArray(Term term, Bits bits, IndexReader reader)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Fields fields = MultiFields.getFields(reader);
|
TermsEnum ctermsEnum = MultiFields.getTerms(reader, term.field).iterator();
|
||||||
Terms cterms = fields.terms(term.field);
|
|
||||||
TermsEnum ctermsEnum = cterms.iterator();
|
|
||||||
if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
|
if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
|
||||||
PostingsEnum postingsEnum = TestUtil.docs(random(), ctermsEnum, null, PostingsEnum.NONE);
|
PostingsEnum postingsEnum = TestUtil.docs(random(), ctermsEnum, null, PostingsEnum.NONE);
|
||||||
return toArray(postingsEnum);
|
return toArray(postingsEnum);
|
||||||
|
@ -291,7 +291,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
|
|||||||
// TODO: improve this
|
// TODO: improve this
|
||||||
LeafReader sub = ctx.reader();
|
LeafReader sub = ctx.reader();
|
||||||
//System.out.println("\nsub=" + sub);
|
//System.out.println("\nsub=" + sub);
|
||||||
final TermsEnum termsEnum = sub.fields().terms("content").iterator();
|
final TermsEnum termsEnum = sub.terms("content").iterator();
|
||||||
PostingsEnum docs = null;
|
PostingsEnum docs = null;
|
||||||
PostingsEnum docsAndPositions = null;
|
PostingsEnum docsAndPositions = null;
|
||||||
PostingsEnum docsAndPositionsAndOffsets = null;
|
PostingsEnum docsAndPositionsAndOffsets = null;
|
||||||
|
@ -57,7 +57,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||||||
SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random()));
|
SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random()));
|
||||||
assertTrue(reader != null);
|
assertTrue(reader != null);
|
||||||
|
|
||||||
TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
|
TermsEnum terms = reader.terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
|
||||||
terms.seekCeil(new BytesRef("field"));
|
terms.seekCeil(new BytesRef("field"));
|
||||||
PostingsEnum termDocs = TestUtil.docs(random(), terms, null, PostingsEnum.FREQS);
|
PostingsEnum termDocs = TestUtil.docs(random(), terms, null, PostingsEnum.FREQS);
|
||||||
if (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
if (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
@ -19,14 +19,14 @@ package org.apache.lucene.index;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.TestUtil;
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
|
||||||
public class TestSegmentTermEnum extends LuceneTestCase {
|
public class TestSegmentTermEnum extends LuceneTestCase {
|
||||||
@ -80,7 +80,7 @@ public class TestSegmentTermEnum extends LuceneTestCase {
|
|||||||
addDoc(writer, "aaa bbb");
|
addDoc(writer, "aaa bbb");
|
||||||
writer.close();
|
writer.close();
|
||||||
LeafReader reader = getOnlyLeafReader(DirectoryReader.open(dir));
|
LeafReader reader = getOnlyLeafReader(DirectoryReader.open(dir));
|
||||||
TermsEnum terms = reader.fields().terms("content").iterator();
|
TermsEnum terms = reader.terms("content").iterator();
|
||||||
assertNotNull(terms.next());
|
assertNotNull(terms.next());
|
||||||
assertEquals("aaa", terms.term().utf8ToString());
|
assertEquals("aaa", terms.term().utf8ToString());
|
||||||
assertNotNull(terms.next());
|
assertNotNull(terms.next());
|
||||||
|
@ -21,10 +21,13 @@ import java.util.HashSet;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.util.*;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.store.*;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.document.*;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestStressAdvance extends LuceneTestCase {
|
public class TestStressAdvance extends LuceneTestCase {
|
||||||
|
|
||||||
@ -74,7 +77,7 @@ public class TestStressAdvance extends LuceneTestCase {
|
|||||||
bDocIDs.add(docID);
|
bDocIDs.add(docID);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final TermsEnum te = getOnlyLeafReader(r).fields().terms("field").iterator();
|
final TermsEnum te = getOnlyLeafReader(r).terms("field").iterator();
|
||||||
|
|
||||||
PostingsEnum de = null;
|
PostingsEnum de = null;
|
||||||
for(int iter2=0;iter2<10;iter2++) {
|
for(int iter2=0;iter2<10;iter2++) {
|
||||||
|
@ -18,7 +18,17 @@ package org.apache.lucene.index;
|
|||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
@ -738,7 +748,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||||||
DirectoryReader r = w.getReader();
|
DirectoryReader r = w.getReader();
|
||||||
w.close();
|
w.close();
|
||||||
LeafReader sub = getOnlyLeafReader(r);
|
LeafReader sub = getOnlyLeafReader(r);
|
||||||
Terms terms = sub.fields().terms("field");
|
Terms terms = sub.terms("field");
|
||||||
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
|
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||||
TermsEnum te = terms.intersect(ca, null);
|
TermsEnum te = terms.intersect(ca, null);
|
||||||
@ -792,7 +802,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||||||
DirectoryReader r = w.getReader();
|
DirectoryReader r = w.getReader();
|
||||||
w.close();
|
w.close();
|
||||||
LeafReader sub = getOnlyLeafReader(r);
|
LeafReader sub = getOnlyLeafReader(r);
|
||||||
Terms terms = sub.fields().terms("field");
|
Terms terms = sub.terms("field");
|
||||||
|
|
||||||
Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
|
Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||||
@ -846,7 +856,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||||||
DirectoryReader r = w.getReader();
|
DirectoryReader r = w.getReader();
|
||||||
w.close();
|
w.close();
|
||||||
LeafReader sub = getOnlyLeafReader(r);
|
LeafReader sub = getOnlyLeafReader(r);
|
||||||
Terms terms = sub.fields().terms("field");
|
Terms terms = sub.terms("field");
|
||||||
|
|
||||||
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton(); // accept ALL
|
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton(); // accept ALL
|
||||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||||
@ -986,7 +996,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
IndexReader r = w.getReader();
|
IndexReader r = w.getReader();
|
||||||
assertEquals(1, r.leaves().size());
|
assertEquals(1, r.leaves().size());
|
||||||
TermsEnum te = r.leaves().get(0).reader().fields().terms("field").iterator();
|
TermsEnum te = r.leaves().get(0).reader().terms("field").iterator();
|
||||||
for(int i=0;i<=termCount;i++) {
|
for(int i=0;i<=termCount;i++) {
|
||||||
assertTrue("term '" + termsList.get(i).utf8ToString() + "' should exist but doesn't", te.seekExact(termsList.get(i)));
|
assertTrue("term '" + termsList.get(i).utf8ToString() + "' should exist but doesn't", te.seekExact(termsList.get(i)));
|
||||||
}
|
}
|
||||||
@ -1007,9 +1017,8 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||||||
doc.add(newStringField("field", "foobar", Field.Store.NO));
|
doc.add(newStringField("field", "foobar", Field.Store.NO));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
IndexReader r = w.getReader();
|
IndexReader r = w.getReader();
|
||||||
Fields fields = MultiFields.getFields(r);
|
Terms terms = MultiFields.getTerms(r, "field");
|
||||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
|
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
|
||||||
Terms terms = fields.terms("field");
|
|
||||||
String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
|
String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
|
||||||
assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
|
assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
|
||||||
r.close();
|
r.close();
|
||||||
|
@ -29,7 +29,6 @@ import org.apache.lucene.document.Document;
|
|||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.FilterDirectoryReader;
|
import org.apache.lucene.index.FilterDirectoryReader;
|
||||||
import org.apache.lucene.index.FilterLeafReader;
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
@ -219,21 +218,16 @@ public class TermInSetQueryTest extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new FilterFields(in.fields()) {
|
Terms terms = super.terms(field);
|
||||||
|
if (terms == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new FilterTerms(terms) {
|
||||||
@Override
|
@Override
|
||||||
public Terms terms(String field) throws IOException {
|
public TermsEnum iterator() throws IOException {
|
||||||
final Terms in = this.in.terms(field);
|
counter.incrementAndGet();
|
||||||
if (in == null) {
|
return super.iterator();
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return new FilterTerms(in) {
|
|
||||||
@Override
|
|
||||||
public TermsEnum iterator() throws IOException {
|
|
||||||
counter.incrementAndGet();
|
|
||||||
return super.iterator();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -523,6 +523,21 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||||||
assertTrue("score should be negative", h[i].score < 0);
|
assertTrue("score should be negative", h[i].score < 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRewriteBoolean() throws Exception {
|
||||||
|
Query sub1 = tq("hed", "albino");
|
||||||
|
Query sub2 = tq("hed", "elephant");
|
||||||
|
DisjunctionMaxQuery q = new DisjunctionMaxQuery(
|
||||||
|
Arrays.asList(
|
||||||
|
sub1, sub2
|
||||||
|
), 1.0f);
|
||||||
|
Query rewritten = s.rewrite(q);
|
||||||
|
assertTrue(rewritten instanceof BooleanQuery);
|
||||||
|
BooleanQuery bq = (BooleanQuery) rewritten;
|
||||||
|
assertEquals(bq.clauses().size(), 2);
|
||||||
|
assertEquals(bq.clauses().get(0), new BooleanClause(sub1, BooleanClause.Occur.SHOULD));
|
||||||
|
assertEquals(bq.clauses().get(1), new BooleanClause(sub2, BooleanClause.Occur.SHOULD));
|
||||||
|
}
|
||||||
|
|
||||||
/** macro */
|
/** macro */
|
||||||
protected Query tq(String f, String t) {
|
protected Query tq(String f, String t) {
|
||||||
|
@ -71,7 +71,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||||||
|
|
||||||
// this TermEnum gives "piccadilly", "pie" and "pizza".
|
// this TermEnum gives "piccadilly", "pie" and "pizza".
|
||||||
String prefix = "pi";
|
String prefix = "pi";
|
||||||
TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
|
TermsEnum te = MultiFields.getTerms(reader,"body").iterator();
|
||||||
te.seekCeil(new BytesRef(prefix));
|
te.seekCeil(new BytesRef(prefix));
|
||||||
do {
|
do {
|
||||||
String s = te.term().utf8ToString();
|
String s = te.term().utf8ToString();
|
||||||
|
@ -73,7 +73,7 @@ public class TestPhrasePrefixQuery extends LuceneTestCase {
|
|||||||
|
|
||||||
// this TermEnum gives "piccadilly", "pie" and "pizza".
|
// this TermEnum gives "piccadilly", "pie" and "pizza".
|
||||||
String prefix = "pi";
|
String prefix = "pi";
|
||||||
TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
|
TermsEnum te = MultiFields.getTerms(reader, "body").iterator();
|
||||||
te.seekCeil(new BytesRef(prefix));
|
te.seekCeil(new BytesRef(prefix));
|
||||||
do {
|
do {
|
||||||
String s = te.term().utf8ToString();
|
String s = te.term().utf8ToString();
|
||||||
|
@ -61,7 +61,7 @@ public class TestSameScoresWithThreads extends LuceneTestCase {
|
|||||||
w.close();
|
w.close();
|
||||||
|
|
||||||
final IndexSearcher s = newSearcher(r);
|
final IndexSearcher s = newSearcher(r);
|
||||||
Terms terms = MultiFields.getFields(r).terms("body");
|
Terms terms = MultiFields.getTerms(r, "body");
|
||||||
int termCount = 0;
|
int termCount = 0;
|
||||||
TermsEnum termsEnum = terms.iterator();
|
TermsEnum termsEnum = terms.iterator();
|
||||||
while(termsEnum.next() != null) {
|
while(termsEnum.next() != null) {
|
||||||
|
@ -22,7 +22,6 @@ import org.apache.lucene.document.Document;
|
|||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.FilterDirectoryReader;
|
import org.apache.lucene.index.FilterDirectoryReader;
|
||||||
import org.apache.lucene.index.FilterLeafReader;
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
@ -123,31 +122,27 @@ public class TestTermQuery extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new FilterFields(super.fields()) {
|
Terms terms = super.terms(field);
|
||||||
|
return terms==null ? null : new FilterTerms(terms) {
|
||||||
@Override
|
@Override
|
||||||
public Terms terms(String field) throws IOException {
|
public TermsEnum iterator() throws IOException {
|
||||||
return new FilterTerms(super.terms(field)) {
|
return new FilterTermsEnum(super.iterator()) {
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator() throws IOException {
|
public SeekStatus seekCeil(BytesRef text) throws IOException {
|
||||||
return new FilterTermsEnum(super.iterator()) {
|
throw new AssertionError("no seek");
|
||||||
@Override
|
}
|
||||||
public SeekStatus seekCeil(BytesRef text) throws IOException {
|
@Override
|
||||||
throw new AssertionError("no seek");
|
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||||
}
|
throw new AssertionError("no seek");
|
||||||
@Override
|
}
|
||||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
@Override
|
||||||
throw new AssertionError("no seek");
|
public boolean seekExact(BytesRef text) throws IOException {
|
||||||
}
|
throw new AssertionError("no seek");
|
||||||
@Override
|
}
|
||||||
public boolean seekExact(BytesRef text) throws IOException {
|
@Override
|
||||||
throw new AssertionError("no seek");
|
public void seekExact(long ord) throws IOException {
|
||||||
}
|
throw new AssertionError("no seek");
|
||||||
@Override
|
|
||||||
public void seekExact(long ord) throws IOException {
|
|
||||||
throw new AssertionError("no seek");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -178,32 +178,36 @@ public class TestQueryBuilder extends LuceneTestCase {
|
|||||||
.build();
|
.build();
|
||||||
Query syn2 = new TermQuery(new Term("field", "cavy"));
|
Query syn2 = new TermQuery(new Term("field", "cavy"));
|
||||||
|
|
||||||
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
BooleanQuery synQuery = new BooleanQuery.Builder()
|
||||||
.add(syn1, BooleanClause.Occur.SHOULD)
|
.add(syn1, BooleanClause.Occur.SHOULD)
|
||||||
.add(syn2, BooleanClause.Occur.SHOULD)
|
.add(syn2, BooleanClause.Occur.SHOULD)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
||||||
|
.add(synQuery, occur)
|
||||||
|
.build();
|
||||||
|
|
||||||
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
|
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
|
||||||
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
|
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
|
||||||
|
|
||||||
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
|
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.add(new TermQuery(new Term("field", "story")), occur)
|
.add(new TermQuery(new Term("field", "story")), occur)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
|
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
|
||||||
|
|
||||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||||
.add(new TermQuery(new Term("field", "the")), occur)
|
.add(new TermQuery(new Term("field", "the")), occur)
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.add(new TermQuery(new Term("field", "story")), occur)
|
.add(new TermQuery(new Term("field", "story")), occur)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
|
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
|
||||||
|
|
||||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||||
.add(new TermQuery(new Term("field", "the")), occur)
|
.add(new TermQuery(new Term("field", "the")), occur)
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.add(new TermQuery(new Term("field", "story")), occur)
|
.add(new TermQuery(new Term("field", "story")), occur)
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
|
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
|
||||||
}
|
}
|
||||||
@ -217,32 +221,36 @@ public class TestQueryBuilder extends LuceneTestCase {
|
|||||||
.add(new Term("field", "pig"))
|
.add(new Term("field", "pig"))
|
||||||
.build();
|
.build();
|
||||||
Query syn2 = new TermQuery(new Term("field", "cavy"));
|
Query syn2 = new TermQuery(new Term("field", "cavy"));
|
||||||
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
|
||||||
|
BooleanQuery synQuery = new BooleanQuery.Builder()
|
||||||
.add(syn1, BooleanClause.Occur.SHOULD)
|
.add(syn1, BooleanClause.Occur.SHOULD)
|
||||||
.add(syn2, BooleanClause.Occur.SHOULD)
|
.add(syn2, BooleanClause.Occur.SHOULD)
|
||||||
.build();
|
.build();
|
||||||
|
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
||||||
|
.add(synQuery, occur)
|
||||||
|
.build();
|
||||||
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
|
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
|
||||||
queryBuilder.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
|
queryBuilder.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
|
||||||
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
|
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
|
||||||
|
|
||||||
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
|
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.add(new TermQuery(new Term("field", "story")), occur)
|
.add(new TermQuery(new Term("field", "story")), occur)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
|
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
|
||||||
|
|
||||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||||
.add(new TermQuery(new Term("field", "the")), occur)
|
.add(new TermQuery(new Term("field", "the")), occur)
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.add(new TermQuery(new Term("field", "story")), occur)
|
.add(new TermQuery(new Term("field", "story")), occur)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
|
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
|
||||||
|
|
||||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||||
.add(new TermQuery(new Term("field", "the")), occur)
|
.add(new TermQuery(new Term("field", "the")), occur)
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.add(new TermQuery(new Term("field", "story")), occur)
|
.add(new TermQuery(new Term("field", "story")), occur)
|
||||||
.add(expectedGraphQuery, occur)
|
.add(synQuery, occur)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
|
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
|
||||||
}
|
}
|
||||||
|
@ -25,8 +25,8 @@ import org.apache.lucene.index.DocValuesType;
|
|||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.LeafMetaData;
|
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.index.LeafMetaData;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
import org.apache.lucene.index.PointValues;
|
import org.apache.lucene.index.PointValues;
|
||||||
@ -90,8 +90,8 @@ public class TermVectorLeafReader extends LeafReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return fields;
|
return fields.terms(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -148,7 +148,7 @@ public class TermVectorLeafReader extends LeafReader {
|
|||||||
if (docID != 0) {
|
if (docID != 0) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return fields();
|
return fields;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -18,7 +18,6 @@ package org.apache.lucene.search.highlight;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
@ -30,7 +29,6 @@ import org.apache.lucene.analysis.CachingTokenFilter;
|
|||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.FilterLeafReader;
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
@ -429,30 +427,15 @@ public class WeightedSpanTermExtractor {
|
|||||||
DelegatingLeafReader(LeafReader in) {
|
DelegatingLeafReader(LeafReader in) {
|
||||||
super(in);
|
super(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfos getFieldInfos() {
|
public FieldInfos getFieldInfos() {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();//TODO merge them
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new FilterFields(super.fields()) {
|
return super.terms(DelegatingLeafReader.FIELD_NAME);
|
||||||
@Override
|
|
||||||
public Terms terms(String field) throws IOException {
|
|
||||||
return super.terms(DelegatingLeafReader.FIELD_NAME);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Iterator<String> iterator() {
|
|
||||||
return Collections.singletonList(DelegatingLeafReader.FIELD_NAME).iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -83,8 +83,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
|
|||||||
return allAutomata.get(0);
|
return allAutomata.get(0);
|
||||||
}
|
}
|
||||||
//TODO it'd be nice if we could get at the underlying Automaton in CharacterRunAutomaton so that we
|
//TODO it'd be nice if we could get at the underlying Automaton in CharacterRunAutomaton so that we
|
||||||
// could union them all. But it's not exposed, and note TermRangeQuery isn't modelled as an Automaton
|
// could union them all. But it's not exposed, and sometimes the automaton is byte (not char) oriented
|
||||||
// by MultiTermHighlighting.
|
|
||||||
|
|
||||||
// Return an aggregate CharacterRunAutomaton of others
|
// Return an aggregate CharacterRunAutomaton of others
|
||||||
return new CharacterRunAutomaton(Automata.makeEmpty()) {// the makeEmpty() is bogus; won't be used
|
return new CharacterRunAutomaton(Automata.makeEmpty()) {// the makeEmpty() is bogus; won't be used
|
||||||
|
@ -19,12 +19,10 @@ package org.apache.lucene.search.uhighlight;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.search.AutomatonQuery;
|
import org.apache.lucene.search.AutomatonQuery;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
@ -32,19 +30,17 @@ import org.apache.lucene.search.BoostQuery;
|
|||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
import org.apache.lucene.search.FuzzyQuery;
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
import org.apache.lucene.search.PrefixQuery;
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermRangeQuery;
|
|
||||||
import org.apache.lucene.search.spans.SpanBoostQuery;
|
import org.apache.lucene.search.spans.SpanBoostQuery;
|
||||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
import org.apache.lucene.search.spans.SpanNotQuery;
|
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
|
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
|
||||||
import org.apache.lucene.util.CharsRef;
|
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.lucene.util.automaton.Automata;
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
|
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||||
import org.apache.lucene.util.automaton.Operations;
|
import org.apache.lucene.util.automaton.Operations;
|
||||||
@ -110,18 +106,6 @@ class MultiTermHighlighting {
|
|||||||
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
|
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
|
||||||
fieldMatcher, lookInSpan, preRewriteFunc)));
|
fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||||
} else if (query instanceof PrefixQuery) {
|
|
||||||
final PrefixQuery pq = (PrefixQuery) query;
|
|
||||||
Term prefix = pq.getPrefix();
|
|
||||||
if (fieldMatcher.test(prefix.field())) {
|
|
||||||
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
|
|
||||||
Automata.makeAnyString())) {
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return pq.toString();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (query instanceof FuzzyQuery) {
|
} else if (query instanceof FuzzyQuery) {
|
||||||
final FuzzyQuery fq = (FuzzyQuery) query;
|
final FuzzyQuery fq = (FuzzyQuery) query;
|
||||||
if (fieldMatcher.test(fq.getField())) {
|
if (fieldMatcher.test(fq.getField())) {
|
||||||
@ -143,69 +127,63 @@ class MultiTermHighlighting {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else if (query instanceof TermRangeQuery) {
|
|
||||||
final TermRangeQuery tq = (TermRangeQuery) query;
|
|
||||||
if (fieldMatcher.test(tq.getField())) {
|
|
||||||
final CharsRef lowerBound;
|
|
||||||
if (tq.getLowerTerm() == null) {
|
|
||||||
lowerBound = null;
|
|
||||||
} else {
|
|
||||||
lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
|
|
||||||
}
|
|
||||||
|
|
||||||
final CharsRef upperBound;
|
|
||||||
if (tq.getUpperTerm() == null) {
|
|
||||||
upperBound = null;
|
|
||||||
} else {
|
|
||||||
upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
|
|
||||||
}
|
|
||||||
|
|
||||||
final boolean includeLower = tq.includesLower();
|
|
||||||
final boolean includeUpper = tq.includesUpper();
|
|
||||||
final CharsRef scratch = new CharsRef();
|
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
|
|
||||||
|
|
||||||
// this is *not* an automaton, but its very simple
|
|
||||||
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
|
|
||||||
@Override
|
|
||||||
public boolean run(char[] s, int offset, int length) {
|
|
||||||
scratch.chars = s;
|
|
||||||
scratch.offset = offset;
|
|
||||||
scratch.length = length;
|
|
||||||
|
|
||||||
if (lowerBound != null) {
|
|
||||||
int cmp = comparator.compare(scratch, lowerBound);
|
|
||||||
if (cmp < 0 || (!includeLower && cmp == 0)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (upperBound != null) {
|
|
||||||
int cmp = comparator.compare(scratch, upperBound);
|
|
||||||
if (cmp > 0 || (!includeUpper && cmp == 0)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return tq.toString();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (query instanceof AutomatonQuery) {
|
} else if (query instanceof AutomatonQuery) {
|
||||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||||
if (fieldMatcher.test(aq.getField())) {
|
if (fieldMatcher.test(aq.getField())) {
|
||||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
|
||||||
@Override
|
if (aq.isAutomatonBinary() == false) { // note: is the case for WildcardQuery, RegexpQuery
|
||||||
public String toString() {
|
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||||
return aq.toString();
|
@Override
|
||||||
}
|
public String toString() {
|
||||||
});
|
return aq.toString();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else { // note: is the case for PrefixQuery, TermRangeQuery
|
||||||
|
// byte oriented automaton:
|
||||||
|
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) { // empty here is bogus just to satisfy API
|
||||||
|
// TODO can we get access to the aq.compiledAutomaton.runAutomaton ?
|
||||||
|
ByteRunAutomaton byteRunAutomaton =
|
||||||
|
new ByteRunAutomaton(aq.getAutomaton(), true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean run(char[] chars, int offset, int length) {
|
||||||
|
int state = 0;
|
||||||
|
final int maxIdx = offset + length;
|
||||||
|
for (int i = offset; i < maxIdx; i++) {
|
||||||
|
final int code = chars[i];
|
||||||
|
int b;
|
||||||
|
// UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 )
|
||||||
|
if (code < 0x80) {
|
||||||
|
state = byteRunAutomaton.step(state, code);
|
||||||
|
if (state == -1) return false;
|
||||||
|
} else if (code < 0x800) {
|
||||||
|
b = (0xC0 | (code >> 6));
|
||||||
|
state = byteRunAutomaton.step(state, b);
|
||||||
|
if (state == -1) return false;
|
||||||
|
b = (0x80 | (code & 0x3F));
|
||||||
|
state = byteRunAutomaton.step(state, b);
|
||||||
|
if (state == -1) return false;
|
||||||
|
} else {
|
||||||
|
// more complex
|
||||||
|
byte[] utf8Bytes = new byte[4 * (maxIdx - i)];
|
||||||
|
int utf8Len = UnicodeUtil.UTF16toUTF8(chars, i, maxIdx - i, utf8Bytes);
|
||||||
|
for (int utfIdx = 0; utfIdx < utf8Len; utfIdx++) {
|
||||||
|
state = byteRunAutomaton.step(state, utf8Bytes[utfIdx] & 0xFF);
|
||||||
|
if (state == -1) return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return byteRunAutomaton.isAccept(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return aq.toString();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return list.toArray(new CharacterRunAutomaton[list.size()]);
|
return list.toArray(new CharacterRunAutomaton[list.size()]);
|
||||||
|
@ -24,7 +24,6 @@ import java.util.Collections;
|
|||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@ -36,7 +35,6 @@ import java.util.function.Predicate;
|
|||||||
|
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.FilterLeafReader;
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
@ -529,12 +527,16 @@ public class PhraseHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//TODO move up; it's currently inbetween other inner classes that are related
|
||||||
/**
|
/**
|
||||||
|
* Needed to support the ability to highlight a query irrespective of the field a query refers to
|
||||||
|
* (aka requireFieldMatch=false).
|
||||||
* This reader will just delegate every call to a single field in the wrapped
|
* This reader will just delegate every call to a single field in the wrapped
|
||||||
* LeafReader. This way we ensure that all queries going through this reader target the same field.
|
* LeafReader. This way we ensure that all queries going through this reader target the same field.
|
||||||
*/
|
*/
|
||||||
static final class SingleFieldFilterLeafReader extends FilterLeafReader {
|
static final class SingleFieldFilterLeafReader extends FilterLeafReader {
|
||||||
final String fieldName;
|
final String fieldName;
|
||||||
|
|
||||||
SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
|
SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
|
||||||
super(in);
|
super(in);
|
||||||
this.fieldName = fieldName;
|
this.fieldName = fieldName;
|
||||||
@ -542,27 +544,12 @@ public class PhraseHelper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfos getFieldInfos() {
|
public FieldInfos getFieldInfos() {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();//TODO merge them
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new FilterFields(super.fields()) {
|
return super.terms(fieldName);
|
||||||
@Override
|
|
||||||
public Terms terms(String field) throws IOException {
|
|
||||||
return super.terms(fieldName);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Iterator<String> iterator() {
|
|
||||||
return Collections.singletonList(fieldName).iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int size() {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -18,7 +18,6 @@ package org.apache.lucene.search.uhighlight;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.FilterLeafReader;
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
@ -52,24 +51,9 @@ final class TermVectorFilteredLeafReader extends FilterLeafReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new TermVectorFilteredFields(in.fields(), filterTerms);
|
Terms terms = in.terms(field);
|
||||||
}
|
return terms==null ? null : new TermsFilteredTerms(terms, filterTerms);
|
||||||
|
|
||||||
private static final class TermVectorFilteredFields extends FilterLeafReader.FilterFields {
|
|
||||||
// NOTE: super ("in") is baseFields
|
|
||||||
|
|
||||||
private final Terms filterTerms;
|
|
||||||
|
|
||||||
TermVectorFilteredFields(Fields baseFields, Terms filterTerms) {
|
|
||||||
super(baseFields);
|
|
||||||
this.filterTerms = filterTerms;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Terms terms(String field) throws IOException {
|
|
||||||
return new TermsFilteredTerms(in.terms(field), filterTerms);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class TermsFilteredTerms extends FilterLeafReader.FilterTerms {
|
private static final class TermsFilteredTerms extends FilterLeafReader.FilterTerms {
|
||||||
|
@ -24,7 +24,6 @@ import java.util.Locale;
|
|||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
|
||||||
public class TestSplittingBreakIterator extends LuceneTestCase {
|
public class TestSplittingBreakIterator extends LuceneTestCase {
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,12 +51,9 @@ import org.apache.lucene.search.TermQuery;
|
|||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
||||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
|
||||||
public class TestUnifiedHighlighter extends LuceneTestCase {
|
public class TestUnifiedHighlighter extends LuceneTestCase {
|
||||||
|
|
||||||
private final FieldType fieldType; // for "body" generally, but not necessarily others. See constructor
|
private final FieldType fieldType; // for "body" generally, but not necessarily others. See constructor
|
||||||
|
@ -24,11 +24,13 @@ import java.util.List;
|
|||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||||
|
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
@ -63,16 +65,15 @@ import org.apache.lucene.search.spans.SpanQuery;
|
|||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.search.spans.SpanWeight;
|
import org.apache.lucene.search.spans.SpanWeight;
|
||||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Some tests that highlight wildcard, fuzzy, etc queries.
|
* Some tests that highlight wildcard, fuzzy, etc queries.
|
||||||
*/
|
*/
|
||||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
|
||||||
public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
||||||
|
|
||||||
final FieldType fieldType;
|
final FieldType fieldType;
|
||||||
@ -1079,4 +1080,66 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||||||
assertEquals("[<b>я</b>]", Arrays.toString(snippets));
|
assertEquals("[<b>я</b>]", Arrays.toString(snippets));
|
||||||
ir.close();
|
ir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-7719
|
||||||
|
public void testMultiByteMTQ() throws IOException {
|
||||||
|
Analyzer analyzer = new KeywordAnalyzer();
|
||||||
|
try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer)) {
|
||||||
|
for (int attempt = 0; attempt < 20; attempt++) {
|
||||||
|
iw.deleteAll();
|
||||||
|
String field = "title";
|
||||||
|
String value = RandomStrings.randomUnicodeOfLength(random(), 3);
|
||||||
|
if (value.contains(UnifiedHighlighter.MULTIVAL_SEP_CHAR+"")) { // will throw things off
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int[] valuePoints = value.codePoints().toArray();
|
||||||
|
|
||||||
|
iw.addDocument(Collections.singleton(
|
||||||
|
new Field(field, value, fieldType)));
|
||||||
|
iw.commit();
|
||||||
|
try (IndexReader ir = iw.getReader()) {
|
||||||
|
IndexSearcher searcher = newSearcher(ir);
|
||||||
|
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
|
||||||
|
highlighter.setBreakIterator(WholeBreakIterator::new);
|
||||||
|
|
||||||
|
// Test PrefixQuery
|
||||||
|
Query query = new PrefixQuery(new Term(field,
|
||||||
|
UnicodeUtil.newString(valuePoints, 0, 1)));
|
||||||
|
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||||
|
|
||||||
|
// Test TermRangeQuery
|
||||||
|
query = new TermRangeQuery(field,
|
||||||
|
new BytesRef(value),
|
||||||
|
new BytesRef(value),
|
||||||
|
true, true );
|
||||||
|
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||||
|
|
||||||
|
// Test FuzzyQuery
|
||||||
|
query = new FuzzyQuery(new Term(field, value + "Z"), 1);
|
||||||
|
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||||
|
|
||||||
|
if (valuePoints.length != 3) {
|
||||||
|
continue; // even though we ask RandomStrings for a String with 3 code points, it seems sometimes it's less
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test WildcardQuery
|
||||||
|
query = new WildcardQuery(new Term(field,
|
||||||
|
new StringBuilder()
|
||||||
|
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[0])
|
||||||
|
.append(WildcardQuery.WILDCARD_CHAR)
|
||||||
|
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[2]).toString()));
|
||||||
|
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||||
|
|
||||||
|
//TODO hmmm; how to randomly generate RegexpQuery? Low priority; we've covered the others well.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void highlightAndAssertMatch(IndexSearcher searcher, UnifiedHighlighter highlighter, Query query, String field, String fieldVal) throws IOException {
|
||||||
|
TopDocs topDocs = searcher.search(query, 1);
|
||||||
|
assertEquals(1, topDocs.totalHits);
|
||||||
|
String[] snippets = highlighter.highlight(field, query, topDocs);
|
||||||
|
assertEquals("[<b>"+fieldVal+"</b>]", Arrays.toString(snippets));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -37,15 +37,12 @@ import org.apache.lucene.search.TopDocs;
|
|||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
|
||||||
public class TestUnifiedHighlighterRanking extends LuceneTestCase {
|
public class TestUnifiedHighlighterRanking extends LuceneTestCase {
|
||||||
|
|
||||||
Analyzer indexAnalyzer;
|
Analyzer indexAnalyzer;
|
||||||
|
@ -32,8 +32,6 @@ import org.apache.lucene.store.Directory;
|
|||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
|
||||||
public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
|
public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
|
||||||
|
|
||||||
private MockAnalyzer indexAnalyzer =
|
private MockAnalyzer indexAnalyzer =
|
||||||
|
@ -54,8 +54,6 @@ import org.apache.lucene.util.QueryBuilder;
|
|||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
||||||
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
|
||||||
public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
||||||
|
|
||||||
final FieldType fieldType;
|
final FieldType fieldType;
|
||||||
|
@ -48,8 +48,6 @@ import java.util.Map;
|
|||||||
* This test DOES NOT represent all testing for highlighting when term vectors are used. Other tests pick the offset
|
* This test DOES NOT represent all testing for highlighting when term vectors are used. Other tests pick the offset
|
||||||
* source at random (to include term vectors) and in-effect test term vectors generally.
|
* source at random (to include term vectors) and in-effect test term vectors generally.
|
||||||
*/
|
*/
|
||||||
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
|
||||||
public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
|
public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
|
||||||
|
|
||||||
private Analyzer indexAnalyzer;
|
private Analyzer indexAnalyzer;
|
||||||
|
@ -217,18 +217,20 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||||||
// TEST OffsetsEnums & Passage visibility
|
// TEST OffsetsEnums & Passage visibility
|
||||||
|
|
||||||
// this code never runs; just for compilation
|
// this code never runs; just for compilation
|
||||||
OffsetsEnum oe = new OffsetsEnum(null, EMPTY);
|
Passage p;
|
||||||
oe.getTerm();
|
try (OffsetsEnum oe = new OffsetsEnum(null, EMPTY)) {
|
||||||
oe.getPostingsEnum();
|
oe.getTerm();
|
||||||
oe.freq();
|
oe.getPostingsEnum();
|
||||||
oe.hasMorePositions();
|
oe.freq();
|
||||||
oe.nextPosition();
|
oe.hasMorePositions();
|
||||||
oe.startOffset();
|
oe.nextPosition();
|
||||||
oe.endOffset();
|
oe.startOffset();
|
||||||
oe.getWeight();
|
oe.endOffset();
|
||||||
oe.setWeight(2f);
|
oe.getWeight();
|
||||||
|
oe.setWeight(2f);
|
||||||
|
}
|
||||||
|
|
||||||
Passage p = new Passage();
|
p = new Passage();
|
||||||
p.setStartOffset(0);
|
p.setStartOffset(0);
|
||||||
p.setEndOffset(9);
|
p.setEndOffset(9);
|
||||||
p.setScore(1f);
|
p.setScore(1f);
|
||||||
|
@ -1325,7 +1325,10 @@ public class TestJoinUtil extends LuceneTestCase {
|
|||||||
String uniqueRandomValue;
|
String uniqueRandomValue;
|
||||||
do {
|
do {
|
||||||
// the trick is to generate values which will be ordered similarly for string, ints&longs, positive nums makes it easier
|
// the trick is to generate values which will be ordered similarly for string, ints&longs, positive nums makes it easier
|
||||||
final int nextInt = random.nextInt(Integer.MAX_VALUE);
|
//
|
||||||
|
// Additionally in order to avoid precision loss when joining via a float field we can't generate values higher than
|
||||||
|
// 0xFFFFFF, so we can't use Integer#MAX_VALUE as upper bound here:
|
||||||
|
final int nextInt = random.nextInt(0xFFFFFF);
|
||||||
uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
|
uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
|
||||||
assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue,16);
|
assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue,16);
|
||||||
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
|
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
|
||||||
|
@ -868,20 +868,27 @@ public class MemoryIndex {
|
|||||||
|
|
||||||
final int numDimensions = fieldInfo.getPointDimensionCount();
|
final int numDimensions = fieldInfo.getPointDimensionCount();
|
||||||
final int numBytesPerDimension = fieldInfo.getPointNumBytes();
|
final int numBytesPerDimension = fieldInfo.getPointNumBytes();
|
||||||
minPackedValue = pointValues[0].bytes.clone();
|
if (numDimensions == 1) {
|
||||||
maxPackedValue = pointValues[0].bytes.clone();
|
// PointInSetQuery.MergePointVisitor expects values to be visited in increasing order,
|
||||||
|
// this is a 1d optimization which has to be done here too. Otherwise we emit values
|
||||||
for (int i = 0; i < pointValuesCount; i++) {
|
// out of order which causes mismatches.
|
||||||
BytesRef pointValue = pointValues[i];
|
Arrays.sort(pointValues, 0, pointValuesCount);
|
||||||
assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
|
minPackedValue = pointValues[0].bytes.clone();
|
||||||
|
maxPackedValue = pointValues[pointValuesCount - 1].bytes.clone();
|
||||||
for (int dim = 0; dim < numDimensions; ++dim) {
|
} else {
|
||||||
int offset = dim * numBytesPerDimension;
|
minPackedValue = pointValues[0].bytes.clone();
|
||||||
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
|
maxPackedValue = pointValues[0].bytes.clone();
|
||||||
System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
|
for (int i = 0; i < pointValuesCount; i++) {
|
||||||
}
|
BytesRef pointValue = pointValues[i];
|
||||||
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
|
assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
|
||||||
System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
|
for (int dim = 0; dim < numDimensions; ++dim) {
|
||||||
|
int offset = dim * numBytesPerDimension;
|
||||||
|
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
|
||||||
|
System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
|
||||||
|
}
|
||||||
|
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
|
||||||
|
System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1127,7 +1134,7 @@ public class MemoryIndex {
|
|||||||
*/
|
*/
|
||||||
private final class MemoryIndexReader extends LeafReader {
|
private final class MemoryIndexReader extends LeafReader {
|
||||||
|
|
||||||
private Fields memoryFields = new MemoryFields(fields);
|
private final MemoryFields memoryFields = new MemoryFields(fields);
|
||||||
|
|
||||||
private MemoryIndexReader() {
|
private MemoryIndexReader() {
|
||||||
super(); // avoid as much superclass baggage as possible
|
super(); // avoid as much superclass baggage as possible
|
||||||
@ -1229,8 +1236,8 @@ public class MemoryIndex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() {
|
public Terms terms(String field) throws IOException {
|
||||||
return memoryFields;
|
return memoryFields.terms(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MemoryFields extends Fields {
|
private class MemoryFields extends Fields {
|
||||||
@ -1582,7 +1589,7 @@ public class MemoryIndex {
|
|||||||
@Override
|
@Override
|
||||||
public Fields getTermVectors(int docID) {
|
public Fields getTermVectors(int docID) {
|
||||||
if (docID == 0) {
|
if (docID == 0) {
|
||||||
return fields();
|
return memoryFields;
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -131,7 +131,7 @@ public class TestMemoryIndex extends LuceneTestCase {
|
|||||||
mi.addField("field", "some terms be here", analyzer);
|
mi.addField("field", "some terms be here", analyzer);
|
||||||
IndexSearcher searcher = mi.createSearcher();
|
IndexSearcher searcher = mi.createSearcher();
|
||||||
LeafReader reader = (LeafReader) searcher.getIndexReader();
|
LeafReader reader = (LeafReader) searcher.getIndexReader();
|
||||||
TermsEnum terms = reader.fields().terms("field").iterator();
|
TermsEnum terms = reader.terms("field").iterator();
|
||||||
terms.seekExact(0);
|
terms.seekExact(0);
|
||||||
assertEquals("be", terms.term().utf8ToString());
|
assertEquals("be", terms.term().utf8ToString());
|
||||||
TestUtil.checkReader(reader);
|
TestUtil.checkReader(reader);
|
||||||
@ -512,6 +512,30 @@ public class TestMemoryIndex extends LuceneTestCase {
|
|||||||
assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new double[] {10D, 10D}, new double[] {30D, 30D})));
|
assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new double[] {10D, 10D}, new double[] {30D, 30D})));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMultiValuedPointsSortedCorrectly() throws Exception {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new IntPoint("ints", 3));
|
||||||
|
doc.add(new IntPoint("ints", 2));
|
||||||
|
doc.add(new IntPoint("ints", 1));
|
||||||
|
doc.add(new LongPoint("longs", 3L));
|
||||||
|
doc.add(new LongPoint("longs", 2L));
|
||||||
|
doc.add(new LongPoint("longs", 1L));
|
||||||
|
doc.add(new FloatPoint("floats", 3F));
|
||||||
|
doc.add(new FloatPoint("floats", 2F));
|
||||||
|
doc.add(new FloatPoint("floats", 1F));
|
||||||
|
doc.add(new DoublePoint("doubles", 3D));
|
||||||
|
doc.add(new DoublePoint("doubles", 2D));
|
||||||
|
doc.add(new DoublePoint("doubles", 1D));
|
||||||
|
|
||||||
|
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
|
||||||
|
IndexSearcher s = mi.createSearcher();
|
||||||
|
|
||||||
|
assertEquals(1, s.count(IntPoint.newSetQuery("ints", 2)));
|
||||||
|
assertEquals(1, s.count(LongPoint.newSetQuery("longs", 2)));
|
||||||
|
assertEquals(1, s.count(FloatPoint.newSetQuery("floats", 2)));
|
||||||
|
assertEquals(1, s.count(DoublePoint.newSetQuery("doubles", 2)));
|
||||||
|
}
|
||||||
|
|
||||||
public void testIndexingPointsAndDocValues() throws Exception {
|
public void testIndexingPointsAndDocValues() throws Exception {
|
||||||
FieldType type = new FieldType();
|
FieldType type = new FieldType();
|
||||||
type.setDimensions(1, 4);
|
type.setDimensions(1, 4);
|
||||||
|
@ -53,7 +53,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
|
|||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.*;
|
||||||
import org.apache.lucene.index.NumericDocValues;
|
|
||||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
@ -67,8 +66,8 @@ import org.apache.lucene.search.spans.SpanOrQuery;
|
|||||||
import org.apache.lucene.search.spans.SpanQuery;
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
|
||||||
import org.apache.lucene.util.ByteBlockPool;
|
import org.apache.lucene.util.ByteBlockPool;
|
||||||
|
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.LineFileDocs;
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
@ -171,7 +170,7 @@ public class TestMemoryIndexAgainstRAMDir extends BaseTokenStreamTestCase {
|
|||||||
|
|
||||||
private void duellReaders(CompositeReader other, LeafReader memIndexReader)
|
private void duellReaders(CompositeReader other, LeafReader memIndexReader)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Fields memFields = memIndexReader.fields();
|
Fields memFields = memIndexReader.getTermVectors(0);
|
||||||
for (String field : MultiFields.getFields(other)) {
|
for (String field : MultiFields.getFields(other)) {
|
||||||
Terms memTerms = memFields.terms(field);
|
Terms memTerms = memFields.terms(field);
|
||||||
Terms iwTerms = memIndexReader.terms(field);
|
Terms iwTerms = memIndexReader.terms(field);
|
||||||
|
@ -39,15 +39,16 @@ public class IndexMergeTool {
|
|||||||
System.err.println("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
|
System.err.println("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
FSDirectory mergedIndex = FSDirectory.open(Paths.get(args[0]));
|
|
||||||
|
|
||||||
IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(null)
|
// Try to use hardlinks to source segments, if possible.
|
||||||
.setOpenMode(OpenMode.CREATE));
|
Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[0])));
|
||||||
|
|
||||||
|
IndexWriter writer = new IndexWriter(mergedIndex,
|
||||||
|
new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE));
|
||||||
|
|
||||||
Directory[] indexes = new Directory[args.length - 1];
|
Directory[] indexes = new Directory[args.length - 1];
|
||||||
for (int i = 1; i < args.length; i++) {
|
for (int i = 1; i < args.length; i++) {
|
||||||
// try to use hardlinks if possible
|
indexes[i - 1] = FSDirectory.open(Paths.get(args[i]));
|
||||||
indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i])));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.println("Merging...");
|
System.out.println("Merging...");
|
||||||
|
@ -22,7 +22,6 @@ import java.util.Collections;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
@ -214,11 +213,10 @@ public class CommonTermsQuery extends Query {
|
|||||||
Term[] queryTerms) throws IOException {
|
Term[] queryTerms) throws IOException {
|
||||||
TermsEnum termsEnum = null;
|
TermsEnum termsEnum = null;
|
||||||
for (LeafReaderContext context : leaves) {
|
for (LeafReaderContext context : leaves) {
|
||||||
final Fields fields = context.reader().fields();
|
|
||||||
for (int i = 0; i < queryTerms.length; i++) {
|
for (int i = 0; i < queryTerms.length; i++) {
|
||||||
Term term = queryTerms[i];
|
Term term = queryTerms[i];
|
||||||
TermContext termContext = contextArray[i];
|
TermContext termContext = contextArray[i];
|
||||||
final Terms terms = fields.terms(term.field());
|
final Terms terms = context.reader().terms(term.field());
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
// field does not exist
|
// field does not exist
|
||||||
continue;
|
continue;
|
||||||
|
@ -19,9 +19,8 @@ package org.apache.lucene.queries.function.valuesource;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.queries.function.FunctionValues;
|
import org.apache.lucene.queries.function.FunctionValues;
|
||||||
@ -50,8 +49,7 @@ public class TFValueSource extends TermFreqValueSource {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||||
Fields fields = readerContext.reader().fields();
|
final Terms terms = readerContext.reader().terms(indexedField);
|
||||||
final Terms terms = fields.terms(indexedField);
|
|
||||||
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
|
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
|
||||||
final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), indexedField);
|
final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), indexedField);
|
||||||
if (similarity == null) {
|
if (similarity == null) {
|
||||||
|
@ -19,9 +19,8 @@ package org.apache.lucene.queries.function.valuesource;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
|
||||||
import org.apache.lucene.index.Fields;
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.queries.function.FunctionValues;
|
import org.apache.lucene.queries.function.FunctionValues;
|
||||||
@ -48,8 +47,7 @@ public class TermFreqValueSource extends DocFreqValueSource {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||||
Fields fields = readerContext.reader().fields();
|
final Terms terms = readerContext.reader().terms(indexedField);
|
||||||
final Terms terms = fields.terms(indexedField);
|
|
||||||
|
|
||||||
return new IntDocValues(this) {
|
return new IntDocValues(this) {
|
||||||
PostingsEnum docs ;
|
PostingsEnum docs ;
|
||||||
|
@ -351,7 +351,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
|
|||||||
assertEquals("Synonym(b:dog b:dogs) Synonym(t:dog t:dogs)", q.toString());
|
assertEquals("Synonym(b:dog b:dogs) Synonym(t:dog t:dogs)", q.toString());
|
||||||
q = parser.parse("guinea pig");
|
q = parser.parse("guinea pig");
|
||||||
assertFalse(parser.getSplitOnWhitespace());
|
assertFalse(parser.getSplitOnWhitespace());
|
||||||
assertEquals("((+b:guinea +b:pig) (+t:guinea +t:pig)) (b:cavy t:cavy)", q.toString());
|
assertEquals("((+b:guinea +b:pig) b:cavy) ((+t:guinea +t:pig) t:cavy)", q.toString());
|
||||||
parser.setSplitOnWhitespace(true);
|
parser.setSplitOnWhitespace(true);
|
||||||
q = parser.parse("guinea pig");
|
q = parser.parse("guinea pig");
|
||||||
assertEquals("(b:guinea t:guinea) (b:pig t:pig)", q.toString());
|
assertEquals("(b:guinea t:guinea) (b:pig t:pig)", q.toString());
|
||||||
|
@ -522,8 +522,10 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||||||
.build();
|
.build();
|
||||||
|
|
||||||
BooleanQuery graphQuery = new BooleanQuery.Builder()
|
BooleanQuery graphQuery = new BooleanQuery.Builder()
|
||||||
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
.add(new BooleanQuery.Builder()
|
||||||
.add(cavy, BooleanClause.Occur.SHOULD)
|
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
||||||
|
.add(cavy, BooleanClause.Occur.SHOULD)
|
||||||
|
.build(), BooleanClause.Occur.SHOULD)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(graphQuery, dumb.parse("guinea pig"));
|
assertEquals(graphQuery, dumb.parse("guinea pig"));
|
||||||
|
|
||||||
@ -541,11 +543,32 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||||||
QueryParser smart = new SmartQueryParser();
|
QueryParser smart = new SmartQueryParser();
|
||||||
smart.setSplitOnWhitespace(false);
|
smart.setSplitOnWhitespace(false);
|
||||||
graphQuery = new BooleanQuery.Builder()
|
graphQuery = new BooleanQuery.Builder()
|
||||||
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
.add(new BooleanQuery.Builder()
|
||||||
.add(cavy, BooleanClause.Occur.SHOULD)
|
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
||||||
|
.add(cavy, BooleanClause.Occur.SHOULD)
|
||||||
|
.build(), BooleanClause.Occur.SHOULD)
|
||||||
.build();
|
.build();
|
||||||
assertEquals(graphQuery, smart.parse("guinea pig"));
|
assertEquals(graphQuery, smart.parse("guinea pig"));
|
||||||
assertEquals(phraseGuineaPig, smart.parse("\"guinea pig\""));
|
assertEquals(phraseGuineaPig, smart.parse("\"guinea pig\""));
|
||||||
|
|
||||||
|
// with the AND operator
|
||||||
|
dumb.setDefaultOperator(Operator.AND);
|
||||||
|
BooleanQuery graphAndQuery = new BooleanQuery.Builder()
|
||||||
|
.add(new BooleanQuery.Builder()
|
||||||
|
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
||||||
|
.add(cavy, BooleanClause.Occur.SHOULD)
|
||||||
|
.build(), BooleanClause.Occur.MUST)
|
||||||
|
.build();
|
||||||
|
assertEquals(graphAndQuery, dumb.parse("guinea pig"));
|
||||||
|
|
||||||
|
graphAndQuery = new BooleanQuery.Builder()
|
||||||
|
.add(new BooleanQuery.Builder()
|
||||||
|
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
||||||
|
.add(cavy, BooleanClause.Occur.SHOULD)
|
||||||
|
.build(), BooleanClause.Occur.MUST)
|
||||||
|
.add(cavy, BooleanClause.Occur.MUST)
|
||||||
|
.build();
|
||||||
|
assertEquals(graphAndQuery, dumb.parse("guinea pig cavy"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testEnableGraphQueries() throws Exception {
|
public void testEnableGraphQueries() throws Exception {
|
||||||
@ -616,30 +639,30 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||||||
assertQueryEquals("guinea /pig/", a, "guinea /pig/");
|
assertQueryEquals("guinea /pig/", a, "guinea /pig/");
|
||||||
|
|
||||||
// Operators should not interrupt multiword analysis if not don't associate
|
// Operators should not interrupt multiword analysis if not don't associate
|
||||||
assertQueryEquals("(guinea pig)", a, "(+guinea +pig) cavy");
|
assertQueryEquals("(guinea pig)", a, "((+guinea +pig) cavy)");
|
||||||
assertQueryEquals("+(guinea pig)", a, "+((+guinea +pig) cavy)");
|
assertQueryEquals("+(guinea pig)", a, "+(((+guinea +pig) cavy))");
|
||||||
assertQueryEquals("-(guinea pig)", a, "-((+guinea +pig) cavy)");
|
assertQueryEquals("-(guinea pig)", a, "-(((+guinea +pig) cavy))");
|
||||||
assertQueryEquals("!(guinea pig)", a, "-((+guinea +pig) cavy)");
|
assertQueryEquals("!(guinea pig)", a, "-(((+guinea +pig) cavy))");
|
||||||
assertQueryEquals("NOT (guinea pig)", a, "-((+guinea +pig) cavy)");
|
assertQueryEquals("NOT (guinea pig)", a, "-(((+guinea +pig) cavy))");
|
||||||
assertQueryEquals("(guinea pig)^2", a, "((+guinea +pig) cavy)^2.0");
|
assertQueryEquals("(guinea pig)^2", a, "(((+guinea +pig) cavy))^2.0");
|
||||||
|
|
||||||
assertQueryEquals("field:(guinea pig)", a, "(+guinea +pig) cavy");
|
assertQueryEquals("field:(guinea pig)", a, "((+guinea +pig) cavy)");
|
||||||
|
|
||||||
assertQueryEquals("+small guinea pig", a, "+small (+guinea +pig) cavy");
|
assertQueryEquals("+small guinea pig", a, "+small ((+guinea +pig) cavy)");
|
||||||
assertQueryEquals("-small guinea pig", a, "-small (+guinea +pig) cavy");
|
assertQueryEquals("-small guinea pig", a, "-small ((+guinea +pig) cavy)");
|
||||||
assertQueryEquals("!small guinea pig", a, "-small (+guinea +pig) cavy");
|
assertQueryEquals("!small guinea pig", a, "-small ((+guinea +pig) cavy)");
|
||||||
assertQueryEquals("NOT small guinea pig", a, "-small (+guinea +pig) cavy");
|
assertQueryEquals("NOT small guinea pig", a, "-small ((+guinea +pig) cavy)");
|
||||||
assertQueryEquals("small* guinea pig", a, "small* (+guinea +pig) cavy");
|
assertQueryEquals("small* guinea pig", a, "small* ((+guinea +pig) cavy)");
|
||||||
assertQueryEquals("small? guinea pig", a, "small? (+guinea +pig) cavy");
|
assertQueryEquals("small? guinea pig", a, "small? ((+guinea +pig) cavy)");
|
||||||
assertQueryEquals("\"small\" guinea pig", a, "small (+guinea +pig) cavy");
|
assertQueryEquals("\"small\" guinea pig", a, "small ((+guinea +pig) cavy)");
|
||||||
|
|
||||||
assertQueryEquals("guinea pig +running", a, "(+guinea +pig) cavy +running");
|
assertQueryEquals("guinea pig +running", a, "((+guinea +pig) cavy) +running");
|
||||||
assertQueryEquals("guinea pig -running", a, "(+guinea +pig) cavy -running");
|
assertQueryEquals("guinea pig -running", a, "((+guinea +pig) cavy) -running");
|
||||||
assertQueryEquals("guinea pig !running", a, "(+guinea +pig) cavy -running");
|
assertQueryEquals("guinea pig !running", a, "((+guinea +pig) cavy) -running");
|
||||||
assertQueryEquals("guinea pig NOT running", a, "(+guinea +pig) cavy -running");
|
assertQueryEquals("guinea pig NOT running", a, "((+guinea +pig) cavy) -running");
|
||||||
assertQueryEquals("guinea pig running*", a, "(+guinea +pig) cavy running*");
|
assertQueryEquals("guinea pig running*", a, "((+guinea +pig) cavy) running*");
|
||||||
assertQueryEquals("guinea pig running?", a, "(+guinea +pig) cavy running?");
|
assertQueryEquals("guinea pig running?", a, "((+guinea +pig) cavy) running?");
|
||||||
assertQueryEquals("guinea pig \"running\"", a, "(+guinea +pig) cavy running");
|
assertQueryEquals("guinea pig \"running\"", a, "((+guinea +pig) cavy) running");
|
||||||
|
|
||||||
assertQueryEquals("\"guinea pig\"~2", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
|
assertQueryEquals("\"guinea pig\"~2", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
|
||||||
|
|
||||||
@ -744,14 +767,16 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||||||
BooleanQuery guineaPig = synonym.build();
|
BooleanQuery guineaPig = synonym.build();
|
||||||
|
|
||||||
BooleanQuery graphQuery = new BooleanQuery.Builder()
|
BooleanQuery graphQuery = new BooleanQuery.Builder()
|
||||||
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
.add(new BooleanQuery.Builder()
|
||||||
.add(cavy, BooleanClause.Occur.SHOULD)
|
.add(guineaPig, BooleanClause.Occur.SHOULD)
|
||||||
.build();;
|
.add(cavy, BooleanClause.Occur.SHOULD)
|
||||||
|
.build(), BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
assertEquals(graphQuery, parser.parse("guinea pig"));
|
assertEquals(graphQuery, parser.parse("guinea pig"));
|
||||||
|
|
||||||
boolean oldSplitOnWhitespace = splitOnWhitespace;
|
boolean oldSplitOnWhitespace = splitOnWhitespace;
|
||||||
splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
|
splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
|
||||||
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "(+guinea +pig) cavy");
|
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "((+guinea +pig) cavy)");
|
||||||
splitOnWhitespace = oldSplitOnWhitespace;
|
splitOnWhitespace = oldSplitOnWhitespace;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,12 +37,12 @@ import org.apache.lucene.document.Document;
|
|||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.MergeScheduler;
|
import org.apache.lucene.index.MergeScheduler;
|
||||||
import org.apache.lucene.index.PerThreadPKLookup;
|
import org.apache.lucene.index.PerThreadPKLookup;
|
||||||
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TieredMergePolicy;
|
import org.apache.lucene.index.TieredMergePolicy;
|
||||||
@ -75,7 +75,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
|
|||||||
doc.add(makeIDField("id1", 110));
|
doc.add(makeIDField("id1", 110));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
IndexReader r = w.getReader();
|
IndexReader r = w.getReader();
|
||||||
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().fields().terms("id").iterator();
|
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().terms("id").iterator();
|
||||||
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
|
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
|
||||||
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 100));
|
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 100));
|
||||||
assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));
|
assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));
|
||||||
|
@ -71,10 +71,11 @@ public class AssertingLeafReader extends FilterLeafReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields fields() throws IOException {
|
public Terms terms(String field) throws IOException {
|
||||||
return new AssertingFields(super.fields());
|
Terms terms = super.terms(field);
|
||||||
|
return terms == null ? null : new AssertingTerms(terms);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields getTermVectors(int docID) throws IOException {
|
public Fields getTermVectors(int docID) throws IOException {
|
||||||
Fields fields = super.getTermVectors(docID);
|
Fields fields = super.getTermVectors(docID);
|
||||||
|
@ -335,7 +335,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
|
|||||||
|
|
||||||
// PostingsFormat
|
// PostingsFormat
|
||||||
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
|
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
|
||||||
consumer.write(oneDocReader.fields());
|
consumer.write(MultiFields.getFields(oneDocReader));
|
||||||
IOUtils.close(consumer);
|
IOUtils.close(consumer);
|
||||||
IOUtils.close(consumer);
|
IOUtils.close(consumer);
|
||||||
}
|
}
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user