mirror of https://github.com/apache/lucene.git

commit fb7803d9a0

Merge branch 'master' into feature/autoscaling

# Conflicts:
#	solr/CHANGES.txt
@ -66,6 +66,13 @@
    </foaf:Person>
  </maintainer>

  <release>
    <Version>
      <name>lucene-6.6.0</name>
      <created>2017-06-06</created>
      <revision>6.6.0</revision>
    </Version>
  </release>
  <release>
    <Version>
      <name>lucene-6.5.1</name>
@ -66,6 +66,13 @@
    </foaf:Person>
  </maintainer>

  <release>
    <Version>
      <name>solr-6.6.0</name>
      <created>2017-06-06</created>
      <revision>6.6.0</revision>
    </Version>
  </release>
  <release>
    <Version>
      <name>solr-6.5.1</name>
@ -6,6 +6,9 @@
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
    </content>
    <content url="file://$MODULE_DIR$/../resources">
      <sourceFolder url="file://$MODULE_DIR$/../resources" type="java-resource" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
    <orderEntry type="library" name="Solrj library" level="project" />
@ -14,6 +14,16 @@ New Features
  well as the oldest Lucene version that contributed to the segment.
  (Adrien Grand)

* LUCENE-7854: The new TermFrequencyAttribute used during analysis
  with a custom token stream allows indexing custom term frequencies.
  (Mike McCandless)

* LUCENE-7866: Add a new DelimitedTermFrequencyTokenFilter that allows
  marking tokens with a custom term frequency (LUCENE-7854). It parses a numeric
  value after a separator char ('|') at the end of each token and changes
  the term frequency to this value. (Uwe Schindler, Robert Muir, Mike
  McCandless)

API Changes

* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.

@ -59,6 +69,11 @@ API Changes

* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)

* LUCENE-7500: Removed abstract LeafReader.fields(); instead terms(fieldName)
  has been made abstract, formerly was final. Also, MultiFields.getTerms
  was optimized to work directly instead of being implemented on getFields.
  (David Smiley)

Bug Fixes

* LUCENE-7626: IndexWriter will no longer accept broken token offsets

@ -97,6 +112,8 @@ Optimizations
  query is a point (for 2D) or is a simple date interval (e.g. 1 month). When
  the strategy is marked as pointsOnly, the result is a TermQuery. (David Smiley)

* LUCENE-7874: DisjunctionMaxQuery rewrites to a BooleanQuery when tiebreaker is set to 1. (Jim Ferenczi)

Other

* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)

@ -113,18 +130,53 @@ Other
* LUCENE-7852: Correct copyright year(s) in lucene/LICENSE.txt file.
  (Christine Poerschke, Steve Rowe)

* LUCENE-7719: Generalized the UnifiedHighlighter's support for AutomatonQuery
  for character & binary automata. Added AutomatonQuery.isBinary. (David Smiley)

* LUCENE-7873: Due to serious problems with context class loaders in several
  frameworks (OSGI, Java 9 Jigsaw), the lookup of Codecs, PostingsFormats,
  DocValuesFormats and all analysis factories was changed to only inspect the
  current classloader that defined the interface class (lucene-core.jar).
  See MIGRATE.txt for more information! (Uwe Schindler, Dawid Weiss)

======================= Lucene 6.7.0 =======================

New Features

* LUCENE-7855: Added advanced options of the Wikipedia tokenizer to its factory.
  (Juan Pedro via Adrien Grand)

Bug Fixes

* LUCENE-7864: IndexMergeTool is not using intermediate hard links (even
  if possible). (Dawid Weiss)

* LUCENE-7869: Changed MemoryIndex to sort 1d points. In case of 1d points, the PointInSetQuery.MergePointVisitor expects
  that these points are visited in ascending order. The memory index doesn't do this, which can result in documents
  with multiple points that should match not matching. (Martijn van Groningen)

* LUCENE-7878: Fix query builder to keep the SHOULD clause that wraps multi-word synonyms. (Jim Ferenczi)

Other

* LUCENE-7800: Remove code that potentially rethrows checked exceptions
  from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
  Uwe Schindler, Dawid Weiss)

* LUCENE-7876: Avoid calls to LeafReader.fields() and MultiFields.getFields()
  that are trivially replaced by LeafReader.terms() and MultiFields.getTerms().
  (David Smiley)

Improvements

* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)

Optimizations

* LUCENE-7828: Speed up range queries on range fields by improving how we
  compute the relation between the query and inner nodes of the BKD tree.
  (Adrien Grand)

======================= Lucene 6.6.0 =======================

New Features
@ -1,5 +1,46 @@
# Apache Lucene Migration Guide

## Changed SPI lookups for codecs and analysis (LUCENE-7873) ##

Due to serious problems with context class loaders in several frameworks
(OSGI, Java 9 Jigsaw), the lookup of Codecs, PostingsFormats, DocValuesFormats
and all analysis factories was changed to only inspect the current classloader
that defined the interface class (`lucene-core.jar`). Normal applications
should not encounter any issues with that change, because the application
classloader (unnamed module in Java 9) can load all SPIs from all JARs
on the classpath.

For any code that relies on the old behaviour (e.g., certain web applications
or components in application servers) one can manually instruct the Lucene
SPI implementation to also inspect the context classloader. To do this,
add this code to the early startup phase of your application before any
Apache Lucene component is used:

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  // Codecs:
  PostingsFormat.reloadPostingsFormats(cl);
  DocValuesFormat.reloadDocValuesFormats(cl);
  Codec.reloadCodecs(cl);
  // Analysis:
  CharFilterFactory.reloadCharFilters(cl);
  TokenFilterFactory.reloadTokenFilters(cl);
  TokenizerFactory.reloadTokenizers(cl);

This code will reload all service providers from the given class loader
(in our case the context class loader). Of course, instead of specifying
the context class loader, it is recommended to use the application's main
class loader or the module class loader.

If you are migrating your project to the Java 9 Jigsaw module system, keep in mind
that Lucene currently does not yet support `module-info.java` declarations of
service provider impls (`provides` statement). It is therefore recommended
to keep all of Lucene in one Uber-Module and not try to split Lucene into
several modules. As soon as Lucene migrates to Java 9 as a minimum requirement,
we will work on improving that.

For OSGI, the same applies. You have to create a bundle with all of Lucene for
SPI to work correctly.

## Query.hashCode and Query.equals are now abstract methods (LUCENE-7277)

Any custom query subclasses should redeclare equivalence relationship according
@ -0,0 +1,75 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.miscellaneous;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.ArrayUtil;

/**
 * Characters before the delimiter are the "token"; the textual integer after it is the term frequency.
 * To use this {@code TokenFilter} the field must be indexed with
 * {@link IndexOptions#DOCS_AND_FREQS} but no positions or offsets.
 * <p>
 * For example, if the delimiter is '|', then for the string "foo|5", "foo" is the token
 * and "5" is a term frequency. If there is no delimiter, the TokenFilter does not modify
 * the term frequency.
 * <p>
 * Note: make sure your Tokenizer doesn't split on the delimiter, or this won't work.
 */
public final class DelimitedTermFrequencyTokenFilter extends TokenFilter {
  public static final char DEFAULT_DELIMITER = '|';

  private final char delimiter;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final TermFrequencyAttribute tfAtt = addAttribute(TermFrequencyAttribute.class);

  public DelimitedTermFrequencyTokenFilter(TokenStream input) {
    this(input, DEFAULT_DELIMITER);
  }

  public DelimitedTermFrequencyTokenFilter(TokenStream input, char delimiter) {
    super(input);
    this.delimiter = delimiter;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      final char[] buffer = termAtt.buffer();
      final int length = termAtt.length();
      for (int i = 0; i < length; i++) {
        if (buffer[i] == delimiter) {
          termAtt.setLength(i); // simply set a new length
          i++;
          tfAtt.setTermFrequency(ArrayUtil.parseInt(buffer, i, length - i));
          return true;
        }
      }
      return true;
    }
    return false;
  }
}
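A minimal, self-contained usage sketch of the new filter (not part of this commit; the demo class name and sample input are invented for illustration). It wires the filter behind a whitespace tokenizer and reads back the parsed term frequencies:

  import java.io.StringReader;

  import org.apache.lucene.analysis.core.WhitespaceTokenizer;
  import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;

  public class DelimitedTermFrequencyDemo {
    public static void main(String[] args) throws Exception {
      WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
      tokenizer.setReader(new StringReader("foo|5 bar"));
      try (DelimitedTermFrequencyTokenFilter filter =
               new DelimitedTermFrequencyTokenFilter(tokenizer)) {
        CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
        TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
        filter.reset();
        while (filter.incrementToken()) {
          // prints "foo -> 5" then "bar -> 1" (no delimiter leaves the default of 1)
          System.out.println(termAtt + " -> " + tfAtt.getTermFrequency());
        }
        filter.end();
      }
    }
  }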
@ -0,0 +1,53 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.miscellaneous;

import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;

/**
 * Factory for {@link DelimitedTermFrequencyTokenFilter}. The field must have {@code omitPositions=true}.
 * <pre class="prettyprint">
 * <fieldType name="text_tfdl" class="solr.TextField" omitPositions="true">
 *   <analyzer>
 *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
 *     <filter class="solr.DelimitedTermFrequencyTokenFilterFactory" delimiter="|"/>
 *   </analyzer>
 * </fieldType></pre>
 */
public class DelimitedTermFrequencyTokenFilterFactory extends TokenFilterFactory {
  public static final String DELIMITER_ATTR = "delimiter";

  private final char delimiter;

  /** Creates a new DelimitedTermFrequencyTokenFilterFactory */
  public DelimitedTermFrequencyTokenFilterFactory(Map<String, String> args) {
    super(args);
    delimiter = getChar(args, DELIMITER_ATTR, DelimitedTermFrequencyTokenFilter.DEFAULT_DELIMITER);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
  }

  @Override
  public DelimitedTermFrequencyTokenFilter create(TokenStream input) {
    return new DelimitedTermFrequencyTokenFilter(input, delimiter);
  }
}
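Besides the Solr fieldType shown in the javadoc, the factory can also be used programmatically through CustomAnalyzer once it is registered via SPI (see the services-file change below). A hedged sketch, assuming the SPI name derived from the class name is "delimitedTermFrequency":

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.custom.CustomAnalyzer;

  public class DelimitedTfAnalyzerDemo {
    public static Analyzer build() throws Exception {
      // "delimitedTermFrequency" is the assumed SPI name for DelimitedTermFrequencyTokenFilterFactory
      return CustomAnalyzer.builder()
          .withTokenizer("whitespace")
          .addTokenFilter("delimitedTermFrequency", "delimiter", "|")
          .build();
    }
  }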
@ -48,7 +48,7 @@ public final class AnalysisSPILoader<S extends AbstractAnalysisFactory> {
   }
 
   public AnalysisSPILoader(Class<S> clazz, String[] suffixes) {
-    this(clazz, suffixes, Thread.currentThread().getContextClassLoader());
+    this(clazz, suffixes, null);
   }
 
   public AnalysisSPILoader(Class<S> clazz, String[] suffixes, ClassLoader classloader) {
@ -16,9 +16,9 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.wikipedia;
|
||||
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
@ -33,19 +33,28 @@ import org.apache.lucene.util.AttributeFactory;
|
|||
* </fieldType></pre>
|
||||
*/
|
||||
public class WikipediaTokenizerFactory extends TokenizerFactory {
|
||||
|
||||
public static final String TOKEN_OUTPUT = "tokenOutput";
|
||||
public static final String UNTOKENIZED_TYPES = "untokenizedTypes";
|
||||
|
||||
protected final int tokenOutput;
|
||||
protected Set<String> untokenizedTypes;
|
||||
|
||||
/** Creates a new WikipediaTokenizerFactory */
|
||||
public WikipediaTokenizerFactory(Map<String,String> args) {
|
||||
super(args);
|
||||
tokenOutput = getInt(args, TOKEN_OUTPUT, WikipediaTokenizer.TOKENS_ONLY);
|
||||
untokenizedTypes = getSet(args, UNTOKENIZED_TYPES);
|
||||
|
||||
if (untokenizedTypes == null) {
|
||||
untokenizedTypes = Collections.emptySet();
|
||||
}
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: add support for WikipediaTokenizer's advanced options.
|
||||
|
||||
@Override
|
||||
public WikipediaTokenizer create(AttributeFactory factory) {
|
||||
return new WikipediaTokenizer(factory, WikipediaTokenizer.TOKENS_ONLY,
|
||||
Collections.<String>emptySet());
|
||||
return new WikipediaTokenizer(factory, tokenOutput, untokenizedTypes);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -63,6 +63,7 @@ org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory
org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
org.apache.lucene.analysis.miscellaneous.CodepointCountFilterFactory
org.apache.lucene.analysis.miscellaneous.DateRecognizerFilterFactory
org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory
org.apache.lucene.analysis.miscellaneous.FingerprintFilterFactory
org.apache.lucene.analysis.miscellaneous.FixBrokenOffsetsFilterFactory
org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory
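Registering the factory in this services file is what makes name-based lookup resolve to the new class. A small sketch of such a lookup (assumption, not part of this commit: the derived SPI name is "delimitedTermFrequency"):

  import java.util.HashMap;
  import java.util.Map;

  import org.apache.lucene.analysis.util.TokenFilterFactory;

  public class FactoryLookupDemo {
    public static TokenFilterFactory lookup() {
      Map<String, String> args = new HashMap<>();
      args.put("delimiter", "|");
      // resolves to DelimitedTermFrequencyTokenFilterFactory via the SPI registration above
      return TokenFilterFactory.forName("delimitedTermFrequency", args);
    }
  }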
@ -21,13 +21,17 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
|
@ -49,6 +53,12 @@ import org.apache.lucene.util.Version;
|
|||
|
||||
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
|
||||
public class TestFactories extends BaseTokenStreamTestCase {
|
||||
|
||||
/** Factories that are excluded from testing it with random data */
|
||||
private static final Set<Class<? extends AbstractAnalysisFactory>> EXCLUDE_FACTORIES_RANDOM_DATA = new HashSet<>(Arrays.asList(
|
||||
DelimitedTermFrequencyTokenFilterFactory.class
|
||||
));
|
||||
|
||||
public void test() throws IOException {
|
||||
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
|
||||
doTestTokenizer(tokenizer);
|
||||
|
@ -77,11 +87,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||
assertFalse(mtc instanceof CharFilterFactory);
|
||||
}
|
||||
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -99,11 +111,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||
assertTrue(mtc instanceof TokenFilterFactory);
|
||||
}
|
||||
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,11 +135,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||
assertTrue(mtc instanceof CharFilterFactory);
|
||||
}
|
||||
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -73,6 +73,7 @@ import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
|
|||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
|
||||
|
@ -159,6 +160,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||
WordDelimiterFilter.class,
|
||||
// Cannot correct offsets when a char filter had changed them:
|
||||
WordDelimiterGraphFilter.class,
|
||||
// requires a special encoded token value, so it may fail with random data:
|
||||
DelimitedTermFrequencyTokenFilter.class,
|
||||
// clones of core's filters:
|
||||
org.apache.lucene.analysis.core.StopFilter.class,
|
||||
org.apache.lucene.analysis.core.LowerCaseFilter.class)) {
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.analysis.miscellaneous;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||
|
||||
public class DelimitedTermFrequencyTokenFilterTest extends BaseTokenStreamTestCase {
|
||||
|
||||
public void testTermFrequency() throws Exception {
|
||||
String test = "The quick|40 red|4 fox|06 jumped|1 over the lazy|2 brown|123 dogs|1024";
|
||||
DelimitedTermFrequencyTokenFilter filter =
|
||||
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
|
||||
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
|
||||
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
|
||||
filter.reset();
|
||||
assertTermEquals("The", filter, termAtt, tfAtt, 1);
|
||||
assertTermEquals("quick", filter, termAtt, tfAtt, 40);
|
||||
assertTermEquals("red", filter, termAtt, tfAtt, 4);
|
||||
assertTermEquals("fox", filter, termAtt, tfAtt, 6);
|
||||
assertTermEquals("jumped", filter, termAtt, tfAtt, 1);
|
||||
assertTermEquals("over", filter, termAtt, tfAtt, 1);
|
||||
assertTermEquals("the", filter, termAtt, tfAtt, 1);
|
||||
assertTermEquals("lazy", filter, termAtt, tfAtt, 2);
|
||||
assertTermEquals("brown", filter, termAtt, tfAtt, 123);
|
||||
assertTermEquals("dogs", filter, termAtt, tfAtt, 1024);
|
||||
assertFalse(filter.incrementToken());
|
||||
filter.end();
|
||||
filter.close();
|
||||
}
|
||||
|
||||
public void testInvalidNegativeTf() throws Exception {
|
||||
String test = "foo bar|-20";
|
||||
DelimitedTermFrequencyTokenFilter filter =
|
||||
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
|
||||
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
|
||||
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
|
||||
filter.reset();
|
||||
assertTermEquals("foo", filter, termAtt, tfAtt, 1);
|
||||
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, filter::incrementToken);
|
||||
assertEquals("Term frequency must be 1 or greater; got -20", iae.getMessage());
|
||||
}
|
||||
|
||||
public void testInvalidFloatTf() throws Exception {
|
||||
String test = "foo bar|1.2";
|
||||
DelimitedTermFrequencyTokenFilter filter =
|
||||
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
|
||||
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
|
||||
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
|
||||
filter.reset();
|
||||
assertTermEquals("foo", filter, termAtt, tfAtt, 1);
|
||||
expectThrows(NumberFormatException.class, filter::incrementToken);
|
||||
}
|
||||
|
||||
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, TermFrequencyAttribute tfAtt, int expectedTf) throws Exception {
|
||||
assertTrue(stream.incrementToken());
|
||||
assertEquals(expected, termAtt.toString());
|
||||
assertEquals(expectedTf, tfAtt.getTermFrequency());
|
||||
}
|
||||
}
|
|
@ -17,34 +17,90 @@
|
|||
package org.apache.lucene.analysis.wikipedia;
|
||||
|
||||
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
||||
|
||||
/**
|
||||
* Simple tests to ensure the wikipedia tokenizer is working.
|
||||
*/
|
||||
public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCase {
|
||||
|
||||
private final String WIKIPEDIA = "Wikipedia";
|
||||
private final String TOKEN_OUTPUT = "tokenOutput";
|
||||
private final String UNTOKENIZED_TYPES = "untokenizedTypes";
|
||||
|
||||
public void testTokenizer() throws Exception {
|
||||
Reader reader = new StringReader("This is a [[Category:foo]]");
|
||||
Tokenizer tokenizer = tokenizerFactory("Wikipedia").create(newAttributeFactory());
|
||||
tokenizer.setReader(reader);
|
||||
assertTokenStreamContents(tokenizer,
|
||||
new String[] { "This", "is", "a", "foo" },
|
||||
new int[] { 0, 5, 8, 21 },
|
||||
new int[] { 4, 7, 9, 24 },
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
|
||||
new int[] { 1, 1, 1, 1, });
|
||||
String text = "This is a [[Category:foo]]";
|
||||
Tokenizer tf = tokenizerFactory(WIKIPEDIA).create(newAttributeFactory());
|
||||
tf.setReader(new StringReader(text));
|
||||
assertTokenStreamContents(tf,
|
||||
new String[] { "This", "is", "a", "foo" },
|
||||
new int[] { 0, 5, 8, 21 },
|
||||
new int[] { 4, 7, 9, 24 },
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
|
||||
new int[] { 1, 1, 1, 1, },
|
||||
text.length());
|
||||
}
|
||||
|
||||
|
||||
public void testTokenizerTokensOnly() throws Exception {
|
||||
String text = "This is a [[Category:foo]]";
|
||||
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer( WikipediaTokenizer.TOKENS_ONLY).toString()).create(newAttributeFactory());
|
||||
tf.setReader(new StringReader(text));
|
||||
assertTokenStreamContents(tf,
|
||||
new String[] { "This", "is", "a", "foo" },
|
||||
new int[] { 0, 5, 8, 21 },
|
||||
new int[] { 4, 7, 9, 24 },
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
|
||||
new int[] { 1, 1, 1, 1, },
|
||||
text.length());
|
||||
}
|
||||
|
||||
public void testTokenizerUntokenizedOnly() throws Exception {
|
||||
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
|
||||
Set<String> untoks = new HashSet<>();
|
||||
untoks.add(WikipediaTokenizer.CATEGORY);
|
||||
untoks.add(WikipediaTokenizer.ITALICS);
|
||||
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.UNTOKENIZED_ONLY).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
|
||||
tf.setReader(new StringReader(test));
|
||||
assertTokenStreamContents(tf,
|
||||
new String[] { "a b c d", "e f g", "link", "here", "link",
|
||||
"there", "italics here", "something", "more italics", "h i j" },
|
||||
new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
|
||||
new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
|
||||
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
|
||||
);
|
||||
}
|
||||
|
||||
public void testTokenizerBoth() throws Exception {
|
||||
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
|
||||
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.BOTH).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
|
||||
tf.setReader(new StringReader(test));
|
||||
assertTokenStreamContents(tf,
|
||||
new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
|
||||
"link", "here", "link", "there", "italics here", "italics", "here",
|
||||
"something", "more italics", "more", "italics", "h i j", "h", "i", "j" },
|
||||
new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 },
|
||||
new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
|
||||
new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 }
|
||||
);
|
||||
}
|
||||
|
||||
/** Test that bogus arguments result in exception */
|
||||
public void testBogusArguments() throws Exception {
|
||||
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
tokenizerFactory("Wikipedia", "bogusArg", "bogusValue");
|
||||
tokenizerFactory(WIKIPEDIA, "bogusArg", "bogusValue").create(newAttributeFactory());
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testIllegalArguments() throws Exception {
|
||||
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("tokenOutput must be TOKENS_ONLY, UNTOKENIZED_ONLY or BOTH"));
|
||||
}
|
||||
}
|
|
@ -21,13 +21,17 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
|
@ -46,6 +50,12 @@ import org.apache.lucene.util.Version;
|
|||
|
||||
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
|
||||
public class TestFactories extends BaseTokenStreamTestCase {
|
||||
|
||||
/** Factories that are excluded from testing it with random data */
|
||||
private static final Set<Class<? extends AbstractAnalysisFactory>> EXCLUDE_FACTORIES_RANDOM_DATA = new HashSet<>(Arrays.asList(
|
||||
DelimitedTermFrequencyTokenFilterFactory.class
|
||||
));
|
||||
|
||||
public void test() throws IOException {
|
||||
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
|
||||
doTestTokenizer(tokenizer);
|
||||
|
@ -74,11 +84,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||
assertFalse(mtc instanceof CharFilterFactory);
|
||||
}
|
||||
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(factory, null, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -96,11 +108,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||
assertTrue(mtc instanceof TokenFilterFactory);
|
||||
}
|
||||
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -118,11 +132,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
|
|||
assertTrue(mtc instanceof CharFilterFactory);
|
||||
}
|
||||
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
|
||||
// beast it just a little, it shouldnt throw exceptions:
|
||||
// (it should have thrown them in initialize)
|
||||
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
|
||||
checkRandomData(random(), a, 20, 20, false, false);
|
||||
a.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -297,7 +297,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
"6.5.0-cfs",
|
||||
"6.5.0-nocfs",
|
||||
"6.5.1-cfs",
|
||||
"6.5.1-nocfs"
|
||||
"6.5.1-nocfs",
|
||||
"6.6.0-cfs",
|
||||
"6.6.0-nocfs"
|
||||
};
|
||||
|
||||
final String[] unsupportedNames = {
|
||||
|
@ -1190,7 +1192,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
for (String name : oldNames) {
|
||||
Directory dir = oldIndexDirs.get(name);
|
||||
IndexReader r = DirectoryReader.open(dir);
|
||||
TermsEnum terms = MultiFields.getFields(r).terms("content").iterator();
|
||||
TermsEnum terms = MultiFields.getTerms(r, "content").iterator();
|
||||
BytesRef t = terms.next();
|
||||
assertNotNull(t);
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -26,15 +26,18 @@ import org.apache.lucene.util.AttributeReflector;
|
|||
* <li>{@link PositionIncrementAttribute}
|
||||
* <li>{@link PositionLengthAttribute}
|
||||
* <li>{@link OffsetAttribute}
|
||||
* <li>{@link TermFrequencyAttribute}
|
||||
* </ul>*/
|
||||
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
||||
implements TypeAttribute, PositionIncrementAttribute,
|
||||
PositionLengthAttribute, OffsetAttribute {
|
||||
PositionLengthAttribute, OffsetAttribute,
|
||||
TermFrequencyAttribute {
|
||||
|
||||
private int startOffset,endOffset;
|
||||
private String type = DEFAULT_TYPE;
|
||||
private int positionIncrement = 1;
|
||||
private int positionLength = 1;
|
||||
private int termFrequency = 1;
|
||||
|
||||
/** Constructs the attribute implementation. */
|
||||
public PackedTokenAttributeImpl() {
|
||||
|
@ -132,12 +135,26 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void setTermFrequency(int termFrequency) {
|
||||
if (termFrequency < 1) {
|
||||
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
|
||||
}
|
||||
this.termFrequency = termFrequency;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int getTermFrequency() {
|
||||
return termFrequency;
|
||||
}
|
||||
|
||||
/** Resets the attributes
|
||||
*/
|
||||
@Override
|
||||
public void clear() {
|
||||
super.clear();
|
||||
positionIncrement = positionLength = 1;
|
||||
termFrequency = 1;
|
||||
startOffset = endOffset = 0;
|
||||
type = DEFAULT_TYPE;
|
||||
}
|
||||
|
@ -147,10 +164,8 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||
@Override
|
||||
public void end() {
|
||||
super.end();
|
||||
// super.end already calls this.clear, so we only set values that are different from clear:
|
||||
positionIncrement = 0;
|
||||
positionLength = 1;
|
||||
startOffset = endOffset = 0;
|
||||
type = DEFAULT_TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -170,6 +185,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||
positionIncrement == other.positionIncrement &&
|
||||
positionLength == other.positionLength &&
|
||||
(type == null ? other.type == null : type.equals(other.type)) &&
|
||||
termFrequency == other.termFrequency &&
|
||||
super.equals(obj)
|
||||
);
|
||||
} else
|
||||
|
@ -185,6 +201,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||
code = code * 31 + positionLength;
|
||||
if (type != null)
|
||||
code = code * 31 + type.hashCode();
|
||||
code = code * 31 + termFrequency;;
|
||||
return code;
|
||||
}
|
||||
|
||||
|
@ -198,12 +215,14 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||
to.startOffset = startOffset;
|
||||
to.endOffset = endOffset;
|
||||
to.type = type;
|
||||
to.termFrequency = termFrequency;
|
||||
} else {
|
||||
super.copyTo(target);
|
||||
((OffsetAttribute) target).setOffset(startOffset, endOffset);
|
||||
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
|
||||
((PositionLengthAttribute) target).setPositionLength(positionLength);
|
||||
((TypeAttribute) target).setType(type);
|
||||
((TermFrequencyAttribute) target).setTermFrequency(termFrequency);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -215,6 +234,6 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
|
|||
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
|
||||
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
|
||||
reflector.reflect(TypeAttribute.class, "type", type);
|
||||
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.tokenattributes;

import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.Attribute;

/** Sets the custom term frequency of a term within one document. If this attribute
 * is present in your analysis chain for a given field, that field must be indexed with
 * {@link IndexOptions#DOCS_AND_FREQS}. */
public interface TermFrequencyAttribute extends Attribute {

  /** Set the custom term frequency of the current term within one document. */
  public void setTermFrequency(int termFrequency);

  /** Returns the custom term frequency. */
  public int getTermFrequency();
}
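How an analysis chain might drive this attribute (a sketch, not from this commit): any TokenStream component can add the attribute and set a per-token value, which is then indexed as that term's frequency (LUCENE-7854). The doubling filter below is purely illustrative:

  import java.io.IOException;

  import org.apache.lucene.analysis.TokenFilter;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;

  /** Illustrative only: doubles the term frequency of every token it sees. */
  public final class DoublingTermFrequencyFilter extends TokenFilter {
    private final TermFrequencyAttribute tfAtt = addAttribute(TermFrequencyAttribute.class);

    public DoublingTermFrequencyFilter(TokenStream input) {
      super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken() == false) {
        return false;
      }
      tfAtt.setTermFrequency(2 * tfAtt.getTermFrequency());
      return true;
    }
  }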
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.tokenattributes;
|
||||
|
||||
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
|
||||
/** Default implementation of {@link TermFrequencyAttribute}. */
|
||||
public class TermFrequencyAttributeImpl extends AttributeImpl implements TermFrequencyAttribute, Cloneable {
|
||||
private int termFrequency = 1;
|
||||
|
||||
/** Initialize this attribute with term frequencey of 1 */
|
||||
public TermFrequencyAttributeImpl() {}
|
||||
|
||||
@Override
|
||||
public void setTermFrequency(int termFrequency) {
|
||||
if (termFrequency < 1) {
|
||||
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
|
||||
}
|
||||
this.termFrequency = termFrequency;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTermFrequency() {
|
||||
return termFrequency;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
this.termFrequency = 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() {
|
||||
this.termFrequency = 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other instanceof TermFrequencyAttributeImpl) {
|
||||
TermFrequencyAttributeImpl _other = (TermFrequencyAttributeImpl) other;
|
||||
return termFrequency == _other.termFrequency;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Integer.hashCode(termFrequency);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
TermFrequencyAttribute t = (TermFrequencyAttribute) target;
|
||||
t.setTermFrequency(termFrequency);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reflectWith(AttributeReflector reflector) {
|
||||
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
|
||||
}
|
||||
}
|
|
@ -121,12 +121,6 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
|
||||
private final TreeMap<String,FieldReader> fields = new TreeMap<>();
|
||||
|
||||
/** File offset where the directory starts in the terms file. */
|
||||
private long dirOffset;
|
||||
|
||||
/** File offset where the directory starts in the index file. */
|
||||
private long indexDirOffset;
|
||||
|
||||
final String segment;
|
||||
|
||||
final int version;
|
||||
|
@ -167,8 +161,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
CodecUtil.retrieveChecksum(termsIn);
|
||||
|
||||
// Read per-field details
|
||||
seekDir(termsIn, dirOffset);
|
||||
seekDir(indexIn, indexDirOffset);
|
||||
seekDir(termsIn);
|
||||
seekDir(indexIn);
|
||||
|
||||
final int numFields = termsIn.readVInt();
|
||||
if (numFields < 0) {
|
||||
|
@ -181,13 +175,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
if (numTerms <= 0) {
|
||||
throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
|
||||
}
|
||||
final int numBytes = termsIn.readVInt();
|
||||
if (numBytes < 0) {
|
||||
throw new CorruptIndexException("invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
|
||||
}
|
||||
final BytesRef rootCode = new BytesRef(new byte[numBytes]);
|
||||
termsIn.readBytes(rootCode.bytes, 0, numBytes);
|
||||
rootCode.length = numBytes;
|
||||
final BytesRef rootCode = readBytesRef(termsIn);
|
||||
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
|
||||
if (fieldInfo == null) {
|
||||
throw new CorruptIndexException("invalid field number: " + field, termsIn);
|
||||
|
@ -230,19 +218,24 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
private static BytesRef readBytesRef(IndexInput in) throws IOException {
|
||||
int numBytes = in.readVInt();
|
||||
if (numBytes < 0) {
|
||||
throw new CorruptIndexException("invalid bytes length: " + numBytes, in);
|
||||
}
|
||||
|
||||
BytesRef bytes = new BytesRef();
|
||||
bytes.length = in.readVInt();
|
||||
bytes.bytes = new byte[bytes.length];
|
||||
in.readBytes(bytes.bytes, 0, bytes.length);
|
||||
bytes.length = numBytes;
|
||||
bytes.bytes = new byte[numBytes];
|
||||
in.readBytes(bytes.bytes, 0, numBytes);
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/** Seek {@code input} to the directory offset. */
|
||||
private void seekDir(IndexInput input, long dirOffset)
|
||||
throws IOException {
|
||||
private static void seekDir(IndexInput input) throws IOException {
|
||||
input.seek(input.length() - CodecUtil.footerLength() - 8);
|
||||
dirOffset = input.readLong();
|
||||
input.seek(dirOffset);
|
||||
long offset = input.readLong();
|
||||
input.seek(offset);
|
||||
}
|
||||
|
||||
// for debugging
|
||||
|
|
|
@ -19,22 +19,20 @@ package org.apache.lucene.document;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntPredicate;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.PointValues.Relation;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.PointValues.Relation;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.ScorerSupplier;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
@ -60,13 +58,167 @@ abstract class RangeFieldQuery extends Query {
|
|||
/** Used by {@code RangeFieldQuery} to check how each internal or leaf node relates to the query. */
|
||||
enum QueryType {
|
||||
/** Use this for intersects queries. */
|
||||
INTERSECTS,
|
||||
INTERSECTS {
|
||||
|
||||
@Override
|
||||
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||
int numDims, int bytesPerDim, int dim) {
|
||||
int minOffset = dim * bytesPerDim;
|
||||
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, minOffset) < 0
|
||||
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, maxOffset) > 0) {
|
||||
// disjoint
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, minOffset) >= 0
|
||||
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, maxOffset) <= 0) {
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||
int minOffset = dim * bytesPerDim;
|
||||
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||
return StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, minOffset) >= 0
|
||||
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, maxOffset) <= 0;
|
||||
}
|
||||
|
||||
},
|
||||
/** Use this for within queries. */
|
||||
WITHIN,
|
||||
WITHIN {
|
||||
|
||||
@Override
|
||||
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||
int numDims, int bytesPerDim, int dim) {
|
||||
int minOffset = dim * bytesPerDim;
|
||||
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) < 0
|
||||
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) > 0) {
|
||||
// all ranges have at least one point outside of the query
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) >= 0
|
||||
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) <= 0) {
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||
int minOffset = dim * bytesPerDim;
|
||||
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) <= 0
|
||||
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) >= 0;
|
||||
}
|
||||
|
||||
},
|
||||
/** Use this for contains */
|
||||
CONTAINS,
|
||||
CONTAINS {
|
||||
|
||||
@Override
|
||||
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||
int numDims, int bytesPerDim, int dim) {
|
||||
int minOffset = dim * bytesPerDim;
|
||||
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) > 0
|
||||
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) < 0) {
|
||||
// all ranges are either less than the query max or greater than the query min
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) <= 0
|
||||
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) >= 0) {
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||
int minOffset = dim * bytesPerDim;
|
||||
int maxOffset = minOffset + bytesPerDim * numDims;
|
||||
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) >= 0
|
||||
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) <= 0;
|
||||
}
|
||||
|
||||
},
|
||||
/** Use this for crosses queries */
|
||||
CROSSES
|
||||
CROSSES {
|
||||
|
||||
@Override
|
||||
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||
int numDims, int bytesPerDim, int dim) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
|
||||
int numDims, int bytesPerDim) {
|
||||
Relation intersectRelation = QueryType.INTERSECTS.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
|
||||
if (intersectRelation == Relation.CELL_OUTSIDE_QUERY) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
Relation withinRelation = QueryType.WITHIN.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
|
||||
if (withinRelation == Relation.CELL_INSIDE_QUERY) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
if (intersectRelation == Relation.CELL_INSIDE_QUERY && withinRelation == Relation.CELL_OUTSIDE_QUERY) {
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
|
||||
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
|
||||
return INTERSECTS.matches(queryPackedValue, packedValue, numDims, bytesPerDim)
|
||||
&& WITHIN.matches(queryPackedValue, packedValue, numDims, bytesPerDim) == false;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
abstract Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim, int dim);
|
||||
|
||||
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim) {
|
||||
boolean inside = true;
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
Relation relation = compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim, dim);
|
||||
if (relation == Relation.CELL_OUTSIDE_QUERY) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
} else if (relation != Relation.CELL_INSIDE_QUERY) {
|
||||
inside = false;
|
||||
}
|
||||
}
|
||||
return inside ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
|
||||
}
|
||||
|
||||
abstract boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim);
|
||||
|
||||
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
|
||||
for (int dim = 0; dim < numDims; ++dim) {
|
||||
if (matches(queryPackedValue, packedValue, numDims, bytesPerDim, dim) == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -111,54 +263,33 @@ abstract class RangeFieldQuery extends Query {
|
|||
@Override
|
||||
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
return new ConstantScoreWeight(this, boost) {
|
||||
final RangeFieldComparator target = new RangeFieldComparator();
|
||||
|
||||
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
||||
values.intersect(
|
||||
new IntersectVisitor() {
|
||||
DocIdSetBuilder.BulkAdder adder;
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
adder = result.grow(count);
|
||||
}
|
||||
@Override
|
||||
public void visit(int docID) throws IOException {
|
||||
adder.add(docID);
|
||||
}
|
||||
@Override
|
||||
public void visit(int docID, byte[] leaf) throws IOException {
|
||||
if (target.matches(leaf)) {
|
||||
adder.add(docID);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
return compareRange(minPackedValue, maxPackedValue);
|
||||
}
|
||||
});
|
||||
return result.build();
|
||||
}
|
||||
|
||||
private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
byte[] node = getInternalRange(minPackedValue, maxPackedValue);
|
||||
// compute range relation for BKD traversal
|
||||
if (target.intersects(node) == false) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
} else if (target.within(node)) {
|
||||
// target within cell; continue traversing:
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
} else if (target.contains(node)) {
|
||||
// target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
|
||||
return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ?
|
||||
Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
// target intersects cell; continue traversing:
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
|
||||
return new IntersectVisitor() {
|
||||
DocIdSetBuilder.BulkAdder adder;
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
adder = result.grow(count);
|
||||
}
|
||||
@Override
|
||||
public void visit(int docID) throws IOException {
|
||||
adder.add(docID);
|
||||
}
|
||||
@Override
|
||||
public void visit(int docID, byte[] leaf) throws IOException {
|
||||
if (queryType.matches(ranges, leaf, numDims, bytesPerDim)) {
|
||||
adder.add(docID);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
return queryType.compare(ranges, minPackedValue, maxPackedValue, numDims, bytesPerDim);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||
LeafReader reader = context.reader();
|
||||
PointValues values = reader.getPointValues(field);
|
||||
if (values == null) {
|
||||
|
@ -173,115 +304,59 @@ abstract class RangeFieldQuery extends Query {
|
|||
checkFieldInfo(fieldInfo);
|
||||
boolean allDocsMatch = false;
|
||||
if (values.getDocCount() == reader.maxDoc()
|
||||
&& compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
|
||||
&& queryType.compare(ranges, values.getMinPackedValue(), values.getMaxPackedValue(), numDims, bytesPerDim) == Relation.CELL_INSIDE_QUERY) {
|
||||
allDocsMatch = true;
|
||||
}
|
||||
|
||||
DocIdSetIterator iterator = allDocsMatch == true ?
|
||||
DocIdSetIterator.all(reader.maxDoc()) : buildMatchingDocIdSet(reader, values).iterator();
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
final Weight weight = this;
|
||||
if (allDocsMatch) {
|
||||
return new ScorerSupplier() {
|
||||
@Override
|
||||
public Scorer get(boolean randomAccess) {
|
||||
return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc()));
|
||||
}
|
||||
|
||||
/** get an encoded byte representation of the internal node; this is
|
||||
* the lower half of the min array and the upper half of the max array */
|
||||
private byte[] getInternalRange(byte[] min, byte[] max) {
|
||||
byte[] range = new byte[min.length];
|
||||
final int dimSize = numDims * bytesPerDim;
|
||||
System.arraycopy(min, 0, range, 0, dimSize);
|
||||
System.arraycopy(max, dimSize, range, dimSize, dimSize);
|
||||
return range;
|
||||
}
|
||||
};
|
||||
}
|
||||
@Override
|
||||
public long cost() {
|
||||
return reader.maxDoc();
|
||||
}
|
||||
};
|
||||
} else {
|
||||
return new ScorerSupplier() {
|
||||
|
||||
/**
|
||||
* RangeFieldComparator class provides the core comparison logic for accepting or rejecting indexed
|
||||
* {@code RangeField} types based on the defined query range and relation.
|
||||
*/
|
||||
class RangeFieldComparator {
|
||||
final Predicate<byte[]> predicate;
|
||||
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
||||
final IntersectVisitor visitor = getIntersectVisitor(result);
|
||||
long cost = -1;
|
||||
|
||||
/** constructs the comparator based on the query type */
|
||||
RangeFieldComparator() {
|
||||
switch (queryType) {
|
||||
case INTERSECTS:
|
||||
predicate = this::intersects;
|
||||
break;
|
||||
case WITHIN:
|
||||
predicate = this::contains;
|
||||
break;
|
||||
case CONTAINS:
|
||||
predicate = this::within;
|
||||
break;
|
||||
case CROSSES:
|
||||
// crosses first checks intersection (disjoint automatic fails),
|
||||
// then ensures the query doesn't wholly contain the leaf:
|
||||
predicate = (byte[] leaf) -> this.intersects(leaf)
|
||||
&& this.contains(leaf) == false;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("invalid queryType [" + queryType + "] found.");
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public Scorer get(boolean randomAccess) throws IOException {
|
||||
values.intersect(visitor);
|
||||
DocIdSetIterator iterator = result.build().iterator();
|
||||
return new ConstantScoreScorer(weight, score(), iterator);
|
||||
}
|
||||
|
||||
/** determines if the candidate range matches the query request */
|
||||
private boolean matches(final byte[] candidate) {
|
||||
return (Arrays.equals(ranges, candidate) && queryType != QueryType.CROSSES)
|
||||
|| predicate.test(candidate);
|
||||
}
|
||||
|
||||
/** check if query intersects candidate range */
|
||||
private boolean intersects(final byte[] candidate) {
|
||||
return relate((int d) -> compareMinMax(candidate, d) > 0 || compareMaxMin(candidate, d) < 0);
|
||||
}
|
||||
|
||||
/** check if query is within candidate range */
|
||||
private boolean within(final byte[] candidate) {
|
||||
return relate((int d) -> compareMinMin(candidate, d) < 0 || compareMaxMax(candidate, d) > 0);
|
||||
}
|
||||
|
||||
/** check if query contains candidate range */
|
||||
private boolean contains(final byte[] candidate) {
|
||||
return relate((int d) -> compareMinMin(candidate, d) > 0 || compareMaxMax(candidate, d) < 0);
|
||||
}
|
||||
|
||||
/** internal method used by each relation method to test range relation logic */
|
||||
private boolean relate(IntPredicate predicate) {
|
||||
for (int d=0; d<numDims; ++d) {
|
||||
if (predicate.test(d)) {
|
||||
return false;
|
||||
@Override
|
||||
public long cost() {
|
||||
if (cost == -1) {
|
||||
// Computing the cost may be expensive, so only do it if necessary
|
||||
cost = values.estimatePointCount(visitor);
|
||||
assert cost >= 0;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** compare the encoded min value (for the defined query dimension) with the encoded min value in the byte array */
|
||||
private int compareMinMin(byte[] b, int dimension) {
|
||||
// convert dimension to offset:
|
||||
dimension *= bytesPerDim;
|
||||
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
|
||||
}
|
||||
|
||||
/** compare the encoded min value (for the defined query dimension) with the encoded max value in the byte array */
|
||||
private int compareMinMax(byte[] b, int dimension) {
|
||||
// convert dimension to offset:
|
||||
dimension *= bytesPerDim;
|
||||
return StringHelper.compare(bytesPerDim, ranges, dimension, b, numDims * bytesPerDim + dimension);
|
||||
}
|
||||
|
||||
/** compare the encoded max value (for the defined query dimension) with the encoded min value in the byte array */
|
||||
private int compareMaxMin(byte[] b, int dimension) {
|
||||
// convert dimension to offset:
|
||||
dimension *= bytesPerDim;
|
||||
return StringHelper.compare(bytesPerDim, ranges, numDims * bytesPerDim + dimension, b, dimension);
|
||||
}
|
||||
|
||||
/** compare the encoded max value (for the defined query dimension) with the encoded max value in the byte array */
|
||||
private int compareMaxMax(byte[] b, int dimension) {
|
||||
// convert dimension to max offset:
|
||||
dimension = numDims * bytesPerDim + dimension * bytesPerDim;
|
||||
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
|
||||
}
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
ScorerSupplier scorerSupplier = scorerSupplier(context);
|
||||
if (scorerSupplier == null) {
|
||||
return null;
|
||||
}
|
||||
return scorerSupplier.get(false);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
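The hunks above move RangeFieldQuery from building its scorer eagerly in scorer() to exposing a ScorerSupplier whose cost() defers to PointValues.estimatePointCount, so the DocIdSet is only built when the scorer is actually requested. A hedged caller-side sketch of how such a supplier might be consumed (not code from this patch; the method name and the 50% threshold are illustrative assumptions):

// Assumes imports: java.io.IOException, org.apache.lucene.index.LeafReaderContext,
// org.apache.lucene.search.Weight, org.apache.lucene.search.Scorer, org.apache.lucene.search.ScorerSupplier
static Scorer scorerFromSupplier(Weight weight, LeafReaderContext ctx) throws IOException {
  ScorerSupplier supplier = weight.scorerSupplier(ctx);
  if (supplier == null) {
    return null; // no matching documents in this segment
  }
  // cost() is a cheap point-count estimate; only get() pays for building the DocIdSet.
  boolean randomAccess = supplier.cost() < ctx.reader().maxDoc() / 2; // illustrative heuristic
  return supplier.get(randomAccess);
}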
|
@ -501,9 +501,8 @@ class BufferedUpdatesStream implements Accountable {
|
|||
queue = new SegmentQueue(numReaders);
|
||||
|
||||
long segTermCount = 0;
|
||||
for(int i=0;i<numReaders;i++) {
|
||||
SegmentState state = segStates[i];
|
||||
Terms terms = state.reader.fields().terms(field);
|
||||
for (SegmentState state : segStates) {
|
||||
Terms terms = state.reader.terms(field);
|
||||
if (terms != null) {
|
||||
segTermCount += terms.size();
|
||||
state.termsEnum = terms.iterator();
|
||||
|
@ -617,7 +616,6 @@ class BufferedUpdatesStream implements Accountable {
|
|||
// DocValues updates
|
||||
private synchronized void applyDocValuesUpdates(List<DocValuesUpdate> updates,
|
||||
SegmentState segState, DocValuesFieldUpdates.Container dvUpdatesContainer) throws IOException {
|
||||
Fields fields = segState.reader.fields();
|
||||
|
||||
// TODO: we can process the updates per DV field, from last to first so that
|
||||
// if multiple terms affect same document for the same field, we add an update
|
||||
|
@ -651,7 +649,7 @@ class BufferedUpdatesStream implements Accountable {
|
|||
// if we change the code to process updates in terms order, enable this assert
|
||||
// assert currentField == null || currentField.compareTo(term.field()) < 0;
|
||||
currentField = term.field();
|
||||
Terms terms = fields.terms(currentField);
|
||||
Terms terms = segState.reader.terms(currentField);
|
||||
if (terms != null) {
|
||||
termsEnum = terms.iterator();
|
||||
} else {
|
||||
|
|
|
@ -98,12 +98,15 @@ public abstract class CodecReader extends LeafReader implements Accountable {
|
|||
throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public final Fields fields() {
|
||||
return getPostingsReader();
|
||||
public final Terms terms(String field) throws IOException {
|
||||
//ensureOpen(); no; getPostingsReader calls this
|
||||
// We could check the FieldInfo IndexOptions but there's no point since
|
||||
// PostingsReader will simply return null for fields that don't exist or that have no terms index.
|
||||
return getPostingsReader().terms(field);
|
||||
}
|
||||
|
||||
|
||||
// returns the FieldInfo that corresponds to the given field and type, or
|
||||
// null if the field does not exist, or not indexed as the requested
|
||||
// DocValuesType.
|
||||
|
|
|
@ -770,10 +770,12 @@ final class DefaultIndexingChain extends DocConsumer {
|
|||
}
|
||||
invertState.lastStartOffset = startOffset;
|
||||
|
||||
invertState.length++;
|
||||
if (invertState.length < 0) {
|
||||
throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
|
||||
try {
|
||||
invertState.length = Math.addExact(invertState.length, invertState.termFreqAttribute.getTermFrequency());
|
||||
} catch (ArithmeticException ae) {
|
||||
throw new IllegalArgumentException("too many tokens for field \"" + field.name() + "\"");
|
||||
}
|
||||
|
||||
//System.out.println(" term=" + invertState.termAttribute);
|
||||
|
||||
// If we hit an exception in here, we abort
|
||||
|
|
|
@ -17,14 +17,13 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
|
||||
import org.apache.lucene.index.FilterLeafReader.FilterFields;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.FilterLeafReader.FilterTerms;
|
||||
import org.apache.lucene.index.FilterLeafReader.FilterTermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
/**
|
||||
* The {@link ExitableDirectoryReader} wraps a real index {@link DirectoryReader} and
|
||||
|
@ -79,14 +78,12 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
Fields fields = super.fields();
|
||||
if (queryTimeout.isTimeoutEnabled()) {
|
||||
return new ExitableFields(fields, queryTimeout);
|
||||
}
|
||||
else {
|
||||
return fields; // break out of wrapper as soon as possible
|
||||
public Terms terms(String field) throws IOException {
|
||||
Terms terms = in.terms(field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
return (queryTimeout.isTimeoutEnabled()) ? new ExitableTerms(terms, queryTimeout) : terms;
|
||||
}
|
||||
|
||||
// this impl does not change deletes or data so we can delegate the
|
||||
|
@ -103,29 +100,6 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper class for another Fields implementation that is used by the ExitableFilterAtomicReader.
|
||||
*/
|
||||
public static class ExitableFields extends FilterFields {
|
||||
|
||||
private QueryTimeout queryTimeout;
|
||||
|
||||
/** Constructor **/
|
||||
public ExitableFields(Fields fields, QueryTimeout queryTimeout) {
|
||||
super(fields);
|
||||
this.queryTimeout = queryTimeout;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
Terms terms = in.terms(field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
return new ExitableTerms(terms, queryTimeout);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper class for another Terms implementation that is used by ExitableFields.
|
||||
*/
|
||||
|
|
|
@ -20,6 +20,7 @@ import org.apache.lucene.analysis.TokenStream; // javadocs
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
|
||||
|
@ -48,6 +49,7 @@ public final class FieldInvertState {
|
|||
PositionIncrementAttribute posIncrAttribute;
|
||||
PayloadAttribute payloadAttribute;
|
||||
TermToBytesRefAttribute termAttribute;
|
||||
TermFrequencyAttribute termFreqAttribute;
|
||||
|
||||
/** Creates {@code FieldInvertState} for the specified
|
||||
* field name. */
|
||||
|
@ -88,6 +90,7 @@ public final class FieldInvertState {
|
|||
if (this.attributeSource != attributeSource) {
|
||||
this.attributeSource = attributeSource;
|
||||
termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
|
||||
termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
|
||||
posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
|
||||
offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
|
||||
payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
|
||||
|
|
|
@ -20,9 +20,15 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
/** Flex API for access to fields and terms
|
||||
* @lucene.experimental */
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
|
||||
/**
|
||||
* Provides a {@link Terms} index for fields that have it, and lists which fields do.
|
||||
* This is primarily an internal/experimental API (see {@link FieldsProducer}),
|
||||
* although it is also used to expose the set of term vectors per document.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class Fields implements Iterable<String> {
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
|
|
|
@ -345,11 +345,11 @@ public abstract class FilterLeafReader extends LeafReader {
|
|||
protected void doClose() throws IOException {
|
||||
in.close();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
public Terms terms(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return in.fields();
|
||||
return in.terms(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -113,9 +113,10 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||
if (!hasFreq) {
|
||||
assert postings.termFreqs == null;
|
||||
postings.lastDocCodes[termID] = docState.docID;
|
||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||
} else {
|
||||
postings.lastDocCodes[termID] = docState.docID << 1;
|
||||
postings.termFreqs[termID] = 1;
|
||||
postings.termFreqs[termID] = getTermFreq();
|
||||
if (hasProx) {
|
||||
writeProx(termID, fieldState.position);
|
||||
if (hasOffsets) {
|
||||
|
@ -124,19 +125,21 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||
} else {
|
||||
assert !hasOffsets;
|
||||
}
|
||||
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
|
||||
}
|
||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||
fieldState.uniqueTermCount++;
|
||||
}
|
||||
|
||||
@Override
|
||||
void addTerm(final int termID) {
|
||||
final FreqProxPostingsArray postings = freqProxPostingsArray;
|
||||
|
||||
assert !hasFreq || postings.termFreqs[termID] > 0;
|
||||
|
||||
if (!hasFreq) {
|
||||
assert postings.termFreqs == null;
|
||||
if (termFreqAtt.getTermFrequency() != 1) {
|
||||
throw new IllegalStateException("field \"" + fieldInfo.name + "\": must index term freq while using custom TermFrequencyAttribute");
|
||||
}
|
||||
if (docState.docID != postings.lastDocIDs[termID]) {
|
||||
// New document; now encode docCode for previous doc:
|
||||
assert docState.docID > postings.lastDocIDs[termID];
|
||||
|
@ -160,8 +163,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||
}
|
||||
|
||||
// Init freq for the current document
|
||||
postings.termFreqs[termID] = 1;
|
||||
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
|
||||
postings.termFreqs[termID] = getTermFreq();
|
||||
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
|
||||
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
|
||||
postings.lastDocIDs[termID] = docState.docID;
|
||||
if (hasProx) {
|
||||
|
@ -175,7 +178,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||
}
|
||||
fieldState.uniqueTermCount++;
|
||||
} else {
|
||||
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
|
||||
postings.termFreqs[termID] = Math.addExact(postings.termFreqs[termID], getTermFreq());
|
||||
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, postings.termFreqs[termID]);
|
||||
if (hasProx) {
|
||||
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
|
||||
if (hasOffsets) {
|
||||
|
@ -185,6 +189,17 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
|
|||
}
|
||||
}

  private int getTermFreq() {
    int freq = termFreqAtt.getTermFrequency();
    if (freq != 1) {
      if (hasProx) {
        throw new IllegalStateException("field \"" + fieldInfo.name + "\": cannot index positions while using custom TermFrequencyAttribute");
      }
    }

    return freq;
  }

@Override
|
||||
public void newPostingsArray() {
|
||||
freqProxPostingsArray = (FreqProxPostingsArray) postingsArray;
|
||||
|
|
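As the getTermFreq() implementations enforce, custom term frequencies only combine with fields that index frequencies but no positions (and, for term vectors, no vector positions or offsets). A minimal sketch of a compatible FieldType, assuming the token stream sets TermFrequencyAttribute as in the TestCustomTermFreq test further down in this patch:

import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;

// DOCS alone rejects custom frequencies, and anything with positions rejects them too;
// DOCS_AND_FREQS is the option that works with a custom TermFrequencyAttribute.
FieldType customFreqType = new FieldType(TextField.TYPE_NOT_STORED);
customFreqType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
customFreqType.freeze();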
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.CacheHelper;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/** {@code LeafReader} is an abstract class, providing an interface for accessing an
|
||||
|
@ -60,7 +59,7 @@ public abstract class LeafReader extends IndexReader {
|
|||
}
|
||||
|
||||
/**
|
||||
* Optional method: Return a {@link CacheHelper} that can be used to cache
|
||||
* Optional method: Return a {@link IndexReader.CacheHelper} that can be used to cache
|
||||
* based on the content of this leaf regardless of deletions. Two readers
|
||||
* that have the same data but different sets of deleted documents or doc
|
||||
* values updates may be considered equal. Consider using
|
||||
|
@ -73,12 +72,6 @@ public abstract class LeafReader extends IndexReader {
|
|||
*/
|
||||
public abstract CacheHelper getCoreCacheHelper();

  /**
   * Returns {@link Fields} for this reader.
   * This method will not return null.
   */
  public abstract Fields fields() throws IOException;

  @Override
  public final int docFreq(Term term) throws IOException {
    final Terms terms = terms(term.field());

@@ -139,10 +132,8 @@ public abstract class LeafReader extends IndexReader {
    return terms.getSumTotalTermFreq();
  }

  /** This may return null if the field does not exist.*/
  public final Terms terms(String field) throws IOException {
    return fields().terms(field);
  }
  /** Returns the {@link Terms} index for this field, or null if it has none. */
  public abstract Terms terms(String field) throws IOException;

  /** Returns {@link PostingsEnum} for the specified term.
   * This will return null if either the field or
|
|
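For callers, the migration implied by these hunks is mechanical: instead of going through the removed fields() view, ask the reader for a single field's Terms directly. A short sketch, assuming a LeafReader named reader and an indexed field called "body" (both illustrative):

// Before (no longer compiles): Terms terms = reader.fields().terms("body");
Terms terms = reader.terms("body"); // may be null if the field is absent or has no terms index
if (terms != null) {
  TermsEnum termsEnum = terms.iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    // consume each term, e.g. termsEnum.docFreq()
  }
}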
|
@ -70,8 +70,11 @@ class MergeReaderWrapper extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return fields;
|
||||
public Terms terms(String field) throws IOException {
|
||||
ensureOpen();
|
||||
// We could check the FieldInfo IndexOptions but there's no point since
|
||||
// PostingsReader will simply return null for fields that don't exist or that have no terms index.
|
||||
return fields.terms(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -31,11 +32,12 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.MergedIterator;
|
||||
|
||||
/**
|
||||
* Exposes flex API, merged from flex API of sub-segments.
|
||||
* Provides a single {@link Fields} term index view over an
|
||||
* {@link IndexReader}.
|
||||
* This is useful when you're interacting with an {@link
|
||||
* IndexReader} implementation that consists of sequential
|
||||
* sub-readers (eg {@link DirectoryReader} or {@link
|
||||
* MultiReader}).
|
||||
* MultiReader}) and you must treat it as a {@link LeafReader}.
|
||||
*
|
||||
* <p><b>NOTE</b>: for composite readers, you'll get better
|
||||
* performance by gathering the sub readers using
|
||||
|
@ -45,7 +47,6 @@ import org.apache.lucene.util.MergedIterator;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
public final class MultiFields extends Fields {
|
||||
private final Fields[] subs;
|
||||
private final ReaderSlice[] subSlices;
|
||||
|
@ -64,13 +65,13 @@ public final class MultiFields extends Fields {
|
|||
switch (leaves.size()) {
|
||||
case 1:
|
||||
// already an atomic reader / reader with one leaf
|
||||
return leaves.get(0).reader().fields();
|
||||
return new LeafReaderFields(leaves.get(0).reader());
|
||||
default:
|
||||
final List<Fields> fields = new ArrayList<>(leaves.size());
|
||||
final List<ReaderSlice> slices = new ArrayList<>(leaves.size());
|
||||
for (final LeafReaderContext ctx : leaves) {
|
||||
final LeafReader r = ctx.reader();
|
||||
final Fields f = r.fields();
|
||||
final Fields f = new LeafReaderFields(r);
|
||||
fields.add(f);
|
||||
slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1));
|
||||
}
|
||||
|
@ -115,9 +116,31 @@ public final class MultiFields extends Fields {
|
|||
}
|
||||
}
|
||||

  /** This method may return null if the field does not exist.*/
  /** This method may return null if the field does not exist or if it has no terms. */
  public static Terms getTerms(IndexReader r, String field) throws IOException {
    return getFields(r).terms(field);
    final List<LeafReaderContext> leaves = r.leaves();
    if (leaves.size() == 1) {
      return leaves.get(0).reader().terms(field);
    }

    final List<Terms> termsPerLeaf = new ArrayList<>(leaves.size());
    final List<ReaderSlice> slicePerLeaf = new ArrayList<>(leaves.size());

    for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) {
      LeafReaderContext ctx = leaves.get(leafIdx);
      Terms subTerms = ctx.reader().terms(field);
      if (subTerms != null) {
        termsPerLeaf.add(subTerms);
        slicePerLeaf.add(new ReaderSlice(ctx.docBase, r.maxDoc(), leafIdx - 1));
      }
    }

    if (termsPerLeaf.size() == 0) {
      return null;
    } else {
      return new MultiTerms(termsPerLeaf.toArray(Terms.EMPTY_ARRAY),
          slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY));
    }
  }

/** Returns {@link PostingsEnum} for the specified field and
|
||||
|
@ -264,5 +287,37 @@ public final class MultiFields extends Fields {
|
|||
}
|
||||
return fields;
|
||||
}
|
||||
|
||||
private static class LeafReaderFields extends Fields {
|
||||
|
||||
private final LeafReader leafReader;
|
||||
private final List<String> indexedFields;
|
||||
|
||||
LeafReaderFields(LeafReader leafReader) {
|
||||
this.leafReader = leafReader;
|
||||
this.indexedFields = new ArrayList<>();
|
||||
for (FieldInfo fieldInfo : leafReader.getFieldInfos()) {
|
||||
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
||||
indexedFields.add(fieldInfo.name);
|
||||
}
|
||||
}
|
||||
Collections.sort(indexedFields);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Collections.unmodifiableList(indexedFields).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return indexedFields.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return leafReader.terms(field);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
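At the IndexReader (composite) level the equivalent entry point is MultiFields.getTerms, which after this change merges per-leaf Terms directly instead of materializing a full Fields view first. A brief usage sketch (the reader variable and field name are illustrative):

// May return null when no leaf has a terms index for the field.
Terms terms = MultiFields.getTerms(indexReader, "field");
if (terms != null) {
  TermsEnum termsEnum = terms.iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    System.out.println(term.utf8ToString() + " docFreq=" + termsEnum.docFreq());
  }
}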
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
@ -50,7 +51,6 @@ import org.apache.lucene.util.Version;
|
|||
*/
|
||||
public class ParallelLeafReader extends LeafReader {
|
||||
private final FieldInfos fieldInfos;
|
||||
private final ParallelFields fields = new ParallelFields();
|
||||
private final LeafReader[] parallelReaders, storedFieldsReaders;
|
||||
private final Set<LeafReader> completeReaderSet =
|
||||
Collections.newSetFromMap(new IdentityHashMap<LeafReader,Boolean>());
|
||||
|
@ -58,9 +58,10 @@ public class ParallelLeafReader extends LeafReader {
|
|||
private final int maxDoc, numDocs;
|
||||
private final boolean hasDeletions;
|
||||
private final LeafMetaData metaData;
|
||||
private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();
|
||||
private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>();
|
||||
|
||||
private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();//TODO needn't sort?
|
||||
private final Map<String,LeafReader> termsFieldToReader = new HashMap<>();
|
||||
|
||||
/** Create a ParallelLeafReader based on the provided
|
||||
* readers; auto-closes the given readers on {@link #close()}. */
|
||||
public ParallelLeafReader(LeafReader... readers) throws IOException {
|
||||
|
@ -130,9 +131,15 @@ public class ParallelLeafReader extends LeafReader {
|
|||
if (!fieldToReader.containsKey(fieldInfo.name)) {
|
||||
builder.add(fieldInfo);
|
||||
fieldToReader.put(fieldInfo.name, reader);
|
||||
// only add these if the reader responsible for that field name is the current:
|
||||
// TODO consider populating 1st leaf with vectors even if the field name has been seen on a previous leaf
|
||||
if (fieldInfo.hasVectors()) {
|
||||
tvFieldToReader.put(fieldInfo.name, reader);
|
||||
}
|
||||
// TODO consider populating 1st leaf with terms even if the field name has been seen on a previous leaf
|
||||
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
||||
termsFieldToReader.put(fieldInfo.name, reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -154,17 +161,6 @@ public class ParallelLeafReader extends LeafReader {
|
|||
|
||||
fieldInfos = builder.finish();
|
||||
this.metaData = new LeafMetaData(createdVersionMajor, minVersion, indexSort);
|
||||
|
||||
// build Fields instance
|
||||
for (final LeafReader reader : this.parallelReaders) {
|
||||
final Fields readerFields = reader.fields();
|
||||
for (String field : readerFields) {
|
||||
// only add if the reader responsible for that field name is the current:
|
||||
if (fieldToReader.get(field) == reader) {
|
||||
this.fields.addField(field, readerFields.terms(field));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// do this finally so any Exceptions occurred before don't affect refcounts:
|
||||
for (LeafReader reader : completeReaderSet) {
|
||||
|
@ -230,13 +226,14 @@ public class ParallelLeafReader extends LeafReader {
|
|||
ensureOpen();
|
||||
return hasDeletions ? parallelReaders[0].getLiveDocs() : null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Fields fields() {
|
||||
public Terms terms(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return fields;
|
||||
LeafReader leafReader = termsFieldToReader.get(field);
|
||||
return leafReader == null ? null : leafReader.terms(field);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int numDocs() {
|
||||
// Don't call ensureOpen() here (it could affect performance)
|
||||
|
|
|
@ -18,6 +18,8 @@ package org.apache.lucene.index;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
|
@ -284,21 +286,27 @@ public final class SlowCodecReaderWrapper {
|
|||
}
|
||||
|
||||
private static FieldsProducer readerToFieldsProducer(final LeafReader reader) throws IOException {
|
||||
final Fields fields = reader.fields();
|
||||
ArrayList<String> indexedFields = new ArrayList<>();
|
||||
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
|
||||
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
|
||||
indexedFields.add(fieldInfo.name);
|
||||
}
|
||||
}
|
||||
Collections.sort(indexedFields);
|
||||
return new FieldsProducer() {
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return fields.iterator();
|
||||
return indexedFields.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return fields.terms(field);
|
||||
return reader.terms(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return fields.size();
|
||||
return indexedFields.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -49,6 +49,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
|||
*/
|
||||
class SortingLeafReader extends FilterLeafReader {
|
||||
|
||||
//TODO remove from here; move to FreqProxTermsWriter or FreqProxFields?
|
||||
static class SortingFields extends FilterFields {
|
||||
|
||||
private final Sorter.DocMap docMap;
|
||||
|
@ -1042,8 +1043,9 @@ class SortingLeafReader extends FilterLeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return new SortingFields(in.fields(), in.getFieldInfos(), docMap);
|
||||
public Terms terms(String field) throws IOException {
|
||||
Terms terms = super.terms(field);
|
||||
return terms==null ? null : new SortingTerms(terms, in.getFieldInfos().fieldInfo(field).getIndexOptions(), docMap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -109,6 +109,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
|
|||
|
||||
@Override
|
||||
boolean start(IndexableField field, boolean first) {
|
||||
super.start(field, first);
|
||||
assert field.fieldType().indexOptions() != IndexOptions.NONE;
|
||||
|
||||
if (first) {
|
||||
|
@ -224,7 +225,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
|
|||
void newTerm(final int termID) {
|
||||
TermVectorsPostingsArray postings = termVectorsPostingsArray;
|
||||
|
||||
postings.freqs[termID] = 1;
|
||||
postings.freqs[termID] = getTermFreq();
|
||||
postings.lastOffsets[termID] = 0;
|
||||
postings.lastPositions[termID] = 0;
|
||||
|
||||
|
@ -235,11 +236,25 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
|
|||
void addTerm(final int termID) {
|
||||
TermVectorsPostingsArray postings = termVectorsPostingsArray;
|
||||
|
||||
postings.freqs[termID]++;
|
||||
postings.freqs[termID] += getTermFreq();
|
||||
|
||||
writeProx(postings, termID);
|
||||
}

  private int getTermFreq() {
    int freq = termFreqAtt.getTermFrequency();
    if (freq != 1) {
      if (doVectorPositions) {
        throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector positions while using custom TermFrequencyAttribute");
      }
      if (doVectorOffsets) {
        throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector offsets while using custom TermFrequencyAttribute");
      }
    }

    return freq;
  }

@Override
|
||||
public void newPostingsArray() {
|
||||
termVectorsPostingsArray = (TermVectorsPostingsArray) postingsArray;
|
||||
|
|
|
@ -19,12 +19,13 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IntBlockPool;
|
||||
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
|
||||
|
||||
abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
||||
private static final int HASH_INIT_SIZE = 4;
|
||||
|
@ -35,6 +36,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
|||
protected final DocumentsWriterPerThread.DocState docState;
|
||||
protected final FieldInvertState fieldState;
|
||||
TermToBytesRefAttribute termAtt;
|
||||
protected TermFrequencyAttribute termFreqAtt;
|
||||
|
||||
// Copied from our perThread
|
||||
final IntBlockPool intPool;
|
||||
|
@ -287,6 +289,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
|||
* document. */
|
||||
boolean start(IndexableField field, boolean first) {
|
||||
termAtt = fieldState.termAttribute;
|
||||
termFreqAtt = fieldState.termFreqAttribute;
|
||||
if (nextPerField != null) {
|
||||
doNextCall = nextPerField.start(field, first);
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||
protected final CompiledAutomaton compiled;
|
||||
/** term containing the field, and possibly some pattern structure */
|
||||
protected final Term term;
|
||||
protected final boolean automatonIsBinary;
|
||||
|
||||
/**
|
||||
* Create a new AutomatonQuery from an {@link Automaton}.
|
||||
|
@ -98,6 +99,7 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||
super(term.field());
|
||||
this.term = term;
|
||||
this.automaton = automaton;
|
||||
this.automatonIsBinary = isBinary;
|
||||
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
|
||||
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
|
||||
}
|
||||
|
@ -154,4 +156,9 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||
public Automaton getAutomaton() {
|
||||
return automaton;
|
||||
}
|
||||
|
||||
/** Is this a binary (byte) oriented automaton. See the constructor. */
|
||||
public boolean isAutomatonBinary() {
|
||||
return automatonIsBinary;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -178,6 +178,14 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
return disjuncts[0];
|
||||
}
|

    if (tieBreakerMultiplier == 1.0f) {
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      for (Query sub : disjuncts) {
        builder.add(sub, BooleanClause.Occur.SHOULD);
      }
      return builder.build();
    }

|
||||
List<Query> rewrittenDisjuncts = new ArrayList<>();
|
||||
for (Query sub : disjuncts) {
|
||||
|
|
|
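The new rewrite is justified by the scoring formula: a DisjunctionMaxQuery scores max + tieBreaker * (sum of the other disjuncts' scores), so with tieBreakerMultiplier == 1 this collapses to the plain sum, which is exactly what a SHOULD-only BooleanQuery computes. A small sketch (the two term queries are illustrative):

Query q1 = new TermQuery(new Term("title", "lucene"));
Query q2 = new TermQuery(new Term("body", "lucene"));
Query dismax = new DisjunctionMaxQuery(Arrays.asList(q1, q2), 1.0f);
// dismax.rewrite(reader) now returns the equivalent of:
//   new BooleanQuery.Builder()
//       .add(q1, BooleanClause.Occur.SHOULD)
//       .add(q2, BooleanClause.Occur.SHOULD)
//       .build()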
@ -26,7 +26,6 @@ import java.util.Objects;
|
|||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -224,8 +223,7 @@ public class TermInSetQuery extends Query implements Accountable {
|
|||
private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
|
||||
final LeafReader reader = context.reader();
|
||||
|
||||
final Fields fields = reader.fields();
|
||||
Terms terms = fields.terms(field);
|
||||
Terms terms = reader.terms(field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ public final class NamedSPILoader<S extends NamedSPILoader.NamedSPI> implements
|
|||
private final Class<S> clazz;
|
||||
|
||||
public NamedSPILoader(Class<S> clazz) {
|
||||
this(clazz, Thread.currentThread().getContextClassLoader());
|
||||
this(clazz, null);
|
||||
}
|
||||
|
||||
public NamedSPILoader(Class<S> clazz, ClassLoader classloader) {
|
||||
|
|
|
@ -538,11 +538,7 @@ public class QueryBuilder {
|
|||
builder.add(queryPos, operator);
|
||||
}
|
||||
}
|
||||
BooleanQuery bq = builder.build();
|
||||
if (bq.clauses().size() == 1) {
|
||||
return bq.clauses().get(0).getQuery();
|
||||
}
|
||||
return bq;
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -48,13 +48,11 @@ public final class SPIClassIterator<S> implements Iterator<Class<? extends S>> {
|
|||
private final Enumeration<URL> profilesEnum;
|
||||
private Iterator<String> linesIterator;
|
||||
|
||||
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the context classloader. */
|
||||
/** Creates a new SPI iterator to lookup services of type {@code clazz} using
|
||||
* the same {@link ClassLoader} as the argument. */
|
||||
public static <S> SPIClassIterator<S> get(Class<S> clazz) {
|
||||
ClassLoader cl = Thread.currentThread().getContextClassLoader();
|
||||
if (cl == null) {
|
||||
cl = clazz.getClassLoader();
|
||||
}
|
||||
return new SPIClassIterator<>(clazz, cl);
|
||||
return new SPIClassIterator<>(clazz,
|
||||
Objects.requireNonNull(clazz.getClassLoader(), () -> clazz + " has no classloader."));
|
||||
}
|
||||
|
||||
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the given classloader. */
|
||||
|
|
|
@ -125,6 +125,7 @@ public class TestToken extends LuceneTestCase {
|
|||
t.setFlags(8);
|
||||
t.setPositionIncrement(3);
|
||||
t.setPositionLength(11);
|
||||
t.setTermFrequency(42);
|
||||
TestUtil.assertAttributeReflection(t,
|
||||
new HashMap<String, Object>() {{
|
||||
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||
|
@ -136,6 +137,7 @@ public class TestToken extends LuceneTestCase {
|
|||
put(PayloadAttribute.class.getName() + "#payload", null);
|
||||
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
|
||||
put(FlagsAttribute.class.getName() + "#flags", 8);
|
||||
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
|
||||
}});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -82,6 +82,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
|
|||
t.setPositionIncrement(3);
|
||||
t.setPositionLength(11);
|
||||
t.setType("foobar");
|
||||
t.setTermFrequency(42);
|
||||
TestUtil.assertAttributeReflection(t,
|
||||
new HashMap<String, Object>() {{
|
||||
put(CharTermAttribute.class.getName() + "#term", "foobar");
|
||||
|
@ -91,6 +92,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
|
|||
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
|
||||
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
|
||||
put(TypeAttribute.class.getName() + "#type", "foobar");
|
||||
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
|
||||
}});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
|
|||
|
||||
DirectoryReader r = DirectoryReader.open(w);
|
||||
assertEquals(1, r.leaves().size());
|
||||
FieldReader field = (FieldReader) r.leaves().get(0).reader().fields().terms("field");
|
||||
FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
|
||||
// We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
|
||||
Stats stats = field.getStats();
|
||||
assertEquals(0, stats.floorBlockCount);
|
||||
|
|
|
@ -91,7 +91,7 @@ public class Test2BDocs extends LuceneTestCase {
|
|||
LeafReader reader = context.reader();
|
||||
int lim = context.reader().maxDoc();
|
||||
|
||||
Terms terms = reader.fields().terms("f1");
|
||||
Terms terms = reader.terms("f1");
|
||||
for (int i=0; i<10000; i++) {
|
||||
TermsEnum te = terms.iterator();
|
||||
assertTrue( te.seekExact(term) );
|
||||
|
|
|
@ -0,0 +1,468 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import static org.apache.lucene.index.PostingsEnum.NO_MORE_DOCS;
|
||||

public class TestCustomTermFreq extends LuceneTestCase {

  private static final class CannedTermFreqs extends TokenStream {
    private final String[] terms;
    private final int[] termFreqs;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final TermFrequencyAttribute termFreqAtt = addAttribute(TermFrequencyAttribute.class);
    private int upto;

    public CannedTermFreqs(String[] terms, int[] termFreqs) {
      this.terms = terms;
      this.termFreqs = termFreqs;
      assert terms.length == termFreqs.length;
    }

    @Override
    public boolean incrementToken() {
      if (upto == terms.length) {
        return false;
      }

      clearAttributes();

      termAtt.append(terms[upto]);
      termFreqAtt.setTermFrequency(termFreqs[upto]);

      upto++;
      return true;
    }

    @Override
    public void reset() {
      upto = 0;
    }
  }

public void testSingletonTermsOneDoc() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
Document doc = new Document();
|
||||
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
Field field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar"},
|
||||
new int[] {42, 128}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(128, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(42, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
public void testSingletonTermsTwoDocs() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
Document doc = new Document();
|
||||
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
Field field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar"},
|
||||
new int[] {42, 128}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar"},
|
||||
new int[] {50, 50}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(128, postings.freq());
|
||||
assertEquals(1, postings.nextDoc());
|
||||
assertEquals(50, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(42, postings.freq());
|
||||
assertEquals(1, postings.nextDoc());
|
||||
assertEquals(50, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
public void testRepeatTermsOneDoc() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
Document doc = new Document();
|
||||
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
Field field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||
new int[] {42, 128, 17, 100}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(228, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(59, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
public void testRepeatTermsTwoDocs() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
Document doc = new Document();
|
||||
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
Field field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||
new int[] {42, 128, 17, 100}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||
new int[] {50, 60, 70, 80}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(228, postings.freq());
|
||||
assertEquals(1, postings.nextDoc());
|
||||
assertEquals(140, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
|
||||
assertNotNull(postings);
|
||||
assertEquals(0, postings.nextDoc());
|
||||
assertEquals(59, postings.freq());
|
||||
assertEquals(1, postings.nextDoc());
|
||||
assertEquals(120, postings.freq());
|
||||
assertEquals(NO_MORE_DOCS, postings.nextDoc());
|
||||
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
public void testTotalTermFreq() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
Document doc = new Document();
|
||||
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
Field field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||
new int[] {42, 128, 17, 100}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||
new int[] {50, 60, 70, 80}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
w.addDocument(doc);
|
||||
|
||||
IndexReader r = DirectoryReader.open(w);
|
||||
|
||||
TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator();
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
|
||||
assertEquals(179, termsEnum.totalTermFreq());
|
||||
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
|
||||
assertEquals(368, termsEnum.totalTermFreq());
|
||||
|
||||
IOUtils.close(r, w, dir);
|
||||
}
|
||||
|
||||
// you can't index proximity with custom term freqs:
|
||||
public void testInvalidProx() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
|
||||
|
||||
Document doc = new Document();
|
||||
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
Field field = new Field("field",
|
||||
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
|
||||
new int[] {42, 128, 17, 100}),
|
||||
fieldType);
|
||||
doc.add(field);
|
||||
Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
|
||||
assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
|
||||
IOUtils.close(w, dir);
|
||||
}
|
||||
|
||||
// you can't index DOCS_ONLY with custom term freq
|
||||
public void testInvalidDocsOnly() throws Exception {
|
||||
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

    Document doc = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS);
    Field field = new Field("field",
                            new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                                new int[] {42, 128, 17, 100}),
                            fieldType);
    doc.add(field);
    Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
    assertEquals("field \"field\": must index term freq while using custom TermFrequencyAttribute", e.getMessage());
    IOUtils.close(w, dir);
  }

  // sum of term freqs must fit in an int
  public void testOverflowInt() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS);

    Document doc = new Document();
    doc.add(new Field("field", "this field should be indexed", fieldType));
    w.addDocument(doc);

    Document doc2 = new Document();
    Field field = new Field("field",
                            new CannedTermFreqs(new String[] {"foo", "bar"},
                                                new int[] {3, Integer.MAX_VALUE}),
                            fieldType);
    doc2.add(field);
    expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc2);});

    IndexReader r = DirectoryReader.open(w);
    assertEquals(1, r.numDocs());

    IOUtils.close(r, w, dir);
  }

  public void testInvalidTermVectorPositions() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

    Document doc = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorPositions(true);
    Field field = new Field("field",
                            new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                                new int[] {42, 128, 17, 100}),
                            fieldType);
    doc.add(field);
    Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
    assertEquals("field \"field\": cannot index term vector positions while using custom TermFrequencyAttribute", e.getMessage());
    IOUtils.close(w, dir);
  }

  public void testInvalidTermVectorOffsets() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

    Document doc = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorOffsets(true);
    Field field = new Field("field",
                            new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                                new int[] {42, 128, 17, 100}),
                            fieldType);
    doc.add(field);
    Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
    assertEquals("field \"field\": cannot index term vector offsets while using custom TermFrequencyAttribute", e.getMessage());
    IOUtils.close(w, dir);
  }

  public void testTermVectors() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

    Document doc = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    fieldType.setStoreTermVectors(true);
    Field field = new Field("field",
                            new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                                new int[] {42, 128, 17, 100}),
                            fieldType);
    doc.add(field);
    w.addDocument(doc);

    doc = new Document();
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    field = new Field("field",
                      new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                          new int[] {50, 60, 70, 80}),
                      fieldType);
    doc.add(field);
    w.addDocument(doc);

    IndexReader r = DirectoryReader.open(w);

    Fields fields = r.getTermVectors(0);
    TermsEnum termsEnum = fields.terms("field").iterator();
    assertTrue(termsEnum.seekExact(new BytesRef("bar")));
    assertEquals(228, termsEnum.totalTermFreq());
    PostingsEnum postings = termsEnum.postings(null);
    assertNotNull(postings);
    assertEquals(0, postings.nextDoc());
    assertEquals(228, postings.freq());
    assertEquals(NO_MORE_DOCS, postings.nextDoc());

    assertTrue(termsEnum.seekExact(new BytesRef("foo")));
    assertEquals(59, termsEnum.totalTermFreq());
    postings = termsEnum.postings(null);
    assertNotNull(postings);
    assertEquals(0, postings.nextDoc());
    assertEquals(59, postings.freq());
    assertEquals(NO_MORE_DOCS, postings.nextDoc());

    fields = r.getTermVectors(1);
    termsEnum = fields.terms("field").iterator();
    assertTrue(termsEnum.seekExact(new BytesRef("bar")));
    assertEquals(140, termsEnum.totalTermFreq());
    postings = termsEnum.postings(null);
    assertNotNull(postings);
    assertEquals(0, postings.nextDoc());
    assertEquals(140, postings.freq());
    assertEquals(NO_MORE_DOCS, postings.nextDoc());

    assertTrue(termsEnum.seekExact(new BytesRef("foo")));
    assertEquals(120, termsEnum.totalTermFreq());
    postings = termsEnum.postings(null);
    assertNotNull(postings);
    assertEquals(0, postings.nextDoc());
    assertEquals(120, postings.freq());
    assertEquals(NO_MORE_DOCS, postings.nextDoc());

    IOUtils.close(r, w, dir);
  }

  /**
   * Similarity holds onto the FieldInvertState for subsequent verification.
   */
  private static class NeverForgetsSimilarity extends Similarity {
    public FieldInvertState lastState;
    private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();

    private NeverForgetsSimilarity() {
      // no
    }

    @Override
    public long computeNorm(FieldInvertState state) {
      this.lastState = state;
      return 1;
    }

    @Override
    public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      throw new UnsupportedOperationException();
    }

    @Override
    public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
      throw new UnsupportedOperationException();
    }
  }

  public void testFieldInvertState() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
    IndexWriter w = new IndexWriter(dir, iwc);

    Document doc = new Document();
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    Field field = new Field("field",
                            new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                                new int[] {42, 128, 17, 100}),
                            fieldType);
    doc.add(field);
    w.addDocument(doc);
    FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
    assertEquals(228, fis.getMaxTermFrequency());
    assertEquals(2, fis.getUniqueTermCount());
    assertEquals(0, fis.getNumOverlap());
    assertEquals(287, fis.getLength());

    IOUtils.close(w, dir);
  }
}

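The CannedTermFreqs token stream used throughout these tests is defined outside this excerpt, so its body is not shown here. As a rough sketch of the shape such a helper takes, the following is a hypothetical stand-in (names and details are illustrative, not the test's actual class): it replays a fixed list of terms and attaches a caller-chosen frequency to each one through TermFrequencyAttribute, which is what the assertions above exercise.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;

// Hypothetical stand-in for CannedTermFreqs: emits each term once with a custom frequency.
final class CustomTermFreqTokenStream extends TokenStream {
  private final String[] terms;
  private final int[] termFreqs;
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final TermFrequencyAttribute termFreqAtt = addAttribute(TermFrequencyAttribute.class);
  private int upto;

  CustomTermFreqTokenStream(String[] terms, int[] termFreqs) {
    this.terms = terms;
    this.termFreqs = termFreqs;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (upto == terms.length) {
      return false;
    }
    clearAttributes();
    termAtt.append(terms[upto]);
    termFreqAtt.setTermFrequency(termFreqs[upto]); // per-term frequency supplied by the caller
    upto++;
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    upto = 0;
  }
}
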
@@ -249,14 +249,16 @@ public class TestDoc extends LuceneTestCase {
    for (int i = 0; i < reader.numDocs(); i++)
      out.println(reader.document(i));

    Fields fields = reader.fields();
    for (String field : fields) {
      Terms terms = fields.terms(field);
    for (FieldInfo fieldInfo : reader.getFieldInfos()) {
      if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        continue;
      }
      Terms terms = reader.terms(fieldInfo.name);
      assertNotNull(terms);
      TermsEnum tis = terms.iterator();
      while(tis.next() != null) {

        out.print(" term=" + field + ":" + tis.term());
        out.print(" term=" + fieldInfo.name + ":" + tis.term());
        out.println(" DF=" + tis.docFreq());

        PostingsEnum positions = tis.postings(null, PostingsEnum.POSITIONS);

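The hunk above drops the removed reader.fields() iteration in favor of a FieldInfos-driven loop. Condensed into one place, the resulting pattern looks roughly like this; it is a sketch built from the added lines above, with a hypothetical helper name rather than code from the patch.

import java.io.IOException;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

final class PrintTerms {
  // Iterate indexed fields via FieldInfos and fetch each field's Terms directly from the LeafReader.
  static void printAllTerms(LeafReader reader) throws IOException {
    for (FieldInfo fieldInfo : reader.getFieldInfos()) {
      if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
        continue; // not indexed, so no postings for this field
      }
      Terms terms = reader.terms(fieldInfo.name);
      if (terms == null) {
        continue;
      }
      TermsEnum termsEnum = terms.iterator();
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        System.out.println(" term=" + fieldInfo.name + ":" + term.utf8ToString() + " DF=" + termsEnum.docFreq());
      }
    }
  }
}
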
@@ -37,17 +37,6 @@ import org.junit.Ignore;
public class TestExitableDirectoryReader extends LuceneTestCase {
  private static class TestReader extends FilterLeafReader {

    private static class TestFields extends FilterFields {
      TestFields(Fields in) {
        super(in);
      }

      @Override
      public Terms terms(String field) throws IOException {
        return new TestTerms(super.terms(field));
      }
    }

    private static class TestTerms extends FilterTerms {
      TestTerms(Terms in) {
        super(in);

@@ -83,8 +72,9 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
    }

    @Override
    public Fields fields() throws IOException {
      return new TestFields(super.fields());
    public Terms terms(String field) throws IOException {
      Terms terms = super.terms(field);
      return terms==null ? null : new TestTerms(terms);
    }

    @Override

@@ -0,0 +1,139 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.index;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

public class TestFieldInvertState extends LuceneTestCase {
  /**
   * Similarity holds onto the FieldInvertState for subsequent verification.
   */
  private static class NeverForgetsSimilarity extends Similarity {
    public FieldInvertState lastState;
    private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();

    private NeverForgetsSimilarity() {
      // no
    }

    @Override
    public long computeNorm(FieldInvertState state) {
      this.lastState = state;
      return 1;
    }

    @Override
    public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      throw new UnsupportedOperationException();
    }

    @Override
    public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
      throw new UnsupportedOperationException();
    }
  }

  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    Field field = new Field("field",
                            new CannedTokenStream(new Token("a", 0, 1),
                                                  new Token("b", 2, 3),
                                                  new Token("c", 4, 5)),
                            TextField.TYPE_NOT_STORED);
    doc.add(field);
    w.addDocument(doc);
    FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
    assertEquals(1, fis.getMaxTermFrequency());
    assertEquals(3, fis.getUniqueTermCount());
    assertEquals(0, fis.getNumOverlap());
    assertEquals(3, fis.getLength());
    IOUtils.close(w, dir);
  }

  public void testRandom() throws Exception {
    int numUniqueTokens = TestUtil.nextInt(random(), 1, 25);
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();

    int numTokens = atLeast(10000);
    Token[] tokens = new Token[numTokens];
    Map<Character,Integer> counts = new HashMap<>();
    int numStacked = 0;
    int maxTermFreq = 0;
    int pos = -1;
    for (int i=0;i<numTokens;i++) {
      char tokenChar = (char) ('a' + random().nextInt(numUniqueTokens));
      Integer oldCount = counts.get(tokenChar);
      int newCount;
      if (oldCount == null) {
        newCount = 1;
      } else {
        newCount = 1 + oldCount;
      }
      counts.put(tokenChar, newCount);
      maxTermFreq = Math.max(maxTermFreq, newCount);

      Token token = new Token(Character.toString(tokenChar), 2*i, 2*i+1);

      if (i > 0 && random().nextInt(7) == 3) {
        token.setPositionIncrement(0);
        numStacked++;
      } else {
        pos++;
      }
      tokens[i] = token;
    }

    Field field = new Field("field",
                            new CannedTokenStream(tokens),
                            TextField.TYPE_NOT_STORED);
    doc.add(field);
    w.addDocument(doc);
    FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
    assertEquals(maxTermFreq, fis.getMaxTermFrequency());
    assertEquals(counts.size(), fis.getUniqueTermCount());
    assertEquals(numStacked, fis.getNumOverlap());
    assertEquals(numTokens, fis.getLength());
    assertEquals(pos, fis.getPosition());

    IOUtils.close(w, dir);
  }
}

@@ -35,18 +35,6 @@ public class TestFilterLeafReader extends LuceneTestCase {

  private static class TestReader extends FilterLeafReader {

    /** Filter that only permits terms containing 'e'.*/
    private static class TestFields extends FilterFields {
      TestFields(Fields in) {
        super(in);
      }

      @Override
      public Terms terms(String field) throws IOException {
        return new TestTerms(super.terms(field));
      }
    }

    private static class TestTerms extends FilterTerms {
      TestTerms(Terms in) {
        super(in);

@@ -103,8 +91,9 @@ public class TestFilterLeafReader extends LuceneTestCase {
    }

    @Override
    public Fields fields() throws IOException {
      return new TestFields(super.fields());
    public Terms terms(String field) throws IOException {
      Terms terms = super.terms(field);
      return terms==null ? null : new TestTerms(terms);
    }

    @Override

@@ -17,10 +17,13 @@
package org.apache.lucene.index;


import org.apache.lucene.store.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

public class TestFlex extends LuceneTestCase {

@@ -70,7 +73,7 @@ public class TestFlex extends LuceneTestCase {
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    TermsEnum terms = getOnlyLeafReader(r).fields().terms("f").iterator();
    TermsEnum terms = getOnlyLeafReader(r).terms("f").iterator();
    assertTrue(terms.next() != null);
    try {
      assertEquals(0, terms.ord());

@@ -80,9 +80,7 @@ public class TestIndexReaderClose extends LuceneTestCase {
      reader.getReaderCacheHelper().addClosedListener(new FaultyListener());
    }

    IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
      reader.close();
    });
    IllegalStateException expected = expectThrows(IllegalStateException.class, () -> reader.close());

    if (throwOnClose) {
      assertEquals("BOOM!", expected.getMessage());

@@ -90,9 +88,7 @@ public class TestIndexReaderClose extends LuceneTestCase {
      assertEquals("GRRRRRRRRRRRR!", expected.getMessage());
    }

    expectThrows(AlreadyClosedException.class, () -> {
      reader.fields();
    });
    expectThrows(AlreadyClosedException.class, () -> reader.terms("someField"));

    if (random().nextBoolean()) {
      reader.close(); // call it again

@@ -694,7 +694,7 @@ public class TestIndexWriter extends LuceneTestCase {
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader subreader = getOnlyLeafReader(reader);
    TermsEnum te = subreader.fields().terms("").iterator();
    TermsEnum te = subreader.terms("").iterator();
    assertEquals(new BytesRef("a"), te.next());
    assertEquals(new BytesRef("b"), te.next());
    assertEquals(new BytesRef("c"), te.next());

@@ -715,7 +715,7 @@ public class TestIndexWriter extends LuceneTestCase {
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader subreader = getOnlyLeafReader(reader);
    TermsEnum te = subreader.fields().terms("").iterator();
    TermsEnum te = subreader.terms("").iterator();
    assertEquals(new BytesRef(""), te.next());
    assertEquals(new BytesRef("a"), te.next());
    assertEquals(new BytesRef("b"), te.next());

@@ -136,7 +136,7 @@ public class TestIndexWriterUnicode extends LuceneTestCase {
  }

  private void checkTermsOrder(IndexReader r, Set<String> allTerms, boolean isTop) throws IOException {
    TermsEnum terms = MultiFields.getFields(r).terms("f").iterator();
    TermsEnum terms = MultiFields.getTerms(r, "f").iterator();

    BytesRefBuilder last = new BytesRefBuilder();

@@ -18,7 +18,6 @@ package org.apache.lucene.index;


import java.io.IOException;
import java.util.Iterator;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;

@@ -90,21 +89,10 @@ public class TestParallelTermEnum extends LuceneTestCase {
  public void test1() throws IOException {
    ParallelLeafReader pr = new ParallelLeafReader(ir1, ir2);

    Fields fields = pr.fields();
    Iterator<String> fe = fields.iterator();
    assertEquals(3, pr.getFieldInfos().size());

    String f = fe.next();
    assertEquals("field1", f);
    checkTerms(fields.terms(f), "brown", "fox", "jumps", "quick", "the");

    f = fe.next();
    assertEquals("field2", f);
    checkTerms(fields.terms(f), "brown", "fox", "jumps", "quick", "the");

    f = fe.next();
    assertEquals("field3", f);
    checkTerms(fields.terms(f), "dog", "fox", "jumps", "lazy", "over", "the");

    assertFalse(fe.hasNext());
    checkTerms(pr.terms("field1"), "brown", "fox", "jumps", "quick", "the");
    checkTerms(pr.terms("field2"), "brown", "fox", "jumps", "quick", "the");
    checkTerms(pr.terms("field3"), "dog", "fox", "jumps", "lazy", "over", "the");
  }
}

@@ -479,7 +479,7 @@ public class TestPayloads extends LuceneTestCase {
    }
    writer.close();
    IndexReader reader = DirectoryReader.open(dir);
    TermsEnum terms = MultiFields.getFields(reader).terms(field).iterator();
    TermsEnum terms = MultiFields.getTerms(reader, field).iterator();
    PostingsEnum tp = null;
    while (terms.next() != null) {
      String termText = terms.term().utf8ToString();

@@ -602,7 +602,7 @@ public class TestPayloads extends LuceneTestCase {
    field.setTokenStream(ts);
    writer.addDocument(doc);
    DirectoryReader reader = writer.getReader();
    TermsEnum te = MultiFields.getFields(reader).terms("field").iterator();
    TermsEnum te = MultiFields.getTerms(reader, "field").iterator();
    assertTrue(te.seekExact(new BytesRef("withPayload")));
    PostingsEnum de = te.postings(null, PostingsEnum.PAYLOADS);
    de.nextDoc();

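Both hunks above swap MultiFields.getFields(reader).terms(field) for the single call MultiFields.getTerms(reader, field), which merges the per-segment Terms for one field of a composite reader. A minimal sketch of that pattern, under a hypothetical helper name (the null check matters because a missing or unindexed field yields null):

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;

final class WalkTerms {
  // Count every (term, doc) posting for one field across all segments of the reader.
  static long countPostings(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return 0; // field absent or not indexed
    }
    long count = 0;
    TermsEnum termsEnum = terms.iterator();
    while (termsEnum.next() != null) {
      PostingsEnum postings = termsEnum.postings(null, PostingsEnum.NONE);
      while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        count++;
      }
    }
    return count;
  }
}
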
@@ -221,9 +221,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {

  public int[] toDocsArray(Term term, Bits bits, IndexReader reader)
      throws IOException {
    Fields fields = MultiFields.getFields(reader);
    Terms cterms = fields.terms(term.field);
    TermsEnum ctermsEnum = cterms.iterator();
    TermsEnum ctermsEnum = MultiFields.getTerms(reader, term.field).iterator();
    if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
      PostingsEnum postingsEnum = TestUtil.docs(random(), ctermsEnum, null, PostingsEnum.NONE);
      return toArray(postingsEnum);

@@ -291,7 +291,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
      // TODO: improve this
      LeafReader sub = ctx.reader();
      //System.out.println("\nsub=" + sub);
      final TermsEnum termsEnum = sub.fields().terms("content").iterator();
      final TermsEnum termsEnum = sub.terms("content").iterator();
      PostingsEnum docs = null;
      PostingsEnum docsAndPositions = null;
      PostingsEnum docsAndPositionsAndOffsets = null;

@@ -57,7 +57,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
    SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random()));
    assertTrue(reader != null);

    TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
    TermsEnum terms = reader.terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
    terms.seekCeil(new BytesRef("field"));
    PostingsEnum termDocs = TestUtil.docs(random(), terms, null, PostingsEnum.FREQS);
    if (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {

@@ -19,14 +19,14 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;


public class TestSegmentTermEnum extends LuceneTestCase {

@@ -80,7 +80,7 @@ public class TestSegmentTermEnum extends LuceneTestCase {
    addDoc(writer, "aaa bbb");
    writer.close();
    LeafReader reader = getOnlyLeafReader(DirectoryReader.open(dir));
    TermsEnum terms = reader.fields().terms("content").iterator();
    TermsEnum terms = reader.terms("content").iterator();
    assertNotNull(terms.next());
    assertEquals("aaa", terms.term().utf8ToString());
    assertNotNull(terms.next());

@@ -21,10 +21,13 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.util.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

public class TestStressAdvance extends LuceneTestCase {

@@ -74,7 +77,7 @@ public class TestStressAdvance extends LuceneTestCase {
          bDocIDs.add(docID);
        }
      }
      final TermsEnum te = getOnlyLeafReader(r).fields().terms("field").iterator();
      final TermsEnum te = getOnlyLeafReader(r).terms("field").iterator();

      PostingsEnum de = null;
      for(int iter2=0;iter2<10;iter2++) {

@ -18,7 +18,17 @@ package org.apache.lucene.index;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -738,7 +748,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
DirectoryReader r = w.getReader();
|
||||
w.close();
|
||||
LeafReader sub = getOnlyLeafReader(r);
|
||||
Terms terms = sub.fields().terms("field");
|
||||
Terms terms = sub.terms("field");
|
||||
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||
TermsEnum te = terms.intersect(ca, null);
|
||||
|
@ -792,7 +802,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
DirectoryReader r = w.getReader();
|
||||
w.close();
|
||||
LeafReader sub = getOnlyLeafReader(r);
|
||||
Terms terms = sub.fields().terms("field");
|
||||
Terms terms = sub.terms("field");
|
||||
|
||||
Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||
|
@ -846,7 +856,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
DirectoryReader r = w.getReader();
|
||||
w.close();
|
||||
LeafReader sub = getOnlyLeafReader(r);
|
||||
Terms terms = sub.fields().terms("field");
|
||||
Terms terms = sub.terms("field");
|
||||
|
||||
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton(); // accept ALL
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||
|
@ -986,7 +996,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
w.addDocument(doc);
|
||||
IndexReader r = w.getReader();
|
||||
assertEquals(1, r.leaves().size());
|
||||
TermsEnum te = r.leaves().get(0).reader().fields().terms("field").iterator();
|
||||
TermsEnum te = r.leaves().get(0).reader().terms("field").iterator();
|
||||
for(int i=0;i<=termCount;i++) {
|
||||
assertTrue("term '" + termsList.get(i).utf8ToString() + "' should exist but doesn't", te.seekExact(termsList.get(i)));
|
||||
}
|
||||
|
@ -1007,9 +1017,8 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
doc.add(newStringField("field", "foobar", Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
IndexReader r = w.getReader();
|
||||
Fields fields = MultiFields.getFields(r);
|
||||
Terms terms = MultiFields.getTerms(r, "field");
|
||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
|
||||
Terms terms = fields.terms("field");
|
||||
String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
|
||||
assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
|
||||
r.close();
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterDirectoryReader;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -219,21 +218,16 @@ public class TermInSetQueryTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return new FilterFields(in.fields()) {
|
||||
public Terms terms(String field) throws IOException {
|
||||
Terms terms = super.terms(field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
return new FilterTerms(terms) {
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
final Terms in = this.in.terms(field);
|
||||
if (in == null) {
|
||||
return null;
|
||||
}
|
||||
return new FilterTerms(in) {
|
||||
@Override
|
||||
public TermsEnum iterator() throws IOException {
|
||||
counter.incrementAndGet();
|
||||
return super.iterator();
|
||||
}
|
||||
};
|
||||
public TermsEnum iterator() throws IOException {
|
||||
counter.incrementAndGet();
|
||||
return super.iterator();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@@ -523,6 +523,21 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
      assertTrue("score should be negative", h[i].score < 0);
    }
  }

  public void testRewriteBoolean() throws Exception {
    Query sub1 = tq("hed", "albino");
    Query sub2 = tq("hed", "elephant");
    DisjunctionMaxQuery q = new DisjunctionMaxQuery(
        Arrays.asList(
            sub1, sub2
        ), 1.0f);
    Query rewritten = s.rewrite(q);
    assertTrue(rewritten instanceof BooleanQuery);
    BooleanQuery bq = (BooleanQuery) rewritten;
    assertEquals(bq.clauses().size(), 2);
    assertEquals(bq.clauses().get(0), new BooleanClause(sub1, BooleanClause.Occur.SHOULD));
    assertEquals(bq.clauses().get(1), new BooleanClause(sub2, BooleanClause.Occur.SHOULD));
  }

  /** macro */
  protected Query tq(String f, String t) {

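For context on why the testRewriteBoolean case above is score-preserving (my reading of disjunction-max scoring, not something the diff itself states): a DisjunctionMaxQuery scores a document as the best matching clause plus the tie-breaker multiplier times the scores of the remaining matching clauses, so with the multiplier at 1 the maximum drops out of the formula:

score_dismax = max(s_1..s_n) + tieBreaker * (sum(s_1..s_n) - max(s_1..s_n))
tieBreaker = 1  =>  score_dismax = s_1 + ... + s_n

which is exactly the sum a BooleanQuery of SHOULD clauses computes, so rewriting one query type into the other should not change ranking.
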
@ -71,7 +71,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
|
||||
// this TermEnum gives "piccadilly", "pie" and "pizza".
|
||||
String prefix = "pi";
|
||||
TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
|
||||
TermsEnum te = MultiFields.getTerms(reader,"body").iterator();
|
||||
te.seekCeil(new BytesRef(prefix));
|
||||
do {
|
||||
String s = te.term().utf8ToString();
|
||||
|
|
|
@ -73,7 +73,7 @@ public class TestPhrasePrefixQuery extends LuceneTestCase {
|
|||
|
||||
// this TermEnum gives "piccadilly", "pie" and "pizza".
|
||||
String prefix = "pi";
|
||||
TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
|
||||
TermsEnum te = MultiFields.getTerms(reader, "body").iterator();
|
||||
te.seekCeil(new BytesRef(prefix));
|
||||
do {
|
||||
String s = te.term().utf8ToString();
|
||||
|
|
|
@ -61,7 +61,7 @@ public class TestSameScoresWithThreads extends LuceneTestCase {
|
|||
w.close();
|
||||
|
||||
final IndexSearcher s = newSearcher(r);
|
||||
Terms terms = MultiFields.getFields(r).terms("body");
|
||||
Terms terms = MultiFields.getTerms(r, "body");
|
||||
int termCount = 0;
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
while(termsEnum.next() != null) {
|
||||
|
|
|
@ -22,7 +22,6 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterDirectoryReader;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
|
@ -123,31 +122,27 @@ public class TestTermQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return new FilterFields(super.fields()) {
|
||||
public Terms terms(String field) throws IOException {
|
||||
Terms terms = super.terms(field);
|
||||
return terms==null ? null : new FilterTerms(terms) {
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return new FilterTerms(super.terms(field)) {
|
||||
public TermsEnum iterator() throws IOException {
|
||||
return new FilterTermsEnum(super.iterator()) {
|
||||
@Override
|
||||
public TermsEnum iterator() throws IOException {
|
||||
return new FilterTermsEnum(super.iterator()) {
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef text) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
@Override
|
||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
};
|
||||
public SeekStatus seekCeil(BytesRef text) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
@Override
|
||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
throw new AssertionError("no seek");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -178,32 +178,36 @@ public class TestQueryBuilder extends LuceneTestCase {
|
|||
.build();
|
||||
Query syn2 = new TermQuery(new Term("field", "cavy"));
|
||||
|
||||
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
||||
BooleanQuery synQuery = new BooleanQuery.Builder()
|
||||
.add(syn1, BooleanClause.Occur.SHOULD)
|
||||
.add(syn2, BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
||||
.add(synQuery, occur)
|
||||
.build();
|
||||
|
||||
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
|
||||
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
|
||||
|
||||
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.add(new TermQuery(new Term("field", "story")), occur)
|
||||
.build();
|
||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
|
||||
|
||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "the")), occur)
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.add(new TermQuery(new Term("field", "story")), occur)
|
||||
.build();
|
||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
|
||||
|
||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "the")), occur)
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.add(new TermQuery(new Term("field", "story")), occur)
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.build();
|
||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
|
||||
}
|
||||
|
@ -217,32 +221,36 @@ public class TestQueryBuilder extends LuceneTestCase {
|
|||
.add(new Term("field", "pig"))
|
||||
.build();
|
||||
Query syn2 = new TermQuery(new Term("field", "cavy"));
|
||||
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
||||
|
||||
BooleanQuery synQuery = new BooleanQuery.Builder()
|
||||
.add(syn1, BooleanClause.Occur.SHOULD)
|
||||
.add(syn2, BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
|
||||
.add(synQuery, occur)
|
||||
.build();
|
||||
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
|
||||
queryBuilder.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
|
||||
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
|
||||
|
||||
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.add(new TermQuery(new Term("field", "story")), occur)
|
||||
.build();
|
||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
|
||||
|
||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "the")), occur)
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.add(new TermQuery(new Term("field", "story")), occur)
|
||||
.build();
|
||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
|
||||
|
||||
expectedBooleanQuery = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "the")), occur)
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.add(new TermQuery(new Term("field", "story")), occur)
|
||||
.add(expectedGraphQuery, occur)
|
||||
.add(synQuery, occur)
|
||||
.build();
|
||||
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
|
||||
}
|
||||
|
|
|
@ -25,8 +25,8 @@ import org.apache.lucene.index.DocValuesType;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.LeafMetaData;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.LeafMetaData;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
|
@ -90,8 +90,8 @@ public class TermVectorLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return fields;
|
||||
public Terms terms(String field) throws IOException {
|
||||
return fields.terms(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -148,7 +148,7 @@ public class TermVectorLeafReader extends LeafReader {
|
|||
if (docID != 0) {
|
||||
return null;
|
||||
}
|
||||
return fields();
|
||||
return fields;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.search.highlight;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
@ -30,7 +29,6 @@ import org.apache.lucene.analysis.CachingTokenFilter;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
|
@ -429,30 +427,15 @@ public class WeightedSpanTermExtractor {
|
|||
DelegatingLeafReader(LeafReader in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
throw new UnsupportedOperationException();
|
||||
throw new UnsupportedOperationException();//TODO merge them
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return new FilterFields(super.fields()) {
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return super.terms(DelegatingLeafReader.FIELD_NAME);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Collections.singletonList(DelegatingLeafReader.FIELD_NAME).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
public Terms terms(String field) throws IOException {
|
||||
return super.terms(DelegatingLeafReader.FIELD_NAME);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -83,8 +83,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
return allAutomata.get(0);
|
||||
}
|
||||
//TODO it'd be nice if we could get at the underlying Automaton in CharacterRunAutomaton so that we
|
||||
// could union them all. But it's not exposed, and note TermRangeQuery isn't modelled as an Automaton
|
||||
// by MultiTermHighlighting.
|
||||
// could union them all. But it's not exposed, and sometimes the automaton is byte (not char) oriented
|
||||
|
||||
// Return an aggregate CharacterRunAutomaton of others
|
||||
return new CharacterRunAutomaton(Automata.makeEmpty()) {// the makeEmpty() is bogus; won't be used
|
||||
|
|
|
@ -19,12 +19,10 @@ package org.apache.lucene.search.uhighlight;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
|
@ -32,19 +30,17 @@ import org.apache.lucene.search.BoostQuery;
|
|||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.search.spans.SpanBoostQuery;
|
||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
|
@ -110,18 +106,6 @@ class MultiTermHighlighting {
|
|||
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
|
||||
fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
} else if (query instanceof PrefixQuery) {
|
||||
final PrefixQuery pq = (PrefixQuery) query;
|
||||
Term prefix = pq.getPrefix();
|
||||
if (fieldMatcher.test(prefix.field())) {
|
||||
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
|
||||
Automata.makeAnyString())) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return pq.toString();
|
||||
}
|
||||
});
|
||||
}
|
||||
} else if (query instanceof FuzzyQuery) {
|
||||
final FuzzyQuery fq = (FuzzyQuery) query;
|
||||
if (fieldMatcher.test(fq.getField())) {
|
||||
|
@ -143,69 +127,63 @@ class MultiTermHighlighting {
|
|||
}
|
||||
});
|
||||
}
|
||||
} else if (query instanceof TermRangeQuery) {
|
||||
final TermRangeQuery tq = (TermRangeQuery) query;
|
||||
if (fieldMatcher.test(tq.getField())) {
|
||||
final CharsRef lowerBound;
|
||||
if (tq.getLowerTerm() == null) {
|
||||
lowerBound = null;
|
||||
} else {
|
||||
lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
|
||||
}
|
||||
|
||||
final CharsRef upperBound;
|
||||
if (tq.getUpperTerm() == null) {
|
||||
upperBound = null;
|
||||
} else {
|
||||
upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
|
||||
}
|
||||
|
||||
final boolean includeLower = tq.includesLower();
|
||||
final boolean includeUpper = tq.includesUpper();
|
||||
final CharsRef scratch = new CharsRef();
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
|
||||
|
||||
// this is *not* an automaton, but its very simple
|
||||
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
|
||||
@Override
|
||||
public boolean run(char[] s, int offset, int length) {
|
||||
scratch.chars = s;
|
||||
scratch.offset = offset;
|
||||
scratch.length = length;
|
||||
|
||||
if (lowerBound != null) {
|
||||
int cmp = comparator.compare(scratch, lowerBound);
|
||||
if (cmp < 0 || (!includeLower && cmp == 0)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (upperBound != null) {
|
||||
int cmp = comparator.compare(scratch, upperBound);
|
||||
if (cmp > 0 || (!includeUpper && cmp == 0)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return tq.toString();
|
||||
}
|
||||
});
|
||||
}
|
||||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (fieldMatcher.test(aq.getField())) {
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
}
|
||||
});
|
||||
|
||||
if (aq.isAutomatonBinary() == false) { // note: is the case for WildcardQuery, RegexpQuery
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
}
|
||||
});
|
||||
} else { // note: is the case for PrefixQuery, TermRangeQuery
|
||||
// byte oriented automaton:
|
||||
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) { // empty here is bogus just to satisfy API
|
||||
// TODO can we get access to the aq.compiledAutomaton.runAutomaton ?
|
||||
ByteRunAutomaton byteRunAutomaton =
|
||||
new ByteRunAutomaton(aq.getAutomaton(), true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
|
||||
@Override
|
||||
public boolean run(char[] chars, int offset, int length) {
|
||||
int state = 0;
|
||||
final int maxIdx = offset + length;
|
||||
for (int i = offset; i < maxIdx; i++) {
|
||||
final int code = chars[i];
|
||||
int b;
|
||||
// UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 )
|
||||
if (code < 0x80) {
|
||||
state = byteRunAutomaton.step(state, code);
|
||||
if (state == -1) return false;
|
||||
} else if (code < 0x800) {
|
||||
b = (0xC0 | (code >> 6));
|
||||
state = byteRunAutomaton.step(state, b);
|
||||
if (state == -1) return false;
|
||||
b = (0x80 | (code & 0x3F));
|
||||
state = byteRunAutomaton.step(state, b);
|
||||
if (state == -1) return false;
|
||||
} else {
|
||||
// more complex
|
||||
byte[] utf8Bytes = new byte[4 * (maxIdx - i)];
|
||||
int utf8Len = UnicodeUtil.UTF16toUTF8(chars, i, maxIdx - i, utf8Bytes);
|
||||
for (int utfIdx = 0; utfIdx < utf8Len; utfIdx++) {
|
||||
state = byteRunAutomaton.step(state, utf8Bytes[utfIdx] & 0xFF);
|
||||
if (state == -1) return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return byteRunAutomaton.isAccept(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
return list.toArray(new CharacterRunAutomaton[list.size()]);
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Collections;
|
|||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -36,7 +35,6 @@ import java.util.function.Predicate;
|
|||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
@ -529,12 +527,16 @@ public class PhraseHelper {
|
|||
}
|
||||
}
|
||||
|
||||
//TODO move up; it's currently inbetween other inner classes that are related
|
||||
/**
|
||||
* Needed to support the ability to highlight a query irrespective of the field a query refers to
|
||||
* (aka requireFieldMatch=false).
|
||||
* This reader will just delegate every call to a single field in the wrapped
|
||||
* LeafReader. This way we ensure that all queries going through this reader target the same field.
|
||||
*/
|
||||
*/
|
||||
static final class SingleFieldFilterLeafReader extends FilterLeafReader {
|
||||
final String fieldName;
|
||||
|
||||
SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
|
||||
super(in);
|
||||
this.fieldName = fieldName;
|
||||
|
@ -542,27 +544,12 @@ public class PhraseHelper {
|
|||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
throw new UnsupportedOperationException();
|
||||
throw new UnsupportedOperationException();//TODO merge them
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return new FilterFields(super.fields()) {
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return super.terms(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Collections.singletonList(fieldName).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
public Terms terms(String field) throws IOException {
|
||||
return super.terms(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.search.uhighlight;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
|
@ -52,24 +51,9 @@ final class TermVectorFilteredLeafReader extends FilterLeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return new TermVectorFilteredFields(in.fields(), filterTerms);
|
||||
}
|
||||
|
||||
private static final class TermVectorFilteredFields extends FilterLeafReader.FilterFields {
|
||||
// NOTE: super ("in") is baseFields
|
||||
|
||||
private final Terms filterTerms;
|
||||
|
||||
TermVectorFilteredFields(Fields baseFields, Terms filterTerms) {
|
||||
super(baseFields);
|
||||
this.filterTerms = filterTerms;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return new TermsFilteredTerms(in.terms(field), filterTerms);
|
||||
}
|
||||
public Terms terms(String field) throws IOException {
|
||||
Terms terms = in.terms(field);
|
||||
return terms==null ? null : new TermsFilteredTerms(terms, filterTerms);
|
||||
}
|
||||
|
||||
private static final class TermsFilteredTerms extends FilterLeafReader.FilterTerms {
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Locale;
|
|||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
||||
public class TestSplittingBreakIterator extends LuceneTestCase {
|
||||
|
||||
|
||||
|
|
|
@ -51,12 +51,9 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
||||
public class TestUnifiedHighlighter extends LuceneTestCase {
|
||||
|
||||
private final FieldType fieldType; // for "body" generally, but not necessarily others. See constructor
|
||||
|
|
|
@ -24,11 +24,13 @@ import java.util.List;
|
|||
import java.util.Objects;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -63,16 +65,15 @@ import org.apache.lucene.search.spans.SpanQuery;
|
|||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
||||
/**
|
||||
* Some tests that highlight wildcard, fuzzy, etc queries.
|
||||
*/
|
||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
||||
public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
||||
|
||||
final FieldType fieldType;
|
||||
|
@ -1079,4 +1080,66 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
assertEquals("[<b>я</b>]", Arrays.toString(snippets));
|
||||
ir.close();
|
||||
}
|
||||
|
||||
// LUCENE-7719
|
||||
public void testMultiByteMTQ() throws IOException {
|
||||
Analyzer analyzer = new KeywordAnalyzer();
|
||||
try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer)) {
|
||||
for (int attempt = 0; attempt < 20; attempt++) {
|
||||
iw.deleteAll();
|
||||
String field = "title";
|
||||
String value = RandomStrings.randomUnicodeOfLength(random(), 3);
|
||||
if (value.contains(UnifiedHighlighter.MULTIVAL_SEP_CHAR+"")) { // will throw things off
|
||||
continue;
|
||||
}
|
||||
int[] valuePoints = value.codePoints().toArray();
|
||||
|
||||
iw.addDocument(Collections.singleton(
|
||||
new Field(field, value, fieldType)));
|
||||
iw.commit();
|
||||
try (IndexReader ir = iw.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
|
||||
highlighter.setBreakIterator(WholeBreakIterator::new);
|
||||
|
||||
// Test PrefixQuery
|
||||
Query query = new PrefixQuery(new Term(field,
|
||||
UnicodeUtil.newString(valuePoints, 0, 1)));
|
||||
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||
|
||||
// Test TermRangeQuery
|
||||
query = new TermRangeQuery(field,
|
||||
new BytesRef(value),
|
||||
new BytesRef(value),
|
||||
true, true );
|
||||
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||
|
||||
// Test FuzzyQuery
|
||||
query = new FuzzyQuery(new Term(field, value + "Z"), 1);
|
||||
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||
|
||||
if (valuePoints.length != 3) {
|
||||
continue; // even though we ask RandomStrings for a String with 3 code points, it seems sometimes it's less
|
||||
}
|
||||
|
||||
// Test WildcardQuery
|
||||
query = new WildcardQuery(new Term(field,
|
||||
new StringBuilder()
|
||||
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[0])
|
||||
.append(WildcardQuery.WILDCARD_CHAR)
|
||||
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[2]).toString()));
|
||||
highlightAndAssertMatch(searcher, highlighter, query, field, value);
|
||||
|
||||
//TODO hmmm; how to randomly generate RegexpQuery? Low priority; we've covered the others well.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void highlightAndAssertMatch(IndexSearcher searcher, UnifiedHighlighter highlighter, Query query, String field, String fieldVal) throws IOException {
|
||||
TopDocs topDocs = searcher.search(query, 1);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
String[] snippets = highlighter.highlight(field, query, topDocs);
|
||||
assertEquals("[<b>"+fieldVal+"</b>]", Arrays.toString(snippets));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,15 +37,12 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
|
||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
||||
public class TestUnifiedHighlighterRanking extends LuceneTestCase {
|
||||
|
||||
Analyzer indexAnalyzer;
|
||||
|
|
|
@ -32,8 +32,6 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
|
||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
|
||||
public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
|
||||
|
||||
private MockAnalyzer indexAnalyzer =
|
||||
|
|
|
@@ -54,8 +54,6 @@ import org.apache.lucene.util.QueryBuilder;
import org.junit.After;
import org.junit.Before;

@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {

  final FieldType fieldType;
@@ -48,8 +48,6 @@ import java.util.Map;
 * This test DOES NOT represent all testing for highlighting when term vectors are used. Other tests pick the offset
 * source at random (to include term vectors) and in-effect test term vectors generally.
 */
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterTermVec extends LuceneTestCase {

  private Analyzer indexAnalyzer;
@@ -217,18 +217,20 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
    // TEST OffsetsEnums & Passage visibility

    // this code never runs; just for compilation
    OffsetsEnum oe = new OffsetsEnum(null, EMPTY);
    oe.getTerm();
    oe.getPostingsEnum();
    oe.freq();
    oe.hasMorePositions();
    oe.nextPosition();
    oe.startOffset();
    oe.endOffset();
    oe.getWeight();
    oe.setWeight(2f);
    Passage p;
    try (OffsetsEnum oe = new OffsetsEnum(null, EMPTY)) {
      oe.getTerm();
      oe.getPostingsEnum();
      oe.freq();
      oe.hasMorePositions();
      oe.nextPosition();
      oe.startOffset();
      oe.endOffset();
      oe.getWeight();
      oe.setWeight(2f);
    }

    Passage p = new Passage();
    p = new Passage();
    p.setStartOffset(0);
    p.setEndOffset(9);
    p.setScore(1f);
@@ -1325,7 +1325,10 @@ public class TestJoinUtil extends LuceneTestCase {
    String uniqueRandomValue;
    do {
      // the trick is to generate values which will be ordered similarly for string, ints&longs, positive nums makes it easier
      final int nextInt = random.nextInt(Integer.MAX_VALUE);
      //
      // Additionally in order to avoid precision loss when joining via a float field we can't generate values higher than
      // 0xFFFFFF, so we can't use Integer#MAX_VALUE as upper bound here:
      final int nextInt = random.nextInt(0xFFFFFF);
      uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
      assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue,16);
    } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
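The 0xFFFFFF cap ties to float precision: a float carries a 24-bit significand, so not every integer above 2^24 - 1 survives a round trip through float. A standalone illustration of that limit (this snippet is not part of the patch):

public class FloatPrecisionDemo {
  public static void main(String[] args) {
    int stillExact = 0xFFFFFF;  // 16777215, still exactly representable as a float
    int tooBig = 0x1000001;     // 16777217, rounds to 16777216 when stored in a float
    System.out.println(stillExact == (int) (float) stillExact); // true
    System.out.println(tooBig == (int) (float) tooBig);         // false: precision lost
  }
}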
@@ -868,20 +868,27 @@ public class MemoryIndex {

    final int numDimensions = fieldInfo.getPointDimensionCount();
    final int numBytesPerDimension = fieldInfo.getPointNumBytes();
    minPackedValue = pointValues[0].bytes.clone();
    maxPackedValue = pointValues[0].bytes.clone();

    for (int i = 0; i < pointValuesCount; i++) {
      BytesRef pointValue = pointValues[i];
      assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";

      for (int dim = 0; dim < numDimensions; ++dim) {
        int offset = dim * numBytesPerDimension;
        if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
          System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
        }
        if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
          System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
    if (numDimensions == 1) {
      // PointInSetQuery.MergePointVisitor expects values to be visited in increasing order,
      // this is a 1d optimization which has to be done here too. Otherwise we emit values
      // out of order which causes mismatches.
      Arrays.sort(pointValues, 0, pointValuesCount);
      minPackedValue = pointValues[0].bytes.clone();
      maxPackedValue = pointValues[pointValuesCount - 1].bytes.clone();
    } else {
      minPackedValue = pointValues[0].bytes.clone();
      maxPackedValue = pointValues[0].bytes.clone();
      for (int i = 0; i < pointValuesCount; i++) {
        BytesRef pointValue = pointValues[i];
        assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
        for (int dim = 0; dim < numDimensions; ++dim) {
          int offset = dim * numBytesPerDimension;
          if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
            System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
          }
          if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
            System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
          }
        }
      }
    }
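The sort above works because BytesRef orders encoded point values the same way the points index does: byte by byte, as unsigned values. A small illustration of that ordering (not part of the patch):

BytesRef a = new BytesRef(new byte[] { (byte) 0x80 }); // unsigned 128
BytesRef b = new BytesRef(new byte[] { (byte) 0x01 }); // unsigned 1
// BytesRef.compareTo compares bytes as unsigned, so b sorts before a
// even though (byte) 0x80 is negative as a signed Java byte.
assert b.compareTo(a) < 0;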
@@ -1127,7 +1134,7 @@ public class MemoryIndex {
   */
  private final class MemoryIndexReader extends LeafReader {

    private Fields memoryFields = new MemoryFields(fields);
    private final MemoryFields memoryFields = new MemoryFields(fields);

    private MemoryIndexReader() {
      super(); // avoid as much superclass baggage as possible
@@ -1229,8 +1236,8 @@ public class MemoryIndex {
    }

    @Override
    public Fields fields() {
      return memoryFields;
    public Terms terms(String field) throws IOException {
      return memoryFields.terms(field);
    }

    private class MemoryFields extends Fields {
@@ -1582,7 +1589,7 @@ public class MemoryIndex {
    @Override
    public Fields getTermVectors(int docID) {
      if (docID == 0) {
        return fields();
        return memoryFields;
      } else {
        return null;
      }
@@ -131,7 +131,7 @@ public class TestMemoryIndex extends LuceneTestCase {
    mi.addField("field", "some terms be here", analyzer);
    IndexSearcher searcher = mi.createSearcher();
    LeafReader reader = (LeafReader) searcher.getIndexReader();
    TermsEnum terms = reader.fields().terms("field").iterator();
    TermsEnum terms = reader.terms("field").iterator();
    terms.seekExact(0);
    assertEquals("be", terms.term().utf8ToString());
    TestUtil.checkReader(reader);
@@ -512,6 +512,30 @@ public class TestMemoryIndex extends LuceneTestCase {
    assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new double[] {10D, 10D}, new double[] {30D, 30D})));
  }

  public void testMultiValuedPointsSortedCorrectly() throws Exception {
    Document doc = new Document();
    doc.add(new IntPoint("ints", 3));
    doc.add(new IntPoint("ints", 2));
    doc.add(new IntPoint("ints", 1));
    doc.add(new LongPoint("longs", 3L));
    doc.add(new LongPoint("longs", 2L));
    doc.add(new LongPoint("longs", 1L));
    doc.add(new FloatPoint("floats", 3F));
    doc.add(new FloatPoint("floats", 2F));
    doc.add(new FloatPoint("floats", 1F));
    doc.add(new DoublePoint("doubles", 3D));
    doc.add(new DoublePoint("doubles", 2D));
    doc.add(new DoublePoint("doubles", 1D));

    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
    IndexSearcher s = mi.createSearcher();

    assertEquals(1, s.count(IntPoint.newSetQuery("ints", 2)));
    assertEquals(1, s.count(LongPoint.newSetQuery("longs", 2)));
    assertEquals(1, s.count(FloatPoint.newSetQuery("floats", 2)));
    assertEquals(1, s.count(DoublePoint.newSetQuery("doubles", 2)));
  }

  public void testIndexingPointsAndDocValues() throws Exception {
    FieldType type = new FieldType();
    type.setDimensions(1, 4);
@@ -53,7 +53,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@@ -67,8 +66,8 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@@ -171,7 +170,7 @@ public class TestMemoryIndexAgainstRAMDir extends BaseTokenStreamTestCase {

  private void duellReaders(CompositeReader other, LeafReader memIndexReader)
      throws IOException {
    Fields memFields = memIndexReader.fields();
    Fields memFields = memIndexReader.getTermVectors(0);
    for (String field : MultiFields.getFields(other)) {
      Terms memTerms = memFields.terms(field);
      Terms iwTerms = memIndexReader.terms(field);
@@ -39,15 +39,16 @@ public class IndexMergeTool {
      System.err.println("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
      System.exit(1);
    }
    FSDirectory mergedIndex = FSDirectory.open(Paths.get(args[0]));

    IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(null)
        .setOpenMode(OpenMode.CREATE));
    // Try to use hardlinks to source segments, if possible.
    Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[0])));

    IndexWriter writer = new IndexWriter(mergedIndex,
        new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE));

    Directory[] indexes = new Directory[args.length - 1];
    for (int i = 1; i < args.length; i++) {
      // try to use hardlinks if possible
      indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i])));
      indexes[i - 1] = FSDirectory.open(Paths.get(args[i]));
    }

    System.out.println("Merging...");
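A sketch of the same hardlink-friendly merge done programmatically; the paths "merged", "index1" and "index2" are placeholders and not part of the patch:

Directory merged = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get("merged")));
try (IndexWriter writer = new IndexWriter(merged,
    new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE))) {
  // addIndexes copies segment files into the wrapped destination directory;
  // the wrapper substitutes hardlinks where the filesystem allows it.
  writer.addIndexes(FSDirectory.open(Paths.get("index1")),
                    FSDirectory.open(Paths.get("index2")));
  writer.commit();
}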
@@ -22,7 +22,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Objects;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@@ -214,11 +213,10 @@ public class CommonTermsQuery extends Query {
      Term[] queryTerms) throws IOException {
    TermsEnum termsEnum = null;
    for (LeafReaderContext context : leaves) {
      final Fields fields = context.reader().fields();
      for (int i = 0; i < queryTerms.length; i++) {
        Term term = queryTerms[i];
        TermContext termContext = contextArray[i];
        final Terms terms = fields.terms(term.field());
        final Terms terms = context.reader().terms(term.field());
        if (terms == null) {
          // field does not exist
          continue;
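The same migration pattern recurs throughout this commit: per-field term access moves from reader.fields().terms(field) to reader.terms(field), which may return null when the field is absent. A minimal sketch of the new call shape (the field name "body" is illustrative only, not from the patch):

static void dumpTerms(LeafReader reader) throws IOException {
  Terms terms = reader.terms("body");   // null when the segment has no such field
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator();
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      System.out.println(term.utf8ToString() + " docFreq=" + termsEnum.docFreq());
    }
  }
}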
@@ -19,9 +19,8 @@ package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.function.FunctionValues;
@@ -50,8 +49,7 @@ public class TFValueSource extends TermFreqValueSource {

  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader().fields();
    final Terms terms = fields.terms(indexedField);
    final Terms terms = readerContext.reader().terms(indexedField);
    IndexSearcher searcher = (IndexSearcher)context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), indexedField);
    if (similarity == null) {
@@ -19,9 +19,8 @@ package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.function.FunctionValues;
@@ -48,8 +47,7 @@ public class TermFreqValueSource extends DocFreqValueSource {

  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader().fields();
    final Terms terms = fields.terms(indexedField);
    final Terms terms = readerContext.reader().terms(indexedField);

    return new IntDocValues(this) {
      PostingsEnum docs ;
@@ -351,7 +351,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
    assertEquals("Synonym(b:dog b:dogs) Synonym(t:dog t:dogs)", q.toString());
    q = parser.parse("guinea pig");
    assertFalse(parser.getSplitOnWhitespace());
    assertEquals("((+b:guinea +b:pig) (+t:guinea +t:pig)) (b:cavy t:cavy)", q.toString());
    assertEquals("((+b:guinea +b:pig) b:cavy) ((+t:guinea +t:pig) t:cavy)", q.toString());
    parser.setSplitOnWhitespace(true);
    q = parser.parse("guinea pig");
    assertEquals("(b:guinea t:guinea) (b:pig t:pig)", q.toString());
@@ -522,8 +522,10 @@ public class TestQueryParser extends QueryParserTestBase {
        .build();

    BooleanQuery graphQuery = new BooleanQuery.Builder()
        .add(guineaPig, BooleanClause.Occur.SHOULD)
        .add(cavy, BooleanClause.Occur.SHOULD)
        .add(new BooleanQuery.Builder()
            .add(guineaPig, BooleanClause.Occur.SHOULD)
            .add(cavy, BooleanClause.Occur.SHOULD)
            .build(), BooleanClause.Occur.SHOULD)
        .build();
    assertEquals(graphQuery, dumb.parse("guinea pig"));
@@ -541,11 +543,32 @@ public class TestQueryParser extends QueryParserTestBase {
    QueryParser smart = new SmartQueryParser();
    smart.setSplitOnWhitespace(false);
    graphQuery = new BooleanQuery.Builder()
        .add(guineaPig, BooleanClause.Occur.SHOULD)
        .add(cavy, BooleanClause.Occur.SHOULD)
        .add(new BooleanQuery.Builder()
            .add(guineaPig, BooleanClause.Occur.SHOULD)
            .add(cavy, BooleanClause.Occur.SHOULD)
            .build(), BooleanClause.Occur.SHOULD)
        .build();
    assertEquals(graphQuery, smart.parse("guinea pig"));
    assertEquals(phraseGuineaPig, smart.parse("\"guinea pig\""));

    // with the AND operator
    dumb.setDefaultOperator(Operator.AND);
    BooleanQuery graphAndQuery = new BooleanQuery.Builder()
        .add(new BooleanQuery.Builder()
            .add(guineaPig, BooleanClause.Occur.SHOULD)
            .add(cavy, BooleanClause.Occur.SHOULD)
            .build(), BooleanClause.Occur.MUST)
        .build();
    assertEquals(graphAndQuery, dumb.parse("guinea pig"));

    graphAndQuery = new BooleanQuery.Builder()
        .add(new BooleanQuery.Builder()
            .add(guineaPig, BooleanClause.Occur.SHOULD)
            .add(cavy, BooleanClause.Occur.SHOULD)
            .build(), BooleanClause.Occur.MUST)
        .add(cavy, BooleanClause.Occur.MUST)
        .build();
    assertEquals(graphAndQuery, dumb.parse("guinea pig cavy"));
  }

  public void testEnableGraphQueries() throws Exception {
@@ -616,30 +639,30 @@ public class TestQueryParser extends QueryParserTestBase {
    assertQueryEquals("guinea /pig/", a, "guinea /pig/");

    // Operators should not interrupt multiword analysis if not don't associate
    assertQueryEquals("(guinea pig)", a, "(+guinea +pig) cavy");
    assertQueryEquals("+(guinea pig)", a, "+((+guinea +pig) cavy)");
    assertQueryEquals("-(guinea pig)", a, "-((+guinea +pig) cavy)");
    assertQueryEquals("!(guinea pig)", a, "-((+guinea +pig) cavy)");
    assertQueryEquals("NOT (guinea pig)", a, "-((+guinea +pig) cavy)");
    assertQueryEquals("(guinea pig)^2", a, "((+guinea +pig) cavy)^2.0");
    assertQueryEquals("(guinea pig)", a, "((+guinea +pig) cavy)");
    assertQueryEquals("+(guinea pig)", a, "+(((+guinea +pig) cavy))");
    assertQueryEquals("-(guinea pig)", a, "-(((+guinea +pig) cavy))");
    assertQueryEquals("!(guinea pig)", a, "-(((+guinea +pig) cavy))");
    assertQueryEquals("NOT (guinea pig)", a, "-(((+guinea +pig) cavy))");
    assertQueryEquals("(guinea pig)^2", a, "(((+guinea +pig) cavy))^2.0");

    assertQueryEquals("field:(guinea pig)", a, "(+guinea +pig) cavy");
    assertQueryEquals("field:(guinea pig)", a, "((+guinea +pig) cavy)");

    assertQueryEquals("+small guinea pig", a, "+small (+guinea +pig) cavy");
    assertQueryEquals("-small guinea pig", a, "-small (+guinea +pig) cavy");
    assertQueryEquals("!small guinea pig", a, "-small (+guinea +pig) cavy");
    assertQueryEquals("NOT small guinea pig", a, "-small (+guinea +pig) cavy");
    assertQueryEquals("small* guinea pig", a, "small* (+guinea +pig) cavy");
    assertQueryEquals("small? guinea pig", a, "small? (+guinea +pig) cavy");
    assertQueryEquals("\"small\" guinea pig", a, "small (+guinea +pig) cavy");
    assertQueryEquals("+small guinea pig", a, "+small ((+guinea +pig) cavy)");
    assertQueryEquals("-small guinea pig", a, "-small ((+guinea +pig) cavy)");
    assertQueryEquals("!small guinea pig", a, "-small ((+guinea +pig) cavy)");
    assertQueryEquals("NOT small guinea pig", a, "-small ((+guinea +pig) cavy)");
    assertQueryEquals("small* guinea pig", a, "small* ((+guinea +pig) cavy)");
    assertQueryEquals("small? guinea pig", a, "small? ((+guinea +pig) cavy)");
    assertQueryEquals("\"small\" guinea pig", a, "small ((+guinea +pig) cavy)");

    assertQueryEquals("guinea pig +running", a, "(+guinea +pig) cavy +running");
    assertQueryEquals("guinea pig -running", a, "(+guinea +pig) cavy -running");
    assertQueryEquals("guinea pig !running", a, "(+guinea +pig) cavy -running");
    assertQueryEquals("guinea pig NOT running", a, "(+guinea +pig) cavy -running");
    assertQueryEquals("guinea pig running*", a, "(+guinea +pig) cavy running*");
    assertQueryEquals("guinea pig running?", a, "(+guinea +pig) cavy running?");
    assertQueryEquals("guinea pig \"running\"", a, "(+guinea +pig) cavy running");
    assertQueryEquals("guinea pig +running", a, "((+guinea +pig) cavy) +running");
    assertQueryEquals("guinea pig -running", a, "((+guinea +pig) cavy) -running");
    assertQueryEquals("guinea pig !running", a, "((+guinea +pig) cavy) -running");
    assertQueryEquals("guinea pig NOT running", a, "((+guinea +pig) cavy) -running");
    assertQueryEquals("guinea pig running*", a, "((+guinea +pig) cavy) running*");
    assertQueryEquals("guinea pig running?", a, "((+guinea +pig) cavy) running?");
    assertQueryEquals("guinea pig \"running\"", a, "((+guinea +pig) cavy) running");

    assertQueryEquals("\"guinea pig\"~2", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
@@ -744,14 +767,16 @@ public class TestQueryParser extends QueryParserTestBase {
    BooleanQuery guineaPig = synonym.build();

    BooleanQuery graphQuery = new BooleanQuery.Builder()
        .add(guineaPig, BooleanClause.Occur.SHOULD)
        .add(cavy, BooleanClause.Occur.SHOULD)
        .build();;
        .add(new BooleanQuery.Builder()
            .add(guineaPig, BooleanClause.Occur.SHOULD)
            .add(cavy, BooleanClause.Occur.SHOULD)
            .build(), BooleanClause.Occur.SHOULD)
        .build();
    assertEquals(graphQuery, parser.parse("guinea pig"));

    boolean oldSplitOnWhitespace = splitOnWhitespace;
    splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
    assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "(+guinea +pig) cavy");
    assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "((+guinea +pig) cavy)");
    splitOnWhitespace = oldSplitOnWhitespace;
  }
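The extra level of nesting in the expected strings above comes from parsing the multi-word synonym as one graph when whitespace splitting is off. A minimal usage sketch (the field name is illustrative; MockSynonymAnalyzer is the test analyzer that treats "guinea pig" and "cavy" as synonyms):

QueryParser parser = new QueryParser("field", new MockSynonymAnalyzer());
parser.setSplitOnWhitespace(false);
Query q = parser.parse("guinea pig");
// q.toString("field") is expected to look like ((+guinea +pig) cavy), per the assertions above.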
@@ -37,12 +37,12 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.PerThreadPKLookup;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
@@ -75,7 +75,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
    doc.add(makeIDField("id1", 110));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().fields().terms("id").iterator();
    IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().terms("id").iterator();
    assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
    assertTrue(termsEnum.seekExact(new BytesRef("id0"), 100));
    assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));
@@ -71,10 +71,11 @@ public class AssertingLeafReader extends FilterLeafReader {
  }

  @Override
  public Fields fields() throws IOException {
    return new AssertingFields(super.fields());
  public Terms terms(String field) throws IOException {
    Terms terms = super.terms(field);
    return terms == null ? null : new AssertingTerms(terms);
  }

  @Override
  public Fields getTermVectors(int docID) throws IOException {
    Fields fields = super.getTermVectors(docID);
@@ -335,7 +335,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {

    // PostingsFormat
    try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
      consumer.write(oneDocReader.fields());
      consumer.write(MultiFields.getFields(oneDocReader));
      IOUtils.close(consumer);
      IOUtils.close(consumer);
    }
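At the composite-reader level the whole-fields view now goes through MultiFields, as above; when only a single field is needed, the narrower accessor is usually enough. A hedged sketch (the reader variable and the field name "body" are placeholders, not from the patch):

Terms terms = MultiFields.getTerms(indexReader, "body");
if (terms != null) {
  System.out.println("terms.size() = " + terms.size()); // -1 when the codec does not record an exact count
}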
Some files were not shown because too many files have changed in this diff.