Merge branch 'master' into feature/autoscaling

# Conflicts:
#	solr/CHANGES.txt
Shalin Shekhar Mangar 2017-06-21 12:35:48 +05:30
commit fb7803d9a0
657 changed files with 10458 additions and 5344 deletions

View File

@ -66,6 +66,13 @@
</foaf:Person>
</maintainer>
<release>
<Version>
<name>lucene-6.6.0</name>
<created>2017-06-06</created>
<revision>6.6.0</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-6.5.1</name>

View File

@ -66,6 +66,13 @@
</foaf:Person>
</maintainer>
<release>
<Version>
<name>solr-6.6.0</name>
<created>2017-06-06</created>
<revision>6.6.0</revision>
</Version>
</release>
<release>
<Version>
<name>solr-6.5.1</name>

View File

@ -6,6 +6,9 @@
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
</content>
<content url="file://$MODULE_DIR$/../resources">
<sourceFolder url="file://$MODULE_DIR$/../resources" type="java-resource" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Solrj library" level="project" />

View File

@ -14,6 +14,16 @@ New Features
well as the oldest Lucene version that contributed to the segment.
(Adrien Grand)
* LUCENE-7854: The new TermFrequencyAttribute used during analysis
with a custom token stream allows indexing custom term frequencies
(Mike McCandless)
* LUCENE-7866: Add a new DelimitedTermFrequencyTokenFilter that allows marking
tokens with a custom term frequency (LUCENE-7854). It parses a numeric
value after a separator char ('|') at the end of each token and changes
the term frequency to this value. (Uwe Schindler, Robert Muir, Mike
McCandless)
API Changes
* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.
@ -59,6 +69,11 @@ API Changes
* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)
* LUCENE-7500: Removed abstract LeafReader.fields(); instead terms(fieldName)
has been made abstract; it was formerly final. Also, MultiFields.getTerms
was optimized to work directly instead of being implemented on getFields.
(David Smiley)
Bug Fixes
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
@ -97,6 +112,8 @@ Optimizations
query is a point (for 2D) or is a simple date interval (e.g. 1 month). When
the strategy is marked as pointsOnly, the result is a TermQuery. (David Smiley)
* LUCENE-7874: DisjunctionMaxQuery rewrites to a BooleanQuery when tiebreaker is set to 1. (Jim Ferenczi)
Other
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
@ -113,18 +130,53 @@ Other
* LUCENE-7852: Correct copyright year(s) in lucene/LICENSE.txt file.
(Christine Poerschke, Steve Rowe)
* LUCENE-7719: Generalized the UnifiedHighlighter's support for AutomatonQuery
for character & binary automata. Added AutomatonQuery.isBinary. (David Smiley)
* LUCENE-7873: Due to serious problems with context class loaders in several
frameworks (OSGI, Java 9 Jigsaw), the lookup of Codecs, PostingsFormats,
DocValuesFormats and all analysis factories was changed to only inspect the
current classloader that defined the interface class (lucene-core.jar).
See MIGRATE.txt for more information! (Uwe Schindler, Dawid Weiss)
======================= Lucene 6.7.0 =======================
New Features
* LUCENE-7855: Added advanced options of the Wikipedia tokenizer to its factory.
(Juan Pedro via Adrien Grand)
Bug Fixes
* LUCENE-7864: IndexMergeTool is not using intermediate hard links (even
if possible). (Dawid Weiss)
* LUCENE-7869: Changed MemoryIndex to sort 1d points. In case of 1d points, the PointInSetQuery.MergePointVisitor expects
that these points are visited in ascending order. The memory index doesn't do this, which can result in documents
with multiple points that should match not matching. (Martijn van Groningen)
* LUCENE-7878: Fix query builder to keep the SHOULD clause that wraps multi-word synonyms. (Jim Ferenczi)
Other
* LUCENE-7800: Remove code that potentially rethrows checked exceptions
from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
Uwe Schindler, Dawid Weiss)
* LUCENE-7876: Avoid calls to LeafReader.fields() and MultiFields.getFields()
that are trivially replaced by LeafReader.terms() and MultiFields.getTerms()
(David Smiley)
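For illustration, a minimal sketch of the replacement this entry describes (the reader variable and the "content" field name are illustrative; the same pattern appears in the changes further below, e.g. MultiFields.getTerms(r, "content")):

// LeafReader, Terms, TermsEnum are in org.apache.lucene.index
// before: TermsEnum it = reader.fields().terms("content").iterator();
Terms terms = reader.terms("content");            // returns null if the field has no terms
TermsEnum it = (terms == null) ? null : terms.iterator();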
Improvements
* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)
Optimizations
* LUCENE-7828: Speed up range queries on range fields by improving how we
compute the relation between the query and inner nodes of the BKD tree.
(Adrien Grand)
======================= Lucene 6.6.0 =======================
New Features

View File

@ -1,5 +1,46 @@
# Apache Lucene Migration Guide
## SPI lookups for codecs and analysis changed (LUCENE-7873) ##
Due to serious problems with context class loaders in several frameworks
(OSGI, Java 9 Jigsaw), the lookup of Codecs, PostingsFormats, DocValuesFormats
and all analysis factories was changed to only inspect the current classloader
that defined the interface class (`lucene-core.jar`). Normal applications
should not encounter any issues with that change, because the application
classloader (unnamed module in Java 9) can load all SPIs from all JARs
from classpath.
For any code that relies on the old behaviour (e.g., certain web applications
or components in application servers) one can manually instruct the Lucene
SPI implementation to also inspect the context classloader. To do this,
add this code to the early startup phase of your application before any
Apache Lucene component is used:
ClassLoader cl = Thread.currentThread().getContextClassLoader();
// Codecs:
PostingsFormat.reloadPostingsFormats(cl);
DocValuesFormat.reloadDocValuesFormats(cl);
Codec.reloadCodecs(cl);
// Analysis:
CharFilterFactory.reloadCharFilters(cl);
TokenFilterFactory.reloadTokenFilters(cl);
TokenizerFactory.reloadTokenizers(cl);
This code will reload all service providers from the given class loader
(in our case the context class loader). Of course, instead of specifying
the context class loader, it is recommended to use the application's main
class loader or the module class loader.
If you are migrating your project to Java 9 Jigsaw module system, keep in mind
that Lucene currently does not yet support `module-info.java` declarations of
service provider impls (`provides` statement). It is therefore recommended
to keep all of Lucene in one Uber-Module and not try to split Lucene into
several modules. As soon as Lucene moves to Java 9 as its minimum requirement,
we will work on improving that.
For OSGI, the same applies. You have to create a bundle with all of Lucene for
SPI to work correctly.
## Query.hashCode and Query.equals are now abstract methods (LUCENE-7277)
Any custom query subclasses should redeclare equivalence relationship according

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Characters before the delimiter are the "token"; the textual integer after it is the term frequency.
* To use this {@code TokenFilter} the field must be indexed with
* {@link IndexOptions#DOCS_AND_FREQS} but no positions or offsets.
* <p>
* For example, if the delimiter is '|', then for the string "foo|5", "foo" is the token
* and "5" is a term frequency. If there is no delimiter, the TokenFilter does not modify
* the term frequency.
* <p>
* Note: make sure your Tokenizer doesn't split on the delimiter, or this won't work.
*/
public final class DelimitedTermFrequencyTokenFilter extends TokenFilter {
public static final char DEFAULT_DELIMITER = '|';
private final char delimiter;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TermFrequencyAttribute tfAtt = addAttribute(TermFrequencyAttribute.class);
public DelimitedTermFrequencyTokenFilter(TokenStream input) {
this(input, DEFAULT_DELIMITER);
}
public DelimitedTermFrequencyTokenFilter(TokenStream input, char delimiter) {
super(input);
this.delimiter = delimiter;
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
for (int i = 0; i < length; i++) {
if (buffer[i] == delimiter) {
termAtt.setLength(i); // simply set a new length
i++;
tfAtt.setTermFrequency(ArrayUtil.parseInt(buffer, i, length - i));
return true;
}
}
return true;
}
return false;
}
}
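A minimal usage sketch of the filter above, assuming the usual imports (java.io.StringReader, org.apache.lucene.analysis.core.WhitespaceTokenizer, plus the attribute classes imported in this file); the input string is illustrative only:

WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("foo|5 bar"));
DelimitedTermFrequencyTokenFilter filter = new DelimitedTermFrequencyTokenFilter(tokenizer);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
filter.reset();
while (filter.incrementToken()) {
  // prints "foo tf=5" then "bar tf=1" (no delimiter leaves the default frequency of 1)
  System.out.println(termAtt + " tf=" + tfAtt.getTermFrequency());
}
filter.end();
filter.close();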

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link DelimitedTermFrequencyTokenFilter}. The field must have {@code omitPositions=true}.
* <pre class="prettyprint">
* &lt;fieldType name="text_tfdl" class="solr.TextField" omitPositions="true"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.DelimitedTermFrequencyTokenFilterFactory" delimiter="|"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class DelimitedTermFrequencyTokenFilterFactory extends TokenFilterFactory {
public static final String DELIMITER_ATTR = "delimiter";
private final char delimiter;
/** Creates a new DelimitedTermFrequencyTokenFilterFactory */
public DelimitedTermFrequencyTokenFilterFactory(Map<String, String> args) {
super(args);
delimiter = getChar(args, DELIMITER_ATTR, DelimitedTermFrequencyTokenFilter.DEFAULT_DELIMITER);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public DelimitedTermFrequencyTokenFilter create(TokenStream input) {
return new DelimitedTermFrequencyTokenFilter(input, delimiter);
}
}

View File

@ -48,7 +48,7 @@ public final class AnalysisSPILoader<S extends AbstractAnalysisFactory> {
}
public AnalysisSPILoader(Class<S> clazz, String[] suffixes) {
this(clazz, suffixes, Thread.currentThread().getContextClassLoader());
this(clazz, suffixes, null);
}
public AnalysisSPILoader(Class<S> clazz, String[] suffixes, ClassLoader classloader) {

View File

@ -16,9 +16,9 @@
*/
package org.apache.lucene.analysis.wikipedia;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
@ -33,19 +33,28 @@ import org.apache.lucene.util.AttributeFactory;
* &lt;/fieldType&gt;</pre>
*/
public class WikipediaTokenizerFactory extends TokenizerFactory {
public static final String TOKEN_OUTPUT = "tokenOutput";
public static final String UNTOKENIZED_TYPES = "untokenizedTypes";
protected final int tokenOutput;
protected Set<String> untokenizedTypes;
/** Creates a new WikipediaTokenizerFactory */
public WikipediaTokenizerFactory(Map<String,String> args) {
super(args);
tokenOutput = getInt(args, TOKEN_OUTPUT, WikipediaTokenizer.TOKENS_ONLY);
untokenizedTypes = getSet(args, UNTOKENIZED_TYPES);
if (untokenizedTypes == null) {
untokenizedTypes = Collections.emptySet();
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
// TODO: add support for WikipediaTokenizer's advanced options.
@Override
public WikipediaTokenizer create(AttributeFactory factory) {
return new WikipediaTokenizer(factory, WikipediaTokenizer.TOKENS_ONLY,
Collections.<String>emptySet());
return new WikipediaTokenizer(factory, tokenOutput, untokenizedTypes);
}
}
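A minimal sketch of passing the new options programmatically; in a Solr schema the same keys are given as attributes on the tokenizer element (the map contents here are illustrative):

Map<String, String> args = new HashMap<>();
args.put(WikipediaTokenizerFactory.TOKEN_OUTPUT, Integer.toString(WikipediaTokenizer.UNTOKENIZED_ONLY));
args.put(WikipediaTokenizerFactory.UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS);
WikipediaTokenizerFactory factory = new WikipediaTokenizerFactory(args); // consumes and validates the args
Tokenizer tokenizer = factory.create(); // uses the default AttributeFactory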

View File

@ -63,6 +63,7 @@ org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory
org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory
org.apache.lucene.analysis.miscellaneous.CodepointCountFilterFactory
org.apache.lucene.analysis.miscellaneous.DateRecognizerFilterFactory
org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory
org.apache.lucene.analysis.miscellaneous.FingerprintFilterFactory
org.apache.lucene.analysis.miscellaneous.FixBrokenOffsetsFilterFactory
org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory

View File

@ -21,13 +21,17 @@ import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
@ -49,6 +53,12 @@ import org.apache.lucene.util.Version;
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
public class TestFactories extends BaseTokenStreamTestCase {
/** Factories that are excluded from testing it with random data */
private static final Set<Class<? extends AbstractAnalysisFactory>> EXCLUDE_FACTORIES_RANDOM_DATA = new HashSet<>(Arrays.asList(
DelimitedTermFrequencyTokenFilterFactory.class
));
public void test() throws IOException {
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
doTestTokenizer(tokenizer);
@ -77,11 +87,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
assertFalse(mtc instanceof CharFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(factory, null, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(factory, null, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
}
@ -99,11 +111,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
assertTrue(mtc instanceof TokenFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
}
@ -121,11 +135,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
assertTrue(mtc instanceof CharFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
}

View File

@ -73,6 +73,7 @@ import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
@ -159,6 +160,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
WordDelimiterFilter.class,
// Cannot correct offsets when a char filter had changed them:
WordDelimiterGraphFilter.class,
// requires a special encoded token value, so it may fail with random data:
DelimitedTermFrequencyTokenFilter.class,
// clones of core's filters:
org.apache.lucene.analysis.core.StopFilter.class,
org.apache.lucene.analysis.core.LowerCaseFilter.class)) {

View File

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
public class DelimitedTermFrequencyTokenFilterTest extends BaseTokenStreamTestCase {
public void testTermFrequency() throws Exception {
String test = "The quick|40 red|4 fox|06 jumped|1 over the lazy|2 brown|123 dogs|1024";
DelimitedTermFrequencyTokenFilter filter =
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
filter.reset();
assertTermEquals("The", filter, termAtt, tfAtt, 1);
assertTermEquals("quick", filter, termAtt, tfAtt, 40);
assertTermEquals("red", filter, termAtt, tfAtt, 4);
assertTermEquals("fox", filter, termAtt, tfAtt, 6);
assertTermEquals("jumped", filter, termAtt, tfAtt, 1);
assertTermEquals("over", filter, termAtt, tfAtt, 1);
assertTermEquals("the", filter, termAtt, tfAtt, 1);
assertTermEquals("lazy", filter, termAtt, tfAtt, 2);
assertTermEquals("brown", filter, termAtt, tfAtt, 123);
assertTermEquals("dogs", filter, termAtt, tfAtt, 1024);
assertFalse(filter.incrementToken());
filter.end();
filter.close();
}
public void testInvalidNegativeTf() throws Exception {
String test = "foo bar|-20";
DelimitedTermFrequencyTokenFilter filter =
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
filter.reset();
assertTermEquals("foo", filter, termAtt, tfAtt, 1);
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, filter::incrementToken);
assertEquals("Term frequency must be 1 or greater; got -20", iae.getMessage());
}
public void testInvalidFloatTf() throws Exception {
String test = "foo bar|1.2";
DelimitedTermFrequencyTokenFilter filter =
new DelimitedTermFrequencyTokenFilter(whitespaceMockTokenizer(test));
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
TermFrequencyAttribute tfAtt = filter.getAttribute(TermFrequencyAttribute.class);
filter.reset();
assertTermEquals("foo", filter, termAtt, tfAtt, 1);
expectThrows(NumberFormatException.class, filter::incrementToken);
}
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, TermFrequencyAttribute tfAtt, int expectedTf) throws Exception {
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.toString());
assertEquals(expectedTf, tfAtt.getTermFrequency());
}
}

View File

@ -17,34 +17,90 @@
package org.apache.lucene.analysis.wikipedia;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
/**
* Simple tests to ensure the wikipedia tokenizer is working.
*/
public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCase {
private final String WIKIPEDIA = "Wikipedia";
private final String TOKEN_OUTPUT = "tokenOutput";
private final String UNTOKENIZED_TYPES = "untokenizedTypes";
public void testTokenizer() throws Exception {
Reader reader = new StringReader("This is a [[Category:foo]]");
Tokenizer tokenizer = tokenizerFactory("Wikipedia").create(newAttributeFactory());
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer,
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
new int[] { 1, 1, 1, 1, });
String text = "This is a [[Category:foo]]";
Tokenizer tf = tokenizerFactory(WIKIPEDIA).create(newAttributeFactory());
tf.setReader(new StringReader(text));
assertTokenStreamContents(tf,
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
new int[] { 1, 1, 1, 1, },
text.length());
}
public void testTokenizerTokensOnly() throws Exception {
String text = "This is a [[Category:foo]]";
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer( WikipediaTokenizer.TOKENS_ONLY).toString()).create(newAttributeFactory());
tf.setReader(new StringReader(text));
assertTokenStreamContents(tf,
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
new int[] { 1, 1, 1, 1, },
text.length());
}
public void testTokenizerUntokenizedOnly() throws Exception {
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
Set<String> untoks = new HashSet<>();
untoks.add(WikipediaTokenizer.CATEGORY);
untoks.add(WikipediaTokenizer.ITALICS);
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.UNTOKENIZED_ONLY).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
tf.setReader(new StringReader(test));
assertTokenStreamContents(tf,
new String[] { "a b c d", "e f g", "link", "here", "link",
"there", "italics here", "something", "more italics", "h i j" },
new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
);
}
public void testTokenizerBoth() throws Exception {
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.BOTH).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
tf.setReader(new StringReader(test));
assertTokenStreamContents(tf,
new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
"link", "here", "link", "there", "italics here", "italics", "here",
"something", "more italics", "more", "italics", "h i j", "h", "i", "j" },
new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 },
new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 }
);
}
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
tokenizerFactory("Wikipedia", "bogusArg", "bogusValue");
tokenizerFactory(WIKIPEDIA, "bogusArg", "bogusValue").create(newAttributeFactory());
});
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
public void testIllegalArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
});
assertTrue(expected.getMessage().contains("tokenOutput must be TOKENS_ONLY, UNTOKENIZED_ONLY or BOTH"));
}
}

View File

@ -21,13 +21,17 @@ import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilterFactory;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
@ -46,6 +50,12 @@ import org.apache.lucene.util.Version;
// TODO: fix this to use CustomAnalyzer instead of its own FactoryAnalyzer
public class TestFactories extends BaseTokenStreamTestCase {
/** Factories that are excluded from testing it with random data */
private static final Set<Class<? extends AbstractAnalysisFactory>> EXCLUDE_FACTORIES_RANDOM_DATA = new HashSet<>(Arrays.asList(
DelimitedTermFrequencyTokenFilterFactory.class
));
public void test() throws IOException {
for (String tokenizer : TokenizerFactory.availableTokenizers()) {
doTestTokenizer(tokenizer);
@ -74,11 +84,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
assertFalse(mtc instanceof CharFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(factory, null, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(factory, null, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
}
@ -96,11 +108,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
assertTrue(mtc instanceof TokenFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
}
@ -118,11 +132,13 @@ public class TestFactories extends BaseTokenStreamTestCase {
assertTrue(mtc instanceof CharFilterFactory);
}
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
// beast it just a little, it shouldnt throw exceptions:
// (it should have thrown them in initialize)
Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
checkRandomData(random(), a, 20, 20, false, false);
a.close();
}
}
}

View File

@ -297,7 +297,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"6.5.0-cfs",
"6.5.0-nocfs",
"6.5.1-cfs",
"6.5.1-nocfs"
"6.5.1-nocfs",
"6.6.0-cfs",
"6.6.0-nocfs"
};
final String[] unsupportedNames = {
@ -1190,7 +1192,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
for (String name : oldNames) {
Directory dir = oldIndexDirs.get(name);
IndexReader r = DirectoryReader.open(dir);
TermsEnum terms = MultiFields.getFields(r).terms("content").iterator();
TermsEnum terms = MultiFields.getTerms(r, "content").iterator();
BytesRef t = terms.next();
assertNotNull(t);

View File

@ -26,15 +26,18 @@ import org.apache.lucene.util.AttributeReflector;
* <li>{@link PositionIncrementAttribute}
* <li>{@link PositionLengthAttribute}
* <li>{@link OffsetAttribute}
* <li>{@link TermFrequencyAttribute}
* </ul>*/
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
implements TypeAttribute, PositionIncrementAttribute,
PositionLengthAttribute, OffsetAttribute {
PositionLengthAttribute, OffsetAttribute,
TermFrequencyAttribute {
private int startOffset,endOffset;
private String type = DEFAULT_TYPE;
private int positionIncrement = 1;
private int positionLength = 1;
private int termFrequency = 1;
/** Constructs the attribute implementation. */
public PackedTokenAttributeImpl() {
@ -132,12 +135,26 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
this.type = type;
}
@Override
public final void setTermFrequency(int termFrequency) {
if (termFrequency < 1) {
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
}
this.termFrequency = termFrequency;
}
@Override
public final int getTermFrequency() {
return termFrequency;
}
/** Resets the attributes
*/
@Override
public void clear() {
super.clear();
positionIncrement = positionLength = 1;
termFrequency = 1;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@ -147,10 +164,8 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
@Override
public void end() {
super.end();
// super.end already calls this.clear, so we only set values that are different from clear:
positionIncrement = 0;
positionLength = 1;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@Override
@ -170,6 +185,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
positionIncrement == other.positionIncrement &&
positionLength == other.positionLength &&
(type == null ? other.type == null : type.equals(other.type)) &&
termFrequency == other.termFrequency &&
super.equals(obj)
);
} else
@ -185,6 +201,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
code = code * 31 + positionLength;
if (type != null)
code = code * 31 + type.hashCode();
code = code * 31 + termFrequency;
return code;
}
@ -198,12 +215,14 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
to.startOffset = startOffset;
to.endOffset = endOffset;
to.type = type;
to.termFrequency = termFrequency;
} else {
super.copyTo(target);
((OffsetAttribute) target).setOffset(startOffset, endOffset);
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
((PositionLengthAttribute) target).setPositionLength(positionLength);
((TypeAttribute) target).setType(type);
((TermFrequencyAttribute) target).setTermFrequency(termFrequency);
}
}
@ -215,6 +234,6 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
reflector.reflect(TypeAttribute.class, "type", type);
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
}
}

View File

@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.Attribute;
/** Sets the custom term frequency of a term within one document. If this attribute
* is present in your analysis chain for a given field, that field must be indexed with
* {@link IndexOptions#DOCS_AND_FREQS}. */
public interface TermFrequencyAttribute extends Attribute {
/** Set the custom term frequency of the current term within one document. */
public void setTermFrequency(int termFrequency);
/** Returns the custom term frequency. */
public int getTermFrequency();
}
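A minimal sketch of a TokenFilter that writes this attribute during analysis (the class name and fixed-frequency policy are illustrative, not part of this change):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;

public final class FixedTermFrequencyFilter extends TokenFilter {
  private final TermFrequencyAttribute tfAtt = addAttribute(TermFrequencyAttribute.class);
  private final int freq;

  public FixedTermFrequencyFilter(TokenStream input, int freq) {
    super(input);
    this.freq = freq;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken() == false) {
      return false;
    }
    tfAtt.setTermFrequency(freq); // must be >= 1; the field must be indexed with DOCS_AND_FREQS
    return true;
  }
}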

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link TermFrequencyAttribute}. */
public class TermFrequencyAttributeImpl extends AttributeImpl implements TermFrequencyAttribute, Cloneable {
private int termFrequency = 1;
/** Initialize this attribute with a term frequency of 1 */
public TermFrequencyAttributeImpl() {}
@Override
public void setTermFrequency(int termFrequency) {
if (termFrequency < 1) {
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
}
this.termFrequency = termFrequency;
}
@Override
public int getTermFrequency() {
return termFrequency;
}
@Override
public void clear() {
this.termFrequency = 1;
}
@Override
public void end() {
this.termFrequency = 1;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof TermFrequencyAttributeImpl) {
TermFrequencyAttributeImpl _other = (TermFrequencyAttributeImpl) other;
return termFrequency == _other.termFrequency;
}
return false;
}
@Override
public int hashCode() {
return Integer.hashCode(termFrequency);
}
@Override
public void copyTo(AttributeImpl target) {
TermFrequencyAttribute t = (TermFrequencyAttribute) target;
t.setTermFrequency(termFrequency);
}
@Override
public void reflectWith(AttributeReflector reflector) {
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
}
}

View File

@ -121,12 +121,6 @@ public final class BlockTreeTermsReader extends FieldsProducer {
private final TreeMap<String,FieldReader> fields = new TreeMap<>();
/** File offset where the directory starts in the terms file. */
private long dirOffset;
/** File offset where the directory starts in the index file. */
private long indexDirOffset;
final String segment;
final int version;
@ -167,8 +161,8 @@ public final class BlockTreeTermsReader extends FieldsProducer {
CodecUtil.retrieveChecksum(termsIn);
// Read per-field details
seekDir(termsIn, dirOffset);
seekDir(indexIn, indexDirOffset);
seekDir(termsIn);
seekDir(indexIn);
final int numFields = termsIn.readVInt();
if (numFields < 0) {
@ -181,13 +175,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
if (numTerms <= 0) {
throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
}
final int numBytes = termsIn.readVInt();
if (numBytes < 0) {
throw new CorruptIndexException("invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
}
final BytesRef rootCode = new BytesRef(new byte[numBytes]);
termsIn.readBytes(rootCode.bytes, 0, numBytes);
rootCode.length = numBytes;
final BytesRef rootCode = readBytesRef(termsIn);
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
if (fieldInfo == null) {
throw new CorruptIndexException("invalid field number: " + field, termsIn);
@ -230,19 +218,24 @@ public final class BlockTreeTermsReader extends FieldsProducer {
}
private static BytesRef readBytesRef(IndexInput in) throws IOException {
int numBytes = in.readVInt();
if (numBytes < 0) {
throw new CorruptIndexException("invalid bytes length: " + numBytes, in);
}
BytesRef bytes = new BytesRef();
bytes.length = in.readVInt();
bytes.bytes = new byte[bytes.length];
in.readBytes(bytes.bytes, 0, bytes.length);
bytes.length = numBytes;
bytes.bytes = new byte[numBytes];
in.readBytes(bytes.bytes, 0, numBytes);
return bytes;
}
/** Seek {@code input} to the directory offset. */
private void seekDir(IndexInput input, long dirOffset)
throws IOException {
private static void seekDir(IndexInput input) throws IOException {
input.seek(input.length() - CodecUtil.footerLength() - 8);
dirOffset = input.readLong();
input.seek(dirOffset);
long offset = input.readLong();
input.seek(offset);
}
// for debugging

View File

@ -19,22 +19,20 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntPredicate;
import java.util.function.Predicate;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.StringHelper;
@ -60,13 +58,167 @@ abstract class RangeFieldQuery extends Query {
/** Used by {@code RangeFieldQuery} to check how each internal or leaf node relates to the query. */
enum QueryType {
/** Use this for intersects queries. */
INTERSECTS,
INTERSECTS {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, minOffset) < 0
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, maxOffset) > 0) {
// disjoint
return Relation.CELL_OUTSIDE_QUERY;
}
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, minOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, maxOffset) <= 0) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
return StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, minOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, maxOffset) <= 0;
}
},
/** Use this for within queries. */
WITHIN,
WITHIN {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) < 0
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) > 0) {
// all ranges have at least one point outside of the query
return Relation.CELL_OUTSIDE_QUERY;
}
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) <= 0) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) <= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) >= 0;
}
},
/** Use this for contains */
CONTAINS,
CONTAINS {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) > 0
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) < 0) {
// all ranges are either less than the query max or greater than the query min
return Relation.CELL_OUTSIDE_QUERY;
}
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) <= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) >= 0) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) <= 0;
}
},
/** Use this for crosses queries */
CROSSES
CROSSES {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
throw new UnsupportedOperationException();
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
throw new UnsupportedOperationException();
}
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim) {
Relation intersectRelation = QueryType.INTERSECTS.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
if (intersectRelation == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
}
Relation withinRelation = QueryType.WITHIN.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
if (withinRelation == Relation.CELL_INSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
}
if (intersectRelation == Relation.CELL_INSIDE_QUERY && withinRelation == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
return INTERSECTS.matches(queryPackedValue, packedValue, numDims, bytesPerDim)
&& WITHIN.matches(queryPackedValue, packedValue, numDims, bytesPerDim) == false;
}
};
abstract Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim, int dim);
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim) {
boolean inside = true;
for (int dim = 0; dim < numDims; ++dim) {
Relation relation = compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim, dim);
if (relation == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (relation != Relation.CELL_INSIDE_QUERY) {
inside = false;
}
}
return inside ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
}
abstract boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim);
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
for (int dim = 0; dim < numDims; ++dim) {
if (matches(queryPackedValue, packedValue, numDims, bytesPerDim, dim) == false) {
return false;
}
}
return true;
}
}
/**
@ -111,54 +263,33 @@ abstract class RangeFieldQuery extends Query {
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
final RangeFieldComparator target = new RangeFieldComparator();
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
values.intersect(
new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) throws IOException {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] leaf) throws IOException {
if (target.matches(leaf)) {
adder.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return compareRange(minPackedValue, maxPackedValue);
}
});
return result.build();
}
private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
byte[] node = getInternalRange(minPackedValue, maxPackedValue);
// compute range relation for BKD traversal
if (target.intersects(node) == false) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (target.within(node)) {
// target within cell; continue traversing:
return Relation.CELL_CROSSES_QUERY;
} else if (target.contains(node)) {
// target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ?
Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
}
// target intersects cell; continue traversing:
return Relation.CELL_CROSSES_QUERY;
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
return new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) throws IOException {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] leaf) throws IOException {
if (queryType.matches(ranges, leaf, numDims, bytesPerDim)) {
adder.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return queryType.compare(ranges, minPackedValue, maxPackedValue, numDims, bytesPerDim);
}
};
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
@ -173,115 +304,59 @@ abstract class RangeFieldQuery extends Query {
checkFieldInfo(fieldInfo);
boolean allDocsMatch = false;
if (values.getDocCount() == reader.maxDoc()
&& compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
&& queryType.compare(ranges, values.getMinPackedValue(), values.getMaxPackedValue(), numDims, bytesPerDim) == Relation.CELL_INSIDE_QUERY) {
allDocsMatch = true;
}
DocIdSetIterator iterator = allDocsMatch == true ?
DocIdSetIterator.all(reader.maxDoc()) : buildMatchingDocIdSet(reader, values).iterator();
return new ConstantScoreScorer(this, score(), iterator);
}
final Weight weight = this;
if (allDocsMatch) {
return new ScorerSupplier() {
@Override
public Scorer get(boolean randomAccess) {
return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc()));
}
/** get an encoded byte representation of the internal node; this is
* the lower half of the min array and the upper half of the max array */
private byte[] getInternalRange(byte[] min, byte[] max) {
byte[] range = new byte[min.length];
final int dimSize = numDims * bytesPerDim;
System.arraycopy(min, 0, range, 0, dimSize);
System.arraycopy(max, dimSize, range, dimSize, dimSize);
return range;
}
};
}
@Override
public long cost() {
return reader.maxDoc();
}
};
} else {
return new ScorerSupplier() {
/**
* RangeFieldComparator class provides the core comparison logic for accepting or rejecting indexed
* {@code RangeField} types based on the defined query range and relation.
*/
class RangeFieldComparator {
final Predicate<byte[]> predicate;
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;
/** constructs the comparator based on the query type */
RangeFieldComparator() {
switch (queryType) {
case INTERSECTS:
predicate = this::intersects;
break;
case WITHIN:
predicate = this::contains;
break;
case CONTAINS:
predicate = this::within;
break;
case CROSSES:
// crosses first checks intersection (disjoint automatic fails),
// then ensures the query doesn't wholly contain the leaf:
predicate = (byte[] leaf) -> this.intersects(leaf)
&& this.contains(leaf) == false;
break;
default:
throw new IllegalArgumentException("invalid queryType [" + queryType + "] found.");
}
}
@Override
public Scorer get(boolean randomAccess) throws IOException {
values.intersect(visitor);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), iterator);
}
/** determines if the candidate range matches the query request */
private boolean matches(final byte[] candidate) {
return (Arrays.equals(ranges, candidate) && queryType != QueryType.CROSSES)
|| predicate.test(candidate);
}
/** check if query intersects candidate range */
private boolean intersects(final byte[] candidate) {
return relate((int d) -> compareMinMax(candidate, d) > 0 || compareMaxMin(candidate, d) < 0);
}
/** check if query is within candidate range */
private boolean within(final byte[] candidate) {
return relate((int d) -> compareMinMin(candidate, d) < 0 || compareMaxMax(candidate, d) > 0);
}
/** check if query contains candidate range */
private boolean contains(final byte[] candidate) {
return relate((int d) -> compareMinMin(candidate, d) > 0 || compareMaxMax(candidate, d) < 0);
}
/** internal method used by each relation method to test range relation logic */
private boolean relate(IntPredicate predicate) {
for (int d=0; d<numDims; ++d) {
if (predicate.test(d)) {
return false;
@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimatePointCount(visitor);
assert cost >= 0;
}
return cost;
}
};
}
}
return true;
}
/** compare the encoded min value (for the defined query dimension) with the encoded min value in the byte array */
private int compareMinMin(byte[] b, int dimension) {
// convert dimension to offset:
dimension *= bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
}
/** compare the encoded min value (for the defined query dimension) with the encoded max value in the byte array */
private int compareMinMax(byte[] b, int dimension) {
// convert dimension to offset:
dimension *= bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, dimension, b, numDims * bytesPerDim + dimension);
}
/** compare the encoded max value (for the defined query dimension) with the encoded min value in the byte array */
private int compareMaxMin(byte[] b, int dimension) {
// convert dimension to offset:
dimension *= bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, numDims * bytesPerDim + dimension, b, dimension);
}
/** compare the encoded max value (for the defined query dimension) with the encoded max value in the byte array */
private int compareMaxMax(byte[] b, int dimension) {
// convert dimension to max offset:
dimension = numDims * bytesPerDim + dimension * bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
};
}
@Override

View File

@ -501,9 +501,8 @@ class BufferedUpdatesStream implements Accountable {
queue = new SegmentQueue(numReaders);
long segTermCount = 0;
for(int i=0;i<numReaders;i++) {
SegmentState state = segStates[i];
Terms terms = state.reader.fields().terms(field);
for (SegmentState state : segStates) {
Terms terms = state.reader.terms(field);
if (terms != null) {
segTermCount += terms.size();
state.termsEnum = terms.iterator();
@ -617,7 +616,6 @@ class BufferedUpdatesStream implements Accountable {
// DocValues updates
private synchronized void applyDocValuesUpdates(List<DocValuesUpdate> updates,
SegmentState segState, DocValuesFieldUpdates.Container dvUpdatesContainer) throws IOException {
Fields fields = segState.reader.fields();
// TODO: we can process the updates per DV field, from last to first so that
// if multiple terms affect same document for the same field, we add an update
@ -651,7 +649,7 @@ class BufferedUpdatesStream implements Accountable {
// if we change the code to process updates in terms order, enable this assert
// assert currentField == null || currentField.compareTo(term.field()) < 0;
currentField = term.field();
Terms terms = fields.terms(currentField);
Terms terms = segState.reader.terms(currentField);
if (terms != null) {
termsEnum = terms.iterator();
} else {

View File

@ -98,12 +98,15 @@ public abstract class CodecReader extends LeafReader implements Accountable {
throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")");
}
}
@Override
public final Fields fields() {
return getPostingsReader();
public final Terms terms(String field) throws IOException {
//ensureOpen(); no; getPostingsReader calls this
// We could check the FieldInfo IndexOptions but there's no point since
// PostingsReader will simply return null for fields that don't exist or that have no terms index.
return getPostingsReader().terms(field);
}
// returns the FieldInfo that corresponds to the given field and type, or
// null if the field does not exist, or not indexed as the requested
// DocValuesType.

View File

@ -770,10 +770,12 @@ final class DefaultIndexingChain extends DocConsumer {
}
invertState.lastStartOffset = startOffset;
invertState.length++;
if (invertState.length < 0) {
throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
try {
invertState.length = Math.addExact(invertState.length, invertState.termFreqAttribute.getTermFrequency());
} catch (ArithmeticException ae) {
throw new IllegalArgumentException("too many tokens for field \"" + field.name() + "\"");
}
//System.out.println(" term=" + invertState.termAttribute);
// If we hit an exception in here, we abort
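The change above replaces the old negative-length overflow check with Math.addExact, which reports int overflow as an ArithmeticException that is then rethrown with the field name. A minimal sketch of the behavior this relies on (the field name and numbers are made up):
int length = Integer.MAX_VALUE - 10;  // accumulated field length so far (assumed)
int termFreq = 42;                    // value reported by TermFrequencyAttribute (assumed)
try {
  length = Math.addExact(length, termFreq);  // throws ArithmeticException on int overflow
} catch (ArithmeticException ae) {
  throw new IllegalArgumentException("too many tokens for field \"body\"");
}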

View File

@ -17,14 +17,13 @@
package org.apache.lucene.index;
import org.apache.lucene.index.FilterLeafReader.FilterFields;
import java.io.IOException;
import org.apache.lucene.index.FilterLeafReader.FilterTerms;
import org.apache.lucene.index.FilterLeafReader.FilterTermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import java.io.IOException;
/**
* The {@link ExitableDirectoryReader} wraps a real index {@link DirectoryReader} and
@ -79,14 +78,12 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
}
@Override
public Fields fields() throws IOException {
Fields fields = super.fields();
if (queryTimeout.isTimeoutEnabled()) {
return new ExitableFields(fields, queryTimeout);
}
else {
return fields; // break out of wrapper as soon as possible
public Terms terms(String field) throws IOException {
Terms terms = in.terms(field);
if (terms == null) {
return null;
}
return (queryTimeout.isTimeoutEnabled()) ? new ExitableTerms(terms, queryTimeout) : terms;
}
// this impl does not change deletes or data so we can delegate the
@ -103,29 +100,6 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
}
/**
* Wrapper class for another Fields implementation that is used by the ExitableFilterAtomicReader.
*/
public static class ExitableFields extends FilterFields {
private QueryTimeout queryTimeout;
/** Constructor **/
public ExitableFields(Fields fields, QueryTimeout queryTimeout) {
super(fields);
this.queryTimeout = queryTimeout;
}
@Override
public Terms terms(String field) throws IOException {
Terms terms = in.terms(field);
if (terms == null) {
return null;
}
return new ExitableTerms(terms, queryTimeout);
}
}
/**
* Wrapper class for another Terms implementation that is used by ExitableFields.
*/

View File

@ -20,6 +20,7 @@ import org.apache.lucene.analysis.TokenStream; // javadocs
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.AttributeSource;
@ -48,6 +49,7 @@ public final class FieldInvertState {
PositionIncrementAttribute posIncrAttribute;
PayloadAttribute payloadAttribute;
TermToBytesRefAttribute termAttribute;
TermFrequencyAttribute termFreqAttribute;
/** Creates {code FieldInvertState} for the specified
* field name. */
@ -88,6 +90,7 @@ public final class FieldInvertState {
if (this.attributeSource != attributeSource) {
this.attributeSource = attributeSource;
termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);

View File

@ -20,9 +20,15 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
/** Flex API for access to fields and terms
* @lucene.experimental */
import org.apache.lucene.codecs.FieldsProducer;
/**
* Provides a {@link Terms} index for fields that have it, and lists which fields do.
* This is primarily an internal/experimental API (see {@link FieldsProducer}),
* although it is also used to expose the set of term vectors per document.
*
* @lucene.experimental
*/
public abstract class Fields implements Iterable<String> {
/** Sole constructor. (For invocation by subclass

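As the revised javadoc above notes, the main user-facing place a Fields instance still appears is per-document term vectors. A small usage sketch (the field name is an assumption):
import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
// Term vectors for a single document are still exposed as a Fields instance.
static Terms titleVector(IndexReader reader, int docID) throws IOException {
  Fields vectors = reader.getTermVectors(docID);           // null if the doc stored no vectors
  return vectors == null ? null : vectors.terms("title");  // null if this field stored none
}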
View File

@ -345,11 +345,11 @@ public abstract class FilterLeafReader extends LeafReader {
protected void doClose() throws IOException {
in.close();
}
@Override
public Fields fields() throws IOException {
public Terms terms(String field) throws IOException {
ensureOpen();
return in.fields();
return in.terms(field);
}
@Override

View File

@ -113,9 +113,10 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
if (!hasFreq) {
assert postings.termFreqs == null;
postings.lastDocCodes[termID] = docState.docID;
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
postings.termFreqs[termID] = 1;
postings.termFreqs[termID] = getTermFreq();
if (hasProx) {
writeProx(termID, fieldState.position);
if (hasOffsets) {
@ -124,19 +125,21 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
} else {
assert !hasOffsets;
}
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
}
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
fieldState.uniqueTermCount++;
}
@Override
void addTerm(final int termID) {
final FreqProxPostingsArray postings = freqProxPostingsArray;
assert !hasFreq || postings.termFreqs[termID] > 0;
if (!hasFreq) {
assert postings.termFreqs == null;
if (termFreqAtt.getTermFrequency() != 1) {
throw new IllegalStateException("field \"" + fieldInfo.name + "\": must index term freq while using custom TermFrequencyAttribute");
}
if (docState.docID != postings.lastDocIDs[termID]) {
// New document; now encode docCode for previous doc:
assert docState.docID > postings.lastDocIDs[termID];
@ -160,8 +163,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
}
// Init freq for the current document
postings.termFreqs[termID] = 1;
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.termFreqs[termID] = getTermFreq();
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
if (hasProx) {
@ -175,7 +178,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
}
fieldState.uniqueTermCount++;
} else {
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
postings.termFreqs[termID] = Math.addExact(postings.termFreqs[termID], getTermFreq());
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, postings.termFreqs[termID]);
if (hasProx) {
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
if (hasOffsets) {
@ -185,6 +189,17 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
}
}
private int getTermFreq() {
int freq = termFreqAtt.getTermFrequency();
if (freq != 1) {
if (hasProx) {
throw new IllegalStateException("field \"" + fieldInfo.name + "\": cannot index positions while using custom TermFrequencyAttribute");
}
}
return freq;
}
@Override
public void newPostingsArray() {
freqProxPostingsArray = (FreqProxPostingsArray) postingsArray;

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.IndexReader.CacheHelper;
import org.apache.lucene.util.Bits;
/** {@code LeafReader} is an abstract class, providing an interface for accessing an
@ -60,7 +59,7 @@ public abstract class LeafReader extends IndexReader {
}
/**
* Optional method: Return a {@link CacheHelper} that can be used to cache
* Optional method: Return a {@link IndexReader.CacheHelper} that can be used to cache
* based on the content of this leaf regardless of deletions. Two readers
* that have the same data but different sets of deleted documents or doc
* values updates may be considered equal. Consider using
@ -73,12 +72,6 @@ public abstract class LeafReader extends IndexReader {
*/
public abstract CacheHelper getCoreCacheHelper();
/**
* Returns {@link Fields} for this reader.
* This method will not return null.
*/
public abstract Fields fields() throws IOException;
@Override
public final int docFreq(Term term) throws IOException {
final Terms terms = terms(term.field());
@ -139,10 +132,8 @@ public abstract class LeafReader extends IndexReader {
return terms.getSumTotalTermFreq();
}
/** This may return null if the field does not exist.*/
public final Terms terms(String field) throws IOException {
return fields().terms(field);
}
/** Returns the {@link Terms} index for this field, or null if it has none. */
public abstract Terms terms(String field) throws IOException;
/** Returns {@link PostingsEnum} for the specified term.
* This will return null if either the field or

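With LeafReader.fields() removed and terms(String) made abstract here, caller-side code migrates roughly as in the sketch below (the field name and helper method are illustrative, not part of this commit):
import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
// Before: Terms terms = reader.fields().terms("body");
// After:  ask the leaf reader directly; the result may be null.
static long countTerms(LeafReader reader) throws IOException {
  Terms terms = reader.terms("body");  // null if the field is absent or not indexed
  if (terms == null) {
    return 0;
  }
  long count = 0;
  TermsEnum te = terms.iterator();
  while (te.next() != null) {
    count++;
  }
  return count;
}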
View File

@ -70,8 +70,11 @@ class MergeReaderWrapper extends LeafReader {
}
@Override
public Fields fields() throws IOException {
return fields;
public Terms terms(String field) throws IOException {
ensureOpen();
// We could check the FieldInfo IndexOptions but there's no point since
// PostingsReader will simply return null for fields that don't exist or that have no terms index.
return fields.terms(field);
}
@Override

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -31,11 +32,12 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MergedIterator;
/**
* Exposes flex API, merged from flex API of sub-segments.
* Provides a single {@link Fields} term index view over an
* {@link IndexReader}.
* This is useful when you're interacting with an {@link
* IndexReader} implementation that consists of sequential
* sub-readers (eg {@link DirectoryReader} or {@link
* MultiReader}).
* MultiReader}) and you must treat it as a {@link LeafReader}.
*
* <p><b>NOTE</b>: for composite readers, you'll get better
* performance by gathering the sub readers using
@ -45,7 +47,6 @@ import org.apache.lucene.util.MergedIterator;
*
* @lucene.experimental
*/
public final class MultiFields extends Fields {
private final Fields[] subs;
private final ReaderSlice[] subSlices;
@ -64,13 +65,13 @@ public final class MultiFields extends Fields {
switch (leaves.size()) {
case 1:
// already an atomic reader / reader with one leave
return leaves.get(0).reader().fields();
return new LeafReaderFields(leaves.get(0).reader());
default:
final List<Fields> fields = new ArrayList<>(leaves.size());
final List<ReaderSlice> slices = new ArrayList<>(leaves.size());
for (final LeafReaderContext ctx : leaves) {
final LeafReader r = ctx.reader();
final Fields f = r.fields();
final Fields f = new LeafReaderFields(r);
fields.add(f);
slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1));
}
@ -115,9 +116,31 @@ public final class MultiFields extends Fields {
}
}
/** This method may return null if the field does not exist.*/
/** This method may return null if the field does not exist or if it has no terms. */
public static Terms getTerms(IndexReader r, String field) throws IOException {
return getFields(r).terms(field);
final List<LeafReaderContext> leaves = r.leaves();
if (leaves.size() == 1) {
return leaves.get(0).reader().terms(field);
}
final List<Terms> termsPerLeaf = new ArrayList<>(leaves.size());
final List<ReaderSlice> slicePerLeaf = new ArrayList<>(leaves.size());
for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) {
LeafReaderContext ctx = leaves.get(leafIdx);
Terms subTerms = ctx.reader().terms(field);
if (subTerms != null) {
termsPerLeaf.add(subTerms);
slicePerLeaf.add(new ReaderSlice(ctx.docBase, r.maxDoc(), leafIdx - 1));
}
}
if (termsPerLeaf.size() == 0) {
return null;
} else {
return new MultiTerms(termsPerLeaf.toArray(Terms.EMPTY_ARRAY),
slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY));
}
}
/** Returns {@link PostingsEnum} for the specified field and
@ -264,5 +287,37 @@ public final class MultiFields extends Fields {
}
return fields;
}
private static class LeafReaderFields extends Fields {
private final LeafReader leafReader;
private final List<String> indexedFields;
LeafReaderFields(LeafReader leafReader) {
this.leafReader = leafReader;
this.indexedFields = new ArrayList<>();
for (FieldInfo fieldInfo : leafReader.getFieldInfos()) {
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
indexedFields.add(fieldInfo.name);
}
}
Collections.sort(indexedFields);
}
@Override
public Iterator<String> iterator() {
return Collections.unmodifiableList(indexedFields).iterator();
}
@Override
public int size() {
return indexedFields.size();
}
@Override
public Terms terms(String field) throws IOException {
return leafReader.terms(field);
}
}
}
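The reworked MultiFields.getTerms above merges the per-leaf Terms for one field directly, skipping leaves that do not index it, instead of going through getFields. A brief usage sketch over a composite reader (the field name is assumed):
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
// Merged terms across all segments; falls back to an empty enum when no leaf has the field.
static TermsEnum mergedTerms(IndexReader reader) throws IOException {
  Terms terms = MultiFields.getTerms(reader, "body");
  return terms == null ? TermsEnum.EMPTY : terms.iterator();
}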

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
@ -50,7 +51,6 @@ import org.apache.lucene.util.Version;
*/
public class ParallelLeafReader extends LeafReader {
private final FieldInfos fieldInfos;
private final ParallelFields fields = new ParallelFields();
private final LeafReader[] parallelReaders, storedFieldsReaders;
private final Set<LeafReader> completeReaderSet =
Collections.newSetFromMap(new IdentityHashMap<LeafReader,Boolean>());
@ -58,9 +58,10 @@ public class ParallelLeafReader extends LeafReader {
private final int maxDoc, numDocs;
private final boolean hasDeletions;
private final LeafMetaData metaData;
private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();
private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>();
private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>();//TODO needn't sort?
private final Map<String,LeafReader> termsFieldToReader = new HashMap<>();
/** Create a ParallelLeafReader based on the provided
* readers; auto-closes the given readers on {@link #close()}. */
public ParallelLeafReader(LeafReader... readers) throws IOException {
@ -130,9 +131,15 @@ public class ParallelLeafReader extends LeafReader {
if (!fieldToReader.containsKey(fieldInfo.name)) {
builder.add(fieldInfo);
fieldToReader.put(fieldInfo.name, reader);
// only add these if the reader responsible for that field name is the current:
// TODO consider populating 1st leaf with vectors even if the field name has been seen on a previous leaf
if (fieldInfo.hasVectors()) {
tvFieldToReader.put(fieldInfo.name, reader);
}
// TODO consider populating 1st leaf with terms even if the field name has been seen on a previous leaf
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
termsFieldToReader.put(fieldInfo.name, reader);
}
}
}
}
@ -154,17 +161,6 @@ public class ParallelLeafReader extends LeafReader {
fieldInfos = builder.finish();
this.metaData = new LeafMetaData(createdVersionMajor, minVersion, indexSort);
// build Fields instance
for (final LeafReader reader : this.parallelReaders) {
final Fields readerFields = reader.fields();
for (String field : readerFields) {
// only add if the reader responsible for that field name is the current:
if (fieldToReader.get(field) == reader) {
this.fields.addField(field, readerFields.terms(field));
}
}
}
// do this finally so any Exceptions occurred before don't affect refcounts:
for (LeafReader reader : completeReaderSet) {
@ -230,13 +226,14 @@ public class ParallelLeafReader extends LeafReader {
ensureOpen();
return hasDeletions ? parallelReaders[0].getLiveDocs() : null;
}
@Override
public Fields fields() {
public Terms terms(String field) throws IOException {
ensureOpen();
return fields;
LeafReader leafReader = termsFieldToReader.get(field);
return leafReader == null ? null : leafReader.terms(field);
}
@Override
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)

View File

@ -18,6 +18,8 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import org.apache.lucene.codecs.DocValuesProducer;
@ -284,21 +286,27 @@ public final class SlowCodecReaderWrapper {
}
private static FieldsProducer readerToFieldsProducer(final LeafReader reader) throws IOException {
final Fields fields = reader.fields();
ArrayList<String> indexedFields = new ArrayList<>();
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
indexedFields.add(fieldInfo.name);
}
}
Collections.sort(indexedFields);
return new FieldsProducer() {
@Override
public Iterator<String> iterator() {
return fields.iterator();
return indexedFields.iterator();
}
@Override
public Terms terms(String field) throws IOException {
return fields.terms(field);
return reader.terms(field);
}
@Override
public int size() {
return fields.size();
return indexedFields.size();
}
@Override

View File

@ -49,6 +49,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
*/
class SortingLeafReader extends FilterLeafReader {
//TODO remove from here; move to FreqProxTermsWriter or FreqProxFields?
static class SortingFields extends FilterFields {
private final Sorter.DocMap docMap;
@ -1042,8 +1043,9 @@ class SortingLeafReader extends FilterLeafReader {
}
@Override
public Fields fields() throws IOException {
return new SortingFields(in.fields(), in.getFieldInfos(), docMap);
public Terms terms(String field) throws IOException {
Terms terms = super.terms(field);
return terms==null ? null : new SortingTerms(terms, in.getFieldInfos().fieldInfo(field).getIndexOptions(), docMap);
}
@Override

View File

@ -109,6 +109,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
@Override
boolean start(IndexableField field, boolean first) {
super.start(field, first);
assert field.fieldType().indexOptions() != IndexOptions.NONE;
if (first) {
@ -224,7 +225,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
void newTerm(final int termID) {
TermVectorsPostingsArray postings = termVectorsPostingsArray;
postings.freqs[termID] = 1;
postings.freqs[termID] = getTermFreq();
postings.lastOffsets[termID] = 0;
postings.lastPositions[termID] = 0;
@ -235,11 +236,25 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
void addTerm(final int termID) {
TermVectorsPostingsArray postings = termVectorsPostingsArray;
postings.freqs[termID]++;
postings.freqs[termID] += getTermFreq();
writeProx(postings, termID);
}
private int getTermFreq() {
int freq = termFreqAtt.getTermFrequency();
if (freq != 1) {
if (doVectorPositions) {
throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector positions while using custom TermFrequencyAttribute");
}
if (doVectorOffsets) {
throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector offsets while using custom TermFrequencyAttribute");
}
}
return freq;
}
@Override
public void newPostingsArray() {
termVectorsPostingsArray = (TermVectorsPostingsArray) postingsArray;

View File

@ -19,12 +19,13 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
private static final int HASH_INIT_SIZE = 4;
@ -35,6 +36,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
protected final DocumentsWriterPerThread.DocState docState;
protected final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
protected TermFrequencyAttribute termFreqAtt;
// Copied from our perThread
final IntBlockPool intPool;
@ -287,6 +289,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
* document. */
boolean start(IndexableField field, boolean first) {
termAtt = fieldState.termAttribute;
termFreqAtt = fieldState.termFreqAttribute;
if (nextPerField != null) {
doNextCall = nextPerField.start(field, first);
}

View File

@ -51,6 +51,7 @@ public class AutomatonQuery extends MultiTermQuery {
protected final CompiledAutomaton compiled;
/** term containing the field, and possibly some pattern structure */
protected final Term term;
protected final boolean automatonIsBinary;
/**
* Create a new AutomatonQuery from an {@link Automaton}.
@ -98,6 +99,7 @@ public class AutomatonQuery extends MultiTermQuery {
super(term.field());
this.term = term;
this.automaton = automaton;
this.automatonIsBinary = isBinary;
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
}
@ -154,4 +156,9 @@ public class AutomatonQuery extends MultiTermQuery {
public Automaton getAutomaton() {
return automaton;
}
/** Is this a binary (byte) oriented automaton. See the constructor. */
public boolean isAutomatonBinary() {
return automatonIsBinary;
}
}

View File

@ -178,6 +178,14 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
return disjuncts[0];
}
if (tieBreakerMultiplier == 1.0f) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (Query sub : disjuncts) {
builder.add(sub, BooleanClause.Occur.SHOULD);
}
return builder.build();
}
boolean actuallyRewritten = false;
List<Query> rewrittenDisjuncts = new ArrayList<>();
for (Query sub : disjuncts) {

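The new early rewrite above applies only when tieBreakerMultiplier is exactly 1.0: the DisMax score max + 1.0 * (sum - max) reduces to the plain sum of the matching disjuncts, which is what a BooleanQuery of SHOULD clauses produces. A sketch of the equivalence (the queries and field names are illustrative):
import java.util.Arrays;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
static Query[] tieBreakerOneEquivalence() {
  // With tieBreakerMultiplier == 1.0 this query...
  Query dmq = new DisjunctionMaxQuery(
      Arrays.asList(new TermQuery(new Term("title", "lucene")),
                    new TermQuery(new Term("body", "lucene"))),
      1.0f);
  // ...can rewrite to the score-equivalent disjunction:
  Query rewritten = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("title", "lucene")), BooleanClause.Occur.SHOULD)
      .add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.SHOULD)
      .build();
  return new Query[] { dmq, rewritten };
}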
View File

@ -26,7 +26,6 @@ import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -224,8 +223,7 @@ public class TermInSetQuery extends Query implements Accountable {
private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
final LeafReader reader = context.reader();
final Fields fields = reader.fields();
Terms terms = fields.terms(field);
Terms terms = reader.terms(field);
if (terms == null) {
return null;
}

View File

@ -35,7 +35,7 @@ public final class NamedSPILoader<S extends NamedSPILoader.NamedSPI> implements
private final Class<S> clazz;
public NamedSPILoader(Class<S> clazz) {
this(clazz, Thread.currentThread().getContextClassLoader());
this(clazz, null);
}
public NamedSPILoader(Class<S> clazz, ClassLoader classloader) {

View File

@ -538,11 +538,7 @@ public class QueryBuilder {
builder.add(queryPos, operator);
}
}
BooleanQuery bq = builder.build();
if (bq.clauses().size() == 1) {
return bq.clauses().get(0).getQuery();
}
return bq;
return builder.build();
}
/**

View File

@ -48,13 +48,11 @@ public final class SPIClassIterator<S> implements Iterator<Class<? extends S>> {
private final Enumeration<URL> profilesEnum;
private Iterator<String> linesIterator;
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the context classloader. */
/** Creates a new SPI iterator to lookup services of type {@code clazz} using
* the same {@link ClassLoader} as the argument. */
public static <S> SPIClassIterator<S> get(Class<S> clazz) {
ClassLoader cl = Thread.currentThread().getContextClassLoader();
if (cl == null) {
cl = clazz.getClassLoader();
}
return new SPIClassIterator<>(clazz, cl);
return new SPIClassIterator<>(clazz,
Objects.requireNonNull(clazz.getClassLoader(), () -> clazz + " has no classloader."));
}
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the given classloader. */

View File

@ -125,6 +125,7 @@ public class TestToken extends LuceneTestCase {
t.setFlags(8);
t.setPositionIncrement(3);
t.setPositionLength(11);
t.setTermFrequency(42);
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
@ -136,6 +137,7 @@ public class TestToken extends LuceneTestCase {
put(PayloadAttribute.class.getName() + "#payload", null);
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
put(FlagsAttribute.class.getName() + "#flags", 8);
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
}});
}
}

View File

@ -82,6 +82,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
t.setPositionIncrement(3);
t.setPositionLength(11);
t.setType("foobar");
t.setTermFrequency(42);
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
@ -91,6 +92,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
put(TypeAttribute.class.getName() + "#type", "foobar");
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
}});
}
}

View File

@ -55,7 +55,7 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
DirectoryReader r = DirectoryReader.open(w);
assertEquals(1, r.leaves().size());
FieldReader field = (FieldReader) r.leaves().get(0).reader().fields().terms("field");
FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
// We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
Stats stats = field.getStats();
assertEquals(0, stats.floorBlockCount);

View File

@ -91,7 +91,7 @@ public class Test2BDocs extends LuceneTestCase {
LeafReader reader = context.reader();
int lim = context.reader().maxDoc();
Terms terms = reader.fields().terms("f1");
Terms terms = reader.terms("f1");
for (int i=0; i<10000; i++) {
TermsEnum te = terms.iterator();
assertTrue( te.seekExact(term) );

View File

@ -0,0 +1,468 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import static org.apache.lucene.index.PostingsEnum.NO_MORE_DOCS;
public class TestCustomTermFreq extends LuceneTestCase {
private static final class CannedTermFreqs extends TokenStream {
private final String[] terms;
private final int[] termFreqs;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TermFrequencyAttribute termFreqAtt = addAttribute(TermFrequencyAttribute.class);
private int upto;
public CannedTermFreqs(String[] terms, int[] termFreqs) {
this.terms = terms;
this.termFreqs = termFreqs;
assert terms.length == termFreqs.length;
}
@Override
public boolean incrementToken() {
if (upto == terms.length) {
return false;
}
clearAttributes();
termAtt.append(terms[upto]);
termFreqAtt.setTermFrequency(termFreqs[upto]);
upto++;
return true;
}
@Override
public void reset() {
upto = 0;
}
}
public void testSingletonTermsOneDoc() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {42, 128}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(128, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(42, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testSingletonTermsTwoDocs() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {42, 128}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {50, 50}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(128, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(50, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(42, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(50, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testRepeatTermsOneDoc() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(228, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(59, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testRepeatTermsTwoDocs() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {50, 60, 70, 80}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(228, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(140, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(59, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(120, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testTotalTermFreq() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {50, 60, 70, 80}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator();
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
assertEquals(179, termsEnum.totalTermFreq());
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
assertEquals(368, termsEnum.totalTermFreq());
IOUtils.close(r, w, dir);
}
// you can't index proximity with custom term freqs:
public void testInvalidProx() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
// you can't index DOCS_ONLY with custom term freq
public void testInvalidDocsOnly() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": must index term freq while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
// sum of term freqs must fit in an int
public void testOverflowInt() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS);
Document doc = new Document();
doc.add(new Field("field", "this field should be indexed", fieldType));
w.addDocument(doc);
Document doc2 = new Document();
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {3, Integer.MAX_VALUE}),
fieldType);
doc2.add(field);
expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc2);});
IndexReader r = DirectoryReader.open(w);
assertEquals(1, r.numDocs());
IOUtils.close(r, w, dir);
}
public void testInvalidTermVectorPositions() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
fieldType.setStoreTermVectors(true);
fieldType.setStoreTermVectorPositions(true);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": cannot index term vector positions while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
public void testInvalidTermVectorOffsets() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
fieldType.setStoreTermVectors(true);
fieldType.setStoreTermVectorOffsets(true);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": cannot index term vector offsets while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
public void testTermVectors() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
fieldType.setStoreTermVectors(true);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {50, 60, 70, 80}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
Fields fields = r.getTermVectors(0);
TermsEnum termsEnum = fields.terms("field").iterator();
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
assertEquals(228, termsEnum.totalTermFreq());
PostingsEnum postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(228, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
assertEquals(59, termsEnum.totalTermFreq());
postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(59, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
fields = r.getTermVectors(1);
termsEnum = fields.terms("field").iterator();
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
assertEquals(140, termsEnum.totalTermFreq());
postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(140, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
assertEquals(120, termsEnum.totalTermFreq());
postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(120, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
/**
* Similarity holds onto the FieldInvertState for subsequent verification.
*/
private static class NeverForgetsSimilarity extends Similarity {
public FieldInvertState lastState;
private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();
private NeverForgetsSimilarity() {
// no
}
@Override
public long computeNorm(FieldInvertState state) {
this.lastState = state;
return 1;
}
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException();
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}
public void testFieldInvertState() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
assertEquals(228, fis.getMaxTermFrequency());
assertEquals(2, fis.getUniqueTermCount());
assertEquals(0, fis.getNumOverlap());
assertEquals(287, fis.getLength());
IOUtils.close(w, dir);
}
}

View File

@ -249,14 +249,16 @@ public class TestDoc extends LuceneTestCase {
for (int i = 0; i < reader.numDocs(); i++)
out.println(reader.document(i));
Fields fields = reader.fields();
for (String field : fields) {
Terms terms = fields.terms(field);
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
continue;
}
Terms terms = reader.terms(fieldInfo.name);
assertNotNull(terms);
TermsEnum tis = terms.iterator();
while(tis.next() != null) {
out.print(" term=" + field + ":" + tis.term());
out.print(" term=" + fieldInfo.name + ":" + tis.term());
out.println(" DF=" + tis.docFreq());
PostingsEnum positions = tis.postings(null, PostingsEnum.POSITIONS);

View File

@ -37,17 +37,6 @@ import org.junit.Ignore;
public class TestExitableDirectoryReader extends LuceneTestCase {
private static class TestReader extends FilterLeafReader {
private static class TestFields extends FilterFields {
TestFields(Fields in) {
super(in);
}
@Override
public Terms terms(String field) throws IOException {
return new TestTerms(super.terms(field));
}
}
private static class TestTerms extends FilterTerms {
TestTerms(Terms in) {
super(in);
@ -83,8 +72,9 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
}
@Override
public Fields fields() throws IOException {
return new TestFields(super.fields());
public Terms terms(String field) throws IOException {
Terms terms = super.terms(field);
return terms==null ? null : new TestTerms(terms);
}
@Override

View File

@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestFieldInvertState extends LuceneTestCase {
/**
* Similarity holds onto the FieldInvertState for subsequent verification.
*/
private static class NeverForgetsSimilarity extends Similarity {
public FieldInvertState lastState;
private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();
private NeverForgetsSimilarity() {
// no
}
@Override
public long computeNorm(FieldInvertState state) {
this.lastState = state;
return 1;
}
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException();
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}
public void testBasic() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
Field field = new Field("field",
new CannedTokenStream(new Token("a", 0, 1),
new Token("b", 2, 3),
new Token("c", 4, 5)),
TextField.TYPE_NOT_STORED);
doc.add(field);
w.addDocument(doc);
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
assertEquals(1, fis.getMaxTermFrequency());
assertEquals(3, fis.getUniqueTermCount());
assertEquals(0, fis.getNumOverlap());
assertEquals(3, fis.getLength());
IOUtils.close(w, dir);
}
public void testRandom() throws Exception {
int numUniqueTokens = TestUtil.nextInt(random(), 1, 25);
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
int numTokens = atLeast(10000);
Token[] tokens = new Token[numTokens];
Map<Character,Integer> counts = new HashMap<>();
int numStacked = 0;
int maxTermFreq = 0;
int pos = -1;
for (int i=0;i<numTokens;i++) {
char tokenChar = (char) ('a' + random().nextInt(numUniqueTokens));
Integer oldCount = counts.get(tokenChar);
int newCount;
if (oldCount == null) {
newCount = 1;
} else {
newCount = 1 + oldCount;
}
counts.put(tokenChar, newCount);
maxTermFreq = Math.max(maxTermFreq, newCount);
Token token = new Token(Character.toString(tokenChar), 2*i, 2*i+1);
if (i > 0 && random().nextInt(7) == 3) {
token.setPositionIncrement(0);
numStacked++;
} else {
pos++;
}
tokens[i] = token;
}
Field field = new Field("field",
new CannedTokenStream(tokens),
TextField.TYPE_NOT_STORED);
doc.add(field);
w.addDocument(doc);
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
assertEquals(maxTermFreq, fis.getMaxTermFrequency());
assertEquals(counts.size(), fis.getUniqueTermCount());
assertEquals(numStacked, fis.getNumOverlap());
assertEquals(numTokens, fis.getLength());
assertEquals(pos, fis.getPosition());
IOUtils.close(w, dir);
}
}

View File

@ -35,18 +35,6 @@ public class TestFilterLeafReader extends LuceneTestCase {
private static class TestReader extends FilterLeafReader {
/** Filter that only permits terms containing 'e'.*/
private static class TestFields extends FilterFields {
TestFields(Fields in) {
super(in);
}
@Override
public Terms terms(String field) throws IOException {
return new TestTerms(super.terms(field));
}
}
private static class TestTerms extends FilterTerms {
TestTerms(Terms in) {
super(in);
@ -103,8 +91,9 @@ public class TestFilterLeafReader extends LuceneTestCase {
}
@Override
public Fields fields() throws IOException {
return new TestFields(super.fields());
public Terms terms(String field) throws IOException {
Terms terms = super.terms(field);
return terms==null ? null : new TestTerms(terms);
}
@Override

View File

@ -17,10 +17,13 @@
package org.apache.lucene.index;
import org.apache.lucene.store.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestFlex extends LuceneTestCase {
@ -70,7 +73,7 @@ public class TestFlex extends LuceneTestCase {
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = w.getReader();
TermsEnum terms = getOnlyLeafReader(r).fields().terms("f").iterator();
TermsEnum terms = getOnlyLeafReader(r).terms("f").iterator();
assertTrue(terms.next() != null);
try {
assertEquals(0, terms.ord());

View File

@ -80,9 +80,7 @@ public class TestIndexReaderClose extends LuceneTestCase {
reader.getReaderCacheHelper().addClosedListener(new FaultyListener());
}
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
reader.close();
});
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> reader.close());
if (throwOnClose) {
assertEquals("BOOM!", expected.getMessage());
@ -90,9 +88,7 @@ public class TestIndexReaderClose extends LuceneTestCase {
assertEquals("GRRRRRRRRRRRR!", expected.getMessage());
}
expectThrows(AlreadyClosedException.class, () -> {
reader.fields();
});
expectThrows(AlreadyClosedException.class, () -> reader.terms("someField"));
if (random().nextBoolean()) {
reader.close(); // call it again

View File

@ -694,7 +694,7 @@ public class TestIndexWriter extends LuceneTestCase {
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
LeafReader subreader = getOnlyLeafReader(reader);
TermsEnum te = subreader.fields().terms("").iterator();
TermsEnum te = subreader.terms("").iterator();
assertEquals(new BytesRef("a"), te.next());
assertEquals(new BytesRef("b"), te.next());
assertEquals(new BytesRef("c"), te.next());
@ -715,7 +715,7 @@ public class TestIndexWriter extends LuceneTestCase {
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
LeafReader subreader = getOnlyLeafReader(reader);
TermsEnum te = subreader.fields().terms("").iterator();
TermsEnum te = subreader.terms("").iterator();
assertEquals(new BytesRef(""), te.next());
assertEquals(new BytesRef("a"), te.next());
assertEquals(new BytesRef("b"), te.next());

View File

@ -136,7 +136,7 @@ public class TestIndexWriterUnicode extends LuceneTestCase {
}
private void checkTermsOrder(IndexReader r, Set<String> allTerms, boolean isTop) throws IOException {
TermsEnum terms = MultiFields.getFields(r).terms("f").iterator();
TermsEnum terms = MultiFields.getTerms(r, "f").iterator();
BytesRefBuilder last = new BytesRefBuilder();

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -90,21 +89,10 @@ public class TestParallelTermEnum extends LuceneTestCase {
public void test1() throws IOException {
ParallelLeafReader pr = new ParallelLeafReader(ir1, ir2);
Fields fields = pr.fields();
Iterator<String> fe = fields.iterator();
assertEquals(3, pr.getFieldInfos().size());
String f = fe.next();
assertEquals("field1", f);
checkTerms(fields.terms(f), "brown", "fox", "jumps", "quick", "the");
f = fe.next();
assertEquals("field2", f);
checkTerms(fields.terms(f), "brown", "fox", "jumps", "quick", "the");
f = fe.next();
assertEquals("field3", f);
checkTerms(fields.terms(f), "dog", "fox", "jumps", "lazy", "over", "the");
assertFalse(fe.hasNext());
checkTerms(pr.terms("field1"), "brown", "fox", "jumps", "quick", "the");
checkTerms(pr.terms("field2"), "brown", "fox", "jumps", "quick", "the");
checkTerms(pr.terms("field3"), "dog", "fox", "jumps", "lazy", "over", "the");
}
}

View File

@ -479,7 +479,7 @@ public class TestPayloads extends LuceneTestCase {
}
writer.close();
IndexReader reader = DirectoryReader.open(dir);
TermsEnum terms = MultiFields.getFields(reader).terms(field).iterator();
TermsEnum terms = MultiFields.getTerms(reader, field).iterator();
PostingsEnum tp = null;
while (terms.next() != null) {
String termText = terms.term().utf8ToString();
@ -602,7 +602,7 @@ public class TestPayloads extends LuceneTestCase {
field.setTokenStream(ts);
writer.addDocument(doc);
DirectoryReader reader = writer.getReader();
TermsEnum te = MultiFields.getFields(reader).terms("field").iterator();
TermsEnum te = MultiFields.getTerms(reader, "field").iterator();
assertTrue(te.seekExact(new BytesRef("withPayload")));
PostingsEnum de = te.postings(null, PostingsEnum.PAYLOADS);
de.nextDoc();

View File

@ -221,9 +221,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
public int[] toDocsArray(Term term, Bits bits, IndexReader reader)
throws IOException {
Fields fields = MultiFields.getFields(reader);
Terms cterms = fields.terms(term.field);
TermsEnum ctermsEnum = cterms.iterator();
TermsEnum ctermsEnum = MultiFields.getTerms(reader, term.field).iterator();
if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
PostingsEnum postingsEnum = TestUtil.docs(random(), ctermsEnum, null, PostingsEnum.NONE);
return toArray(postingsEnum);

View File

@ -291,7 +291,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
// TODO: improve this
LeafReader sub = ctx.reader();
//System.out.println("\nsub=" + sub);
final TermsEnum termsEnum = sub.fields().terms("content").iterator();
final TermsEnum termsEnum = sub.terms("content").iterator();
PostingsEnum docs = null;
PostingsEnum docsAndPositions = null;
PostingsEnum docsAndPositionsAndOffsets = null;

View File

@ -57,7 +57,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random()));
assertTrue(reader != null);
TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
TermsEnum terms = reader.terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
terms.seekCeil(new BytesRef("field"));
PostingsEnum termDocs = TestUtil.docs(random(), terms, null, PostingsEnum.FREQS);
if (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {

View File

@ -19,14 +19,14 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestSegmentTermEnum extends LuceneTestCase {
@ -80,7 +80,7 @@ public class TestSegmentTermEnum extends LuceneTestCase {
addDoc(writer, "aaa bbb");
writer.close();
LeafReader reader = getOnlyLeafReader(DirectoryReader.open(dir));
TermsEnum terms = reader.fields().terms("content").iterator();
TermsEnum terms = reader.terms("content").iterator();
assertNotNull(terms.next());
assertEquals("aaa", terms.term().utf8ToString());
assertNotNull(terms.next());

View File

@ -21,10 +21,13 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.util.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestStressAdvance extends LuceneTestCase {
@ -74,7 +77,7 @@ public class TestStressAdvance extends LuceneTestCase {
bDocIDs.add(docID);
}
}
final TermsEnum te = getOnlyLeafReader(r).fields().terms("field").iterator();
final TermsEnum te = getOnlyLeafReader(r).terms("field").iterator();
PostingsEnum de = null;
for(int iter2=0;iter2<10;iter2++) {

View File

@ -18,7 +18,17 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -738,7 +748,7 @@ public class TestTermsEnum extends LuceneTestCase {
DirectoryReader r = w.getReader();
w.close();
LeafReader sub = getOnlyLeafReader(r);
Terms terms = sub.fields().terms("field");
Terms terms = sub.terms("field");
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
TermsEnum te = terms.intersect(ca, null);
@ -792,7 +802,7 @@ public class TestTermsEnum extends LuceneTestCase {
DirectoryReader r = w.getReader();
w.close();
LeafReader sub = getOnlyLeafReader(r);
Terms terms = sub.fields().terms("field");
Terms terms = sub.terms("field");
Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
@ -846,7 +856,7 @@ public class TestTermsEnum extends LuceneTestCase {
DirectoryReader r = w.getReader();
w.close();
LeafReader sub = getOnlyLeafReader(r);
Terms terms = sub.fields().terms("field");
Terms terms = sub.terms("field");
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton(); // accept ALL
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
@ -986,7 +996,7 @@ public class TestTermsEnum extends LuceneTestCase {
w.addDocument(doc);
IndexReader r = w.getReader();
assertEquals(1, r.leaves().size());
TermsEnum te = r.leaves().get(0).reader().fields().terms("field").iterator();
TermsEnum te = r.leaves().get(0).reader().terms("field").iterator();
for(int i=0;i<=termCount;i++) {
assertTrue("term '" + termsList.get(i).utf8ToString() + "' should exist but doesn't", te.seekExact(termsList.get(i)));
}
@ -1007,9 +1017,8 @@ public class TestTermsEnum extends LuceneTestCase {
doc.add(newStringField("field", "foobar", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
Fields fields = MultiFields.getFields(r);
Terms terms = MultiFields.getTerms(r, "field");
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
Terms terms = fields.terms("field");
String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
r.close();

View File

@ -29,7 +29,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
@ -219,21 +218,16 @@ public class TermInSetQueryTest extends LuceneTestCase {
}
@Override
public Fields fields() throws IOException {
return new FilterFields(in.fields()) {
public Terms terms(String field) throws IOException {
Terms terms = super.terms(field);
if (terms == null) {
return null;
}
return new FilterTerms(terms) {
@Override
public Terms terms(String field) throws IOException {
final Terms in = this.in.terms(field);
if (in == null) {
return null;
}
return new FilterTerms(in) {
@Override
public TermsEnum iterator() throws IOException {
counter.incrementAndGet();
return super.iterator();
}
};
public TermsEnum iterator() throws IOException {
counter.incrementAndGet();
return super.iterator();
}
};
}

View File

@ -523,6 +523,21 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
assertTrue("score should be negative", h[i].score < 0);
}
}
public void testRewriteBoolean() throws Exception {
Query sub1 = tq("hed", "albino");
Query sub2 = tq("hed", "elephant");
DisjunctionMaxQuery q = new DisjunctionMaxQuery(
Arrays.asList(
sub1, sub2
), 1.0f);
Query rewritten = s.rewrite(q);
assertTrue(rewritten instanceof BooleanQuery);
BooleanQuery bq = (BooleanQuery) rewritten;
assertEquals(bq.clauses().size(), 2);
assertEquals(bq.clauses().get(0), new BooleanClause(sub1, BooleanClause.Occur.SHOULD));
assertEquals(bq.clauses().get(1), new BooleanClause(sub2, BooleanClause.Occur.SHOULD));
}
/** macro */
protected Query tq(String f, String t) {

View File

@ -71,7 +71,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
// this TermEnum gives "piccadilly", "pie" and "pizza".
String prefix = "pi";
TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
TermsEnum te = MultiFields.getTerms(reader,"body").iterator();
te.seekCeil(new BytesRef(prefix));
do {
String s = te.term().utf8ToString();

View File

@ -73,7 +73,7 @@ public class TestPhrasePrefixQuery extends LuceneTestCase {
// this TermEnum gives "piccadilly", "pie" and "pizza".
String prefix = "pi";
TermsEnum te = MultiFields.getFields(reader).terms("body").iterator();
TermsEnum te = MultiFields.getTerms(reader, "body").iterator();
te.seekCeil(new BytesRef(prefix));
do {
String s = te.term().utf8ToString();

View File

@ -61,7 +61,7 @@ public class TestSameScoresWithThreads extends LuceneTestCase {
w.close();
final IndexSearcher s = newSearcher(r);
Terms terms = MultiFields.getFields(r).terms("body");
Terms terms = MultiFields.getTerms(r, "body");
int termCount = 0;
TermsEnum termsEnum = terms.iterator();
while(termsEnum.next() != null) {

View File

@ -22,7 +22,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
@ -123,31 +122,27 @@ public class TestTermQuery extends LuceneTestCase {
}
@Override
public Fields fields() throws IOException {
return new FilterFields(super.fields()) {
public Terms terms(String field) throws IOException {
Terms terms = super.terms(field);
return terms==null ? null : new FilterTerms(terms) {
@Override
public Terms terms(String field) throws IOException {
return new FilterTerms(super.terms(field)) {
public TermsEnum iterator() throws IOException {
return new FilterTermsEnum(super.iterator()) {
@Override
public TermsEnum iterator() throws IOException {
return new FilterTermsEnum(super.iterator()) {
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
throw new AssertionError("no seek");
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
throw new AssertionError("no seek");
}
@Override
public boolean seekExact(BytesRef text) throws IOException {
throw new AssertionError("no seek");
}
@Override
public void seekExact(long ord) throws IOException {
throw new AssertionError("no seek");
}
};
public SeekStatus seekCeil(BytesRef text) throws IOException {
throw new AssertionError("no seek");
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
throw new AssertionError("no seek");
}
@Override
public boolean seekExact(BytesRef text) throws IOException {
throw new AssertionError("no seek");
}
@Override
public void seekExact(long ord) throws IOException {
throw new AssertionError("no seek");
}
};
}

View File

@ -178,32 +178,36 @@ public class TestQueryBuilder extends LuceneTestCase {
.build();
Query syn2 = new TermQuery(new Term("field", "cavy"));
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
BooleanQuery synQuery = new BooleanQuery.Builder()
.add(syn1, BooleanClause.Occur.SHOULD)
.add(syn2, BooleanClause.Occur.SHOULD)
.build();
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
.add(synQuery, occur)
.build();
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.add(new TermQuery(new Term("field", "story")), occur)
.build();
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
expectedBooleanQuery = new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "the")), occur)
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.add(new TermQuery(new Term("field", "story")), occur)
.build();
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
expectedBooleanQuery = new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "the")), occur)
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.add(new TermQuery(new Term("field", "story")), occur)
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.build();
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
}
@ -217,32 +221,36 @@ public class TestQueryBuilder extends LuceneTestCase {
.add(new Term("field", "pig"))
.build();
Query syn2 = new TermQuery(new Term("field", "cavy"));
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
BooleanQuery synQuery = new BooleanQuery.Builder()
.add(syn1, BooleanClause.Occur.SHOULD)
.add(syn2, BooleanClause.Occur.SHOULD)
.build();
BooleanQuery expectedGraphQuery = new BooleanQuery.Builder()
.add(synQuery, occur)
.build();
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
queryBuilder.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
assertEquals(expectedGraphQuery, queryBuilder.createBooleanQuery("field", "guinea pig", occur));
BooleanQuery expectedBooleanQuery = new BooleanQuery.Builder()
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.add(new TermQuery(new Term("field", "story")), occur)
.build();
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "guinea pig story", occur));
expectedBooleanQuery = new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "the")), occur)
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.add(new TermQuery(new Term("field", "story")), occur)
.build();
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story", occur));
expectedBooleanQuery = new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "the")), occur)
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.add(new TermQuery(new Term("field", "story")), occur)
.add(expectedGraphQuery, occur)
.add(synQuery, occur)
.build();
assertEquals(expectedBooleanQuery, queryBuilder.createBooleanQuery("field", "the guinea pig story guinea pig", occur));
}

View File

@ -25,8 +25,8 @@ import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
@ -90,8 +90,8 @@ public class TermVectorLeafReader extends LeafReader {
}
@Override
public Fields fields() throws IOException {
return fields;
public Terms terms(String field) throws IOException {
return fields.terms(field);
}
@Override
@ -148,7 +148,7 @@ public class TermVectorLeafReader extends LeafReader {
if (docID != 0) {
return null;
}
return fields();
return fields;
}
@Override

View File

@ -18,7 +18,6 @@ package org.apache.lucene.search.highlight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -30,7 +29,6 @@ import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
@ -429,30 +427,15 @@ public class WeightedSpanTermExtractor {
DelegatingLeafReader(LeafReader in) {
super(in);
}
@Override
public FieldInfos getFieldInfos() {
throw new UnsupportedOperationException();
throw new UnsupportedOperationException();//TODO merge them
}
@Override
public Fields fields() throws IOException {
return new FilterFields(super.fields()) {
@Override
public Terms terms(String field) throws IOException {
return super.terms(DelegatingLeafReader.FIELD_NAME);
}
@Override
public Iterator<String> iterator() {
return Collections.singletonList(DelegatingLeafReader.FIELD_NAME).iterator();
}
@Override
public int size() {
return 1;
}
};
public Terms terms(String field) throws IOException {
return super.terms(DelegatingLeafReader.FIELD_NAME);
}
@Override

View File

@ -83,8 +83,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
return allAutomata.get(0);
}
//TODO it'd be nice if we could get at the underlying Automaton in CharacterRunAutomaton so that we
// could union them all. But it's not exposed, and note TermRangeQuery isn't modelled as an Automaton
// by MultiTermHighlighting.
// could union them all. But it's not exposed, and sometimes the automaton is byte (not char) oriented
// Return an aggregate CharacterRunAutomaton of others
return new CharacterRunAutomaton(Automata.makeEmpty()) {// the makeEmpty() is bogus; won't be used

View File

@ -19,12 +19,10 @@ package org.apache.lucene.search.uhighlight;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -32,19 +30,17 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.apache.lucene.util.automaton.Operations;
@ -110,18 +106,6 @@ class MultiTermHighlighting {
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
if (fieldMatcher.test(prefix.field())) {
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
Automata.makeAnyString())) {
@Override
public String toString() {
return pq.toString();
}
});
}
} else if (query instanceof FuzzyQuery) {
final FuzzyQuery fq = (FuzzyQuery) query;
if (fieldMatcher.test(fq.getField())) {
@ -143,69 +127,63 @@ class MultiTermHighlighting {
}
});
}
} else if (query instanceof TermRangeQuery) {
final TermRangeQuery tq = (TermRangeQuery) query;
if (fieldMatcher.test(tq.getField())) {
final CharsRef lowerBound;
if (tq.getLowerTerm() == null) {
lowerBound = null;
} else {
lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
}
final CharsRef upperBound;
if (tq.getUpperTerm() == null) {
upperBound = null;
} else {
upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
}
final boolean includeLower = tq.includesLower();
final boolean includeUpper = tq.includesUpper();
final CharsRef scratch = new CharsRef();
@SuppressWarnings("deprecation")
final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
// this is *not* an automaton, but its very simple
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
@Override
public boolean run(char[] s, int offset, int length) {
scratch.chars = s;
scratch.offset = offset;
scratch.length = length;
if (lowerBound != null) {
int cmp = comparator.compare(scratch, lowerBound);
if (cmp < 0 || (!includeLower && cmp == 0)) {
return false;
}
}
if (upperBound != null) {
int cmp = comparator.compare(scratch, upperBound);
if (cmp > 0 || (!includeUpper && cmp == 0)) {
return false;
}
}
return true;
}
@Override
public String toString() {
return tq.toString();
}
});
}
} else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query;
if (fieldMatcher.test(aq.getField())) {
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
@Override
public String toString() {
return aq.toString();
}
});
if (aq.isAutomatonBinary() == false) { // note: is the case for WildcardQuery, RegexpQuery
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
@Override
public String toString() {
return aq.toString();
}
});
} else { // note: is the case for PrefixQuery, TermRangeQuery
// byte oriented automaton:
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) { // empty here is bogus just to satisfy API
// TODO can we get access to the aq.compiledAutomaton.runAutomaton ?
ByteRunAutomaton byteRunAutomaton =
new ByteRunAutomaton(aq.getAutomaton(), true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
@Override
public boolean run(char[] chars, int offset, int length) {
int state = 0;
final int maxIdx = offset + length;
for (int i = offset; i < maxIdx; i++) {
final int code = chars[i];
int b;
// UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 )
if (code < 0x80) {
state = byteRunAutomaton.step(state, code);
if (state == -1) return false;
} else if (code < 0x800) {
b = (0xC0 | (code >> 6));
state = byteRunAutomaton.step(state, b);
if (state == -1) return false;
b = (0x80 | (code & 0x3F));
state = byteRunAutomaton.step(state, b);
if (state == -1) return false;
} else {
// more complex
byte[] utf8Bytes = new byte[4 * (maxIdx - i)];
int utf8Len = UnicodeUtil.UTF16toUTF8(chars, i, maxIdx - i, utf8Bytes);
for (int utfIdx = 0; utfIdx < utf8Len; utfIdx++) {
state = byteRunAutomaton.step(state, utf8Bytes[utfIdx] & 0xFF);
if (state == -1) return false;
}
break;
}
}
return byteRunAutomaton.isAccept(state);
}
@Override
public String toString() {
return aq.toString();
}
});
}
}
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
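Note: the replacement branch above handles binary (byte-oriented) automata, such as those produced by PrefixQuery and TermRangeQuery, by stepping a ByteRunAutomaton over the UTF-8 encoding of the candidate text; the inlined UTF-16 to UTF-8 logic avoids allocating for the one- and two-byte cases. A simpler, allocation-heavy sketch of the same idea, not the patch's code:

import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.ByteRunAutomaton;

class ByteAutomatonMatchExample {
  // Runs a byte-oriented automaton against UTF-16 text by converting the whole window to UTF-8 first.
  static boolean matches(ByteRunAutomaton runAutomaton, char[] chars, int offset, int length) {
    byte[] utf8 = new byte[length * 4]; // generous worst case; 3 bytes per UTF-16 unit is the true maximum
    int utf8Len = UnicodeUtil.UTF16toUTF8(chars, offset, length, utf8);
    return runAutomaton.run(utf8, 0, utf8Len);
  }
}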

View File

@ -24,7 +24,6 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@ -36,7 +35,6 @@ import java.util.function.Predicate;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@ -529,12 +527,16 @@ public class PhraseHelper {
}
}
//TODO move up; it's currently inbetween other inner classes that are related
/**
* Needed to support the ability to highlight a query irrespective of the field a query refers to
* (aka requireFieldMatch=false).
* This reader will just delegate every call to a single field in the wrapped
* LeafReader. This way we ensure that all queries going through this reader target the same field.
*/
*/
static final class SingleFieldFilterLeafReader extends FilterLeafReader {
final String fieldName;
SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
super(in);
this.fieldName = fieldName;
@ -542,27 +544,12 @@ public class PhraseHelper {
@Override
public FieldInfos getFieldInfos() {
throw new UnsupportedOperationException();
throw new UnsupportedOperationException();//TODO merge them
}
@Override
public Fields fields() throws IOException {
return new FilterFields(super.fields()) {
@Override
public Terms terms(String field) throws IOException {
return super.terms(fieldName);
}
@Override
public Iterator<String> iterator() {
return Collections.singletonList(fieldName).iterator();
}
@Override
public int size() {
return 1;
}
};
public Terms terms(String field) throws IOException {
return super.terms(fieldName);
}
@Override

View File

@ -18,7 +18,6 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
@ -52,24 +51,9 @@ final class TermVectorFilteredLeafReader extends FilterLeafReader {
}
@Override
public Fields fields() throws IOException {
return new TermVectorFilteredFields(in.fields(), filterTerms);
}
private static final class TermVectorFilteredFields extends FilterLeafReader.FilterFields {
// NOTE: super ("in") is baseFields
private final Terms filterTerms;
TermVectorFilteredFields(Fields baseFields, Terms filterTerms) {
super(baseFields);
this.filterTerms = filterTerms;
}
@Override
public Terms terms(String field) throws IOException {
return new TermsFilteredTerms(in.terms(field), filterTerms);
}
public Terms terms(String field) throws IOException {
Terms terms = in.terms(field);
return terms==null ? null : new TermsFilteredTerms(terms, filterTerms);
}
private static final class TermsFilteredTerms extends FilterLeafReader.FilterTerms {

View File

@ -24,7 +24,6 @@ import java.util.Locale;
import org.apache.lucene.util.LuceneTestCase;
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestSplittingBreakIterator extends LuceneTestCase {

View File

@ -51,12 +51,9 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.junit.After;
import org.junit.Before;
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighter extends LuceneTestCase {
private final FieldType fieldType; // for "body" generally, but not necessarily others. See constructor

View File

@ -24,11 +24,13 @@ import java.util.List;
import java.util.Objects;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -63,16 +65,15 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.UnicodeUtil;
import org.junit.After;
import org.junit.Before;
/**
* Some tests that highlight wildcard, fuzzy, etc queries.
*/
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
final FieldType fieldType;
@ -1079,4 +1080,66 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
assertEquals("[<b>я</b>]", Arrays.toString(snippets));
ir.close();
}
// LUCENE-7719
public void testMultiByteMTQ() throws IOException {
Analyzer analyzer = new KeywordAnalyzer();
try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer)) {
for (int attempt = 0; attempt < 20; attempt++) {
iw.deleteAll();
String field = "title";
String value = RandomStrings.randomUnicodeOfLength(random(), 3);
if (value.contains(UnifiedHighlighter.MULTIVAL_SEP_CHAR+"")) { // will throw things off
continue;
}
int[] valuePoints = value.codePoints().toArray();
iw.addDocument(Collections.singleton(
new Field(field, value, fieldType)));
iw.commit();
try (IndexReader ir = iw.getReader()) {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
highlighter.setBreakIterator(WholeBreakIterator::new);
// Test PrefixQuery
Query query = new PrefixQuery(new Term(field,
UnicodeUtil.newString(valuePoints, 0, 1)));
highlightAndAssertMatch(searcher, highlighter, query, field, value);
// Test TermRangeQuery
query = new TermRangeQuery(field,
new BytesRef(value),
new BytesRef(value),
true, true );
highlightAndAssertMatch(searcher, highlighter, query, field, value);
// Test FuzzyQuery
query = new FuzzyQuery(new Term(field, value + "Z"), 1);
highlightAndAssertMatch(searcher, highlighter, query, field, value);
if (valuePoints.length != 3) {
continue; // even though we ask RandomStrings for a String with 3 code points, it seems sometimes it's less
}
// Test WildcardQuery
query = new WildcardQuery(new Term(field,
new StringBuilder()
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[0])
.append(WildcardQuery.WILDCARD_CHAR)
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[2]).toString()));
highlightAndAssertMatch(searcher, highlighter, query, field, value);
//TODO hmmm; how to randomly generate RegexpQuery? Low priority; we've covered the others well.
}
}
}
}
private void highlightAndAssertMatch(IndexSearcher searcher, UnifiedHighlighter highlighter, Query query, String field, String fieldVal) throws IOException {
TopDocs topDocs = searcher.search(query, 1);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighter.highlight(field, query, topDocs);
assertEquals("[<b>"+fieldVal+"</b>]", Arrays.toString(snippets));
}
}

View File

@ -37,15 +37,12 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import java.util.HashSet;
import java.util.Random;
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterRanking extends LuceneTestCase {
Analyzer indexAnalyzer;

View File

@ -32,8 +32,6 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
private MockAnalyzer indexAnalyzer =

View File

@ -54,8 +54,6 @@ import org.apache.lucene.util.QueryBuilder;
import org.junit.After;
import org.junit.Before;
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
final FieldType fieldType;

View File

@ -48,8 +48,6 @@ import java.util.Map;
* This test DOES NOT represent all testing for highlighting when term vectors are used. Other tests pick the offset
* source at random (to include term vectors) and in-effect test term vectors generally.
*/
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "")//Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
private Analyzer indexAnalyzer;

View File

@ -217,18 +217,20 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
// TEST OffsetsEnums & Passage visibility
// this code never runs; just for compilation
OffsetsEnum oe = new OffsetsEnum(null, EMPTY);
oe.getTerm();
oe.getPostingsEnum();
oe.freq();
oe.hasMorePositions();
oe.nextPosition();
oe.startOffset();
oe.endOffset();
oe.getWeight();
oe.setWeight(2f);
Passage p;
try (OffsetsEnum oe = new OffsetsEnum(null, EMPTY)) {
oe.getTerm();
oe.getPostingsEnum();
oe.freq();
oe.hasMorePositions();
oe.nextPosition();
oe.startOffset();
oe.endOffset();
oe.getWeight();
oe.setWeight(2f);
}
Passage p = new Passage();
p = new Passage();
p.setStartOffset(0);
p.setEndOffset(9);
p.setScore(1f);

View File

@ -1325,7 +1325,10 @@ public class TestJoinUtil extends LuceneTestCase {
String uniqueRandomValue;
do {
// the trick is to generate values which will be ordered similarly for string, ints&longs, positive nums makes it easier
final int nextInt = random.nextInt(Integer.MAX_VALUE);
//
// Additionally in order to avoid precision loss when joining via a float field we can't generate values higher than
// 0xFFFFFF, so we can't use Integer#MAX_VALUE as upper bound here:
final int nextInt = random.nextInt(0xFFFFFF);
uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue,16);
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
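Note: the new upper bound reflects float's 24-bit significand: integers above 0xFFFFFF are not all exactly representable as float, which would make the string, integer, and float variants of the join key disagree. A tiny self-contained check of that fact (illustrative only):

class FloatPrecisionCheck {
  public static void main(String[] args) {
    int exact = 0xFFFFFF;        // 16,777,215: still exactly representable as a float
    int inexact = 0xFFFFFF + 2;  // 16,777,217: rounds to 16,777,216 when stored as a float
    System.out.println(((int) (float) exact) == exact);     // true
    System.out.println(((int) (float) inexact) == inexact); // false
  }
}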

View File

@ -868,20 +868,27 @@ public class MemoryIndex {
final int numDimensions = fieldInfo.getPointDimensionCount();
final int numBytesPerDimension = fieldInfo.getPointNumBytes();
minPackedValue = pointValues[0].bytes.clone();
maxPackedValue = pointValues[0].bytes.clone();
for (int i = 0; i < pointValuesCount; i++) {
BytesRef pointValue = pointValues[i];
assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
for (int dim = 0; dim < numDimensions; ++dim) {
int offset = dim * numBytesPerDimension;
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
}
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
if (numDimensions == 1) {
// PointInSetQuery.MergePointVisitor expects values to be visited in increasing order,
// this is a 1d optimization which has to be done here too. Otherwise we emit values
// out of order which causes mismatches.
Arrays.sort(pointValues, 0, pointValuesCount);
minPackedValue = pointValues[0].bytes.clone();
maxPackedValue = pointValues[pointValuesCount - 1].bytes.clone();
} else {
minPackedValue = pointValues[0].bytes.clone();
maxPackedValue = pointValues[0].bytes.clone();
for (int i = 0; i < pointValuesCount; i++) {
BytesRef pointValue = pointValues[i];
assert pointValue.bytes.length == pointValue.length : "BytesRef should wrap a precise byte[], BytesRef.deepCopyOf() should take care of this";
for (int dim = 0; dim < numDimensions; ++dim) {
int offset = dim * numBytesPerDimension;
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, minPackedValue, offset) < 0) {
System.arraycopy(pointValue.bytes, offset, minPackedValue, offset, numBytesPerDimension);
}
if (StringHelper.compare(numBytesPerDimension, pointValue.bytes, offset, maxPackedValue, offset) > 0) {
System.arraycopy(pointValue.bytes, offset, maxPackedValue, offset, numBytesPerDimension);
}
}
}
}
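Note: the 1d branch relies on Arrays.sort using BytesRef.compareTo, which orders values by unsigned bytes, the same increasing order in which PointInSetQuery.MergePointVisitor expects to visit values. A hedged sketch of that ordering using the public IntPoint encode/decode helpers (class and method names are illustrative):

import java.util.Arrays;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.util.BytesRef;

class PointValueSortExample {
  static BytesRef encode(int value) {
    byte[] packed = new byte[Integer.BYTES];
    IntPoint.encodeDimension(value, packed, 0); // sortable encoding: unsigned byte order matches int order
    return new BytesRef(packed);
  }

  public static void main(String[] args) {
    BytesRef[] values = { encode(3), encode(1), encode(2) };
    Arrays.sort(values); // unsigned byte order, i.e. increasing numeric order for point encodings
    for (BytesRef v : values) {
      System.out.println(IntPoint.decodeDimension(v.bytes, 0)); // prints 1, 2, 3
    }
  }
}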
@ -1127,7 +1134,7 @@ public class MemoryIndex {
*/
private final class MemoryIndexReader extends LeafReader {
private Fields memoryFields = new MemoryFields(fields);
private final MemoryFields memoryFields = new MemoryFields(fields);
private MemoryIndexReader() {
super(); // avoid as much superclass baggage as possible
@ -1229,8 +1236,8 @@ public class MemoryIndex {
}
@Override
public Fields fields() {
return memoryFields;
public Terms terms(String field) throws IOException {
return memoryFields.terms(field);
}
private class MemoryFields extends Fields {
@ -1582,7 +1589,7 @@ public class MemoryIndex {
@Override
public Fields getTermVectors(int docID) {
if (docID == 0) {
return fields();
return memoryFields;
} else {
return null;
}

View File

@ -131,7 +131,7 @@ public class TestMemoryIndex extends LuceneTestCase {
mi.addField("field", "some terms be here", analyzer);
IndexSearcher searcher = mi.createSearcher();
LeafReader reader = (LeafReader) searcher.getIndexReader();
TermsEnum terms = reader.fields().terms("field").iterator();
TermsEnum terms = reader.terms("field").iterator();
terms.seekExact(0);
assertEquals("be", terms.term().utf8ToString());
TestUtil.checkReader(reader);
@ -512,6 +512,30 @@ public class TestMemoryIndex extends LuceneTestCase {
assertEquals(1, s.count(DoublePoint.newRangeQuery("doubles", new double[] {10D, 10D}, new double[] {30D, 30D})));
}
public void testMultiValuedPointsSortedCorrectly() throws Exception {
Document doc = new Document();
doc.add(new IntPoint("ints", 3));
doc.add(new IntPoint("ints", 2));
doc.add(new IntPoint("ints", 1));
doc.add(new LongPoint("longs", 3L));
doc.add(new LongPoint("longs", 2L));
doc.add(new LongPoint("longs", 1L));
doc.add(new FloatPoint("floats", 3F));
doc.add(new FloatPoint("floats", 2F));
doc.add(new FloatPoint("floats", 1F));
doc.add(new DoublePoint("doubles", 3D));
doc.add(new DoublePoint("doubles", 2D));
doc.add(new DoublePoint("doubles", 1D));
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
IndexSearcher s = mi.createSearcher();
assertEquals(1, s.count(IntPoint.newSetQuery("ints", 2)));
assertEquals(1, s.count(LongPoint.newSetQuery("longs", 2)));
assertEquals(1, s.count(FloatPoint.newSetQuery("floats", 2)));
assertEquals(1, s.count(DoublePoint.newSetQuery("doubles", 2)));
}
public void testIndexingPointsAndDocValues() throws Exception {
FieldType type = new FieldType();
type.setDimensions(1, 4);

View File

@ -53,7 +53,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@ -67,8 +66,8 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@ -171,7 +170,7 @@ public class TestMemoryIndexAgainstRAMDir extends BaseTokenStreamTestCase {
private void duellReaders(CompositeReader other, LeafReader memIndexReader)
throws IOException {
Fields memFields = memIndexReader.fields();
Fields memFields = memIndexReader.getTermVectors(0);
for (String field : MultiFields.getFields(other)) {
Terms memTerms = memFields.terms(field);
Terms iwTerms = memIndexReader.terms(field);

View File

@ -39,15 +39,16 @@ public class IndexMergeTool {
System.err.println("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
System.exit(1);
}
FSDirectory mergedIndex = FSDirectory.open(Paths.get(args[0]));
IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(null)
.setOpenMode(OpenMode.CREATE));
// Try to use hardlinks to source segments, if possible.
Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[0])));
IndexWriter writer = new IndexWriter(mergedIndex,
new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE));
Directory[] indexes = new Directory[args.length - 1];
for (int i = 1; i < args.length; i++) {
// try to use hardlinks if possible
indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i])));
indexes[i - 1] = FSDirectory.open(Paths.get(args[i]));
}
System.out.println("Merging...");

View File

@ -22,7 +22,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@ -214,11 +213,10 @@ public class CommonTermsQuery extends Query {
Term[] queryTerms) throws IOException {
TermsEnum termsEnum = null;
for (LeafReaderContext context : leaves) {
final Fields fields = context.reader().fields();
for (int i = 0; i < queryTerms.length; i++) {
Term term = queryTerms[i];
TermContext termContext = contextArray[i];
final Terms terms = fields.terms(term.field());
final Terms terms = context.reader().terms(term.field());
if (terms == null) {
// field does not exist
continue;

View File

@ -19,9 +19,8 @@ package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.function.FunctionValues;
@ -50,8 +49,7 @@ public class TFValueSource extends TermFreqValueSource {
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
Fields fields = readerContext.reader().fields();
final Terms terms = fields.terms(indexedField);
final Terms terms = readerContext.reader().terms(indexedField);
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), indexedField);
if (similarity == null) {

View File

@ -19,9 +19,8 @@ package org.apache.lucene.queries.function.valuesource;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.function.FunctionValues;
@ -48,8 +47,7 @@ public class TermFreqValueSource extends DocFreqValueSource {
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
Fields fields = readerContext.reader().fields();
final Terms terms = fields.terms(indexedField);
final Terms terms = readerContext.reader().terms(indexedField);
return new IntDocValues(this) {
PostingsEnum docs ;

View File

@ -351,7 +351,7 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
assertEquals("Synonym(b:dog b:dogs) Synonym(t:dog t:dogs)", q.toString());
q = parser.parse("guinea pig");
assertFalse(parser.getSplitOnWhitespace());
assertEquals("((+b:guinea +b:pig) (+t:guinea +t:pig)) (b:cavy t:cavy)", q.toString());
assertEquals("((+b:guinea +b:pig) b:cavy) ((+t:guinea +t:pig) t:cavy)", q.toString());
parser.setSplitOnWhitespace(true);
q = parser.parse("guinea pig");
assertEquals("(b:guinea t:guinea) (b:pig t:pig)", q.toString());

View File

@ -522,8 +522,10 @@ public class TestQueryParser extends QueryParserTestBase {
.build();
BooleanQuery graphQuery = new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.SHOULD)
.build();
assertEquals(graphQuery, dumb.parse("guinea pig"));
@ -541,11 +543,32 @@ public class TestQueryParser extends QueryParserTestBase {
QueryParser smart = new SmartQueryParser();
smart.setSplitOnWhitespace(false);
graphQuery = new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.SHOULD)
.build();
assertEquals(graphQuery, smart.parse("guinea pig"));
assertEquals(phraseGuineaPig, smart.parse("\"guinea pig\""));
// with the AND operator
dumb.setDefaultOperator(Operator.AND);
BooleanQuery graphAndQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.MUST)
.build();
assertEquals(graphAndQuery, dumb.parse("guinea pig"));
graphAndQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.MUST)
.add(cavy, BooleanClause.Occur.MUST)
.build();
assertEquals(graphAndQuery, dumb.parse("guinea pig cavy"));
}
public void testEnableGraphQueries() throws Exception {
@ -616,30 +639,30 @@ public class TestQueryParser extends QueryParserTestBase {
assertQueryEquals("guinea /pig/", a, "guinea /pig/");
// Operators should not interrupt multiword analysis if not don't associate
assertQueryEquals("(guinea pig)", a, "(+guinea +pig) cavy");
assertQueryEquals("+(guinea pig)", a, "+((+guinea +pig) cavy)");
assertQueryEquals("-(guinea pig)", a, "-((+guinea +pig) cavy)");
assertQueryEquals("!(guinea pig)", a, "-((+guinea +pig) cavy)");
assertQueryEquals("NOT (guinea pig)", a, "-((+guinea +pig) cavy)");
assertQueryEquals("(guinea pig)^2", a, "((+guinea +pig) cavy)^2.0");
assertQueryEquals("(guinea pig)", a, "((+guinea +pig) cavy)");
assertQueryEquals("+(guinea pig)", a, "+(((+guinea +pig) cavy))");
assertQueryEquals("-(guinea pig)", a, "-(((+guinea +pig) cavy))");
assertQueryEquals("!(guinea pig)", a, "-(((+guinea +pig) cavy))");
assertQueryEquals("NOT (guinea pig)", a, "-(((+guinea +pig) cavy))");
assertQueryEquals("(guinea pig)^2", a, "(((+guinea +pig) cavy))^2.0");
assertQueryEquals("field:(guinea pig)", a, "(+guinea +pig) cavy");
assertQueryEquals("field:(guinea pig)", a, "((+guinea +pig) cavy)");
assertQueryEquals("+small guinea pig", a, "+small (+guinea +pig) cavy");
assertQueryEquals("-small guinea pig", a, "-small (+guinea +pig) cavy");
assertQueryEquals("!small guinea pig", a, "-small (+guinea +pig) cavy");
assertQueryEquals("NOT small guinea pig", a, "-small (+guinea +pig) cavy");
assertQueryEquals("small* guinea pig", a, "small* (+guinea +pig) cavy");
assertQueryEquals("small? guinea pig", a, "small? (+guinea +pig) cavy");
assertQueryEquals("\"small\" guinea pig", a, "small (+guinea +pig) cavy");
assertQueryEquals("+small guinea pig", a, "+small ((+guinea +pig) cavy)");
assertQueryEquals("-small guinea pig", a, "-small ((+guinea +pig) cavy)");
assertQueryEquals("!small guinea pig", a, "-small ((+guinea +pig) cavy)");
assertQueryEquals("NOT small guinea pig", a, "-small ((+guinea +pig) cavy)");
assertQueryEquals("small* guinea pig", a, "small* ((+guinea +pig) cavy)");
assertQueryEquals("small? guinea pig", a, "small? ((+guinea +pig) cavy)");
assertQueryEquals("\"small\" guinea pig", a, "small ((+guinea +pig) cavy)");
assertQueryEquals("guinea pig +running", a, "(+guinea +pig) cavy +running");
assertQueryEquals("guinea pig -running", a, "(+guinea +pig) cavy -running");
assertQueryEquals("guinea pig !running", a, "(+guinea +pig) cavy -running");
assertQueryEquals("guinea pig NOT running", a, "(+guinea +pig) cavy -running");
assertQueryEquals("guinea pig running*", a, "(+guinea +pig) cavy running*");
assertQueryEquals("guinea pig running?", a, "(+guinea +pig) cavy running?");
assertQueryEquals("guinea pig \"running\"", a, "(+guinea +pig) cavy running");
assertQueryEquals("guinea pig +running", a, "((+guinea +pig) cavy) +running");
assertQueryEquals("guinea pig -running", a, "((+guinea +pig) cavy) -running");
assertQueryEquals("guinea pig !running", a, "((+guinea +pig) cavy) -running");
assertQueryEquals("guinea pig NOT running", a, "((+guinea +pig) cavy) -running");
assertQueryEquals("guinea pig running*", a, "((+guinea +pig) cavy) running*");
assertQueryEquals("guinea pig running?", a, "((+guinea +pig) cavy) running?");
assertQueryEquals("guinea pig \"running\"", a, "((+guinea +pig) cavy) running");
assertQueryEquals("\"guinea pig\"~2", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
@ -744,14 +767,16 @@ public class TestQueryParser extends QueryParserTestBase {
BooleanQuery guineaPig = synonym.build();
BooleanQuery graphQuery = new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build();;
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.SHOULD)
.build();
assertEquals(graphQuery, parser.parse("guinea pig"));
boolean oldSplitOnWhitespace = splitOnWhitespace;
splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "(+guinea +pig) cavy");
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "((+guinea +pig) cavy)");
splitOnWhitespace = oldSplitOnWhitespace;
}

View File

@ -37,12 +37,12 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.PerThreadPKLookup;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
@ -75,7 +75,7 @@ public class TestIDVersionPostingsFormat extends LuceneTestCase {
doc.add(makeIDField("id1", 110));
w.addDocument(doc);
IndexReader r = w.getReader();
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().fields().terms("id").iterator();
IDVersionSegmentTermsEnum termsEnum = (IDVersionSegmentTermsEnum) r.leaves().get(0).reader().terms("id").iterator();
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 50));
assertTrue(termsEnum.seekExact(new BytesRef("id0"), 100));
assertFalse(termsEnum.seekExact(new BytesRef("id0"), 101));

View File

@ -71,10 +71,11 @@ public class AssertingLeafReader extends FilterLeafReader {
}
@Override
public Fields fields() throws IOException {
return new AssertingFields(super.fields());
public Terms terms(String field) throws IOException {
Terms terms = super.terms(field);
return terms == null ? null : new AssertingTerms(terms);
}
@Override
public Fields getTermVectors(int docID) throws IOException {
Fields fields = super.getTermVectors(docID);

View File

@ -335,7 +335,7 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
// PostingsFormat
try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
consumer.write(oneDocReader.fields());
consumer.write(MultiFields.getFields(oneDocReader));
IOUtils.close(consumer);
IOUtils.close(consumer);
}

Some files were not shown because too many files have changed in this diff.