Noble Paul 2017-06-08 15:58:03 +09:30
commit 647916d8ed
42 changed files with 1758 additions and 537 deletions

View File

@@ -66,6 +66,13 @@
</foaf:Person>
</maintainer>
<release>
<Version>
<name>lucene-6.6.0</name>
<created>2017-06-06</created>
<revision>6.6.0</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-6.5.1</name>

View File

@@ -66,6 +66,13 @@
</foaf:Person>
</maintainer>
<release>
<Version>
<name>solr-6.6.0</name>
<created>2017-06-06</created>
<revision>6.6.0</revision>
</Version>
</release>
<release>
<Version>
<name>solr-6.5.1</name>

View File

@@ -14,6 +14,10 @@ New Features
well as the oldest Lucene version that contributed to the segment.
(Adrien Grand)
* LUCENE-7854: The new TermFrequencyAttribute used during analysis
with a custom token stream allows indexing custom term frequencies
(Mike McCandless)
API Changes
* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.
@@ -115,6 +119,11 @@ Other
======================= Lucene 6.7.0 =======================
New Features
* LUCENE-7855: Added advanced options of the Wikipedia tokenizer to its factory.
(Juan Pedro via Adrien Grand)
Other
* LUCENE-7800: Remove code that potentially rethrows checked exceptions
@@ -125,6 +134,12 @@ Improvements
* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)
Optimizations
* LUCENE-7828: Speed up range queries on range fields by improving how we
compute the relation between the query and inner nodes of the BKD tree.
(Adrien Grand)
======================= Lucene 6.6.0 =======================
New Features

View File

@@ -16,9 +16,9 @@
*/
package org.apache.lucene.analysis.wikipedia;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
@@ -33,19 +33,28 @@ import org.apache.lucene.util.AttributeFactory;
* &lt;/fieldType&gt;</pre>
*/
public class WikipediaTokenizerFactory extends TokenizerFactory {
public static final String TOKEN_OUTPUT = "tokenOutput";
public static final String UNTOKENIZED_TYPES = "untokenizedTypes";
protected final int tokenOutput;
protected Set<String> untokenizedTypes;
/** Creates a new WikipediaTokenizerFactory */
public WikipediaTokenizerFactory(Map<String,String> args) {
super(args);
tokenOutput = getInt(args, TOKEN_OUTPUT, WikipediaTokenizer.TOKENS_ONLY);
untokenizedTypes = getSet(args, UNTOKENIZED_TYPES);
if (untokenizedTypes == null) {
untokenizedTypes = Collections.emptySet();
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
// TODO: add support for WikipediaTokenizer's advanced options.
@Override
public WikipediaTokenizer create(AttributeFactory factory) {
return new WikipediaTokenizer(factory, WikipediaTokenizer.TOKENS_ONLY,
Collections.<String>emptySet());
return new WikipediaTokenizer(factory, tokenOutput, untokenizedTypes);
}
}
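To make the two new parameters concrete, here is a minimal usage sketch; it is not part of the commit, the demo class name is invented, and only the factory constants and WikipediaTokenizer constants above are real. Note that the factory consumes its argument map, so any leftover key triggers the "Unknown parameters" exception shown above.
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
public class WikipediaFactoryDemo {
  public static void main(String[] args) {
    Map<String,String> params = new HashMap<>();
    // emit only the untokenized spans of category and italics markup
    params.put(WikipediaTokenizerFactory.TOKEN_OUTPUT,
        Integer.toString(WikipediaTokenizer.UNTOKENIZED_ONLY));
    params.put(WikipediaTokenizerFactory.UNTOKENIZED_TYPES,
        WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS);
    WikipediaTokenizerFactory factory = new WikipediaTokenizerFactory(params); // consumes params
    Tokenizer tokenizer = factory.create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
  }
}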

View File

@@ -17,34 +17,90 @@
package org.apache.lucene.analysis.wikipedia;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
/**
* Simple tests to ensure the wikipedia tokenizer is working.
*/
public class TestWikipediaTokenizerFactory extends BaseTokenStreamFactoryTestCase {
private final String WIKIPEDIA = "Wikipedia";
private final String TOKEN_OUTPUT = "tokenOutput";
private final String UNTOKENIZED_TYPES = "untokenizedTypes";
public void testTokenizer() throws Exception {
Reader reader = new StringReader("This is a [[Category:foo]]");
Tokenizer tokenizer = tokenizerFactory("Wikipedia").create(newAttributeFactory());
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer,
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
new int[] { 1, 1, 1, 1, });
String text = "This is a [[Category:foo]]";
Tokenizer tf = tokenizerFactory(WIKIPEDIA).create(newAttributeFactory());
tf.setReader(new StringReader(text));
assertTokenStreamContents(tf,
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
new int[] { 1, 1, 1, 1, },
text.length());
}
public void testTokenizerTokensOnly() throws Exception {
String text = "This is a [[Category:foo]]";
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer( WikipediaTokenizer.TOKENS_ONLY).toString()).create(newAttributeFactory());
tf.setReader(new StringReader(text));
assertTokenStreamContents(tf,
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", WikipediaTokenizer.CATEGORY },
new int[] { 1, 1, 1, 1, },
text.length());
}
public void testTokenizerUntokenizedOnly() throws Exception {
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
Set<String> untoks = new HashSet<>();
untoks.add(WikipediaTokenizer.CATEGORY);
untoks.add(WikipediaTokenizer.ITALICS);
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.UNTOKENIZED_ONLY).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
tf.setReader(new StringReader(test));
assertTokenStreamContents(tf,
new String[] { "a b c d", "e f g", "link", "here", "link",
"there", "italics here", "something", "more italics", "h i j" },
new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
);
}
public void testTokenizerBoth() throws Exception {
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, new Integer(WikipediaTokenizer.BOTH).toString(), UNTOKENIZED_TYPES, WikipediaTokenizer.CATEGORY + ", " + WikipediaTokenizer.ITALICS).create(newAttributeFactory());
tf.setReader(new StringReader(test));
assertTokenStreamContents(tf,
new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
"link", "here", "link", "there", "italics here", "italics", "here",
"something", "more italics", "more", "italics", "h i j", "h", "i", "j" },
new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 },
new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 }
);
}
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
tokenizerFactory("Wikipedia", "bogusArg", "bogusValue");
tokenizerFactory(WIKIPEDIA, "bogusArg", "bogusValue").create(newAttributeFactory());
});
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
public void testIllegalArguments() throws Exception {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
Tokenizer tf = tokenizerFactory(WIKIPEDIA, TOKEN_OUTPUT, "-1").create(newAttributeFactory());
});
assertTrue(expected.getMessage().contains("tokenOutput must be TOKENS_ONLY, UNTOKENIZED_ONLY or BOTH"));
}
}

View File

@@ -26,15 +26,18 @@ import org.apache.lucene.util.AttributeReflector;
* <li>{@link PositionIncrementAttribute}
* <li>{@link PositionLengthAttribute}
* <li>{@link OffsetAttribute}
* <li>{@link TermFrequencyAttribute}
* </ul>*/
public class PackedTokenAttributeImpl extends CharTermAttributeImpl
implements TypeAttribute, PositionIncrementAttribute,
PositionLengthAttribute, OffsetAttribute {
PositionLengthAttribute, OffsetAttribute,
TermFrequencyAttribute {
private int startOffset,endOffset;
private String type = DEFAULT_TYPE;
private int positionIncrement = 1;
private int positionLength = 1;
private int termFrequency = 1;
/** Constructs the attribute implementation. */
public PackedTokenAttributeImpl() {
@@ -132,12 +135,26 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
this.type = type;
}
@Override
public final void setTermFrequency(int termFrequency) {
if (termFrequency < 1) {
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
}
this.termFrequency = termFrequency;
}
@Override
public final int getTermFrequency() {
return termFrequency;
}
/** Resets the attributes
*/
@Override
public void clear() {
super.clear();
positionIncrement = positionLength = 1;
termFrequency = 1;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@@ -147,10 +164,8 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
@Override
public void end() {
super.end();
// super.end already calls this.clear, so we only set values that are different from clear:
positionIncrement = 0;
positionLength = 1;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@Override
@@ -170,6 +185,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
positionIncrement == other.positionIncrement &&
positionLength == other.positionLength &&
(type == null ? other.type == null : type.equals(other.type)) &&
termFrequency == other.termFrequency &&
super.equals(obj)
);
} else
@@ -185,6 +201,7 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
code = code * 31 + positionLength;
if (type != null)
code = code * 31 + type.hashCode();
code = code * 31 + termFrequency;
return code;
}
@@ -198,12 +215,14 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
to.startOffset = startOffset;
to.endOffset = endOffset;
to.type = type;
to.termFrequency = termFrequency;
} else {
super.copyTo(target);
((OffsetAttribute) target).setOffset(startOffset, endOffset);
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
((PositionLengthAttribute) target).setPositionLength(positionLength);
((TypeAttribute) target).setType(type);
((TermFrequencyAttribute) target).setTermFrequency(termFrequency);
}
}
@@ -215,6 +234,6 @@ public class PackedTokenAttributeImpl extends CharTermAttributeImpl
reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
reflector.reflect(TypeAttribute.class, "type", type);
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
}
}

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.Attribute;
/** Sets the custom term frequency of a term within one document. If this attribute
* is present in your analysis chain for a given field, that field must be indexed with
* {@link IndexOptions#DOCS_AND_FREQS}. */
public interface TermFrequencyAttribute extends Attribute {
/** Set the custom term frequency of the current term within one document. */
public void setTermFrequency(int termFrequency);
/** Returns the custom term frequency. */
public int getTermFrequency();
}
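As a concrete illustration of this contract, a token stream that supplies a custom frequency can be sketched as below; the class name is invented, and the pattern mirrors the CannedTermFreqs helper in the tests added later in this commit. A field consuming such a stream must be indexed with DOCS_AND_FREQS and without positions, as the checks added to FreqProxTermsWriterPerField below enforce.
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
final class SingleTermWithFreq extends TokenStream {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final TermFrequencyAttribute termFreqAtt = addAttribute(TermFrequencyAttribute.class);
  private boolean done;
  @Override
  public boolean incrementToken() {
    if (done) {
      return false;
    }
    clearAttributes();
    termAtt.append("foo");
    termFreqAtt.setTermFrequency(42); // this document counts "foo" 42 times
    done = true;
    return true;
  }
  @Override
  public void reset() {
    done = false;
  }
}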

View File

@@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.tokenattributes;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
/** Default implementation of {@link TermFrequencyAttribute}. */
public class TermFrequencyAttributeImpl extends AttributeImpl implements TermFrequencyAttribute, Cloneable {
private int termFrequency = 1;
/** Initialize this attribute with term frequency of 1 */
public TermFrequencyAttributeImpl() {}
@Override
public void setTermFrequency(int termFrequency) {
if (termFrequency < 1) {
throw new IllegalArgumentException("Term frequency must be 1 or greater; got " + termFrequency);
}
this.termFrequency = termFrequency;
}
@Override
public int getTermFrequency() {
return termFrequency;
}
@Override
public void clear() {
this.termFrequency = 1;
}
@Override
public void end() {
this.termFrequency = 1;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof TermFrequencyAttributeImpl) {
TermFrequencyAttributeImpl _other = (TermFrequencyAttributeImpl) other;
return termFrequency == _other.termFrequency;
}
return false;
}
@Override
public int hashCode() {
return Integer.hashCode(termFrequency);
}
@Override
public void copyTo(AttributeImpl target) {
TermFrequencyAttribute t = (TermFrequencyAttribute) target;
t.setTermFrequency(termFrequency);
}
@Override
public void reflectWith(AttributeReflector reflector) {
reflector.reflect(TermFrequencyAttribute.class, "termFrequency", termFrequency);
}
}
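A detail worth noting, shown here as an illustrative sketch rather than anything from the commit: the default AttributeFactory resolves an attribute interface to its implementation by appending "Impl" to the interface name, so addAttribute(TermFrequencyAttribute.class) instantiates the class above automatically.
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.util.AttributeSource;
public class TermFreqAttributeDemo {
  public static void main(String[] args) {
    AttributeSource source = new AttributeSource();
    // resolved to TermFrequencyAttributeImpl by the <Interface>Impl convention
    TermFrequencyAttribute termFreqAtt = source.addAttribute(TermFrequencyAttribute.class);
    termFreqAtt.setTermFrequency(5);
    System.out.println(termFreqAtt.getTermFrequency()); // prints 5
  }
}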

View File

@@ -19,22 +19,20 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.IntPredicate;
import java.util.function.Predicate;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.StringHelper;
@@ -60,13 +58,167 @@ abstract class RangeFieldQuery extends Query {
/** Used by {@code RangeFieldQuery} to check how each internal or leaf node relates to the query. */
enum QueryType {
/** Use this for intersects queries. */
INTERSECTS,
INTERSECTS {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, minOffset) < 0
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, maxOffset) > 0) {
// disjoint
return Relation.CELL_OUTSIDE_QUERY;
}
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, minOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, maxOffset) <= 0) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
return StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, minOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, maxOffset) <= 0;
}
},
/** Use this for within queries. */
WITHIN,
WITHIN {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) < 0
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) > 0) {
// all ranges have at least one point outside of the query
return Relation.CELL_OUTSIDE_QUERY;
}
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) <= 0) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) <= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) >= 0;
}
},
/** Use this for contains */
CONTAINS,
CONTAINS {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, maxPackedValue, maxOffset) > 0
|| StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, minPackedValue, minOffset) < 0) {
// all ranges are either less than the query max or greater than the query min
return Relation.CELL_OUTSIDE_QUERY;
}
if (StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, minPackedValue, maxOffset) <= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, maxPackedValue, minOffset) >= 0) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
int minOffset = dim * bytesPerDim;
int maxOffset = minOffset + bytesPerDim * numDims;
return StringHelper.compare(bytesPerDim, queryPackedValue, minOffset, packedValue, minOffset) >= 0
&& StringHelper.compare(bytesPerDim, queryPackedValue, maxOffset, packedValue, maxOffset) <= 0;
}
},
/** Use this for crosses queries */
CROSSES
CROSSES {
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim, int dim) {
throw new UnsupportedOperationException();
}
@Override
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim) {
throw new UnsupportedOperationException();
}
@Override
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int numDims, int bytesPerDim) {
Relation intersectRelation = QueryType.INTERSECTS.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
if (intersectRelation == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
}
Relation withinRelation = QueryType.WITHIN.compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim);
if (withinRelation == Relation.CELL_INSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
}
if (intersectRelation == Relation.CELL_INSIDE_QUERY && withinRelation == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_INSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
return INTERSECTS.matches(queryPackedValue, packedValue, numDims, bytesPerDim)
&& WITHIN.matches(queryPackedValue, packedValue, numDims, bytesPerDim) == false;
}
};
abstract Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim, int dim);
Relation compare(byte[] queryPackedValue, byte[] minPackedValue, byte[] maxPackedValue, int numDims, int bytesPerDim) {
boolean inside = true;
for (int dim = 0; dim < numDims; ++dim) {
Relation relation = compare(queryPackedValue, minPackedValue, maxPackedValue, numDims, bytesPerDim, dim);
if (relation == Relation.CELL_OUTSIDE_QUERY) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (relation != Relation.CELL_INSIDE_QUERY) {
inside = false;
}
}
return inside ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
}
abstract boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim, int dim);
boolean matches(byte[] queryPackedValue, byte[] packedValue, int numDims, int bytesPerDim) {
for (int dim = 0; dim < numDims; ++dim) {
if (matches(queryPackedValue, packedValue, numDims, bytesPerDim, dim) == false) {
return false;
}
}
return true;
}
}
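The offset arithmetic above relies on the packed layout of a range value: all dimension minimums come first, then all maximums, so dimension dim stores its min at dim * bytesPerDim and its max at dim * bytesPerDim + numDims * bytesPerDim. Rewritten over plain ints instead of packed byte[] comparisons (an illustrative sketch, not part of the commit), the per-dimension INTERSECTS match reduces to:
// qMin/qMax: query range for one dimension; vMin/vMax: indexed range.
// Mirrors the two StringHelper.compare checks in INTERSECTS.matches above.
static boolean intersectsOneDim(int qMin, int qMax, int vMin, int vMax) {
  // disjoint only if the query ends before the value starts, or starts after it ends
  return qMax >= vMin && qMin <= vMax;
}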
/**
@@ -111,54 +263,33 @@ abstract class RangeFieldQuery extends Query {
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
final RangeFieldComparator target = new RangeFieldComparator();
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
values.intersect(
new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) throws IOException {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] leaf) throws IOException {
if (target.matches(leaf)) {
adder.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return compareRange(minPackedValue, maxPackedValue);
}
});
return result.build();
}
private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
byte[] node = getInternalRange(minPackedValue, maxPackedValue);
// compute range relation for BKD traversal
if (target.intersects(node) == false) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (target.within(node)) {
// target within cell; continue traversing:
return Relation.CELL_CROSSES_QUERY;
} else if (target.contains(node)) {
// target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ?
Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
}
// target intersects cell; continue traversing:
return Relation.CELL_CROSSES_QUERY;
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
return new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) throws IOException {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] leaf) throws IOException {
if (queryType.matches(ranges, leaf, numDims, bytesPerDim)) {
adder.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return queryType.compare(ranges, minPackedValue, maxPackedValue, numDims, bytesPerDim);
}
};
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
@@ -173,115 +304,59 @@ abstract class RangeFieldQuery extends Query {
checkFieldInfo(fieldInfo);
boolean allDocsMatch = false;
if (values.getDocCount() == reader.maxDoc()
&& compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
&& queryType.compare(ranges, values.getMinPackedValue(), values.getMaxPackedValue(), numDims, bytesPerDim) == Relation.CELL_INSIDE_QUERY) {
allDocsMatch = true;
}
DocIdSetIterator iterator = allDocsMatch == true ?
DocIdSetIterator.all(reader.maxDoc()) : buildMatchingDocIdSet(reader, values).iterator();
return new ConstantScoreScorer(this, score(), iterator);
}
final Weight weight = this;
if (allDocsMatch) {
return new ScorerSupplier() {
@Override
public Scorer get(boolean randomAccess) {
return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc()));
}
/** get an encoded byte representation of the internal node; this is
* the lower half of the min array and the upper half of the max array */
private byte[] getInternalRange(byte[] min, byte[] max) {
byte[] range = new byte[min.length];
final int dimSize = numDims * bytesPerDim;
System.arraycopy(min, 0, range, 0, dimSize);
System.arraycopy(max, dimSize, range, dimSize, dimSize);
return range;
}
};
}
@Override
public long cost() {
return reader.maxDoc();
}
};
} else {
return new ScorerSupplier() {
/**
* RangeFieldComparator class provides the core comparison logic for accepting or rejecting indexed
* {@code RangeField} types based on the defined query range and relation.
*/
class RangeFieldComparator {
final Predicate<byte[]> predicate;
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;
/** constructs the comparator based on the query type */
RangeFieldComparator() {
switch (queryType) {
case INTERSECTS:
predicate = this::intersects;
break;
case WITHIN:
predicate = this::contains;
break;
case CONTAINS:
predicate = this::within;
break;
case CROSSES:
// crosses first checks intersection (disjoint automatic fails),
// then ensures the query doesn't wholly contain the leaf:
predicate = (byte[] leaf) -> this.intersects(leaf)
&& this.contains(leaf) == false;
break;
default:
throw new IllegalArgumentException("invalid queryType [" + queryType + "] found.");
}
}
@Override
public Scorer get(boolean randomAccess) throws IOException {
values.intersect(visitor);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), iterator);
}
/** determines if the candidate range matches the query request */
private boolean matches(final byte[] candidate) {
return (Arrays.equals(ranges, candidate) && queryType != QueryType.CROSSES)
|| predicate.test(candidate);
}
/** check if query intersects candidate range */
private boolean intersects(final byte[] candidate) {
return relate((int d) -> compareMinMax(candidate, d) > 0 || compareMaxMin(candidate, d) < 0);
}
/** check if query is within candidate range */
private boolean within(final byte[] candidate) {
return relate((int d) -> compareMinMin(candidate, d) < 0 || compareMaxMax(candidate, d) > 0);
}
/** check if query contains candidate range */
private boolean contains(final byte[] candidate) {
return relate((int d) -> compareMinMin(candidate, d) > 0 || compareMaxMax(candidate, d) < 0);
}
/** internal method used by each relation method to test range relation logic */
private boolean relate(IntPredicate predicate) {
for (int d=0; d<numDims; ++d) {
if (predicate.test(d)) {
return false;
@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimatePointCount(visitor);
assert cost >= 0;
}
return cost;
}
};
}
}
return true;
}
/** compare the encoded min value (for the defined query dimension) with the encoded min value in the byte array */
private int compareMinMin(byte[] b, int dimension) {
// convert dimension to offset:
dimension *= bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
}
/** compare the encoded min value (for the defined query dimension) with the encoded max value in the byte array */
private int compareMinMax(byte[] b, int dimension) {
// convert dimension to offset:
dimension *= bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, dimension, b, numDims * bytesPerDim + dimension);
}
/** compare the encoded max value (for the defined query dimension) with the encoded min value in the byte array */
private int compareMaxMin(byte[] b, int dimension) {
// convert dimension to offset:
dimension *= bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, numDims * bytesPerDim + dimension, b, dimension);
}
/** compare the encoded max value (for the defined query dimension) with the encoded max value in the byte array */
private int compareMaxMax(byte[] b, int dimension) {
// convert dimension to max offset:
dimension = numDims * bytesPerDim + dimension * bytesPerDim;
return StringHelper.compare(bytesPerDim, ranges, dimension, b, dimension);
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
};
}
@Override

View File

@@ -770,10 +770,12 @@ final class DefaultIndexingChain extends DocConsumer {
}
invertState.lastStartOffset = startOffset;
invertState.length++;
if (invertState.length < 0) {
throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
try {
invertState.length = Math.addExact(invertState.length, invertState.termFreqAttribute.getTermFrequency());
} catch (ArithmeticException ae) {
throw new IllegalArgumentException("too many tokens for field \"" + field.name() + "\"");
}
//System.out.println(" term=" + invertState.termAttribute);
// If we hit an exception in here, we abort
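Math.addExact throws ArithmeticException as soon as the running int length would overflow, and the chain rewraps that as a clearer IllegalArgumentException. The guard in isolation, as a sketch with invented method and parameter names:
// Overflow-checked accumulation of a token's term frequency into the field length.
static int addToFieldLength(int length, int termFreq, String fieldName) {
  try {
    return Math.addExact(length, termFreq); // throws ArithmeticException on int overflow
  } catch (ArithmeticException ae) {
    throw new IllegalArgumentException("too many tokens for field \"" + fieldName + "\"");
  }
}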

View File

@@ -20,6 +20,7 @@ import org.apache.lucene.analysis.TokenStream; // javadocs
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -48,6 +49,7 @@ public final class FieldInvertState {
PositionIncrementAttribute posIncrAttribute;
PayloadAttribute payloadAttribute;
TermToBytesRefAttribute termAttribute;
TermFrequencyAttribute termFreqAttribute;
/** Creates {code FieldInvertState} for the specified
* field name. */
@@ -88,6 +90,7 @@ public final class FieldInvertState {
if (this.attributeSource != attributeSource) {
this.attributeSource = attributeSource;
termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);

View File

@@ -113,9 +113,10 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
if (!hasFreq) {
assert postings.termFreqs == null;
postings.lastDocCodes[termID] = docState.docID;
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
postings.termFreqs[termID] = 1;
postings.termFreqs[termID] = getTermFreq();
if (hasProx) {
writeProx(termID, fieldState.position);
if (hasOffsets) {
@@ -124,19 +125,21 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
} else {
assert !hasOffsets;
}
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
}
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
fieldState.uniqueTermCount++;
}
@Override
void addTerm(final int termID) {
final FreqProxPostingsArray postings = freqProxPostingsArray;
assert !hasFreq || postings.termFreqs[termID] > 0;
if (!hasFreq) {
assert postings.termFreqs == null;
if (termFreqAtt.getTermFrequency() != 1) {
throw new IllegalStateException("field \"" + fieldInfo.name + "\": must index term freq while using custom TermFrequencyAttribute");
}
if (docState.docID != postings.lastDocIDs[termID]) {
// New document; now encode docCode for previous doc:
assert docState.docID > postings.lastDocIDs[termID];
@@ -160,8 +163,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
}
// Init freq for the current document
postings.termFreqs[termID] = 1;
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.termFreqs[termID] = getTermFreq();
fieldState.maxTermFrequency = Math.max(postings.termFreqs[termID], fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
if (hasProx) {
@@ -175,7 +178,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
}
fieldState.uniqueTermCount++;
} else {
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
postings.termFreqs[termID] = Math.addExact(postings.termFreqs[termID], getTermFreq());
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, postings.termFreqs[termID]);
if (hasProx) {
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
if (hasOffsets) {
@@ -185,6 +189,17 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
}
}
private int getTermFreq() {
int freq = termFreqAtt.getTermFrequency();
if (freq != 1) {
if (hasProx) {
throw new IllegalStateException("field \"" + fieldInfo.name + "\": cannot index positions while using custom TermFrequencyAttribute");
}
}
return freq;
}
@Override
public void newPostingsArray() {
freqProxPostingsArray = (FreqProxPostingsArray) postingsArray;

View File

@@ -109,6 +109,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
@Override
boolean start(IndexableField field, boolean first) {
super.start(field, first);
assert field.fieldType().indexOptions() != IndexOptions.NONE;
if (first) {
@@ -224,7 +225,7 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
void newTerm(final int termID) {
TermVectorsPostingsArray postings = termVectorsPostingsArray;
postings.freqs[termID] = 1;
postings.freqs[termID] = getTermFreq();
postings.lastOffsets[termID] = 0;
postings.lastPositions[termID] = 0;
@@ -235,11 +236,25 @@ final class TermVectorsConsumerPerField extends TermsHashPerField {
void addTerm(final int termID) {
TermVectorsPostingsArray postings = termVectorsPostingsArray;
postings.freqs[termID]++;
postings.freqs[termID] += getTermFreq();
writeProx(postings, termID);
}
private int getTermFreq() {
int freq = termFreqAtt.getTermFrequency();
if (freq != 1) {
if (doVectorPositions) {
throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector positions while using custom TermFrequencyAttribute");
}
if (doVectorOffsets) {
throw new IllegalArgumentException("field \"" + fieldInfo.name + "\": cannot index term vector offsets while using custom TermFrequencyAttribute");
}
}
return freq;
}
@Override
public void newPostingsArray() {
termVectorsPostingsArray = (TermVectorsPostingsArray) postingsArray;

View File

@@ -19,12 +19,13 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
private static final int HASH_INIT_SIZE = 4;
@@ -35,6 +36,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
protected final DocumentsWriterPerThread.DocState docState;
protected final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
protected TermFrequencyAttribute termFreqAtt;
// Copied from our perThread
final IntBlockPool intPool;
@@ -287,6 +289,7 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
* document. */
boolean start(IndexableField field, boolean first) {
termAtt = fieldState.termAttribute;
termFreqAtt = fieldState.termFreqAttribute;
if (nextPerField != null) {
doNextCall = nextPerField.start(field, first);
}

View File

@@ -125,6 +125,7 @@ public class TestToken extends LuceneTestCase {
t.setFlags(8);
t.setPositionIncrement(3);
t.setPositionLength(11);
t.setTermFrequency(42);
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
@@ -136,6 +137,7 @@ public class TestToken extends LuceneTestCase {
put(PayloadAttribute.class.getName() + "#payload", null);
put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
put(FlagsAttribute.class.getName() + "#flags", 8);
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
}});
}
}

View File

@@ -82,6 +82,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
t.setPositionIncrement(3);
t.setPositionLength(11);
t.setType("foobar");
t.setTermFrequency(42);
TestUtil.assertAttributeReflection(t,
new HashMap<String, Object>() {{
put(CharTermAttribute.class.getName() + "#term", "foobar");
@@ -91,6 +92,7 @@ public class TestPackedTokenAttributeImpl extends LuceneTestCase {
put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 3);
put(PositionLengthAttribute.class.getName() + "#positionLength", 11);
put(TypeAttribute.class.getName() + "#type", "foobar");
put(TermFrequencyAttribute.class.getName() + "#termFrequency", 42);
}});
}
}

View File

@@ -0,0 +1,468 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import static org.apache.lucene.index.PostingsEnum.NO_MORE_DOCS;
public class TestCustomTermFreq extends LuceneTestCase {
private static final class CannedTermFreqs extends TokenStream {
private final String[] terms;
private final int[] termFreqs;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TermFrequencyAttribute termFreqAtt = addAttribute(TermFrequencyAttribute.class);
private int upto;
public CannedTermFreqs(String[] terms, int[] termFreqs) {
this.terms = terms;
this.termFreqs = termFreqs;
assert terms.length == termFreqs.length;
}
@Override
public boolean incrementToken() {
if (upto == terms.length) {
return false;
}
clearAttributes();
termAtt.append(terms[upto]);
termFreqAtt.setTermFrequency(termFreqs[upto]);
upto++;
return true;
}
@Override
public void reset() {
upto = 0;
}
}
public void testSingletonTermsOneDoc() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {42, 128}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(128, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(42, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testSingletonTermsTwoDocs() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {42, 128}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {50, 50}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(128, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(50, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(42, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(50, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testRepeatTermsOneDoc() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(228, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(59, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testRepeatTermsTwoDocs() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {50, 60, 70, 80}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
PostingsEnum postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("bar"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(228, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(140, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
postings = MultiFields.getTermDocsEnum(r, "field", new BytesRef("foo"));
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(59, postings.freq());
assertEquals(1, postings.nextDoc());
assertEquals(120, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
public void testTotalTermFreq() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {50, 60, 70, 80}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
TermsEnum termsEnum = MultiFields.getTerms(r, "field").iterator();
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
assertEquals(179, termsEnum.totalTermFreq());
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
assertEquals(368, termsEnum.totalTermFreq());
IOUtils.close(r, w, dir);
}
// you can't index proximity with custom term freqs:
public void testInvalidProx() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
// you can't index DOCS_ONLY with custom term freq
public void testInvalidDocsOnly() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": must index term freq while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
// sum of term freqs must fit in an int
public void testOverflowInt() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS);
Document doc = new Document();
doc.add(new Field("field", "this field should be indexed", fieldType));
w.addDocument(doc);
Document doc2 = new Document();
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar"},
new int[] {3, Integer.MAX_VALUE}),
fieldType);
doc2.add(field);
expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc2);});
IndexReader r = DirectoryReader.open(w);
assertEquals(1, r.numDocs());
IOUtils.close(r, w, dir);
}
public void testInvalidTermVectorPositions() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
fieldType.setStoreTermVectors(true);
fieldType.setStoreTermVectorPositions(true);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": cannot index term vector positions while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
public void testInvalidTermVectorOffsets() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
fieldType.setStoreTermVectors(true);
fieldType.setStoreTermVectorOffsets(true);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
assertEquals("field \"field\": cannot index term vector offsets while using custom TermFrequencyAttribute", e.getMessage());
IOUtils.close(w, dir);
}
public void testTermVectors() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
fieldType.setStoreTermVectors(true);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
doc = new Document();
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {50, 60, 70, 80}),
fieldType);
doc.add(field);
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
Fields fields = r.getTermVectors(0);
TermsEnum termsEnum = fields.terms("field").iterator();
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
assertEquals(228, termsEnum.totalTermFreq());
PostingsEnum postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(228, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
assertEquals(59, termsEnum.totalTermFreq());
postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(59, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
fields = r.getTermVectors(1);
termsEnum = fields.terms("field").iterator();
assertTrue(termsEnum.seekExact(new BytesRef("bar")));
assertEquals(140, termsEnum.totalTermFreq());
postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(140, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
assertTrue(termsEnum.seekExact(new BytesRef("foo")));
assertEquals(120, termsEnum.totalTermFreq());
postings = termsEnum.postings(null);
assertNotNull(postings);
assertEquals(0, postings.nextDoc());
assertEquals(120, postings.freq());
assertEquals(NO_MORE_DOCS, postings.nextDoc());
IOUtils.close(r, w, dir);
}
/**
* Similarity holds onto the FieldInvertState for subsequent verification.
*/
private static class NeverForgetsSimilarity extends Similarity {
public FieldInvertState lastState;
private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();
private NeverForgetsSimilarity() {
// no
}
@Override
public long computeNorm(FieldInvertState state) {
this.lastState = state;
return 1;
}
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException();
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}
public void testFieldInvertState() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field field = new Field("field",
new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
new int[] {42, 128, 17, 100}),
fieldType);
doc.add(field);
w.addDocument(doc);
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
assertEquals(228, fis.getMaxTermFrequency());
assertEquals(2, fis.getUniqueTermCount());
assertEquals(0, fis.getNumOverlap());
assertEquals(287, fis.getLength());
IOUtils.close(w, dir);
}
}

View File

@@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestFieldInvertState extends LuceneTestCase {
/**
* Similarity holds onto the FieldInvertState for subsequent verification.
*/
private static class NeverForgetsSimilarity extends Similarity {
public FieldInvertState lastState;
private final static NeverForgetsSimilarity INSTANCE = new NeverForgetsSimilarity();
private NeverForgetsSimilarity() {
// no
}
@Override
public long computeNorm(FieldInvertState state) {
this.lastState = state;
return 1;
}
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException();
}
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}
public void testBasic() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
Field field = new Field("field",
new CannedTokenStream(new Token("a", 0, 1),
new Token("b", 2, 3),
new Token("c", 4, 5)),
TextField.TYPE_NOT_STORED);
doc.add(field);
w.addDocument(doc);
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
assertEquals(1, fis.getMaxTermFrequency());
assertEquals(3, fis.getUniqueTermCount());
assertEquals(0, fis.getNumOverlap());
assertEquals(3, fis.getLength());
IOUtils.close(w, dir);
}
public void testRandom() throws Exception {
int numUniqueTokens = TestUtil.nextInt(random(), 1, 25);
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
int numTokens = atLeast(10000);
Token[] tokens = new Token[numTokens];
Map<Character,Integer> counts = new HashMap<>();
int numStacked = 0;
int maxTermFreq = 0;
int pos = -1;
for (int i=0;i<numTokens;i++) {
char tokenChar = (char) ('a' + random().nextInt(numUniqueTokens));
Integer oldCount = counts.get(tokenChar);
int newCount;
if (oldCount == null) {
newCount = 1;
} else {
newCount = 1 + oldCount;
}
counts.put(tokenChar, newCount);
maxTermFreq = Math.max(maxTermFreq, newCount);
Token token = new Token(Character.toString(tokenChar), 2*i, 2*i+1);
if (i > 0 && random().nextInt(7) == 3) {
token.setPositionIncrement(0);
numStacked++;
} else {
pos++;
}
tokens[i] = token;
}
Field field = new Field("field",
new CannedTokenStream(tokens),
TextField.TYPE_NOT_STORED);
doc.add(field);
w.addDocument(doc);
FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
assertEquals(maxTermFreq, fis.getMaxTermFrequency());
assertEquals(counts.size(), fis.getUniqueTermCount());
assertEquals(numStacked, fis.getNumOverlap());
assertEquals(numTokens, fis.getLength());
assertEquals(pos, fis.getPosition());
IOUtils.close(w, dir);
}
}
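One subtlety behind testRandom's bookkeeping: a token added with a position increment of zero is stacked on the previous position, so it counts toward getNumOverlap() and getLength() but does not advance getPosition(). A minimal illustration (values follow from the Token API, not from this test):

Token a = new Token("a", 0, 1);  // occupies position 0
Token b = new Token("b", 2, 3);
b.setPositionIncrement(0);       // stacked on position 0: length 2, numOverlap 1, position 0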
View File
@@ -2676,11 +2676,11 @@ public abstract class LuceneTestCase extends Assert {
if (expectedType.isInstance(e)) {
return expectedType.cast(e);
}
AssertionFailedError assertion = new AssertionFailedError("Unexpected exception type, expected " + expectedType.getSimpleName());
AssertionFailedError assertion = new AssertionFailedError("Unexpected exception type, expected " + expectedType.getSimpleName() + " but got " + e);
assertion.initCause(e);
throw assertion;
}
throw new AssertionFailedError("Expected exception " + expectedType.getSimpleName());
throw new AssertionFailedError("Expected exception " + expectedType.getSimpleName() + " but no exception was thrown");
}
/**
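A hedged usage sketch of the sharpened messages (illustrative snippet, not from this commit):

import java.util.Collections;

// Passes: get(0) on an empty list throws IndexOutOfBoundsException.
IndexOutOfBoundsException e = expectThrows(IndexOutOfBoundsException.class,
    () -> Collections.<String>emptyList().get(0));

Had the lambda thrown some other type, the failure now names the actual exception ("... but got java.lang.NullPointerException"), and if nothing is thrown at all the message now says so explicitly.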
View File
@@ -253,6 +253,9 @@ Other Changes
* SOLR-10419: All collection APIs should use the new Policy framework for replica placement. (Noble Paul, shalin)
* SOLR-10800: Factor out HttpShardHandler.transformReplicasToShardUrls from HttpShardHandler.prepDistributed.
(Domenico Fabio Marino, Christine Poerschke)
================== 6.7.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@@ -352,6 +355,8 @@ Optimizations
so that the second phase which would normally involve calculating the domain for the bucket
can be skipped entirely, leading to large performance improvements. (yonik)
* SOLR-10722: Speed up Solr's use of the UnifiedHighlighter by re-using FieldInfos. (David Smiley)
Other Changes
----------------------
@@ -380,6 +385,8 @@ Other Changes
* SOLR-8762: return child docs in DIH debug (Gopikannan Venugopalsamy via Mikhail Khludnev)
* SOLR-10501: Test sortMissing{First,Last} with points fields. (Steve Rowe)
================== 6.6.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
View File
@@ -22,7 +22,6 @@ import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.ltr.TestRerankBase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
public class TestMultipleAdditiveTreesModel extends TestRerankBase {
@@ -47,10 +46,16 @@ public class TestMultipleAdditiveTreesModel extends TestRerankBase {
@Test
public void testMultipleAdditiveTreesScoringWithAndWithoutEfiFeatureMatches() throws Exception {
public void testMultipleAdditiveTrees() throws Exception {
loadFeatures("multipleadditivetreesmodel_features.json");
loadModels("multipleadditivetreesmodel.json");
doTestMultipleAdditiveTreesScoringWithAndWithoutEfiFeatureMatches();
doTestMultipleAdditiveTreesExplain();
}
private void doTestMultipleAdditiveTreesScoringWithAndWithoutEfiFeatureMatches() throws Exception {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
query.add("rows", "3");
@@ -79,9 +84,8 @@ public class TestMultipleAdditiveTreesModel extends TestRerankBase {
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==-120.0");
}
@Ignore
@Test
public void multipleAdditiveTreesTestExplain() throws Exception {
private void doTestMultipleAdditiveTreesExplain() throws Exception {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
query.add("fl", "*,score,[fv]");
@@ -103,7 +107,7 @@ public class TestMultipleAdditiveTreesModel extends TestRerankBase {
qryResult = qryResult.substring(qryResult.indexOf("explain"));
assertThat(qryResult, containsString("multipleadditivetreesmodel"));
assertThat(qryResult, containsString(MultipleAdditiveTreesModel.class.getCanonicalName()));
assertThat(qryResult, containsString(MultipleAdditiveTreesModel.class.getSimpleName()));
assertThat(qryResult, containsString("-100.0 = tree 0"));
assertThat(qryResult, containsString("50.0 = tree 0"));
@@ -113,7 +117,6 @@ public class TestMultipleAdditiveTreesModel extends TestRerankBase {
assertThat(qryResult, containsString(" Go Right "));
assertThat(qryResult, containsString(" Go Left "));
assertThat(qryResult, containsString("'this_feature_doesnt_exist' does not exist in FV"));
}
@Test
View File
@@ -72,10 +72,10 @@ public class ReplicateFromLeader {
}
LOG.info("Will start replication from leader with poll interval: {}", pollIntervalStr );
NamedList slaveConfig = new NamedList();
slaveConfig.add("fetchFromLeader", true);
NamedList<Object> slaveConfig = new NamedList<>();
slaveConfig.add("fetchFromLeader", Boolean.TRUE);
slaveConfig.add("pollInterval", pollIntervalStr);
NamedList replicationConfig = new NamedList();
NamedList<Object> replicationConfig = new NamedList<>();
replicationConfig.add("slave", slaveConfig);
String lastCommitVersion = getCommitVersion(core);
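The NamedList change is a small generics cleanup; a minimal illustration of the difference (hypothetical snippet, NamedList being org.apache.solr.common.util.NamedList):

NamedList slaveConfig = new NamedList();      // raw type: unchecked warning on add(...)
NamedList<Object> typed = new NamedList<>();  // parameterized: compiles cleanly
typed.add("fetchFromLeader", Boolean.TRUE);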
View File
@@ -155,7 +155,7 @@ public class IndexFetcher {
private boolean useExternalCompression = false;
private boolean fetchFromLeader = false;
boolean fetchFromLeader = false;
private final HttpClient myHttpClient;
View File
@@ -1217,7 +1217,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
NamedList master = (NamedList) initArgs.get("master");
boolean enableMaster = isEnabled( master );
if (enableMaster || enableSlave) {
if (enableMaster || (enableSlave && !currentIndexFetcher.fetchFromLeader)) {
if (core.getCoreContainer().getZkController() != null) {
LOG.warn("SolrCloud is enabled for core " + core.getName() + " but so is old-style replication. Make sure you" +
" intend this behavior, it usually indicates a mis-configuration. Master setting is " +
View File
@@ -379,10 +379,11 @@ public class HttpShardHandler extends ShardHandler {
for (int i=0; i<rb.shards.length; i++) {
final List<String> shardUrls;
if (rb.shards[i] != null) {
shardUrls = StrUtils.splitSmart(rb.shards[i], "|", true);
final List<String> shardUrls = StrUtils.splitSmart(rb.shards[i], "|", true);
replicaListTransformer.transform(shardUrls);
// And now recreate the | delimited list of equivalent servers
rb.shards[i] = createSliceShardsStr(shardUrls);
} else {
if (clusterState == null) {
clusterState = zkController.getClusterState();
@@ -424,15 +425,11 @@
final List<Replica> eligibleSliceReplicas = collectEligibleReplicas(slice, clusterState, onlyNrtReplicas, isShardLeader);
replicaListTransformer.transform(eligibleSliceReplicas);
final List<String> shardUrls = transformReplicasToShardUrls(replicaListTransformer, eligibleSliceReplicas);
shardUrls = new ArrayList<>(eligibleSliceReplicas.size());
for (Replica replica : eligibleSliceReplicas) {
String url = ZkCoreNodeProps.getCoreUrl(replica);
shardUrls.add(url);
}
if (shardUrls.isEmpty()) {
// And now recreate the | delimited list of equivalent servers
final String sliceShardsStr = createSliceShardsStr(shardUrls);
if (sliceShardsStr.isEmpty()) {
boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false);
if (!tolerant) {
// stop the check when there are no replicas available for a shard
@@ -440,9 +437,8 @@
"no servers hosting shard: " + rb.slices[i]);
}
}
rb.shards[i] = sliceShardsStr;
}
// And now recreate the | delimited list of equivalent servers
rb.shards[i] = createSliceShardsStr(shardUrls);
}
}
String shards_rows = params.get(ShardParams.SHARDS_ROWS);
@@ -475,6 +471,17 @@
return eligibleSliceReplicas;
}
private static List<String> transformReplicasToShardUrls(final ReplicaListTransformer replicaListTransformer, final List<Replica> eligibleSliceReplicas) {
replicaListTransformer.transform(eligibleSliceReplicas);
final List<String> shardUrls = new ArrayList<>(eligibleSliceReplicas.size());
for (Replica replica : eligibleSliceReplicas) {
String url = ZkCoreNodeProps.getCoreUrl(replica);
shardUrls.add(url);
}
return shardUrls;
}
private static String createSliceShardsStr(final List<String> shardUrls) {
final StringBuilder sliceShardsStr = new StringBuilder();
boolean first = true;
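The rest of createSliceShardsStr is cut off by the diff context here; judging from the StringBuilder and first-flag visible above, it presumably rebuilds the '|'-delimited server list. A hypothetical equivalent, purely for illustration:

private static String joinShardUrls(final List<String> shardUrls) {
  return String.join("|", shardUrls); // same shape of output as the StringBuilder loop
}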
View File
@@ -26,6 +26,7 @@ import java.util.Set;
import java.util.function.Predicate;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
@@ -263,6 +264,12 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
}
}
// optimization for Solr which keeps a FieldInfos on-hand
@Override
protected FieldInfo getFieldInfo(String field) {
return ((SolrIndexSearcher)searcher).getFieldInfos().fieldInfo(field);
}
@Override
public int getMaxNoHighlightPassages(String field) {
boolean defaultSummary = params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false);
View File
@@ -17,7 +17,6 @@
package org.apache.solr.metrics.reporters;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -27,14 +26,11 @@ import com.codahale.metrics.ganglia.GangliaReporter;
import info.ganglia.gmetric4j.gmetric.GMetric;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricReporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
*/
public class SolrGangliaReporter extends SolrMetricReporter {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private String host = null;
private int port = -1;
View File
@@ -17,7 +17,6 @@
package org.apache.solr.metrics.reporters;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -29,14 +28,11 @@ import com.codahale.metrics.graphite.GraphiteSender;
import com.codahale.metrics.graphite.PickledGraphite;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricReporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Metrics reporter that wraps {@link com.codahale.metrics.graphite.GraphiteReporter}.
*/
public class SolrGraphiteReporter extends SolrMetricReporter {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private String host = null;
private int port = -1;
View File
@@ -43,7 +43,8 @@ import org.slf4j.LoggerFactory;
* </ul>
*/
public class SolrSlf4jReporter extends SolrMetricReporter {
// we need this to pass validate-source-patterns
@SuppressWarnings("unused") // we need this to pass validate-source-patterns
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private int period = 60;
View File
@@ -37,6 +37,8 @@ import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocSet;
import org.apache.solr.util.DateMathParser;
import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;
public class FacetRange extends FacetRequestSorted {
String field;
Object start;
@@ -203,6 +205,10 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
"Unable to range facet on field:" + sf);
}
if (fcontext.facetInfo != null) {
return refineFacets();
}
createRangeList();
return getRangeCountsIndexed();
}
@@ -322,7 +328,7 @@ }
}
for (int idx = 0; idx<otherList.size(); idx++) {
// we dont' skip these buckets based on mincount
// we don't skip these buckets based on mincount
Range range = otherList.get(idx);
SimpleOrderedMap bucket = new SimpleOrderedMap();
res.add(range.label.toString(), bucket);
@@ -583,4 +589,123 @@ }
}
}
// this refineFacets method is patterned after FacetFieldProcessor.refineFacets and should
// probably be merged when range facet becomes more like field facet in its ability to sort and limit
protected SimpleOrderedMap<Object> refineFacets() throws IOException {
boolean skipThisFacet = (fcontext.flags & SKIP_FACET) != 0;
List leaves = FacetFieldProcessor.asList(fcontext.facetInfo.get("_l")); // We have not seen this bucket: do full faceting for this bucket, including all sub-facets
List<List> skip = FacetFieldProcessor.asList(fcontext.facetInfo.get("_s")); // We have seen this bucket, so skip stats on it, and skip sub-facets except for the specified sub-facets that should calculate specified buckets.
List<List> partial = FacetFieldProcessor.asList(fcontext.facetInfo.get("_p")); // We have not seen this bucket, do full faceting for this bucket, and most sub-facets... but some sub-facets are partial and should only visit specified buckets.
// currently, only _s should be present for range facets. In the future, range facets will
// be more like field facets and will have the same refinement cases. When that happens, we should try to unify the refinement code more
assert leaves.size() == 0;
assert partial.size() == 0;
// For leaf refinements, we do full faceting for each leaf bucket. Any sub-facets of these buckets will be fully evaluated. Because of this, we should never
// encounter leaf refinements that have sub-facets that return partial results.
SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
List<SimpleOrderedMap> bucketList = new ArrayList<>( leaves.size() + skip.size() + partial.size() );
res.add("buckets", bucketList);
// TODO: an alternate implementations can fill all accs at once
createAccs(-1, 1);
for (Object bucketVal : leaves) {
bucketList.add( refineBucket(bucketVal, false, null) );
}
for (List bucketAndFacetInfo : skip) {
assert bucketAndFacetInfo.size() == 2;
Object bucketVal = bucketAndFacetInfo.get(0);
Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);
bucketList.add( refineBucket(bucketVal, true, facetInfo ) );
}
// The only difference between skip and missing is the value of "skip" passed to refineBucket
for (List bucketAndFacetInfo : partial) {
assert bucketAndFacetInfo.size() == 2;
Object bucketVal = bucketAndFacetInfo.get(0);
Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);
bucketList.add( refineBucket(bucketVal, false, facetInfo ) );
}
/*** special buckets
if (freq.missing) {
Map<String,Object> bucketFacetInfo = (Map<String,Object>)fcontext.facetInfo.get("missing");
if (bucketFacetInfo != null || !skipThisFacet) {
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, skipThisFacet, bucketFacetInfo);
res.add("missing", missingBucket);
}
}
**********/
// If there are just a couple of leaves, and if the domain is large, then
// going by term is likely the most efficient?
// If the domain is small, or if the number of leaves is large, then doing
// the normal collection method may be best.
return res;
}
private SimpleOrderedMap<Object> refineBucket(Object bucketVal, boolean skip, Map<String,Object> facetInfo) throws IOException {
// TODO: refactor this repeated code from above
Comparable start = calc.getValue(bucketVal.toString());
Comparable end = calc.getValue(freq.end.toString());
EnumSet<FacetParams.FacetRangeInclude> include = freq.include;
String gap = freq.gap.toString();
Comparable low = calc.getValue(bucketVal.toString());
Comparable high = calc.addGap(low, gap);
if (end.compareTo(high) < 0) {
if (freq.hardend) {
high = end;
} else {
end = high;
}
}
if (high.compareTo(low) < 0) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"range facet infinite loop (is gap negative? did the math overflow?)");
}
if (high.compareTo(low) == 0) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"range facet infinite loop: gap is either zero, or too small relative start/end and caused underflow: " + low + " + " + gap + " = " + high );
}
boolean incLower =
(include.contains(FacetParams.FacetRangeInclude.LOWER) ||
(include.contains(FacetParams.FacetRangeInclude.EDGE) &&
0 == low.compareTo(start)));
boolean incUpper =
(include.contains(FacetParams.FacetRangeInclude.UPPER) ||
(include.contains(FacetParams.FacetRangeInclude.EDGE) &&
0 == high.compareTo(end)));
Range range = new Range(low, low, high, incLower, incUpper);
// now refine this range
SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
FieldType ft = sf.getType();
bucket.add("val", bucketVal);
// String internal = ft.toInternal( tobj.toString() ); // TODO - we need a better way to get from object to query...
Query domainQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
fillBucket(bucket, domainQ, null, skip, facetInfo);
return bucket;
}
}
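Two illustrations may help here, both with hypothetical values. First, the shape of the refinement hints consumed above: for a range facet only the "_s" list is expected, each entry pairing a bucket value with the facet info for its sub-facets, roughly

// fcontext.facetInfo, sketched with made-up values:
// { "_s" : [ [ "2017-01-01T00:00:00Z", { "mySubFacet" : { ... } } ] ] }

Second, the bucket-edge arithmetic in refineBucket: with gap = 10 and bucketVal = "20", low = 20 and high = low + gap = 30; if end = 25 falls below that, hardend = true clamps high to 25, while hardend = false stretches end to 30 instead.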
View File
@@ -60,6 +60,18 @@
<dynamicField name="*_p_i_ni_mv_dv" type="pint" indexed="false" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_p_i_ni_ns" type="pint" indexed="false" stored="false" docValues="false" />
<dynamicField name="*_p_i_ni_ns_mv" type="pint" indexed="false" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_p_i_smf" type="pint" indexed="true" stored="true" sortMissingFirst="true"/>
<dynamicField name="*_p_i_dv_smf" type="pint" indexed="true" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_i_mv_smf" type="pint" indexed="true" stored="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_i_mv_dv_smf" type="pint" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_i_ni_dv_smf" type="pint" indexed="false" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_i_ni_mv_dv_smf" type="pint" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_i_sml" type="pint" indexed="true" stored="true" sortMissingLast="true"/>
<dynamicField name="*_p_i_dv_sml" type="pint" indexed="true" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_i_mv_sml" type="pint" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_i_mv_dv_sml" type="pint" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_i_ni_dv_sml" type="pint" indexed="false" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_i_ni_mv_dv_sml" type="pint" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_l" type="plong" indexed="true" stored="true"/>
<dynamicField name="*_p_l_dv" type="plong" indexed="true" stored="true" docValues="true"/>
@@ -73,7 +85,19 @@
<dynamicField name="*_p_l_ni_mv_dv" type="plong" indexed="false" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_p_l_ni_ns" type="plong" indexed="false" stored="false" docValues="false" />
<dynamicField name="*_p_l_ni_ns_mv" type="plong" indexed="false" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_p_l_smf" type="plong" indexed="true" stored="true" sortMissingFirst="true"/>
<dynamicField name="*_p_l_dv_smf" type="plong" indexed="true" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_l_mv_smf" type="plong" indexed="true" stored="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_l_mv_dv_smf" type="plong" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_l_ni_dv_smf" type="plong" indexed="false" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_l_ni_mv_dv_smf" type="plong" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_l_sml" type="plong" indexed="true" stored="true" sortMissingLast="true"/>
<dynamicField name="*_p_l_dv_sml" type="plong" indexed="true" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_l_mv_sml" type="plong" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_l_mv_dv_sml" type="plong" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_l_ni_dv_sml" type="plong" indexed="false" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_l_ni_mv_dv_sml" type="plong" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_d" type="pdouble" indexed="true" stored="true"/>
<dynamicField name="*_p_d_dv" type="pdouble" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_p_d_mv" type="pdouble" indexed="true" stored="true" multiValued="true"/>
@@ -86,7 +110,19 @@
<dynamicField name="*_p_d_ni_mv_dv" type="pdouble" indexed="false" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_p_d_ni_ns" type="pdouble" indexed="false" stored="false" docValues="false"/>
<dynamicField name="*_p_d_ni_ns_mv" type="pdouble" indexed="false" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_p_d_smf" type="pdouble" indexed="true" stored="true" sortMissingFirst="true"/>
<dynamicField name="*_p_d_dv_smf" type="pdouble" indexed="true" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_d_mv_smf" type="pdouble" indexed="true" stored="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_d_mv_dv_smf" type="pdouble" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_d_ni_dv_smf" type="pdouble" indexed="false" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_d_ni_mv_dv_smf" type="pdouble" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_d_sml" type="pdouble" indexed="true" stored="true" sortMissingLast="true"/>
<dynamicField name="*_p_d_dv_sml" type="pdouble" indexed="true" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_d_mv_sml" type="pdouble" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_d_mv_dv_sml" type="pdouble" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_d_ni_dv_sml" type="pdouble" indexed="false" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_d_ni_mv_dv_sml" type="pdouble" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_f" type="pfloat" indexed="true" stored="true"/>
<dynamicField name="*_p_f_dv" type="pfloat" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_p_f_mv" type="pfloat" indexed="true" stored="true" multiValued="true"/>
@@ -99,6 +135,18 @@
<dynamicField name="*_p_f_ni_mv_dv" type="pfloat" indexed="false" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_p_f_ni_ns" type="pfloat" indexed="false" stored="false" docValues="false"/>
<dynamicField name="*_p_f_ni_ns_mv" type="pfloat" indexed="false" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_p_f_smf" type="pfloat" indexed="true" stored="true" sortMissingFirst="true"/>
<dynamicField name="*_p_f_dv_smf" type="pfloat" indexed="true" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_f_mv_smf" type="pfloat" indexed="true" stored="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_f_mv_dv_smf" type="pfloat" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_f_ni_dv_smf" type="pfloat" indexed="false" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_f_ni_mv_dv_smf" type="pfloat" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_f_sml" type="pfloat" indexed="true" stored="true" sortMissingLast="true"/>
<dynamicField name="*_p_f_dv_sml" type="pfloat" indexed="true" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_f_mv_sml" type="pfloat" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_f_mv_dv_sml" type="pfloat" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_f_ni_dv_sml" type="pfloat" indexed="false" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_f_ni_mv_dv_sml" type="pfloat" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_dt" type="pdate" indexed="true" stored="true"/>
<dynamicField name="*_p_dt_dv" type="pdate" indexed="true" stored="true" docValues="true"/>
@@ -112,6 +160,18 @@
<dynamicField name="*_p_dt_ni_mv_dv" type="pdate" indexed="false" stored="true" docValues="true" multiValued="true"/>
<dynamicField name="*_p_dt_ni_ns" type="pdate" indexed="false" stored="false" docValues="false"/>
<dynamicField name="*_p_dt_ni_ns_mv" type="pdate" indexed="false" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_p_dt_smf" type="pdate" indexed="true" stored="true" sortMissingFirst="true"/>
<dynamicField name="*_p_dt_dv_smf" type="pdate" indexed="true" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_dt_mv_smf" type="pdate" indexed="true" stored="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_dt_mv_dv_smf" type="pdate" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_dt_ni_dv_smf" type="pdate" indexed="false" stored="true" docValues="true" sortMissingFirst="true"/>
<dynamicField name="*_p_dt_ni_mv_dv_smf" type="pdate" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingFirst="true"/>
<dynamicField name="*_p_dt_sml" type="pdate" indexed="true" stored="true" sortMissingLast="true"/>
<dynamicField name="*_p_dt_dv_sml" type="pdate" indexed="true" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_dt_mv_sml" type="pdate" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_dt_mv_dv_sml" type="pdate" indexed="true" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_p_dt_ni_dv_sml" type="pdate" indexed="false" stored="true" docValues="true" sortMissingLast="true"/>
<dynamicField name="*_p_dt_ni_mv_dv_sml" type="pdate" indexed="false" stored="true" docValues="true" multiValued="true" sortMissingLast="true"/>
<!-- return DV fields as stored -->
<dynamicField name="*_p_i_dv_ns" type="pint" indexed="true" stored="false" docValues="true" useDocValuesAsStored="true"/>
View File
@@ -269,6 +269,8 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);
log.info("collection state: " + printClusterStateInfo(DEFAULT_COLLECTION));
if (VERBOSE) System.out.println("control docs:"
+ controlClient.query(new SolrQuery("*:*")).getResults()
.getNumFound() + "\n\n");
View File
@@ -204,6 +204,8 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
log.info("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
log.info("collection state: " + printClusterStateInfo(DEFAULT_COLLECTION));
waitForReplicationFromReplicas(DEFAULT_COLLECTION, cloudClient.getZkStateReader(), new TimeOut(30, TimeUnit.SECONDS));
// waitForAllWarmingSearchers();
View File
@@ -23,6 +23,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
@@ -169,7 +170,10 @@ public class TestPolicyCloud extends SolrCloudTestCase {
assertTrue(((Number) val.get("cores")).intValue() > 0);
assertTrue("freedisk value is " + ((Number) val.get("freedisk")).doubleValue(), Double.compare(((Number) val.get("freedisk")).doubleValue(), 0.0d) > 0);
assertTrue("heapUsage value is " + ((Number) val.get("heapUsage")).doubleValue(), Double.compare(((Number) val.get("heapUsage")).doubleValue(), 0.0d) > 0);
assertTrue("sysLoadAvg value is " + ((Number) val.get("sysLoadAvg")).doubleValue(), Double.compare(((Number) val.get("sysLoadAvg")).doubleValue(), 0.0d) > 0);
if (!Constants.WINDOWS) {
// the system load average metrics is not available on windows platform
assertTrue("sysLoadAvg value is " + ((Number) val.get("sysLoadAvg")).doubleValue(), Double.compare(((Number) val.get("sysLoadAvg")).doubleValue(), 0.0d) > 0);
}
String overseerNode = OverseerTaskProcessor.getLeaderNode(cluster.getZkClient());
cluster.getSolrClient().request(CollectionAdminRequest.addRole(overseerNode, "overseer"));
for (int i = 0; i < 10; i++) {
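The new guard mirrors the JDK contract; a short sketch of the underlying call:

import java.lang.management.ManagementFactory;

double sysLoadAvg = ManagementFactory.getOperatingSystemMXBean().getSystemLoadAverage();
// Returns a negative value when the platform exposes no load average,
// which is the case on Windows, hence the Constants.WINDOWS check above.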
View File
@@ -18,9 +18,11 @@ package org.apache.solr.schema;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashSet;
@@ -29,6 +31,9 @@ import java.util.Locale;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
@@ -71,6 +76,13 @@ import com.google.common.collect.ImmutableMap;
*/
public class TestPointFields extends SolrTestCaseJ4 {
private static final String[] FIELD_SUFFIXES = new String[] {
"", "_dv", "_mv", "_mv_dv", "_ni", "_ni_dv", "_ni_dv_ns", "_ni_dv_ns_mv",
"_ni_mv", "_ni_mv_dv", "_ni_ns", "_ni_ns_mv", "_dv_ns", "_ni_ns_dv", "_dv_ns_mv",
"_smf", "_dv_smf", "_mv_smf", "_mv_dv_smf", "_ni_dv_smf", "_ni_mv_dv_smf",
"_sml", "_dv_sml", "_mv_sml", "_mv_dv_sml", "_ni_dv_sml", "_ni_mv_dv_sml"
};
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml","schema-point.xml");
@@ -126,30 +138,44 @@ public class TestPointFields extends SolrTestCaseJ4 {
public void testIntPointFieldSortAndFunction() throws Exception {
final SortedSet<String> regexToTest = dynFieldRegexesForType(IntPointField.class);
final String[] sequential = new String[]{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
final List<String> sequential = Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
final List<Integer> randomInts = getRandomInts(10, false);
final List<Integer> randomIntsMissing = getRandomInts(10, true);
for (String r : Arrays.asList("*_p_i", "*_p_i_dv", "*_p_i_dv_ns", "*_p_i_ni_dv",
"*_p_i_ni_dv_ns", "*_p_i_ni_ns_dv")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSort(r.replace("*","number"), sequential);
// TODO: test some randomly generated (then sorted) arrays (with dups and/or missing values)
doTestIntPointFunctionQuery(r.replace("*","number"), "int");
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomInts);
doTestIntPointFunctionQuery(field, "int");
}
for (String r : Arrays.asList("*_p_i_smf", "*_p_i_dv_smf", "*_p_i_ni_dv_smf",
"*_p_i_sml", "*_p_i_dv_sml", "*_p_i_ni_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomIntsMissing);
doTestIntPointFunctionQuery(field, "int");
}
for (String r : Arrays.asList("*_p_i_ni", "*_p_i_ni_ns")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "w/o docValues", "42");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "w/o docValues", "42");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "w/o docValues", "42");
doTestPointFieldFunctionQueryError(field, "w/o docValues", "42");
}
for (String r : Arrays.asList("*_p_i_mv", "*_p_i_ni_mv", "*_p_i_ni_mv_dv", "*_p_i_ni_dv_ns_mv",
"*_p_i_ni_ns_mv", "*_p_i_dv_ns_mv", "*_p_i_mv_dv")) {
"*_p_i_ni_ns_mv", "*_p_i_dv_ns_mv", "*_p_i_mv_dv",
"*_p_i_mv_smf", "*_p_i_mv_dv_smf", "*_p_i_ni_mv_dv_smf",
"*_p_i_mv_sml", "*_p_i_mv_dv_sml", "*_p_i_ni_mv_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "42");
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "42", "666");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "42");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "42", "666");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "multivalued", "42");
doTestPointFieldSortError(field, "multivalued", "42", "666");
doTestPointFieldFunctionQueryError(field, "multivalued", "42");
doTestPointFieldFunctionQueryError(field, "multivalued", "42", "666");
}
assertEquals("Missing types in the test", Collections.<String>emptySet(), regexToTest);
@@ -209,7 +235,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testIntPointFieldMultiValuedFacetField() throws Exception {
testPointFieldMultiValuedFacetField("number_p_i_mv", "number_p_i_mv_dv", getSequentialStringArrayWithInts(20));
testPointFieldMultiValuedFacetField("number_p_i_mv", "number_p_i_mv_dv", getRandomStringArrayWithInts(20, false));
testPointFieldMultiValuedFacetField("number_p_i_mv", "number_p_i_mv_dv", toStringArray(getRandomInts(20, false)));
}
@Test
@@ -241,12 +267,78 @@ public class TestPointFields extends SolrTestCaseJ4 {
testMultiValuedIntPointFieldsAtomicUpdates("number_p_i_ni_mv_dv", "int");
testMultiValuedIntPointFieldsAtomicUpdates("number_p_i_dv_ns_mv", "int");
}
private <T> String[] toStringArray(List<T> list) {
return list.stream().map(String::valueOf).collect(Collectors.toList()).toArray(new String[list.size()]);
}
private class PosVal <T extends Comparable<T>> {
int pos;
T val;
PosVal(int pos, T val) {
this.pos = pos;
this.val = val;
}
}
/** Primary sort by value, with nulls either first or last as specified, and then secondary sort by position. */
private <T extends Comparable<T>>
Comparator<PosVal<T>> getPosValComparator(final boolean ascending, final boolean nullsFirst) {
return (o1, o2) -> {
if (o1.val == null) {
if (o2.val == null) {
return ascending ? Integer.compare(o1.pos, o2.pos) : Integer.compare(o2.pos, o1.pos);
} else {
return nullsFirst ? -1 : 1;
}
} else if (o2.val == null) {
return nullsFirst ? 1 : -1;
} else {
return ascending ? o1.val.compareTo(o2.val) : o2.val.compareTo(o1.val);
}
};
}
/**
* Primary ascending sort by value, with missing values (represented as null) either first or last as specified,
* and then secondary ascending sort by position.
*/
private <T extends Comparable<T>> String[] toAscendingStringArray(List<T> list, boolean missingFirst) {
return toStringArray(toAscendingPosVals(list, missingFirst).stream().map(pv -> pv.val).collect(Collectors.toList()));
}
/**
* Primary ascending sort by value, with missing values (represented as null) either first or last as specified,
* and then secondary ascending sort by position.
*
* @return a list of the (originally) positioned values sorted as described above.
*/
private <T extends Comparable<T>> List<PosVal<T>> toAscendingPosVals(List<T> list, boolean missingFirst) {
List<PosVal<T>> posVals = IntStream.range(0, list.size())
.mapToObj(i -> new PosVal<>(i, list.get(i))).collect(Collectors.toList());
posVals.sort(getPosValComparator(true, missingFirst));
return posVals;
}
/**
* Primary descending sort by value, with missing values (represented as null) either first or last as specified,
* and then secondary descending sort by position.
*
* @return a list of the (originally) positioned values sorted as described above.
*/
private <T extends Comparable<T>> List<PosVal<T>> toDescendingPosVals(List<T> list, boolean missingFirst) {
List<PosVal<T>> posVals = IntStream.range(0, list.size())
.mapToObj(i -> new PosVal<>(i, list.get(i))).collect(Collectors.toList());
posVals.sort(getPosValComparator(false, missingFirst));
return posVals;
}
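A small worked example of these helpers (hypothetical input): for Arrays.asList(3, null, 1, null),

// toAscendingPosVals(input, true)  -> null(pos 1), null(pos 3), 1(pos 2), 3(pos 0)
// toAscendingPosVals(input, false) -> 1(pos 2), 3(pos 0), null(pos 1), null(pos 3)

so missing values are grouped first or last as requested, with ties among the nulls broken by original position.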
@Test
public void testIntPointSetQuery() throws Exception {
doTestSetQueries("number_p_i", getRandomStringArrayWithInts(20, false), false);
doTestSetQueries("number_p_i_mv", getRandomStringArrayWithInts(20, false), true);
doTestSetQueries("number_p_i_ni_dv", getRandomStringArrayWithInts(20, false), false);
doTestSetQueries("number_p_i", toStringArray(getRandomInts(20, false)), false);
doTestSetQueries("number_p_i_mv", toStringArray(getRandomInts(20, false)), true);
doTestSetQueries("number_p_i_ni_dv", toStringArray(getRandomInts(20, false)), false);
}
// DoublePointField
@@ -300,38 +392,48 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDoublePointFieldSortAndFunction() throws Exception {
final SortedSet<String> regexToTest = dynFieldRegexesForType(DoublePointField.class);
final String[] sequential = new String[]{"0.0", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0"};
final String[] randstrs = getRandomStringArrayWithDoubles(10, true);
final List<String> sequential = Arrays.asList("0.0", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0");
List<Double> randomDoubles = getRandomDoubles(10, false);
List<Double> randomDoublesMissing = getRandomDoubles(10, true);
for (String r : Arrays.asList("*_p_d", "*_p_d_dv", "*_p_d_dv_ns", "*_p_d_ni_dv",
"*_p_d_ni_dv_ns", "*_p_d_ni_ns_dv")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSort(r.replace("*","number"), sequential);
doTestPointFieldSort(r.replace("*","number"), randstrs);
// TODO: test some randomly generated (then sorted) arrays (with dups and/or missing values)
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomDoubles);
doTestFloatPointFunctionQuery(field, "double");
}
doTestFloatPointFunctionQuery(r.replace("*","number"), "double");
for (String r : Arrays.asList("*_p_d_smf", "*_p_d_dv_smf", "*_p_d_ni_dv_smf",
"*_p_d_sml", "*_p_d_dv_sml", "*_p_d_ni_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomDoublesMissing);
doTestFloatPointFunctionQuery(field, "double");
}
for (String r : Arrays.asList("*_p_d_ni", "*_p_d_ni_ns")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "w/o docValues", "42.34");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "w/o docValues", "42.34");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "w/o docValues", "42.34");
doTestPointFieldFunctionQueryError(field, "w/o docValues", "42.34");
}
for (String r : Arrays.asList("*_p_d_mv", "*_p_d_ni_mv", "*_p_d_ni_mv_dv", "*_p_d_ni_dv_ns_mv",
"*_p_d_ni_ns_mv", "*_p_d_dv_ns_mv", "*_p_d_mv_dv")) {
"*_p_d_ni_ns_mv", "*_p_d_dv_ns_mv", "*_p_d_mv_dv",
"*_p_d_mv_smf", "*_p_d_mv_dv_smf", "*_p_d_ni_mv_dv_smf",
"*_p_d_mv_sml", "*_p_d_mv_dv_sml", "*_p_d_ni_mv_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "42.34");
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "42.34", "66.6");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "42.34");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "42.34", "66.6");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "multivalued", "42.34");
doTestPointFieldSortError(field, "multivalued", "42.34", "66.6");
doTestPointFieldFunctionQueryError(field, "multivalued", "42.34");
doTestPointFieldFunctionQueryError(field, "multivalued", "42.34", "66.6");
}
assertEquals("Missing types in the test", Collections.<String>emptySet(), regexToTest);
}
@Test
@@ -339,7 +441,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
testPointFieldFacetField("number_p_d", "number_p_d_dv", getSequentialStringArrayWithDoubles(10));
clearIndex();
assertU(commit());
testPointFieldFacetField("number_p_d", "number_p_d_dv", getRandomStringArrayWithDoubles(10, false));
testPointFieldFacetField("number_p_d", "number_p_d_dv", toStringArray(getRandomDoubles(10, false)));
}
@Test
@@ -357,14 +459,14 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDoublePointFieldMultiValuedExactQuery() throws Exception {
testPointFieldMultiValuedExactQuery("number_p_d_mv", getRandomStringArrayWithDoubles(20, false));
testPointFieldMultiValuedExactQuery("number_p_d_ni_mv_dv", getRandomStringArrayWithDoubles(20, false));
testPointFieldMultiValuedExactQuery("number_p_d_mv", toStringArray(getRandomDoubles(20, false)));
testPointFieldMultiValuedExactQuery("number_p_d_ni_mv_dv", toStringArray(getRandomDoubles(20, false)));
}
@Test
public void testDoublePointFieldMultiValuedNonSearchableExactQuery() throws Exception {
testPointFieldMultiValuedExactQuery("number_p_d_ni_mv", getRandomStringArrayWithDoubles(20, false), false);
testPointFieldMultiValuedExactQuery("number_p_d_ni_ns_mv", getRandomStringArrayWithDoubles(20, false), false);
testPointFieldMultiValuedExactQuery("number_p_d_ni_mv", toStringArray(getRandomDoubles(20, false)), false);
testPointFieldMultiValuedExactQuery("number_p_d_ni_ns_mv", toStringArray(getRandomDoubles(20, false)), false);
}
@Test
@@ -384,7 +486,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDoublePointFieldMultiValuedFacetField() throws Exception {
testPointFieldMultiValuedFacetField("number_p_d_mv", "number_p_d_mv_dv", getSequentialStringArrayWithDoubles(20));
testPointFieldMultiValuedFacetField("number_p_d_mv", "number_p_d_mv_dv", getRandomStringArrayWithDoubles(20, false));
testPointFieldMultiValuedFacetField("number_p_d_mv", "number_p_d_mv_dv", toStringArray(getRandomDoubles(20, false)));
}
@Test
@@ -395,7 +497,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDoublePointMultiValuedFunctionQuery() throws Exception {
testPointMultiValuedFunctionQuery("number_p_d_mv", "number_p_d_mv_dv", "double", getSequentialStringArrayWithDoubles(20));
testPointMultiValuedFunctionQuery("number_p_d_mv", "number_p_d_mv_dv", "double", getRandomStringArrayWithFloats(20, true));
testPointMultiValuedFunctionQuery("number_p_d_mv", "number_p_d_mv_dv", "double", toAscendingStringArray(getRandomFloats(20, false), true));
}
@Test
@@ -460,9 +562,9 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDoublePointSetQuery() throws Exception {
doTestSetQueries("number_p_d", getRandomStringArrayWithDoubles(20, false), false);
doTestSetQueries("number_p_d_mv", getRandomStringArrayWithDoubles(20, false), true);
doTestSetQueries("number_p_d_ni_dv", getRandomStringArrayWithDoubles(20, false), false);
doTestSetQueries("number_p_d", toStringArray(getRandomDoubles(20, false)), false);
doTestSetQueries("number_p_d_mv", toStringArray(getRandomDoubles(20, false)), true);
doTestSetQueries("number_p_d_ni_dv", toStringArray(getRandomDoubles(20, false)), false);
}
// Float
@@ -514,38 +616,48 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testFloatPointFieldSortAndFunction() throws Exception {
final SortedSet<String> regexToTest = dynFieldRegexesForType(FloatPointField.class);
final String[] sequential = new String[]{"0.0", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0"};
final String[] randstrs = getRandomStringArrayWithFloats(10, true);
final List<String> sequential = Arrays.asList("0.0", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0");
final List<Float> randomFloats = getRandomFloats(10, false);
final List<Float> randomFloatsMissing = getRandomFloats(10, true);
for (String r : Arrays.asList("*_p_f", "*_p_f_dv", "*_p_f_dv_ns", "*_p_f_ni_dv",
for (String r : Arrays.asList("*_p_f", "*_p_f_dv", "*_p_f_dv_ns", "*_p_f_ni_dv",
"*_p_f_ni_dv_ns", "*_p_f_ni_ns_dv")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSort(r.replace("*","number"), sequential);
doTestPointFieldSort(r.replace("*","number"), randstrs);
// TODO: test some randomly generated (then sorted) arrays (with dups and/or missing values)
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomFloats);
doTestFloatPointFunctionQuery(r.replace("*","number"), "float");
doTestFloatPointFunctionQuery(field, "float");
}
for (String r : Arrays.asList("*_p_f_smf", "*_p_f_dv_smf", "*_p_f_ni_dv_smf",
"*_p_f_sml", "*_p_f_dv_sml", "*_p_f_ni_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomFloatsMissing);
doTestFloatPointFunctionQuery(field, "float");
}
for (String r : Arrays.asList("*_p_f_ni", "*_p_f_ni_ns")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "w/o docValues", "42.34");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "w/o docValues", "42.34");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "w/o docValues", "42.34");
doTestPointFieldFunctionQueryError(field, "w/o docValues", "42.34");
}
for (String r : Arrays.asList("*_p_f_mv", "*_p_f_ni_mv", "*_p_f_ni_mv_dv", "*_p_f_ni_dv_ns_mv",
"*_p_f_ni_ns_mv", "*_p_f_dv_ns_mv", "*_p_f_mv_dv")) {
"*_p_f_ni_ns_mv", "*_p_f_dv_ns_mv", "*_p_f_mv_dv",
"*_p_f_mv_smf", "*_p_f_mv_dv_smf", "*_p_f_ni_mv_dv_smf",
"*_p_f_mv_sml", "*_p_f_mv_dv_sml", "*_p_f_ni_mv_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "42.34");
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "42.34", "66.6");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "42.34");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "42.34", "66.6");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "multivalued", "42.34");
doTestPointFieldSortError(field, "multivalued", "42.34", "66.6");
doTestPointFieldFunctionQueryError(field, "multivalued", "42.34");
doTestPointFieldFunctionQueryError(field, "multivalued", "42.34", "66.6");
}
assertEquals("Missing types in the test", Collections.<String>emptySet(), regexToTest);
}
@Test
@@ -553,7 +665,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
testPointFieldFacetField("number_p_f", "number_p_f_dv", getSequentialStringArrayWithDoubles(10));
clearIndex();
assertU(commit());
testPointFieldFacetField("number_p_f", "number_p_f_dv", getRandomStringArrayWithFloats(10, false));
testPointFieldFacetField("number_p_f", "number_p_f_dv", toStringArray(getRandomFloats(10, false)));
}
@Test
@@ -571,14 +683,14 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testFloatPointFieldMultiValuedExactQuery() throws Exception {
testPointFieldMultiValuedExactQuery("number_p_f_mv", getRandomStringArrayWithFloats(20, false));
testPointFieldMultiValuedExactQuery("number_p_f_ni_mv_dv", getRandomStringArrayWithFloats(20, false));
testPointFieldMultiValuedExactQuery("number_p_f_mv", toStringArray(getRandomFloats(20, false)));
testPointFieldMultiValuedExactQuery("number_p_f_ni_mv_dv", toStringArray(getRandomFloats(20, false)));
}
@Test
public void testFloatPointFieldMultiValuedNonSearchableExactQuery() throws Exception {
testPointFieldMultiValuedExactQuery("number_p_f_ni_mv", getRandomStringArrayWithFloats(20, false), false);
testPointFieldMultiValuedExactQuery("number_p_f_ni_ns_mv", getRandomStringArrayWithFloats(20, false), false);
testPointFieldMultiValuedExactQuery("number_p_f_ni_mv", toStringArray(getRandomFloats(20, false)), false);
testPointFieldMultiValuedExactQuery("number_p_f_ni_ns_mv", toStringArray(getRandomFloats(20, false)), false);
}
@Test
@@ -603,13 +715,13 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testFloatPointFieldMultiValuedFacetField() throws Exception {
testPointFieldMultiValuedFacetField("number_p_f_mv", "number_p_f_mv_dv", getSequentialStringArrayWithDoubles(20));
testPointFieldMultiValuedFacetField("number_p_f_mv", "number_p_f_mv_dv", getRandomStringArrayWithFloats(20, false));
testPointFieldMultiValuedFacetField("number_p_f_mv", "number_p_f_mv_dv", toStringArray(getRandomFloats(20, false)));
}
@Test
public void testFloatPointMultiValuedFunctionQuery() throws Exception {
testPointMultiValuedFunctionQuery("number_p_f_mv", "number_p_f_mv_dv", "float", getSequentialStringArrayWithDoubles(20));
testPointMultiValuedFunctionQuery("number_p_f_mv", "number_p_f_mv_dv", "float", getRandomStringArrayWithFloats(20, true));
testPointMultiValuedFunctionQuery("number_p_f_mv", "number_p_f_mv_dv", "float", toAscendingStringArray(getRandomFloats(20, false), true));
}
@@ -635,9 +747,9 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testFloatPointSetQuery() throws Exception {
doTestSetQueries("number_p_f", getRandomStringArrayWithFloats(20, false), false);
doTestSetQueries("number_p_f_mv", getRandomStringArrayWithFloats(20, false), true);
doTestSetQueries("number_p_f_ni_dv", getRandomStringArrayWithFloats(20, false), false);
doTestSetQueries("number_p_f", toStringArray(getRandomFloats(20, false)), false);
doTestSetQueries("number_p_f_mv", toStringArray(getRandomFloats(20, false)), true);
doTestSetQueries("number_p_f_ni_dv", toStringArray(getRandomFloats(20, false)), false);
}
@Test
@@ -689,36 +801,50 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testLongPointFieldSortAndFunction() throws Exception {
final SortedSet<String> regexToTest = dynFieldRegexesForType(LongPointField.class);
final String[] vals = new String[]{ String.valueOf(Integer.MIN_VALUE),
"1", "2", "3", "4", "5", "6", "7",
String.valueOf(Integer.MAX_VALUE), String.valueOf(Long.MAX_VALUE)};
final List<Long> vals = Arrays.asList((long)Integer.MIN_VALUE,
1L, 2L, 3L, 4L, 5L, 6L, 7L,
(long)Integer.MAX_VALUE, Long.MAX_VALUE);
final List<Long> randomLongs = getRandomLongs(10, false);
final List<Long> randomLongsMissing = getRandomLongs(10, true);
for (String r : Arrays.asList("*_p_l", "*_p_l_dv", "*_p_l_dv_ns", "*_p_l_ni_dv",
"*_p_l_ni_dv_ns", "*_p_l_ni_ns_dv")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSort(r.replace("*","number"), vals);
// TODO: test some randomly generated (then sorted) arrays (with dups and/or missing values)
doTestIntPointFunctionQuery(r.replace("*","number"), "long");
String field = r.replace("*", "number");
doTestPointFieldSort(field, vals);
doTestPointFieldSort(field, randomLongs);
doTestIntPointFunctionQuery(field, "long");
}
for (String r : Arrays.asList("*_p_l_smf", "*_p_l_dv_smf", "*_p_l_ni_dv_smf",
"*_p_l_sml", "*_p_l_dv_sml", "*_p_l_ni_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
String field = r.replace("*", "number");
doTestPointFieldSort(field, vals);
doTestPointFieldSort(field, randomLongsMissing);
doTestIntPointFunctionQuery(field, "long");
}
for (String r : Arrays.asList("*_p_l_ni", "*_p_l_ni_ns")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "w/o docValues", "4234");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "w/o docValues", "4234");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "w/o docValues", "4234");
doTestPointFieldFunctionQueryError(field, "w/o docValues", "4234");
}
for (String r : Arrays.asList("*_p_l_mv", "*_p_l_ni_mv", "*_p_l_ni_mv_dv", "*_p_l_ni_dv_ns_mv",
"*_p_l_ni_ns_mv", "*_p_l_dv_ns_mv", "*_p_l_mv_dv")) {
"*_p_l_ni_ns_mv", "*_p_l_dv_ns_mv", "*_p_l_mv_dv",
"*_p_l_mv_smf", "*_p_l_mv_dv_smf", "*_p_l_ni_mv_dv_smf",
"*_p_l_mv_sml", "*_p_l_mv_dv_sml", "*_p_l_ni_mv_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "4234");
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "4234", "66666666");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "4234");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "4234", "66666666");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "multivalued", "4234");
doTestPointFieldSortError(field, "multivalued", "4234", "66666666");
doTestPointFieldFunctionQueryError(field, "multivalued", "4234");
doTestPointFieldFunctionQueryError(field, "multivalued", "4234", "66666666");
}
assertEquals("Missing types in the test", Collections.<String>emptySet(), regexToTest);
}
@Test
@@ -726,7 +852,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
testPointFieldFacetField("number_p_l", "number_p_l_dv", getSequentialStringArrayWithInts(10));
clearIndex();
assertU(commit());
testPointFieldFacetField("number_p_l", "number_p_l_dv", getRandomStringArrayWithLongs(10, true));
testPointFieldFacetField("number_p_l", "number_p_l_dv", toStringArray(getRandomLongs(10, false)));
}
@Test
@@ -771,7 +897,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testLongPointFieldMultiValuedFacetField() throws Exception {
testPointFieldMultiValuedFacetField("number_p_l_mv", "number_p_l_mv_dv", getSequentialStringArrayWithInts(20));
testPointFieldMultiValuedFacetField("number_p_l_mv", "number_p_l_mv_dv", getRandomStringArrayWithLongs(20, false));
testPointFieldMultiValuedFacetField("number_p_l_mv", "number_p_l_mv_dv", toStringArray(getRandomLongs(20, false)));
}
@Test
@@ -806,9 +932,9 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testLongPointSetQuery() throws Exception {
doTestSetQueries("number_p_l", getRandomStringArrayWithLongs(20, false), false);
doTestSetQueries("number_p_l_mv", getRandomStringArrayWithLongs(20, false), true);
doTestSetQueries("number_p_l_ni_dv", getRandomStringArrayWithLongs(20, false), false);
doTestSetQueries("number_p_l", toStringArray(getRandomLongs(20, false)), false);
doTestSetQueries("number_p_l_mv", toStringArray(getRandomLongs(20, false)), true);
doTestSetQueries("number_p_l_ni_dv", toStringArray(getRandomLongs(20, false)), false);
}
@Test
@@ -862,37 +988,48 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDatePointFieldSortAndFunction() throws Exception {
final SortedSet<String> regexToTest = dynFieldRegexesForType(DatePointField.class);
final String[] sequential = getSequentialStringArrayWithDates(10);
final List<String> sequential = Arrays.asList(getSequentialStringArrayWithDates(10));
final List<Instant> randomDates = getRandomInstants(10, false);
final List<Instant> randomDatesMissing = getRandomInstants(10, true);
for (String r : Arrays.asList("*_p_dt", "*_p_dt_dv", "*_p_dt_dv_ns", "*_p_dt_ni_dv",
"*_p_dt_ni_dv_ns", "*_p_dt_ni_ns_dv")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSort(r.replace("*","number"), sequential);
// TODO: test some randomly generated (then sorted) arrays (with dups and/or missing values)
doTestDatePointFunctionQuery(r.replace("*","number"), "date");
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomDates);
doTestDatePointFunctionQuery(field, "date");
}
for (String r : Arrays.asList("*_p_dt_smf", "*_p_dt_dv_smf", "*_p_dt_ni_dv_smf",
"*_p_dt_sml", "*_p_dt_dv_sml", "*_p_dt_ni_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
String field = r.replace("*", "number");
doTestPointFieldSort(field, sequential);
doTestPointFieldSort(field, randomDatesMissing);
doTestDatePointFunctionQuery(field, "date");
}
for (String r : Arrays.asList("*_p_dt_ni", "*_p_dt_ni_ns")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "w/o docValues", "1995-12-31T23:59:59Z");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "w/o docValues", "1995-12-31T23:59:59Z");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "w/o docValues", "1995-12-31T23:59:59Z");
doTestPointFieldFunctionQueryError(field, "w/o docValues", "1995-12-31T23:59:59Z");
}
for (String r : Arrays.asList("*_p_dt_mv", "*_p_dt_ni_mv", "*_p_dt_ni_mv_dv", "*_p_dt_ni_dv_ns_mv",
"*_p_dt_ni_ns_mv", "*_p_dt_dv_ns_mv", "*_p_dt_mv_dv")) {
"*_p_dt_ni_ns_mv", "*_p_dt_dv_ns_mv", "*_p_dt_mv_dv",
"*_p_dt_mv_smf", "*_p_dt_mv_dv_smf", "*_p_dt_ni_mv_dv_smf",
"*_p_dt_mv_sml", "*_p_dt_mv_dv_sml", "*_p_dt_ni_mv_dv_sml")) {
assertTrue(r, regexToTest.remove(r));
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "1995-12-31T23:59:59Z");
doTestPointFieldSortError(r.replace("*","number"), "multivalued", "1995-12-31T23:59:59Z", "2000-12-31T23:59:59Z");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "1995-12-31T23:59:59Z");
doTestPointFieldFunctionQueryError(r.replace("*","number"), "multivalued", "1995-12-31T23:59:59Z", "2000-12-31T23:59:59Z");
String field = r.replace("*", "number");
doTestPointFieldSortError(field, "multivalued", "1995-12-31T23:59:59Z");
doTestPointFieldSortError(field, "multivalued", "1995-12-31T23:59:59Z", "2000-12-31T23:59:59Z");
doTestPointFieldFunctionQueryError(field, "multivalued", "1995-12-31T23:59:59Z");
doTestPointFieldFunctionQueryError(field, "multivalued", "1995-12-31T23:59:59Z", "2000-12-31T23:59:59Z");
}
assertEquals("Missing types in the test", Collections.<String>emptySet(), regexToTest);
}
@Test
@@ -942,7 +1079,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDatePointFieldMultiValuedFacetField() throws Exception {
testPointFieldMultiValuedFacetField("number_p_dt_mv", "number_p_dt_mv_dv", getSequentialStringArrayWithDates(20));
testPointFieldMultiValuedFacetField("number_p_dt_mv", "number_p_dt_mv_dv", getRandomStringArrayWithDates(20, false));
testPointFieldMultiValuedFacetField("number_p_dt_mv", "number_p_dt_mv_dv", toStringArray(getRandomInstants(20, false)));
}
@Test
@@ -977,9 +1114,9 @@ public class TestPointFields extends SolrTestCaseJ4 {
@Test
public void testDatePointSetQuery() throws Exception {
doTestSetQueries("number_p_dt", getRandomStringArrayWithDates(20, false), false);
doTestSetQueries("number_p_dt_mv", getRandomStringArrayWithDates(20, false), true);
doTestSetQueries("number_p_dt_ni_dv", getRandomStringArrayWithDates(20, false), false);
doTestSetQueries("number_p_dt", toStringArray(getRandomInstants(20, false)), false);
doTestSetQueries("number_p_dt_mv", toStringArray(getRandomInstants(20, false)), true);
doTestSetQueries("number_p_dt_ni_dv", toStringArray(getRandomInstants(20, false)), false);
}
@@ -1012,15 +1149,14 @@ public class TestPointFields extends SolrTestCaseJ4 {
public void testInternals() throws IOException {
String[] types = new String[]{"i", "l", "f", "d"};
String[] suffixes = new String[]{"", "_dv", "_mv", "_mv_dv", "_ni", "_ni_dv", "_ni_dv_ns", "_ni_dv_ns_mv", "_ni_mv", "_ni_mv_dv", "_ni_ns", "_ni_ns_mv", "_dv_ns", "_ni_ns_dv", "_dv_ns_mv"};
Set<String> typesTested = new HashSet<>();
for (String type:types) {
for (String suffix:suffixes) {
for (String suffix:FIELD_SUFFIXES) {
doTestInternals("number_p_" + type + suffix, getSequentialStringArrayWithInts(10));
typesTested.add("*_p_" + type + suffix);
}
}
for (String suffix:suffixes) {
for (String suffix:FIELD_SUFFIXES) {
doTestInternals("number_p_dt" + suffix, getSequentialStringArrayWithDates(10));
typesTested.add("*_p_dt" + suffix);
}
@@ -1047,50 +1183,39 @@ public class TestPointFields extends SolrTestCaseJ4 {
return typesToTest;
}
private String[] getRandomStringArrayWithDoubles(int length, boolean sorted) {
Set<Double> set;
if (sorted) {
set = new TreeSet<>();
} else {
set = new HashSet<>();
}
while (set.size() < length) {
double f = random().nextDouble() * (Double.MAX_VALUE/2);
if (random().nextBoolean()) {
f = f * -1;
private <T> List<T> getRandomList(int length, boolean missingVals, Supplier<T> randomVal) {
List<T> list = new ArrayList<>(length);
for (int i = 0 ; i < length ; ++i) {
T val = null;
// Sometimes leave val as null when we're producing missing values
if (missingVals == false || usually()) {
val = randomVal.get();
}
set.add(f);
list.add(val);
}
String[] stringArr = new String[length];
int i = 0;
for (double val:set) {
stringArr[i] = String.valueOf(val);
i++;
}
return stringArr;
return list;
}
private String[] getRandomStringArrayWithFloats(int length, boolean sorted) {
Set<Float> set;
if (sorted) {
set = new TreeSet<>();
} else {
set = new HashSet<>();
}
while (set.size() < length) {
float f = random().nextFloat() * (Float.MAX_VALUE/2);
if (random().nextBoolean()) {
f = f * -1;
}
set.add(f);
}
String[] stringArr = new String[length];
int i = 0;
for (float val:set) {
stringArr[i] = String.valueOf(val);
i++;
}
return stringArr;
private List<Double> getRandomDoubles(int length, boolean missingVals) {
return getRandomList(length, missingVals,
() -> random().nextDouble() * Double.MAX_VALUE * (random().nextBoolean() ? 1.D : -1.D));
}
private List<Float> getRandomFloats(int length, boolean missingVals) {
return getRandomList(length, missingVals,
() -> random().nextFloat() * Float.MAX_VALUE * (random().nextBoolean() ? 1.f : -1.f));
}
private List<Integer> getRandomInts(int length, boolean missingVals) {
return getRandomList(length, missingVals, () -> random().nextInt());
}
private List<Long> getRandomLongs(int length, boolean missingVals){
return getRandomList(length, missingVals, () -> random().nextLong());
}
private List<Instant> getRandomInstants(int length, boolean missingVals){
return getRandomList(length, missingVals, () -> Instant.ofEpochMilli(random().nextLong()));
}
private String[] getSequentialStringArrayWithInts(int length) {
@@ -1117,74 +1242,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
}
return arr;
}
private String[] getRandomStringArrayWithInts(int length, boolean sorted) {
Set<Integer> set;
if (sorted) {
set = new TreeSet<>();
} else {
set = new HashSet<>();
}
while (set.size() < length) {
int number = random().nextInt(100);
if (random().nextBoolean()) {
number = number * -1;
}
set.add(number);
}
String[] stringArr = new String[length];
int i = 0;
for (int val:set) {
stringArr[i] = String.valueOf(val);
i++;
}
return stringArr;
}
private String[] getRandomStringArrayWithLongs(int length, boolean sorted) {
Set<Long> set;
if (sorted) {
set = new TreeSet<>();
} else {
set = new HashSet<>();
}
while (set.size() < length) {
long number = random().nextLong();
if (random().nextBoolean()) {
number = number * -1;
}
set.add(number);
}
String[] stringArr = new String[length];
int i = 0;
for (long val:set) {
stringArr[i] = String.valueOf(val);
i++;
}
return stringArr;
}
private String[] getRandomStringArrayWithDates(int length, boolean sorted) {
assert length < 60;
Set<Integer> set;
if (sorted) {
set = new TreeSet<>();
} else {
set = new HashSet<>();
}
while (set.size() < length) {
int number = random().nextInt(60);
set.add(number);
}
String[] stringArr = new String[length];
int i = 0;
for (int val:set) {
stringArr[i] = String.format(Locale.ROOT, "1995-12-11T19:59:%02dZ", val);
i++;
}
return stringArr;
}
private void doTestFieldNotIndexed(String field, String[] values) throws IOException {
assert values.length == 10;
// test preconditions
@@ -1379,9 +1437,9 @@ public class TestPointFields extends SolrTestCaseJ4 {
String[] arr;
if (testLong) {
arr = getRandomStringArrayWithLongs(100, true);
arr = toAscendingStringArray(getRandomLongs(100, false), true);
} else {
arr = getRandomStringArrayWithInts(100, true);
arr = toAscendingStringArray(getRandomInts(100, false), true);
}
for (int i = 0; i < arr.length; i++) {
assertU(adoc("id", String.valueOf(i), fieldName, arr[i]));
@@ -1423,14 +1481,6 @@ public class TestPointFields extends SolrTestCaseJ4 {
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + docValuesField +"']/int[@name='" + numbers[2] + "'][.='1']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + docValuesField +"']/int[@name='" + numbers[3] + "'][.='1']");
// assertU(commit());
// assertQ(req("q", "id:0", "fl", "id, " + docValuesField, "facet", "true", "facet.field", docValuesField, "facet.mincount", "0"),
// "//*[@numFound='1']",
// "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + docValuesField +"']/int[@name='" + numbers[0] + "'][.='1']",
// "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + docValuesField +"']/int[@name='" + numbers[1] + "'][.='0']",
// "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + docValuesField +"']/int[@name='" + numbers[2] + "'][.='0']",
// "count(//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='" + docValuesField +"']/int))==10");
assertFalse(h.getCore().getLatestSchema().getField(nonDocValuesField).hasDocValues());
assertTrue(h.getCore().getLatestSchema().getField(nonDocValuesField).getType() instanceof PointField);
assertQEx("Expecting Exception",
@@ -1875,13 +1925,6 @@ public class TestPointFields extends SolrTestCaseJ4 {
assertTrue(h.getCore().getLatestSchema().getField(docValuesField).getType() instanceof PointField);
String function = "field(" + docValuesField + ", min)";
// assertQ(req("q", "*:*", "fl", "id, " + function),
// "//*[@numFound='10']",
// "//result/doc[1]/" + type + "[@name='" + function + "'][.='" + numbers[0] + "']",
// "//result/doc[2]/" + type + "[@name='" + function + "'][.='" + numbers[1] + "']",
// "//result/doc[3]/" + type + "[@name='" + function + "'][.='" + numbers[2] + "']",
// "//result/doc[10]/" + type + "[@name='" + function + "'][.='" + numbers[9] + "']");
assertQ(req("q", "*:*", "fl", "id, " + docValuesField, "sort", function + " desc"),
"//*[@numFound='10']",
"//result/doc[1]/str[@name='id'][.='9']",
@@ -2063,27 +2106,32 @@ public class TestPointFields extends SolrTestCaseJ4 {
* @param field name of field to sort on
* @param values list of values in ascending order
*/
private void doTestPointFieldSort(String field, String... values) throws Exception {
assert values != null && 2 <= values.length;
// TODO: need to add sort missing coverage...
//
// idea: accept "null" as possible value for sort missing tests ?
//
// need to account for possibility that multiple nulls will be in non deterministic order
// always using secondary sort on id seems prudent ... handles any "dups" in values[]
private <T extends Comparable<T>> void doTestPointFieldSort(String field, List<T> values) throws Exception {
assert values != null && 2 <= values.size();
final List<SolrInputDocument> docs = new ArrayList<>(values.size());
final String[] ascXpathChecks = new String[values.size() + 1];
final String[] descXpathChecks = new String[values.size() + 1];
ascXpathChecks[values.size()] = "//*[@numFound='" + values.size() + "']";
descXpathChecks[values.size()] = "//*[@numFound='" + values.size() + "']";
final List<SolrInputDocument> docs = new ArrayList<>(values.length);
final String[] ascXpathChecks = new String[values.length + 1];
final String[] descXpathChecks = new String[values.length + 1];
ascXpathChecks[values.length] = "//*[@numFound='" + values.length + "']";
descXpathChecks[values.length] = "//*[@numFound='" + values.length + "']";
boolean missingFirst = field.endsWith("_sml") == false;
for (int i = values.length-1; i >= 0; i--) {
docs.add(sdoc("id", String.valueOf(i), field, String.valueOf(values[i])));
List<PosVal<T>> ascendingPosVals = toAscendingPosVals(values, missingFirst);
for (int i = ascendingPosVals.size() - 1 ; i >= 0 ; --i) {
T value = ascendingPosVals.get(i).val;
if (value == null) {
docs.add(sdoc("id", String.valueOf(i))); // null => missing value
} else {
docs.add(sdoc("id", String.valueOf(i), field, String.valueOf(value)));
}
// reminder: xpath array indexes start at 1
ascXpathChecks[i]= "//result/doc["+ (1 + i)+"]/str[@name='id'][.='"+i+"']";
descXpathChecks[i]= "//result/doc["+ (values.length - i) +"]/str[@name='id'][.='"+i+"']";
}
List<PosVal<T>> descendingPosVals = toDescendingPosVals
(ascendingPosVals.stream().map(pv->pv.val).collect(Collectors.toList()), missingFirst);
for (int i = descendingPosVals.size() - 1 ; i >= 0 ; --i) {
descXpathChecks[i]= "//result/doc[" + (i + 1) + "]/str[@name='id'][.='" + descendingPosVals.get(i).pos + "']";
}
// ensure doc add order doesn't affect results
@@ -2093,11 +2141,10 @@ public class TestPointFields extends SolrTestCaseJ4 {
}
assertU(commit());
assertQ(req("q", "*:*", "fl", "id", "sort", field + " asc"),
assertQ(req("q", "*:*", "fl", "id, " + field, "sort", field + " asc, id asc"),
ascXpathChecks);
assertQ(req("q", "*:*", "fl", "id", "sort", field + " desc"),
assertQ(req("q", "*:*", "fl", "id, " + field, "sort", field + " desc, id desc"),
descXpathChecks);
clearIndex();
assertU(commit());
@@ -2200,9 +2247,9 @@ public class TestPointFields extends SolrTestCaseJ4 {
String[] arr;
if (testDouble) {
arr = getRandomStringArrayWithDoubles(10, true);
arr = toAscendingStringArray(getRandomDoubles(10, false), true);
} else {
arr = getRandomStringArrayWithFloats(10, true);
arr = toAscendingStringArray(getRandomFloats(10, false), true);
}
for (int i = 0; i < arr.length; i++) {
assertU(adoc("id", String.valueOf(i), fieldName, arr[i]));
@@ -2316,7 +2363,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
StringBuilder builder = new StringBuilder(fieldName + ":(");
for (int i = 0; i < numTerms; i++) {
if (sf.getType().getNumberType() == NumberType.DATE) {
builder.append(String.valueOf(values[i]).replace(":", "\\:") + ' ');
builder.append(values[i].replaceAll("(:|^[-+])", "\\\\$1") + ' ');
} else {
builder.append(String.valueOf(values[i]).replace("-", "\\-") + ' ');
}
@@ -2327,7 +2374,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
"//*[@numFound='" + numTerms + "']",
"//*[@name='parsed_filter_queries']/str[.='(" + getSetQueryToString(fieldName, values, numTerms) + ")']");
} else {
// Won't use PointInSetQuery if the fiels is not indexed, but should match the same docs
// Won't use PointInSetQuery if the field is not indexed, but should match the same docs
assertQ(req(CommonParams.DEBUG, CommonParams.QUERY, "q", "*:*", "fq", builder.toString(), "fl", "id," + fieldName),
"//*[@numFound='" + numTerms + "']");
}
@@ -2761,7 +2808,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
}
assertU(commit());
assertTrue(h.getCore().getLatestSchema().getField(field).getType() instanceof DatePointField);
assertQ(req("q", "*:*", "fl", "id, " + field, "sort", "product(-1,ms(" + field + ")) asc"),
assertQ(req("q", "*:*", "fl", "id, " + field, "sort", "product(-1,ms(" + field + "," + baseDate +")) asc"),
"//*[@numFound='10']",
"//result/doc[1]/date[@name='" + field + "'][.='1995-01-10T10:59:20Z']",
"//result/doc[2]/date[@name='" + field + "'][.='1995-01-10T10:59:19Z']",
@@ -2990,7 +3037,6 @@ public class TestPointFields extends SolrTestCaseJ4 {
public void testWhiteboxCreateFields() throws Exception {
String[] typeNames = new String[]{"i", "l", "f", "d", "dt"};
String[] suffixes = new String[]{"", "_dv", "_mv", "_mv_dv", "_ni", "_ni_dv", "_ni_dv_ns", "_ni_dv_ns_mv", "_ni_mv", "_ni_mv_dv", "_ni_ns", "_ni_ns_mv", "_dv_ns", "_ni_ns_dv", "_dv_ns_mv"};
Class<?>[] expectedClasses = new Class[]{IntPoint.class, LongPoint.class, FloatPoint.class, DoublePoint.class, LongPoint.class};
Date dateToTest = new Date();
@@ -3004,7 +3050,7 @@ public class TestPointFields extends SolrTestCaseJ4 {
Set<String> typesTested = new HashSet<>();
for (int i = 0; i < typeNames.length; i++) {
for (String suffix:suffixes) {
for (String suffix:FIELD_SUFFIXES) {
doWhiteboxCreateFields("whitebox_p_" + typeNames[i] + suffix, expectedClasses[i], values[i]);
typesTested.add("*_p_" + typeNames[i] + suffix);
}

View File

@@ -84,8 +84,7 @@ public class DebugAgg extends AggValueSource {
this.numSlots = numSlots;
creates.addAndGet(1);
sub = new CountSlotArrAcc(fcontext, numSlots);
new RuntimeException("DEBUG Acc numSlots=" + numSlots).printStackTrace();
// new RuntimeException("DEBUG Acc numSlots=" + numSlots).printStackTrace();
}
@Override

View File

@@ -22,6 +22,7 @@ import java.util.List;
import org.apache.solr.JSONTestUtil;
import org.apache.solr.SolrTestCaseHS;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.SimpleOrderedMap;
@@ -32,7 +33,7 @@ import org.junit.Test;
import org.noggit.JSONParser;
import org.noggit.ObjectBuilder;
@SolrTestCaseJ4.SuppressPointFields
public class TestJsonFacetRefinement extends SolrTestCaseHS {
private static SolrInstances servers; // for distributed testing
@@ -314,6 +315,32 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
"}"
);
// basic refining test through/under a query facet
client.testJQ(params(p, "q", "*:*",
"json.facet", "{" +
"q1 : { type:query, q:'*:*', facet:{" +
"cat0:{${terms} type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true}" +
"}}" +
"}"
)
, "facets=={ count:8" +
", q1:{ count:8, cat0:{ buckets:[ {val:A,count:4} ] } }" +
"}"
);
// basic refining test through/under a range facet
client.testJQ(params(p, "q", "*:*",
"json.facet", "{" +
"r1 : { type:range, field:${num_d} start:-20, end:20, gap:40 , facet:{" +
"cat0:{${terms} type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true}" +
"}}" +
"}"
)
, "facets=={ count:8" +
", r1:{ buckets:[{val:-20.0,count:8, cat0:{buckets:[{val:A,count:4}]} }] }" +
"}"
);
// test that basic stats work for refinement
client.testJQ(params(p, "q", "*:*",
"json.facet", "{" +

View File

@@ -20,12 +20,6 @@
// specific language governing permissions and limitations
// under the License.
== cartesianProduct
//TODO
== cell
//TODO
== classify
The `classify` function classifies tuples using a logistic regression text classification model. It was designed specifically to work with models trained using the <<stream-sources.adoc#train,train function>>. The `classify` function uses the <<stream-sources.adoc#model,model function>> to retrieve a stored model and then scores a stream of tuples using the model. The tuples read by the classifier must contain a text field that can be used for classification. The `classify` function uses a Lucene analyzer to extract the features from the text so the model can be applied. By default the `classify` function looks for the analyzer using the name of the text field in the tuple. If the Solr schema on the worker node does not contain this field, the analyzer can be looked up in another field by specifying the `analyzerField` parameter.
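A sketch of typical usage follows; the collection names (`models`, `emails`), the model id, and the text field are illustrative assumptions, not shipped defaults:

[source,text]
----
classify(model(models, id="model1"),
         search(emails, q="*:*", fl="id, body_txt", sort="id asc"),
         field="body_txt")
----

Each tuple emitted by `classify` is the original tuple annotated with the model's classification score, so the scored stream can be filtered or stored downstream.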
@@ -237,7 +231,26 @@ daemonStream.close();
----
== eval
//todo
The `eval` function allows for use cases where new streaming expressions are generated on the fly and then evaluated.
The `eval` function wraps a streaming expression and reads a single tuple from the underlying stream.
The `eval` function then retrieves a string Streaming Expression from the `expr_s` field of the tuple.
It then compiles that string Streaming Expression and emits the tuples produced by the compiled expression.
=== eval Parameters
* `StreamExpression`: (Mandatory) The stream which provides the streaming expression to be evaluated.
=== eval Syntax
[source,text]
----
eval(expr)
----
In the example above the `eval` expression reads the first tuple from the underlying stream. It then compiles and
executes the string Streaming Expression in the `expr_s` field.
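As a more concrete sketch, suppose a collection named `expressions` stores pre-built expressions in an `expr_s` field (both names are hypothetical); `eval` can then execute whichever expression the search returns first:

[source,text]
----
eval(search(expressions,
            q="name_s:dailyReport",
            fl="expr_s",
            sort="name_s asc"))
----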
[[StreamingExpressions-executor]]
== executor
@@ -517,9 +530,6 @@ merge(
on="fieldA asc")
----
== list
// TODO
== null
The `null` expression is a useful utility function for understanding bottlenecks when performing parallel relational algebra (joins, intersections, rollups etc.). The `null` function reads all the tuples from an underlying stream and returns a single tuple with the count and processing time. Because the null stream adds minimal overhead of its own, it can be used to isolate the performance of Solr's /export handler. If the /export handler's performance is not the bottleneck, then the bottleneck is likely occurring in the workers where the stream decorators are running.
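A minimal sketch, assuming a `collection1` with an `id` field: wrapping an /export search in `null` isolates how quickly the export itself can run, before any decorators are layered on top:

[source,text]
----
null(search(collection1,
            q="*:*",
            fl="id",
            sort="id asc",
            qt="/export"))
----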

View File

@@ -412,30 +412,6 @@ or(and(fieldA,fieldB),fieldC) // (fieldA && fieldB) || fieldC
or(fieldA,fieldB,fieldC,and(fieldD,fieldE),fieldF)
----
== analyze
//TODO
== second
//TODO
== minute
//TODO
== hour
//TODO
== day
//TODO
== month
//TODO
== year
//TODO
== convert
//TODO
== raw
The `raw` function returns whatever raw value is passed as its parameter. This is useful for cases where you want to use a string as part of another evaluator.
@@ -457,5 +433,3 @@ raw(true) // "true" (note: this returns the string "true" and not the boolean tr
eq(raw(fieldA), fieldA) // true if the value of fieldA equals the string "fieldA"
----
== UUID
//TODO

View File

@@ -284,6 +284,7 @@ The `significantTerms` function queries a SolrCloud collection, but instead of r
* `collection`: (Mandatory) The collection that the function is run on.
* `q`: (Mandatory) The query that describes the foreground document set.
* `field`: (Mandatory) The field to extract the terms from.
* `limit`: (Optional, Default 20) The max number of terms to return.
* `minDocFreq`: (Optional, Defaults to 5 documents) The minimum number of documents the term must appear in on a shard. This is a float value. If greater than 1.0 then it's considered the absolute number of documents. If less than 1.0 it's treated as a percentage of documents.
* `maxDocFreq`: (Optional, Defaults to 30% of documents) The maximum number of documents the term can appear in on a shard. This is a float value. If greater than 1.0 then it's considered the absolute number of documents. If less than 1.0 it's treated as a percentage of documents.
@@ -295,12 +296,14 @@ The `significantTerms` function queries a SolrCloud collection, but instead of r
----
significantTerms(collection1,
q="body:Solr",
field="author",
limit="50",
minDocFreq="10",
maxDocFreq=".20",
minTermLength="5")
----
In the example above the `significantTerms` function is querying `collection1` and returning at most 50 significant terms that appear in 10 or more documents but not more than 20% of the corpus.
In the example above the `significantTerms` function is querying `collection1` and returning at most 50 significant terms from the `author` field that appear in 10 or more documents but not more than 20% of the corpus.
== shortestPath

View File

@@ -2094,7 +2094,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
while (true) {
long replicaIndexVersion = getIndexVersion(pullReplica);
if (leaderIndexVersion == replicaIndexVersion) {
log.debug("Leader replica's version ({}) in sync with replica({}): {} == {}", leader.getName(), pullReplica.getName(), leaderIndexVersion, replicaIndexVersion);
log.info("Leader replica's version ({}) in sync with replica({}): {} == {}", leader.getName(), pullReplica.getName(), leaderIndexVersion, replicaIndexVersion);
// Make sure the host is serving the correct version
try (SolrCore core = containers.get(pullReplica.getNodeName()).getCore(pullReplica.getCoreName())) {
@@ -2105,7 +2105,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
if (Long.parseLong(servingVersion) == replicaIndexVersion) {
break;
} else {
log.debug("Replica {} has the correct version replicated, but the searcher is not ready yet. Replicated version: {}, Serving version: {}", pullReplica.getName(), replicaIndexVersion, servingVersion);
log.info("Replica {} has the correct version replicated, but the searcher is not ready yet. Replicated version: {}, Serving version: {}", pullReplica.getName(), replicaIndexVersion, servingVersion);
}
} finally {
if (ref != null) ref.decref();
@@ -2117,9 +2117,9 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
fail(String.format(Locale.ROOT, "Timed out waiting for replica %s (%d) to replicate from leader %s (%d)", pullReplica.getName(), replicaIndexVersion, leader.getName(), leaderIndexVersion));
}
if (leaderIndexVersion > replicaIndexVersion) {
log.debug("{} version is {} and leader's is {}, will wait for replication", pullReplica.getName(), replicaIndexVersion, leaderIndexVersion);
log.info("{} version is {} and leader's is {}, will wait for replication", pullReplica.getName(), replicaIndexVersion, leaderIndexVersion);
} else {
log.debug("Leader replica's version ({}) is lower than pull replica({}): {} < {}", leader.getName(), pullReplica.getName(), leaderIndexVersion, replicaIndexVersion);
log.info("Leader replica's version ({}) is lower than pull replica({}): {} < {}", leader.getName(), pullReplica.getName(), leaderIndexVersion, replicaIndexVersion);
}
}
Thread.sleep(1000);

View File

@@ -58,7 +58,7 @@ public class StoppableCommitThread extends StoppableThread {
break;
}
}
log.debug("StoppableCommitThread finished. Committed {} times. Failed {} times.", numCommits.get(), numFails.get());
log.info("StoppableCommitThread finished. Committed {} times. Failed {} times.", numCommits.get(), numFails.get());
}
@Override