Merge 'master' into feature/autoscaling

Cao Manh Dat 2017-07-18 12:13:39 +07:00
commit cafd0714cd
86 changed files with 2452 additions and 1891 deletions

View File

@ -0,0 +1,67 @@
#!/usr/bin/env bash
# This shell script will download the software required to build the ref
# guide using RVM (Ruby Version Manager), and then run the following
# under solr/solr-ref-guide: "ant clean build-site build-pdf".
#
# The following will be downloaded and installed into $HOME/.rvm/:
# RVM, Ruby, and Ruby gems jekyll, jekyll-asciidoc, and pygments.rb.
#
# The script expects to be run in the top-level project directory.
#
# RVM will attempt to verify the signature on downloaded RVM software if
# you have gpg or gpg2 installed. If you do, as a one-time operation you
# must import two keys (substitute gpg2 below if you have it installed):
#
# gpg --keyserver hkp://keys.gnupg.net --recv-keys \
# 409B6B1796C275462A1703113804BB82D39DC0E3 \
# 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -x # Echo commands to the console
set -e # Fail the script if any command fails
RVM_PATH=$HOME/.rvm
RUBY_VERSION=ruby-2.3.3
GEMSET=solr-refguide-gemset
# Install the "stable" RVM release to ~/.rvm/, and don't mess with .bash_profile etc.
\curl -sSL https://get.rvm.io | bash -s -- --ignore-dotfiles stable
set +x # Temporarily disable command echoing to reduce clutter
function echoRun() {
local cmd="$1"
echo "Running '$cmd'"
$cmd
}
echoRun "source $RVM_PATH/scripts/rvm" # Load RVM into a shell session *as a Bash function*
echoRun "rvm autolibs disable" # Enable single-user mode
echoRun "rvm install $RUBY_VERSION" # Install Ruby
echoRun "rvm gemset create $GEMSET" # Create this project's gemset
echoRun "rvm $RUBY_VERSION@$GEMSET" # Activate this project's gemset
# Install gems in the gemset. Param --force disables dependency conflict detection.
echoRun "gem install --force --version 3.5.0 jekyll"
echoRun "gem install --force --version 2.1.0 jekyll-asciidoc"
echoRun "gem install --force --version 1.1.2 pygments.rb"
cd solr/solr-ref-guide
set -x # Re-enable command echoing
ant clean build-site build-pdf

View File

@ -16,6 +16,12 @@ Changes in Runtime Behavior
======================= Lucene 7.1.0 =======================
(No Changes)
Optimizations
* LUCENE-7905: Optimize how OrdinalMap (used by
SortedSetDocValuesFacetCounts and others) builds its map (Robert
Muir, Adrien Grand, Mike McCandless)
======================= Lucene 7.0.0 =======================
New Features

View File

@ -30,8 +30,8 @@ import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;

View File

@ -18,21 +18,10 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
/**
* A wrapper for CompositeIndexReader providing access to DocValues.
@ -649,283 +638,6 @@ public class MultiDocValues {
}
}
/** maps per-segment ordinals to/from global ordinal space */
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
// TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums)
public static class OrdinalMap implements Accountable {
private static class SegmentMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
/** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
private static int[] map(final long[] weights) {
final int[] newToOld = new int[weights.length];
for (int i = 0; i < weights.length; ++i) {
newToOld[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
final int tmp = newToOld[i];
newToOld[i] = newToOld[j];
newToOld[j] = tmp;
}
@Override
protected int compare(int i, int j) {
// j first since we actually want higher weights first
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
}
}.sort(0, weights.length);
return newToOld;
}
/** Invert the map. */
private static int[] inverse(int[] map) {
final int[] inverse = new int[map.length];
for (int i = 0; i < map.length; ++i) {
inverse[map[i]] = i;
}
return inverse;
}
private final int[] newToOld, oldToNew;
SegmentMap(long[] weights) {
newToOld = map(weights);
oldToNew = inverse(newToOld);
assert Arrays.equals(newToOld, inverse(oldToNew));
}
int newToOld(int segment) {
return newToOld[segment];
}
int oldToNew(int segment) {
return oldToNew[segment];
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
}
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedSetDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Creates an ordinal map that allows mapping ords to/from a merged
* space from <code>subs</code>.
* @param owner a cache key
* @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
* not be dense (e.g. can be FilteredTermsEnums).
* @param weights a weight for each sub. This is ideally correlated with
* the number of unique terms that each sub introduces compared
* to the other subs
* @throws IOException if an I/O error occurred.
*/
public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
if (subs.length != weights.length) {
throw new IllegalArgumentException("subs and weights must have the same length");
}
// enums are not sorted, so let's sort to save memory
final SegmentMap segmentMap = new SegmentMap(weights);
return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
/** Cache key of whoever asked for this awful thing */
public final IndexReader.CacheKey owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
final PackedLongValues globalOrdDeltas;
// globalOrd -> first segment container
final PackedLongValues firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
final SegmentMap segmentMap;
// ram usage
final long ramBytesUsed;
OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
this.segmentMap = segmentMap;
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long segmentOrds[] = new long[subs.length];
ReaderSlice slices[] = new ReaderSlice[subs.length];
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
for (int i = 0; i < slices.length; i++) {
slices[i] = new ReaderSlice(0, 0, i);
indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
}
MultiTermsEnum mte = new MultiTermsEnum(slices);
mte.reset(indexes);
long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
for (int i = 0; i < mte.getMatchCount(); i++) {
int segmentIndex = matches[i].index;
long segmentOrd = matches[i].terms.ord();
long delta = globalOrd - segmentOrd;
// We compute the least segment where the term occurs. In case the
// first segment contains most (or even all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd) {
ordDeltaBits[segmentIndex] |= delta;
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
assert firstSegmentIndex < segmentOrds.length;
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
this.firstSegments = firstSegments.build();
this.globalOrdDeltas = globalOrdDeltas.build();
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final PackedLongValues deltas = ordDeltas[i].build();
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final PackedLongValues.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert !it.hasNext();
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
/**
* Given a segment number, return a {@link LongValues} instance that maps
* segment ordinals to global ordinals.
*/
public LongValues getGlobalOrds(int segmentIndex) {
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
}
/**
* Given a global ordinal, returns the ordinal of the first segment which contains
* this ordinal (the corresponding segment is returned by {@link #getFirstSegmentNumber}).
*/
public long getFirstSegmentOrd(long globalOrd) {
return globalOrd - globalOrdDeltas.get(globalOrd);
}
/**
* Given a global ordinal, returns the index of the first
* segment that contains this term.
*/
public int getFirstSegmentNumber(long globalOrd) {
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
}
/**
* Returns the total number of unique terms in global ord space.
*/
public long getValueCount() {
return globalOrdDeltas.size();
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
resources.add(Accountables.namedAccountable("first segments", firstSegments));
resources.add(Accountables.namedAccountable("segment map", segmentMap));
// TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing
return resources;
}
}
/**
* Implements SortedDocValues over n subs, using an OrdinalMap
* @lucene.internal

View File

@ -166,7 +166,7 @@ final class MultiSorter {
final SortedDocValues sorted = Sorter.getOrWrapSorted(readers.get(i), sortField);
values[i] = sorted;
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;

View File

@ -302,6 +302,8 @@ public final class MultiTermsEnum extends TermsEnum {
// gather equal top fields
if (queue.size() > 0) {
// TODO: we could maybe defer this somewhat costly operation until one of the APIs that
// needs to see the top is invoked (docFreq, postings, etc.)
pullTop();
} else {
current = null;

View File

@ -0,0 +1,368 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
/** Maps per-segment ordinals to/from global ordinal space, using a compact packed-ints representation.
*
* <p><b>NOTE</b>: this is a costly operation, as it must merge sort all terms, and may require non-trivial RAM once done. It's better to operate in
* segment-private ordinal space instead when possible.
*
* @lucene.internal */
public class OrdinalMap implements Accountable {
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
private static class TermsEnumIndex {
public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
final int subIndex;
final TermsEnum termsEnum;
BytesRef currentTerm;
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
this.termsEnum = termsEnum;
this.subIndex = subIndex;
}
public BytesRef next() throws IOException {
currentTerm = termsEnum.next();
return currentTerm;
}
}
private static class SegmentMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
/** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
private static int[] map(final long[] weights) {
final int[] newToOld = new int[weights.length];
for (int i = 0; i < weights.length; ++i) {
newToOld[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
final int tmp = newToOld[i];
newToOld[i] = newToOld[j];
newToOld[j] = tmp;
}
@Override
protected int compare(int i, int j) {
// j first since we actually want higher weights first
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
}
}.sort(0, weights.length);
return newToOld;
}
/** Invert the map. */
private static int[] inverse(int[] map) {
final int[] inverse = new int[map.length];
for (int i = 0; i < map.length; ++i) {
inverse[map[i]] = i;
}
return inverse;
}
private final int[] newToOld, oldToNew;
SegmentMap(long[] weights) {
newToOld = map(weights);
oldToNew = inverse(newToOld);
assert Arrays.equals(newToOld, inverse(oldToNew));
}
int newToOld(int segment) {
return newToOld[segment];
}
int oldToNew(int segment) {
return oldToNew[segment];
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
}
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedSetDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Creates an ordinal map that allows mapping ords to/from a merged
* space from <code>subs</code>.
* @param owner a cache key
* @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
* not be dense (e.g. can be FilteredTermsEnums).
* @param weights a weight for each sub. This is ideally correlated with
* the number of unique terms that each sub introduces compared
* to the other subs
* @throws IOException if an I/O error occurred.
*/
public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
if (subs.length != weights.length) {
throw new IllegalArgumentException("subs and weights must have the same length");
}
// enums are not sorted, so let's sort to save memory
final SegmentMap segmentMap = new SegmentMap(weights);
return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
/** Cache key of whoever asked for this awful thing */
public final IndexReader.CacheKey owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
final PackedLongValues globalOrdDeltas;
// globalOrd -> first segment container
final PackedLongValues firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
final SegmentMap segmentMap;
// ram usage
final long ramBytesUsed;
OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
this.segmentMap = segmentMap;
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long[] segmentOrds = new long[subs.length];
// Just merge-sorts by term:
PriorityQueue<TermsEnumIndex> queue = new PriorityQueue<TermsEnumIndex>(subs.length) {
@Override
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
return a.currentTerm.compareTo(b.currentTerm) < 0;
}
};
for (int i = 0; i < subs.length; i++) {
TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
if (sub.next() != null) {
queue.add(sub);
}
}
BytesRefBuilder scratch = new BytesRefBuilder();
long globalOrd = 0;
while (queue.size() != 0) {
TermsEnumIndex top = queue.top();
scratch.copyBytes(top.currentTerm);
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
// Advance past this term, recording the per-segment ord deltas:
while (true) {
top = queue.top();
long segmentOrd = top.termsEnum.ord();
long delta = globalOrd - segmentOrd;
int segmentIndex = top.subIndex;
// We compute the least segment where the term occurs. In case the
// first segment contains most (or even all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
ordDeltaBits[segmentIndex] |= delta;
// for each per-segment ord, map it back to the global term; the while loop is needed
// in case the incoming TermsEnums don't have compact ordinals (some ordinal values
// are skipped), which can happen e.g. with a FilteredTermsEnum:
assert segmentOrds[segmentIndex] <= segmentOrd;
// TODO: we could specialize this case (the while loop is not needed when the ords
// are compact)
do {
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
} while (segmentOrds[segmentIndex] <= segmentOrd);
if (top.next() == null) {
queue.pop();
if (queue.size() == 0) {
break;
}
} else {
queue.updateTop();
}
if (queue.top().currentTerm.equals(scratch.get()) == false) {
break;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
this.firstSegments = firstSegments.build();
this.globalOrdDeltas = globalOrdDeltas.build();
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final PackedLongValues deltas = ordDeltas[i].build();
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final PackedLongValues.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert it.hasNext() == false;
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
/**
* Given a segment number, return a {@link LongValues} instance that maps
* segment ordinals to global ordinals.
*/
public LongValues getGlobalOrds(int segmentIndex) {
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
}
/**
* Given a global ordinal, returns the ordinal of the first segment which contains
* this ordinal (the corresponding segment is returned by {@link #getFirstSegmentNumber}).
*/
public long getFirstSegmentOrd(long globalOrd) {
return globalOrd - globalOrdDeltas.get(globalOrd);
}
/**
* Given a global ordinal, returns the index of the first
* segment that contains this term.
*/
public int getFirstSegmentNumber(long globalOrd) {
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
}
/**
* Returns the total number of unique terms in global ord space.
*/
public long getValueCount() {
return globalOrdDeltas.size();
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
resources.add(Accountables.namedAccountable("first segments", firstSegments));
resources.add(Accountables.namedAccountable("segment map", segmentMap));
// TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing
return resources;
}
}
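For reference, here is a minimal usage sketch of the relocated class (not part of this commit; the OrdinalMapDemo class, its method name, and the "dump all terms" scenario are illustrative assumptions). It mirrors the pattern used in TestJoinUtil later in this diff: gather per-segment SortedDocValues, build the map with a null cache key, and translate between segment-local and global ordinal space.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;

public class OrdinalMapDemo {
  /** Prints every unique value of `field` across all segments of `reader`, in global ordinal order. */
  static void dumpGlobalTerms(IndexReader reader, String field) throws IOException {
    SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
    for (LeafReaderContext ctx : reader.leaves()) {
      values[ctx.ord] = DocValues.getSorted(ctx.reader(), field);
    }
    // Build the map the same way the tests do; a null cache key is fine for one-off use.
    OrdinalMap map = OrdinalMap.build(null, values, PackedInts.DEFAULT);
    for (long globalOrd = 0; globalOrd < map.getValueCount(); globalOrd++) {
      // Resolve the term by looking it up in the first segment that contains it.
      int segment = map.getFirstSegmentNumber(globalOrd);
      long segmentOrd = map.getFirstSegmentOrd(globalOrd);
      BytesRef term = values[segment].lookupOrd((int) segmentOrd);
      System.out.println(globalOrd + " -> " + term.utf8ToString());
    }
    // The reverse direction: map a segment-local ordinal into the global space.
    if (values.length > 0 && values[0].getValueCount() > 0) {
      LongValues segToGlobal = map.getGlobalOrds(0);
      System.out.println("segment 0, ord 0 -> global ord " + segToGlobal.get(0));
    }
  }
}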

View File

@ -608,7 +608,6 @@ public class OfflineSorter {
int count = 0;
while ((spare = iter.next()) != null) {
assert spare.length <= Short.MAX_VALUE;
out.write(spare);
count++;
}

View File

@ -25,7 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;

View File

@ -42,6 +42,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConjunctionDISI;
@ -152,10 +153,10 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
private class CountOneSegment implements Callable<Void> {
final LeafReader leafReader;
final MatchingDocs hits;
final MultiDocValues.OrdinalMap ordinalMap;
final OrdinalMap ordinalMap;
final int segOrd;
public CountOneSegment(LeafReader leafReader, MatchingDocs hits, MultiDocValues.OrdinalMap ordinalMap, int segOrd) {
public CountOneSegment(LeafReader leafReader, MatchingDocs hits, OrdinalMap ordinalMap, int segOrd) {
this.leafReader = leafReader;
this.hits = hits;
this.ordinalMap = ordinalMap;
@ -240,7 +241,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
/** Does all the "real work" of tallying up the counts. */
private final void count(List<MatchingDocs> matchingDocs) throws IOException, InterruptedException {
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in
@ -281,7 +282,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
private final void countAll() throws IOException, InterruptedException {
//System.out.println("ssdv count");
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in

View File

@ -31,8 +31,8 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;

View File

@ -37,6 +37,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConjunctionDISI;
@ -155,7 +156,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
}
private void countOneSegment(MultiDocValues.OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException {
private void countOneSegment(OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException {
SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
if (segValues == null) {
// nothing to count
@ -236,7 +237,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
//System.out.println("ssdv count");
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in
@ -267,7 +268,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
private final void countAll() throws IOException {
//System.out.println("ssdv count");
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in

View File

@ -20,7 +20,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
@ -37,9 +37,9 @@ final class GlobalOrdinalsCollector implements Collector {
final String field;
final LongBitSet collectedOrds;
final MultiDocValues.OrdinalMap ordinalMap;
final OrdinalMap ordinalMap;
GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
GlobalOrdinalsCollector(String field, OrdinalMap ordinalMap, long valueCount) {
this.field = field;
this.ordinalMap = ordinalMap;
this.collectedOrds = new LongBitSet(valueCount);

View File

@ -21,7 +21,7 @@ import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreWeight;
@ -41,7 +41,7 @@ final class GlobalOrdinalsQuery extends Query {
// All the ords of matching docs found with OrdinalsCollector.
private final LongBitSet foundOrds;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
private final OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
@ -50,7 +50,7 @@ final class GlobalOrdinalsQuery extends Query {
// id of the context rather than the context itself in order not to hold references to index readers
private final Object indexReaderContextId;
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery,
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, OrdinalMap globalOrds, Query toQuery,
Query fromQuery, Object indexReaderContextId) {
this.foundOrds = foundOrds;
this.joinField = joinField;

View File

@ -21,7 +21,7 @@ import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
@ -35,13 +35,13 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
final boolean doMinMax;
final int min;
final int max;
final MultiDocValues.OrdinalMap ordinalMap;
final OrdinalMap ordinalMap;
final LongBitSet collectedOrds;
protected final Scores scores;
protected final Occurrences occurrences;
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) {
GlobalOrdinalsWithScoreCollector(String field, OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) {
if (valueCount > Integer.MAX_VALUE) {
// We simply don't support more than Integer.MAX_VALUE distinct ids
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
@ -168,7 +168,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Min extends GlobalOrdinalsWithScoreCollector {
public Min(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Min(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Min, min, max);
}
@ -185,7 +185,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Max extends GlobalOrdinalsWithScoreCollector {
public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Max(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Max, min, max);
}
@ -202,7 +202,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Sum extends GlobalOrdinalsWithScoreCollector {
public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Sum(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Total, min, max);
}
@ -219,7 +219,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Avg extends GlobalOrdinalsWithScoreCollector {
public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Avg(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Avg, min, max);
}
@ -241,7 +241,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class NoScore extends GlobalOrdinalsWithScoreCollector {
public NoScore(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public NoScore(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.None, min, max);
}

View File

@ -21,7 +21,7 @@ import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
@ -39,7 +39,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
private final GlobalOrdinalsWithScoreCollector collector;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
private final OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
@ -52,7 +52,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
private final Object indexReaderContextId;
GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, ScoreMode scoreMode, String joinField,
MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max,
OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max,
Object indexReaderContextId) {
this.collector = collector;
this.joinField = joinField;

View File

@ -34,8 +34,8 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
@ -407,7 +407,7 @@ public final class JoinUtil {
/**
* Delegates to {@link #createJoinQuery(String, Query, Query, IndexSearcher, ScoreMode, MultiDocValues.OrdinalMap, int, int)},
* Delegates to {@link #createJoinQuery(String, Query, Query, IndexSearcher, ScoreMode, OrdinalMap, int, int)},
* but disables the min and max filtering.
*
* @param joinField The {@link SortedDocValues} field containing the join values
@ -425,7 +425,7 @@ public final class JoinUtil {
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap) throws IOException {
OrdinalMap ordinalMap) throws IOException {
return createJoinQuery(joinField, fromQuery, toQuery, searcher, scoreMode, ordinalMap, 0, Integer.MAX_VALUE);
}
@ -464,7 +464,7 @@ public final class JoinUtil {
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap,
OrdinalMap ordinalMap,
int min,
int max) throws IOException {
int numSegments = searcher.getIndexReader().leaves().size();

View File

@ -55,11 +55,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
@ -267,7 +266,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
@ -372,7 +371,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
@ -500,7 +499,7 @@ public class TestJoinUtil extends LuceneTestCase {
for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
@ -621,7 +620,7 @@ public class TestJoinUtil extends LuceneTestCase {
for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
Query fromQuery = new TermQuery(new Term("type", "from"));
@ -1036,7 +1035,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = new IndexSearcher(r);
@ -1067,7 +1066,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = new IndexSearcher(r);
@ -1590,7 +1589,7 @@ public class TestJoinUtil extends LuceneTestCase {
for (LeafReaderContext leadContext : topLevelReader.leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
context.ordinalMap = MultiDocValues.OrdinalMap.build(
context.ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
}
@ -1712,7 +1711,7 @@ public class TestJoinUtil extends LuceneTestCase {
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
Directory dir;
IndexSearcher searcher;

View File

@ -26,6 +26,7 @@ import java.io.IOException;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
@ -163,7 +164,7 @@ public class SerializedDVStrategy extends SpatialStrategy {
}//PredicateValueSourceQuery
/**
* Implements a ValueSource by deserializing a Shape in from BinaryDocValues using BinaryCodec.
* Implements a ShapeValueSource by deserializing a Shape from BinaryDocValues using BinaryCodec.
* @see #makeShapeValueSource()
*/
static class ShapeDocValueSource extends ShapeValuesSource {
@ -178,7 +179,7 @@ public class SerializedDVStrategy extends SpatialStrategy {
@Override
public ShapeValues getValues(LeafReaderContext readerContext) throws IOException {
final BinaryDocValues docValues = readerContext.reader().getBinaryDocValues(fieldName);
final BinaryDocValues docValues = DocValues.getBinary(readerContext.reader(), fieldName);
return new ShapeValues() {
@Override

View File

@ -68,6 +68,11 @@ Bug Fixes
* SOLR-10668: fix NPE at sort=childfield(..) .. on absent values (Mikhail Khludnev)
* SOLR-8984: EnumField's error reporting now indicates the field name in the failure log (Lanny Ripple,
Ann Addicks via Ishan Chattopadhyaya)
* SOLR-11012: Fix three (JavaBinCodec not being closed) Resource Leak warnings. (Christine Poerschke)
Optimizations
----------------------
@ -88,6 +93,11 @@ Other Changes
* SOLR-10964: Reduce SolrIndexSearcher casting in LTRRescorer. (Christine Poerschke)
* SOLR-11075: Refactor handling of params in CloudSolrStream and FacetStream (Erick Erickson)
* SOLR-11052: Remove unnecessary Long-to-Integer and back casts in ReplicationHandler.
(Ramsey Haddad via Christine Poerschke)
================== 7.0.0 ==================
Versions of Major Components
@ -212,6 +222,10 @@ Upgrading from Solr 6.x
* StandardRequestHandler is deprecated. Simply use SearchHandler instead.
* The parameter names 'fromNode' for MOVEREPLICA and 'source', 'target' for REPLACENODE have been deprecated and
replaced with 'sourceNode' and 'targetNode' instead. The old names will continue to work for back-compatibility
but they will be removed in 8.0. See SOLR-11068 for more details.
New Features
----------------------
* SOLR-9857, SOLR-9858: Collect aggregated metrics from nodes and shard leaders in overseer. (ab)
@ -364,6 +378,12 @@ Bug Fixes
* SOLR-11045: The new replica created by MoveReplica will have to have same name and coreName as the
old one in case of HDFS (Cao Manh Dat)
* SOLR-11043: Fix facet.range.method=dv and interval facets on single-valued float fields with negative values.
(Tomás Fernández Löbbe, Steve Rowe)
* SOLR-11073: Fix overflow in interval faceting when querying Long limits (e.g. (Long.MAX_VALUE TO Long.MAX_VALUE])
(Tomás Fernández Löbbe)
Optimizations
----------------------
@ -503,6 +523,12 @@ Other Changes
all affected tests (Anshum Gupta)
- SOLR-11059: Randomize PointFields in schema-blockjoinfacetcomponent.xml and all related tests (Anshum Gupta)
- SOLR-11060: Randomize PointFields in schema-custom-field.xml and all related tests (Anshum Gupta)
- SOLR-11095: Randomize PointFields in doc-expiry & exitabe-directory test configsets (hossman)
- SOLR-11097: Randomize PointFields in schema-id-and-version-fields-only.xml and all affected tests (hossman)
- SOLR-11098: Randomize PointFields in cloud-managed-preanalyzed & schema-preanalyzed.xml and all affected tests (hossman)
- SOLR-11101: Randomize PointFields in "cloud-minimal" test configset and all affected tests (Steve Rowe)
- SOLR-11103: Randomize PointFields in "cloud-hdfs" test configset and all affected tests (Steve Rowe)
- SOLR-11105: Randomize PointFields in "cloud-minimal-jmx" and "cloud-minimal-inplace-updates" test configsets (Steve Rowe)
* SOLR-6807: Changed requestDispatcher's handleSelect to default to false, thus ignoring "qt".
Simplified configs to not refer to handleSelect or "qt". Switch all tests that assumed true to assume false
@ -526,6 +552,11 @@ Other Changes
* SOLR-10796: TestPointFields: increase randomized testing of non-trivial values. (Steve Rowe)
* SOLR-11068: MOVEREPLICA and REPLACENODE API parameter names are now 'sourceNode' and 'targetNode'. The old names
viz. 'fromNode' for MOVEREPLICA and 'source', 'target' for REPLACENODE have been deprecated. (shalin)
* SOLR-11088: Fix sporadic failures of MetricsHandlerTest.testPropertyFilter on jenkins (shalin)
================== 6.7.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@ -660,6 +691,8 @@ when using one of Exact*StatsCache (Mikhail Khludnev)
* SOLR-11024: ParallelStream should set the StreamContext when constructing SolrStreams (Joel Bernstein)
* SOLR-10908: CloudSolrStream.toExpression incorrectly handles fq clauses (Rohit Singh via Erick Erickson)
Optimizations
----------------------
* SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1)

View File

@ -31,6 +31,7 @@ import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Utils;
@ -81,20 +82,22 @@ public class MoveReplicaCmd implements Cmd{
"Collection: " + collection + " replica: " + replicaName + " does not exist");
}
} else {
ocmh.checkRequired(message, SHARD_ID_PROP, "fromNode");
String fromNode = message.getStr("fromNode");
String sourceNode = message.getStr(CollectionParams.SOURCE_NODE, message.getStr(CollectionParams.FROM_NODE));
if (sourceNode == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "sourceNode is a required param" );
}
String shardId = message.getStr(SHARD_ID_PROP);
Slice slice = clusterState.getCollection(collection).getSlice(shardId);
List<Replica> sliceReplicas = new ArrayList<>(slice.getReplicas());
Collections.shuffle(sliceReplicas, RANDOM);
for (Replica r : slice.getReplicas()) {
if (r.getNodeName().equals(fromNode)) {
if (r.getNodeName().equals(sourceNode)) {
replica = r;
}
}
if (replica == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Collection: " + collection + " node: " + fromNode + " do not have any replica belong to shard: " + shardId);
"Collection: " + collection + " node: " + sourceNode + " do not have any replica belong to shard: " + shardId);
}
}

View File

@ -37,6 +37,7 @@ import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.zookeeper.KeeperException;
@ -59,9 +60,11 @@ public class ReplaceNodeCmd implements OverseerCollectionMessageHandler.Cmd {
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
ZkStateReader zkStateReader = ocmh.zkStateReader;
ocmh.checkRequired(message, "source", "target");
String source = message.getStr("source");
String target = message.getStr("target");
String source = message.getStr(CollectionParams.SOURCE_NODE, message.getStr("source"));
String target = message.getStr(CollectionParams.TARGET_NODE, message.getStr("target"));
if (source == null || target == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "sourceNode and targetNode are required params" );
}
String async = message.getStr("async");
int timeout = message.getInt("timeout", 10 * 60); // 10 minutes
boolean parallel = message.getBool("parallel", false);

View File

@ -58,7 +58,15 @@ import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.cloud.overseer.SliceMutator;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.*;
import org.apache.solr.common.cloud.BeforeReconnect;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DefaultConnectionStrategy;
import org.apache.solr.common.cloud.DefaultZkACLProvider;
import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.LiveNodesListener;
import org.apache.solr.common.cloud.OnReconnect;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Replica.Type;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;

View File

@ -17,10 +17,6 @@
package org.apache.solr.handler;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.apache.solr.common.util.Utils.makeMap;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
@ -38,6 +34,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
@ -53,8 +50,8 @@ import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
@ -82,6 +79,10 @@ import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.apache.solr.common.util.Utils.makeMap;
public class ExportWriter implements SolrCore.RawWriter, Closeable {
private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private OutputStreamWriter respWriter;
@ -1257,7 +1258,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
protected SortedDocValues vals;
protected MultiDocValues.OrdinalMap ordinalMap;
protected OrdinalMap ordinalMap;
protected LongValues globalOrds;
protected SortedDocValues currentVals;

View File

@ -205,7 +205,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
private final Map<String, FileInfo> confFileInfoCache = new HashMap<>();
private Integer reserveCommitDuration = readIntervalMs("00:00:10");
private Long reserveCommitDuration = readIntervalMs("00:00:10");
volatile IndexCommit indexCommitPoint;
@ -1695,8 +1695,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
}
private static Integer readIntervalMs(String interval) {
return (int) TimeUnit.MILLISECONDS.convert(readIntervalNs(interval), TimeUnit.NANOSECONDS);
private static Long readIntervalMs(String interval) {
return TimeUnit.MILLISECONDS.convert(readIntervalNs(interval), TimeUnit.NANOSECONDS);
}
private static Long readIntervalNs(String interval) {

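For context, TimeUnit.convert returns a primitive long, so the old Integer-returning version needed the (int) narrowing cast removed above. A standalone sketch of the before/after shapes (the IntervalDemo class and its hard-coded ten-second value are illustrative, not part of the patch):

import java.util.concurrent.TimeUnit;

public class IntervalDemo {
  // Old shape: convert() returns long, so an explicit (int) narrowing cast was required.
  static Integer readIntervalMsOld(long intervalNs) {
    return (int) TimeUnit.MILLISECONDS.convert(intervalNs, TimeUnit.NANOSECONDS);
  }
  // New shape: return Long directly; no cast, and the full long range is preserved.
  static Long readIntervalMs(long intervalNs) {
    return TimeUnit.MILLISECONDS.convert(intervalNs, TimeUnit.NANOSECONDS);
  }
  public static void main(String[] args) {
    long tenSecondsNs = 10_000_000_000L; // roughly what readIntervalNs("00:00:10") would yield
    System.out.println(readIntervalMsOld(tenSecondsNs)); // 10000
    System.out.println(readIntervalMs(tenSecondsNs));    // 10000
  }
}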
View File

@ -218,6 +218,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("sequence", SequenceEvaluator.class)
.withFunctionName("addAll", AddAllEvaluator.class)
.withFunctionName("residuals", ResidualsEvaluator.class)
.withFunctionName("plot", PlotStream.class)
// Boolean Stream Evaluators
.withFunctionName("and", AndEvaluator.class)

View File

@ -882,14 +882,26 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
rsp.add(SolrSnapshotManager.SNAPSHOTS_INFO, snapshots);
return null;
}),
REPLACENODE_OP(REPLACENODE, (req, rsp, h) -> req.getParams().required().getAll(req.getParams().getAll(null, "parallel"), "source", "target")),
REPLACENODE_OP(REPLACENODE, (req, rsp, h) -> {
SolrParams params = req.getParams();
String sourceNode = params.get(CollectionParams.SOURCE_NODE, params.get("source"));
if (sourceNode == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, CollectionParams.SOURCE_NODE + " is a required parameter");
}
String targetNode = params.get(CollectionParams.TARGET_NODE, params.get("target"));
if (targetNode == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, CollectionParams.TARGET_NODE + " is a required parameter");
}
return params.getAll(null, "source", "target", CollectionParams.SOURCE_NODE, CollectionParams.TARGET_NODE);
}),
MOVEREPLICA_OP(MOVEREPLICA, (req, rsp, h) -> {
Map<String, Object> map = req.getParams().required().getAll(null,
COLLECTION_PROP);
return req.getParams().getAll(map,
"fromNode",
"targetNode",
CollectionParams.FROM_NODE,
CollectionParams.SOURCE_NODE,
CollectionParams.TARGET_NODE,
"replica",
"shard");
}),

View File

@ -24,15 +24,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -42,6 +33,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
@ -88,6 +80,16 @@ import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
/**
* The ExpandComponent is designed to work with the CollapsingPostFilter.
* The CollapsingPostFilter collapses a result set on a field.
@ -274,7 +276,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
IntObjectHashMap<BytesRef> ordBytes = null;
if(values != null) {
groupBits = new FixedBitSet(values.getValueCount());
MultiDocValues.OrdinalMap ordinalMap = null;
OrdinalMap ordinalMap = null;
SortedDocValues[] sortedDocValues = null;
LongValues segmentOrdinalMap = null;
SortedDocValues currentValues = null;
@ -520,7 +522,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
private static class GroupExpandCollector implements Collector, GroupCollector {
private SortedDocValues docValues;
private MultiDocValues.OrdinalMap ordinalMap;
private OrdinalMap ordinalMap;
private SortedDocValues segmentValues;
private LongValues segmentOrdinalMap;
private MultiDocValues.MultiSortedDocValues multiSortedDocValues;

View File

@ -22,7 +22,7 @@ import java.util.Map;
import org.apache.lucene.index.*;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;

View File

@ -22,9 +22,9 @@ import java.util.function.Predicate;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;

View File

@ -22,9 +22,9 @@ import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;

View File

@ -216,9 +216,7 @@ public class IntervalFacets implements Iterable<FacetInterval> {
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0) bits ^= 0x7fffffffffffffffL;
return bits;
return NumericUtils.sortableFloatBits((int)super.longValue());
}
};
break;
@ -227,9 +225,7 @@ public class IntervalFacets implements Iterable<FacetInterval> {
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0) bits ^= 0x7fffffffffffffffL;
return bits;
return NumericUtils.sortableDoubleBits(super.longValue());
}
};
break;
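Both hunks above swap a hand-rolled sign flip for the NumericUtils helpers. For the float case the two are not interchangeable: applying the 64-bit mask to a sign-extended 32-bit value can produce a different result than the 32-bit helper. A small hedged sketch of the arithmetic only (it makes no claim about the exact encoding the docValues store):

import org.apache.lucene.util.NumericUtils;

public class SortableBitsSketch {
  public static void main(String[] args) {
    int floatBits = Float.floatToIntBits(-1.0f);  // negative when viewed as an int
    long asLong = floatBits;                      // sign-extended into the upper 32 bits

    // old style: the 64-bit mask also flips the sign-extension bits
    long oldDecoded = asLong < 0 ? asLong ^ 0x7fffffffffffffffL : asLong;

    // new style: the 32-bit helper keeps the result in int range
    int newDecoded = NumericUtils.sortableFloatBits(floatBits);

    System.out.println(Long.toHexString(oldDecoded) + " vs " + Integer.toHexString(newDecoded));
  }
}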
@ -443,12 +439,12 @@ public class IntervalFacets implements Iterable<FacetInterval> {
INCLUDED,
GREATER_THAN_END,
}
/**
* Helper class to match and count documents in specified intervals
*/
public static class FacetInterval {
/**
* Key to represent this interval
*/
@ -508,6 +504,11 @@ public class IntervalFacets implements Iterable<FacetInterval> {
* The current count of documents in that match this interval
*/
private int count;
/**
* If this field is set to true, {@code #getCount()} for this interval will always return 0.
*/
private boolean includeNoDocs = false;
/**
*
@ -650,7 +651,14 @@ public class IntervalFacets implements Iterable<FacetInterval> {
throw new AssertionError();
}
if (startOpen) {
startLimit++;
if (startLimit == Long.MAX_VALUE) {
/*
* This interval can match no docs
*/
includeNoDocs = true;
} else {
startLimit++;
}
}
}
@ -678,7 +686,14 @@ public class IntervalFacets implements Iterable<FacetInterval> {
throw new AssertionError();
}
if (endOpen) {
endLimit--;
if (endLimit == Long.MIN_VALUE) {
/*
* This interval can match no docs
*/
includeNoDocs = true;
} else {
endLimit--;
}
}
}
}
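The two guards above exist because stepping past the extremes of long silently wraps around, which would turn an interval that should match nothing into one that matches everything. A tiny sketch of the wrap-around the includeNoDocs flag protects against:

public class OpenEndpointWrapSketch {
  public static void main(String[] args) {
    long startLimit = Long.MAX_VALUE;
    startLimit++;                                      // wraps to Long.MIN_VALUE
    System.out.println(startLimit == Long.MIN_VALUE);  // true: "(MAX_VALUE,*]" would suddenly match every doc

    long endLimit = Long.MIN_VALUE;
    endLimit--;                                        // wraps to Long.MAX_VALUE
    System.out.println(endLimit == Long.MAX_VALUE);    // true: "[*,MIN_VALUE)" would suddenly match every doc
  }
}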
@ -886,6 +901,9 @@ public class IntervalFacets implements Iterable<FacetInterval> {
* @return The count of document that matched this interval
*/
public int getCount() {
if (includeNoDocs) {
return 0;
}
return this.count;
}
@ -910,7 +928,6 @@ public class IntervalFacets implements Iterable<FacetInterval> {
*/
@Override
public Iterator<FacetInterval> iterator() {
return new ArrayList<FacetInterval>(Arrays.asList(intervals)).iterator();
}

View File

@ -386,8 +386,11 @@ public class EnumField extends PrimitiveFieldType {
return null;
}
final Integer intValue = stringValueToIntValue(value.toString());
if (intValue == null || intValue.equals(DEFAULT_VALUE))
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown value for enum field: " + value.toString());
if (intValue == null || intValue.equals(DEFAULT_VALUE)) {
String exceptionMessage = String.format(Locale.ENGLISH, "Unknown value for enum field: %s, value: %s",
field.getName(), value.toString());
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, exceptionMessage);
}
final LegacyFieldType newType = new LegacyFieldType();

View File

@ -25,12 +25,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValues;
@ -43,6 +37,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.FunctionValues;
@ -69,10 +64,17 @@ import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.StrField;
import org.apache.solr.uninverting.UninvertingReader;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
import static org.apache.solr.common.params.CommonParams.SORT;
/**
@ -474,7 +476,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
private final DocValuesProducer collapseValuesProducer;
private FixedBitSet collapsedSet;
private SortedDocValues collapseValues;
private MultiDocValues.OrdinalMap ordinalMap;
private OrdinalMap ordinalMap;
private SortedDocValues segmentValues;
private LongValues segmentOrdinalMap;
private MultiDocValues.MultiSortedDocValues multiSortedDocValues;
@ -920,7 +922,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
private LeafReaderContext[] contexts;
private DocValuesProducer collapseValuesProducer;
private SortedDocValues collapseValues;
protected MultiDocValues.OrdinalMap ordinalMap;
protected OrdinalMap ordinalMap;
protected SortedDocValues segmentValues;
protected LongValues segmentOrdinalMap;
protected MultiDocValues.MultiSortedDocValues multiSortedDocValues;

View File

@ -23,6 +23,7 @@ import java.util.List;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;
@ -43,7 +44,7 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
boolean multiValuedField;
SortedSetDocValues si; // only used for term lookups (for both single and multi-valued)
MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
FacetFieldProcessorByArrayDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf);

View File

@ -21,6 +21,7 @@ import java.util.Arrays;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
@ -176,7 +177,7 @@ public class MinMaxAgg extends SimpleAggValueSource {
class SingleValuedOrdAcc extends OrdAcc {
SortedDocValues topLevel;
SortedDocValues[] subDvs;
MultiDocValues.OrdinalMap ordMap;
OrdinalMap ordMap;
LongValues toGlobal;
SortedDocValues subDv;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@ -30,7 +31,7 @@ import org.apache.solr.schema.SchemaField;
class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
SortedSetDocValues topLevel;
SortedSetDocValues[] subDvs;
MultiDocValues.OrdinalMap ordMap;
OrdinalMap ordMap;
LongValues toGlobal;
SortedSetDocValues subDv;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@ -31,7 +32,7 @@ import org.apache.solr.search.SolrIndexSearcher;
class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
SortedDocValues topLevel;
SortedDocValues[] subDvs;
MultiDocValues.OrdinalMap ordMap;
OrdinalMap ordMap;
LongValues toGlobal;
SortedDocValues subDv;

View File

@ -21,8 +21,8 @@ import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
@ -50,7 +50,7 @@ class BlockJoinFieldFacetAccumulator {
// elems are : facet value counter<<32 | last parent doc num
private long[] segmentAccums = new long[0];
// for mapping per-segment ords to global ones
private MultiDocValues.OrdinalMap ordinalMap;
private OrdinalMap ordinalMap;
private SchemaField schemaField;
private SortedDocValues segmentSDV;

View File

@ -17,7 +17,7 @@
-->
<schema name="id-and-version-fields-only" version="1.6">
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
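This pattern, replacing the hard-coded Trie classes with ${solr.tests.*FieldType} and ${solr.tests.numeric.dv} property references, repeats across the test schemas below. A hedged sketch of how a test harness might populate those properties before loading such a schema (the randomization and class choices are illustrative assumptions, not the actual Solr test-framework wiring):

import java.util.Random;

public class NumericTypePropsSketch {
  public static void main(String[] args) {
    Random random = new Random();
    boolean usePoints = random.nextBoolean(); // randomly exercise Point-based or Trie-based numerics

    System.setProperty("solr.tests.IntegerFieldType", usePoints ? "solr.IntPointField"    : "solr.TrieIntField");
    System.setProperty("solr.tests.LongFieldType",    usePoints ? "solr.LongPointField"   : "solr.TrieLongField");
    System.setProperty("solr.tests.FloatFieldType",   usePoints ? "solr.FloatPointField"  : "solr.TrieFloatField");
    System.setProperty("solr.tests.DoubleFieldType",  usePoints ? "solr.DoublePointField" : "solr.TrieDoubleField");
    System.setProperty("solr.tests.DateFieldType",    usePoints ? "solr.DatePointField"   : "solr.TrieDateField");

    // Point fields need docValues for sorting and faceting, so force them on in that case
    System.setProperty("solr.tests.numeric.dv", Boolean.toString(usePoints || random.nextBoolean()));
  }
}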

View File

@ -30,7 +30,7 @@
</analyzer>
</fieldType>
<fieldType name="string" class="solr.StrField"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>

View File

@ -17,8 +17,8 @@
-->
<schema name="minimal" version="1.1">
<fieldType name="string" class="solr.StrField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<dynamicField name="*" type="string" indexed="true" stored="true"/>
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>

View File

@ -17,8 +17,8 @@
-->
<schema name="managed-preanalyzed" version="1.6">
<fieldType name="string" class="solr.StrField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="preanalyzed-no-analyzer" class="solr.PreAnalyzedField" parserImpl="json"/>
<fieldType name="preanalyzed-with-analyzer" class="solr.PreAnalyzedField">

View File

@ -26,6 +26,6 @@
<uniqueKey>id</uniqueKey>
<fieldType name="string" class="solr.StrField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
</schema>

View File

@ -17,8 +17,8 @@
-->
<schema name="minimal" version="1.1">
<fieldType name="string" class="solr.StrField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<dynamicField name="*" type="string" indexed="true" stored="true"/>
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>

View File

@ -17,8 +17,8 @@
-->
<schema name="minimal" version="1.1">
<fieldType name="string" class="solr.StrField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<dynamicField name="*" type="string" indexed="true" stored="true"/>
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>

View File

@ -31,15 +31,15 @@
1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
1.6: useDocValuesAsStored defaults to true.
-->
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<!-- Field type demonstrating an Analyzer failure -->
<fieldType name="failtype1" class="solr.TextField">
@ -95,8 +95,8 @@
<!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
seconds part (.999) is optional.
-->
<fieldType name="date" class="solr.TrieDateField" sortMissingLast="true"/>
<fieldType name="tdate" class="solr.TrieDateField" sortMissingLast="true" precisionStep="6"/>
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" sortMissingLast="true"/>
<fieldType name="tdate" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" sortMissingLast="true" precisionStep="6"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">

View File

@ -17,8 +17,8 @@
-->
<schema name="minimal" version="1.1">
<fieldType name="string" class="solr.StrField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<dynamicField name="*" type="string" indexed="true" stored="true"/>
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>

View File

@ -35,6 +35,9 @@ import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
@ -86,7 +89,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
}
}
CollectionAdminRequest.MoveReplica moveReplica = new CollectionAdminRequest.MoveReplica(coll, replica.getName(), targetNode);
CollectionAdminRequest.MoveReplica moveReplica = createMoveReplicaRequest(coll, replica, targetNode);
moveReplica.processAsync("000", cloudClient);
CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000");
// wait for async request success
@ -141,7 +144,7 @@ public class MoveReplicaTest extends SolrCloudTestCase {
}
assertTrue("replica never fully recovered", recovered);
moveReplica = new CollectionAdminRequest.MoveReplica(coll, shardId, targetNode, replica.getNodeName());
moveReplica = createMoveReplicaRequest(coll, replica, targetNode, shardId);
moveReplica.process(cloudClient);
checkNumOfCores(cloudClient, replica.getNodeName(), 1);
// wait for recovery
@ -181,6 +184,49 @@ public class MoveReplicaTest extends SolrCloudTestCase {
assertTrue("replica never fully recovered", recovered);
}
private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica replica, String targetNode, String shardId) {
if (random().nextBoolean()) {
return new CollectionAdminRequest.MoveReplica(coll, shardId, targetNode, replica.getNodeName());
} else {
// for backcompat testing of SOLR-11068
// todo remove in solr 8.0
return new BackCompatMoveReplicaRequest(coll, shardId, targetNode, replica.getNodeName());
}
}
private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica replica, String targetNode) {
if (random().nextBoolean()) {
return new CollectionAdminRequest.MoveReplica(coll, replica.getName(), targetNode);
} else {
// for backcompat testing of SOLR-11068
// todo remove in solr 8.0
return new BackCompatMoveReplicaRequest(coll, replica.getName(), targetNode);
}
}
/**
* Added for backcompat testing
* todo remove in solr 8.0
*/
static class BackCompatMoveReplicaRequest extends CollectionAdminRequest.MoveReplica {
public BackCompatMoveReplicaRequest(String collection, String replica, String targetNode) {
super(collection, replica, targetNode);
}
public BackCompatMoveReplicaRequest(String collection, String shard, String sourceNode, String targetNode) {
super(collection, shard, sourceNode, targetNode);
}
@Override
public SolrParams getParams() {
ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
if (randomlyMoveReplica) {
params.set(CollectionParams.FROM_NODE, sourceNode);
}
return params;
}
}
private Replica getRandomReplica(String coll, CloudSolrClient cloudClient) {
List<Replica> replicas = cloudClient.getZkStateReader().getClusterState().getCollection(coll).getReplicas();
Collections.shuffle(replicas, random());

View File

@ -33,6 +33,9 @@ import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.StrUtils;
import org.junit.BeforeClass;
import org.junit.Test;
@ -80,7 +83,7 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
create.setCreateNodeSet(StrUtils.join(l, ',')).setMaxShardsPerNode(3);
cloudClient.request(create);
log.info("excluded_node : {} ", emptyNode);
new CollectionAdminRequest.ReplaceNode(node2bdecommissioned, emptyNode).processAsync("000", cloudClient);
createReplaceNodeRequest(node2bdecommissioned, emptyNode, null).processAsync("000", cloudClient);
CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000");
boolean success = false;
for (int i = 0; i < 300; i++) {
@ -99,7 +102,7 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
}
//let's do it back
new CollectionAdminRequest.ReplaceNode(emptyNode, node2bdecommissioned).setParallel(Boolean.TRUE).processAsync("001", cloudClient);
createReplaceNodeRequest(emptyNode, node2bdecommissioned, Boolean.TRUE).processAsync("001", cloudClient);
requestStatus = CollectionAdminRequest.requestStatus("001");
for (int i = 0; i < 200; i++) {
@ -125,4 +128,23 @@ public class ReplaceNodeTest extends SolrCloudTestCase {
assertEquals(create.getNumPullReplicas().intValue(), s.getReplicas(EnumSet.of(Replica.Type.PULL)).size());
}
}
private CollectionAdminRequest.AsyncCollectionAdminRequest createReplaceNodeRequest(String sourceNode, String targetNode, Boolean parallel) {
if (random().nextBoolean()) {
return new CollectionAdminRequest.ReplaceNode(sourceNode, targetNode).setParallel(parallel);
} else {
// test back compat with old param names
// todo remove in solr 8.0
return new CollectionAdminRequest.AsyncCollectionAdminRequest(CollectionParams.CollectionAction.REPLACENODE) {
@Override
public SolrParams getParams() {
ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
params.set("source", sourceNode);
params.set("target", targetNode);
if (parallel != null) params.set("parallel", parallel.toString());
return params;
}
};
}
}
}

View File

@ -189,6 +189,8 @@ public class MetricsHandlerTest extends SolrTestCaseJ4 {
@Test
public void testPropertyFilter() throws Exception {
assertQ(req("*:*"), "//result[@numFound='0']");
MetricsHandler handler = new MetricsHandler(h.getCoreContainer());
SolrQueryResponse resp = new SolrQueryResponse();

View File

@ -35,6 +35,7 @@ import org.apache.solr.common.params.FacetParams.FacetRangeOther;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.TimeZoneUtils;
import org.junit.BeforeClass;
@ -183,6 +184,29 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
add_doc("id", "2004", "hotel_s1", "b", "airport_s1", "ams", "duration_i1", "5");
}
public void testDvMethodNegativeFloatRangeFacet() throws Exception {
String field = "negative_num_f1_dv";
assertTrue("Unexpected schema configuration", h.getCore().getLatestSchema().getField(field).hasDocValues());
assertEquals("Unexpected schema configuration", NumberType.FLOAT, h.getCore().getLatestSchema().getField(field).getType().getNumberType());
assertFalse("Unexpected schema configuration", h.getCore().getLatestSchema().getField(field).multiValued());
final String[] commonParams = {
"q", "*:*", "facet", "true", "facet.range.start", "-2", "facet.range.end", "0", "facet.range.gap", "2"
};
final String countAssertion
= "//lst[@name='facet_counts']/lst[@name='facet_ranges']/lst[@name='%s']/lst[@name='counts']/int[@name='-2.0'][.='1']";
assertU(adoc("id", "10001", field, "-1.0"));
assertU(commit());
assertQ(req(commonParams, "facet.range", field, "facet.range.method", "filter"),
String.format(Locale.ROOT, countAssertion, field)
);
assertQ(req(commonParams, "facet.range", field, "facet.range.method", "dv"),
String.format(Locale.ROOT, countAssertion, field)
);
}
public void testDefaultsAndAppends() throws Exception {
// all defaults
@ -3385,8 +3409,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
ModifiableSolrParams params = new ModifiableSolrParams();
Integer[] values = new Integer[2];
do {
values[0] = random().nextInt(3000);
values[1] = random().nextInt(3000);
values[0] = random().nextInt(3000) * (random().nextBoolean()?-1:1);
values[1] = random().nextInt(3000) * (random().nextBoolean()?-1:1);
} while (values[0].equals(values[1]));
Arrays.sort(values);
long gapNum = Math.max(1, random().nextInt(3000));
@ -3404,8 +3428,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
ModifiableSolrParams params = new ModifiableSolrParams();
Float[] values = new Float[2];
do {
values[0] = random().nextFloat() * 3000;
values[1] = random().nextFloat() * 3000;
values[0] = random().nextFloat() * 3000 * (random().nextBoolean()?-1:1);
values[1] = random().nextFloat() * 3000 * (random().nextBoolean()?-1:1);
} while (values[0].equals(values[1]));
Arrays.sort(values);
float gapNum = Math.max(1, random().nextFloat() * 3000);
@ -3425,8 +3449,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
ModifiableSolrParams params = new ModifiableSolrParams();
Date[] dates = new Date[2];
do {
dates[0] = new Date((long)(random().nextDouble()*(new Date().getTime())));
dates[1] = new Date((long)(random().nextDouble()*(new Date().getTime())));
dates[0] = new Date((long)(random().nextDouble()*(new Date().getTime()) * (random().nextBoolean()?-1:1)));
dates[1] = new Date((long)(random().nextDouble()*(new Date().getTime()) * (random().nextBoolean()?-1:1)));
} while (dates[0].equals(dates[1]));
Arrays.sort(dates);
long dateDiff = (dates[1].getTime() - dates[0].getTime())/1000;

View File

@ -664,19 +664,27 @@ public class TestIntervalFaceting extends SolrTestCaseJ4 {
assertU(adoc("id", "12", "test_l_dv", String.valueOf(Long.MAX_VALUE - 3)));
assertU(adoc("id", "13", "test_l_dv", String.valueOf(Long.MAX_VALUE - 2)));
assertU(adoc("id", "14", "test_l_dv", String.valueOf(Long.MAX_VALUE - 1)));
assertU(adoc("id", "15", "test_l_dv", String.valueOf(Long.MAX_VALUE)));
assertU(adoc("id", "16", "test_l_dv", String.valueOf(Long.MIN_VALUE)));
assertU(commit());
assertIntervalQuery("test_l_dv", "[0," + Integer.MAX_VALUE + "]", "10");
assertIntervalQuery("test_l_dv", "[" + Integer.MAX_VALUE + "," + Long.MAX_VALUE + "]", "3");
assertIntervalQuery("test_l_dv", "[" + Integer.MAX_VALUE + ",*]", "3");
assertIntervalQuery("test_l_dv", "(10," + Long.MAX_VALUE + "]", "4");
assertIntervalQuery("test_l_dv", "[" + Long.MAX_VALUE + "," + Long.MAX_VALUE + "]", "1");
assertIntervalQuery("test_l_dv", "[" + Long.MAX_VALUE + ",*]", "1");
assertIntervalQuery("test_l_dv", "(" + Long.MAX_VALUE + ",*]", "0");
assertIntervalQuery("test_l_dv", "(*, " + Long.MIN_VALUE + "]", "1");
assertIntervalQuery("test_l_dv", "(*, " + Long.MIN_VALUE + ")", "0");
assertIntervalQuery("test_l_dv", "(" + (Long.MAX_VALUE - 1) + ",*]", "1");
assertIntervalQuery("test_l_dv", "[" + (Long.MAX_VALUE - 1) + ",*]", "2");
}
@Test
public void testFloatFields() {
doTestFloat("test_f_dv");
doTestFloat("test_f_dv", false);
}
private void doTestFloat(String field) {
private void doTestFloat(String field, boolean testDouble) {
assertU(adoc("id", "1", field, "0"));
assertU(adoc("id", "2", field, "1"));
assertU(adoc("id", "3", field, "2"));
@ -705,11 +713,71 @@ public class TestIntervalFaceting extends SolrTestCaseJ4 {
assertIntervalQuery(field, "(1,1)", "0");
assertIntervalQuery(field, "(4,7)", "4");
assertIntervalQuery(field, "(123,*)", "1");
clearIndex();
assertU(adoc("id", "16", field, "-1.3"));
assertU(adoc("id", "17", field, "0.0"));
assertU(adoc("id", "18", field, "-0.0"));
assertU(adoc("id", "19", field, String.valueOf(Float.MIN_VALUE)));
assertU(adoc("id", "20", field, String.valueOf(Float.MAX_VALUE)));
assertU(adoc("id", "21", field, String.valueOf(Float.NEGATIVE_INFINITY)));
assertU(adoc("id", "22", field, String.valueOf(Float.POSITIVE_INFINITY)));
assertU(commit());
assertIntervalQuery(field, "[*,*]", "7");
assertIntervalQuery(field, "(*,*)", "7");
assertIntervalQuery(field, "(-1,1)", "3");
assertIntervalQuery(field, "(-2,1)", "4");
assertIntervalQuery(field, "(-1.3,0)", "1");
assertIntervalQuery(field, "[-1.3,0)", "2");
assertIntervalQuery(field, "[-1.3,0]", "3");
assertIntervalQuery(field, "(" + Float.NEGATIVE_INFINITY + ",0)", "2");
assertIntervalQuery(field, "(* ,0)", "3");
assertIntervalQuery(field, "[" + Float.NEGATIVE_INFINITY + ",0)", "3");
assertIntervalQuery(field, "(0, " + Float.MIN_VALUE + ")", "0");
assertIntervalQuery(field, "(0, " + Float.MIN_VALUE + "]", "1");
assertIntervalQuery(field, "(0, " + Float.MAX_VALUE + ")", "1");
assertIntervalQuery(field, "(0, " + Float.MAX_VALUE + "]", "2");
assertIntervalQuery(field, "(0, " + Float.POSITIVE_INFINITY + ")", "2");
assertIntervalQuery(field, "(0, " + Float.POSITIVE_INFINITY + "]", "3");
assertIntervalQuery(field, "[-0.0, 0.0]", "2");
assertIntervalQuery(field, "[-0.0, 0.0)", "1");
assertIntervalQuery(field, "(-0.0, 0.0]", "1");
if (testDouble) {
clearIndex();
assertU(adoc("id", "16", field, "-1.3"));
assertU(adoc("id", "17", field, "0.0"));
assertU(adoc("id", "18", field, "-0.0"));
assertU(adoc("id", "19", field, String.valueOf(Double.MIN_VALUE)));
assertU(adoc("id", "20", field, String.valueOf(Double.MAX_VALUE)));
assertU(adoc("id", "21", field, String.valueOf(Double.NEGATIVE_INFINITY)));
assertU(adoc("id", "22", field, String.valueOf(Double.POSITIVE_INFINITY)));
assertU(commit());
assertIntervalQuery(field, "[*,*]", "7");
assertIntervalQuery(field, "(*,*)", "7");
assertIntervalQuery(field, "(-1,1)", "3");
assertIntervalQuery(field, "(-2,1)", "4");
assertIntervalQuery(field, "(-1.3,0)", "1");
assertIntervalQuery(field, "[-1.3,0)", "2");
assertIntervalQuery(field, "[-1.3,0]", "3");
assertIntervalQuery(field, "(" + Double.NEGATIVE_INFINITY + ",0)", "2");
assertIntervalQuery(field, "(* ,0)", "3");
assertIntervalQuery(field, "[" + Double.NEGATIVE_INFINITY + ",0)", "3");
assertIntervalQuery(field, "(0, " + Double.MIN_VALUE + ")", "0");
assertIntervalQuery(field, "(0, " + Double.MIN_VALUE + "]", "1");
assertIntervalQuery(field, "(0, " + Double.MAX_VALUE + ")", "1");
assertIntervalQuery(field, "(0, " + Double.MAX_VALUE + "]", "2");
assertIntervalQuery(field, "(0, " + Double.POSITIVE_INFINITY + ")", "2");
assertIntervalQuery(field, "(0, " + Double.POSITIVE_INFINITY + "]", "3");
}
}
@Test
public void testDoubleFields() {
doTestFloat("test_d_dv");
doTestFloat("test_d_dv", true);
}
@Test

View File

@ -152,9 +152,9 @@ public class EnumFieldTest extends SolrTestCaseJ4 {
@Test
public void testBogusEnumIndexing() throws Exception {
ignoreException("Unknown value for enum field: blabla");
ignoreException("Unknown value for enum field: 10");
ignoreException("Unknown value for enum field: -4");
ignoreException("Unknown value for enum field: " + FIELD_NAME + ", value: blabla");
ignoreException("Unknown value for enum field: " + FIELD_NAME + ", value: 10");
ignoreException("Unknown value for enum field: " + FIELD_NAME + ", value: -4");
clearIndex();

File diff suppressed because it is too large

View File

@ -17,6 +17,7 @@
package org.apache.solr.update;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static org.junit.Assert.assertEquals;
import java.lang.invoke.MethodHandles;
@ -340,74 +341,85 @@ public class SoftAutoCommitTest extends AbstractSolrTestCase {
hardTracker.setOpenSearcher(false);
// try to add 5 docs really fast
long fast5start = System.nanoTime();
final long preFirstNanos = System.nanoTime();
for( int i=0;i<5; i++ ) {
assertU(adoc("id", ""+500 + i, "subject", "five fast docs"));
}
long fast5end = System.nanoTime() - TimeUnit.NANOSECONDS.convert(300, TimeUnit.MILLISECONDS); // minus a tad of slop
long fast5time = 1 + TimeUnit.MILLISECONDS.convert(fast5end - fast5start, TimeUnit.NANOSECONDS);
// total time for all 5 adds determines the number of soft to expect
long expectedSoft = (long)Math.ceil((double) fast5time / softCommitWaitMillis);
long expectedHard = (long)Math.ceil((double) fast5time / hardCommitWaitMillis);
final long postLastNanos = System.nanoTime();
expectedSoft = Math.max(1, expectedSoft);
expectedHard = Math.max(1, expectedHard);
monitor.assertSaneOffers();
// note: counting from 1 for multiplication
for (int i = 1; i <= expectedSoft; i++) {
// Wait for the soft commit with plenty of fudge to survive nasty envs
Long soft = monitor.soft.poll(softCommitWaitMillis * 3, MILLISECONDS);
if (soft != null || i == 1) {
assertNotNull(i + ": soft wasn't fast enough", soft);
monitor.assertSaneOffers();
// have to assume none of the docs were added until
// very end of the add window
long softMs = TimeUnit.MILLISECONDS.convert(soft - fast5end, TimeUnit.NANOSECONDS);
assertTrue(i + ": soft occurred too fast: " +
softMs + " < (" + softCommitWaitMillis + " * " + i + ")",
softMs >= (softCommitWaitMillis * i));
} else {
// we may have guessed wrong and there were fewer commits
assertNull("Got a soft commit we weren't expecting", monitor.soft.poll(2000, MILLISECONDS));
}
}
// note: counting from 1 for multiplication
for (int i = 1; i <= expectedHard; i++) {
// wait for the hard commit, shouldn't need any fudge given
// other actions already taken
Long hard = monitor.hard.poll(hardCommitWaitMillis, MILLISECONDS);
assertNotNull(i + ": hard wasn't fast enough", hard);
monitor.assertSaneOffers();
// have to assume none of the docs were added until
// very end of the add window
long hardMs = TimeUnit.MILLISECONDS.convert(hard - fast5end, TimeUnit.NANOSECONDS);
assertTrue(i + ": hard occurred too fast: " +
hardMs + " < (" + hardCommitWaitMillis + " * " + i + ")",
hardMs >= (hardCommitWaitMillis * i));
}
final long maxTimeMillis = MILLISECONDS.convert(postLastNanos - preFirstNanos, NANOSECONDS);
log.info("maxTimeMillis: {}ns - {}ns == {}ms", postLastNanos, preFirstNanos, maxTimeMillis);
// we are only guessing how many commits we may see, allow one extra of each
monitor.soft.poll(softCommitWaitMillis + 200, MILLISECONDS);
monitor.hard.poll(hardCommitWaitMillis + 200, MILLISECONDS);
// clear commits
monitor.hard.clear();
monitor.soft.clear();
// NOTE: explicitly using truncated division of longs to round down
// even if evenly divisible, need +1 to account for possible "last" commit triggered by "last" doc
final long maxExpectedSoft = 1L + (maxTimeMillis / softCommitWaitMillis);
final long maxExpectedHard = 1L + (maxTimeMillis / hardCommitWaitMillis);
log.info("maxExpectedSoft={}", maxExpectedSoft);
log.info("maxExpectedHard={}", maxExpectedHard);
// do a poll loop over each monitor queue, asserting that:
// - we get at least one commit
// - we don't get more than the max possible commits expected
// - any commit we do get doesn't happen "too fast" relative the previous commit
// (or first doc added for the first commit)
monitor.assertSaneOffers();
assertRapidMultiCommitQueues("softCommit", preFirstNanos, softCommitWaitMillis,
maxExpectedSoft, monitor.soft);
monitor.assertSaneOffers();
assertRapidMultiCommitQueues("hardCommit", preFirstNanos, hardCommitWaitMillis,
maxExpectedHard, monitor.hard);
// now wait a bit...
// w/o other action we shouldn't see any additional hard/soft commits
// wait a bit, w/o other action we shouldn't see any
// new hard/soft commits
assertNull("Got a hard commit we weren't expecting",
monitor.hard.poll(1000, MILLISECONDS));
monitor.hard.poll(1000, MILLISECONDS));
assertNull("Got a soft commit we weren't expecting",
monitor.soft.poll(0, MILLISECONDS));
monitor.soft.poll(0, MILLISECONDS));
monitor.assertSaneOffers();
}
/**
* Helper method
* @see #testSoftAndHardCommitMaxTimeRapidAdds
*/
private static void assertRapidMultiCommitQueues
(final String debug, final long startTimestampNanos, final long commitWaitMillis,
final long maxNumCommits, final BlockingQueue<Long> queue) throws InterruptedException {
assert 0 < maxNumCommits;
// do all our math/comparisons in Nanos...
final long commitWaitNanos = NANOSECONDS.convert(commitWaitMillis, MILLISECONDS);
// these will be modified in each iteration of our assertion loop
long prevTimestampNanos = startTimestampNanos;
int count = 1;
Long commitNanos = queue.poll(commitWaitMillis * 3, MILLISECONDS);
assertNotNull(debug + ": did not find a single commit", commitNanos);
while (null != commitNanos) {
if (commitNanos < prevTimestampNanos + commitWaitNanos) {
fail(debug + ": commit#" + count + " has TS too low relative to previous TS & commitWait: " +
"commitNanos=" + commitNanos + ", prevTimestampNanos=" + prevTimestampNanos +
", commitWaitMillis=" + commitWaitMillis);
}
if (maxNumCommits < count) {
fail(debug + ": commit#" + count + " w/ commitNanos=" + commitNanos +
", but maxNumCommits=" +maxNumCommits);
}
prevTimestampNanos = commitNanos;
count++;
commitNanos = queue.poll(commitWaitMillis * 3, MILLISECONDS);
}
}
}
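The 1L + (maxTimeMillis / commitWaitMillis) bound computed in the test above and enforced by the helper is easiest to see with concrete numbers; a small hedged sketch (the values are invented):

public class MaxExpectedCommitsSketch {
  public static void main(String[] args) {
    long maxTimeMillis = 2500;        // wall-clock span covering all adds and polling
    long softCommitWaitMillis = 1000; // the soft commit wait used by the test

    // truncated long division rounds down (2500 / 1000 == 2), and the +1 allows
    // one extra commit triggered by the very last document added
    long maxExpectedSoft = 1L + (maxTimeMillis / softCommitWaitMillis);
    System.out.println(maxExpectedSoft); // 3
  }
}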
class MockEventListener implements SolrEventListener {

View File

@ -183,12 +183,14 @@ public class TestUtils extends SolrTestCaseJ4 {
public void testBinaryCommands() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
new JavaBinCodec().marshal((MapWriter) ew -> {
ew.put("set-user", fromJSONString("{x:y}"));
ew.put("set-user", fromJSONString("{x:y,x1:y1}"));
ew.put("single", Arrays.asList(fromJSONString("[{x:y,x1:y1},{x2:y2}]"), fromJSONString( "{x2:y2}")));
ew.put("multi", Arrays.asList(fromJSONString("{x:y,x1:y1}"), fromJSONString( "{x2:y2}")));
}, baos);
try (final JavaBinCodec jbc = new JavaBinCodec()) {
jbc.marshal((MapWriter) ew -> {
ew.put("set-user", fromJSONString("{x:y}"));
ew.put("set-user", fromJSONString("{x:y,x1:y1}"));
ew.put("single", Arrays.asList(fromJSONString("[{x:y,x1:y1},{x2:y2}]"), fromJSONString( "{x2:y2}")));
ew.put("multi", Arrays.asList(fromJSONString("{x:y,x1:y1}"), fromJSONString( "{x2:y2}")));
}, baos);
}
ContentStream stream = new ContentStreamBase.ByteArrayStream(baos.toByteArray(),null, "application/javabin");
List<CommandOperation> commands = CommandOperation.readCommands(Collections.singletonList(stream), new NamedList(), Collections.singleton("single"));

View File

@ -19,6 +19,11 @@
The definitive manual on Asciidoc syntax is in the http://asciidoctor.org/docs/user-manual/[Asciidoctor User Manual]. To help people get started, however, here is a simpler cheat sheet.
== AsciiDoc vs Asciidoctor Syntax
We use tools from the Asciidoctor project to build the HTML and PDF versions of the Ref Guide. Asciidoctor is a Ruby port of the original AsciiDoc project, which was mostly abandoned several years ago.
While much of the syntax between the two is the same, there are many conventions supported by Asciidoctor that did not exist in AsciiDoc. While the Asciidoctor project has tried to provide back-compatibility with the older project, that may not be true forever. For this reason, it's strongly recommended to only use the Asciidoctor User Manual as a reference for any syntax that's not described here.
== Basic syntax
=== Bold
@ -35,25 +40,25 @@ More info: http://asciidoctor.org/docs/user-manual/#bold-and-italic
=== Headings
Equal signs (`=`) are used for heading levels. Each equal sign is a level. Each page can *only* have one top level. Levels should be appropriately nested.
Equal signs (`=`) are used for heading levels. Each equal sign is a level. Each page can *only* have one top level (i.e., only one section with a single `=`).
Validation occurs to ensure that level 3s are preceded by level 2s, level 4s are preceded by level 3s, etc. Including out-of-sequence heading levels (such as a level 3 then a level 5) will not fail the build, but will produce an error.
Levels should be appropriately nested. During the build, validation occurs to ensure that level 3s are preceded by level 2s, level 4s are preceded by level 3s, etc. Including out-of-sequence heading levels (such as a level 3 then a level 5) will not fail the build, but will produce an error.
More info: http://asciidoctor.org/docs/user-manual/#sections
=== Code Examples
Use backticks ``` for text that should be monospaced, such as a code or class name in the body of a paragraph.
Use backticks ``` for text that should be monospaced, such as code or a class name in the body of a paragraph.
More info: http://asciidoctor.org/docs/user-manual/#mono
Longer code examples can be separated from text with `source` blocks. These allow defining the syntax being used so the code is properly highlighted.
[source]
.Example Source Block
[source]
----
[source,xml]
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
----
If your code block will include line breaks, put 4 hyphens (`----`) before and after the entire block.
@ -66,9 +71,9 @@ Titles can be added to most blocks (images, source blocks, tables, etc.) by simp
[source]
----
[source,xml]
.Example ID field
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
[source,xml]
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
----
== Links
@ -82,27 +87,118 @@ However, you can add a name to a link by adding the URI followed by square brack
http://lucene.apache.org/solr[Solr Website]
=== Link to Other Pages/Sections of the Guide
A warning up front, linking to other pages can be a little bit painful.
A warning up front, linking to other pages can be a little bit painful. There are slightly different rules depending on the type of link you want to create, and where you are linking from.
To link to an anchor (or heading) on the _same page_, you can simply use double angle brackets (`<< >>`) around the anchor/heading/section name. Note that any heading (aka section title) that starts with equal signs is automatically an anchor.
The build process includes a validation for internal or inter-page links, so if you can build the docs locally, you can use that to verify you constructed your link properly (or pay attention to the Jenkins build after your commit).
More info: http://asciidoctor.org/docs/user-manual/#internal-cross-references
To link to another page or even a heading on _another page_, you use a similar syntax, but you must refer to the full filename and refer to the section title you want to link to by changing it to lower case and including hyphens between each word.
For example, if I want to link from one page to the Schema API page's section "Modify the Schema", I need to create a link that looks like this:
[source]
For more information about modifying the schema with the API, see the section <<schema-api.adoc#modify-the-schema>>.
You can add text to appear by adding it after the file name and section reference, as in:
With all of the below examples, you can add text to display as the link title by adding a comma after the section reference followed by the display text, as in:
[source]
<<schema-api.adoc#modify-the-schema,Modify the Schema>>
==== Link to a Section on the Same Page
To link to an anchor (or section title) on the _same page_, you can simply use double angle brackets (`<< >>`) around the anchor/heading/section title you want to link to. Any section title (a heading that starts with equal signs) automatically becomes an anchor during conversion and is available for deep linking.
Example::
If I have a section on a page that looks like this (from `defining-fields.adoc`):
+
[source]
----
== Field Properties
Field definitions can have the following properties:
----
+
To link to this section from another part of the same `defining-fields.adoc` page, I simply need to put the section title in double angle brackets, as in:
+
[source]
See also the <<Field Properties>> section.
+
The section title will be used as the display text; to customize that, add a comma after the section title, then the text you want used for display.
More info: http://asciidoctor.org/docs/user-manual/#internal-cross-references
==== Link to a Section with an Anchor ID
When linking to any section (on the same page or another one), you must also be aware of any pre-defined anchors that may be in use (these will be in double brackets, like `[[ ]]`). When the page is converted, those will be the references your link needs to point to.
Example::
Take this example from `configsets-api.adoc`:
+
[source]
----
[[configsets-create]]
== Create a ConfigSet
----
+
To link to this section, there are two approaches depending on where you are linking from:
* From the same page, simply use the anchor name: `\<<configsets-create>>`.
* From another page, use the page name and the anchor name: `\<<configsets-api.adoc#configsets-create>>`.
==== Link to Another Page
To link to _another page_ or a section on another page, you must refer to the full filename and refer to the section you want to link to.
Unfortunately, when you want to refer the reader to another page without deep-linking to a section, you cannot simply put the other file name in angle brackets and call it a day. This is due to the PDF conversion - once all the pages are combined into one big page for one big PDF, the lack of a specific reference causes inter-page links to fail.
So, *you must always link to a specific section*. If all you want is a reference to the top of another page, you can use the `page-shortname` attribute found at the top of every page as your anchor reference.
Example::
The file `upgrading-solr.adoc` has a `page-shortname` at the top that looks like this:
+
[source]
----
= Upgrading Solr
:page-shortname: upgrading-solr
:page-permalink: upgrading-solr.html
----
+
To construct a link to this page, we need to refer to the file name (`upgrading-solr.adoc`), then use the `page-shortname` as our anchor reference. As in:
+
[source]
For more information about upgrades, see <<upgrading-solr.adoc#upgrading-solr>>.
TIP: As of July 2017, all pages have a `page-shortname` that is equivalent to the filename (without the `.adoc` part).
==== Link to a Section on Another Page
Linking to a section is the same conceptually as linking to the top of a page; you just need to take a little extra care to format the anchor ID in your link reference properly.
When you link to a section on another page, you must convert the section title into the format of the section ID that will be created during the conversion. These are the rules that transform the titles:
--
* All characters are lower-cased.
** `Using security.json with Solr` becomes `using security.json with solr`
* All non-alpha characters are removed, with the exception of hyphens (so all periods, commas, ampersands, parentheses, etc., are stripped).
** `using security.json with solr` becomes `using security json with solr`
* All whitespaces are replaced with hyphens.
** `using security json with solr` becomes `using-security-json-with-solr`
--
Example::
The file `schema-api.adoc` has a section "Modify the Schema" that looks like this:
+
[source]
----
== Modify the Schema
`POST /_collection_/schema`
----
+
To link to this section from another page, you would create a link structured like this:
+
--
* the file name of the page with the section (`schema-api.adoc`),
* then the hash symbol (`#`),
* then the converted section title (`modify-the-schema`),
* then a comma and any link title for display.
--
+
The link in context would look like this:
+
[source]
For more information, see the section <<schema-api.adoc#modify-the-schema,Modify the Schema>>.
More info: http://asciidoctor.org/docs/user-manual/#inter-document-cross-references
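The conversion rules above are mechanical enough to express in code. A hedged Java sketch that mirrors them (keeping digits as well as letters; this is not the actual Asciidoctor implementation, which handles more edge cases):

import java.util.Locale;

public class SectionAnchorSketch {
  // lower-case, replace punctuation with spaces (keeping letters, digits and hyphens),
  // then collapse whitespace runs into single hyphens
  static String toAnchor(String sectionTitle) {
    String lower = sectionTitle.toLowerCase(Locale.ROOT);
    String stripped = lower.replaceAll("[^a-z0-9\\s-]", " ");
    return stripped.trim().replaceAll("\\s+", "-");
  }

  public static void main(String[] args) {
    System.out.println(toAnchor("Using security.json with Solr")); // using-security-json-with-solr
    System.out.println(toAnchor("Modify the Schema"));             // modify-the-schema
  }
}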
== Item Lists
== Lists
Asciidoc supports three types of lists:
@ -115,11 +211,15 @@ Each type of list can be mixed with the other types. So, you could have an order
=== Unordered Lists
Simple bulleted lists need each line to start with an asterisk (`*`). It should be the first character of the line, and be followed by a space.
These lists also need to be separated from the preceding paragraph by a blank line.
More info: http://asciidoctor.org/docs/user-manual/#unordered-lists
=== Ordered Lists
Numbered lists need each line to start with a period (`.`). It should be the first character of the line, and be followed by a space.
This style is preferred over manually numbering your list.
More info: http://asciidoctor.org/docs/user-manual/#ordered-lists
=== Labeled Lists
@ -127,6 +227,10 @@ These are like question & answer lists or glossary definitions. Each line should
Labeled lists can be nested by adding an additional colon (such as `:::`, etc.).
If your content will span multiple paragraphs or include source blocks, etc., you will want to add a plus sign (`+`) to keep the sections together for your reader.
TIP: We prefer this style of list for parameters because it allows more freedom in how you present the details for each parameter. For example, it supports ordered or unordered lists inside it automatically, and you can include multiple paragraphs and source blocks without trying to cram them into a smaller table cell.
More info: http://asciidoctor.org/docs/user-manual/#labeled-list
== Images
@ -207,7 +311,9 @@ Many more examples of formatting:
=== More Options
Tables can also be given footer rows, borders, and captions. CSV or DSV can be used instead of formatting the data in pipes.
Tables can also be given footer rows, borders, and captions. You can determine the width of columns, or the width of the table as a whole.
CSV or DSV can also be used instead of formatting the data in pipes.
More info: http://asciidoctor.org/docs/user-manual/#tables
@ -227,13 +333,13 @@ You can add titles to admonitions by making it an admonition block. The structur
[source]
----
[NOTE]
.Title of Note
[NOTE]
====
Text of note
====
----
In this example, the type of admonition is included in square brackets (`[NOTE]`), and the title is prefixed with a period. Four equal signs give the start and end points of the note text (which can include new lines, lists, etc.).
In this example, the type of admonition is included in square brackets (`[NOTE]`), and the title is prefixed with a period. Four equal signs give the start and end points of the note text (which can include new lines, lists, code examples, etc.).
More info: http://asciidoctor.org/docs/user-manual/#admonition

View File

@ -44,7 +44,7 @@ At most only one of the `min`, `max`, or `sort` (see below) parameters may be sp
If none are specified, the group head document of each group will be selected based on the highest scoring document in that group. The default is none.
sort::
Selects the group head document for each group based on which document comes first according to the specified <<common-query-parameters.adoc#CommonQueryParameters-ThesortParameter,sort string>>.
Selects the group head document for each group based on which document comes first according to the specified <<common-query-parameters.adoc#sort-parameter,sort string>>.
+
At most only one of the `min`, `max`, (see above) or `sort` parameters may be specified.
+

View File

@ -1832,14 +1832,14 @@ This command recreates replicas in one node (the source) to another node (the ta
For source replicas that are also shard leaders, the operation will wait for the number of seconds set with the `timeout` parameter to make sure there's an active replica that can become a leader (either an existing replica becoming a leader or the new replica completing recovery and becoming a leader).
`/admin/collections?action=REPLACENODE&source=_source-node_&target=_target-node_`
`/admin/collections?action=REPLACENODE&sourceNode=_source-node_&targetNode=_target-node_`
=== REPLACENODE Parameters
`source`::
`sourceNode`::
The source node from which the replicas need to be copied. This parameter is required.
`target`::
`targetNode`::
The target node where replicas will be copied. This parameter is required.
`parallel`::
@ -1860,7 +1860,7 @@ This operation does not hold necessary locks on the replicas that belong to on t
This command moves a replica from one node to a new node. In case of shared filesystems the `dataDir` will be reused.
`/admin/collections?action=MOVEREPLICA&collection=collection&shard=shard&replica=replica&node=nodeName&toNode=nodeName`
`/admin/collections?action=MOVEREPLICA&collection=collection&shard=shard&replica=replica&sourceNode=nodeName&targetNode=nodeName`
=== MOVEREPLICA Parameters
@ -1873,10 +1873,10 @@ The name of the shard that the replica belongs to. This parameter is required.
`replica`::
The name of the replica. This parameter is required.
`node`::
`sourceNode`::
The name of the node that contains the replica. This parameter is required.
`toNode`::
`targetNode`::
The name of the destination node. This parameter is required.
`async`::

View File

@ -20,33 +20,9 @@
Several query parsers share supported query parameters.
The table below summarizes Solr's common query parameters, which are supported by the <<requesthandlers-and-searchcomponents-in-solrconfig#searchhandlers,Search RequestHandlers>>
The following sections describe Solr's common query parameters, which are supported by the <<requesthandlers-and-searchcomponents-in-solrconfig#searchhandlers,Search RequestHandlers>>.
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="30,70",options="header"]
|===
|Parameter |Description
|<<CommonQueryParameters-ThedefTypeParameter,defType>> |Selects the query parser to be used to process the query.
|<<CommonQueryParameters-ThesortParameter,sort>> |Sorts the response to a query in either ascending or descending order based on the response's score or another specified characteristic.
|<<CommonQueryParameters-ThestartParameter,start>> |Specifies an offset (by default, 0) into the responses at which Solr should begin displaying content.
|<<CommonQueryParameters-TherowsParameter,rows>> |Controls how many rows of responses are displayed at a time (default value: 10)
|<<CommonQueryParameters-Thefq_FilterQuery_Parameter,fq>> |Applies a filter query to the search results.
|<<CommonQueryParameters-Thefl_FieldList_Parameter,fl>> |Limits the information included in a query response to a specified list of fields. The fields need to either be `stored="true"` or `docValues="true"`
|<<CommonQueryParameters-ThedebugParameter,debug>> |Request additional debugging information in the response. Specifying the `debug=timing` parameter returns just the timing information; specifying the `debug=results` parameter returns "explain" information for each of the documents returned; specifying the `debug=query parameter` returns all of the debug information.
|<<CommonQueryParameters-TheexplainOtherParameter,explainOther>> |Allows clients to specify a Lucene query to identify a set of documents. If non-blank, the explain info of each document which matches this query, relative to the main query (specified by the q parameter) will be returned along with the rest of the debugging information.
|<<CommonQueryParameters-ThetimeAllowedParameter,timeAllowed>> |Defines the time allowed for the query to be processed. If the time elapses before the query response is complete, partial information may be returned.
|<<CommonQueryParameters-ThesegmentTerminateEarlyParameter,segmentTerminateEarly>> |Indicates that, if possible, Solr should stop collecting documents from each individual (sorted) segment once it can determine that any subsequent documents in that segment will not be candidates for the `rows` being returned. The default is false.
|<<CommonQueryParameters-TheomitHeaderParameter,omitHeader>> |Excludes the header from the returned results, if set to true. The header contains information about the request, such as the time the request took to complete. The default is false.
|<<CommonQueryParameters-ThewtParameter,wt>> |Specifies the Response Writer to be used to format the query response.
|<<CommonQueryParameters-ThelogParamsListParameter,logParamsList>> |By default, Solr logs all parameters. Set this parameter to restrict which parameters are logged. Valid entries are the parameters to be logged, separated by commas (i.e., `logParamsList=param1,param2`). An empty list will log no parameters, so if logging all parameters is desired, do not define this additional parameter at all.
|<<CommonQueryParameters-TheechoParamsParameter,echoParams>> |The response header can include parameters sent with the query request. This parameter controls what is contained in that section of the response header. Valid values are `none`, `all`, and `explicit`. The default value is `explicit.`
|===
The following sections describe these parameters in detail.
[[CommonQueryParameters-ThedefTypeParameter]]
== The defType Parameter
== defType Parameter
The defType parameter selects the query parser that Solr should use to process the main query parameter (`q`) in the request. For example:
@ -54,8 +30,7 @@ The defType parameter selects the query parser that Solr should use to process t
If no `defType` parameter is specified, then by default the <<the-standard-query-parser.adoc#the-standard-query-parser,Standard Query Parser>> is used (e.g., `defType=lucene`).
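For instance, a request could select the eDisMax query parser for the main query instead (the query term is just an example):

[source,text]
----
q=ipod&defType=edismax
----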
[[CommonQueryParameters-ThesortParameter]]
== The sort Parameter
== sort Parameter
The `sort` parameter arranges search results in either ascending (`asc`) or descending (`desc`) order. The parameter can be used with either numerical or alphabetical content. The directions can be entered in either all lowercase or all uppercase letters (i.e., both `asc` or `ASC`).
@ -87,26 +62,23 @@ Regarding the sort parameter's arguments:
* Multiple sort orderings can be separated by a comma, using this syntax: `sort=<field name>+<direction>,<field name>+<direction>,...`
** When more than one sort criteria is provided, the second entry will only be used if the first entry results in a tie. If there is a third entry, it will only be used if the first AND second entries are tied. This pattern continues with further entries.
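For example, a request might sort first on a boolean field and break ties with a numeric field; the field names here are only illustrative, and the spaces would be URL-encoded (`+` or `%20`) in a real request:

[source,text]
----
q=*:*&sort=inStock desc,price asc
----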
[[CommonQueryParameters-ThestartParameter]]
== The start Parameter
== start Parameter
When specified, the `start` parameter specifies an offset into a query's result set and instructs Solr to begin displaying results from this offset.
The default value is "0". In other words, by default, Solr returns results without an offset, beginning where the results themselves begin.
The default value is `0`. In other words, by default, Solr returns results without an offset, beginning where the results themselves begin.
Setting the `start` parameter to some other number, such as 3, causes Solr to skip over the preceding records and start at the document identified by the offset.
Setting the `start` parameter to some other number, such as `3`, causes Solr to skip over the preceding records and start at the document identified by the offset.
You can use the `start` parameter this way for paging. For example, if the `rows` parameter is set to 10, you could display three successive pages of results by setting start to 0, then re-issuing the same query and setting start to 10, then issuing the query again and setting start to 20.
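As a sketch of that paging pattern (the host and collection name are illustrative):

[source,text]
----
http://localhost:8983/solr/techproducts/select?q=memory&rows=10&start=0
http://localhost:8983/solr/techproducts/select?q=memory&rows=10&start=10
http://localhost:8983/solr/techproducts/select?q=memory&rows=10&start=20
----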
[[CommonQueryParameters-TherowsParameter]]
== The rows Parameter
== rows Parameter
You can use the rows parameter to paginate results from a query. The parameter specifies the maximum number of documents from the complete result set that Solr should return to the client at one time.
You can use the `rows` parameter to paginate results from a query. The parameter specifies the maximum number of documents from the complete result set that Solr should return to the client at one time.
The default value is 10. That is, by default, Solr returns 10 documents at a time in response to a query.
The default value is `10`. That is, by default, Solr returns 10 documents at a time in response to a query.
[[CommonQueryParameters-Thefq_FilterQuery_Parameter]]
== The fq (Filter Query) Parameter
== fq (Filter Query) Parameter
The `fq` parameter defines a query that can be used to restrict the superset of documents that can be returned, without influencing score. It can be very useful for speeding up complex queries, since the queries specified with `fq` are cached independently of the main query. When a later query uses the same filter, there's a cache hit, and filter results are returned quickly from the cache.
@ -127,14 +99,13 @@ fq=+popularity:[10 TO *] +section:0
----
* The document sets from each filter query are cached independently. Thus, concerning the previous examples: use a single `fq` containing two mandatory clauses if those clauses appear together often, and use two separate `fq` parameters if they are relatively independent. (To learn about tuning cache sizes and making sure a filter cache actually exists, see <<the-well-configured-solr-instance.adoc#the-well-configured-solr-instance,The Well-Configured Solr Instance>>.)
* It is also possible to use <<the-standard-query-parser.adoc#TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser,filter(condition) syntax>> inside the `fq` to cache clauses individually and - among other things - to achieve union of cached filter queries.
* It is also possible to use <<the-standard-query-parser.adoc#differences-between-lucene-query-parser-and-the-solr-standard-query-parser,filter(condition) syntax>> inside the `fq` to cache clauses individually and - among other things - to achieve union of cached filter queries.
* As with all parameters: special characters in a URL need to be properly escaped and encoded as hex values. Online tools are available to help you with URL-encoding. For example: http://meyerweb.com/eric/tools/dencoder/.
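For example, a filter query such as `popularity:[10 TO *]` would be sent in encoded form, shown here with `%20` for the spaces:

[source,text]
----
fq=popularity%3A%5B10%20TO%20*%5D
----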
[[CommonQueryParameters-Thefl_FieldList_Parameter]]
== The fl (Field List) Parameter
== fl (Field List) Parameter
The `fl` parameter limits the information included in a query response to a specified list of fields. The fields need to either be `stored="true"` or `docValues="true"``.`
The `fl` parameter limits the information included in a query response to a specified list of fields. The fields must be either `stored="true"` or `docValues="true"`.
The field list can be specified as a space-separated or comma-separated list of field names. The string "score" can be used to indicate that the score of each document for the particular query should be returned as a field. The wildcard character `*` selects all the fields in the document which are either `stored="true"` or `docValues="true"` and `useDocValuesAsStored="true"` (which is the default when docValues are enabled). You can also add pseudo-fields, functions and transformers to the field list request.
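For example, a request might ask for a few specific fields plus the relevancy score, or for all returnable fields plus the score (the query term and field names are illustrative):

[source,text]
----
q=video&fl=id,name,price,score
q=video&fl=*,score
----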
@ -154,8 +125,7 @@ This table shows some basic examples of how to use `fl`:
|*,dv_field_name |Return all the `stored` fields in each document, and any `docValues` fields that have `useDocValuesAsStored="true"`, and the docValues from dv_field_name even if it has `useDocValuesAsStored="false"`
|===
[[CommonQueryParameters-FunctionValues]]
=== Function Values
=== Functions with fl
<<function-queries.adoc#function-queries,Functions>> can be computed for each document in the result and returned as a pseudo-field:
@ -164,8 +134,7 @@ This table shows some basic examples of how to use `fl`:
fl=id,title,product(price,popularity)
----
[[CommonQueryParameters-DocumentTransformers]]
=== Document Transformers
=== Document Transformers with fl
<<transforming-result-documents.adoc#transforming-result-documents,Document Transformers>> can be used to modify the information returned about each document in the results of a query:
@ -174,7 +143,6 @@ fl=id,title,product(price,popularity)
fl=id,title,[explain]
----
[[CommonQueryParameters-FieldNameAliases]]
=== Field Name Aliases
You can change the key used in the response for a field, function, or transformer by prefixing it with a display name and a colon (`displayName:`). For example:
@ -203,8 +171,7 @@ fl=id,sales_price:price,secret_sauce:prod(price,popularity),why_score:[explain s
}]}}]}}
----
[[CommonQueryParameters-ThedebugParameter]]
== The debug Parameter
== debug Parameter
The `debug` parameter can be specified multiple times and supports the following arguments:
@ -218,8 +185,7 @@ For backwards compatibility with older versions of Solr, `debugQuery=true` may i
The default behavior is not to include debugging information.
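For example, a single request can ask for both timing and per-document scoring information (the query term is only an example):

[source,text]
----
q=solr&debug=timing&debug=results
----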
[[CommonQueryParameters-TheexplainOtherParameter]]
== The explainOther Parameter
== explainOther Parameter
The `explainOther` parameter specifies a Lucene query in order to identify a set of documents. If this parameter is included and is set to a non-blank value, the query will return debugging information, along with the "explain info" of each document that matches the Lucene query, relative to the main query (which is specified by the q parameter). For example:
@ -232,45 +198,40 @@ The query above allows you to examine the scoring explain info of the top matchi
The default value of this parameter is blank, which causes no extra "explain info" to be returned.
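As an illustration (the field and value are hypothetical), the following request returns the normal debug output for the main query plus the explain info of the document matched by the `explainOther` query:

[source,text]
----
q=laptop&debug=results&explainOther=id:9885A004
----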
[[CommonQueryParameters-ThetimeAllowedParameter]]
== The timeAllowed Parameter
== timeAllowed Parameter
This parameter specifies the amount of time, in milliseconds, allowed for a search to complete. If this time expires before the search is complete, any partial results will be returned, but values such as `numFound`, <<faceting.adoc#faceting,facet>> counts, and result <<the-stats-component.adoc#the-stats-component,stats>> may not be accurate for the entire result set.
This value is only checked at the time of:
1. Query Expansion, and
2. Document collection
. Query Expansion, and
. Document collection
As this check is periodically performed, the actual time for which a request can be processed before it is aborted would be marginally greater than or equal to the value of `timeAllowed`. If the request consumes more time in other stages, e.g., custom components, etc., this parameter is not expected to abort the request.
As this check is periodically performed, the actual time for which a request can be processed before it is aborted would be marginally greater than or equal to the value of `timeAllowed`. If the request consumes more time in other stages, custom components, etc., this parameter is not expected to abort the request.
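For example, to cap a potentially expensive query at roughly two seconds:

[source,text]
----
q=*:*&timeAllowed=2000
----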
[[CommonQueryParameters-ThesegmentTerminateEarlyParameter]]
== The segmentTerminateEarly Parameter
== segmentTerminateEarly Parameter
This parameter may be set to either true or false.
This parameter may be set to either `true` or `false`.
If set to true, and if <<indexconfig-in-solrconfig.adoc#mergepolicyfactory,the mergePolicyFactory>> for this collection is a {solr-javadocs}/solr-core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] which uses a `sort` option which is compatible with <<CommonQueryParameters-ThesortParameter,the sort parameter>> specified for this query, then Solr will attempt to use an {lucene-javadocs}/core/org/apache/lucene/search/EarlyTerminatingSortingCollector.html[`EarlyTerminatingSortingCollector`].
If set to `true`, and if <<indexconfig-in-solrconfig.adoc#mergepolicyfactory,the mergePolicyFactory>> for this collection is a {solr-javadocs}/solr-core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] which uses a `sort` option compatible with <<sort Parameter,the sort parameter>> specified for this query, then Solr will attempt to use an {lucene-javadocs}/core/org/apache/lucene/search/EarlyTerminatingSortingCollector.html[`EarlyTerminatingSortingCollector`].
If early termination is used, a `segmentTerminatedEarly` header will be included in the `responseHeader`.
Similar to using <<CommonQueryParameters-ThetimeAllowedParameter,the `timeAllowed `Parameter>>, when early segment termination happens values such as `numFound`, <<faceting.adoc#faceting,Facet>> counts, and result <<the-stats-component.adoc#the-stats-component,Stats>> may not be accurate for the entire result set.
Similar to using <<timeAllowed Parameter,the `timeAllowed` parameter>>, when early segment termination happens, values such as `numFound`, <<faceting.adoc#faceting,Facet>> counts, and result <<the-stats-component.adoc#the-stats-component,Stats>> may not be accurate for the entire result set.
The default value of this parameter is false.
The default value of this parameter is `false`.
[[CommonQueryParameters-TheomitHeaderParameter]]
== The omitHeader Parameter
== omitHeader Parameter
This parameter may be set to either true or false.
This parameter may be set to either `true` or `false`.
If set to true, this parameter excludes the header from the returned results. The header contains information about the request, such as the time it took to complete. The default value for this parameter is false.
If set to `true`, this parameter excludes the header from the returned results. The header contains information about the request, such as the time it took to complete. The default value for this parameter is `false`.
[[CommonQueryParameters-ThewtParameter]]
== The wt Parameter
== wt Parameter
The `wt` parameter selects the Response Writer that Solr should use to format the query's response. For detailed descriptions of Response Writers, see <<response-writers.adoc#response-writers,Response Writers>>.
[[CommonQueryParameters-Thecache_falseParameter]]
== The cache=false Parameter
== cache Parameter
Solr caches the results of all queries and filter queries by default. To disable result caching, set the `cache=false` parameter.
@ -279,24 +240,22 @@ You can also use the `cost` option to control the order in which non-cached filt
For very high cost filters, if `cache=false` and `cost>=100` and the query implements the `PostFilter` interface, a Collector will be requested from that query and used to filter documents after they have matched the main query and all other filter queries. There can be multiple post filters; they are also ordered by cost.
For example:
// TODO: fix this, it looks horrible (CT)
This is a normal function range query used as a filter; all matching documents are generated up front and cached:
[source,text]
----
// normal function range query used as a filter, all matching documents
// generated up front and cached
fq={!frange l=10 u=100}mul(popularity,price)
// function range query run in parallel with the main query like a traditional
// lucene filter
This is a function range query run in parallel with the main query like a traditional lucene filter:
[source,text]
fq={!frange l=10 u=100 cache=false}mul(popularity,price)
// function range query checked after each document that already matches the query
// and all other filters. Good for really expensive function queries.
fq={!frange l=10 u=100 cache=false cost=100}mul(popularity,price)
----
This is a function range query checked after each document that already matches the query and all other filters. This is good for really expensive function queries:
[[CommonQueryParameters-ThelogParamsListParameter]]
== The logParamsList Parameter
[source,text]
fq={!frange l=10 u=100 cache=false cost=100}mul(popularity,price)
== logParamsList Parameter
By default, Solr logs all parameters of requests. Set this parameter to restrict which parameters of a request are logged. This may help control logging to only those parameters considered important to your organization.
@ -308,27 +267,17 @@ And only the 'q' and 'fq' parameters will be logged.
If no parameters should be logged, you can send `logParamsList` as empty (i.e., `logParamsList=`).
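For example, to log only the `q` and `fq` parameters of each request:

[source,text]
----
logParamsList=q,fq
----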
[TIP]
====
This parameter does not only apply to query requests, but to any kind of request to Solr.
====
TIP: This parameter not only applies to query requests, but to any kind of request to Solr.
[[CommonQueryParameters-TheechoParamsParameter]]
== The echoParams Parameter
== echoParams Parameter
The `echoParams` parameter controls what information about request parameters is included in the response header.
The table explains how Solr responds to various settings of the `echoParams` parameter:
The `echoParams` parameter accepts the following values:
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="30,70",options="header"]
|===
|Value |Meaning
|explicit |This is the default value. Only parameters included in the actual request, plus the `_` parameter (which is a 64-bit numeric timestamp) will be added to the params section of the response header.
|all |Include all request parameters that contributed to the query. This will include everything defined in the request handler definition found in `solrconfig.xml` as well as parameters included with the request, plus the `_` parameter. If a parameter is included in the request handler definition AND the request, it will appear multiple times in the response header.
|none |Entirely removes the "params" section of the response header. No information about the request parameters will be available in the response.
|===
* `explicit`: This is the default value. Only parameters included in the actual request, plus the `_` parameter (which is a 64-bit numeric timestamp) will be added to the params section of the response header.
* `all`: Include all request parameters that contributed to the query. This will include everything defined in the request handler definition found in `solrconfig.xml` as well as parameters included with the request, plus the `_` parameter. If a parameter is included in the request handler definition AND the request, it will appear multiple times in the response header.
* `none`: Entirely removes the "params" section of the response header. No information about the request parameters will be available in the response.
Here is an example of a JSON response where the echoParams parameter was not included, so the default of `explicit` is active. The request URL that created this response included three parameters - `q`, `wt`, and `indent`:
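Such a request might look like the following (the host and collection name are illustrative):

[source,text]
----
http://localhost:8983/solr/techproducts/select?q=solr&wt=json&indent=true
----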

View File

@ -22,7 +22,7 @@ Solr logs are a key way to know what's happening in the system. There are severa
[IMPORTANT]
====
In addition to the logging options described below, there is a way to configure which request parameters (such as parameters sent as part of queries) are logged with an additional request parameter called `logParamsList`. See the section on <<common-query-parameters.adoc#CommonQueryParameters-ThelogParamsListParameter,Common Query Parameters>> for more information.
In addition to the logging options described below, there is a way to configure which request parameters (such as parameters sent as part of queries) are logged with an additional request parameter called `logParamsList`. See the section on <<common-query-parameters.adoc#logparamslist-parameter,Common Query Parameters>> for more information.
====
== Temporary Logging Settings

View File

@ -63,7 +63,7 @@ Fields can have many of the same properties as field types. Properties from the
|omitPositions |Similar to `omitTermFreqAndPositions` but preserves term frequency information. |true or false |*
|termVectors termPositions termOffsets termPayloads |These options instruct Solr to maintain full term vectors for each document, optionally including position, offset and payload information for each term occurrence in those vectors. These can be used to accelerate highlighting and other ancillary functionality, but impose a substantial cost in terms of index size. They are not necessary for typical uses of Solr. |true or false |false
|required |Instructs Solr to reject any attempts to add a document which does not have a value for this field. This property defaults to false. |true or false |false
|useDocValuesAsStored |If the field has `<<docvalues.adoc#docvalues,docValues>>` enabled, setting this to true would allow the field to be returned as if it were a stored field (even if it has `stored=false`) when matching "`*`" in an <<common-query-parameters.adoc#CommonQueryParameters-Thefl_FieldList_Parameter,fl parameter>>. |true or false |true
|useDocValuesAsStored |If the field has `<<docvalues.adoc#docvalues,docValues>>` enabled, setting this to true would allow the field to be returned as if it were a stored field (even if it has `stored=false`) when matching "`*`" in an <<common-query-parameters.adoc#fl-field-list-parameter,fl parameter>>. |true or false |true
|large |Large fields are always lazy loaded and will only take up space in the document cache if the actual value is < 512KB. This option requires `stored="true"` and `multiValued="false"`. It's intended for fields that might have very large values so that they don't get cached in memory. |true or false |false
|===

View File

@ -57,7 +57,7 @@ DocValues are only available for specific field types. The types chosen determin
These Lucene types are related to how the {lucene-javadocs}/core/org/apache/lucene/index/DocValuesType.html[values are sorted and stored].
There is an additional configuration option available, which is to modify the `docValuesFormat` <<field-type-definitions-and-properties.adoc#FieldTypeDefinitionsandProperties-docValuesFormat,used by the field type>>. The default implementation employs a mixture of loading some things into memory and keeping some on disk. In some cases, however, you may choose to specify an alternative {lucene-javadocs}/core/org/apache/lucene/codecs/DocValuesFormat.html[DocValuesFormat implementation]. For example, you could choose to keep everything in memory by specifying `docValuesFormat="Memory"` on a field type:
There is an additional configuration option available, which is to modify the `docValuesFormat` <<field-type-definitions-and-properties.adoc#docvaluesformat,used by the field type>>. The default implementation employs a mixture of loading some things into memory and keeping some on disk. In some cases, however, you may choose to specify an alternative {lucene-javadocs}/core/org/apache/lucene/codecs/DocValuesFormat.html[DocValuesFormat implementation]. For example, you could choose to keep everything in memory by specifying `docValuesFormat="Memory"` on a field type:
[source,xml]
----
@ -73,13 +73,13 @@ Lucene index back-compatibility is only supported for the default codec. If you
=== Sorting, Faceting & Functions
If `docValues="true"` for a field, then DocValues will automatically be used any time the field is used for <<common-query-parameters.adoc#CommonQueryParameters-ThesortParameter,sorting>>, <<faceting.adoc#faceting,faceting>> or <<function-queries.adoc#function-queries,function queries>>.
If `docValues="true"` for a field, then DocValues will automatically be used any time the field is used for <<common-query-parameters.adoc#sort-parameter,sorting>>, <<faceting.adoc#faceting,faceting>> or <<function-queries.adoc#function-queries,function queries>>.
=== Retrieving DocValues During Search
Field values retrieved during search queries are typically returned from stored values. However, non-stored docValues fields will be also returned along with other stored fields when all fields (or pattern matching globs) are specified to be returned (e.g. "`fl=*`") for search queries depending on the effective value of the `useDocValuesAsStored` parameter for each field. For schema versions >= 1.6, the implicit default is `useDocValuesAsStored="true"`. See <<field-type-definitions-and-properties.adoc#field-type-definitions-and-properties,Field Type Definitions and Properties>> & <<defining-fields.adoc#defining-fields,Defining Fields>> for more details.
When `useDocValuesAsStored="false"`, non-stored DocValues fields can still be explicitly requested by name in the <<common-query-parameters.adoc#CommonQueryParameters-Thefl_FieldList_Parameter,fl param>>, but will not match glob patterns (`"*"`). Note that returning DocValues along with "regular" stored fields at query time has performance implications that stored fields may not because DocValues are column-oriented and may therefore incur additional cost to retrieve for each returned document. Also note that while returning non-stored fields from DocValues, the values of a multi-valued field are returned in sorted order (and not insertion order). If you require the multi-valued fields to be returned in the original insertion order, then make your multi-valued field as stored (such a change requires re-indexing).
When `useDocValuesAsStored="false"`, non-stored DocValues fields can still be explicitly requested by name in the <<common-query-parameters.adoc#fl-field-list-parameter,fl param>>, but will not match glob patterns (`"*"`). Note that returning DocValues along with "regular" stored fields at query time has performance implications that stored fields may not have, because DocValues are column-oriented and may therefore incur additional cost to retrieve for each returned document. Also note that while returning non-stored fields from DocValues, the values of a multi-valued field are returned in sorted order (and not insertion order). If you require the multi-valued fields to be returned in the original insertion order, then make your multi-valued field stored (such a change requires re-indexing).
In cases where the query is returning _only_ docValues fields performance may improve since returning stored fields requires disk reads and decompression whereas returning docValues fields in the fl list only requires memory access.

View File

@ -23,30 +23,24 @@ Faceting is the arrangement of search results into categories based on indexed t
Searchers are presented with the indexed terms, along with numerical counts of how many matching documents were found for each term. Faceting makes it easy for users to explore search results, narrowing in on exactly the results they are looking for.
[[Faceting-GeneralParameters]]
== General Parameters
== General Facet Parameters
There are two general parameters for controlling faceting.
[[Faceting-ThefacetParameter]]
=== The facet Parameter
If set to *true*, this parameter enables facet counts in the query response. If set to *false*, a blank or missing value, this parameter disables faceting. None of the other parameters listed below will have any effect unless this parameter is set to *true*. The default value is blank (false).
[[Faceting-Thefacet.queryParameter]]
=== The facet.query Parameter
`facet`::
If set to `true`, this parameter enables facet counts in the query response. If set to `false`, left blank, or missing, this parameter disables faceting. None of the other parameters listed below will have any effect unless this parameter is set to `true`. The default value is blank (false).
`facet.query`::
This parameter allows you to specify an arbitrary query in the Lucene default syntax to generate a facet count.
+
By default, Solr's faceting feature automatically determines the unique terms for a field and returns a count for each of those terms. Using `facet.query`, you can override this default behavior and select exactly which terms or expressions you would like to see counted. In a typical implementation of faceting, you will specify a number of `facet.query` parameters. This parameter can be particularly useful for numeric-range-based facets or prefix-based facets.
+
You can set the `facet.query` parameter multiple times to indicate that multiple queries should be used as separate facet constraints.
+
To use facet queries in a syntax other than the default syntax, prefix the facet query with the name of the query notation. For example, to use the hypothetical `myfunc` query parser, you could set the `facet.query` parameter like so:
+
`facet.query={!myfunc}name~fred`
[[Faceting-Field-ValueFacetingParameters]]
== Field-Value Faceting Parameters
Several parameters can be used to trigger faceting based on the indexed terms in a field.
@ -55,335 +49,218 @@ When using these parameters, it is important to remember that "term" is a very s
If you want Solr to perform both analysis (for searching) and faceting on the full literal strings, use the `copyField` directive in your Schema to create two versions of the field: one Text and one String. Make sure both are `indexed="true"`. (For more information about the `copyField` directive, see <<documents-fields-and-schema-design.adoc#documents-fields-and-schema-design,Documents, Fields, and Schema Design>>.)
The table below summarizes Solr's field value faceting parameters.
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="30,70",options="header"]
|===
|Parameter |Description
|<<Faceting-Thefacet.fieldParameter,facet.field>> |Identifies a field to be treated as a facet.
|<<Faceting-Thefacet.prefixParameter,facet.prefix>> |Limits the terms used for faceting to those that begin with the specified prefix.
|<<Faceting-Thefacet.containsParameter,facet.contains>> |Limits the terms used for faceting to those that contain the specified substring.
|<<Faceting-Thefacet.contains.ignoreCaseParameter,facet.contains.ignoreCase>> |If facet.contains is used, ignore case when searching for the specified substring.
|<<Faceting-Thefacet.sortParameter,facet.sort>> |Controls how faceted results are sorted.
|<<Faceting-Thefacet.limitParameter,facet.limit>> |Controls how many constraints should be returned for each facet.
|<<Faceting-Thefacet.offsetParameter,facet.offset>> |Specifies an offset into the facet results at which to begin displaying facets.
|<<Faceting-Thefacet.mincountParameter,facet.mincount>> |Specifies the minimum counts required for a facet field to be included in the response.
|<<Faceting-Thefacet.missingParameter,facet.missing>> |Controls whether Solr should compute a count of all matching results which have no value for the field, in addition to the term-based constraints of a facet field.
|<<Faceting-Thefacet.methodParameter,facet.method>> |Selects the algorithm or method Solr should use when faceting a field.
|<<Faceting-Thefacet.existsParameter,facet.exists>> |Caps facet counts by one. Available only for `facet.method=enum` as performance optimization.
|<<Faceting-Thefacet.excludeTermsParameter,facet.excludeTerms>> |Removes specific terms from facet counts. This allows you to exclude certain terms from faceting, while maintaining the terms in the index for general queries.
|<<Faceting-Thefacet.enum.cache.minDfParameter,facet.enum.cache.minDf>> |(Advanced) Specifies the minimum document frequency (the number of documents matching a term) for which the `filterCache` should be used when determining the constraint count for that term.
|<<Faceting-Over-RequestParameters,facet.overrequest.count>> |(Advanced) A number of documents, beyond the effective `facet.limit` to request from each shard in a distributed search
|<<Faceting-Over-RequestParameters,facet.overrequest.ratio>> |(Advanced) A multiplier of the effective `facet.limit` to request from each shard in a distributed search
|<<Faceting-Thefacet.threadsParameter,facet.threads>> |(Advanced) Controls parallel execution of field faceting
|===
These parameters are described in the sections below.
[[Faceting-Thefacet.fieldParameter]]
=== The facet.field Parameter
Unless otherwise specified, all of the parameters below can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.<parameter>`.
`facet.field`::
The `facet.field` parameter identifies a field that should be treated as a facet. It iterates over each Term in the field and generates a facet count using that Term as the constraint. This parameter can be specified multiple times in a query to select multiple facet fields.
+
IMPORTANT: If you do not set this parameter to at least one field in the schema, none of the other parameters described in this section will have any effect.
[IMPORTANT]
====
If you do not set this parameter to at least one field in the schema, none of the other parameters described in this section will have any effect.
====
[[Faceting-Thefacet.prefixParameter]]
=== The facet.prefix Parameter
`facet.prefix`::
The `facet.prefix` parameter limits the terms on which to facet to those starting with the given string prefix. This does not limit the query in any way, only the facets that would be returned in response to the query.
+
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.prefix`.
[[Faceting-Thefacet.containsParameter]]
=== The facet.contains Parameter
`facet.contains`::
The `facet.contains` parameter limits the terms on which to facet to those containing the given substring. This does not limit the query in any way, only the facets that would be returned in response to the query.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.contains`.
[[Faceting-Thefacet.contains.ignoreCaseParameter]]
=== The facet.contains.ignoreCase Parameter
`facet.contains.ignoreCase`::
If `facet.contains` is used, the `facet.contains.ignoreCase` parameter causes case to be ignored when matching the given substring against candidate facet terms.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.contains.ignoreCase`.
[[Faceting-Thefacet.sortParameter]]
=== The facet.sort Parameter
`facet.sort`::
This parameter determines the ordering of the facet field constraints.
+
There are two options for this parameter.
count:: Sort the constraints by count (highest count first).
index:: Return the constraints sorted in their index order (lexicographic by indexed term). For terms in the ASCII range, this will be alphabetically sorted.
+
--
`count`::: Sort the constraints by count (highest count first).
`index`::: Return the constraints sorted in their index order (lexicographic by indexed term). For terms in the ASCII range, this will be alphabetically sorted.
--
+
The default is `count` if `facet.limit` is greater than 0, otherwise, the default is `index`.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.sort`.
[[Faceting-Thefacet.limitParameter]]
=== The facet.limit Parameter
`facet.limit`::
This parameter specifies the maximum number of constraint counts (essentially, the number of facets for a field that are returned) that should be returned for the facet fields. A negative value means that Solr will return an unlimited number of constraint counts.
+
The default value is `100`.
The default value is 100.
This parameter can be specified on a per-field basis to apply a distinct limit to each field with the syntax of `f.<fieldname>.facet.limit`.
[[Faceting-Thefacet.offsetParameter]]
=== The facet.offset Parameter
`facet.offset`::
The `facet.offset` parameter indicates an offset into the list of constraints to allow paging.
+
The default value is `0`.
The default value is 0.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.offset`.
[[Faceting-Thefacet.mincountParameter]]
=== The facet.mincount Parameter
`facet.mincount`::
The `facet.mincount` parameter specifies the minimum counts required for a facet field to be included in the response. If a field's counts are below the minimum, the field's facet is not returned.
+
The default value is `0`.
The default value is 0.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.mincount`.
[[Faceting-Thefacet.missingParameter]]
=== The facet.missing Parameter
If set to true, this parameter indicates that, in addition to the Term-based constraints of a facet field, a count of all results that match the query but which have no facet value for the field should be computed and returned in the response.
The default value is false.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.missing`.
[[Faceting-Thefacet.methodParameter]]
=== The facet.method Parameter
The facet.method parameter selects the type of algorithm or method Solr should use when faceting a field.
`facet.missing`::
If set to `true`, this parameter indicates that, in addition to the Term-based constraints of a facet field, a count of all results that match the query but which have no facet value for the field should be computed and returned in the response.
+
The default value is `false`.
`facet.method`::
The `facet.method` parameter selects the type of algorithm or method Solr should use when faceting a field.
+
The following methods are available.
enum:: Enumerates all terms in a field, calculating the set intersection of documents that match the term with documents that match the query.
+
--
`enum`::: Enumerates all terms in a field, calculating the set intersection of documents that match the term with documents that match the query.
+
This method is recommended for faceting multi-valued fields that have only a few distinct values. The average number of values per document does not matter.
+
For example, faceting on a field with U.S. States such as `Alabama, Alaska, ... Wyoming` would lead to fifty cached filters which would be used over and over again. The `filterCache` should be large enough to hold all the cached filters.
fc:: Calculates facet counts by iterating over documents that match the query and summing the terms that appear in each document.
`fc`::: Calculates facet counts by iterating over documents that match the query and summing the terms that appear in each document.
+
This is currently implemented using an `UnInvertedField` cache if the field either is multi-valued or is tokenized (according to `FieldType.isTokenized()`). Each document is looked up in the cache to see what terms/values it contains, and a tally is incremented for each value.
+
This method is excellent for situations where the number of indexed values for the field is high, but the number of values per document is low. For multi-valued fields, a hybrid approach is used that uses term filters from the `filterCache` for terms that match many documents. The letters `fc` stand for field cache.
fcs:: Per-segment field faceting for single-valued string fields. Enable with `facet.method=fcs` and control the number of threads used with the `threads` local parameter. This parameter allows faceting to be faster in the presence of rapid index changes.
`fcs`::: Per-segment field faceting for single-valued string fields. Enable with `facet.method=fcs` and control the number of threads used with the `threads` local parameter. This parameter allows faceting to be faster in the presence of rapid index changes.
--
+
The default value is `fc` (except for fields using the `BoolField` field type and when `facet.exists=true` is requested) since it tends to use less memory and is faster when a field has many unique terms in the index.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.method`.
[[Faceting-Thefacet.enum.cache.minDfParameter]]
=== The facet.enum.cache.minDf Parameter
`facet.enum.cache.minDf`::
This parameter indicates the minimum document frequency (the number of documents matching a term) for which the filterCache should be used when determining the constraint count for that term. This is only used with the `facet.method=enum` method of faceting.
+
A value greater than zero decreases the filterCache's memory usage, but increases the time required for the query to be processed. If you are faceting on a field with a very large number of terms, and you wish to decrease memory usage, try setting this parameter to a value between `25` and `50`, and run a few tests. Then, optimize the parameter setting as necessary.
+
The default value is `0`, causing the filterCache to be used for all terms in the field.
A value greater than zero decreases the filterCache's memory usage, but increases the time required for the query to be processed. If you are faceting on a field with a very large number of terms, and you wish to decrease memory usage, try setting this parameter to a value between 25 and 50, and run a few tests. Then, optimize the parameter setting as necessary.
`facet.exists`::
To cap facet counts by 1, specify `facet.exists=true`. This parameter can be used with `facet.method=enum` or when it's omitted. It can be used only on non-trie fields (such as strings). It may speed up facet counting on large indices and/or high-cardinality facet values.
The default value is 0, causing the filterCache to be used for all terms in the field.
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.enum.cache.minDf`.
[[Faceting-Thefacet.existsParameter]]
=== The facet.exists Parameter
To cap facet counts by 1, specify `facet.exists=true`. It can be used with `facet.method=enum` or when it's omitted. It can be used only on non-trie fields (such as strings). It may speed up facet counting on large indices and/or high-cardinality facet values..
This parameter can be specified on a per-field basis with the syntax of `f.<fieldname>.facet.exists`, or via the local parameter syntax `facet.field={!facet.method=enum facet.exists=true}size`.
[[Faceting-Thefacet.excludeTermsParameter]]
=== The facet.excludeTerms Parameter
`facet.excludeTerms`::
If you want to remove terms from facet counts but keep them in the index, the `facet.excludeTerms` parameter allows you to do that.
[[Faceting-Over-RequestParameters]]
=== Over-Request Parameters
`facet.overrequest.count` and `facet.overrequest.ratio`::
In some situations, the accuracy in selecting the "top" constraints returned for a facet in a distributed Solr query can be improved by "over requesting" the number of desired constraints (i.e., `facet.limit`) from each of the individual shards. In these situations, each shard is by default asked for the top `10 + (1.5 * facet.limit)` constraints.
+
In some situations, depending on how your docs are partitioned across your shards and what `facet.limit` value you used, you may find it advantageous to increase or decrease the amount of over-requesting Solr does. This can be achieved by setting the `facet.overrequest.count` (defaults to `10`) and `facet.overrequest.ratio` (defaults to `1.5`) parameters.
In some situations, the accuracy in selecting the "top" constraints returned for a facet in a distributed Solr query can be improved by "Over Requesting" the number of desired constraints (ie: `facet.limit`) from each of the individual Shards. In these situations, each shard is by default asked for the top "`10 + (1.5 * facet.limit)`" constraints.
`facet.threads`::
This parameter causes the underlying fields used in faceting to be loaded in parallel, using the number of threads specified. Specify it as `facet.threads=N`, where `N` is the maximum number of threads used.
+
Omitting this parameter or specifying the thread count as `0` will not spawn any threads, and only the main request thread will be used. Specifying a negative number of threads will create up to `Integer.MAX_VALUE` threads.
In some situations, depending on how your docs are partitioned across your shards, and what `facet.limit` value you used, you may find it advantageous to increase or decrease the amount of over-requesting Solr does. This can be achieved by setting the `facet.overrequest.count` (defaults to 10) and `facet.overrequest.ratio` (defaults to 1.5) parameters.
[[Faceting-Thefacet.threadsParameter]]
=== The facet.threads Parameter
This param will cause loading the underlying fields used in faceting to be executed in parallel with the number of threads specified. Specify as `facet.threads=N` where `N` is the maximum number of threads used. Omitting this parameter or specifying the thread count as 0 will not spawn any threads, and only the main request thread will be used. Specifying a negative number of threads will create up to Integer.MAX_VALUE threads.
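Putting a few of these parameters together, a request along the following lines (the field name is illustrative) would return the top five facet values for a `category` field, counting only values that appear at least once and sorting them by count:

[source,text]
----
q=*:*&facet=true&facet.field=category&facet.limit=5&facet.mincount=1&facet.sort=count
----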
[[Faceting-RangeFaceting]]
== Range Faceting
You can use Range Faceting on any date field or any numeric field that supports range queries. This is particularly useful for stitching together a series of range queries (as facet by query) for things like prices.
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="30,70",options="header"]
|===
|Parameter |Description
|<<Faceting-Thefacet.rangeParameter,facet.range>> |Specifies the field to facet by range.
|<<Faceting-Thefacet.range.startParameter,facet.range.start>> |Specifies the start of the facet range.
|<<Faceting-Thefacet.range.endParameter,facet.range.end>> |Specifies the end of the facet range.
|<<Faceting-Thefacet.range.gapParameter,facet.range.gap>> |Specifies the span of the range as a value to be added to the lower bound.
|<<Faceting-Thefacet.range.hardendParameter,facet.range.hardend>> |A boolean parameter that specifies how Solr handles a range gap that cannot be evenly divided between the range start and end values. If true, the last range constraint will have the `facet.range.end` value an upper bound. If false, the last range will have the smallest possible upper bound greater then `facet.range.end` such that the range is the exact width of the specified range gap. The default value for this parameter is false.
|<<Faceting-Thefacet.range.includeParameter,facet.range.include>> |Specifies inclusion and exclusion preferences for the upper and lower bounds of the range. See the `facet.range.include` topic for more detailed information.
|<<Faceting-Thefacet.range.otherParameter,facet.range.other>> |Specifies counts for Solr to compute in addition to the counts for each facet range constraint.
|<<Faceting-Thefacet.range.methodParameter,facet.range.method>> |Specifies the algorithm or method to use for calculating facets.
|===
[[Faceting-Thefacet.rangeParameter]]
=== The facet.range Parameter
`facet.range`::
The `facet.range` parameter defines the field for which Solr should create range facets. For example:
+
`facet.range=price&facet.range=age`
+
`facet.range=lastModified_dt`
[[Faceting-Thefacet.range.startParameter]]
=== The facet.range.start Parameter
`facet.range.start`::
The `facet.range.start` parameter specifies the lower bound of the ranges. You can specify this parameter on a per field basis with the syntax of `f.<fieldname>.facet.range.start`. For example:
+
`f.price.facet.range.start=0.0&f.age.facet.range.start=10`
+
`f.lastModified_dt.facet.range.start=NOW/DAY-30DAYS`
[[Faceting-Thefacet.range.endParameter]]
=== The facet.range.end Parameter
The facet.range.end specifies the upper bound of the ranges. You can specify this parameter on a per field basis with the syntax of `f.<fieldname>.facet.range.end`. For example:
`facet.range.end`::
The `facet.range.end` specifies the upper bound of the ranges. You can specify this parameter on a per field basis with the syntax of `f.<fieldname>.facet.range.end`. For example:
+
`f.price.facet.range.end=1000.0&f.age.facet.range.start=99`
+
`f.lastModified_dt.facet.range.end=NOW/DAY+30DAYS`
[[Faceting-Thefacet.range.gapParameter]]
=== The facet.range.gap Parameter
`facet.range.gap`::
The span of each range expressed as a value to be added to the lower bound. For date fields, this should be expressed using the {solr-javadocs}/solr-core/org/apache/solr/util/DateMathParser.html[`DateMathParser` syntax] (such as, `facet.range.gap=%2B1DAY ... '+1DAY'`). You can specify this parameter on a per-field basis with the syntax of `f.<fieldname>.facet.range.gap`. For example:
+
`f.price.facet.range.gap=100&f.age.facet.range.gap=10`
+
`f.lastModified_dt.facet.range.gap=+1DAY`
[[Faceting-Thefacet.range.hardendParameter]]
=== The facet.range.hardend Parameter
`facet.range.hardend`::
The `facet.range.hardend` parameter is a Boolean parameter that specifies how Solr should handle cases where the `facet.range.gap` does not divide evenly between `facet.range.start` and `facet.range.end`.
If *true*, the last range constraint will have the `facet.range.end` value as an upper bound. If *false*, the last range will have the smallest possible upper bound greater then `facet.range.end` such that the range is the exact width of the specified range gap. The default value for this parameter is false.
+
If `true`, the last range constraint will have the `facet.range.end` value as an upper bound. If `false`, the last range will have the smallest possible upper bound greater than `facet.range.end` such that the range is the exact width of the specified range gap. The default value for this parameter is `false`.
+
This parameter can be specified on a per field basis with the syntax `f.<fieldname>.facet.range.hardend`.
[[Faceting-Thefacet.range.includeParameter]]
=== The facet.range.include Parameter
`facet.range.include`::
By default, the ranges used to compute range faceting between `facet.range.start` and `facet.range.end` are inclusive of their lower bounds and exclusive of the upper bounds. The "before" range defined with the `facet.range.other` parameter is exclusive and the "after" range is inclusive. This default, equivalent to "lower" below, will not result in double counting at the boundaries. You can use the `facet.range.include` parameter to modify this behavior using the following options:
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="30,70",options="header"]
|===
|Option |Description
|lower |All gap-based ranges include their lower bound.
|upper |All gap-based ranges include their upper bound.
|edge |The first and last gap ranges include their edge bounds (lower for the first one, upper for the last one) even if the corresponding upper/lower option is not specified.
|outer |The "before" and "after" ranges will be inclusive of their bounds, even if the first or last ranges already include those boundaries.
|all |Includes all options: lower, upper, edge, outer.
|===
+
--
* `lower`: All gap-based ranges include their lower bound.
* `upper`: All gap-based ranges include their upper bound.
* `edge`: The first and last gap ranges include their edge bounds (lower for the first one, upper for the last one) even if the corresponding upper/lower option is not specified.
* `outer`: The "before" and "after" ranges will be inclusive of their bounds, even if the first or last ranges already include those boundaries.
* `all`: Includes all options: `lower`, `upper`, `edge`, and `outer`.
--
+
You can specify this parameter on a per field basis with the syntax of `f.<fieldname>.facet.range.include`, and you can specify it multiple times to indicate multiple choices.
+
NOTE: To ensure you avoid double-counting, do not choose both `lower` and `upper`, do not choose `outer`, and do not choose `all`.
[NOTE]
====
To ensure you avoid double-counting, do not choose both `lower` and `upper`, do not choose `outer`, and do not choose `all`.
====
[[Faceting-Thefacet.range.otherParameter]]
=== The facet.range.other Parameter
`facet.range.other`::
The `facet.range.other` parameter specifies that in addition to the counts for each range constraint between `facet.range.start` and `facet.range.end`, counts should also be computed for these options:
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="30,70",options="header"]
|===
|Option |Description
|before |All records with field values lower than the lower bound of the first range.
|after |All records with field values greater than the upper bound of the last range.
|between |All records with field values between the start and end bounds of all ranges.
|none |Do not compute any counts.
|all |Compute counts for before, between, and after.
|===
+
--
* `before`: All records with field values lower than the lower bound of the first range.
* `after`: All records with field values greater than the upper bound of the last range.
* `between`: All records with field values between the start and end bounds of all ranges.
* `none`: Do not compute any counts.
* `all`: Compute counts for before, between, and after.
--
+
This parameter can be specified on a per field basis with the syntax of `f.<fieldname>.facet.range.other`. In addition to the `all` option, this parameter can be specified multiple times to indicate multiple choices, but `none` will override all other options.
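+
A sketch, again using the illustrative field `price`, that adds counts for documents falling outside the configured ranges:
+
[source,text]
----
facet=true&facet.range=price
   &facet.range.start=0&facet.range.end=100&facet.range.gap=20
   &facet.range.other=before&facet.range.other=after
----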
[[Faceting-Thefacet.range.methodParameter]]
=== The facet.range.method Parameter
`facet.range.method`::
The `facet.range.method` parameter selects the type of algorithm or method Solr should use for range faceting. Both methods produce the same results, but performance may vary.
+
--
filter::: This method generates the ranges based on other facet.range parameters, and for each of them executes a filter that later intersects with the main query result set to get the count. It will make use of the filterCache, so it will benefit from a cache large enough to contain all ranges.
+
dv::: This method iterates the documents that match the main query, and for each of them finds the correct range for the value. This method will make use of <<docvalues.adoc#docvalues,docValues>> (if enabled for the field) or fieldCache. The `dv` method is not supported for field type DateRangeField or when using <<result-grouping.adoc#result-grouping,group.facets>>.
--
+
The default value for this parameter is `filter`.
filter:: This method generates the ranges based on other facet.range parameters, and for each of them executes a filter that later intersects with the main query result set to get the count. It will make use of the filterCache, so it will benefit from a cache large enough to contain all ranges.
dv:: This method iterates the documents that match the main query, and for each of them finds the correct range for the value. This method will make use of <<docvalues.adoc#docvalues,docValues>> (if enabled for the field) or fieldCache. The `dv` method is not supported for field type DateRangeField or when using <<result-grouping.adoc#result-grouping,group.facets>>.
The default value for this parameter is "filter".
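A sketch of selecting the docValues-based implementation explicitly (shown here as a global parameter, not tied to a particular field):

[source,text]
----
facet.range.method=dv
----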
[[Faceting-Thefacet.mincountParameterinRangeFaceting]]
=== The facet.mincount Parameter in Range Faceting
The `facet.mincount` parameter, the same one as used in field faceting, is also applied to range faceting. When used, no ranges with a count below the minimum will be included in the response.
.Date Ranges & Time Zones
[NOTE]
====
Range faceting on date fields is a common situation where the <<working-with-dates.adoc#tz,`TZ`>> parameter can be useful to ensure that the "facet counts per day" or "facet counts per month" are based on a meaningful definition of when a given day/month "starts" relative to a particular TimeZone.
For more information, see the examples in the <<working-with-dates.adoc#working-with-dates,Working with Dates>> section.
====
=== facet.mincount in Range Faceting
The `facet.mincount` parameter, the same one as used in field faceting, is also applied to range faceting. When used, no ranges with a count below the minimum will be included in the response.
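A sketch that drops empty range buckets from the response (field and values are illustrative):

[source,text]
----
facet=true&facet.range=price
   &facet.range.start=0&facet.range.end=1000&facet.range.gap=100
   &facet.mincount=1
----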
[[Faceting-Pivot_DecisionTree_Faceting]]
== Pivot (Decision Tree) Faceting
Pivoting is a summarization tool that lets you automatically sort, count, total or average data stored in a table. The results are typically displayed in a second table showing the summarized data. Pivot faceting lets you create a summary table of the results from faceting documents by multiple fields.
Another way to look at it is that the query produces a Decision Tree, in that Solr tells you "for facet A, the constraints/counts are X/N, Y/M, etc. If you were to constrain A by X, then the constraint counts for B would be S/P, T/Q, etc.". In other words, it tells you in advance what the "next" set of facet results would be for a field if you apply a constraint from the current facet results.
[[Faceting-facet.pivot]]
=== facet.pivot
`facet.pivot`::
The `facet.pivot` parameter defines the fields to use for the pivot. Multiple `facet.pivot` values will create multiple "facet_pivot" sections in the response. Separate each list of fields with a comma.
[[Faceting-facet.pivot.mincount]]
=== facet.pivot.mincount
`facet.pivot.mincount`::
The `facet.pivot.mincount` parameter defines the minimum number of documents that need to match in order for the facet to be included in results. The default is 1.
+
Using the "`bin/solr -e techproducts`" example, A query URL like this one will return the data below, with the pivot faceting results found in the section "facet_pivot":
+
[source,text]
----
http://localhost:8983/solr/techproducts/select?q=*:*&facet.pivot=cat,popularity,inStock
&facet.pivot=popularity,cat&facet=true&facet.field=cat&facet.limit=5
&rows=0&wt=json&indent=true&facet.pivot.mincount=2
----
+
[source,json]
----
{ "facet_counts":{
@ -413,10 +290,9 @@ http://localhost:8983/solr/techproducts/select?q=*:*&facet.pivot=cat,popularity,
}]}}}
----
[[Faceting-CombiningStatsComponentWithPivots]]
=== Combining Stats Component With Pivots
In addition to some of the <<Faceting-LocalParametersforFaceting,general local parameters>> supported by other types of faceting, a `stats` local parameter can be used with `facet.pivot` to refer to <<the-stats-component.adoc#the-stats-component,`stats.field`>> instances (by tag) that you would like to have computed for each Pivot Constraint.
In addition to some of the <<Local Parameters for Faceting,general local parameters>> supported by other types of faceting, a `stats` local parameter can be used with `facet.pivot` to refer to <<the-stats-component.adoc#the-stats-component,`stats.field`>> instances (by tag) that you would like to have computed for each Pivot Constraint.
In the example below, two different (overlapping) sets of statistics are computed for each of the facet.pivot result hierarchies:
@ -503,7 +379,6 @@ Results:
"..."}]}}}}]}]}}
----
[[Faceting-CombiningFacetQueriesAndFacetRangesWithPivotFacets]]
=== Combining Facet Queries And Facet Ranges With Pivot Facets
A `query` local parameter can be used with `facet.pivot` to refer to `facet.query` instances (by tag) that should be computed for each pivot constraint. Similarly, a `range` local parameter can be used with `facet.pivot` to refer to `facet.range` instances.
@ -630,10 +505,9 @@ facet.pivot={!range=r1}cat,inStock
"..."]}]}}}
----
[[Faceting-AdditionalPivotParameters]]
=== Additional Pivot Parameters
Although `facet.pivot.mincount` deviates in name from the `facet.mincount` parameter used by field faceting, many other Field faceting parameters described above can also be used with pivot faceting:
Although `facet.pivot.mincount` deviates in name from the `facet.mincount` parameter used by field faceting, many of the faceting parameters described above can also be used with pivot faceting:
* `facet.limit`
* `facet.offset`
@ -641,7 +515,6 @@ Although `facet.pivot.mincount` deviates in name from the `facet.mincount` param
* `facet.overrequest.count`
* `facet.overrequest.ratio`
[[Faceting-IntervalFaceting]]
== Interval Faceting
Another supported form of faceting is interval faceting. This sounds similar to range faceting, but the functionality is really closer to doing facet queries with range queries. Interval faceting allows you to set variable intervals and count the number of documents that have values within those intervals in the specified field.
@ -652,23 +525,21 @@ If you are concerned about the performance of your searches you should test with
This method will use <<docvalues.adoc#docvalues,docValues>> if they are enabled for the field, and will use the fieldCache otherwise.
[[Faceting-Thefacet.intervalparameter]]
=== The facet.interval parameter
Use these parameters for interval faceting:
`facet.interval`::
This parameter indicates the field where interval faceting must be applied. It can be used multiple times in the same request to indicate multiple fields.
+
`facet.interval=price&facet.interval=size`
[[Faceting-Thefacet.interval.setparameter]]
=== The facet.interval.set parameter
`facet.interval.set`::
This parameter is used to set the intervals for the field; it can be specified multiple times to indicate multiple intervals. This parameter is global, which means that it will be used for all fields indicated with `facet.interval` unless there is an override for a specific field. To override this parameter on a specific field you can use: `f.<fieldname>.facet.interval.set`, for example:
+
[source,text]
f.price.facet.interval.set=[0,10]&f.price.facet.interval.set=(10,100]
[[Faceting-IntervalSyntax]]
=== Interval Syntax
Intervals must begin with either '(' or '[', be followed by the start value, then a comma (','), the end value, and finally a closing ')' or ']'.
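Reading the earlier example with this syntax (a sketch; the `price` field is illustrative, and '[' and '(' are taken as inclusive and exclusive bounds respectively):

[source,text]
----
f.price.facet.interval.set=[0,10]     includes both 0 and 10
f.price.facet.interval.set=(10,100]   excludes 10, includes 100
----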
@ -699,12 +570,10 @@ Interval faceting supports output key replacement described below. Output keys c
&facet=true
----
[[Faceting-LocalParametersforFaceting]]
== Local Parameters for Faceting
The <<local-parameters-in-queries.adoc#local-parameters-in-queries,LocalParams syntax>> allows overriding global settings. It can also provide a method of adding metadata to other parameter values, much like XML attributes.
[[Faceting-TaggingandExcludingFilters]]
=== Tagging and Excluding Filters
You can tag specific filters and exclude those filters when faceting. This is useful when doing multi-select faceting.
@ -732,7 +601,6 @@ To return counts for doctype values that are currently not selected, tag filters
Filter exclusion is supported for all types of facets. Both the `tag` and `ex` local parameters may specify multiple values by separating them with commas.
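A minimal sketch of the pattern, using the illustrative field `doctype`: the filter is tagged with `tag=dt`, and the facet excludes it with `ex=dt` so counts are computed as if that filter were not applied.

[source,text]
----
q=mainquery&fq={!tag=dt}doctype:pdf&facet=true&facet.field={!ex=dt}doctype
----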
[[Faceting-ChangingtheOutputKey]]
=== Changing the Output Key
To change the output key for a faceting command, specify a new name with the `key` local parameter. For example:
@ -741,14 +609,12 @@ To change the output key for a faceting command, specify a new name with the `ke
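A sketch of such a setting, reconstructed from the description below (the original example may differ):

[source,text]
----
facet.field={!key=mylabel}doctype
----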
The parameter setting above causes the field facet results for the "doctype" field to be returned using the key "mylabel" rather than "doctype" in the response. This can be helpful when faceting on the same field multiple times with different exclusions.
[[Faceting-Limitingfacetwithcertainterms]]
=== Limiting Facet with Certain Terms
To limit a field facet to certain terms, specify them comma-separated with the `terms` local parameter. Commas and quotes in terms can be escaped with a backslash, as in `\,`. In this case the facet is calculated in a way similar to `facet.method=enum`, but ignores `facet.enum.cache.minDf`. For example:
`facet.field={!terms='alfa,betta,with\,with\',with space'}symbol`
[[Faceting-RelatedTopics]]
== Related Topics
* <<spatial-search.adoc#spatial-search,Heatmap Faceting (Spatial)>>
See also <<spatial-search.adoc#spatial-search,Heatmap Faceting (Spatial)>>.
View File
@ -90,11 +90,11 @@ For multivalued fields, specifies a distance between multiple values, which prev
`autoGeneratePhraseQueries`:: For text fields. If `true`, Solr automatically generates phrase queries for adjacent terms. If `false`, terms must be enclosed in double-quotes to be treated as phrases.
`enableGraphQueries`::
For text fields, applicable when querying with <<the-standard-query-parser.adoc#TheStandardQueryParser-StandardQueryParserParameters,`sow=false`>>. Use `true` (the default) for field types with query analyzers including graph-aware filters, e.g., <<filter-descriptions.adoc#synonym-graph-filter,Synonym Graph Filter>> and <<filter-descriptions.adoc#word-delimiter-graph-filter,Word Delimiter Graph Filter>>.
For text fields, applicable when querying with <<the-standard-query-parser.adoc#standard-query-parser-parameters,`sow=false`>>. Use `true` (the default) for field types with query analyzers including graph-aware filters, e.g., <<filter-descriptions.adoc#synonym-graph-filter,Synonym Graph Filter>> and <<filter-descriptions.adoc#word-delimiter-graph-filter,Word Delimiter Graph Filter>>.
+
Use `false` for field types with query analyzers including filters that can match docs when some tokens are missing, e.g., <<filter-descriptions.adoc#shingle-filter,Shingle Filter>>.
[[FieldTypeDefinitionsandProperties-docValuesFormat]]
[[docvaluesformat]]
`docValuesFormat`::
Defines a custom `DocValuesFormat` to use for fields of this type. This requires that a schema-aware codec, such as the `SchemaCodecFactory`, has been configured in `solrconfig.xml`.
@ -130,7 +130,7 @@ The default values for each property depend on the underlying `FieldType` class,
|omitPositions |Similar to `omitTermFreqAndPositions` but preserves term frequency information. |true or false |*
|termVectors termPositions termOffsets termPayloads |These options instruct Solr to maintain full term vectors for each document, optionally including position, offset and payload information for each term occurrence in those vectors. These can be used to accelerate highlighting and other ancillary functionality, but impose a substantial cost in terms of index size. They are not necessary for typical uses of Solr. |true or false |false
|required |Instructs Solr to reject any attempts to add a document which does not have a value for this field. This property defaults to false. |true or false |false
|useDocValuesAsStored |If the field has <<docvalues.adoc#docvalues,docValues>> enabled, setting this to true would allow the field to be returned as if it were a stored field (even if it has `stored=false`) when matching "`*`" in an <<common-query-parameters.adoc#CommonQueryParameters-Thefl_FieldList_Parameter,fl parameter>>. |true or false |true
|useDocValuesAsStored |If the field has <<docvalues.adoc#docvalues,docValues>> enabled, setting this to true would allow the field to be returned as if it were a stored field (even if it has `stored=false`) when matching "`*`" in an <<common-query-parameters.adoc#fl-field-list-parameter,fl parameter>>. |true or false |true
|large |Large fields are always lazy loaded and will only take up space in the document cache if the actual value is < 512KB. This option requires `stored="true"` and `multiValued="false"`. It's intended for fields that might have very large values so that they don't get cached in memory. |true or false |false
|===
View File
@ -60,7 +60,7 @@ the output would be:
<float name="score">0.343</float>
...
----
* Use in a parameter that is explicitly for specifying functions, such as the EDisMax query parser's <<the-extended-dismax-query-parser.adoc#the-extended-dismax-query-parser,`boost`>> param, or DisMax query parser's <<the-dismax-query-parser.adoc#TheDisMaxQueryParser-Thebf_BoostFunctions_Parameter,`bf` (boost function) parameter>>. (Note that the `bf` parameter actually takes a list of function queries separated by white space and each with an optional boost. Make sure you eliminate any internal white space in single function queries when using `bf`). For example:
* Use in a parameter that is explicitly for specifying functions, such as the EDisMax query parser's <<the-extended-dismax-query-parser.adoc#the-extended-dismax-query-parser,`boost`>> param, or DisMax query parser's <<the-dismax-query-parser.adoc#bf-boost-functions-parameter,`bf` (boost function) parameter>>. (Note that the `bf` parameter actually takes a list of function queries separated by white space and each with an optional boost. Make sure you eliminate any internal white space in single function queries when using `bf`). For example:
+
[source,text]
----
View File
@ -108,7 +108,7 @@ If the configuration options for the built-in merge policies do not fully suit y
</mergePolicyFactory>
----
The example above shows Solr's {solr-javadocs}/solr-core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] being configured to sort documents in merged segments by `"timestamp desc"`, and wrapped around a `TieredMergePolicyFactory` configured to use the values `maxMergeAtOnce=10` and `segmentsPerTier=10` via the `inner` prefix defined by `SortingMergePolicyFactory` 's `wrapped.prefix` option. For more information on using `SortingMergePolicyFactory`, see <<common-query-parameters.adoc#CommonQueryParameters-ThesegmentTerminateEarlyParameter,the segmentTerminateEarly parameter>>.
The example above shows Solr's {solr-javadocs}/solr-core/org/apache/solr/index/SortingMergePolicyFactory.html[`SortingMergePolicyFactory`] being configured to sort documents in merged segments by `"timestamp desc"`, and wrapped around a `TieredMergePolicyFactory` configured to use the values `maxMergeAtOnce=10` and `segmentsPerTier=10` via the `inner` prefix defined by `SortingMergePolicyFactory` 's `wrapped.prefix` option. For more information on using `SortingMergePolicyFactory`, see <<common-query-parameters.adoc#segmentterminateearly-parameter,the segmentTerminateEarly parameter>>.
=== mergeScheduler
View File
@ -67,4 +67,4 @@ The `ltr` stands for Learning To Rank, please see <<learning-to-rank.adoc#learni
== Combining Ranking Queries with Other Solr Features
The `rq` parameter and the re-ranking feature in general work well with other Solr features. For example, it can be used in conjunction with the <<collapse-and-expand-results.adoc#collapse-and-expand-results,collapse parser>> to re-rank the group heads after they've been collapsed. It also preserves the order of documents elevated by the <<the-query-elevation-component.adoc#the-query-elevation-component,elevation component>>. And it even has its own custom explain so you can see how the re-ranking scores were derived when looking at <<common-query-parameters.adoc#CommonQueryParameters-ThedebugParameter,debug information>>.
The `rq` parameter and the re-ranking feature in general work well with other Solr features. For example, it can be used in conjunction with the <<collapse-and-expand-results.adoc#collapse-and-expand-results,collapse parser>> to re-rank the group heads after they've been collapsed. It also preserves the order of documents elevated by the <<the-query-elevation-component.adoc#the-query-elevation-component,elevation component>>. And it even has its own custom explain so you can see how the re-ranking scores were derived when looking at <<common-query-parameters.adoc#debug-parameter,debug information>>.
View File
@ -94,7 +94,7 @@ http://localhost:8983/solr/techproducts/get?id=mydoc&id=IW-02
}
----
Real Time Get requests can also be combined with filter queries, specified with an <<common-query-parameters.adoc#CommonQueryParameters-Thefq_FilterQuery_Parameter,`fq` parameter>>, just like search requests:
Real Time Get requests can also be combined with filter queries, specified with an <<common-query-parameters.adoc#fq-filter-query-parameter,`fq` parameter>>, just like search requests:
[source,text]
----
View File
@ -65,7 +65,7 @@ All of the parameters described in the section <<searching.adoc#searching,Searc
Besides `defaults`, there are other options for the SearchHandler, which are:
* `appends`: This allows definition of parameters that are added to the user query. These might be <<common-query-parameters.adoc#CommonQueryParameters-Thefq_FilterQuery_Parameter,filter queries>>, or other query rules that should be added to each query. There is no mechanism in Solr to allow a client to override these additions, so you should be absolutely sure you always want these parameters applied to queries.
* `appends`: This allows definition of parameters that are added to the user query. These might be <<common-query-parameters.adoc#fq-filter-query-parameter,filter queries>>, or other query rules that should be added to each query. There is no mechanism in Solr to allow a client to override these additions, so you should be absolutely sure you always want these parameters applied to queries.
+
[source,xml]
----
@ -125,7 +125,7 @@ There are several default search components that work with all SearchHandlers wi
|mlt |`solr.MoreLikeThisComponent` |Described in the section <<morelikethis.adoc#morelikethis,MoreLikeThis>>.
|highlight |`solr.HighlightComponent` |Described in the section <<highlighting.adoc#highlighting,Highlighting>>.
|stats |`solr.StatsComponent` |Described in the section <<the-stats-component.adoc#the-stats-component,The Stats Component>>.
|debug |`solr.DebugComponent` |Described in the section on <<common-query-parameters.adoc#CommonQueryParameters-ThedebugParameter,Common Query Parameters>>.
|debug |`solr.DebugComponent` |Described in the section on <<common-query-parameters.adoc#debug-parameter,Common Query Parameters>>.
|expand |`solr.ExpandComponent` |Described in the section <<collapse-and-expand-results.adoc#collapse-and-expand-results,Collapse and Expand Results>>.
|===
View File
@ -22,15 +22,12 @@ The SpellCheck component is designed to provide inline query suggestions based o
The basis for these suggestions can be terms in a field in Solr, externally created text files, or fields in other Lucene indexes.
[[SpellChecking-ConfiguringtheSpellCheckComponent]]
== Configuring the SpellCheckComponent
[[SpellChecking-DefineSpellCheckinsolrconfig.xml]]
=== Define Spell Check in solrconfig.xml
The first step is to specify the source of terms in `solrconfig.xml`. There are three approaches to spell checking in Solr, discussed below.
[[SpellChecking-IndexBasedSpellChecker]]
==== IndexBasedSpellChecker
The `IndexBasedSpellChecker` uses a Solr index as the basis for a parallel index used for spell checking. It requires defining a field as the basis for the index terms; a common practice is to copy terms from some fields (such as `title`, `body`, etc.) to another field created for spell checking. Here is a simple example of configuring `solrconfig.xml` with the `IndexBasedSpellChecker`:
@ -57,7 +54,6 @@ The `spellcheckIndexDir` defines the location of the directory that holds the sp
Finally, _buildOnCommit_ defines whether to build the spell check index at every commit (that is, every time new documents are added to the index). It is optional, and can be omitted if you would rather set it to `false`.
[[SpellChecking-DirectSolrSpellChecker]]
==== DirectSolrSpellChecker
The `DirectSolrSpellChecker` uses terms from the Solr index without building a parallel index like the `IndexBasedSpellChecker`. This spell checker has the benefit of not having to be built regularly, meaning that the terms are always up-to-date with terms in the index. Here is how this might be configured in `solrconfig.xml`:
@ -89,9 +85,8 @@ Because this spell checker is querying the main index, you may want to limit how
The `maxInspections` parameter defines the maximum number of possible matches to review before returning results; the default is 5. `minQueryLength` defines how many characters must be in the query before suggestions are provided; the default is 4.
First, the spellchecker analyzes incoming query words by looking them up in the index. Only query words that are absent from the index, or too rare (below `maxQueryFrequency`), are considered misspelled and used for finding suggestions. Words that are more frequent than `maxQueryFrequency` bypass the spellchecker unchanged. After suggestions for every misspelled word are found, they are filtered for sufficient frequency, with `thresholdTokenFrequency` as the boundary value. These parameters (`maxQueryFrequency` and `thresholdTokenFrequency`) can be a percentage (such as .01, or 1%) or an absolute value (such as 4).
[[SpellChecking-FileBasedSpellChecker]]
==== FileBasedSpellChecker
The `FileBasedSpellChecker` uses an external file as a spelling dictionary. This can be useful if using Solr as a spelling server, or if spelling suggestions don't need to be based on actual terms in the index. In `solrconfig.xml`, you would define the searchComponent like so:
@ -120,7 +115,6 @@ The differences here are the use of the `sourceLocation` to define the location
In the previous example, _name_ is used to name this specific definition of the spellchecker. Multiple definitions can co-exist in a single `solrconfig.xml`, and the _name_ helps to differentiate them. If only defining one spellchecker, no name is required.
====
[[SpellChecking-WordBreakSolrSpellChecker]]
==== WordBreakSolrSpellChecker
`WordBreakSolrSpellChecker` offers suggestions by combining adjacent query terms and/or breaking terms into multiple words. It is a `SpellCheckComponent` enhancement, leveraging Lucene's `WordBreakSpellChecker`. It can detect spelling errors resulting from misplaced whitespace without the use of shingle-based dictionaries and provides collation support for word-break errors, including cases where the user has a mix of single-word spelling errors and word-break errors in the same query. It also provides shard support.
@ -145,7 +139,6 @@ Some of the parameters will be familiar from the discussion of the other spell c
The spellchecker can be configured with a traditional checker (i.e., `DirectSolrSpellChecker`). The results are combined and collations can contain a mix of corrections from both spellcheckers.
[[SpellChecking-AddIttoaRequestHandler]]
=== Add It to a Request Handler
Queries will be sent to a <<query-syntax-and-parsing.adoc#query-syntax-and-parsing,RequestHandler>>. If every request should generate a suggestion, then you would add the following to the `requestHandler` that you are using:
@ -173,151 +166,86 @@ Here is an example with multiple dictionaries:
</requestHandler>
----
[[SpellChecking-SpellCheckParameters]]
== Spell Check Parameters
The SpellCheck component accepts the parameters described in the table below.
The SpellCheck component accepts the parameters described below.
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
`spellcheck`::
This parameter turns on SpellCheck suggestions for the request. If `true`, then spelling suggestions will be generated. This is required if spell checking is desired.
[cols="30,70",options="header"]
|===
|Parameter |Description
|<<SpellChecking-ThespellcheckParameter,spellcheck>> |Turns on or off SpellCheck suggestions for the request. If *true*, then spelling suggestions will be generated.
|<<SpellChecking-Thespellcheck.qorqParameter,spellcheck.q or q>> |Selects the query to be spellchecked.
|<<SpellChecking-Thespellcheck.buildParameter,spellcheck.build>> |Instructs Solr to build a dictionary for use in spellchecking.
|<<SpellChecking-Thespellcheck.collateParameter,spellcheck.collate>> |Causes Solr to build a new query based on the best suggestion for each term in the submitted query.
|<<SpellChecking-Thespellcheck.maxCollationsParameter,spellcheck.maxCollations>> |This parameter specifies the maximum number of collations to return.
|<<SpellChecking-Thespellcheck.maxCollationTriesParameter,spellcheck.maxCollationTries>> |This parameter specifies the number of collation possibilities for Solr to try before giving up.
|<<SpellChecking-Thespellcheck.maxCollationEvaluationsParameter,spellcheck.maxCollationEvaluations>> |This parameter specifies the maximum number of word correction combinations to rank and evaluate prior to deciding which collation candidates to test against the index.
|<<SpellChecking-Thespellcheck.collateExtendedResultsParameter,spellcheck.collateExtendedResults>> |If true, returns an expanded response detailing the collations found. If `spellcheck.collate` is false, this parameter will be ignored.
|<<SpellChecking-Thespellcheck.collateMaxCollectDocsParameter,spellcheck.collateMaxCollectDocs>> |The maximum number of documents to collect when testing potential Collations
|<<SpellChecking-Thespellcheck.collateParam._ParameterPrefix,spellcheck.collateParam.*>> |Specifies param=value pairs that can be used to override normal query params when validating collations
|<<SpellChecking-Thespellcheck.countParameter,spellcheck.count>> |Specifies the maximum number of spelling suggestions to be returned.
|<<SpellChecking-Thespellcheck.dictionaryParameter,spellcheck.dictionary>> |Specifies the dictionary that should be used for spellchecking.
|<<SpellChecking-Thespellcheck.extendedResultsParameter,spellcheck.extendedResults>> |Causes Solr to return additional information about spellcheck results, such as the frequency of each original term in the index (origFreq) as well as the frequency of each suggestion in the index (frequency). Note that this result format differs from the non-extended one as the returned suggestion for a word is actually an array of lists, where each list holds the suggested term and its frequency.
|<<SpellChecking-Thespellcheck.onlyMorePopularParameter,spellcheck.onlyMorePopular>> |Limits spellcheck responses to queries that are more popular than the original query.
|<<SpellChecking-Thespellcheck.maxResultsForSuggestParameter,spellcheck.maxResultsForSuggest>> |The maximum number of hits the request can return in order to both generate spelling suggestions and set the "correctlySpelled" element to "false".
|<<SpellChecking-Thespellcheck.alternativeTermCountParameter,spellcheck.alternativeTermCount>> |The count of suggestions to return for each query term existing in the index and/or dictionary.
|<<SpellChecking-Thespellcheck.reloadParameter,spellcheck.reload>> |Reloads the spellchecker.
|<<SpellChecking-Thespellcheck.accuracyParameter,spellcheck.accuracy>> |Specifies an accuracy value to help decide whether a result is worthwhile.
|<<spellcheck_DICT_NAME,spellcheck.<DICT_NAME>.key>> |Specifies a key/value pair for the implementation handling a given dictionary.
|===
`spellcheck.q` or `q`::
This parameter specifies the query to spellcheck.
+
If `spellcheck.q` is defined, then it is used; otherwise the original input query is used. The `spellcheck.q` parameter is intended to be the original query, minus any extra markup like field names, boosts, and so on. If the `q` parameter is specified, then the `SpellingQueryConverter` class is used to parse it into tokens; otherwise the <<tokenizers.adoc#white-space-tokenizer,`WhitespaceTokenizer`>> is used.
+
The choice of which one to use is up to the application. Essentially, if you have a spelling "ready" version in your application, then it is probably better to use `spellcheck.q`. Otherwise, if you just want Solr to do the job, use the `q` parameter.
[[SpellChecking-ThespellcheckParameter]]
=== The spellcheck Parameter
This parameter turns on SpellCheck suggestions for the request. If *true*, then spelling suggestions will be generated.
[[SpellChecking-Thespellcheck.qorqParameter]]
=== The spellcheck.q or q Parameter
This parameter specifies the query to spellcheck. If `spellcheck.q` is defined, then it is used; otherwise the original input query is used. The `spellcheck.q` parameter is intended to be the original query, minus any extra markup like field names, boosts, and so on. If the `q` parameter is specified, then the `SpellingQueryConverter` class is used to parse it into tokens; otherwise the <<tokenizers.adoc#white-space-tokenizer,`WhitespaceTokenizer`>> is used. The choice of which one to use is up to the application. Essentially, if you have a spelling "ready" version in your application, then it is probably better to use `spellcheck.q`. Otherwise, if you just want Solr to do the job, use the `q` parameter.
[NOTE]
====
The SpellingQueryConverter class does not deal properly with non-ASCII characters. In this case, you have either to use `spellcheck.q`, or implement your own QueryConverter.
====
[[SpellChecking-Thespellcheck.buildParameter]]
=== The spellcheck.build Parameter
If set to *true*, this parameter creates the dictionary that the SolrSpellChecker will use for spell-checking. In a typical search application, you will need to build the dictionary before using the SolrSpellChecker. However, it's not always necessary to build a dictionary first. For example, you can configure the spellchecker to use a dictionary that already exists.
NOTE: The `SpellingQueryConverter` class does not deal properly with non-ASCII characters. In this case, you have either to use `spellcheck.q`, or implement your own QueryConverter.
`spellcheck.build`::
If set to `true`, this parameter creates the dictionary to be used for spell-checking. In a typical search application, you will need to build the dictionary before using the spell check. However, it's not always necessary to build a dictionary first. For example, you can configure the spellchecker to use a dictionary that already exists.
+
The dictionary will take some time to build, so this parameter should not be sent with every request.
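+
A sketch of a one-off build request, assuming a hypothetical request handler named `/spell` with the SpellCheck component registered, and an intentionally misspelled query term:
+
[source,text]
----
http://localhost:8983/solr/techproducts/spell?q=delll&spellcheck=true&spellcheck.build=true
----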
[[SpellChecking-Thespellcheck.reloadParameter]]
=== The spellcheck.reload Parameter
`spellcheck.reload`::
If set to `true`, this parameter reloads the spellchecker. The results depend on the implementation of `SolrSpellChecker.reload()`. In a typical implementation, reloading the spellchecker means reloading the dictionary.
If set to true, this parameter reloads the spellchecker. The results depend on the implementation of `SolrSpellChecker.reload()`. In a typical implementation, reloading the spellchecker means reloading the dictionary.
`spellcheck.count`::
This parameter specifies the maximum number of suggestions that the spellchecker should return for a term. If this parameter isn't set, the value defaults to `1`. If the parameter is set but not assigned a number, the value defaults to `5`. If the parameter is set to a positive integer, that number becomes the maximum number of suggestions returned by the spellchecker.
[[SpellChecking-Thespellcheck.countParameter]]
=== The spellcheck.count Parameter
`spellcheck.onlyMorePopular`::
If `true`, Solr will return suggestions that result in more hits for the query than the existing query. Note that this will return more popular suggestions even when the given query term is present in the index and considered "correct".
This parameter specifies the maximum number of suggestions that the spellchecker should return for a term. If this parameter isn't set, the value defaults to 1. If the parameter is set but not assigned a number, the value defaults to 5. If the parameter is set to a positive integer, that number becomes the maximum number of suggestions returned by the spellchecker.
`spellcheck.maxResultsForSuggest`::
If, for example, this is set to `5` and the user's query returns 5 or fewer results, the spellchecker will report "correctlySpelled=false" and also offer suggestions (and collations if requested). Setting this greater than zero is useful for creating "did-you-mean?" suggestions for queries that return a low number of hits.
[[SpellChecking-Thespellcheck.onlyMorePopularParameter]]
=== The spellcheck.onlyMorePopular Parameter
`spellcheck.alternativeTermCount`::
Defines the number of suggestions to return for each query term existing in the index and/or dictionary. Presumably, users will want fewer suggestions for words with docFrequency>0. Also, setting this value enables context-sensitive spell suggestions.
If *true*, Solr will return suggestions that result in more hits for the query than the existing query. Note that this will return more popular suggestions even when the given query term is present in the index and considered "correct".
`spellcheck.extendedResults`::
If `true`, this parameter causes Solr to return additional information about spellcheck results, such as the frequency of each original term in the index (`origFreq`) as well as the frequency of each suggestion in the index (`frequency`). Note that this result format differs from the non-extended one as the returned suggestion for a word is actually an array of lists, where each list holds the suggested term and its frequency.
[[SpellChecking-Thespellcheck.maxResultsForSuggestParameter]]
=== The spellcheck.maxResultsForSuggest Parameter
For example, if this is set to 5 and the user's query returns 5 or fewer results, the spellchecker will report "correctlySpelled=false" and also offer suggestions (and collations if requested). Setting this greater than zero is useful for creating "did-you-mean?" suggestions for queries that return a low number of hits.
[[SpellChecking-Thespellcheck.alternativeTermCountParameter]]
=== The spellcheck.alternativeTermCount Parameter
Specify the number of suggestions to return for each query term existing in the index and/or dictionary. Presumably, users will want fewer suggestions for words with docFrequency>0. Also, setting this value turns on context-sensitive spell suggestions.
[[SpellChecking-Thespellcheck.extendedResultsParameter]]
=== The spellcheck.extendedResults Parameter
This parameter causes Solr to include additional information about the suggestion, such as the frequency in the index.
[[SpellChecking-Thespellcheck.collateParameter]]
=== The spellcheck.collate Parameter
If *true*, this parameter directs Solr to take the best suggestion for each token (if one exists) and construct a new query from the suggestions. For example, if the input query was "jawa class lording" and the best suggestion for "jawa" was "java" and "lording" was "loading", then the resulting collation would be "java class loading".
The spellcheck.collate parameter only returns collations that are guaranteed to result in hits if re-queried, even when applying original `fq` parameters. This is especially helpful when there is more than one correction per query.
`spellcheck.collate`::
If `true`, this parameter directs Solr to take the best suggestion for each token (if one exists) and construct a new query from the suggestions.
+
For example, if the input query was "jawa class lording" and the best suggestion for "jawa" was "java" and "lording" was "loading", then the resulting collation would be "java class loading".
+
The `spellcheck.collate` parameter only returns collations that are guaranteed to result in hits if re-queried, even when applying original `fq` parameters. This is especially helpful when there is more than one correction per query.
NOTE: This only returns a query to be used. It does not actually run the suggested query.
[[SpellChecking-Thespellcheck.maxCollationsParameter]]
=== The spellcheck.maxCollations Parameter
`spellcheck.maxCollations`::
The maximum number of collations to return. The default is `1`. This parameter is ignored if `spellcheck.collate` is false.
The maximum number of collations to return. The default is *1*. This parameter is ignored if `spellcheck.collate` is false.
`spellcheck.maxCollationTries`::
This parameter specifies the number of collation possibilities for Solr to try before giving up. Lower values ensure better performance. Higher values may be necessary to find a collation that can return results. The default value is `0`, which is equivalent to not checking collations. This parameter is ignored if `spellcheck.collate` is false.
[[SpellChecking-Thespellcheck.maxCollationTriesParameter]]
=== The spellcheck.maxCollationTries Parameter
`spellcheck.maxCollationEvaluations`::
This parameter specifies the maximum number of word correction combinations to rank and evaluate prior to deciding which collation candidates to test against the index. This is a performance safety-net in case a user enters a query with many misspelled words. The default is `10000` combinations, which should work well in most situations.
This parameter specifies the number of collation possibilities for Solr to try before giving up. Lower values ensure better performance. Higher values may be necessary to find a collation that can return results. The default value is `0`, which maintains backwards-compatible (Solr 1.4) behavior (do not check collations). This parameter is ignored if `spellcheck.collate` is false.
`spellcheck.collateExtendedResults`::
If `true`, this parameter returns an expanded response format detailing the collations Solr found. The default value is `false` and this is ignored if `spellcheck.collate` is false.
[[SpellChecking-Thespellcheck.maxCollationEvaluationsParameter]]
=== The spellcheck.maxCollationEvaluations Parameter
This parameter specifies the maximum number of word correction combinations to rank and evaluate prior to deciding which collation candidates to test against the index. This is a performance safety-net in case a user enters a query with many misspelled words. The default is *10,000* combinations, which should work well in most situations.
[[SpellChecking-Thespellcheck.collateExtendedResultsParameter]]
=== The spellcheck.collateExtendedResults Parameter
If *true*, this parameter returns an expanded response format detailing the collations Solr found. The default value is *false* and this is ignored if `spellcheck.collate` is false.
[[SpellChecking-Thespellcheck.collateMaxCollectDocsParameter]]
=== The spellcheck.collateMaxCollectDocs Parameter
This parameter specifies the maximum number of documents that should be collected when testing potential collations against the index. A value of *0* indicates that all documents should be collected, resulting in exact hit-counts. Otherwise an estimation is provided as a performance optimization in cases where exact hit-counts are unnecessary; the higher the value specified, the more precise the estimation.
The default value for this parameter is *0*, but when `spellcheck.collateExtendedResults` is *false*, the optimization is always used as if a *1* had been specified.
[[SpellChecking-Thespellcheck.collateParam._ParameterPrefix]]
=== The spellcheck.collateParam.* Parameter Prefix
`spellcheck.collateMaxCollectDocs`::
This parameter specifies the maximum number of documents that should be collected when testing potential collations against the index. A value of `0` indicates that all documents should be collected, resulting in exact hit-counts. Otherwise an estimation is provided as a performance optimization in cases where exact hit-counts are unnecessary; the higher the value specified, the more precise the estimation.
+
The default value for this parameter is `0`, but when `spellcheck.collateExtendedResults` is false, the optimization is always used as if `1` had been specified.
`spellcheck.collateParam.*` Prefix::
This parameter prefix can be used to specify any additional parameters that you wish the spellchecker to use when internally validating collation queries. For example, even if your regular search results allow for loose matching of one or more query terms via parameters like `q.op=OR` and `mm=20%` you can specify override params such as `spellcheck.collateParam.q.op=AND&spellcheck.collateParam.mm=100%` to require that only collations consisting of words that are all found in at least one document may be returned.
[[SpellChecking-Thespellcheck.dictionaryParameter]]
=== The spellcheck.dictionary Parameter
This parameter causes Solr to use the dictionary named in the parameter's argument. The default setting is "default". This parameter can be used to invoke a specific spellchecker on a per request basis.
[[SpellChecking-Thespellcheck.accuracyParameter]]
=== The spellcheck.accuracy Parameter
`spellcheck.dictionary`::
This parameter causes Solr to use the dictionary named in the parameter's argument. The default setting is `default`. This parameter can be used to invoke a specific spellchecker on a per request basis.
`spellcheck.accuracy`::
Specifies an accuracy value to be used by the spell checking implementation to decide whether a result is worthwhile or not. The value is a float between 0 and 1. Defaults to `Float.MIN_VALUE`.
[[spellcheck_DICT_NAME]]
=== The spellcheck.<DICT_NAME>.key Parameter
Specifies a key/value pair for the implementation handling a given dictionary. The value that is passed through is just `key=value` (`spellcheck.<DICT_NAME>.` is stripped off).
`spellcheck.<DICT_NAME>.key`::
Specifies a key/value pair for the implementation handling a given dictionary. The value that is passed through is just `key=value` (`spellcheck.<DICT_NAME>.` is stripped off).
+
For example, given a dictionary called `foo`, `spellcheck.foo.myKey=myValue` would result in `myKey=myValue` being passed through to the implementation handling the dictionary `foo`.
[[SpellChecking-Example]]
=== Example
=== Spell Check Example
Using Solr's `bin/solr -e techproducts` example, this query shows the results of a simple request that defines a query using the `spellcheck.q` parameter, and forces the collations to require that all input terms match:
@ -368,19 +296,15 @@ Results:
</lst>
----
[[SpellChecking-DistributedSpellCheck]]
== Distributed SpellCheck
The `SpellCheckComponent` also supports spellchecking on distributed indexes. If you are using the SpellCheckComponent on a request handler other than "/select", you must provide the following two parameters:
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
`shards`::
Specifies the shards in your distributed indexing configuration. For more information about distributed indexing, see <<distributed-search-with-index-sharding.adoc#distributed-search-with-index-sharding,Distributed Search with Index Sharding>>
[cols="30,70",options="header"]
|===
|Parameter |Description
|shards |Specifies the shards in your distributed indexing configuration. For more information about distributed indexing, see <<distributed-search-with-index-sharding.adoc#distributed-search-with-index-sharding,Distributed Search with Index Sharding>>
|shards.qt |Specifies the request handler Solr uses for requests to shards. This parameter is not required for the `/select` request handler.
|===
`shards.qt`::
Specifies the request handler Solr uses for requests to shards. This parameter is not required for the `/select` request handler.
For example:
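A sketch with placeholder hosts (the shard URLs and the `/spell` request handler name are illustrative):

[source,text]
----
http://localhost:8983/solr/techproducts/spell?q=delll+ultra+sharp
   &spellcheck=true
   &shards.qt=/spell
   &shards=shard1.example.com:8983/solr/techproducts,shard2.example.com:8983/solr/techproducts
----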
View File
@ -33,50 +33,25 @@ ____
Whether or not you remember this explanation, do remember that the DisMax Query Parser was primarily designed to be easy to use and to accept almost any input without returning an error.
[[TheDisMaxQueryParser-DisMaxParameters]]
== DisMax Parameters
== DisMax Query Parser Parameters
In addition to the common request parameters, highlighting parameters, and simple facet parameters, the DisMax query parser supports the parameters described below. Like the standard query parser, the DisMax query parser allows default parameter values to be specified in `solrconfig.xml`, or overridden by query-time values in the request.
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
[cols="30,70",options="header"]
|===
|Parameter |Description
|<<TheDisMaxQueryParser-TheqParameter,q>> |Defines the raw input strings for the query.
|<<TheDisMaxQueryParser-Theq.altParameter,q.alt>> |Calls the standard query parser and defines query input strings, when the q parameter is not used.
|<<TheDisMaxQueryParser-Theqf_QueryFields_Parameter,qf>> |Query Fields: specifies the fields in the index on which to perform the query. If absent, defaults to `df`.
|<<TheDisMaxQueryParser-Themm_MinimumShouldMatch_Parameter,mm>> |Minimum "Should" Match: specifies a minimum number of clauses that must match in a query. If no 'mm' parameter is specified in the query, or as a default in `solrconfig.xml`, the effective value of the `q.op` parameter (either in the query or as a default in `solrconfig.xml`) is used to influence the behavior. If `q.op` is effectively AND'ed, then mm=100%; if `q.op` is OR'ed, then mm=1. Users who want to force the legacy behavior should set a default value for the 'mm' parameter in their `solrconfig.xml` file. Users should add this as a configured default for their request handlers. This parameter tolerates miscellaneous white spaces in expressions (e.g., `" 3 < -25% 10 < -3\n", " \n-25%\n ", " \n3\n "`).
|<<TheDisMaxQueryParser-Thepf_PhraseFields_Parameter,pf>> |Phrase Fields: boosts the score of documents in cases where all of the terms in the q parameter appear in close proximity.
|<<TheDisMaxQueryParser-Theps_PhraseSlop_Parameter,ps>> |Phrase Slop: specifies the number of positions two terms can be apart in order to match the specified phrase.
|<<TheDisMaxQueryParser-Theqs_QueryPhraseSlop_Parameter,qs>> |Query Phrase Slop: specifies the number of positions two terms can be apart in order to match the specified phrase. Used specifically with the `qf` parameter.
|<<TheDisMaxQueryParser-Thetie_TieBreaker_Parameter,tie>> |Tie Breaker: specifies a float value (which should be something much less than 1) to use as tiebreaker in DisMax queries. Default: 0.0
|<<TheDisMaxQueryParser-Thebq_BoostQuery_Parameter,bq>> |Boost Query: specifies a factor by which a term or phrase should be "boosted" in importance when considering a match.
|<<TheDisMaxQueryParser-Thebf_BoostFunctions_Parameter,bf>> |Boost Functions: specifies functions to be applied to boosts. (See for details about function queries.)
|===
In addition to the common request parameters, highlighting parameters, and simple facet parameters, the DisMax query parser supports the parameters described below. Like the standard query parser, the DisMax query parser allows default parameter values to be specified in `solrconfig.xml`, or overridden by query-time values in the request.
The sections below explain these parameters in detail.
[[TheDisMaxQueryParser-TheqParameter]]
=== The q Parameter
=== q Parameter
The `q` parameter defines the main "query" constituting the essence of the search. The parameter supports raw input strings provided by users with no special escaping. The + and - characters are treated as "mandatory" and "prohibited" modifiers for terms. Text wrapped in balanced quote characters (for example, "San Jose") is treated as a phrase. Any query containing an odd number of quote characters is evaluated as if there were no quote characters at all.
[IMPORTANT]
====
IMPORTANT: The `q` parameter does not support wildcard characters such as *.
The `q` parameter does not support wildcard characters such as *.
====
[[TheDisMaxQueryParser-Theq.altParameter]]
=== The q.alt Parameter
=== q.alt Parameter
If specified, the `q.alt` parameter defines a query (which by default will be parsed using standard query parsing syntax) when the main q parameter is not specified or is blank. The `q.alt` parameter comes in handy when you need something like a query to match all documents (don't forget `&rows=0` for that one!) in order to get collection-wide faceting counts.
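A sketch of that match-all-documents pattern (the `cat` facet field is illustrative):

[source,text]
----
q.alt=*:*&rows=0&facet=true&facet.field=cat
----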
[[TheDisMaxQueryParser-Theqf_QueryFields_Parameter]]
=== The qf (Query Fields) Parameter
=== qf (Query Fields) Parameter
The `qf` parameter introduces a list of fields, each of which is assigned a boost factor to increase or decrease that particular field's importance in the query. For example, the query below:
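A sketch of such a parameter value, reconstructed from the description that follows (the original example may differ):

[source,text]
----
qf="fieldOne^2.3 fieldTwo fieldThree^0.4"
----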
@ -85,8 +60,7 @@ The `qf` parameter introduces a list of fields, each of which is assigned a boos
assigns `fieldOne` a boost of 2.3, leaves `fieldTwo` with the default boost (because no boost factor is specified), and `fieldThree` a boost of 0.4. These boost factors make matches in `fieldOne` much more significant than matches in `fieldTwo`, which in turn are much more significant than matches in `fieldThree`.
[[TheDisMaxQueryParser-Themm_MinimumShouldMatch_Parameter]]
=== The mm (Minimum Should Match) Parameter
=== mm (Minimum Should Match) Parameter
When processing queries, Lucene/Solr recognizes three types of clauses: mandatory, prohibited, and "optional" (also known as "should" clauses). By default, all words or phrases specified in the `q` parameter are treated as "optional" clauses unless they are preceded by a "+" or a "-". When dealing with these "optional" clauses, the `mm` parameter makes it possible to say that a certain minimum number of those clauses must match. The DisMax query parser offers great flexibility in how the minimum number can be specified.
@ -115,27 +89,23 @@ When specifying `mm` values, keep in mind the following:
The default value of `mm` is 100% (meaning that all clauses must match).
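A few illustrative `mm` values (a sketch of the common forms, not an exhaustive list):

[source,text]
----
mm=2         at least 2 of the optional clauses must match
mm=75%       at least 75% of the optional clauses (rounded down) must match
mm=2<-25%    with 1 or 2 optional clauses, all are required; with more than 2, up to 25% may be missing
----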
[[TheDisMaxQueryParser-Thepf_PhraseFields_Parameter]]
=== The pf (Phrase Fields) Parameter
=== pf (Phrase Fields) Parameter
Once the list of matching documents has been identified using the `fq` and `qf` parameters, the `pf` parameter can be used to "boost" the score of documents in cases where all of the terms in the q parameter appear in close proximity.
The format is the same as that used by the `qf` parameter: a list of fields and "boosts" to associate with each of them when making phrase queries out of the entire q parameter.
[[TheDisMaxQueryParser-Theps_PhraseSlop_Parameter]]
=== The ps (Phrase Slop) Parameter
=== ps (Phrase Slop) Parameter
The `ps` parameter specifies the amount of "phrase slop" to apply to queries specified with the pf parameter. Phrase slop is the number of positions one token needs to be moved in relation to another token in order to match a phrase specified in a query.
[[TheDisMaxQueryParser-Theqs_QueryPhraseSlop_Parameter]]
=== The qs (Query Phrase Slop) Parameter
=== qs (Query Phrase Slop) Parameter
The `qs` parameter specifies the amount of slop permitted on phrase queries explicitly included in the user's query string with the `qf` parameter. As explained above, slop refers to the number of positions one token needs to be moved in relation to another token in order to match a phrase specified in a query.
[[TheDisMaxQueryParser-Thetie_TieBreaker_Parameter]]
=== The tie (Tie Breaker) Parameter
The `tie` parameter specifies a float value (which should be something much less than 1) to use as tiebreaker in DisMax queries.
@ -145,8 +115,7 @@ When a term from the user's input is tested against multiple fields, more than o
A value of "0.0" - the default - makes the query a pure "disjunction max query": that is, only the maximum scoring subquery contributes to the final score. A value of "1.0" makes the query a pure "disjunction sum query" where it doesn't matter what the maximum scoring sub query is, because the final score will be the sum of the subquery scores. Typically a low value, such as 0.1, is useful.
[[TheDisMaxQueryParser-Thebq_BoostQuery_Parameter]]
=== The bq (Boost Query) Parameter
=== bq (Boost Query) Parameter
The `bq` parameter specifies an additional, optional, query clause that will be added to the user's main query to influence the score. For example, if you wanted to add a relevancy boost for recent documents:
@ -159,8 +128,7 @@ bq=date:[NOW/DAY-1YEAR TO NOW/DAY]
You can specify multiple `bq` parameters. If you want your query to be parsed as separate clauses with separate boosts, use multiple `bq` parameters.
[[TheDisMaxQueryParser-Thebf_BoostFunctions_Parameter]]
=== The bf (Boost Functions) Parameter
=== bf (Boost Functions) Parameter
The `bf` parameter specifies functions (with optional boosts) that will be used to construct FunctionQueries which will be added to the user's main query as optional clauses that will influence the score. Any function supported natively by Solr can be used, along with a boost value. For example:
@ -180,7 +148,7 @@ bf=recip(rord(creationDate),1,1000,1000)
bq={!func}recip(rord(creationDate),1,1000,1000)
----
[[TheDisMaxQueryParser-ExamplesofQueriesSubmittedtotheDisMaxQueryParser]]
== Examples of Queries Submitted to the DisMax Query Parser
All of the sample URLs in this section assume you are running Solr's "techproducts" example:
View File
@ -1,4 +1,4 @@
= The Extended DisMax Query Parser
= The Extended DisMax (eDismax) Query Parser
:page-shortname: the-extended-dismax-query-parser
:page-permalink: the-extended-dismax-query-parser.html
// Licensed to the Apache Software Foundation (ASF) under one
@ -33,76 +33,52 @@ In addition to supporting all the DisMax query parser parameters, Extended Disma
* supports pure negative nested queries: queries such as `+foo (-foo)` will match all documents.
* lets you specify which fields the end user is allowed to query, and to disallow direct fielded searches.
[[TheExtendedDisMaxQueryParser-ExtendedDisMaxParameters]]
== Extended DisMax Parameters
In addition to all the <<the-dismax-query-parser.adoc#TheDisMaxQueryParser-DisMaxParameters,DisMax parameters>>, Extended DisMax includes these query parameters:
In addition to all the <<the-dismax-query-parser.adoc#dismax-query-parser-parameters,DisMax parameters>>, Extended DisMax includes these query parameters:
[[TheExtendedDisMaxQueryParser-ThesowParameter]]
=== The sow Parameter
`sow`::
Split on whitespace. If set to `false`, whitespace-separated term sequences will be provided to text analysis in one shot, enabling proper function of analysis filters that operate over term sequences, e.g., multi-word synonyms and shingles. Defaults to `true`, so text analysis is invoked separately for each individual whitespace-separated term.
Split on whitespace: if set to `false`, whitespace-separated term sequences will be provided to text analysis in one shot, enabling proper function of analysis filters that operate over term sequences, e.g. multi-word synonyms and shingles. Defaults to `true`: text analysis is invoked separately for each individual whitespace-separated term.
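+
For example, assuming a hypothetical synonym rule `sea biscuit => seabiscuit` in the query-time analyzer, the following request lets a graph-aware synonym filter see both terms together:
+
[source,text]
----
q=sea biscuit&sow=false
----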
`mm.autoRelax`::
If `true`, the number of clauses required (<<the-dismax-query-parser.adoc#mm-minimum-should-match-parameter,minimum should match>>) will automatically be relaxed if a clause is removed (e.g., by a stopword filter) from some but not all <<the-dismax-query-parser.adoc#qf-query-fields-parameter,`qf`>> fields. Use this parameter as a workaround if queries return zero hits due to uneven stopword removal between the `qf` fields.
+
Note that relaxing `mm` may cause undesired side effects, such as hurting the precision of the search, depending on the nature of your index content.
[[TheExtendedDisMaxQueryParser-Themm.autoRelaxParameter]]
=== The mm.autoRelax Parameter
If true, the number of clauses required (<<the-dismax-query-parser.adoc#TheDisMaxQueryParser-Themm_MinimumShouldMatch_Parameter,minimum should match>>) will automatically be relaxed if a clause is removed (by e.g. stopwords filter) from some but not all <<the-dismax-query-parser.adoc#TheDisMaxQueryParser-Theqf_QueryFields_Parameter,`qf`>> fields. Use this parameter as a workaround if you experience that queries return zero hits due to uneven stopword removal between the `qf` fields.
Note that relaxing mm may cause undesired side effects, hurting the precision of the search, depending on the nature of your index content.
[[TheExtendedDisMaxQueryParser-TheboostParameter]]
=== The boost Parameter
A multivalued list of strings parsed as queries with scores multiplied by the score from the main query for all matching documents. This parameter is shorthand for wrapping the query produced by eDisMax using the `BoostQParserPlugin`
[[TheExtendedDisMaxQueryParser-ThelowercaseOperatorsParameter]]
=== The lowercaseOperators Parameter
`boost`::
A multivalued list of strings parsed as queries with scores multiplied by the score from the main query for all matching documents. This parameter is shorthand for wrapping the query produced by eDisMax using the `BoostQParserPlugin`.
`lowercaseOperators`::
A Boolean parameter indicating if lowercase "and" and "or" should be treated the same as operators "AND" and "OR".
Defaults to `false`.
[[TheExtendedDisMaxQueryParser-ThepsParameter]]
=== The ps Parameter
`ps`::
Phrase Slop. The default amount of slop - distance between terms - on phrase queries built with `pf`, `pf2` and/or `pf3` fields (affects boosting). See also the section <<Using 'Slop'>> below.
Default amount of slop on phrase queries built with `pf`, `pf2` and/or `pf3` fields (affects boosting).
`pf2`::
[[TheExtendedDisMaxQueryParser-Thepf2Parameter]]
=== The pf2 Parameter
A multivalued list of fields with optional weights, based on pairs of word shingles.
[[TheExtendedDisMaxQueryParser-Theps2Parameter]]
=== The ps2 Parameter
A multivalued list of fields with optional weights. Similar to `pf`, but based on _pairs_ of word shingles.
`ps2`::
This is similar to `ps` but overrides the slop factor used for `pf2`. If not specified, `ps` is used.
[[TheExtendedDisMaxQueryParser-Thepf3Parameter]]
=== The pf3 Parameter
A multivalued list of fields with optional weights, based on triplets of word shingles. Similar to `pf`, except that instead of building a phrase per field out of all the words in the input, it builds a set of phrases for each field out of each triplet of word shingles.
[[TheExtendedDisMaxQueryParser-Theps3Parameter]]
=== The ps3 Parameter
`pf3`::
A multivalued list of fields with optional weights, based on triplets of word shingles. Similar to `pf`, except that instead of building a phrase per field out of all the words in the input, it builds a set of phrases for each field out of each _triplet_ of word shingles.
`ps3`::
This is similar to `ps` but overrides the slop factor used for `pf3`. If not specified, `ps` is used.
[[TheExtendedDisMaxQueryParser-ThestopwordsParameter]]
=== The stopwords Parameter
A Boolean parameter indicating if the `StopFilterFactory` configured in the query analyzer should be respected when parsing the query: if it is false, then the `StopFilterFactory` in the query analyzer is ignored.
[[TheExtendedDisMaxQueryParser-TheufParameter]]
=== The uf Parameter
`stopwords`::
A Boolean parameter indicating if the `StopFilterFactory` configured in the query analyzer should be respected when parsing the query. If this is set to `false`, then the `StopFilterFactory` in the query analyzer is ignored.
`uf`::
Specifies which schema fields the end user is allowed to explicitly query. This parameter supports wildcards. The default is to allow all fields, equivalent to `uf=\*`. To allow only the title field, use `uf=title`. To allow title and all fields ending with '_s', use `uf=title,*_s`. To allow all fields except title, use `uf=*,-title`. To disallow all fielded searches, use `uf=-*`.
[[TheExtendedDisMaxQueryParser-Fieldaliasingusingper-fieldqfoverrides]]
=== Field aliasing using per-field qf overrides
=== Field Aliasing using Per-Field qf Overrides
Per-field overrides of the `qf` parameter may be specified to provide 1-to-many aliasing from field names specified in the query string, to field names used in the underlying query. By default, no aliasing is used and field names specified in the query string are treated as literal field names in the index.
[[TheExtendedDisMaxQueryParser-ExamplesofQueriesSubmittedtotheExtendedDisMaxQueryParser]]
== Examples of Queries Submitted to the Extended DisMax Query Parser
== Examples of eDismax Queries
All of the sample URLs in this section assume you are running Solr's "```techproducts```" example:
@ -158,14 +134,12 @@ qf=title text last_name first_name
f.name.qf=last_name first_name
----
[[TheExtendedDisMaxQueryParser-Usingnegativeboost]]
== Using negative boost
== Using Negative Boost
Negative query boosts have been supported at the "Query" object level for a long time (resulting in negative scores for matching documents). Now the QueryParsers have been updated to handle this too.
[[TheExtendedDisMaxQueryParser-Using_slop_]]
== Using 'slop'
== Using 'Slop'
`Dismax` and `Edismax` can run queries against all query fields, and also run a query in the form of a phrase against the phrase fields. (This will work only for boosting documents, not actually for matching.) However, that phrase query can have a 'slop,' which is the distance between the terms of the query while still considering it a phrase match. For example:
@ -223,8 +197,7 @@ A document that contains "Hans Anderson" will match, but a document that contain
Finally, in addition to the phrase fields (`pf`) parameter, `edismax` also supports the `pf2` and `pf3` parameters, for fields over which to create bigram and trigram phrase queries. The phrase slop for these parameters' queries can be specified using the `ps2` and `ps3` parameters, respectively. If you use `pf2`/`pf3` but not `ps2`/`ps3`, then the phrase slop for these parameters' queries will be taken from the `ps` parameter, if any.
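For example (fields and weights are illustrative), bigram and trigram phrase boosts with their own slop values might be requested as:

[source,text]
----
pf2=title^5 description&ps2=1&pf3=title^2&ps3=2
----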
[[TheExtendedDisMaxQueryParser-Usingthe_magicfields__val_and_query_]]
== Using the "magic fields" \_val_ and \_query_
== Using the "Magic Fields" \_val_ and \_query_
The Solr Query Parser's use of `\_val_` and `\_query_` differs from the Lucene Query Parser in the following ways:
@ -257,9 +230,4 @@ createdate:[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]
createdate:[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]
----
[IMPORTANT]
====
`TO` must be uppercase, or Solr will report a 'Range Group' error.
====
IMPORTANT: `TO` must be uppercase, or Solr will report a 'Range Group' error.

View File

@ -22,31 +22,28 @@ Solr's default Query Parser is also known as the "```lucene```" parser.
The key advantage of the standard query parser is that it supports a robust and fairly intuitive syntax allowing you to create a variety of structured queries. The largest disadvantage is that it's very intolerant of syntax errors, as compared with something like the <<the-dismax-query-parser.adoc#the-dismax-query-parser,DisMax>> query parser which is designed to throw as few errors as possible.
[[TheStandardQueryParser-StandardQueryParserParameters]]
== Standard Query Parser Parameters
In addition to the <<common-query-parameters.adoc#common-query-parameters,Common Query Parameters>>, <<faceting.adoc#faceting,Faceting Parameters>>, <<highlighting.adoc#highlighting,Highlighting Parameters>>, and <<morelikethis.adoc#morelikethis,MoreLikeThis Parameters>>, the standard query parser supports the parameters described in the table below.
// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
`q`::
Defines a query using standard query syntax. This parameter is mandatory.
[cols="30,70",options="header"]
|===
|Parameter |Description
|q |Defines a query using standard query syntax. This parameter is mandatory.
|q.op |Specifies the default operator for query expressions, overriding the default operator specified in the Schema. Possible values are "AND" or "OR".
|df |Specifies a default field, overriding the definition of a default field in the Schema.
|sow |Split on whitespace: if set to `false`, whitespace-separated term sequences will be provided to text analysis in one shot, enabling proper function of analysis filters that operate over term sequences, e.g. multi-word synonyms and shingles. Defaults to `true`: text analysis is invoked separately for each individual whitespace-separated term.
|===
`q.op`::
Specifies the default operator for query expressions, overriding the default operator specified in the Schema. Possible values are "AND" or "OR".
`df`::
Specifies a default field, overriding the definition of a default field in the Schema.
`sow`::
Split on whitespace: if set to `false`, whitespace-separated term sequences will be provided to text analysis in one shot, enabling proper function of analysis filters that operate over term sequences, e.g. multi-word synonyms and shingles. Defaults to `true`: text analysis is invoked separately for each individual whitespace-separated term.
Default parameter values are specified in `solrconfig.xml`, or overridden by query-time values in the request.
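For example (field names are illustrative), several of these parameters can be combined in a single request:

[source,text]
----
q=solr memory&q.op=AND&df=text&sow=false
----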
== Standard Query Parser Response
[[TheStandardQueryParser-TheStandardQueryParser_sResponse]]
== The Standard Query Parser's Response
By default, the response from the standard query parser contains one `<result>` block, which is unnamed. If the <<common-query-parameters.adoc#debug-parameter,`debug` parameter>> is used, then an additional `<lst>` block will be returned, using the name "debug". This will contain useful debugging info, including the original query string, the parsed query string, and explain info for each document in the <result> block. If the <<common-query-parameters.adoc#explainother-parameter,`explainOther` parameter>> is also used, then additional explain info will be provided for all the documents matching that query.
By default, the response from the standard query parser contains one `<result>` block, which is unnamed. If the <<common-query-parameters.adoc#CommonQueryParameters-ThedebugParameter,`debug` parameter>> is used, then an additional `<lst>` block will be returned, using the name "debug". This will contain useful debugging info, including the original query string, the parsed query string, and explain info for each document in the <result> block. If the <<common-query-parameters.adoc#CommonQueryParameters-TheexplainOtherParameter,`explainOther` parameter>> is also used, then additional explain info will be provided for all the documents matching that query.
[[TheStandardQueryParser-SampleResponses]]
=== Sample Responses
This section presents examples of responses from the standard query parser.
@ -97,7 +94,6 @@ Results:
</response>
----
[[TheStandardQueryParser-SpecifyingTermsfortheStandardQueryParser]]
== Specifying Terms for the Standard Query Parser
A query to the standard query parser is broken up into terms and operators. There are two types of terms: single terms and phrases.
@ -107,19 +103,12 @@ A query to the standard query parser is broken up into terms and operators. Ther
Multiple terms can be combined together with Boolean operators to form more complex queries (as described below).
[IMPORTANT]
====
IMPORTANT: It is important that the analyzer used for queries parses terms and phrases in a way that is consistent with the way the analyzer used for indexing parses terms and phrases; otherwise, searches may produce unexpected results.
It is important that the analyzer used for queries parses terms and phrases in a way that is consistent with the way the analyzer used for indexing parses terms and phrases; otherwise, searches may produce unexpected results.
====
[[TheStandardQueryParser-TermModifiers]]
=== Term Modifiers
Solr supports a variety of term modifiers that add flexibility or precision, as needed, to searches. These modifiers include wildcard characters, characters for making a search "fuzzy" or more general, and so on. The sections below describe these modifiers in detail.
[[TheStandardQueryParser-WildcardSearches]]
=== Wildcard Searches
Solr's standard query parser supports single and multiple character wildcard searches within single terms. Wildcard characters can be applied to single terms, but not to search phrases.
@ -133,7 +122,6 @@ Solr's standard query parser supports single and multiple character wildcard sea
|Multiple characters (matches zero or more sequential characters) |* |The wildcard search: `tes*` would match test, testing, and tester. You can also use wildcard characters in the middle of a term. For example: `te*t` would match test and text. `*est` would match pest and test.
|===
[[TheStandardQueryParser-FuzzySearches]]
=== Fuzzy Searches
Solr's standard query parser supports fuzzy searches based on the Damerau-Levenshtein Distance or Edit Distance algorithm. Fuzzy searches discover terms that are similar to a specified term without necessarily being an exact match. To perform a fuzzy search, use the tilde ~ symbol at the end of a single-word term. For example, to search for a term similar in spelling to "roam," use the fuzzy search:
@ -148,14 +136,8 @@ An optional distance parameter specifies the maximum number of edits allowed, be
This will match terms like roams and foam, but not foams, since foams has an edit distance of 2.
[IMPORTANT]
====
IMPORTANT: In many cases, stemming (reducing terms to a common stem) can produce similar effects to fuzzy searches and wildcard searches.
In many cases, stemming (reducing terms to a common stem) can produce similar effects to fuzzy searches and wildcard searches.
====
[[TheStandardQueryParser-ProximitySearches]]
=== Proximity Searches
A proximity search looks for terms that are within a specific distance from one another.
@ -166,7 +148,6 @@ To perform a proximity search, add the tilde character ~ and a numeric value to
The distance referred to here is the number of term movements needed to match the specified phrase. In the example above, if "apache" and "jakarta" were 10 spaces apart in a field, but "apache" appeared before "jakarta", more than 10 term movements would be required to move the terms together and position "apache" to the right of "jakarta" with a space in between.
[[TheStandardQueryParser-RangeSearches]]
=== Range Searches
A range search specifies a range of values for a field (a range with an upper bound and a lower bound). The query matches documents whose values for the specified field or fields fall within the range. Range queries can be inclusive or exclusive of the upper and lower bounds. Sorting is done lexicographically, except on numeric fields. For example, the range query below matches all documents whose `popularity` field has a value between 52 and 10,000, inclusive.
@ -185,8 +166,6 @@ The brackets around a query determine its inclusiveness.
* Curly brackets `{` & `}` denote an exclusive range query that matches values between the upper and lower bounds, but excluding the upper and lower bounds themselves.
* You can mix these types so one end of the range is inclusive and the other is exclusive. Here's an example: `count:{1 TO 10]`
[[TheStandardQueryParser-BoostingaTermwith_]]
=== Boosting a Term with "^"
Lucene/Solr provides the relevance level of matching documents based on the terms found. To boost a term use the caret symbol `^` with a boost factor (a number) at the end of the term you are searching. The higher the boost factor, the more relevant the term will be.
@ -204,7 +183,6 @@ This will make documents with the term jakarta appear more relevant. You can als
By default, the boost factor is 1. Although the boost factor must be positive, it can be less than 1 (for example, it could be 0.2).
[[TheStandardQueryParser-ConstantScorewith_]]
=== Constant Score with "^="
Constant score queries are created with `<query_clause>^=<score>`, which sets the entire clause to the specified score for any documents matching that clause. This is desirable when you only care about matches for a particular clause and don't want other relevancy factors such as term frequency (the number of times the term appears in the field) or inverse document frequency (a measure across the whole index for how rare a term is in a field).
@ -214,9 +192,7 @@ Example:
[source,text]
(description:blue OR color:blue)^=1.0 text:shoes
[[TheStandardQueryParser-SpecifyingFieldsinaQuerytotheStandardQueryParser]]
== Specifying Fields in a Query to the Standard Query Parser
== Querying Specific Fields
Data indexed in Solr is organized in fields, which are <<defining-fields.adoc#defining-fields,defined in the Solr Schema>>. Searches can take advantage of fields to add precision to queries. For example, you can search for a term only in a specific field, such as a title field.
@ -234,7 +210,6 @@ Since text is the default field, the field indicator is not required; hence the
The field is only valid for the term that it directly precedes, so the query `title:Do it right` will find only "Do" in the title field. It will find "it" and "right" in the default field (in this case the text field).
[[TheStandardQueryParser-BooleanOperatorsSupportedbytheStandardQueryParser]]
== Boolean Operators Supported by the Standard Query Parser
Boolean operators allow you to apply Boolean logic to queries, requiring the presence or absence of specific terms or conditions in fields in order to match documents. The table below summarizes the Boolean operators supported by the standard query parser.
@ -253,19 +228,9 @@ Boolean operators allow you to apply Boolean logic to queries, requiring the pre
Boolean operators allow terms to be combined through logic operators. Lucene supports AND, "`+`", OR, NOT and "`-`" as Boolean operators.
[IMPORTANT]
====
IMPORTANT: When specifying Boolean operators with keywords such as AND or NOT, the keywords must appear in all uppercase.
When specifying Boolean operators with keywords such as AND or NOT, the keywords must appear in all uppercase.
====
[NOTE]
====
The standard query parser supports all the Boolean operators listed in the table above. The DisMax query parser supports only `+` and `-`.
====
NOTE: The standard query parser supports all the Boolean operators listed in the table above. The DisMax query parser supports only `+` and `-`.
The OR operator is the default conjunction operator. This means that if there is no Boolean operator between two terms, the OR operator is used. The OR operator links two terms and finds a matching document if either of the terms exist in a document. This is equivalent to a union using sets. The symbol || can be used in place of the word OR.
@ -277,8 +242,6 @@ or
`"jakarta apache" OR jakarta`
[[TheStandardQueryParser-TheBooleanOperator_]]
=== The Boolean Operator "+"
The `+` symbol (also known as the "required" operator) requires that the term after the `+` symbol exist somewhere in a field in at least one document in order for the query to return a match.
@ -287,15 +250,8 @@ For example, to search for documents that must contain "jakarta" and that may or
`+jakarta lucene`
[NOTE]
====
NOTE: This operator is supported by both the standard query parser and the DisMax query parser.
This operator is supported by both the standard query parser and the DisMax query parser.
====
[[TheStandardQueryParser-TheBooleanOperatorAND_]]
=== The Boolean Operator AND ("&&")
The AND operator matches documents where both terms exist anywhere in the text of a single document. This is equivalent to an intersection using sets. The symbol `&&` can be used in place of the word AND.
@ -307,7 +263,6 @@ To search for documents that contain "jakarta apache" and "Apache Lucene," use e
`"jakarta apache" && "Apache Lucene"`
[[TheStandardQueryParser-TheBooleanOperatorNOT_]]
=== The Boolean Operator NOT ("!")
The NOT operator excludes documents that contain the term after NOT. This is equivalent to a difference using sets. The symbol `!` can be used in place of the word NOT.
@ -318,7 +273,6 @@ The following queries search for documents that contain the phrase "jakarta apac
`"jakarta apache" ! "Apache Lucene"`
[[TheStandardQueryParser-TheBooleanOperator-]]
=== The Boolean Operator "-"
The `-` symbol or "prohibit" operator excludes documents that contain the term after the `-` symbol.
@ -327,7 +281,6 @@ For example, to search for documents that contain "jakarta apache" but not "Apac
`"jakarta apache" -"Apache Lucene"`
[[TheStandardQueryParser-EscapingSpecialCharacters]]
=== Escaping Special Characters
Solr gives the following characters special meaning when they appear in a query:
@ -341,7 +294,6 @@ To make Solr interpret any of these characters literally, rather as a special ch
\(1\+1\)\:2
----
[[TheStandardQueryParser-GroupingTermstoFormSub-Queries]]
== Grouping Terms to Form Sub-Queries
Lucene/Solr supports using parentheses to group clauses to form sub-queries. This can be very useful if you want to control the Boolean logic for a query.
@ -352,15 +304,13 @@ The query below searches for either "jakarta" or "apache" and "website":
This adds precision to the query, requiring that the term "website" exist, along with either term "jakarta" and "apache."
[[TheStandardQueryParser-GroupingClauseswithinaField]]
=== Grouping Clauses within a Field
To apply two or more Boolean operators to a single field in a search, group the Boolean clauses within parentheses. For example, the query below searches for a title field that contains both the word "return" and the phrase "pink panther":
`title:(+return +"pink panther")`
[[TheStandardQueryParser-Comments]]
== Comments
== Comments in Queries
C-Style comments are supported in query strings.
@ -370,7 +320,6 @@ Example:
Comments may be nested.
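An illustrative query string containing a comment (terms are hypothetical):

[source,text]
----
"jakarta apache" /* this is a comment in the query */ OR jakarta
----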
[[TheStandardQueryParser-DifferencesbetweenLuceneQueryParserandtheSolrStandardQueryParser]]
== Differences between Lucene Query Parser and the Solr Standard Query Parser
Solr's standard query parser differs from the Lucene Query Parser in the following ways:
@ -399,7 +348,6 @@ This can even be used to cache individual clauses of complex filter queries. In
* Constant score queries are created with `<query_clause>^=<score>`, which sets the entire clause to the specified score for any documents matching that clause:
** `q=(description:blue color:blue)^=1.0 title:blue^=5.0`
[[TheStandardQueryParser-SpecifyingDatesandTimes]]
=== Specifying Dates and Times
Queries against fields using the `TrieDateField` type (typically range queries) should use the <<working-with-dates.adoc#working-with-dates,appropriate date syntax>>:
@ -410,9 +358,3 @@ Queries against fields using the `TrieDateField` type (typically range queries)
* `pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY]`
* `createdate:[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]`
* `createdate:[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]`
[[TheStandardQueryParser-RelatedTopics]]
== Related Topics
* <<local-parameters-in-queries.adoc#local-parameters-in-queries,Local Parameters in Queries>>
* <<other-parsers.adoc#other-parsers,Other Parsers>>

View File

@ -192,4 +192,4 @@ Here we compute some statistics for the price field. The min, max, mean, 90th, a
Sets of `stats.field` parameters can be referenced by `'tag'` when using Pivot Faceting to compute multiple statistics at every level (i.e.: field) in the tree of pivot constraints.
For more information and a detailed example, please see <<faceting.adoc#Faceting-CombiningStatsComponentWithPivots,Combining Stats Component With Pivots>>.
For more information and a detailed example, please see <<faceting.adoc#combining-stats-component-with-pivots,Combining Stats Component With Pivots>>.

View File

@ -38,8 +38,8 @@ public class MoveReplicaSuggester extends Suggester {
//iterate through elements and identify the least loaded
List<Clause.Violation> leastSeriousViolation = null;
Integer targetNodeIndex = null;
Integer fromNodeIndex = null;
ReplicaInfo fromReplicaInfo = null;
Integer sourceNodeIndex = null;
ReplicaInfo sourceReplicaInfo = null;
for (Pair<ReplicaInfo, Row> fromReplica : getValidReplicas(true, true, -1)) {
Row fromRow = fromReplica.second();
ReplicaInfo replicaInfo = fromReplica.first();
@ -62,17 +62,17 @@ public class MoveReplicaSuggester extends Suggester {
if (!containsNewErrors(errs) && isLessSerious(errs, leastSeriousViolation)) {
leastSeriousViolation = errs;
targetNodeIndex = j;
fromNodeIndex = i;
fromReplicaInfo = replicaInfo;
sourceNodeIndex = i;
sourceReplicaInfo = replicaInfo;
}
}
}
if (targetNodeIndex != null && fromNodeIndex != null) {
getMatrix().set(fromNodeIndex, getMatrix().get(fromNodeIndex).removeReplica(fromReplicaInfo.collection, fromReplicaInfo.shard, fromReplicaInfo.type).first());
getMatrix().set(targetNodeIndex, getMatrix().get(targetNodeIndex).addReplica(fromReplicaInfo.collection, fromReplicaInfo.shard, fromReplicaInfo.type));
if (targetNodeIndex != null && sourceNodeIndex != null) {
getMatrix().set(sourceNodeIndex, getMatrix().get(sourceNodeIndex).removeReplica(sourceReplicaInfo.collection, sourceReplicaInfo.shard, sourceReplicaInfo.type).first());
getMatrix().set(targetNodeIndex, getMatrix().get(targetNodeIndex).addReplica(sourceReplicaInfo.collection, sourceReplicaInfo.shard, sourceReplicaInfo.type));
return new CollectionAdminRequest.MoveReplica(
fromReplicaInfo.collection,
fromReplicaInfo.name,
sourceReplicaInfo.collection,
sourceReplicaInfo.name,
getMatrix().get(targetNodeIndex).node);
}
return null;

View File

@ -73,7 +73,7 @@ public class CloudSolrStream extends TupleStream implements Expressible {
protected String zkHost;
protected String collection;
protected SolrParams params;
protected ModifiableSolrParams params;
protected Map<String, String> fieldMappings;
protected StreamComparator comp;
private boolean trace;
@ -172,18 +172,14 @@ public class CloudSolrStream extends TupleStream implements Expressible {
// collection
expression.addParameter(collection);
// parameters
ModifiableSolrParams mParams = new ModifiableSolrParams(SolrParams.toMultiMap(params.toNamedList()));
for (Entry<String, String[]> param : mParams.getMap().entrySet()) {
String value = String.join(",", param.getValue());
// SOLR-8409: This is a special case where the params contain a " character
// Do note that in any other BASE streams with parameters where a " might come into play
// that this same replacement needs to take place.
value = value.replace("\"", "\\\"");
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), value));
for (Entry<String, String[]> param : params.getMap().entrySet()) {
for (String val : param.getValue()) {
// SOLR-8409: Escaping the " is a special case.
// Note that this same replacement needs to take place in any other BASE streams
// whose parameters might contain a " character.
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(),
val.replace("\"", "\\\"")));
}
}
// zkHost
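The effect of this change is that multi-valued parameters such as `fq` now round-trip through `toExpression()` as separate named parameters instead of a single comma-joined value. A minimal client-side sketch (hypothetical collection name and ZooKeeper address; it mirrors the expression test updated later in this commit):

[source,java]
----
import org.apache.solr.client.solrj.io.stream.CloudSolrStream;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;

public class MultiValuedParamRoundTrip {
  public static void main(String[] args) throws Exception {
    // Hypothetical collection name and ZooKeeper address.
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", "localhost:9983")
        .withFunctionName("search", CloudSolrStream.class);

    CloudSolrStream stream = new CloudSolrStream(
        StreamExpressionParser.parse(
            "search(collection1, q=*:*, fl=\"id,a_s\", sort=\"a_s asc\", "
                + "fq=\"a_s:one\", fq=\"a_s:two\")"),
        factory);

    // Both fq values are preserved as separate named parameters.
    System.out.println(stream.toExpression(factory));
  }
}
----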

View File

@ -66,7 +66,7 @@ public class FacetStream extends TupleStream implements Expressible {
private List<Tuple> tuples = new ArrayList<Tuple>();
private int index;
private String zkHost;
private SolrParams params;
private ModifiableSolrParams params;
private String collection;
protected transient SolrClientCache cache;
protected transient CloudSolrClient cloudSolrClient;
@ -216,7 +216,7 @@ public class FacetStream extends TupleStream implements Expressible {
private void init(String collection, SolrParams params, Bucket[] buckets, FieldComparator[] bucketSorts, Metric[] metrics, int bucketSizeLimit, String zkHost) throws IOException {
this.zkHost = zkHost;
this.params = params;
this.params = new ModifiableSolrParams(params);
this.buckets = buckets;
this.metrics = metrics;
this.bucketSizeLimit = bucketSizeLimit;
@ -242,11 +242,11 @@ public class FacetStream extends TupleStream implements Expressible {
expression.addParameter(collection);
// parameters
ModifiableSolrParams tmpParams = new ModifiableSolrParams(params);
for (Entry<String, String[]> param : tmpParams.getMap().entrySet()) {
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(),
String.join(",", param.getValue())));
for (Entry<String, String[]> param : params.getMap().entrySet()) {
for (String val : param.getValue()) {
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), val));
}
}
// buckets

View File

@ -0,0 +1,221 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.stream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.SingleValueComparator;
import org.apache.solr.client.solrj.io.comp.StreamComparator;
import org.apache.solr.client.solrj.io.eval.StreamEvaluator;
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
public class PlotStream extends TupleStream implements Expressible {
private static final long serialVersionUID = 1;
private StreamContext streamContext;
private Map<String,String> stringParams = new HashMap<>();
private Map<String,StreamEvaluator> evaluatorParams = new HashMap<>();
private Map<String,TupleStream> streamParams = new HashMap<>();
private List<String> fieldNames = new ArrayList<>();
private Map<String, String> fieldLabels = new HashMap<>();
private boolean finished;
public PlotStream(StreamExpression expression, StreamFactory factory) throws IOException {
fieldNames.add("plot");
fieldNames.add("data");
fieldLabels.put("plot","plot");
fieldLabels.put("data", "data");
List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
//Get all the named params
for(StreamExpressionNamedParameter np : namedParams) {
String name = np.getName();
//fieldNames.add(name);
//fieldLabels.put(name, name);
StreamExpressionParameter param = np.getParameter();
// we're going to split these up here so we only make the choice once
// order of these in read() doesn't matter
if(param instanceof StreamExpressionValue) {
stringParams.put(name, ((StreamExpressionValue)param).getValue());
} else if (factory.isEvaluator((StreamExpression) param)) {
StreamEvaluator evaluator = factory.constructEvaluator((StreamExpression) param);
evaluatorParams.put(name, evaluator);
} else if(factory.isStream((StreamExpression)param)) {
TupleStream tupleStream = factory.constructStream((StreamExpression) param);
streamParams.put(name, tupleStream);
}
else{
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - only string, evaluator, or stream named parameters are supported, but param %s is none of those",expression, name));
}
}
}
@Override
public StreamExpression toExpression(StreamFactory factory) throws IOException{
return toExpression(factory, true);
}
private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException {
// function name
StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass()));
// add string based params
for(Entry<String,String> param : stringParams.entrySet()){
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), param.getValue()));
}
// add evaluator based params
for(Entry<String,StreamEvaluator> param : evaluatorParams.entrySet()){
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), param.getValue().toExpression(factory)));
}
// add stream based params
for(Entry<String,TupleStream> param : streamParams.entrySet()){
if(includeStreams){
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), ((Expressible)param.getValue()).toExpression(factory)));
}
else{
expression.addParameter(new StreamExpressionNamedParameter(param.getKey(), "<stream>"));
}
}
return expression;
}
@Override
public Explanation toExplanation(StreamFactory factory) throws IOException {
StreamExplanation explanation = new StreamExplanation(getStreamNodeId().toString());
explanation.setFunctionName(factory.getFunctionName(this.getClass()));
explanation.setImplementingClass(this.getClass().getName());
explanation.setExpressionType(ExpressionType.STREAM_DECORATOR);
explanation.setExpression(toExpression(factory, false).toString());
return explanation;
}
public void setStreamContext(StreamContext context) {
this.streamContext = context;
// also set in evaluators and streams
for(StreamEvaluator evaluator : evaluatorParams.values()){
evaluator.setStreamContext(context);
}
for(TupleStream stream : streamParams.values()){
stream.setStreamContext(context);
}
}
public List<TupleStream> children() {
List<TupleStream> l = new ArrayList<TupleStream>();
return l;
}
public Tuple read() throws IOException {
if(finished) {
Map<String,Object> m = new HashMap<>();
m.put("EOF", true);
return new Tuple(m);
} else {
finished = true;
Map<String, Object> values = new HashMap<>();
// add all string based params
// these could come from the context, or they will just be treated as straight strings
for(Entry<String,String> param : stringParams.entrySet()){
if(streamContext.getLets().containsKey(param.getValue())){
values.put(param.getKey(), streamContext.getLets().get(param.getValue()));
}
else{
values.put(param.getKey(), param.getValue());
}
}
// add all evaluators
for(Entry<String,StreamEvaluator> param : evaluatorParams.entrySet()){
values.put(param.getKey(), param.getValue().evaluateOverContext());
}
List<Number> y = (List<Number>)values.get("y");
List<Number> x = (List<Number>)values.get("x");
if(x == null) {
//x is null so add a sequence
x = new ArrayList<>();
for(int i=0; i<y.size(); i++) {
x.add(i+1);
}
}
List<List<Number>> xy = new ArrayList<>();
for(int i=0; i<x.size(); i++) {
List<Number> pair = new ArrayList<>();
pair.add(x.get(i));
pair.add(y.get(i));
xy.add(pair);
}
values.put("plot", values.get("type"));
values.put("data", xy);
Tuple tup = new Tuple(values);
tup.fieldLabels = fieldLabels;
tup.fieldNames = fieldNames;
return tup;
}
}
public void close() throws IOException {
// Nothing to do here
}
public void open() throws IOException {
// nothing to do here
}
/** Return the stream sort, i.e., the order in which records are returned. */
public StreamComparator getStreamSort(){
return new SingleValueComparator();
}
public int getCost() {
return 0;
}
}
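A minimal client-side sketch (hypothetical Solr base URL and collection; the expression mirrors the `testPlot` case added later in this commit) of driving this stream through the `/stream` handler:

[source,java]
----
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.stream.SolrStream;
import org.apache.solr.client.solrj.io.stream.StreamContext;
import org.apache.solr.client.solrj.io.stream.TupleStream;
import org.apache.solr.common.params.ModifiableSolrParams;

public class PlotStreamClientSketch {
  public static void main(String[] args) throws Exception {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("expr", "let(a=array(3,2,3), plot(type=scatter, x=a, y=array(5,6,3)))");
    params.set("qt", "/stream");

    // Hypothetical base URL and collection name.
    TupleStream stream = new SolrStream("http://localhost:8983/solr/collection1", params);
    stream.setStreamContext(new StreamContext());
    stream.open();
    try {
      Tuple tuple = stream.read();                  // one tuple carrying "plot" and "data"
      System.out.println(tuple.getString("plot"));  // scatter
      System.out.println(tuple.get("data"));        // [[3, 5], [2, 6], [3, 3]]
    } finally {
      stream.close();
    }
  }
}
----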

View File

@ -37,6 +37,7 @@ import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
import org.apache.solr.common.params.CommonAdminParams;
import org.apache.solr.common.params.CoreAdminParams;
@ -547,7 +548,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
}
public static class ReplaceNode extends AsyncCollectionAdminRequest {
String source, target;
String sourceNode, targetNode;
Boolean parallel;
/**
@ -556,8 +557,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
*/
public ReplaceNode(String source, String target) {
super(CollectionAction.REPLACENODE);
this.source = checkNotNull("source",source);
this.target = checkNotNull("target",target);
this.sourceNode = checkNotNull(CollectionParams.SOURCE_NODE, source);
this.targetNode = checkNotNull(CollectionParams.TARGET_NODE, target);
}
public ReplaceNode setParallel(Boolean flag) {
@ -568,8 +569,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
@Override
public SolrParams getParams() {
ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
params.set("source", source);
params.set("target", target);
params.set(CollectionParams.SOURCE_NODE, sourceNode);
params.set(CollectionParams.TARGET_NODE, targetNode);
if (parallel != null) params.set("parallel", parallel.toString());
return params;
}
@ -577,9 +578,9 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
}
public static class MoveReplica extends AsyncCollectionAdminRequest {
String collection, replica, targetNode;
String shard, fromNode;
boolean randomlyMoveReplica;
protected String collection, replica, targetNode;
protected String shard, sourceNode;
protected boolean randomlyMoveReplica;
public MoveReplica(String collection, String replica, String targetNode) {
super(CollectionAction.MOVEREPLICA);
@ -589,12 +590,12 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
this.randomlyMoveReplica = false;
}
public MoveReplica(String collection, String shard, String fromNode, String targetNode) {
public MoveReplica(String collection, String shard, String sourceNode, String targetNode) {
super(CollectionAction.MOVEREPLICA);
this.collection = checkNotNull("collection",collection);
this.shard = checkNotNull("shard",shard);
this.fromNode = checkNotNull("fromNode",fromNode);
this.targetNode = checkNotNull("targetNode",targetNode);
this.sourceNode = checkNotNull(CollectionParams.SOURCE_NODE, sourceNode);
this.targetNode = checkNotNull(CollectionParams.TARGET_NODE, targetNode);
this.randomlyMoveReplica = true;
}
@ -602,10 +603,10 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
public SolrParams getParams() {
ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
params.set("collection", collection);
params.set("targetNode", targetNode);
params.set(CollectionParams.TARGET_NODE, targetNode);
if (randomlyMoveReplica) {
params.set("shard", shard);
params.set("fromNode", fromNode);
params.set(CollectionParams.SOURCE_NODE, sourceNode);
} else {
params.set("replica", replica);
}
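A minimal sketch (hypothetical collection, shard, and node names) showing that the request now emits the `sourceNode`/`targetNode` parameters:

[source,java]
----
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

public class MoveReplicaParamsSketch {
  public static void main(String[] args) {
    // Hypothetical collection, shard, and node names.
    CollectionAdminRequest.MoveReplica move =
        new CollectionAdminRequest.MoveReplica(
            "myCollection", "shard1", "10.0.0.1:8983_solr", "10.0.0.2:8983_solr");
    // Prints the action, collection, shard, sourceNode and targetNode parameters.
    System.out.println(move.getParams());
  }
}
----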

View File

@ -31,6 +31,15 @@ public interface CollectionParams {
String ACTION = "action";
String NAME = "name";
/**
* @deprecated use {@link #SOURCE_NODE} instead
*/
@Deprecated
String FROM_NODE = "fromNode";
String SOURCE_NODE = "sourceNode";
String TARGET_NODE = "targetNode";
enum LockLevel {
CLUSTER(0),

View File

@ -245,14 +245,16 @@ public class CommandOperation {
}
};
new JavaBinCodec() {
try (final JavaBinCodec jbc = new JavaBinCodec() {
int level = 0;
@Override
protected Map<Object, Object> newMap(int size) {
level++;
return level == 1 ? map : super.newMap(size);
}
}.unmarshal(in);
}) {
jbc.unmarshal(in);
}
return operations;
}

View File

@ -106,9 +106,11 @@ public class Utils {
}
public static InputStream toJavabin(Object o) throws IOException {
BinaryRequestWriter.BAOS baos = new BinaryRequestWriter.BAOS();
new JavaBinCodec().marshal(o,baos);
return new ByteBufferInputStream(ByteBuffer.wrap(baos.getbuf(),0,baos.size()));
try (final JavaBinCodec jbc = new JavaBinCodec()) {
BinaryRequestWriter.BAOS baos = new BinaryRequestWriter.BAOS();
jbc.marshal(o,baos);
return new ByteBufferInputStream(ByteBuffer.wrap(baos.getbuf(),0,baos.size()));
}
}
public static Collection getDeepCopy(Collection c, int maxDepth, boolean mutable) {

View File

@ -6058,10 +6058,40 @@ public class StreamExpressionTest extends SolrCloudTestCase {
List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1);
Map out = (Map)tuples.get(0).get("return-value");
assertEquals((double)out.get("p-value"), 0.788298D, .0001);
assertEquals((double)out.get("f-ratio"), 0.24169D, .0001);
assertEquals((double) out.get("p-value"), 0.788298D, .0001);
assertEquals((double) out.get("f-ratio"), 0.24169D, .0001);
}
@Test
public void testPlot() throws Exception {
String cexpr = "let(a=array(3,2,3), plot(type=scatter, x=a, y=array(5,6,3)))";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
paramsLoc.set("expr", cexpr);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
TupleStream solrStream = new SolrStream(url, paramsLoc);
StreamContext context = new StreamContext();
solrStream.setStreamContext(context);
List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1);
String plot = tuples.get(0).getString("plot");
assertTrue(plot.equals("scatter"));
List<List<Number>> data = (List<List<Number>>)tuples.get(0).get("data");
assertTrue(data.size() == 3);
List<Number> pair1 = data.get(0);
assertTrue(pair1.get(0).intValue() == 3);
assertTrue(pair1.get(1).intValue() == 5);
List<Number> pair2 = data.get(1);
assertTrue(pair2.get(0).intValue() == 2);
assertTrue(pair2.get(1).intValue() == 6);
List<Number> pair3 = data.get(2);
assertTrue(pair3.get(0).intValue() == 3);
assertTrue(pair3.get(1).intValue() == 3);
}
@Test
public void testMovingAverage() throws Exception {
String cexpr = "movingAvg(array(1,2,3,4,5,6,7), 4)";

View File

@ -74,12 +74,15 @@ public class StreamExpressionToExpessionTest extends LuceneTestCase {
String expressionString;
// Basic test
stream = new CloudSolrStream(StreamExpressionParser.parse("search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")"), factory);
stream = new CloudSolrStream(StreamExpressionParser.parse("search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", fq=\"a_s:one\", fq=\"a_s:two\")"), factory);
expressionString = stream.toExpression(factory).toString();
System.out.println("ExpressionString: " + expressionString.toString());
assertTrue(expressionString.contains("search(collection1,"));
assertTrue(expressionString.contains("q=\"*:*\""));
assertTrue(expressionString.contains("fl=\"id,a_s,a_i,a_f\""));
assertTrue(expressionString.contains("sort=\"a_f asc, a_i asc\""));
assertTrue(expressionString.contains("fq=\"a_s:one\""));
assertTrue(expressionString.contains("fq=\"a_s:two\""));
// Basic w/aliases
stream = new CloudSolrStream(StreamExpressionParser.parse("search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", aliases=\"id=izzy,a_s=kayden\")"), factory);