diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2140a9d1ad7..8195e2ca6f7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -100,6 +100,10 @@ New Features
* SOLR-5353: Enhance CoreAdmin api to split a route key's documents from an index
and leave behind all other documents. (shalin)
+* SOLR-5027: CollapsingQParserPlugin for high performance field collapsing on high cardinality fields.
+ (Joel Bernstein)
+
+
Bug Fixes
----------------------
diff --git a/solr/core/ivy.xml b/solr/core/ivy.xml
index e3039510c2f..2d165ba8819 100644
--- a/solr/core/ivy.xml
+++ b/solr/core/ivy.xml
@@ -39,6 +39,7 @@
+
diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java
new file mode 100644
index 00000000000..716fea79e47
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java
@@ -0,0 +1,917 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.schema.TrieFloatField;
+import org.apache.solr.schema.TrieIntField;
+import org.apache.solr.schema.TrieLongField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.handler.component.QueryElevationComponent;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.Bits;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+
+import com.carrotsearch.hppc.FloatArrayList;
+import com.carrotsearch.hppc.IntOpenHashSet;
+import com.carrotsearch.hppc.cursors.IntCursor;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Iterator;
+
+/**
+
+ The CollapsingQParserPlugin is a PostFilter that performs field collapsing.
+ This is a high performance alternative to standard Solr
+ field collapsing (with ngroups) when the number of distinct groups
+ in the result set is high.
+
+ Sample syntax:
+
+ Collapse based on the highest scoring document:
+
+
+ fq={!collapse field=field_name}
+
+
+ Collapse based on the min value of a numeric field:
+
+ fq={!collapse field=field_name min=field_name}
+
+ Collapse based on the max value of a numeric field:
+
+ fq={!collapse field=field_name max=field_name}
+
+ Collapse with a null policy:
+
+ fq={!collapse field=field_name nullPolicy=nullPolicy}
+
+ There are three null policies:
+ ignore : removes docs with a null value in the collapse field (default).
+ expand : treats each doc with a null value in the collapse field as a separate group.
+ collapse : collapses all docs with a null value into a single group using either highest score, or min/max.
+
+ The CollapsingQParserPlugin fully supports the QueryElevationComponent
+
+
+ **/
+
+public class CollapsingQParserPlugin extends QParserPlugin {
+
+ public static final String NAME = "collapse";
+ public static final String NULL_COLLAPSE = "collapse";
+ public static final String NULL_IGNORE = "ignore";
+ public static final String NULL_EXPAND = "expand";
+
+
+ // Plugin initialisation hook; the supplied arguments are not read and nothing is configured.
+ public void init(NamedList namedList) {
+
+ }
+
+ public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest request) {
+ return new CollapsingQParser(qstr, localParams, params, request);
+ }
+
+ private class CollapsingQParser extends QParser {
+
+ public CollapsingQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest request) {
+ super(qstr, localParams, params, request);
+ }
+
+ public Query parse() throws SyntaxError {
+ try {
+ return new CollapsingPostFilter(localParams, params, req);
+ } catch (Exception e) {
+ throw new SyntaxError(e.getMessage(), e);
+ }
+ }
+ }
+
+ private class CollapsingPostFilter extends ExtendedQueryBase implements PostFilter {
+
+ private Object cacheId;
+ private String field;
+ private int leafCount;
+ private SortedDocValues docValues;
+ private int maxDoc;
+ private String max;
+ private String min;
+ private FieldType fieldType;
+ private int nullPolicy;
+ private SolrIndexSearcher searcher;
+ private SolrParams solrParams;
+ private Map context;
+ private IndexSchema schema;
+ public static final int NULL_POLICY_IGNORE = 0;
+ public static final int NULL_POLICY_COLLAPSE = 1;
+ public static final int NULL_POLICY_EXPAND = 2;
+
+ // Caching is fixed off for this filter: both setters ignore their argument
+ // and both getters always answer false.
+ public void setCache(boolean cache) {
+
+ }
+
+ public void setCacheSep(boolean cacheSep) {
+
+ }
+
+ public boolean getCacheSep() {
+ return false;
+ }
+
+ public boolean getCache() {
+ return false;
+ }
+
+ public int hashCode() {
+ return this.cacheId.hashCode()*((1+Float.floatToIntBits(this.getBoost()))*31);
+ }
+
+ public boolean equals(Object o) {
+   // cacheId is compared by reference, so only the very same filter object can be equal;
+   // this guarantees the query result cache never gets a hit for a collapse filter.
+   if(!(o instanceof CollapsingPostFilter)) {
+     return false;
+   }
+   CollapsingPostFilter other = (CollapsingPostFilter)o;
+   return this.cacheId == other.cacheId && this.getBoost()==other.getBoost();
+ }
+
+ public int getCost() {
+ return Math.max(super.getCost(), 100);
+ }
+
+ // Returns the argument unchanged; no detail about the filter itself is rendered.
+ public String toString(String s) {
+ return s;
+ }
+
+ /**
+  * Reads the collapse settings from the local params and loads the collapse field's values
+  * from the top-level reader.
+  *
+  * @param localParams local params of the clause: field, nullPolicy, min, max
+  * @param params request params, kept so needsScores can inspect them later
+  * @param request supplies the searcher and the request context
+  * @throws IOException if field is missing, nullPolicy is not one of ignore/collapse/expand,
+  *         or the field values cannot be loaded
+  */
+ public CollapsingPostFilter(SolrParams localParams, SolrParams params, SolrQueryRequest request) throws IOException {
+   this.cacheId = new Object();
+   this.field = localParams.get("field");
+   if(this.field == null) {
+     // Fail here with a clear message rather than deep inside the schema lookup.
+     throw new IOException("The collapse 'field' local param is required");
+   }
+   this.solrParams = params;
+   String nPolicy = localParams.get("nullPolicy", NULL_IGNORE);
+   if(nPolicy.equals(NULL_IGNORE)) {
+     this.nullPolicy = NULL_POLICY_IGNORE;
+   } else if(nPolicy.equals(NULL_COLLAPSE)) {
+     this.nullPolicy = NULL_POLICY_COLLAPSE;
+   } else if(nPolicy.equals(NULL_EXPAND)) {
+     this.nullPolicy = NULL_POLICY_EXPAND;
+   } else {
+     // A misspelled policy must not silently behave like "ignore".
+     throw new IOException("Invalid nullPolicy:"+nPolicy);
+   }
+   this.searcher = request.getSearcher();
+   this.leafCount = searcher.getTopReaderContext().leaves().size();
+   this.maxDoc = searcher.maxDoc();
+   this.schema = searcher.getSchema();
+   SchemaField schemaField = schema.getField(this.field);
+   if(schemaField.hasDocValues()) {
+     this.docValues = searcher.getAtomicReader().getSortedDocValues(this.field);
+   } else {
+     this.docValues = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), this.field);
+   }
+
+   this.max = localParams.get("max");
+   this.min = localParams.get("min");
+   // getFilterCollector prefers max over min, so the field type is resolved from the same
+   // field; otherwise a request carrying both would pair max's values with min's type.
+   String valueField = this.max != null ? this.max : this.min;
+   if(valueField != null) {
+     this.fieldType = this.schema.getField(valueField).getType();
+   }
+
+   this.context = request.getContext();
+ }
+
+ /**
+  * Resolves the elevated unique-key values to top-level doc ids.
+  *
+  * @param indexSearcher searcher whose leaves are scanned for the ids
+  * @param boosted unique-key values of the elevated documents, or null when there are none
+  * @return the global doc ids of the elevated documents that were found, or null if boosted is null
+  * @throws IOException if the index cannot be read
+  */
+ private IntOpenHashSet getBoostDocs(IndexSearcher indexSearcher, Set<String> boosted) throws IOException {
+   if(boosted == null) {
+     return null;
+   }
+
+   String fieldName = this.schema.getUniqueKeyField().getName();
+   HashSet<BytesRef> localBoosts = new HashSet<BytesRef>(boosted.size()*2);
+   for(String id : boosted) {
+     localBoosts.add(new BytesRef(id));
+   }
+
+   IntOpenHashSet boostDocs = new IntOpenHashSet(boosted.size()*2);
+   List<AtomicReaderContext> leaves = indexSearcher.getTopReaderContext().leaves();
+   TermsEnum termsEnum = null;
+   DocsEnum docsEnum = null;
+   for(AtomicReaderContext leaf : leaves) {
+     if(localBoosts.isEmpty()) {
+       // Every elevated id has been located; the remaining segments need no lookup.
+       break;
+     }
+     AtomicReader reader = leaf.reader();
+     int docBase = leaf.docBase;
+     Bits liveDocs = reader.getLiveDocs();
+     Terms terms = reader.terms(fieldName);
+     if(terms == null) {
+       // This segment has no terms for the unique key field.
+       continue;
+     }
+     termsEnum = terms.iterator(termsEnum);
+     Iterator<BytesRef> it = localBoosts.iterator();
+     while(it.hasNext()) {
+       BytesRef ref = it.next();
+       if(termsEnum.seekExact(ref)) {
+         docsEnum = termsEnum.docs(liveDocs, docsEnum);
+         int doc = docsEnum.nextDoc();
+         // An exhausted enum (e.g. the only match is deleted) reports NO_MORE_DOCS, not -1.
+         if(doc != DocIdSetIterator.NO_MORE_DOCS) {
+           //Found the document.
+           boostDocs.add(doc+docBase);
+           it.remove();
+         }
+       }
+     }
+   }
+
+   return boostDocs;
+ }
+
+ public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) {
+ try {
+ IntOpenHashSet boostDocs = getBoostDocs(indexSearcher, (Set) (this.context.get(QueryElevationComponent.BOOSTED)));
+
+ if(this.min != null || this.max != null) {
+
+ return new CollapsingFieldValueCollector(this.maxDoc,
+ this.leafCount,
+ this.docValues,
+ this.searcher,
+ this.nullPolicy,
+ max != null ? this.max : this.min,
+ max != null,
+ needsScores(this.solrParams),
+ this.fieldType,
+ boostDocs);
+ } else {
+ return new CollapsingScoreCollector(this.maxDoc, this.leafCount, this.docValues, this.nullPolicy, boostDocs);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+  * Decides whether scores have to be tracked while collapsing on a field value.
+  *
+  * @param params the request params; "sort" and "fl" are inspected
+  * @return true when no sort is given, when a sort clause or a returned field is "score",
+  *         or when the request context carries elevated documents
+  */
+ private boolean needsScores(SolrParams params) {
+
+   String sortSpec = params.get("sort");
+   if(sortSpec == null) {
+     //No sort specified so it defaults to score.
+     return true;
+   }
+
+   for(String clause : sortSpec.split(",")) {
+     // Trim first: in "price asc, score desc" the second clause starts with a blank,
+     // which would otherwise make its first token the empty string.
+     String[] parts = clause.trim().split("\\s+");
+     if(parts[0].equals("score")) {
+       return true;
+     }
+   }
+
+   // fl may be repeated, and each value may be separated by commas and/or whitespace.
+   String[] fls = params.getParams("fl");
+   if(fls != null) {
+     for(String fl : fls) {
+       for(String f : fl.split("[,\\s]+")) {
+         if(f.equals("score")) {
+           return true;
+         }
+       }
+     }
+   }
+
+   return this.context.containsKey(QueryElevationComponent.BOOSTED);
+ }
+ }
+
+ /**
+  * Scorer stand-in: score() reports whatever was last written to the public score field.
+  * It does no iteration of its own; every other method returns a fixed value.
+  */
+ private class DummyScorer extends Scorer {
+
+   public float score;
+
+   public DummyScorer() {
+     super(null);
+   }
+
+   public float score() {
+     return this.score;
+   }
+
+   public int docID() {
+     return 0;
+   }
+
+   public int nextDoc() {
+     return 0;
+   }
+
+   public int advance(int i) {
+     return -1;
+   }
+
+   public int freq() {
+     return 0;
+   }
+
+   public long cost() {
+     return 0;
+   }
+ }
+
+
+ /**
+  * Collapses on score. While collecting it remembers, per ordinal of the collapse field, the
+  * highest scoring document; finish() then replays only those group heads (plus elevated
+  * documents and whatever the null policy keeps) into the delegate, in doc id order.
+  */
+ private class CollapsingScoreCollector extends DelegatingCollector {
+
+   private AtomicReaderContext[] contexts;
+   private OpenBitSet collapsedSet;
+   private SortedDocValues values;
+   private int[] ords;
+   private float[] scores;
+   private int docBase;
+   private int maxDoc;
+   private int nullPolicy;
+   private float nullScore = -Float.MAX_VALUE;
+   // Stays -1 until a document has been chosen for the collapsed null group.
+   private int nullDoc = -1;
+   private FloatArrayList nullScores;
+   private IntOpenHashSet boostDocs;
+
+   /**
+    * @param maxDoc size of the bit set of surviving documents
+    * @param segments number of leaf contexts that will be seen
+    * @param values ordinals of the collapse field, addressed by global doc id
+    * @param nullPolicy one of the CollapsingPostFilter.NULL_POLICY_* constants
+    * @param boostDocs global ids of elevated documents, or null
+    */
+   public CollapsingScoreCollector(int maxDoc,
+                                   int segments,
+                                   SortedDocValues values,
+                                   int nullPolicy,
+                                   IntOpenHashSet boostDocs) {
+     this.maxDoc = maxDoc;
+     this.contexts = new AtomicReaderContext[segments];
+     this.collapsedSet = new OpenBitSet(maxDoc);
+     this.boostDocs = boostDocs;
+     if(this.boostDocs != null) {
+       //Set the elevated docs now.
+       for(IntCursor cursor : this.boostDocs) {
+         this.collapsedSet.fastSet(cursor.value);
+       }
+     }
+     this.values = values;
+     int valueCount = values.getValueCount();
+     this.ords = new int[valueCount];
+     Arrays.fill(this.ords, -1);
+     this.scores = new float[valueCount];
+     Arrays.fill(this.scores, -Float.MAX_VALUE);
+     this.nullPolicy = nullPolicy;
+     if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+       nullScores = new FloatArrayList();
+     }
+   }
+
+   public boolean acceptsDocsOutOfOrder() {
+     //Documents must be sent in order to this collector.
+     return false;
+   }
+
+   public void setNextReader(AtomicReaderContext context) throws IOException {
+     this.contexts[context.ord] = context;
+     this.docBase = context.docBase;
+   }
+
+   public void collect(int docId) throws IOException {
+     int globalDoc = docId+this.docBase;
+     int ord = values.getOrd(globalDoc);
+     if(ord > -1) {
+       float score = scorer.score();
+       if(score > scores[ord]) {
+         ords[ord] = globalDoc;
+         scores[ord] = score;
+       }
+     } else if(this.collapsedSet.fastGet(globalDoc)) {
+       //The doc is elevated so score does not matter
+       //We just want to be sure it doesn't fall into the null policy
+     } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
+       float score = scorer.score();
+       if(nullDoc == -1 || score > nullScore) {
+         nullScore = score;
+         nullDoc = globalDoc;
+       }
+     } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+       collapsedSet.fastSet(globalDoc);
+       nullScores.add(scorer.score());
+     }
+   }
+
+   public void finish() throws IOException {
+     if(contexts.length == 0) {
+       return;
+     }
+
+     // Decide on the document, not on the sign of its score: a null group whose best
+     // score is zero or negative must still be represented.
+     if(nullDoc > -1) {
+       this.collapsedSet.fastSet(nullDoc);
+     }
+
+     // Mark the head of every group that received at least one document.
+     for(int i=0; i<ords.length; i++) {
+       int doc = ords[i];
+       if(doc > -1) {
+         collapsedSet.fastSet(doc);
+       }
+     }
+
+     int currentContext = 0;
+     int currentDocBase = 0;
+     int nextDocBase = currentContext+1 < contexts.length ? contexts[currentContext+1].docBase : maxDoc;
+     delegate.setNextReader(contexts[currentContext]);
+     DummyScorer dummy = new DummyScorer();
+     delegate.setScorer(dummy);
+     DocIdSetIterator it = collapsedSet.iterator();
+     int docId = -1;
+     int nullScoreIndex = 0;
+     while((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+
+       int ord = values.getOrd(docId);
+       if(ord > -1) {
+         dummy.score = scores[ord];
+       } else if(this.boostDocs != null && boostDocs.contains(docId)) {
+         //Elevated docs don't need a score.
+         dummy.score = 0F;
+       } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
+         dummy.score = nullScore;
+       } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+         // Null scores were appended in collection order, which is also doc id order.
+         dummy.score = nullScores.get(nullScoreIndex++);
+       }
+
+       // Advance to the segment that contains this global doc id.
+       while(docId >= nextDocBase) {
+         currentContext++;
+         currentDocBase = contexts[currentContext].docBase;
+         nextDocBase = currentContext+1 < contexts.length ? contexts[currentContext+1].docBase : maxDoc;
+         delegate.setNextReader(contexts[currentContext]);
+       }
+
+       int contextDoc = docId-currentDocBase;
+       delegate.collect(contextDoc);
+     }
+
+     if(delegate instanceof DelegatingCollector) {
+       ((DelegatingCollector) delegate).finish();
+     }
+   }
+ }
+
+ /**
+  * Collapses on the min or max of a numeric field. Selection is delegated to a
+  * FieldValueCollapse chosen by the field type; finish() replays the surviving documents
+  * into the delegate in doc id order.
+  */
+ private class CollapsingFieldValueCollector extends DelegatingCollector {
+   private AtomicReaderContext[] contexts;
+   private SortedDocValues values;
+
+   private int docBase;
+   private int maxDoc;
+   private int nullPolicy;
+
+   private FieldValueCollapse fieldValueCollapse;
+   private boolean needsScores;
+   private IntOpenHashSet boostDocs;
+
+   /**
+    * @param maxDoc upper bound used as the doc base after the last segment
+    * @param segments number of leaf contexts that will be seen
+    * @param values ordinals of the collapse field, addressed by global doc id
+    * @param searcher passed on to the value-collapse strategy
+    * @param nullPolicy one of the CollapsingPostFilter.NULL_POLICY_* constants
+    * @param field name of the numeric field whose min/max picks the group head
+    * @param max true to keep the largest value, false to keep the smallest
+    * @param needsScores whether scores must be recorded for the replay
+    * @param fieldType type of field; must be TrieIntField, TrieLongField or TrieFloatField
+    * @param boostDocs global ids of elevated documents, or null
+    * @throws IOException if fieldType is not one of the supported types
+    */
+   public CollapsingFieldValueCollector(int maxDoc,
+                                        int segments,
+                                        SortedDocValues values,
+                                        SolrIndexSearcher searcher,
+                                        int nullPolicy,
+                                        String field,
+                                        boolean max,
+                                        boolean needsScores,
+                                        FieldType fieldType,
+                                        IntOpenHashSet boostDocs) throws IOException{
+
+     this.maxDoc = maxDoc;
+     this.contexts = new AtomicReaderContext[segments];
+     this.values = values;
+     int valueCount = values.getValueCount();
+     this.nullPolicy = nullPolicy;
+     this.needsScores = needsScores;
+     this.boostDocs = boostDocs;
+     if(fieldType instanceof TrieIntField) {
+       this.fieldValueCollapse = new IntValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs);
+     } else if(fieldType instanceof TrieLongField) {
+       this.fieldValueCollapse = new LongValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs);
+     } else if(fieldType instanceof TrieFloatField) {
+       this.fieldValueCollapse = new FloatValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs);
+     } else {
+       // Without this the strategy stays null and the first setScorer call fails with an NPE.
+       throw new IOException("min/max must be either TrieInt, TrieLong or TrieFloat.");
+     }
+   }
+
+   public boolean acceptsDocsOutOfOrder() {
+     //Documents must be sent in order to this collector.
+     return false;
+   }
+
+   // The scorer goes to the strategy only; the delegate gets a DummyScorer in finish().
+   public void setScorer(Scorer scorer) {
+     this.fieldValueCollapse.setScorer(scorer);
+   }
+
+   public void setNextReader(AtomicReaderContext context) throws IOException {
+     this.contexts[context.ord] = context;
+     this.docBase = context.docBase;
+     this.fieldValueCollapse.setNextReader(context);
+   }
+
+   public void collect(int docId) throws IOException {
+     int globalDoc = docId+this.docBase;
+     int ord = values.getOrd(globalDoc);
+     fieldValueCollapse.collapse(ord, docId, globalDoc);
+   }
+
+   public void finish() throws IOException {
+     if(contexts.length == 0) {
+       return;
+     }
+
+     int currentContext = 0;
+     int currentDocBase = 0;
+     int nextDocBase = currentContext+1 < contexts.length ? contexts[currentContext+1].docBase : maxDoc;
+     delegate.setNextReader(contexts[currentContext]);
+     DummyScorer dummy = new DummyScorer();
+     delegate.setScorer(dummy);
+     DocIdSetIterator it = fieldValueCollapse.getCollapsedSet().iterator();
+     int docId = -1;
+     int nullScoreIndex = 0;
+     float[] scores = fieldValueCollapse.getScores();
+     FloatArrayList nullScores = fieldValueCollapse.getNullScores();
+     float nullScore = fieldValueCollapse.getNullScore();
+     while((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+
+       if(this.needsScores){
+         int ord = values.getOrd(docId);
+         if(ord > -1) {
+           dummy.score = scores[ord];
+         } else if(boostDocs != null && boostDocs.contains(docId)) {
+           //Its an elevated doc so no score is needed
+           dummy.score = 0F;
+         } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
+           dummy.score = nullScore;
+         } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+           dummy.score = nullScores.get(nullScoreIndex++);
+         }
+       }
+
+       // Advance to the segment that contains this global doc id.
+       while(docId >= nextDocBase) {
+         currentContext++;
+         currentDocBase = contexts[currentContext].docBase;
+         nextDocBase = currentContext+1 < contexts.length ? contexts[currentContext+1].docBase : maxDoc;
+         delegate.setNextReader(contexts[currentContext]);
+       }
+
+       int contextDoc = docId-currentDocBase;
+       delegate.collect(contextDoc);
+     }
+
+     if(delegate instanceof DelegatingCollector) {
+       ((DelegatingCollector) delegate).finish();
+     }
+   }
+ }
+
+ private abstract class FieldValueCollapse {
+ protected int nullPolicy;
+ protected int[] ords;
+ protected Scorer scorer;
+ protected FloatArrayList nullScores;
+ protected float nullScore;
+ protected float[] scores;
+ protected OpenBitSet collapsedSet;
+ protected IntOpenHashSet boostDocs;
+ protected int nullDoc = -1;
+ protected boolean needsScores;
+ protected boolean max;
+ protected String field;
+
+ public abstract void collapse(int ord, int contextDoc, int globalDoc) throws IOException;
+ public abstract void setNextReader(AtomicReaderContext context) throws IOException;
+
+ public FieldValueCollapse(SolrIndexSearcher searcher,
+ String field,
+ int nullPolicy,
+ boolean max,
+ boolean needsScores,
+ IntOpenHashSet boostDocs) {
+ this.field = field;
+ this.nullPolicy = nullPolicy;
+ this.max = max;
+ this.needsScores = needsScores;
+ this.collapsedSet = new OpenBitSet(searcher.maxDoc());
+ this.boostDocs = boostDocs;
+ if(this.boostDocs != null) {
+ Iterator it = boostDocs.iterator();
+ while(it.hasNext()) {
+ IntCursor cursor = it.next();
+ this.collapsedSet.fastSet(cursor.value);
+ }
+ }
+ }
+
+ public OpenBitSet getCollapsedSet() {
+ if(nullDoc > -1) {
+ this.collapsedSet.fastSet(nullDoc);
+ }
+
+ for(int i=0; i -1) {
+ collapsedSet.fastSet(doc);
+ }
+ }
+
+ return collapsedSet;
+ }
+
+ public void setScorer(Scorer scorer) {
+ this.scorer = scorer;
+ }
+
+ public FloatArrayList getNullScores() {
+ return nullScores;
+ }
+
+ public float getNullScore() {
+ return this.nullScore;
+ }
+
+ public float[] getScores() {
+ return scores;
+ }
+ }
+
+ /**
+  * Picks, per group, the document with the smallest or largest int value of the given field.
+  */
+ private class IntValueCollapse extends FieldValueCollapse {
+
+   private FieldCache.Ints vals;
+   private IntCompare comp;
+   private int nullVal;
+   private int[] ordVals;
+
+   public IntValueCollapse(SolrIndexSearcher searcher,
+                           String field,
+                           int nullPolicy,
+                           int[] ords,
+                           boolean max,
+                           boolean needsScores,
+                           IntOpenHashSet boostDocs) throws IOException {
+     super(searcher, field, nullPolicy, max, needsScores, boostDocs);
+     this.ords = ords;
+     this.ordVals = new int[ords.length];
+     Arrays.fill(ords, -1);
+
+     if(max) {
+       comp = new MaxIntComp();
+       Arrays.fill(ordVals, Integer.MIN_VALUE);
+       // Seed the null group like the ordinals; left at 0, a null group holding only
+       // non-positive values would never be selected.
+       this.nullVal = Integer.MIN_VALUE;
+     } else {
+       comp = new MinIntComp();
+       Arrays.fill(ordVals, Integer.MAX_VALUE);
+       this.nullVal = Integer.MAX_VALUE;
+     }
+
+     if(needsScores) {
+       this.scores = new float[ords.length];
+       if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+         nullScores = new FloatArrayList();
+       }
+     }
+   }
+
+   public void setNextReader(AtomicReaderContext context) throws IOException {
+     this.vals = FieldCache.DEFAULT.getInts(context.reader(), this.field, false);
+   }
+
+   public void collapse(int ord, int contextDoc, int globalDoc) throws IOException {
+     int val = vals.get(contextDoc);
+     if(ord > -1) {
+       if(comp.test(val, ordVals[ord])) {
+         ords[ord] = globalDoc;
+         ordVals[ord] = val;
+         if(needsScores) {
+           scores[ord] = scorer.score();
+         }
+       }
+     } else if(this.collapsedSet.fastGet(globalDoc)) {
+       // Elevated doc so do nothing.
+     } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
+       if(comp.test(val, nullVal)) {
+         nullVal = val;
+         nullDoc = globalDoc;
+         if(needsScores) {
+           nullScore = scorer.score();
+         }
+       }
+     } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+       this.collapsedSet.fastSet(globalDoc);
+       if(needsScores) {
+         nullScores.add(scorer.score());
+       }
+     }
+   }
+ }
+
+ /**
+  * Picks, per group, the document with the smallest or largest long value of the given field.
+  */
+ private class LongValueCollapse extends FieldValueCollapse {
+
+   private FieldCache.Longs vals;
+   private LongCompare comp;
+   private long nullVal;
+   private long[] ordVals;
+
+   public LongValueCollapse(SolrIndexSearcher searcher,
+                            String field,
+                            int nullPolicy,
+                            int[] ords,
+                            boolean max,
+                            boolean needsScores,
+                            IntOpenHashSet boostDocs) throws IOException {
+     super(searcher, field, nullPolicy, max, needsScores, boostDocs);
+     this.ords = ords;
+     this.ordVals = new long[ords.length];
+     Arrays.fill(ords, -1);
+
+     if(max) {
+       comp = new MaxLongComp();
+       Arrays.fill(ordVals, Long.MIN_VALUE);
+       // Seed the null group like the ordinals; left at 0, a null group holding only
+       // non-positive values would never be selected.
+       this.nullVal = Long.MIN_VALUE;
+     } else {
+       comp = new MinLongComp();
+       Arrays.fill(ordVals, Long.MAX_VALUE);
+       this.nullVal = Long.MAX_VALUE;
+     }
+
+     if(needsScores) {
+       this.scores = new float[ords.length];
+       if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+         nullScores = new FloatArrayList();
+       }
+     }
+   }
+
+   public void setNextReader(AtomicReaderContext context) throws IOException {
+     this.vals = FieldCache.DEFAULT.getLongs(context.reader(), this.field, false);
+   }
+
+   public void collapse(int ord, int contextDoc, int globalDoc) throws IOException {
+     long val = vals.get(contextDoc);
+     if(ord > -1) {
+       if(comp.test(val, ordVals[ord])) {
+         ords[ord] = globalDoc;
+         ordVals[ord] = val;
+         if(needsScores) {
+           scores[ord] = scorer.score();
+         }
+       }
+     } else if(this.collapsedSet.fastGet(globalDoc)) {
+       //Elevated doc so do nothing
+     } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
+       if(comp.test(val, nullVal)) {
+         nullVal = val;
+         nullDoc = globalDoc;
+         if(needsScores) {
+           nullScore = scorer.score();
+         }
+       }
+     } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+       this.collapsedSet.fastSet(globalDoc);
+       if(needsScores) {
+         nullScores.add(scorer.score());
+       }
+     }
+   }
+ }
+
+ /**
+  * Picks, per group, the document with the smallest or largest float value of the given field.
+  */
+ private class FloatValueCollapse extends FieldValueCollapse {
+
+   private FieldCache.Floats vals;
+   private FloatCompare comp;
+   private float nullVal;
+   private float[] ordVals;
+
+   public FloatValueCollapse(SolrIndexSearcher searcher,
+                             String field,
+                             int nullPolicy,
+                             int[] ords,
+                             boolean max,
+                             boolean needsScores,
+                             IntOpenHashSet boostDocs) throws IOException {
+     super(searcher, field, nullPolicy, max, needsScores, boostDocs);
+     this.ords = ords;
+     this.ordVals = new float[ords.length];
+     Arrays.fill(ords, -1);
+
+     if(max) {
+       comp = new MaxFloatComp();
+       Arrays.fill(ordVals, -Float.MAX_VALUE );
+       // Seed the null group like the ordinals; left at 0, a null group holding only
+       // non-positive values would never be selected.
+       this.nullVal = -Float.MAX_VALUE;
+     } else {
+       comp = new MinFloatComp();
+       Arrays.fill(ordVals, Float.MAX_VALUE);
+       this.nullVal = Float.MAX_VALUE;
+     }
+
+     if(needsScores) {
+       this.scores = new float[ords.length];
+       if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+         nullScores = new FloatArrayList();
+       }
+     }
+   }
+
+   public void setNextReader(AtomicReaderContext context) throws IOException {
+     this.vals = FieldCache.DEFAULT.getFloats(context.reader(), this.field, false);
+   }
+
+   public void collapse(int ord, int contextDoc, int globalDoc) throws IOException {
+     float val = vals.get(contextDoc);
+     if(ord > -1) {
+       if(comp.test(val, ordVals[ord])) {
+         ords[ord] = globalDoc;
+         ordVals[ord] = val;
+         if(needsScores) {
+           scores[ord] = scorer.score();
+         }
+       }
+     } else if(this.collapsedSet.fastGet(globalDoc)) {
+       //Elevated doc so do nothing
+     } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
+       if(comp.test(val, nullVal)) {
+         nullVal = val;
+         nullDoc = globalDoc;
+         if(needsScores) {
+           nullScore = scorer.score();
+         }
+       }
+     } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
+       this.collapsedSet.fastSet(globalDoc);
+       if(needsScores) {
+         nullScores.add(scorer.score());
+       }
+     }
+   }
+ }
+
+ // Comparison strategies used by the value-collapse classes. In each, test(candidate, current)
+ // is true when the candidate value should replace the current one.
+ private interface IntCompare {
+   public boolean test(int i1, int i2);
+ }
+
+ private interface FloatCompare {
+   public boolean test(float i1, float i2);
+ }
+
+ private interface LongCompare {
+   public boolean test(long i1, long i2);
+ }
+
+ // Strictly greater wins.
+ private class MaxIntComp implements IntCompare {
+   public boolean test(int i1, int i2) {
+     return i2 < i1;
+   }
+ }
+
+ // Strictly smaller wins.
+ private class MinIntComp implements IntCompare {
+   public boolean test(int i1, int i2) {
+     return i2 > i1;
+   }
+ }
+
+ // Strictly greater wins.
+ private class MaxFloatComp implements FloatCompare {
+   public boolean test(float i1, float i2) {
+     return i2 < i1;
+   }
+ }
+
+ // Strictly smaller wins.
+ private class MinFloatComp implements FloatCompare {
+   public boolean test(float i1, float i2) {
+     return i2 > i1;
+   }
+ }
+
+ // Strictly greater wins.
+ private class MaxLongComp implements LongCompare {
+   public boolean test(long i1, long i2) {
+     return i2 < i1;
+   }
+ }
+
+ // Strictly smaller wins.
+ private class MinLongComp implements LongCompare {
+   public boolean test(long i1, long i2) {
+     return i2 > i1;
+   }
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
index e36759c74b9..4cbe8b38dfc 100644
--- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
@@ -51,7 +51,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
SwitchQParserPlugin.NAME, SwitchQParserPlugin.class,
MaxScoreQParserPlugin.NAME, MaxScoreQParserPlugin.class,
BlockJoinParentQParserPlugin.NAME, BlockJoinParentQParserPlugin.class,
- BlockJoinChildQParserPlugin.NAME, BlockJoinChildQParserPlugin.class
+ BlockJoinChildQParserPlugin.NAME, BlockJoinChildQParserPlugin.class,
+ CollapsingQParserPlugin.NAME, CollapsingQParserPlugin.class
};
/** return a {@link QParser} */
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml
new file mode 100644
index 00000000000..c2b0d73a36e
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml
@@ -0,0 +1,578 @@
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.data.dir:}
+
+
+
+ 1000000
+ 2000000
+ 3000000
+ 4000000
+
+
+ ${tests.luceneMatchVersion:LUCENE_CURRENT}
+
+
+
+
+
+
+
+
+
+
+
+
+ ${solr.ulog.dir:}
+
+
+
+ ${solr.commitwithin.softcommit:true}
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+
+
+ true
+
+
+
+
+
+ dismax
+ *:*
+ 0.01
+
+ text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
+
+
+ text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
+
+
+ ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
+
+
+ 3<-1 5<-2 6<90%
+
+ 100
+
+
+
+
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+ 4
+ true
+ text,name,subject,title,whitetok
+
+
+
+
+
+
+
+ lowerpunctfilt
+
+
+ default
+ lowerfilt
+ spellchecker1
+ false
+
+
+ direct
+ DirectSolrSpellChecker
+ lowerfilt
+ 3
+
+
+ wordbreak
+ solr.WordBreakSolrSpellChecker
+ lowerfilt
+ true
+ true
+ 10
+
+
+ multipleFields
+ lowerfilt1and2
+ spellcheckerMultipleFields
+ false
+
+
+
+ jarowinkler
+ lowerfilt
+
+ org.apache.lucene.search.spell.JaroWinklerDistance
+ spellchecker2
+
+
+
+ solr.FileBasedSpellChecker
+ external
+ spellings.txt
+ UTF-8
+ spellchecker3
+
+
+
+ freq
+ lowerfilt
+ spellcheckerFreq
+
+ freq
+ false
+
+
+ fqcn
+ lowerfilt
+ spellcheckerFQCN
+ org.apache.solr.spelling.SampleComparator
+ false
+
+
+ perDict
+ org.apache.solr.handler.component.DummyCustomParamSpellChecker
+ lowerfilt
+
+
+
+
+
+
+
+ termsComp
+
+
+
+
+
+
+
+
+ false
+
+ false
+
+ 1
+
+
+ spellcheck
+
+
+
+
+ direct
+ false
+ false
+ 1
+
+
+ spellcheck
+
+
+
+
+ default
+ wordbreak
+ 20
+
+
+ spellcheck
+
+
+
+
+ direct
+ wordbreak
+ 20
+
+
+ spellcheck
+
+
+
+
+ dismax
+ lowerfilt1^1
+
+
+ spellcheck
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ tvComponent
+
+
+
+
+
+ string
+ elevate.xml
+
+
+
+
+
+ explicit
+
+
+ elevate
+
+
+
+
+
+
+
+
+
+
+
+ 100
+
+
+
+
+
+ 70
+
+
+
+
+
+
+ ]]>
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ 10
+ .,!?
+
+
+
+
+
+ WORD
+ en
+ US
+
+
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
+
+
+ explicit
+ true
+
+
+
+
+ solr
+ solrconfig.xml schema.xml admin-extra.html
+
+
+
+ prefix-${solr.test.sys.prop2}-suffix
+
+
+
+
+
+ false
+ true
+ v_t,t_field
+ org.apache.solr.update.processor.TextProfileSignature
+
+
+
+
+
+ false
+ false
+ id
+
+ org.apache.solr.update.processor.Lookup3Signature
+
+
+
+
+
+
+ true
+ non_indexed_signature_sS
+ false
+ v_t,t_field
+ org.apache.solr.update.processor.TextProfileSignature
+
+
+
+
+
+
+ uniq
+ uniq2
+ uniq3
+
+
+
+
+
+
+
+
+ regex_dup_A_s
+ x
+ x_x
+
+
+
+ regex_dup_B_s
+ x
+ x_x
+
+
+
+
+
+
+
+ regex_dup_A_s
+ x
+ x_x
+
+
+ regex_dup_B_s
+ x
+ x_x
+
+
+
+
+
+
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 57fd43b6df6..626999910eb 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -195,6 +195,16 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
}
}
+ // Runs the shared query-equality assertion for the "collapse" parser, with the
+ // collapse field supplied through the $myField parameter reference.
+ public void testQueryCollapse() throws Exception {
+   SolrQueryRequest request = req("myField","foo_s");
+   try {
+     assertQueryEquals("collapse", request, "{!collapse field=$myField}");
+   } finally {
+     request.close();
+   }
+ }
+
public void testQueryNested() throws Exception {
SolrQueryRequest req = req("df", "foo_s");
try {
diff --git a/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java
new file mode 100644
index 00000000000..f79bb511adc
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Exercises the {!collapse} filter against a four-document index: collapsing by score,
+ * by min/max of int, long and float fields, with elevation, and with the expand and
+ * collapse null policies.
+ */
+public class TestCollapseQParserPlugin extends SolrTestCaseJ4 {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig-collapseqparser.xml", "schema11.xml");
+ }
+
+ @Override
+ @Before
+ public void setUp() throws Exception {
+ // if you override setUp or tearDown, you better call
+ // the super classes version
+ super.setUp();
+ // Every test starts from an empty, committed index.
+ clearIndex();
+ assertU(commit());
+ }
+
+ @Test
+ public void testCollapseQueries() throws Exception {
+ // Docs 1 and 2 share group1; docs 3 and 4 have no group_s value (the null group).
+ String[] doc = {"id","1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", "2000"};
+ assertU(adoc(doc));
+ String[] doc1 = {"id","2", "term_s","YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", "test_tf", "200"};
+ assertU(adoc(doc1));
+
+ String[] doc2 = {"id","3", "term_s", "YYYY", "test_ti", "5000", "test_tl", "100", "test_tf", "200"};
+ assertU(adoc(doc2));
+
+ String[] doc3 = {"id","4", "term_s", "YYYY", "test_ti", "500", "test_tl", "1000", "test_tf", "2000"};
+ assertU(adoc(doc3));
+
+ assertU(commit());
+
+ //Test collapse by score
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s}");
+ params.add("defType", "edismax");
+ params.add("bf", "field(test_ti)");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']");
+
+ //Test collapse by score with elevation
+
+ params = new ModifiableSolrParams();
+ params.add("q", "YYYY");
+ params.add("fq", "{!collapse field=group_s nullPolicy=collapse}");
+ params.add("defType", "edismax");
+ params.add("bf", "field(test_ti)");
+ params.add("qf", "term_s");
+ params.add("qt", "/elevate");
+ assertQ(req(params), "*[count(//doc)=3]", "//doc[./int[1][@name='test_ti']='5']");
+
+ //Test collapse by min int field
+ params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s min=test_ti}");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='5']");
+
+ //Test collapse by max int field
+ params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s max=test_ti}");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']");
+
+ //Test collapse by min long field
+ params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s min=test_tl}");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='5']");
+
+ //Test collapse by max long field
+ params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s max=test_tl}");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']");
+
+ //Test collapse by min float field
+ params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s min=test_tf}");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']");
+
+ //Test collapse by max float field
+ params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s max=test_tf}");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='5']");
+
+ //Test nullPolicy expand
+ params = new ModifiableSolrParams();
+ params.add("q", "*:*");
+ params.add("fq", "{!collapse field=group_s max=test_tf nullPolicy=expand}");
+ assertQ(req(params), "*[count(//doc)=3]");
+
+ //Test nullPolicy collapse
+ // The query matches only the two docs without a group_s value.
+ params = new ModifiableSolrParams();
+ params.add("q", "test_ti:(500 5000)");
+ params.add("fq", "{!collapse field=group_s max=test_tf nullPolicy=collapse}");
+ assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='500']");
+ }
+}