From 3a594688a5a9efd5df6718c94070398e04aadf2e Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Wed, 23 Oct 2013 23:19:12 +0000 Subject: [PATCH] SOLR-5027 CollapsingQParserPlugin git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1535208 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 4 + solr/core/ivy.xml | 1 + .../solr/search/CollapsingQParserPlugin.java | 917 ++++++++++++++++++ .../org/apache/solr/search/QParserPlugin.java | 3 +- .../conf/solrconfig-collapseqparser.xml | 578 +++++++++++ .../apache/solr/search/QueryEqualityTest.java | 10 + .../search/TestCollapseQParserPlugin.java | 128 +++ 7 files changed, 1640 insertions(+), 1 deletion(-) create mode 100644 solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java create mode 100644 solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml create mode 100644 solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2140a9d1ad7..8195e2ca6f7 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -100,6 +100,10 @@ New Features * SOLR-5353: Enhance CoreAdmin api to split a route key's documents from an index and leave behind all other documents. (shalin) +* SOLR-5027: CollapsingQParserPlugin for high performance field collapsing on high cardinality fields. + (Joel Bernstein) + + Bug Fixes ---------------------- diff --git a/solr/core/ivy.xml b/solr/core/ivy.xml index e3039510c2f..2d165ba8819 100644 --- a/solr/core/ivy.xml +++ b/solr/core/ivy.xml @@ -39,6 +39,7 @@ + diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java new file mode 100644 index 00000000000..716fea79e47 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -0,0 +1,917 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search; + +import org.apache.lucene.util.BytesRef; +import org.apache.solr.schema.TrieFloatField; +import org.apache.solr.schema.TrieIntField; +import org.apache.solr.schema.TrieLongField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.handler.component.QueryElevationComponent; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.*; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.Bits; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; + +import com.carrotsearch.hppc.FloatArrayList; +import com.carrotsearch.hppc.IntOpenHashSet; +import com.carrotsearch.hppc.cursors.IntCursor; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.Set; +import java.util.HashSet; +import java.util.List; +import java.util.Iterator; + +/** + + The 
CollapsingQParserPlugin is a PostFilter that performs field collapsing. + This is a high performance alternative to standard Solr + field collapsing (with ngroups) when the number of distinct groups + in the result set is high. +

+ Sample syntax: +

+ Collapse based on the highest scoring document: +

+ + fq={!collapse field=field_name} + +

+ Collapse based on the min value of a numeric field: +

+ fq={!collapse field=field_name min=numeric_field_name} +

+ Collapse based on the max value of a numeric field: +

+ fq={!collapse field=field_name max=numeric_field_name} +

+ Collapse with a null policy: +

+ fq={!collapse field=field_name nullPolicy=nullPolicy} +

+ There are three null policies:
+ ignore : removes docs with a null value in the collapse field (default).
+ expand : treats each doc with a null value in the collapse field as a separate group.
+ collapse : collapses all docs with a null value into a single group using either highest score, or min/max. +

+ The CollapsingQParserPlugin fully supports the QueryElevationComponent + + + **/ + +public class CollapsingQParserPlugin extends QParserPlugin { + + /** Parser name; presumably the key under which this plugin is registered (the sample syntax uses the collapse local-params prefix) - confirm in QParserPlugin. */ public static final String NAME = "collapse"; + /* The three nullPolicy values recognized by the CollapsingPostFilter constructor. */ public static final String NULL_COLLAPSE = "collapse"; + public static final String NULL_IGNORE = "ignore"; + public static final String NULL_EXPAND = "expand"; + + + /** Intentionally empty: the supplied arguments are ignored. */ public void init(NamedList namedList) { + + } + + /** Creates a CollapsingQParser for the given query string, local params, request params and request. */ public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest request) { + return new CollapsingQParser(qstr, localParams, params, request); + } + + /** QParser whose parse() builds a CollapsingPostFilter from the local params, request params and request. */ private class CollapsingQParser extends QParser { + + public CollapsingQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest request) { + super(qstr, localParams, params, request); + } + + /** Returns a new CollapsingPostFilter; any exception thrown while constructing it is rethrown as a SyntaxError carrying the original message and cause. */ public Query parse() throws SyntaxError { + try { + return new CollapsingPostFilter(localParams, params, req); + } catch (Exception e) { + throw new SyntaxError(e.getMessage(), e); + } + } + } + + /** Post filter query that holds the collapse settings (collapse field, optional min or max field, null policy) and creates the collector that performs the collapse. */ private class CollapsingPostFilter extends ExtendedQueryBase implements PostFilter { + + private Object cacheId; + private String field; + private int leafCount; + private SortedDocValues docValues; + private int maxDoc; + private String max; + private String min; + private FieldType fieldType; + private int nullPolicy; + private SolrIndexSearcher searcher; + private SolrParams solrParams; + private Map context; + private IndexSchema schema; + /* Internal int codes for the null policies; IGNORE is 0, which is also the default value of the nullPolicy field. */ public static final int NULL_POLICY_IGNORE = 0; + public static final int NULL_POLICY_COLLAPSE = 1; + public static final int NULL_POLICY_EXPAND = 2; + + /* Caching is switched off for this query: both setters ignore their argument and both getters always return false. */ public void setCache(boolean cache) { + + } + + public void setCacheSep(boolean cacheSep) { + + } + + public boolean getCacheSep() { + return false; + } + + public boolean getCache() { + return false; + } + + /** Hash built from the hash of the per-instance cacheId object and the bits of the boost. */ public int hashCode() { + return this.cacheId.hashCode()*((1+Float.floatToIntBits(this.getBoost()))*31); + } + + /** Equal only to a CollapsingPostFilter that shares the very same cacheId object and has the same boost. */ public boolean equals(Object o) { + //Uses the unique id 
for equals to ensure that the query result cache always fails. + if(o instanceof CollapsingPostFilter) { + CollapsingPostFilter c = (CollapsingPostFilter)o; + //Do object comparison to be sure only the same object will return true. + if(this.cacheId == c.cacheId && this.getBoost()==c.getBoost()) { + return true; + } + } + return false; + } + + /** Returns the inherited cost, raised to at least 100. */ public int getCost() { + return Math.max(super.getCost(), 100); + } + + /** Returns its argument unchanged. */ public String toString(String s) { + return s; + } + + /** Reads the field, nullPolicy, max and min local params and captures the searcher, its schema, leaf count and maxDoc, plus the request context. The collapse field ordinals come from doc values when the schema field has them, otherwise from the FieldCache terms index. A fresh Object is stored as cacheId for use by equals and hashCode. */ public CollapsingPostFilter(SolrParams localParams, SolrParams params, SolrQueryRequest request) throws IOException { + this.cacheId = new Object(); + this.field = localParams.get("field"); + this.solrParams = params; + String nPolicy = localParams.get("nullPolicy", NULL_IGNORE); + if(nPolicy.equals(NULL_IGNORE)) { + this.nullPolicy = NULL_POLICY_IGNORE; + } else if (nPolicy.equals(NULL_COLLAPSE)) { + this.nullPolicy = NULL_POLICY_COLLAPSE; + } else if(nPolicy.equals((NULL_EXPAND))) { + this.nullPolicy = NULL_POLICY_EXPAND; + } /* NOTE(review): an unrecognized nullPolicy value matches no branch, so nullPolicy keeps its default of 0 (NULL_POLICY_IGNORE) - confirm that silently ignoring a bad value is intended. */ + this.searcher = request.getSearcher(); + this.leafCount = searcher.getTopReaderContext().leaves().size(); + this.maxDoc = searcher.maxDoc(); + this.schema = searcher.getSchema(); + SchemaField schemaField = schema.getField(this.field); + if(schemaField.hasDocValues()) { + this.docValues = searcher.getAtomicReader().getSortedDocValues(this.field); + } else { + this.docValues = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), this.field); + } + + this.max = localParams.get("max"); + if(this.max != null) { + this.fieldType = searcher.getSchema().getField(this.max).getType(); + } + + /* NOTE(review): when both max and min are given, fieldType ends up as the type of the min field, while getFilterCollector passes the max field name - confirm. */ this.min = localParams.get("min"); + if(this.min != null) { + this.fieldType = searcher.getSchema().getField(this.min).getType(); + } + + this.context = request.getContext(); + } + + /** Looks up each boosted unique-key value in every segment and returns the matching global doc ids, or null when boosted is null. */ private IntOpenHashSet getBoostDocs(IndexSearcher indexSearcher, Set boosted) throws IOException { + IntOpenHashSet boostDocs = null; + if(boosted != null) { + SchemaField idField = 
this.schema.getUniqueKeyField(); + String fieldName = idField.getName(); + /* Copy the boosted ids into BytesRef terms so they can be sought in the unique-key field of each segment. NOTE(review): generic type arguments appear to have been lost in extraction on several declarations in this method (see the leaves declaration below) - confirm against the original patch. */ HashSet localBoosts = new HashSet(boosted.size()*2); + Iterator boostedIt = boosted.iterator(); + while(boostedIt.hasNext()) { + localBoosts.add(new BytesRef(boostedIt.next())); + } + + boostDocs = new IntOpenHashSet(boosted.size()*2); + + Listleaves = indexSearcher.getTopReaderContext().leaves(); + TermsEnum termsEnum = null; + DocsEnum docsEnum = null; + for(AtomicReaderContext leaf : leaves) { + AtomicReader reader = leaf.reader(); + int docBase = leaf.docBase; + Bits liveDocs = reader.getLiveDocs(); + /* NOTE(review): terms is dereferenced on the next statement without a null check - confirm reader.terms() cannot return null for the unique-key field. */ Terms terms = reader.terms(fieldName); + termsEnum = terms.iterator(termsEnum); + Iterator it = localBoosts.iterator(); + while(it.hasNext()) { + BytesRef ref = it.next(); + if(termsEnum.seekExact(ref)) { + docsEnum = termsEnum.docs(liveDocs, docsEnum); + int doc = docsEnum.nextDoc(); + /* NOTE(review): presumably nextDoc() signals exhaustion with NO_MORE_DOCS rather than -1 - confirm this check. */ if(doc != -1) { + //Found the document. + /* Store the global id (segment id plus docBase) and drop the term so later segments skip it. */ boostDocs.add(doc+docBase); + it.remove(); + } + } + } + } + } + + return boostDocs; + } + + /** Creates the collapsing collector: a CollapsingFieldValueCollector when min or max was given, otherwise a CollapsingScoreCollector. Elevated ids are read from the request context under QueryElevationComponent.BOOSTED. Any exception is rethrown wrapped in a RuntimeException. */ public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) { + try { + IntOpenHashSet boostDocs = getBoostDocs(indexSearcher, (Set) (this.context.get(QueryElevationComponent.BOOSTED))); + + if(this.min != null || this.max != null) { + + return new CollapsingFieldValueCollector(this.maxDoc, + this.leafCount, + this.docValues, + this.searcher, + this.nullPolicy, + max != null ? 
this.max : this.min, + max != null, + needsScores(this.solrParams), + this.fieldType, + boostDocs); + } else { + return new CollapsingScoreCollector(this.maxDoc, this.leafCount, this.docValues, this.nullPolicy, boostDocs); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** Returns true when scores are required: no sort param is present, a sort clause starts with the token score, score is listed in the fl param, or the request context contains QueryElevationComponent.BOOSTED. */ private boolean needsScores(SolrParams params) { + + String sortSpec = params.get("sort"); + if(sortSpec != null) { + String[] sorts = sortSpec.split(","); + for(String s: sorts) { + /* NOTE(review): a clause with a leading space (as after a comma followed by a space) yields an empty parts[0], so score would not be detected - confirm. */ String parts[] = s.split(" "); + if(parts[0].equals("score")) { + return true; + } + } + } else { + //No sort specified so it defaults to score. + return true; + } + + String fl = params.get("fl"); + if(fl != null) { + String[] fls = fl.split(","); + for(String f : fls) { + if(f.trim().equals("score")) { + return true; + } + } + } + + if(this.context.containsKey(QueryElevationComponent.BOOSTED)) { + return true; + } + + return false; + } + } + + /** Scorer stand-in whose only meaningful state is the public score field returned by score(); the remaining methods return fixed values. */ private class DummyScorer extends Scorer { + + public float score; + + public DummyScorer() { + super(null); + } + + public float score() { + return score; + } + + public int freq() { + return 0; + } + + public int advance(int i) { + return -1; + } + + public int nextDoc() { + return 0; + } + + public int docID() { + return 0; + } + + public long cost() { + return 0; + } + } + + + /** Collector that keeps, per collapse-field ordinal, the global doc id with the highest score seen, and forwards the surviving docs to the delegate in finish(). */ private class CollapsingScoreCollector extends DelegatingCollector { + + private AtomicReaderContext[] contexts; + private OpenBitSet collapsedSet; + private SortedDocValues values; + private int[] ords; + private float[] scores; + private int docBase; + private int maxDoc; + private int nullPolicy; + private float nullScore = -Float.MAX_VALUE; + /* NOTE(review): nullDoc defaults to 0, which is also a valid doc id - confirm finish() cannot mark doc 0 by accident. */ private int nullDoc; + private FloatArrayList nullScores; + private IntOpenHashSet boostDocs; + + public CollapsingScoreCollector(int maxDoc, + int segments, + SortedDocValues values, + int nullPolicy, + IntOpenHashSet boostDocs) { + this.maxDoc = maxDoc; + this.contexts = new AtomicReaderContext[segments]; + this.collapsedSet = new 
OpenBitSet(maxDoc); + this.boostDocs = boostDocs; + if(this.boostDocs != null) { + //Set the elevated docs now. + Iterator it = this.boostDocs.iterator(); + while(it.hasNext()) { + IntCursor cursor = it.next(); + this.collapsedSet.fastSet(cursor.value); + } + } + this.values = values; + int valueCount = values.getValueCount(); + /* One slot per distinct collapse value: ords holds the winning global doc id (-1 means none yet), scores holds that doc's score. */ this.ords = new int[valueCount]; + Arrays.fill(this.ords, -1); + this.scores = new float[valueCount]; + Arrays.fill(this.scores, -Float.MAX_VALUE); + this.nullPolicy = nullPolicy; + if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + nullScores = new FloatArrayList(); + } + } + + public boolean acceptsDocsOutOfOrder() { + //Documents must be sent in order to this collector. + return false; + } + + /** Records the segment context under its ord and remembers its docBase for translating segment doc ids to global ids. */ public void setNextReader(AtomicReaderContext context) throws IOException { + this.contexts[context.ord] = context; + this.docBase = context.docBase; + } + + /** Looks up the collapse ordinal of the doc. For a non-negative ordinal the doc replaces the group winner when its score is higher; otherwise the elevation check and then the null policy apply. Nothing is passed to the delegate here. */ public void collect(int docId) throws IOException { + int globalDoc = docId+this.docBase; + int ord = values.getOrd(globalDoc); + if(ord > -1) { + float score = scorer.score(); + if(score > scores[ord]) { + ords[ord] = globalDoc; + scores[ord] = score; + } + } else if (this.collapsedSet.fastGet(globalDoc)) { + //The doc is elevated so score does not matter + //We just want to be sure it doesn't fall into the null policy + } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { + float score = scorer.score(); + if(score > nullScore) { + nullScore = score; + nullDoc = globalDoc; + } + } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + collapsedSet.fastSet(globalDoc); + nullScores.add(scorer.score()); + } + } + + /** Marks the winning doc of each group in collapsedSet, then replays the set bits to the delegate segment by segment, supplying scores through a DummyScorer. Returns immediately when there are no segments. */ public void finish() throws IOException { + if(contexts.length == 0) { + return; + } + + /* NOTE(review): the null-group doc is only kept when its best score is positive; a best score of 0 or below would drop it - confirm. */ if(nullScore > 0) { + this.collapsedSet.fastSet(nullDoc); + } + + /* NOTE(review): the loop header below looks truncated by extraction; presumably it iterates over ords and marks each doc id greater than -1 - confirm against the original patch. */ for(int i=0; i -1) { + collapsedSet.fastSet(doc); + } + } + + int currentContext = 0; + int currentDocBase = 0; + int nextDocBase = currentContext+1 < contexts.length ? 
contexts[currentContext+1].docBase : maxDoc; + delegate.setNextReader(contexts[currentContext]); + /* The delegate reads scores from this stand-in scorer, whose score field is set before each collect call. */ DummyScorer dummy = new DummyScorer(); + delegate.setScorer(dummy); + DocIdSetIterator it = collapsedSet.iterator(); + int docId = -1; + int nullScoreIndex = 0; + /* Replay every doc whose bit is set in collapsedSet. */ while((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + + int ord = values.getOrd(docId); + if(ord > -1) { + dummy.score = scores[ord]; + } else if(this.boostDocs != null && boostDocs.contains(docId)) { + //Elevated docs don't need a score. + dummy.score = 0F; + } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { + dummy.score = nullScore; + } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + /* Scores of expanded null docs were appended in collection order, so they are consumed sequentially here. */ dummy.score = nullScores.get(nullScoreIndex++); + } + + /* Advance to the segment containing docId, informing the delegate of each segment change. */ while(docId >= nextDocBase) { + currentContext++; + currentDocBase = contexts[currentContext].docBase; + nextDocBase = currentContext+1 < contexts.length ? contexts[currentContext+1].docBase : maxDoc; + delegate.setNextReader(contexts[currentContext]); + } + + /* The delegate expects segment-local doc ids. */ int contextDoc = docId-currentDocBase; + delegate.collect(contextDoc); + } + + /* Propagate finish() down the chain when the delegate is itself a DelegatingCollector. */ if(delegate instanceof DelegatingCollector) { + ((DelegatingCollector) delegate).finish(); + } + } + } + + /** Collector created when min or max is specified: group selection is handed to a FieldValueCollapse chosen by the numeric field type, and finish() replays the selected docs to the delegate. */ private class CollapsingFieldValueCollector extends DelegatingCollector { + private AtomicReaderContext[] contexts; + private SortedDocValues values; + + private int docBase; + private int maxDoc; + private int nullPolicy; + + private FieldValueCollapse fieldValueCollapse; + private boolean needsScores; + private IntOpenHashSet boostDocs; + + public CollapsingFieldValueCollector(int maxDoc, + int segments, + SortedDocValues values, + SolrIndexSearcher searcher, + int nullPolicy, + String field, + boolean max, + boolean needsScores, + FieldType fieldType, + IntOpenHashSet boostDocs) throws IOException{ + + this.maxDoc = maxDoc; + this.contexts = new AtomicReaderContext[segments]; + this.values = values; + int valueCount = values.getValueCount(); + this.nullPolicy = nullPolicy; + 
this.needsScores = needsScores; + this.boostDocs = boostDocs; + /* Pick the collapse strategy from the type of the min or max field. NOTE(review): for any other field type fieldValueCollapse stays null and later calls on it would fail - confirm whether an explicit error is wanted. */ if(fieldType instanceof TrieIntField) { + this.fieldValueCollapse = new IntValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + } else if(fieldType instanceof TrieLongField) { + this.fieldValueCollapse = new LongValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + } else if(fieldType instanceof TrieFloatField) { + this.fieldValueCollapse = new FloatValueCollapse(searcher, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + } + } + + public boolean acceptsDocsOutOfOrder() { + //Documents must be sent in order to this collector. + return false; + } + + /** Passes the scorer on to the collapse strategy. */ public void setScorer(Scorer scorer) { + this.fieldValueCollapse.setScorer(scorer); + } + + /** Records the segment context and its docBase and forwards the context to the collapse strategy. */ public void setNextReader(AtomicReaderContext context) throws IOException { + this.contexts[context.ord] = context; + this.docBase = context.docBase; + this.fieldValueCollapse.setNextReader(context); + } + + /** Resolves the collapse ordinal from the global doc id and hands ordinal, segment-local id and global id to the collapse strategy. */ public void collect(int docId) throws IOException { + int globalDoc = docId+this.docBase; + int ord = values.getOrd(globalDoc); + fieldValueCollapse.collapse(ord, docId, globalDoc); + } + + /** Replays the docs chosen by the collapse strategy to the delegate; returns immediately when there are no segments. */ public void finish() throws IOException { + if(contexts.length == 0) { + return; + } + + int currentContext = 0; + int currentDocBase = 0; + int nextDocBase = currentContext+1 < contexts.length ? 
contexts[currentContext+1].docBase : maxDoc; + delegate.setNextReader(contexts[currentContext]); + /* The delegate reads scores from this stand-in scorer, whose score field is set before each collect call. */ DummyScorer dummy = new DummyScorer(); + delegate.setScorer(dummy); + DocIdSetIterator it = fieldValueCollapse.getCollapsedSet().iterator(); + int docId = -1; + int nullScoreIndex = 0; + float[] scores = fieldValueCollapse.getScores(); + FloatArrayList nullScores = fieldValueCollapse.getNullScores(); + float nullScore = fieldValueCollapse.getNullScore(); + while((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + + /* Scores are only filled in when the request needs them. */ if(this.needsScores){ + int ord = values.getOrd(docId); + if(ord > -1) { + dummy.score = scores[ord]; + } else if (boostDocs != null && boostDocs.contains(docId)) { + //It's an elevated doc so no score is needed + dummy.score = 0F; + } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { + dummy.score = nullScore; + } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + dummy.score = nullScores.get(nullScoreIndex++); + } + } + + /* Advance to the segment containing docId. */ while(docId >= nextDocBase) { + currentContext++; + currentDocBase = contexts[currentContext].docBase; + nextDocBase = currentContext+1 < contexts.length ? 
contexts[currentContext+1].docBase : maxDoc; + delegate.setNextReader(contexts[currentContext]); + } + + /* The delegate expects segment-local doc ids. */ int contextDoc = docId-currentDocBase; + delegate.collect(contextDoc); + } + + /* Propagate finish() down the chain when the delegate is itself a DelegatingCollector. */ if(delegate instanceof DelegatingCollector) { + ((DelegatingCollector) delegate).finish(); + } + } + } + + /** Base class for min/max collapsing. Holds the per-ordinal winning doc ids (ords), optional scores, the null-policy state and the bit set of docs to keep; subclasses implement collapse() and setNextReader() for one numeric type. */ private abstract class FieldValueCollapse { + protected int nullPolicy; + protected int[] ords; + protected Scorer scorer; + protected FloatArrayList nullScores; + protected float nullScore; + protected float[] scores; + protected OpenBitSet collapsedSet; + protected IntOpenHashSet boostDocs; + protected int nullDoc = -1; + protected boolean needsScores; + protected boolean max; + protected String field; + + /** Considers one collected doc, identified by its collapse ordinal, its segment-local id and its global id. */ public abstract void collapse(int ord, int contextDoc, int globalDoc) throws IOException; + /** Called for each new segment so the subclass can load that segment's field values. */ public abstract void setNextReader(AtomicReaderContext context) throws IOException; + + /** Stores the settings, sizes collapsedSet to the searcher's maxDoc and pre-marks every elevated doc in it. */ public FieldValueCollapse(SolrIndexSearcher searcher, + String field, + int nullPolicy, + boolean max, + boolean needsScores, + IntOpenHashSet boostDocs) { + this.field = field; + this.nullPolicy = nullPolicy; + this.max = max; + this.needsScores = needsScores; + this.collapsedSet = new OpenBitSet(searcher.maxDoc()); + this.boostDocs = boostDocs; + if(this.boostDocs != null) { + Iterator it = boostDocs.iterator(); + while(it.hasNext()) { + IntCursor cursor = it.next(); + this.collapsedSet.fastSet(cursor.value); + } + } + } + + /** Adds the null-group doc (when one was chosen) and the per-group winners to collapsedSet and returns it. */ public OpenBitSet getCollapsedSet() { + if(nullDoc > -1) { + this.collapsedSet.fastSet(nullDoc); + } + + /* NOTE(review): the loop header below looks truncated by extraction; presumably it iterates over ords and marks each doc id greater than -1 - confirm against the original patch. */ for(int i=0; i -1) { + collapsedSet.fastSet(doc); + } + } + + return collapsedSet; + } + + public void setScorer(Scorer scorer) { + this.scorer = scorer; + } + + public FloatArrayList getNullScores() { + return nullScores; + } + + public float getNullScore() { + return this.nullScore; + } + + public float[] getScores() { + return scores; + } + } + + /** Collapse strategy for int-valued min/max fields. */ private class IntValueCollapse extends FieldValueCollapse { + + private FieldCache.Ints vals; + private IntCompare comp; + 
private int nullVal; + private int[] ordVals; + + public IntValueCollapse(SolrIndexSearcher searcher, + String field, + int nullPolicy, + int[] ords, + boolean max, + boolean needsScores, + IntOpenHashSet boostDocs) throws IOException { + super(searcher, field, nullPolicy, max, needsScores, boostDocs); + this.ords = ords; + this.ordVals = new int[ords.length]; + Arrays.fill(ords, -1); + + if(max) { + comp = new MaxIntComp(); + Arrays.fill(ordVals, Integer.MIN_VALUE); + } else { + comp = new MinIntComp(); + Arrays.fill(ordVals, Integer.MAX_VALUE); + this.nullVal = Integer.MAX_VALUE; + } + + if(needsScores) { + this.scores = new float[ords.length]; + if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + nullScores = new FloatArrayList(); + } + } + } + + public void setNextReader(AtomicReaderContext context) throws IOException { + this.vals = FieldCache.DEFAULT.getInts(context.reader(), this.field, false); + } + + public void collapse(int ord, int contextDoc, int globalDoc) throws IOException { + int val = vals.get(contextDoc); + if(ord > -1) { + if(comp.test(val, ordVals[ord])) { + ords[ord] = globalDoc; + ordVals[ord] = val; + if(needsScores) { + scores[ord] = scorer.score(); + } + } + } else if(this.collapsedSet.fastGet(globalDoc)) { + // Elevated doc so do nothing. 
+ } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { + if(comp.test(val, nullVal)) { + nullVal = val; + nullDoc = globalDoc; + if(needsScores) { + nullScore = scorer.score(); + } + } + } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + this.collapsedSet.fastSet(globalDoc); + if(needsScores) { + nullScores.add(scorer.score()); + } + } + } + } + + private class LongValueCollapse extends FieldValueCollapse { + + private FieldCache.Longs vals; + private LongCompare comp; + private long nullVal; + private long[] ordVals; + + public LongValueCollapse(SolrIndexSearcher searcher, + String field, + int nullPolicy, + int[] ords, + boolean max, + boolean needsScores, + IntOpenHashSet boostDocs) throws IOException { + super(searcher, field, nullPolicy, max, needsScores, boostDocs); + this.ords = ords; + this.ordVals = new long[ords.length]; + Arrays.fill(ords, -1); + + if(max) { + comp = new MaxLongComp(); + Arrays.fill(ordVals, Long.MIN_VALUE); + } else { + this.nullVal = Long.MAX_VALUE; + comp = new MinLongComp(); + Arrays.fill(ordVals, Long.MAX_VALUE); + } + + if(needsScores) { + this.scores = new float[ords.length]; + if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + nullScores = new FloatArrayList(); + } + } + } + + public void setNextReader(AtomicReaderContext context) throws IOException { + this.vals = FieldCache.DEFAULT.getLongs(context.reader(), this.field, false); + } + + public void collapse(int ord, int contextDoc, int globalDoc) throws IOException { + long val = vals.get(contextDoc); + if(ord > -1) { + if(comp.test(val, ordVals[ord])) { + ords[ord] = globalDoc; + ordVals[ord] = val; + if(needsScores) { + scores[ord] = scorer.score(); + } + } + } else if (this.collapsedSet.fastGet(globalDoc)) { + //Elevated doc so do nothing + } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { + if(comp.test(val, nullVal)) { + nullVal = val; + nullDoc = globalDoc; + if(needsScores) { + nullScore = 
scorer.score(); + } + } + } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + this.collapsedSet.fastSet(globalDoc); + if(needsScores) { + nullScores.add(scorer.score()); + } + } + } + } + + private class FloatValueCollapse extends FieldValueCollapse { + + private FieldCache.Floats vals; + private FloatCompare comp; + private float nullVal; + private float[] ordVals; + + public FloatValueCollapse(SolrIndexSearcher searcher, + String field, + int nullPolicy, + int[] ords, + boolean max, + boolean needsScores, + IntOpenHashSet boostDocs) throws IOException { + super(searcher, field, nullPolicy, max, needsScores, boostDocs); + this.ords = ords; + this.ordVals = new float[ords.length]; + Arrays.fill(ords, -1); + + if(max) { + comp = new MaxFloatComp(); + Arrays.fill(ordVals, -Float.MAX_VALUE ); + } else { + this.nullVal = Float.MAX_VALUE; + comp = new MinFloatComp(); + Arrays.fill(ordVals, Float.MAX_VALUE); + } + + if(needsScores) { + this.scores = new float[ords.length]; + if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + nullScores = new FloatArrayList(); + } + } + } + + public void setNextReader(AtomicReaderContext context) throws IOException { + this.vals = FieldCache.DEFAULT.getFloats(context.reader(), this.field, false); + } + + public void collapse(int ord, int contextDoc, int globalDoc) throws IOException { + float val = vals.get(contextDoc); + if(ord > -1) { + if(comp.test(val, ordVals[ord])) { + ords[ord] = globalDoc; + ordVals[ord] = val; + if(needsScores) { + scores[ord] = scorer.score(); + } + } + } else if (this.collapsedSet.fastGet(globalDoc)) { + //Elevated doc so do nothing + } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { + if(comp.test(val, nullVal)) { + nullVal = val; + nullDoc = globalDoc; + if(needsScores) { + nullScore = scorer.score(); + } + } + } else if(this.nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { + this.collapsedSet.fastSet(globalDoc); + if(needsScores) { + 
nullScores.add(scorer.score()); + } + } + } + } + + private interface IntCompare { + public boolean test(int i1, int i2); + } + + private interface FloatCompare { + public boolean test(float i1, float i2); + } + + private interface LongCompare { + public boolean test(long i1, long i2); + } + + private class MaxIntComp implements IntCompare { + public boolean test(int i1, int i2) { + return i1 > i2; + } + } + + private class MinIntComp implements IntCompare { + public boolean test(int i1, int i2) { + return i1 < i2; + } + } + + private class MaxFloatComp implements FloatCompare { + public boolean test(float i1, float i2) { + return i1 > i2; + } + } + + private class MinFloatComp implements FloatCompare { + public boolean test(float i1, float i2) { + return i1 < i2; + } + } + + private class MaxLongComp implements LongCompare { + public boolean test(long i1, long i2) { + return i1 > i2; + } + } + + private class MinLongComp implements LongCompare { + public boolean test(long i1, long i2) { + return i1 < i2; + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java index e36759c74b9..4cbe8b38dfc 100644 --- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java @@ -51,7 +51,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI SwitchQParserPlugin.NAME, SwitchQParserPlugin.class, MaxScoreQParserPlugin.NAME, MaxScoreQParserPlugin.class, BlockJoinParentQParserPlugin.NAME, BlockJoinParentQParserPlugin.class, - BlockJoinChildQParserPlugin.NAME, BlockJoinChildQParserPlugin.class + BlockJoinChildQParserPlugin.NAME, BlockJoinChildQParserPlugin.class, + CollapsingQParserPlugin.NAME, CollapsingQParserPlugin.class }; /** return a {@link QParser} */ diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml 
b/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml new file mode 100644 index 00000000000..c2b0d73a36e --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-collapseqparser.xml @@ -0,0 +1,578 @@ + + + + + + + + + + + + ${solr.data.dir:} + + + + 1000000 + 2000000 + 3000000 + 4000000 + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + ${solr.commitwithin.softcommit:true} + + + + + + + 1024 + + + + + + + + + + + + true + + + + + + 10 + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + true + + + + + + dismax + *:* + 0.01 + + text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0 + + + text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5 + + + ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3 + + + 3<-1 5<-2 6<90% + + 100 + + + + + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + 4 + true + text,name,subject,title,whitetok + + + + + + + + lowerpunctfilt + + + default + lowerfilt + spellchecker1 + false + + + direct + DirectSolrSpellChecker + lowerfilt + 3 + + + wordbreak + solr.WordBreakSolrSpellChecker + lowerfilt + true + true + 10 + + + multipleFields + lowerfilt1and2 + spellcheckerMultipleFields + false + + + + jarowinkler + lowerfilt + + org.apache.lucene.search.spell.JaroWinklerDistance + spellchecker2 + + + + solr.FileBasedSpellChecker + external + spellings.txt + UTF-8 + spellchecker3 + + + + freq + lowerfilt + spellcheckerFreq + + freq + false + + + fqcn + lowerfilt + spellcheckerFQCN + org.apache.solr.spelling.SampleComparator + false + + + perDict + org.apache.solr.handler.component.DummyCustomParamSpellChecker + lowerfilt + + + + + + + + termsComp + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + direct + false + false + 1 + + + spellcheck + + + + + default + wordbreak + 20 + + + spellcheck + + + + + direct + wordbreak + 20 + + + spellcheck + + + + + dismax + lowerfilt1^1 + + + spellcheck + + + + 
+ + + + + + + + + + + tvComponent + + + + + + string + elevate.xml + + + + + + explicit + + + elevate + + + + + + + + + + + + 100 + + + + + + 70 + + + + + + + ]]> + ]]> + + + + + + + + + + + + + 10 + .,!? + + + + + + WORD + en + US + + + + + + + + + + max-age=30, public + + + + + + + explicit + true + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + + + + false + true + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + false + false + id + + org.apache.solr.update.processor.Lookup3Signature + + + + + + + true + non_indexed_signature_sS + false + v_t,t_field + org.apache.solr.update.processor.TextProfileSignature + + + + + + + uniq + uniq2 + uniq3 + + + + + + + + + regex_dup_A_s + x + x_x + + + + regex_dup_B_s + x + x_x + + + + + + + + regex_dup_A_s + x + x_x + + + regex_dup_B_s + x + x_x + + + + + + diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index 57fd43b6df6..626999910eb 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -195,6 +195,16 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { } } + public void testQueryCollapse() throws Exception { + SolrQueryRequest req = req("myField","foo_s"); + try { + assertQueryEquals("collapse", req, + "{!collapse field=$myField}"); + } finally { + req.close(); + } + } + public void testQueryNested() throws Exception { SolrQueryRequest req = req("df", "foo_s"); try { diff --git a/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java new file mode 100644 index 00000000000..f79bb511adc --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.*; + +public class TestCollapseQParserPlugin extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-collapseqparser.xml", "schema11.xml"); + } + + @Override + @Before + public void setUp() throws Exception { + // if you override setUp or tearDown, you better call + // the super classes version + super.setUp(); + clearIndex(); + assertU(commit()); + } + + @Test + public void testCollapseQueries() throws Exception { + String[] doc = {"id","1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", "2000"}; + assertU(adoc(doc)); + String[] doc1 = {"id","2", "term_s","YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", "test_tf", "200"}; + assertU(adoc(doc1)); + + String[] doc2 = {"id","3", "term_s", "YYYY", "test_ti", "5000", "test_tl", "100", "test_tf", "200"}; + assertU(adoc(doc2)); + + String[] doc3 = {"id","4", "term_s", "YYYY", "test_ti", "500", 
"test_tl", "1000", "test_tf", "2000"}; + assertU(adoc(doc3)); + + assertU(commit()); + + //Test collapse by score + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']"); + + //Test collapse by score with elevation + + params = new ModifiableSolrParams(); + params.add("q", "YYYY"); + params.add("fq", "{!collapse field=group_s nullPolicy=collapse}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("qf", "term_s"); + params.add("qt", "/elevate"); + assertQ(req(params), "*[count(//doc)=3]", "//doc[./int[1][@name='test_ti']='5']"); + + //Test collapse by min int field + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s min=test_ti}"); + assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='5']"); + + //Test collapse by max int field + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s max=test_ti}"); + assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']"); + + //Test collapse by min long field + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s min=test_tl}"); + assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='5']"); + + //Test collapse by max long field + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s max=test_tl}"); + assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']"); + + //Test collapse by min float field + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s min=test_tf}"); + assertQ(req(params), 
"*[count(//doc)=1]", "//doc[./int[@name='test_ti']='50']"); + + //Test collapse by min float field + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s max=test_tf}"); + assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='5']"); + + //Test nullPolicy expand + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s max=test_tf nullPolicy=expand}"); + assertQ(req(params), "*[count(//doc)=3]"); + + //Test nullPolicy collapse + params = new ModifiableSolrParams(); + params.add("q", "test_ti:(500 5000)"); + params.add("fq", "{!collapse field=group_s max=test_tf nullPolicy=collapse}"); + assertQ(req(params), "*[count(//doc)=1]", "//doc[./int[@name='test_ti']='500']"); + } +}