From 3dc5ed33c5f13309c22716c7d18b726d8a093622 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Mon, 16 Mar 2015 04:40:23 +0000 Subject: [PATCH] SOLR-7214: JSON Facet API git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1666856 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 9 + .../handler/dataimport/DataImportHandler.java | 6 +- .../solr/handler/RequestHandlerBase.java | 3 +- .../handler/component/SearchComponent.java | 2 + .../solr/handler/component/SearchHandler.java | 2 + .../apache/solr/request/SolrQueryRequest.java | 7 + .../solr/request/SolrQueryRequestBase.java | 11 + .../apache/solr/request/UnInvertedField.java | 281 ------ .../apache/solr/request/json/JSONUtil.java | 87 ++ .../apache/solr/request/json/ObjectUtil.java | 118 +++ .../apache/solr/request/json/RequestUtil.java | 319 +++++++ .../org/apache/solr/request/json/package.html | 27 + .../solr/request/macro/MacroExpander.java | 183 ++++ .../apache/solr/request/macro/package.html | 27 + .../java/org/apache/solr/search/DocSet.java | 4 +- .../org/apache/solr/search/DocSetBase.java | 7 + .../apache/solr/search/FunctionQParser.java | 75 +- .../org/apache/solr/search/QueryContext.java | 104 +++ .../org/apache/solr/search/QueryParsing.java | 40 +- .../apache/solr/search/SolrIndexSearcher.java | 4 +- .../apache/solr/search/SolrReturnFields.java | 2 +- .../apache/solr/search/ValueSourceParser.java | 88 ++ .../solr/search/facet/AggValueSource.java | 62 ++ .../org/apache/solr/search/facet/AvgAgg.java | 55 ++ .../apache/solr/search/facet/CountAgg.java | 36 + .../apache/solr/search/facet/FacetField.java | 813 +++++++++++++++++ .../apache/solr/search/facet/FacetModule.java | 824 ++++++++++++++++++ .../apache/solr/search/facet/FacetQuery.java | 60 ++ .../apache/solr/search/facet/FacetRange.java | 374 ++++++++ .../solr/search/facet/FacetRequest.java | 720 +++++++++++++++ .../apache/solr/search/facet/FieldUtil.java | 66 ++ .../apache/solr/search/facet/LegacyFacet.java | 318 +++++++ .../org/apache/solr/search/facet/MaxAgg.java | 56 ++ .../org/apache/solr/search/facet/MinAgg.java | 53 ++ .../search/facet/SimpleAggValueSource.java | 66 ++ .../org/apache/solr/search/facet/SlotAcc.java | 532 +++++++++++ .../solr/search/facet/StrAggValueSource.java | 58 ++ .../org/apache/solr/search/facet/SumAgg.java | 40 + .../apache/solr/search/facet/SumsqAgg.java | 38 + .../solr/search/facet/UnInvertedField.java | 763 ++++++++++++++++ .../apache/solr/search/facet/UniqueAgg.java | 82 ++ .../org/apache/solr/search/facet/package.html | 28 + .../org/apache/solr/util/SolrPluginUtils.java | 17 +- .../solr/collection1/conf/schema_latest.xml | 751 ++++++++++++++++ .../apache/solr/search/QueryEqualityTest.java | 14 + .../solr/search/facet/TestJsonFacets.java | 673 ++++++++++++++ .../common/params/ModifiableSolrParams.java | 5 + .../common/params/MultiMapSolrParams.java | 66 +- .../apache/solr/common/params/SolrParams.java | 9 +- .../apache/solr/common/util/NamedList.java | 6 + .../solr/common/util/SimpleOrderedMap.java | 4 + .../java/org/apache/solr/SolrTestCaseHS.java | 549 ++++++++++++ 52 files changed, 8205 insertions(+), 339 deletions(-) delete mode 100644 solr/core/src/java/org/apache/solr/request/UnInvertedField.java create mode 100644 solr/core/src/java/org/apache/solr/request/json/JSONUtil.java create mode 100644 solr/core/src/java/org/apache/solr/request/json/ObjectUtil.java create mode 100644 solr/core/src/java/org/apache/solr/request/json/RequestUtil.java create mode 100644 solr/core/src/java/org/apache/solr/request/json/package.html 
create mode 100644 solr/core/src/java/org/apache/solr/request/macro/MacroExpander.java create mode 100644 solr/core/src/java/org/apache/solr/request/macro/package.html create mode 100644 solr/core/src/java/org/apache/solr/search/QueryContext.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/AggValueSource.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/CountAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetField.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetModule.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetRange.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/LegacyFacet.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/MinAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/SimpleAggValueSource.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/StrAggValueSource.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/SumAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/package.html create mode 100755 solr/core/src/test-files/solr/collection1/conf/schema_latest.xml create mode 100644 solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java create mode 100644 solr/test-framework/src/java/org/apache/solr/SolrTestCaseHS.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 316ddda481a..8ae25eba878 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -166,6 +166,15 @@ New Features * SOLR-7218: lucene/solr query syntax to give any query clause a constant score. General Form: ^= Example: (color:blue color:green)^=2.0 text:shoes + (yonik) + +* SOLR-7214: New Facet module with a JSON API, facet functions, aggregations, and analytics. + Any facet type can have sub facets, and facets can be sorted by arbitrary aggregation functions. 
+ Examples: + json.facet={x:'avg(price)', y:'unique(color)'} + json.facet={count1:{query:"price:[10 TO 20]"}, count2:{query:"color:blue AND popularity:[0 TO 50]"} } + json.facet={categories:{terms:{field:cat, sort:"x desc", facet:{x:"avg(price)", y:"sum(price)"}}}} + (yonik) Bug Fixes diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java index 2b65894b2d3..0b68da5f9de 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java @@ -21,6 +21,7 @@ import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.UpdateParams; @@ -84,11 +85,14 @@ public class DataImportHandler extends RequestHandlerBase implements public DataImporter getImporter() { return this.importer; } - + @Override @SuppressWarnings("unchecked") public void init(NamedList args) { super.init(args); + Map macro = new HashMap<>(); + macro.put("expandMacros", "false"); + defaults = SolrParams.wrapDefaults(defaults, new MapSolrParams(macro)); } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java index c79cea69035..7c142ef03db 100644 --- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java @@ -25,6 +25,7 @@ import org.apache.solr.core.PluginInfo; import org.apache.solr.core.PluginBag; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrInfoMBean; +import org.apache.solr.handler.component.SearchHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestHandler; import org.apache.solr.response.SolrQueryResponse; @@ -136,7 +137,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo TimerContext timer = requestTimes.time(); try { if(pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM)) req.getContext().put(USEPARAM,pluginInfo.attributes.get(USEPARAM)); - SolrPluginUtils.setDefaults(req,defaults,appends,invariants); + SolrPluginUtils.setDefaults(this, req, defaults, appends, invariants); req.getContext().remove(USEPARAM); rsp.setHttpCaching(httpCaching); handleRequestBody( req, rsp ); diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java index 937818e8e91..6567b677053 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java @@ -25,6 +25,7 @@ import java.util.Map; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrInfoMBean; +import org.apache.solr.search.facet.FacetModule; import org.apache.solr.util.plugin.NamedListInitializedPlugin; /** @@ -134,6 +135,7 @@ public abstract class SearchComponent implements SolrInfoMBean, NamedListInitial 
map.put(HighlightComponent.COMPONENT_NAME, HighlightComponent.class); map.put(QueryComponent.COMPONENT_NAME, QueryComponent.class); map.put(FacetComponent.COMPONENT_NAME, FacetComponent.class); + map.put(FacetModule.COMPONENT_NAME, FacetModule.class); map.put(MoreLikeThisComponent.COMPONENT_NAME, MoreLikeThisComponent.class); map.put(StatsComponent.COMPONENT_NAME, StatsComponent.class); map.put(DebugComponent.COMPONENT_NAME, DebugComponent.class); diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index 39d5a524845..391ceaea53d 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -35,6 +35,7 @@ import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.search.SolrQueryTimeoutImpl; +import org.apache.solr.search.facet.FacetModule; import org.apache.solr.util.RTimer; import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.plugin.PluginInfoInitialized; @@ -75,6 +76,7 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware , ArrayList names = new ArrayList<>(6); names.add( QueryComponent.COMPONENT_NAME ); names.add( FacetComponent.COMPONENT_NAME ); + names.add( FacetModule.COMPONENT_NAME ); names.add( MoreLikeThisComponent.COMPONENT_NAME ); names.add( HighlightComponent.COMPONENT_NAME ); names.add( StatsComponent.COMPONENT_NAME ); diff --git a/solr/core/src/java/org/apache/solr/request/SolrQueryRequest.java b/solr/core/src/java/org/apache/solr/request/SolrQueryRequest.java index 2777e1dd4d8..8c9fec7600c 100644 --- a/solr/core/src/java/org/apache/solr/request/SolrQueryRequest.java +++ b/solr/core/src/java/org/apache/solr/request/SolrQueryRequest.java @@ -97,6 +97,13 @@ public interface SolrQueryRequest { * @param params The new set of parameter */ public void forward(String handler, SolrParams params, SolrQueryResponse rsp); + + /** Returns any associated JSON (or null if none) in deserialized generic form. 
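+   * For example (hypothetical request body): {"query":"*:*", "limit":10} would be
+   * returned as a Map holding the String "*:*" and the Long 10.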
+ * Java classes used to represent the JSON are as follows: Map, List, String, Long, Double, Boolean + */ + public Map getJSON(); + + public void setJSON(Map json); } diff --git a/solr/core/src/java/org/apache/solr/request/SolrQueryRequestBase.java b/solr/core/src/java/org/apache/solr/request/SolrQueryRequestBase.java index 5cb2ff3e7e9..70e83cbf0c7 100644 --- a/solr/core/src/java/org/apache/solr/request/SolrQueryRequestBase.java +++ b/solr/core/src/java/org/apache/solr/request/SolrQueryRequestBase.java @@ -50,6 +50,7 @@ public abstract class SolrQueryRequestBase implements SolrQueryRequest, Closeabl protected SolrParams params; protected Map context; protected Iterable streams; + protected Map json; private final RTimer requestTimer; @@ -172,4 +173,14 @@ public abstract class SolrQueryRequestBase implements SolrQueryRequest, Closeabl } + @Override + public Map getJSON() { + return json; + } + + @Override + public void setJSON(Map json) { + this.json = json; + } + } diff --git a/solr/core/src/java/org/apache/solr/request/UnInvertedField.java b/solr/core/src/java/org/apache/solr/request/UnInvertedField.java deleted file mode 100644 index def31a1ec63..00000000000 --- a/solr/core/src/java/org/apache/solr/request/UnInvertedField.java +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.request; - -import java.io.IOException; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.uninverting.DocTermOrds; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.solr.common.SolrException; -import org.apache.solr.core.SolrCore; -import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.TrieField; -import org.apache.solr.search.DocSet; -import org.apache.solr.search.SolrCache; -import org.apache.solr.search.SolrIndexSearcher; - -/** - * - * Final form of the un-inverted field: - * Each document points to a list of term numbers that are contained in that document. - * - * Term numbers are in sorted order, and are encoded as variable-length deltas from the - * previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A - * term number of 0 signals the end of the termNumber list. - * - * There is a single int[maxDoc()] which either contains a pointer into a byte[] for - * the termNumber lists, or directly contains the termNumber list if it fits in the 4 - * bytes of an integer. If the first byte in the integer is 1, the next 3 bytes - * are a pointer into a byte[] where the termNumber list starts. 
- * - * There are actually 256 byte arrays, to compensate for the fact that the pointers - * into the byte arrays are only 3 bytes long. The correct byte array for a document - * is a function of its id. - * - * To save space and speed up faceting, any term that matches enough documents will - * not be un-inverted... it will be skipped while building the un-inverted field structure, - * and will use a set intersection method during faceting. - * - * To further save memory, the terms (the actual string values) are not all stored in - * memory, but a TermIndex is used to convert term numbers to term values only - * for the terms needed after faceting has completed. Only every 128th term value - * is stored, along with its corresponding term number, and this is used as an - * index to find the closest term and iterate until the desired number is hit (very - * much like Lucene's own internal term index). - * - */ -public class UnInvertedField extends DocTermOrds { - - static class TopTerm { - BytesRef term; - int termNum; - - long memSize() { - return 8 + // obj header - 8 + 8 +term.length + //term - 4; // int - } - } - - long memsz; - final AtomicLong use = new AtomicLong(); // number of uses - - int[] maxTermCounts = new int[1024]; - - final Map bigTerms = new LinkedHashMap<>(); - - private SolrIndexSearcher.DocsEnumState deState; - private final SolrIndexSearcher searcher; - - private static UnInvertedField uifPlaceholder = new UnInvertedField(); - - private UnInvertedField() { // Dummy for synchronization. - super("fake", 0, 0); // cheapest initialization I can find. - searcher = null; - } - - @Override - protected void visitTerm(TermsEnum te, int termNum) throws IOException { - - if (termNum >= maxTermCounts.length) { - // resize by doubling - for very large number of unique terms, expanding - // by 4K and resultant GC will dominate uninvert times. Resize at end if material - int[] newMaxTermCounts = new int[maxTermCounts.length*2]; - System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum); - maxTermCounts = newMaxTermCounts; - } - - final BytesRef term = te.term(); - - if (te.docFreq() > maxTermDocFreq) { - TopTerm topTerm = new TopTerm(); - topTerm.term = BytesRef.deepCopyOf(term); - topTerm.termNum = termNum; - bigTerms.put(topTerm.termNum, topTerm); - - if (deState == null) { - deState = new SolrIndexSearcher.DocsEnumState(); - deState.fieldName = field; - deState.liveDocs = searcher.getLeafReader().getLiveDocs(); - deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail? - deState.postingsEnum = postingsEnum; - deState.minSetSizeCached = maxTermDocFreq; - } - - postingsEnum = deState.postingsEnum; - DocSet set = searcher.getDocSet(deState); - maxTermCounts[termNum] = set.size(); - } - } - - @Override - protected void setActualDocFreq(int termNum, int docFreq) { - maxTermCounts[termNum] = docFreq; - } - - public long memSize() { - // can cache the mem size since it shouldn't change - if (memsz!=0) return memsz; - long sz = super.ramBytesUsed(); - sz += 8*8 + 32; // local fields - sz += bigTerms.size() * 64; - for (TopTerm tt : bigTerms.values()) { - sz += tt.memSize(); - } - if (maxTermCounts != null) - sz += maxTermCounts.length * 4; - if (indexedTermsArray != null) { - // assume 8 byte references? 
- sz += 8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings; - } - memsz = sz; - return sz; - } - - public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException { - super(field, - // threshold, over which we use set intersections instead of counting - // to (1) save memory, and (2) speed up faceting. - // Add 2 for testing purposes so that there will always be some terms under - // the threshold even when the index is very - // small. - searcher.maxDoc()/20 + 2, - DEFAULT_INDEX_INTERVAL_BITS); - //System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc()); - - final String prefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field)); - this.searcher = searcher; - try { - LeafReader r = searcher.getLeafReader(); - uninvert(r, r.getLiveDocs(), prefix == null ? null : new BytesRef(prefix)); - } catch (IllegalStateException ise) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise.getMessage()); - } - if (tnums != null) { - for(byte[] target : tnums) { - if (target != null && target.length > (1<<24)*.9) { - SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length); - } - } - } - - // free space if outrageously wasteful (tradeoff memory/cpu) - if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste! - int[] newMaxTermCounts = new int[numTermsInField]; - System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField); - maxTermCounts = newMaxTermCounts; - } - - SolrCore.log.info("UnInverted multi-valued field " + toString()); - //System.out.println("CREATED: " + toString() + " ti.index=" + ti.index); - } - - public int getNumTerms() { - return numTermsInField; - } - - String getReadableValue(BytesRef termval, FieldType ft, CharsRefBuilder charsRef) { - return ft.indexedToReadable(termval, charsRef).toString(); - } - - /** may return a reused BytesRef */ - BytesRef getTermValue(TermsEnum te, int termNum) throws IOException { - //System.out.println("getTermValue termNum=" + termNum + " this=" + this + " numTerms=" + numTermsInField); - if (bigTerms.size() > 0) { - // see if the term is one of our big terms. - TopTerm tt = bigTerms.get(termNum); - if (tt != null) { - //System.out.println(" return big " + tt.term); - return tt.term; - } - } - - return lookupTerm(te, termNum); - } - - @Override - public String toString() { - final long indexSize = indexedTermsArray == null ? 0 : (8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings); // assume 8 byte references? 
- return "{field=" + field - + ",memSize="+memSize() - + ",tindexSize="+indexSize - + ",time="+total_time - + ",phase1="+phase1_time - + ",nTerms="+numTermsInField - + ",bigTerms="+bigTerms.size() - + ",termInstances="+termInstances - + ",uses="+use.get() - + "}"; - } - - ////////////////////////////////////////////////////////////////// - //////////////////////////// caching ///////////////////////////// - ////////////////////////////////////////////////////////////////// - - public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException { - SolrCache cache = searcher.getFieldValueCache(); - if (cache == null) { - return new UnInvertedField(field, searcher); - } - UnInvertedField uif = null; - Boolean doWait = false; - synchronized (cache) { - uif = cache.get(field); - if (uif == null) { - /** - * We use this place holder object to pull the UninvertedField construction out of the sync - * so that if many fields are accessed in a short time, the UninvertedField can be - * built for these fields in parallel rather than sequentially. - */ - cache.put(field, uifPlaceholder); - } else { - if (uif != uifPlaceholder) { - return uif; - } - doWait = true; // Someone else has put the place holder in, wait for that to complete. - } - } - while (doWait) { - try { - synchronized (cache) { - uif = cache.get(field); // Should at least return the placeholder, NPE if not is OK. - if (uif != uifPlaceholder) { // OK, another thread put this in the cache we should be good. - return uif; - } - cache.wait(); - } - } catch (InterruptedException e) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Thread interrupted in getUninvertedField."); - } - } - - uif = new UnInvertedField(field, searcher); - synchronized (cache) { - cache.put(field, uif); // Note, this cleverly replaces the placeholder. - cache.notifyAll(); - } - - return uif; - } -} diff --git a/solr/core/src/java/org/apache/solr/request/json/JSONUtil.java b/solr/core/src/java/org/apache/solr/request/json/JSONUtil.java new file mode 100644 index 00000000000..5356901f1be --- /dev/null +++ b/solr/core/src/java/org/apache/solr/request/json/JSONUtil.java @@ -0,0 +1,87 @@ +package org.apache.solr.request.json; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.noggit.JSONParser; + +public class JSONUtil { + + + public static boolean advanceToMapKey(JSONParser parser, String key, boolean deepSearch) throws IOException { + for (;;) { + int event = parser.nextEvent(); + switch (event) { + case JSONParser.STRING: + if (key != null && parser.wasKey()) { + String val = parser.getString(); + if (key.equals(val)) { + return true; + } + } + break; + case JSONParser.OBJECT_END: + return false; + case JSONParser.OBJECT_START: + if (deepSearch) { + boolean found = advanceToMapKey(parser, key, true); + if (found) { + return true; + } + } else { + advanceToMapKey(parser, null, false); + } + break; + case JSONParser.ARRAY_START: + skipArray(parser, key, deepSearch); + break; + } + } + } + + public static void skipArray(JSONParser parser, String key, boolean deepSearch) throws IOException { + for (;;) { + int event = parser.nextEvent(); + switch (event) { + case JSONParser.OBJECT_START: + advanceToMapKey(parser, key, deepSearch); + break; + case JSONParser.ARRAY_START: + skipArray(parser, key, deepSearch); + break; + case JSONParser.ARRAY_END: + return; + } + } + } + + public static void expect(JSONParser parser, int parserEventType) throws IOException { + int event = parser.nextEvent(); + if (event != parserEventType) { + throw new IOException("JSON Parser: expected " + JSONParser.getEventString(parserEventType) + " but got " + JSONParser.getEventString(event) ); + } + } + +} diff --git a/solr/core/src/java/org/apache/solr/request/json/ObjectUtil.java b/solr/core/src/java/org/apache/solr/request/json/ObjectUtil.java new file mode 100644 index 00000000000..66ac8274cfc --- /dev/null +++ b/solr/core/src/java/org/apache/solr/request/json/ObjectUtil.java @@ -0,0 +1,118 @@ +package org.apache.solr.request.json; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.noggit.JSONParser; + +public class ObjectUtil { + + public static class ConflictHandler { + protected boolean isList(Map container, List path, String key, Object current, Object previous) { + return key!=null && ("fields".equals(key) || "filter".equals(key)); + } + + public void handleConflict(Map container, List path, String key, Object current, Object previous) { + boolean handleAsList = isList(container, path, key, current, previous); + if (handleAsList) { + container.put(key, makeList(current, previous) ); + return; + } + + if (previous instanceof Map && current instanceof Map) { + Map prevMap = (Map)previous; + Map currMap = (Map)current; + if (prevMap.size() == 0) return; + mergeMap(prevMap, currMap, path); + container.put(key, prevMap); + return; + } + + // if we aren't handling as a list, and we aren't handling as a map, then just overwrite (i.e. nothing else to do) + return; + } + + + // merges srcMap onto targetMap (i.e. changes targetMap but not srcMap) + public void mergeMap(Map targetMap, Map srcMap, List path) { + if (srcMap.size() == 0) return; + // to keep ordering correct, start with prevMap and merge in currMap + for (Map.Entry srcEntry : srcMap.entrySet()) { + String subKey = srcEntry.getKey(); + Object subVal = srcEntry.getValue(); + Object subPrev = targetMap.put(subKey, subVal); + if (subPrev != null) { + // recurse + path.add(subKey); + handleConflict(targetMap, path, subKey, subVal, subPrev); + path.remove(path.size()-1); + } + } + } + + protected Object makeList(Object current, Object previous) { + ArrayList lst = new ArrayList(); + append(lst, previous); // make the original value(s) come first + append(lst, current); + return lst; + } + + protected void append(List lst, Object current) { + if (current instanceof Collection) { + lst.addAll((Collection)current); + } else { + lst.add(current); + } + } + + } + + public static void mergeObjects(Map top, List path, Object val, ConflictHandler handler) { + Map outer = top; + for (int i=0; i sub = (Map)outer.get(path.get(i)); + if (sub == null) { + sub = new LinkedHashMap(); + outer.put(path.get(i), sub); + } + outer = sub; + } + + String key = path.size() > 0 ? path.get(path.size()-1) : null; + + if (key != null) { + Object existingVal = outer.put(key, val); + if (existingVal != null) { + // OK, now we need to merge values + handler.handleConflict(outer, path, key, val, existingVal); + } + } else { + // merging at top level... + Map newMap = (Map)val; + handler.mergeMap(outer, newMap, path); + } + } + +} diff --git a/solr/core/src/java/org/apache/solr/request/json/RequestUtil.java b/solr/core/src/java/org/apache/solr/request/json/RequestUtil.java new file mode 100644 index 00000000000..59be67a99ae --- /dev/null +++ b/solr/core/src/java/org/apache/solr/request/json/RequestUtil.java @@ -0,0 +1,319 @@ +package org.apache.solr.request.json; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.commons.io.IOUtils; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.MultiMapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.ContentStream; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.component.SearchHandler; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.SolrRequestHandler; +import org.apache.solr.request.macro.MacroExpander; +import org.noggit.JSONParser; +import org.noggit.ObjectBuilder; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +public class RequestUtil { + /** + * Set default-ish params on a SolrQueryRequest as well as do standard macro processing and JSON request parsing. + * + * @param handler The search handler this is for (may be null if you don't want this method touching the content streams) + * @param req The request whose params we are interested in + * @param defaults values to be used if no values are specified in the request params + * @param appends values to be appended to those from the request (or defaults) when dealing with multi-val params, or treated as another layer of defaults for singl-val params. + * @param invariants values which will be used instead of any request, or default values, regardless of context. 
+ */ + public static void processParams(SolrRequestHandler handler, SolrQueryRequest req, SolrParams defaults, + SolrParams appends, SolrParams invariants) { + + boolean searchHandler = handler instanceof SearchHandler; + SolrParams params = req.getParams(); + + // Handle JSON stream for search requests + if (searchHandler && req.getContentStreams() != null) { + + Map map = MultiMapSolrParams.asMultiMap(params, false); + + if (!(params instanceof MultiMapSolrParams || params instanceof ModifiableSolrParams)) { + // need to set params on request since we weren't able to access the original map + params = new MultiMapSolrParams(map); + req.setParams(params); + } + + String[] jsonFromParams = map.remove("json"); // params from the query string should come after (and hence override) JSON content streams + + for (ContentStream cs : req.getContentStreams()) { + String contentType = cs.getContentType(); + if (contentType==null || !contentType.contains("/json")) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad contentType for search handler :" + contentType + " request="+req); + } + + try { + String jsonString = IOUtils.toString( cs.getReader() ); + MultiMapSolrParams.addParam("json", jsonString, map); + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Exception reading content stream for request:"+req, e); + } + } + + // append existing "json" params + if (jsonFromParams != null) { + for (String json : jsonFromParams) { + MultiMapSolrParams.addParam("json", json, map); + } + } + } + + String[] jsonS = params.getParams("json"); + + boolean hasAdditions = defaults != null || invariants != null || appends != null || jsonS != null; + + // short circuit processing + if (!hasAdditions && !params.getBool("expandMacros", true)) { + return; // nothing to do... + } + + boolean isShard = params.getBool("isShard", false); + + Map newMap = MultiMapSolrParams.asMultiMap(params, hasAdditions); + + + // see if the json has a "params" section + // TODO: we should currently *not* do this if this is a leaf of a distributed search since it could overwrite parameters set by the top-level + // The parameters we extract will be propagated anyway. + if (jsonS != null && !isShard) { + for (String json : jsonS) { + getParamsFromJSON(newMap, json); + } + } + + // first populate defaults, etc.. 
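+    // Layering applied below (lowest to highest precedence): defaults fill in only
+    // keys absent from the request, appends concatenate extra values onto request
+    // params, and invariants overwrite everything. For example (hypothetical values),
+    // defaults rows=10 with a request rows=20 yields rows=20, while an invariant
+    // rows=10 would win regardless.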
+ if (defaults != null) { + Map defaultsMap = MultiMapSolrParams.asMultiMap(defaults); + for (Map.Entry entry : defaultsMap.entrySet()) { + String key = entry.getKey(); + if (!newMap.containsKey(key)) { + newMap.put(key, entry.getValue()); + } + } + } + + if (appends != null) { + Map appendsMap = MultiMapSolrParams.asMultiMap(appends); + + for (Map.Entry entry : appendsMap.entrySet()) { + String key = entry.getKey(); + String[] arr = newMap.get(key); + if (arr == null) { + newMap.put(key, entry.getValue()); + } else { + String[] appendArr = entry.getValue(); + String[] newArr = new String[arr.length + appendArr.length]; + System.arraycopy(arr, 0, newArr, 0, arr.length); + System.arraycopy(appendArr, 0, newArr, arr.length, appendArr.length); + newMap.put(key, newArr); + } + } + } + + + if (invariants != null) { + newMap.putAll( MultiMapSolrParams.asMultiMap(invariants) ); + } + + String[] doMacrosStr = newMap.get("expandMacros"); + boolean doMacros = true; + if (doMacrosStr != null) { + doMacros = "true".equals(doMacrosStr[0]); + } + + if (doMacros) { + newMap = MacroExpander.expand(newMap); + } + // Set these params as soon as possible so if there is an error processing later, things like + // "wt=json" will take effect from the defaults. + SolrParams newParams = new MultiMapSolrParams(newMap); // newMap may still change below, but that should be OK + req.setParams(newParams); + + + // Skip the rest of the processing (including json processing for now) if this isn't a search handler. + // For example json.command started to be used in SOLR-6294, and that caused errors here. + if (!searchHandler) return; + + + Map json = null; + // Handle JSON body first, so query params will always overlay on that + jsonS = newMap.get("json"); + if (jsonS != null) { + if (json == null) { + json = new LinkedHashMap<>(); + } + mergeJSON(json, "json", jsonS, new ObjectUtil.ConflictHandler()); + } + for (String key : newMap.keySet()) { + // json.nl, json.wrf are existing query parameters + if (key.startsWith("json.") && !("json.nl".equals(key) || "json.wrf".equals(key))) { + if (json == null) { + json = new LinkedHashMap<>(); + } + mergeJSON(json, key, newMap.get(key), new ObjectUtil.ConflictHandler()); + } + } + + // implement compat for existing components... + if (json != null && !isShard) { + for (Map.Entry entry : json.entrySet()) { + String key = entry.getKey(); + String out = null; + boolean arr = false; + if ("query".equals(key)) { + out = "q"; + } else if ("filter".equals(key)) { + out = "fq"; + arr = true; + } else if ("fields".equals(key)) { + out = "fl"; + arr = true; + } else if ("offset".equals(key)) { + out = "start"; + } else if ("limit".equals(key)) { + out = "rows"; + } else if ("sort".equals(key)) { + out = "sort"; + } else if ("params".equals(key) || "facet".equals(key) ) { + // handled elsewhere + continue; + } else { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown top-level key in JSON request : " + key); + } + + Object val = entry.getValue(); + + if (arr) { + String[] existing = newMap.get(out); + List lst = val instanceof List ? (List)val : null; + int existingSize = existing==null ? 0 : existing.length; + int jsonSize = lst==null ? 
1 : lst.size(); + String[] newval = new String[ existingSize + jsonSize ]; + for (int i=0; i json, String queryParamName, String[] vals, ObjectUtil.ConflictHandler handler) { + try { + List path = StrUtils.splitSmart(queryParamName, ".", true); + path = path.subList(1, path.size()); + for (String jsonStr : vals) { + Object o = ObjectBuilder.fromJSON(jsonStr); + ObjectUtil.mergeObjects(json, path, o, handler); + } + } catch (IOException e) { + // impossible + } + } + + + private static void getParamsFromJSON(Map params, String json) { + if (json.indexOf("params") < 0) { + return; + } + + JSONParser parser = new JSONParser(json); + try { + JSONUtil.expect(parser, JSONParser.OBJECT_START); + boolean found = JSONUtil.advanceToMapKey(parser, "params", false); + if (!found) { + return; + } + + parser.nextEvent(); // advance to the value + + Object o = ObjectBuilder.getVal(parser); + if (!(o instanceof Map)) return; + Map map = (Map)o; + // To make consistent with json.param handling, we should make query params come after json params (i.e. query params should + // appear to overwrite json params. + + // Solr params are based on String though, so we need to convert + for (Map.Entry entry : map.entrySet()) { + String key = entry.getKey(); + Object val = entry.getValue(); + if (params.get(key) != null) { + continue; + } + + if (val == null) { + params.remove(key); + } else if (val instanceof List) { + List lst = (List) val; + String[] vals = new String[lst.size()]; + for (int i = 0; i < vals.length; i++) { + vals[i] = lst.get(i).toString(); + } + params.put(key, vals); + } else { + params.put(key, new String[]{val.toString()}); + } + } + + } catch (Exception e) { + // ignore parse exceptions at this stage, they may be caused by incomplete macro expansions + return; + } + + } + + + +} diff --git a/solr/core/src/java/org/apache/solr/request/json/package.html b/solr/core/src/java/org/apache/solr/request/json/package.html new file mode 100644 index 00000000000..d87e4a4190c --- /dev/null +++ b/solr/core/src/java/org/apache/solr/request/json/package.html @@ -0,0 +1,27 @@ + + + + + + + +

+<html>
+<body>
+JSON related classes
+</body>
+</html>
+ + diff --git a/solr/core/src/java/org/apache/solr/request/macro/MacroExpander.java b/solr/core/src/java/org/apache/solr/request/macro/MacroExpander.java new file mode 100644 index 00000000000..a16a1391849 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/request/macro/MacroExpander.java @@ -0,0 +1,183 @@ +package org.apache.solr.request.macro; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.common.SolrException; +import org.apache.solr.search.QueryParsing; +import org.apache.solr.search.SyntaxError; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class MacroExpander { + public static final String MACRO_START = "${"; + private static final int MAX_LEVELS = 25; + + private Map orig; + private Map expanded; + private String macroStart = MACRO_START; + private char escape = '\\'; + private int level; + + + public MacroExpander(Map orig) { + this.orig = orig; + } + + public static Map expand(Map params) { + MacroExpander mc = new MacroExpander(params); + mc.expand(); + return mc.expanded; + } + + public boolean expand() { + this.expanded = new HashMap<>(orig.size()); + + boolean changed = false; + for (Map.Entry entry : orig.entrySet()) { + String k = entry.getKey(); + String newK = expand(k); + String[] values = entry.getValue(); + List newValues = null; + for (String v : values) { + String newV = expand(v); + if (newV != v) { + if (newValues == null) { + newValues = new ArrayList<>(values.length); + for (String vv : values) { + if (vv == v) break; + newValues.add(vv); + } + } + newValues.add(newV); + } + } + + if (newValues != null) { + values = newValues.toArray(new String[newValues.size()]); + changed = true; + } + + if (k != newK) { + changed = true; + } + + expanded.put( newK, values ); + } + + return changed; + } + + public String expand(String val) { + level++; + try { + if (level >= MAX_LEVELS) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Request template exceeded max nesting of " + MAX_LEVELS + " expanding '"+val+"'"); + } + return _expand(val); + } finally { + level--; + } + } + + private String _expand(String val) { + // quickest short circuit + int idx = val.indexOf(macroStart.charAt(0)); + if (idx < 0) return val; + + int start = 0; // start of the unprocessed part of the string + int end = 0; + StringBuilder sb = null; + for (;;) { + idx = val.indexOf(macroStart, idx); + int matchedStart = idx; + + // check if escaped + if (idx > 0) { + // check if escaped... + // TODO: what if you *want* to actually have a backslash... perhaps that's when we allow changing + // of the escape character? 
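+      // For illustration (hypothetical values): with high=100 and low unset,
+      // "price:[${low:0} TO ${high}]" expands to "price:[0 TO 100]"; an escaped
+      // "\${low}" is skipped by the check below rather than expanded.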
+ + char ch = val.charAt(idx-1); + if (ch == escape) { + idx += macroStart.length(); + continue; + } + } + else if (idx < 0) { + if (sb == null) return val; + sb.append(val.substring(start)); + return sb.toString(); + } + + // found unescaped "${" + idx += macroStart.length(); + + int rbrace = val.indexOf('}', idx); + if (rbrace == -1) { + // no matching close brace... + continue; + } + + if (sb == null) { + sb = new StringBuilder(val.length()*2); + } + + if (matchedStart > 0) { + sb.append(val.substring(start, matchedStart)); + } + + // update "start" to be at the end of ${...} + start = rbrace + 1; + + // String inbetween = val.substring(idx, rbrace); + QueryParsing.StrParser parser = new QueryParsing.StrParser(val, idx, rbrace); + try { + String paramName = parser.getId(); + String defVal = null; + boolean hasDefault = parser.opt(':'); + if (hasDefault) { + defVal = val.substring(parser.pos, rbrace); + } + + // in the event that expansions become context dependent... consult original? + String[] replacementList = orig.get(paramName); + + // TODO - handle a list somehow... + String replacement = replacementList!=null ? replacementList[0] : defVal; + if (replacement != null) { + String expandedReplacement = expand(replacement); + sb.append(expandedReplacement); + } + + } catch (SyntaxError syntaxError) { + // append the part we would have skipped + sb.append( val.substring(matchedStart, start) ); + continue; + } + + } + + } + + + + +} diff --git a/solr/core/src/java/org/apache/solr/request/macro/package.html b/solr/core/src/java/org/apache/solr/request/macro/package.html new file mode 100644 index 00000000000..21856b45958 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/request/macro/package.html @@ -0,0 +1,27 @@ + + + + + + + +

+<html>
+<body>
+Parameter substitution / macro expansion.
+</body>
+</html>
+ + diff --git a/solr/core/src/java/org/apache/solr/search/DocSet.java b/solr/core/src/java/org/apache/solr/search/DocSet.java index c7f64945671..d918c6dcc3c 100644 --- a/solr/core/src/java/org/apache/solr/search/DocSet.java +++ b/solr/core/src/java/org/apache/solr/search/DocSet.java @@ -17,6 +17,8 @@ package org.apache.solr.search; +import java.io.Closeable; + import org.apache.lucene.search.Filter; import org.apache.solr.common.SolrException; @@ -30,7 +32,7 @@ import org.apache.solr.common.SolrException; * * @since solr 0.9 */ -public interface DocSet /* extends Collection */ { +public interface DocSet extends Closeable /* extends Collection */ { /** * Adds the specified document if it is not currently in the DocSet diff --git a/solr/core/src/java/org/apache/solr/search/DocSetBase.java b/solr/core/src/java/org/apache/solr/search/DocSetBase.java index 7b3a3b52f2f..3e54f8afe02 100644 --- a/solr/core/src/java/org/apache/solr/search/DocSetBase.java +++ b/solr/core/src/java/org/apache/solr/search/DocSetBase.java @@ -17,6 +17,8 @@ package org.apache.solr.search; +import java.io.IOException; + import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.BitsFilteredDocIdSet; @@ -239,4 +241,9 @@ abstract class DocSetBase implements DocSet { } } + + /** FUTURE: for off-heap */ + @Override + public void close() throws IOException { + } } diff --git a/solr/core/src/java/org/apache/solr/search/FunctionQParser.java b/solr/core/src/java/org/apache/solr/search/FunctionQParser.java index 140399927a1..ec6ecd7da24 100644 --- a/solr/core/src/java/org/apache/solr/search/FunctionQParser.java +++ b/solr/core/src/java/org/apache/solr/search/FunctionQParser.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.Query; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.facet.AggValueSource; import java.util.ArrayList; import java.util.HashMap; @@ -30,6 +31,10 @@ import java.util.List; public class FunctionQParser extends QParser { + public static final int FLAG_CONSUME_DELIMITER = 0x01; // consume delimiter after parsing arg + public static final int FLAG_IS_AGG = 0x02; + public static final int FLAG_DEFAULT = FLAG_CONSUME_DELIMITER; + /** @lucene.internal */ public QueryParsing.StrParser sp; boolean parseMultipleSources = true; @@ -37,6 +42,15 @@ public class FunctionQParser extends QParser { public FunctionQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); + setString(qstr); + } + + @Override + public void setString(String s) { + super.setString(s); + if (s != null) { + sp = new QueryParsing.StrParser( s ); + } } public void setParseMultipleSources(boolean parseMultipleSources) { @@ -59,13 +73,11 @@ public class FunctionQParser extends QParser { @Override public Query parse() throws SyntaxError { - sp = new QueryParsing.StrParser(getString()); - ValueSource vs = null; List lst = null; for(;;) { - ValueSource valsource = parseValueSource(false); + ValueSource valsource = parseValueSource(FLAG_DEFAULT & ~FLAG_CONSUME_DELIMITER); sp.eatws(); if (!parseMultipleSources) { vs = valsource; @@ -210,7 +222,7 @@ public class FunctionQParser extends QParser { public List parseValueSourceList() throws SyntaxError { List sources = new ArrayList<>(3); while (hasMoreArguments()) { - sources.add(parseValueSource(true)); + sources.add(parseValueSource(FLAG_DEFAULT | 
FLAG_CONSUME_DELIMITER)); } return sources; } @@ -220,7 +232,7 @@ public class FunctionQParser extends QParser { */ public ValueSource parseValueSource() throws SyntaxError { /* consume the delimiter afterward for an external call to parseValueSource */ - return parseValueSource(true); + return parseValueSource(FLAG_DEFAULT | FLAG_CONSUME_DELIMITER); } /* @@ -274,7 +286,11 @@ public class FunctionQParser extends QParser { * * @param doConsumeDelimiter whether to consume a delimiter following the ValueSource */ - protected ValueSource parseValueSource(boolean doConsumeDelimiter) throws SyntaxError { + protected ValueSource parseValueSource(boolean doConsumeDelimiter) throws SyntaxError { + return parseValueSource( doConsumeDelimiter ? (FLAG_DEFAULT | FLAG_CONSUME_DELIMITER) : (FLAG_DEFAULT & ~FLAG_CONSUME_DELIMITER) ); + } + + protected ValueSource parseValueSource(int flags) throws SyntaxError { ValueSource valueSource; int ch = sp.peek(); @@ -365,12 +381,57 @@ public class FunctionQParser extends QParser { } - if (doConsumeDelimiter) + if ((flags & FLAG_CONSUME_DELIMITER) != 0) { consumeArgumentDelimiter(); + } return valueSource; } + /** @lucene.experimental */ + public AggValueSource parseAgg(int flags) throws SyntaxError { + String id = sp.getId(); + AggValueSource vs = null; + boolean hasParen = false; + + if ("agg".equals(id)) { + hasParen = sp.opt("("); + vs = parseAgg(flags | FLAG_IS_AGG); + } else { + // parse as an aggregation... + if (!id.startsWith("agg_")) { + id = "agg_" + id; + } + + hasParen = sp.opt("("); + + ValueSourceParser argParser = req.getCore().getValueSourceParser(id); + argParser = req.getCore().getValueSourceParser(id); + if (argParser == null) { + throw new SyntaxError("Unknown aggregation " + id + " in (" + sp + ")"); + } + + ValueSource vv = argParser.parse(this); + if (!(vv instanceof AggValueSource)) { + if (argParser == null) { + throw new SyntaxError("Expected aggregation from " + id + " but got (" + vv + ") in (" + sp + ")"); + } + } + vs = (AggValueSource) vv; + } + + if (hasParen) { + sp.expect(")"); + } + + if ((flags & FLAG_CONSUME_DELIMITER) != 0) { + consumeArgumentDelimiter(); + } + + return vs; + } + + /** * Consume an argument delimiter (a comma) from the token stream. * Only consumes if more arguments should exist (no ending parens or end of string). diff --git a/solr/core/src/java/org/apache/solr/search/QueryContext.java b/solr/core/src/java/org/apache/solr/search/QueryContext.java new file mode 100644 index 00000000000..93f93fc7dd3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/QueryContext.java @@ -0,0 +1,104 @@ +package org.apache.solr.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Closeable; +import java.io.IOException; +import java.util.IdentityHashMap; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrRequestInfo; + +/** + * Bridge between old style context and a real class + * @lucene.experimental + */ +public class QueryContext extends IdentityHashMap implements Closeable { + // private IdentityHashMap map; // we are the map for now (for compat w/ ValueSource) + private final SolrIndexSearcher searcher; + private final IndexSearcher indexSearcher; + private IdentityHashMap closeHooks; + + // migrated from ValueSource + public static QueryContext newContext(IndexSearcher searcher) { + QueryContext context = new QueryContext(searcher); + return context; + } + + public QueryContext(IndexSearcher searcher) { + this.searcher = searcher instanceof SolrIndexSearcher ? (SolrIndexSearcher)searcher : null; + indexSearcher = searcher; + this.put("searcher", searcher); // see ValueSource.newContext() + } + + + public SolrIndexSearcher searcher() { + return searcher; + } + + public IndexSearcher indexSearcher() { + return indexSearcher; + } + + /*** implementations obtained via inheritance + public Object get(Object key) { + return map.get(key); + } + + public Object put(Object key, Object val) { + if (map == null) { + map = new IdentityHashMap(); + } + return map.put(key, val); + } + ***/ + + public void addCloseHook(Closeable closeable) { + if (closeHooks == null) { + closeHooks = new IdentityHashMap(); + // for now, defer closing until the end of the request + SolrRequestInfo.getRequestInfo().addCloseHook(this); + } + + closeHooks.put(closeable, ""); + } + + public boolean removeCloseHook(Closeable closeable) { + return closeHooks.remove(closeable) != null; + } + + /** Don't call close explicitly! This will be automatically closed at the end of the request */ + @Override + public void close() throws IOException { + if (closeHooks != null) { + for (Closeable hook : closeHooks.keySet()) { + try { + hook.close(); + } catch (Exception e) { + SolrException.log(SolrCore.log, "Exception during close hook", e); + } + } + } + + closeHooks = null; + // map = null; + } + +} diff --git a/solr/core/src/java/org/apache/solr/search/QueryParsing.java b/solr/core/src/java/org/apache/solr/search/QueryParsing.java index 99de50ef7dd..a67432e27ea 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryParsing.java +++ b/solr/core/src/java/org/apache/solr/search/QueryParsing.java @@ -593,9 +593,9 @@ public class QueryParsing { * Note: This API is experimental and may change in non backward-compatible ways in the future */ public static class StrParser { - String val; - int pos; - int end; + public String val; + public int pos; + public int end; public StrParser(String val) { this(val, 0, val.length()); @@ -607,19 +607,19 @@ public class QueryParsing { this.end = end; } - void eatws() { + public void eatws() { while (pos < end && Character.isWhitespace(val.charAt(pos))) pos++; } - char ch() { + public char ch() { return pos < end ? 
val.charAt(pos) : 0; } - void skip(int nChars) { + public void skip(int nChars) { pos = Math.max(pos + nChars, end); } - boolean opt(String s) { + public boolean opt(String s) { eatws(); int slen = s.length(); if (val.regionMatches(pos, s, 0, slen)) { @@ -629,7 +629,7 @@ public class QueryParsing { return false; } - boolean opt(char ch) { + public boolean opt(char ch) { eatws(); if (pos < end && val.charAt(pos) == ch) { pos++; @@ -639,7 +639,7 @@ public class QueryParsing { } - void expect(String s) throws SyntaxError { + public void expect(String s) throws SyntaxError { eatws(); int slen = s.length(); if (val.regionMatches(pos, s, 0, slen)) { @@ -649,7 +649,7 @@ public class QueryParsing { } } - float getFloat() { + public float getFloat() { eatws(); char[] arr = new char[end - pos]; int i; @@ -669,7 +669,7 @@ public class QueryParsing { return Float.parseFloat(new String(arr, 0, i)); } - Number getNumber() { + public Number getNumber() { eatws(); int start = pos; boolean flt = false; @@ -694,7 +694,7 @@ public class QueryParsing { } } - double getDouble() { + public double getDouble() { eatws(); char[] arr = new char[end - pos]; int i; @@ -714,7 +714,7 @@ public class QueryParsing { return Double.parseDouble(new String(arr, 0, i)); } - int getInt() { + public int getInt() { eatws(); char[] arr = new char[end - pos]; int i; @@ -734,11 +734,11 @@ public class QueryParsing { } - String getId() throws SyntaxError { + public String getId() throws SyntaxError { return getId("Expected identifier"); } - String getId(String errMessage) throws SyntaxError { + public String getId(String errMessage) throws SyntaxError { eatws(); int id_start = pos; char ch; @@ -787,7 +787,7 @@ public class QueryParsing { * Skips leading whitespace and returns whatever sequence of non * whitespace it can find (or hte empty string) */ - String getSimpleString() { + public String getSimpleString() { eatws(); int startPos = pos; char ch; @@ -804,7 +804,7 @@ public class QueryParsing { * sort direction. (True is desc, False is asc). * Position is advanced to after the comma (or end) when result is non null */ - Boolean getSortDirection() throws SyntaxError { + public Boolean getSortDirection() throws SyntaxError { final int startPos = pos; final String order = getId(null); @@ -835,7 +835,7 @@ public class QueryParsing { } // return null if not a string - String getQuotedString() throws SyntaxError { + public String getQuotedString() throws SyntaxError { eatws(); char delim = peekChar(); if (!(delim == '\"' || delim == '\'')) { @@ -888,13 +888,13 @@ public class QueryParsing { } // next non-whitespace char - char peek() { + public char peek() { eatws(); return pos < end ? val.charAt(pos) : 0; } // next char - char peekChar() { + public char peekChar() { return pos < end ? 
val.charAt(pos) : 0; } diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index ea00930e6ed..4404896d383 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -80,7 +80,7 @@ import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; -import org.apache.solr.request.UnInvertedField; +import org.apache.solr.search.facet.UnInvertedField; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; @@ -775,7 +775,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn //////////////////////////////////////////////////////////////////////////////// /** expert: internal API, subject to change */ - public SolrCache getFieldValueCache() { + public SolrCache getFieldValueCache() { return fieldValueCache; } diff --git a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java index d7c6964f6ce..e352dc7786a 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java +++ b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java @@ -151,7 +151,7 @@ public class SolrReturnFields extends ReturnFields { } // like getId, but also accepts dashes for legacy fields - String getFieldName(QueryParsing.StrParser sp) { + public static String getFieldName(QueryParsing.StrParser sp) { sp.eatws(); int id_start = sp.pos; char ch; diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index 30a157d6c23..a791d8d5a19 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -40,6 +40,14 @@ import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.schema.*; +import org.apache.solr.search.facet.AggValueSource; +import org.apache.solr.search.facet.AvgAgg; +import org.apache.solr.search.facet.CountAgg; +import org.apache.solr.search.facet.MaxAgg; +import org.apache.solr.search.facet.MinAgg; +import org.apache.solr.search.facet.SumAgg; +import org.apache.solr.search.facet.SumsqAgg; +import org.apache.solr.search.facet.UniqueAgg; import org.apache.solr.search.function.CollapseScoreFunction; import org.apache.solr.search.function.OrdFieldSource; import org.apache.solr.search.function.ReverseOrdFieldSource; @@ -787,8 +795,88 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin { } }); + + addParser("agg", new ValueSourceParser() { + @Override + public AggValueSource parse(FunctionQParser fp) throws SyntaxError { + return fp.parseAgg(FunctionQParser.FLAG_DEFAULT); + } + }); + + addParser("agg_count", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new CountAgg(); + } + }); + + addParser("agg_unique", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new UniqueAgg(fp.parseArg()); + } + }); + + addParser("agg_sum", new ValueSourceParser() { + @Override + public 
ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new SumAgg(fp.parseValueSource()); + } + }); + + addParser("agg_avg", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new AvgAgg(fp.parseValueSource()); + } + }); + + addParser("agg_sumsq", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new SumsqAgg(fp.parseValueSource()); + } + }); + + /*** + addParser("agg_stdev", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return null; + } + }); + + addParser("agg_multistat", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return null; + } + }); + ***/ + + addParser("agg_min", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new MinAgg(fp.parseValueSource()); + } + }); + + addParser("agg_max", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new MaxAgg(fp.parseValueSource()); + } + }); + + + } + /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + + private static TInfo parseTerm(FunctionQParser fp) throws SyntaxError { TInfo tinfo = new TInfo(); diff --git a/solr/core/src/java/org/apache/solr/search/facet/AggValueSource.java b/solr/core/src/java/org/apache/solr/search/facet/AggValueSource.java new file mode 100644 index 00000000000..0336e29bbf1 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/AggValueSource.java @@ -0,0 +1,62 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.search.QueryContext; + +public abstract class AggValueSource extends ValueSource { + protected String name; + + public AggValueSource(String name) { + this.name = name; + } + + public String name() { + return this.name; + } + + public ValueSource[] getChildren() { + return null; + } + + @Override + public boolean equals(Object o) { + return this.getClass() == o.getClass() && name.equals(((AggValueSource) o).name); + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { + // FUTURE + throw new UnsupportedOperationException("NOT IMPLEMENTED " + name + " " + this); + } + + // TODO: make abstract + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + throw new UnsupportedOperationException("NOT IMPLEMENTED " + name + " " + this); + } + + public abstract FacetMerger createFacetMerger(Object prototype); + +} + diff --git a/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java new file mode 100644 index 00000000000..0de1e0f3d58 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java @@ -0,0 +1,55 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.queries.function.ValueSource; + + +public class AvgAgg extends SimpleAggValueSource { + public AvgAgg(ValueSource vs) { + super("avg", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + return new AvgSlotAcc(getArg(), fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetMerger() { + long num; + double sum; + + @Override + public void merge(Object facetResult) { + List numberList = (List)facetResult; + num += numberList.get(0).longValue(); + sum += numberList.get(1).doubleValue(); + } + + @Override + public Object getMergedResult() { + return num==0 ? 
0.0d : sum/num; + } + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/CountAgg.java b/solr/core/src/java/org/apache/solr/search/facet/CountAgg.java new file mode 100644 index 00000000000..a2ce9e8f79b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/CountAgg.java @@ -0,0 +1,36 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +public class CountAgg extends SimpleAggValueSource { + public CountAgg() { + super("count", null); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + return new CountSlotAcc(fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetLongMerger(); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java new file mode 100644 index 00000000000..daede77f9c3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java @@ -0,0 +1,813 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiPostingsEnum; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.request.SimpleFacets; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.HashDocSet; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SortedIntDocSet; + + +public class FacetField extends FacetRequest { + String field; + long offset; + long limit = 10; + long mincount = 1; + boolean missing; + boolean numBuckets; + String prefix; + String sortVariable; + SortDirection sortDirection; + FacetMethod method; + boolean allBuckets; // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs) + int cacheDf; // 0 means "default", -1 means "never cache" + + // TODO: put this somewhere more generic? 
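+ // The direction multiplier is used both when selecting the top slots on a
+ // single node (see findTopSlots) and when merging sorted buckets from shards
+ // (see FacetFieldMerger.sortBuckets).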
+ public static enum SortDirection {
+ asc(-1) ,
+ desc(1);
+
+ private final int multiplier;
+ private SortDirection(int multiplier) {
+ this.multiplier = multiplier;
+ }
+
+ // asc==-1, desc==1
+ public int getMultiplier() {
+ return multiplier;
+ }
+ }
+
+ public static enum FacetMethod {
+ ENUM,
+ STREAM,
+ FIELDCACHE,
+ SMART,
+ ;
+
+ public static FacetMethod fromString(String method) {
+ if (method == null || method.length()==0) return null;
+ if ("enum".equals(method)) {
+ return ENUM;
+ } else if ("fc".equals(method) || "fieldcache".equals(method)) {
+ return FIELDCACHE;
+ } else if ("smart".equals(method)) {
+ return SMART;
+ } else if ("stream".equals(method)) {
+ return STREAM;
+ }
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetField method " + method);
+ }
+ }
+
+
+ @Override
+ public FacetProcessor createFacetProcessor(FacetContext fcontext) {
+ SchemaField sf = fcontext.searcher.getSchema().getField(field);
+ FieldType ft = sf.getType();
+ boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
+
+ if (method == FacetMethod.ENUM && sf.indexed()) {
+ throw new UnsupportedOperationException();
+ } else if (method == FacetMethod.STREAM && sf.indexed()) {
+ return new FacetFieldProcessorStream(fcontext, this, sf);
+ }
+
+ if (multiToken) {
+ return new FacetFieldProcessorUIF(fcontext, this, sf);
+ } else {
+ // single valued string
+ return new FacetFieldProcessorFC(fcontext, this, sf);
+ }
+ }
+
+ @Override
+ public FacetMerger createFacetMerger(Object prototype) {
+ return new FacetFieldMerger(this);
+ }
+}
+
+
+
+abstract class FacetFieldProcessor extends FacetProcessor {
+ SchemaField sf;
+ SlotAcc sortAcc;
+
+ FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq);
+ this.sf = sf;
+ }
+
+ @Override
+ public Object getResponse() {
+ return response;
+ }
+
+ void setSortAcc(int numSlots) {
+ String sortKey = freq.sortVariable;
+ sortAcc = accMap.get(sortKey);
+
+ if (sortAcc == null) {
+ if ("count".equals(sortKey)) {
+ sortAcc = countAcc;
+ } else if ("index".equals(sortKey)) {
+ sortAcc = new SortSlotAcc(fcontext);
+ // This sorting accumulator just goes by the slot number, so does not need to be collected
+ // and hence does not need to find its way into the accMap or accs array.
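+ // (Any other sortVariable must name a facet stat already registered in
+ // accMap; if the lookup above found nothing, sortAcc simply stays null.)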
+ } + } + } + + static class Slot { + int slot; + } +} + + +// base class for FC style of facet counting (single and multi-valued strings) +abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { + BytesRefBuilder prefixRef; + int startTermIndex; + int endTermIndex; + int nTerms; + int nDocs; + int maxSlots; + int allBucketsSlot; + + + public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) { + super(fcontext, freq, sf); + } + + @Override + public void process() throws IOException { + sf = fcontext.searcher.getSchema().getField(freq.field); + response = getFieldCacheCounts(); + } + + + abstract protected BytesRef lookupOrd(int ord) throws IOException; + abstract protected void findStartAndEndOrds() throws IOException; + abstract protected void collectDocs() throws IOException; + + + public SimpleOrderedMap getFieldCacheCounts() throws IOException { + String prefix = freq.prefix; + if (prefix == null || prefix.length() == 0) { + prefixRef = null; + } else { + prefixRef = new BytesRefBuilder(); + prefixRef.copyChars(prefix); + } + + findStartAndEndOrds(); + + // if we need an extra slot for the "missing" bucket, and it wasn't able to be tacked onto the beginning, + // then lets add room for it at the end. + maxSlots = (freq.missing && startTermIndex != -1) ? nTerms + 1 : nTerms; + + if (freq.allBuckets) { + allBucketsSlot = maxSlots; + maxSlots++; + } else { + allBucketsSlot = -1; + } + createAccs(nDocs, maxSlots); + setSortAcc(maxSlots); + prepareForCollection(); + + collectDocs(); + + return findTopSlots(); + } + + + protected SimpleOrderedMap findTopSlots() throws IOException { + SimpleOrderedMap res = new SimpleOrderedMap<>(); + + int numBuckets = 0; + List bucketVals = null; + if (freq.numBuckets && fcontext.isShard()) { + bucketVals = new ArrayList(100); + } + + int off = fcontext.isShard() ? 0 : (int) freq.offset; + // add a modest amount of over-request if this is a shard request + int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE; + + int maxsize = freq.limit > 0 ? off + lim : Integer.MAX_VALUE - 1; + maxsize = Math.min(maxsize, nTerms); + + final int sortMul = freq.sortDirection.getMultiplier(); + final SlotAcc sortAcc = this.sortAcc; + + PriorityQueue queue = new PriorityQueue(maxsize) { + @Override + protected boolean lessThan(Slot a, Slot b) { + int cmp = sortAcc.compare(a.slot, b.slot) * sortMul; + return cmp == 0 ? b.slot < a.slot : cmp < 0; + } + }; + + Slot bottom = null; + int effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount); + for (int i = (startTermIndex == -1) ? 
1 : 0; i < nTerms; i++) { + if (countAcc.getCount(i) < effectiveMincount) { + continue; + } + + numBuckets++; + if (bucketVals != null && bucketVals.size()<100) { + int ord = startTermIndex + i; + BytesRef br = lookupOrd(ord); + Object val = sf.getType().toObject(sf, br); + bucketVals.add(val); + } + + + if (bottom != null) { + if (sortAcc.compare(bottom.slot, i) * sortMul < 0) { + bottom.slot = i; + bottom = queue.updateTop(); + } + } else { + // queue not full + Slot s = new Slot(); + s.slot = i; + queue.add(s); + if (queue.size() >= maxsize) { + bottom = queue.top(); + } + } + } + + if (freq.numBuckets) { + if (!fcontext.isShard()) { + res.add("numBuckets", numBuckets); + } else { + SimpleOrderedMap map = new SimpleOrderedMap(2); + map.add("numBuckets", numBuckets); + map.add("vals", bucketVals); + res.add("numBuckets", map); + } + } + + // if we are deep paging, we don't have to order the highest "offset" counts. + int collectCount = Math.max(0, queue.size() - off); + assert collectCount <= lim; + int[] sortedSlots = new int[collectCount]; + for (int i = collectCount - 1; i >= 0; i--) { + sortedSlots[i] = queue.pop().slot; + } + + if (freq.allBuckets) { + SimpleOrderedMap allBuckets = new SimpleOrderedMap<>(); + for (SlotAcc acc : accs) { + countAcc.setValues(allBuckets, allBucketsSlot); + acc.setValues(allBuckets, allBucketsSlot); + } + res.add("allBuckets", allBuckets); + } + + ArrayList bucketList = new ArrayList(collectCount); + res.add("buckets", bucketList); + + + for (int slotNum : sortedSlots) { + SimpleOrderedMap bucket = new SimpleOrderedMap<>(); + + // get the ord of the slot... + int ord = startTermIndex + slotNum; + + BytesRef br = lookupOrd(ord); + Object val = sf.getType().toObject(sf, br); + + bucket.add("val", val); + // add stats for this bucket + addStats(bucket, slotNum); + + // handle sub-facets for this bucket + if (freq.getSubFacets().size() > 0) { + FacetContext subContext = fcontext.sub(); + subContext.base = fcontext.searcher.getDocSet(new TermQuery(new Term(sf.getName(), br.clone())), fcontext.base); + try { + fillBucketSubs(bucket, subContext); + } finally { + // subContext.base.decref(); // OFF-HEAP + // subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming) + } + } + + bucketList.add(bucket); + } + + if (freq.missing) { + SimpleOrderedMap missingBucket = new SimpleOrderedMap<>(); + DocSet missingDocSet = null; + try { + if (startTermIndex == -1) { + addStats(missingBucket, 0); + } else { + missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field); + // an extra slot was added to the end for this missing bucket + countAcc.incrementCount(nTerms, missingDocSet.size()); + collect(missingDocSet, nTerms); + addStats(missingBucket, nTerms); + } + + if (freq.getSubFacets().size() > 0) { + FacetContext subContext = fcontext.sub(); + // TODO: we can do better than this! 
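+ // (The missing docSet computed while collecting stats above is reused here;
+ // it is only recomputed if the startTermIndex==-1 shortcut handled the
+ // missing bucket without materializing a docSet.)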
+ if (missingDocSet == null) { + missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field); + } + subContext.base = missingDocSet; + fillBucketSubs(missingBucket, subContext); + } + + res.add("missing", missingBucket); + } finally { + if (missingDocSet != null) { + // missingDocSet.decref(); // OFF-HEAP + missingDocSet = null; + } + } + } + + return res; + } + + +} + + +class FacetFieldProcessorFC extends FacetFieldProcessorFCBase { + SortedDocValues sortedDocValues; + + + public FacetFieldProcessorFC(FacetContext fcontext, FacetField freq, SchemaField sf) { + super(fcontext, freq, sf); + } + + protected BytesRef lookupOrd(int ord) throws IOException { + return sortedDocValues.lookupOrd(ord); + } + + protected void findStartAndEndOrds() throws IOException { + sortedDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null); + + if (prefixRef != null) { + startTermIndex = sortedDocValues.lookupTerm(prefixRef.get()); + if (startTermIndex < 0) startTermIndex = -startTermIndex - 1; + prefixRef.append(UnicodeUtil.BIG_TERM); + endTermIndex = sortedDocValues.lookupTerm(prefixRef.get()); + assert endTermIndex < 0; + endTermIndex = -endTermIndex - 1; + } else { + startTermIndex = 0; + endTermIndex = sortedDocValues.getValueCount(); + } + + // optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1) + startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex; + + nTerms = endTermIndex - startTermIndex; + } + + protected void collectDocs() throws IOException { + final List leaves = fcontext.searcher.getIndexReader().leaves(); + final Iterator ctxIt = leaves.iterator(); + LeafReaderContext ctx = null; + int segBase = 0; + int segMax; + int adjustedMax = 0; + for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext(); ) { + final int doc = docsIt.nextDoc(); + if (doc >= adjustedMax) { + do { + ctx = ctxIt.next(); + segBase = ctx.docBase; + segMax = ctx.reader().maxDoc(); + adjustedMax = segBase + segMax; + } while (doc >= adjustedMax); + assert doc >= ctx.docBase; + setNextReader(ctx); + } + + int term = sortedDocValues.getOrd( doc ); + int arrIdx = term - startTermIndex; + if (arrIdx>=0 && arrIdx= 0 && term >= 0) { + countAcc.incrementCount(allBucketsSlot, 1); + collect(doc - segBase, allBucketsSlot); // per-seg collectors + } + } + } + } + +} + +// UnInvertedField implementation of field faceting +class FacetFieldProcessorUIF extends FacetFieldProcessorFC { + UnInvertedField uif; + TermsEnum te; + + FacetFieldProcessorUIF(FacetContext fcontext, FacetField freq, SchemaField sf) { + super(fcontext, freq, sf); + } + + @Override + protected void findStartAndEndOrds() throws IOException { + uif = UnInvertedField.getUnInvertedField(freq.field, fcontext.searcher); + te = uif.getOrdTermsEnum( fcontext.searcher.getLeafReader() ); // "te" can be null + + startTermIndex = 0; + endTermIndex = uif.numTerms(); // one past the end + + if (prefixRef != null && te != null) { + if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) { + startTermIndex = uif.numTerms(); + } else { + startTermIndex = (int) te.ord(); + } + prefixRef.append(UnicodeUtil.BIG_TERM); + if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) { + endTermIndex = uif.numTerms(); + } else { + endTermIndex = (int) te.ord(); + } + } + + nTerms = endTermIndex - startTermIndex; + } + + @Override + protected BytesRef lookupOrd(int ord) throws IOException { + return uif.getTermValue(te, ord); + } + + @Override + protected void collectDocs() 
throws IOException { + uif.collectDocs(this); + } +} + + + +class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable { + long bucketsToSkip; + long bucketsReturned; + + boolean closed; + boolean countOnly; + boolean hasSubFacets; // true if there are subfacets + int minDfFilterCache; + DocSet docs; + DocSet fastForRandomSet; + TermsEnum termsEnum = null; + SolrIndexSearcher.DocsEnumState deState = null; + PostingsEnum postingsEnum; + BytesRef startTermBytes; + BytesRef term; + LeafReaderContext[] leaves; + + + + FacetFieldProcessorStream(FacetContext fcontext, FacetField freq, SchemaField sf) { + super(fcontext, freq, sf); + } + + @Override + public void close() throws IOException { + if (!closed) { + closed = true; + // fcontext.base.decref(); // OFF-HEAP + } + } + + + @Override + public void process() throws IOException { + // We need to keep the fcontext open after processing is done (since we will be streaming in the response writer). + // But if the connection is broken, we want to clean up. + // fcontext.base.incref(); // OFF-HEAP + fcontext.qcontext.addCloseHook(this); + + setup(); + response = new SimpleOrderedMap<>(); + response.add( "buckets", new Iterator() { + boolean retrieveNext = true; + Object val; + @Override + public boolean hasNext() { + if (retrieveNext) { + val = nextBucket(); + } + retrieveNext = false; + return val != null; + } + + @Override + public Object next() { + if (retrieveNext) { + val = nextBucket(); + } + retrieveNext = true; + if (val == null) { + // Last value, so clean up. In the case that we are doing streaming facets within streaming facets, + // the number of close hooks could grow very large, so we want to remove ourselves. + boolean removed = fcontext.qcontext.removeCloseHook(FacetFieldProcessorStream.this); + assert removed; + try { + close(); + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error during facet streaming close", e); + } + } + return val; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }); + } + + + + public void setup() throws IOException { + + countOnly = freq.facetStats.size() == 0 || freq.facetStats.values().iterator().next() instanceof CountAgg; + hasSubFacets = freq.subFacets.size() > 0; + bucketsToSkip = freq.offset; + + + + createAccs(-1, 1); + prepareForCollection(); + + // Minimum term docFreq in order to use the filterCache for that term. + int defaultMinDf = Math.max(fcontext.searcher.maxDoc() >> 4, 3); // (minimum of 3 is for test coverage purposes) + int minDfFilterCache = freq.cacheDf == 0 ? defaultMinDf : freq.cacheDf; + if (minDfFilterCache == -1) minDfFilterCache = Integer.MAX_VALUE; // -1 means never cache + + docs = fcontext.base; + fastForRandomSet = null; + + if (freq.prefix != null) { + String indexedPrefix = sf.getType().toInternal(freq.prefix); + startTermBytes = new BytesRef(indexedPrefix); + } + + Fields fields = fcontext.searcher.getLeafReader().fields(); + Terms terms = fields == null ? null : fields.terms(sf.getName()); + + + termsEnum = null; + deState = null; + term = null; + + + if (terms != null) { + + termsEnum = terms.iterator(null); + + // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for + // facet.offset when sorting by index order. 
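+ // Position the terms enum at the first term matching the prefix (seekCeil),
+ // or at the very first term of the field when no prefix was given.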
+ + if (startTermBytes != null) { + if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) { + termsEnum = null; + } else { + term = termsEnum.term(); + } + } else { + // position termsEnum on first term + term = termsEnum.next(); + } + } + + List leafList = fcontext.searcher.getTopReaderContext().leaves(); + leaves = leafList.toArray( new LeafReaderContext[ leafList.size() ]); + + + } + + + public SimpleOrderedMap nextBucket() { + try { + return _nextBucket(); + } catch (Exception e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error during facet streaming", e); + } + } + + public SimpleOrderedMap _nextBucket() throws IOException { + int effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount); + DocSet termSet = null; + + try { + while (term != null) { + + if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) { + break; + } + + int df = termsEnum.docFreq(); + if (df < effectiveMincount) { + term = termsEnum.next(); + continue; + } + + if (termSet != null) { + // termSet.decref(); // OFF-HEAP + termSet = null; + } + + int c = 0; + + if (hasSubFacets || df >= minDfFilterCache) { + // use the filter cache + + if (deState == null) { + deState = new SolrIndexSearcher.DocsEnumState(); + deState.fieldName = sf.getName(); + deState.liveDocs = fcontext.searcher.getLeafReader().getLiveDocs(); + deState.termsEnum = termsEnum; + deState.postingsEnum = postingsEnum; + deState.minSetSizeCached = minDfFilterCache; + } + + if (hasSubFacets || !countOnly) { + DocSet termsAll = fcontext.searcher.getDocSet(deState); + termSet = docs.intersection(termsAll); + // termsAll.decref(); // OFF-HEAP + c = termSet.size(); + } else { + c = fcontext.searcher.numDocs(docs, deState); + } + postingsEnum = deState.postingsEnum; + + resetStats(); + + if (!countOnly) { + collect(termSet, 0); + } + + } else { + // We don't need the docset here (meaning no sub-facets). + // if countOnly, then we are calculating some other stats... 
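+ // (That is: when countOnly is false, the stats accumulators are fed inline
+ // via collect(docid, 0) while the postings are iterated below.)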
+ resetStats(); + + // lazy convert to fastForRandomSet + if (fastForRandomSet == null) { + fastForRandomSet = docs; + if (docs instanceof SortedIntDocSet) { // OFF-HEAP todo: also check for native version + SortedIntDocSet sset = (SortedIntDocSet) docs; + fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size()); + } + } + // iterate over TermDocs to calculate the intersection + postingsEnum = termsEnum.postings(null, postingsEnum, PostingsEnum.NONE); + + if (postingsEnum instanceof MultiPostingsEnum) { + MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs(); + int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs(); + for (int subindex = 0; subindex < numSubs; subindex++) { + MultiPostingsEnum.EnumWithSlice sub = subs[subindex]; + if (sub.postingsEnum == null) continue; + int base = sub.slice.start; + int docid; + + if (countOnly) { + while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (fastForRandomSet.exists(docid + base)) c++; + } + } else { + setNextReader(leaves[sub.slice.readerIndex]); + while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (fastForRandomSet.exists(docid + base)) { + c++; + collect(docid, 0); + } + } + } + + } + } else { + int docid; + if (countOnly) { + while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (fastForRandomSet.exists(docid)) c++; + } + } else { + setNextReader(leaves[0]); + while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (fastForRandomSet.exists(docid)) { + c++; + collect(docid, 0); + } + } + } + } + + } + + + + if (c < effectiveMincount) { + term = termsEnum.next(); + continue; + } + + // handle offset and limit + if (bucketsToSkip > 0) { + bucketsToSkip--; + term = termsEnum.next(); + continue; + } + + if (freq.limit >= 0 && ++bucketsReturned > freq.limit) { + return null; + } + + // set count in case other stats depend on it + countAcc.incrementCount(0, c); + + // OK, we have a good bucket to return... first get bucket value before moving to next term + Object bucketVal = sf.getType().toObject(sf, term); + term = termsEnum.next(); + + SimpleOrderedMap bucket = new SimpleOrderedMap<>(); + bucket.add("val", bucketVal); + addStats(bucket, 0); + if (hasSubFacets) { + processSubs(bucket, termSet); + } + + // TODO... termSet needs to stick around for streaming sub-facets? + + return bucket; + + } + + } finally { + if (termSet != null) { + // termSet.decref(); // OFF-HEAP + termSet = null; + } + } + + + // end of the iteration + return null; + } + + + +} \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java new file mode 100644 index 00000000000..65290be85f2 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java @@ -0,0 +1,824 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.solr.client.solrj.SolrResponse; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.handler.component.SearchComponent; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.component.ShardResponse; +import org.apache.solr.search.QueryContext; +import org.apache.solr.search.SyntaxError; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FacetModule extends SearchComponent { + public static Logger log = LoggerFactory.getLogger(FacetModule.class); + + public static final String COMPONENT_NAME = "facet_module"; + + // Ensure these don't overlap with other PURPOSE flags in ShardRequest + // The largest current flag in ShardRequest is 0x00002000 + // We'll put our bits in the middle to avoid future ones in ShardRequest and + // custom ones that may start at the top. + public final static int PURPOSE_GET_JSON_FACETS = 0x00100000; + public final static int PURPOSE_REFINE_JSON_FACETS = 0x00200000; + + // Internal information passed down from the top level to shards for distributed faceting. + private final static String FACET_STATE = "_facet_"; + + + public FacetComponentState getFacetComponentState(ResponseBuilder rb) { + // TODO: put a map on ResponseBuilder? 
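+ // For now the state lives in the request context, keyed by class (see below).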
+ // rb.componentInfo.get(FacetComponentState.class); + return (FacetComponentState) rb.req.getContext().get(FacetComponentState.class); + } + + @Override + public void process(ResponseBuilder rb) throws IOException { + // if this is null, faceting is not enabled + FacetComponentState facetState = getFacetComponentState(rb); + if (facetState == null) return; + + boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false); + + FacetContext fcontext = new FacetContext(); + fcontext.base = rb.getResults().docSet; + fcontext.req = rb.req; + fcontext.searcher = rb.req.getSearcher(); + fcontext.qcontext = QueryContext.newContext(fcontext.searcher); + if (isShard) { + fcontext.flags |= FacetContext.IS_SHARD; + } + + FacetProcessor fproc = facetState.facetRequest.createFacetProcessor(fcontext); + fproc.process(); + rb.rsp.add("facets", fproc.getResponse()); + } + + + @Override + public void prepare(ResponseBuilder rb) throws IOException { + Map json = rb.req.getJSON(); + Map jsonFacet = null; + if (json == null) { + int version = rb.req.getParams().getInt("facet.version",1); + if (version <= 1) return; + boolean facetsEnabled = rb.req.getParams().getBool(FacetParams.FACET, false); + if (!facetsEnabled) return; + jsonFacet = new LegacyFacet(rb.req.getParams()).getLegacy(); + } else { + jsonFacet = (Map) json.get("facet"); + } + if (jsonFacet == null) return; + + SolrParams params = rb.req.getParams(); + + boolean isShard = params.getBool(ShardParams.IS_SHARD, false); + if (isShard) { + String jfacet = params.get(FACET_STATE); + if (jfacet == null) { + // if this is a shard request, but there is no facet state, then don't do anything. + return; + } + } + + // At this point, we know we need to do something. Create and save the state. + rb.setNeedDocSet(true); + + // Parse the facet in the prepare phase? 
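+ // Parsing here (rather than in process) fails fast on syntax errors and makes
+ // the parsed FacetRequest available to both process() and the distributed
+ // merge logic. For illustration only, jsonFacet might hold the parsed
+ // equivalent of a request like:
+ //   json.facet={ categories : { terms : { field : cat, limit : 5,
+ //                               facet : { avg_price : "avg(price)" } } } }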
+ FacetParser parser = new FacetTopParser(rb.req); + FacetRequest facetRequest = null; + try { + facetRequest = parser.parse(jsonFacet); + } catch (SyntaxError syntaxError) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError); + } + + FacetComponentState fcState = new FacetComponentState(); + fcState.rb = rb; + fcState.isShard = isShard; + fcState.facetCommands = jsonFacet; + fcState.facetRequest = facetRequest; + + rb.req.getContext().put(FacetComponentState.class, fcState); + } + + + + @Override + public int distributedProcess(ResponseBuilder rb) throws IOException { + FacetComponentState facetState = getFacetComponentState(rb); + if (facetState == null) return ResponseBuilder.STAGE_DONE; + + return ResponseBuilder.STAGE_DONE; + } + + @Override + public void modifyRequest(ResponseBuilder rb, SearchComponent who,ShardRequest sreq) { + FacetComponentState facetState = getFacetComponentState(rb); + if (facetState == null) return; + + if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) { + sreq.purpose |= FacetModule.PURPOSE_GET_JSON_FACETS; + sreq.params.set(FACET_STATE, "{}"); + } else { + // turn off faceting on other requests + sreq.params.remove("json.facet"); + sreq.params.remove(FACET_STATE); + } + } + + @Override + public void handleResponses(ResponseBuilder rb, ShardRequest sreq) { + FacetComponentState facetState = getFacetComponentState(rb); + if (facetState == null) return; + + for (ShardResponse shardRsp : sreq.responses) { + SolrResponse rsp = shardRsp.getSolrResponse(); + NamedList top = rsp.getResponse(); + Object facet = top.get("facets"); + if (facet == null) continue; + if (facetState.merger == null) { + facetState.merger = facetState.facetRequest.createFacetMerger(facet); + } + facetState.merger.merge(facet); + } + } + + @Override + public void finishStage(ResponseBuilder rb) { + if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return; + + FacetComponentState facetState = getFacetComponentState(rb); + if (facetState == null) return; + + if (facetState.merger != null) { + rb.rsp.add("facets", facetState.merger.getMergedResult()); + } + } + + @Override + public String getDescription() { + return "Heliosearch Faceting"; + } + + @Override + public String getSource() { + return null; + } + +} + + +class FacetComponentState { + ResponseBuilder rb; + Map facetCommands; + FacetRequest facetRequest; + boolean isShard; + + // + // Only used for distributed search + // + FacetMerger merger; +} + +// +// The FacetMerger code is in the prototype stage, and this is the reason that +// many implementations are all in this file. They can be moved to separate +// files after the interfaces are locked down more. +// + +class FacetMerger { + public void merge(Object facetResult) { + + } + + public Object getMergedResult() { + return null; // TODO + } +} + + +abstract class FacetSortableMerger extends FacetMerger { + public void prepareSort() { + } + + /** Return the normal comparison sort order. The sort direction is only to be used in special circumstances (such as making NaN sort + * last regardless of sort order.) Normal sorters do not need to pay attention to direction. 
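+ * (FacetDoubleMerger below, for example, uses the direction only to push NaN
+ * to the end of the sort for both asc and desc.)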
+ */
+ public abstract int compareTo(FacetSortableMerger other, FacetField.SortDirection direction);
+}
+
+class FacetDoubleMerger extends FacetSortableMerger {
+ double val;
+
+ @Override
+ public void merge(Object facetResult) {
+ val += ((Number)facetResult).doubleValue();
+ }
+
+ @Override
+ public Object getMergedResult() {
+ return val;
+ }
+
+ @Override
+ public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
+ return compare(val, ((FacetDoubleMerger)other).val, direction);
+ }
+
+
+ public static int compare(double a, double b, FacetField.SortDirection direction) {
+ if (a < b) return -1;
+ if (a > b) return 1;
+
+ if (a != a) { // a==NaN
+ if (b != b) {
+ return 0; // both NaN
+ }
+ return -1 * direction.getMultiplier(); // asc==-1, so this will put NaN at end of sort
+ }
+
+ if (b != b) { // b is NaN so a is greater
+ return 1 * direction.getMultiplier(); // if sorting asc, make a less so NaN is at end
+ }
+
+ // consider +-0 to be equal
+ return 0;
+ }
+}
+
+
+class FacetLongMerger extends FacetSortableMerger {
+ long val;
+
+ @Override
+ public void merge(Object facetResult) {
+ val += ((Number)facetResult).longValue();
+ }
+
+ @Override
+ public Object getMergedResult() {
+ return val;
+ }
+
+ @Override
+ public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
+ return Long.compare(val, ((FacetLongMerger)other).val);
+ }
+}
+
+
+// base class for facets that create buckets (and can hence have sub-facets)
+class FacetBucketMerger<FacetRequestT extends FacetRequest> extends FacetMerger {
+ FacetRequestT freq;
+
+ public FacetBucketMerger(FacetRequestT freq) {
+ this.freq = freq;
+ }
+
+ /** bucketVal is the representative value for the bucket. Only applicable to terms and range queries to distinguish buckets. */
+ FacetBucket newBucket(Comparable bucketVal) {
+ return new FacetBucket(this, bucketVal);
+ }
+
+ // do subs...
+
+ // callback stuff for buckets?
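+ // Resolves the merger for a returned response key: sub-facets take precedence,
+ // then stat mergers; an unrecognized key is a hard error below.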
+ // passing object gives us a chance to specialize based on value + FacetMerger createFacetMerger(String key, Object val) { + FacetRequest sub = freq.getSubFacets().get(key); + if (sub != null) { + return sub.createFacetMerger(val); + } + + AggValueSource subStat = freq.getFacetStats().get(key); + if (subStat != null) { + return subStat.createFacetMerger(val); + } + + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no merger for key=" + key + " , val=" + val); + } +} + +class FacetQueryMerger extends FacetBucketMerger { + FacetBucket bucket; + + public FacetQueryMerger(FacetQuery freq) { + super(freq); + } + + @Override + public void merge(Object facet) { + if (bucket == null) { + bucket = newBucket(null); + } + bucket.mergeBucket((SimpleOrderedMap) facet); + } + + @Override + public Object getMergedResult() { + return bucket.getMergedBucket(); + } +} + + + +class FacetBucket { + FacetBucketMerger parent; + Comparable bucketValue; + long count; + Map subs; + + public FacetBucket(FacetBucketMerger parent, Comparable bucketValue) { + this.parent = parent; + this.bucketValue = bucketValue; + } + + public long getCount() { + return count; + } + + /** returns the existing merger for the given key, or null if none yet exists */ + FacetMerger getExistingMerger(String key) { + if (subs == null) return null; + return subs.get(key); + } + + private FacetMerger getMerger(String key, Object prototype) { + FacetMerger merger = null; + if (subs != null) { + merger = subs.get(key); + if (merger != null) return merger; + } + + merger = parent.createFacetMerger(key, prototype); + + if (merger != null) { + if (subs == null) { + subs = new HashMap<>(); + } + subs.put(key, merger); + } + + return merger; + } + + public void mergeBucket(SimpleOrderedMap bucket) { + // todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets + + // drive merging off the received bucket? + for (int i=0; i mergerEntry : subs.entrySet()) { + FacetMerger subMerger = mergerEntry.getValue(); + out.add(mergerEntry.getKey(), subMerger.getMergedResult()); + } + } + + return out; + } +} + + + +class FacetFieldMerger extends FacetBucketMerger { + FacetBucket missingBucket; + FacetBucket allBuckets; + FacetMerger numBuckets; + + LinkedHashMap buckets = new LinkedHashMap(); + List sortedBuckets; + int numReturnedBuckets; // the number of buckets in the bucket lists returned from all of the shards + + private static class SortVal implements Comparable { + FacetBucket bucket; + FacetSortableMerger merger; + FacetField.SortDirection direction; + + @Override + public int compareTo(SortVal o) { + int c = -merger.compareTo(o.merger, direction) * direction.getMultiplier(); + return c == 0 ? 
bucket.bucketValue.compareTo(o.bucket.bucketValue) : c; + } + } + + public FacetFieldMerger(FacetField freq) { + super(freq); + } + + @Override + public void merge(Object facetResult) { + merge((SimpleOrderedMap)facetResult); + } + + public void merge(SimpleOrderedMap facetResult) { + if (freq.missing) { + Object o = facetResult.get("missing"); + if (o != null) { + if (missingBucket == null) { + missingBucket = newBucket(null); + } + missingBucket.mergeBucket((SimpleOrderedMap)o); + } + } + + if (freq.allBuckets) { + Object o = facetResult.get("allBuckets"); + if (o != null) { + if (allBuckets == null) { + allBuckets = newBucket(null); + } + allBuckets.mergeBucket((SimpleOrderedMap)o); + } + } + + List bucketList = (List) facetResult.get("buckets"); + numReturnedBuckets += bucketList.size(); + mergeBucketList(bucketList); + + if (freq.numBuckets) { + Object nb = facetResult.get("numBuckets"); + if (nb != null) { + if (numBuckets == null) { + numBuckets = new FacetNumBucketsMerger(); + } + numBuckets.merge(nb); + } + } + + } + + public void mergeBucketList(List bucketList) { + for (SimpleOrderedMap bucketRes : bucketList) { + Comparable bucketVal = (Comparable)bucketRes.get("val"); + FacetBucket bucket = buckets.get(bucketVal); + if (bucket == null) { + bucket = newBucket(bucketVal); + buckets.put(bucketVal, bucket); + } + bucket.mergeBucket( bucketRes ); + } + } + + public void sortBuckets() { + sortedBuckets = new ArrayList<>( buckets.values() ); + + Comparator comparator = null; + + final FacetField.SortDirection direction = freq.sortDirection; + final int sortMul = direction.getMultiplier(); + + if ("count".equals(freq.sortVariable)) { + comparator = new Comparator() { + @Override + public int compare(FacetBucket o1, FacetBucket o2) { + int v = -Long.compare(o1.count, o2.count) * sortMul; + return v == 0 ? o1.bucketValue.compareTo(o2.bucketValue) : v; + } + }; + Collections.sort(sortedBuckets, comparator); + } else if ("index".equals(freq.sortVariable)) { + comparator = new Comparator() { + @Override + public int compare(FacetBucket o1, FacetBucket o2) { + return -o1.bucketValue.compareTo(o2.bucketValue) * sortMul; + } + }; + Collections.sort(sortedBuckets, comparator); + } else { + final String key = freq.sortVariable; + + /** + final FacetSortableMerger[] arr = new FacetSortableMerger[buckets.size()]; + final int[] index = new int[arr.length]; + int start = 0; + int nullStart = index.length; + int i=0; + for (FacetBucket bucket : buckets.values()) { + FacetMerger merger = bucket.getExistingMerger(key); + if (merger == null) { + index[--nullStart] = i; + } + if (merger != null) { + arr[start] = (FacetSortableMerger)merger; + index[start] = i; + start++; + } + i++; + } + + PrimUtils.sort(0, nullStart, index, new PrimUtils.IntComparator() { + @Override + public int compare(int a, int b) { + return arr[index[a]].compareTo(arr[index[b]], direction); + } + }); + **/ + + // timsort may do better here given that the lists may be partially sorted. 
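+ // Buckets whose sort stat is absent (no merger for the sort key) cannot be
+ // compared, so they are collected separately in "nulls" and appended after
+ // the comparable buckets.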
+ + List lst = new ArrayList(buckets.size()); + List nulls = new ArrayList(buckets.size()>>1); + for (int i=0; i() { + @Override + public int compare(FacetBucket o1, FacetBucket o2) { + return o1.bucketValue.compareTo(o2.bucketValue); + } + }); + + ArrayList out = new ArrayList<>(buckets.size()); + for (SortVal sv : lst) { + out.add( sv.bucket ); + } + out.addAll(nulls); + sortedBuckets = out; + } + } + + @Override + public Object getMergedResult() { + SimpleOrderedMap result = new SimpleOrderedMap(); + + if (numBuckets != null) { + int removed = 0; + if (freq.mincount > 1) { + for (FacetBucket bucket : buckets.values()) { + if (bucket.count < freq.mincount) removed++; + } + } + result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue() - removed); + + // TODO: we can further increase this estimate. + // If not sorting by count, use a simple ratio to scale + // If sorting by count desc, then add up the highest_possible_missing_count from each shard + } + + sortBuckets(); + + int first = (int)freq.offset; + int end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE; + int last = Math.min(sortedBuckets.size(), end); + + List resultBuckets = new ArrayList<>(Math.max(0, (last - first))); + + /** this only works if there are no filters (like mincount) + for (int i=first; i= 0 ? (int)freq.limit : Integer.MAX_VALUE; + for (FacetBucket bucket : sortedBuckets) { + if (bucket.getCount() < freq.mincount) { + continue; + } + + if (off > 0) { + --off; + continue; + } + + if (resultBuckets.size() >= lim) { + break; + } + + resultBuckets.add( bucket.getMergedBucket() ); + } + + + result.add("buckets", resultBuckets); + if (missingBucket != null) { + result.add("missing", missingBucket.getMergedBucket()); + } + if (allBuckets != null) { + result.add("allBuckets", allBuckets.getMergedBucket()); + } + + return result; + } + + + private class FacetNumBucketsMerger extends FacetMerger { + long sumBuckets; + long shardsMissingSum; + long shardsTruncatedSum; + Set values; + + @Override + public void merge(Object facetResult) { + SimpleOrderedMap map = (SimpleOrderedMap)facetResult; + long numBuckets = ((Number)map.get("numBuckets")).longValue(); + sumBuckets += numBuckets; + + List vals = (List)map.get("vals"); + if (vals != null) { + if (values == null) { + values = new HashSet<>(vals.size()*4); + } + values.addAll(vals); + if (numBuckets > values.size()) { + shardsTruncatedSum += numBuckets - values.size(); + } + } else { + shardsMissingSum += numBuckets; + } + } + + @Override + public Object getMergedResult() { + long exactCount = values == null ? 
0 : values.size(); + return exactCount + shardsMissingSum + shardsTruncatedSum; + // TODO: reduce count by (at least) number of buckets that fail to hit mincount (after merging) + // that should make things match for most of the small tests at least + } + } +} + + +class FacetRangeMerger extends FacetBucketMerger { + FacetBucket beforeBucket; + FacetBucket afterBucket; + FacetBucket betweenBucket; + + LinkedHashMap buckets = new LinkedHashMap(); + + + public FacetRangeMerger(FacetRange freq) { + super(freq); + } + + @Override + FacetBucket newBucket(Comparable bucketVal) { + return super.newBucket(bucketVal); + } + + @Override + FacetMerger createFacetMerger(String key, Object val) { + return super.createFacetMerger(key, val); + } + + @Override + public void merge(Object facetResult) { + merge((SimpleOrderedMap) facetResult); + } + + public void merge(SimpleOrderedMap facetResult) { + boolean all = freq.others.contains(FacetParams.FacetRangeOther.ALL); + + if (all || freq.others.contains(FacetParams.FacetRangeOther.BEFORE)) { + Object o = facetResult.get("before"); + if (o != null) { + if (beforeBucket == null) { + beforeBucket = newBucket(null); + } + beforeBucket.mergeBucket((SimpleOrderedMap)o); + } + } + + if (all || freq.others.contains(FacetParams.FacetRangeOther.AFTER)) { + Object o = facetResult.get("after"); + if (o != null) { + if (afterBucket == null) { + afterBucket = newBucket(null); + } + afterBucket.mergeBucket((SimpleOrderedMap)o); + } + } + + if (all || freq.others.contains(FacetParams.FacetRangeOther.BETWEEN)) { + Object o = facetResult.get("between"); + if (o != null) { + if (betweenBucket == null) { + betweenBucket = newBucket(null); + } + betweenBucket.mergeBucket((SimpleOrderedMap)o); + } + } + + List bucketList = (List) facetResult.get("buckets"); + mergeBucketList(bucketList); + } + + public void mergeBucketList(List bucketList) { + for (SimpleOrderedMap bucketRes : bucketList) { + Comparable bucketVal = (Comparable)bucketRes.get("val"); + FacetBucket bucket = buckets.get(bucketVal); + if (bucket == null) { + bucket = newBucket(bucketVal); + buckets.put(bucketVal, bucket); + } + bucket.mergeBucket( bucketRes ); + } + } + + @Override + public Object getMergedResult() { + SimpleOrderedMap result = new SimpleOrderedMap(4); + + List resultBuckets = new ArrayList<>(buckets.size()); + // TODO: if we implement mincount for ranges, we'll need to sort buckets (see FacetFieldMerger) + + for (FacetBucket bucket : buckets.values()) { + /*** + if (bucket.getCount() < freq.mincount) { + continue; + } + ***/ + resultBuckets.add( bucket.getMergedBucket() ); + } + + result.add("buckets", resultBuckets); + + if (beforeBucket != null) { + result.add("before", beforeBucket.getMergedBucket()); + } + if (afterBucket != null) { + result.add("after", afterBucket.getMergedBucket()); + } + if (betweenBucket != null) { + result.add("between", betweenBucket.getMergedBucket()); + } + return result; + + } +} \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java new file mode 100644 index 00000000000..95640b27505 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java @@ -0,0 +1,60 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.SimpleOrderedMap; + +public class FacetQuery extends FacetRequest { + // query string or query? + Query q; + + @Override + public FacetProcessor createFacetProcessor(FacetContext fcontext) { + return new FacetQueryProcessor(fcontext, this); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetQueryMerger(this); + } +} + + + + +class FacetQueryProcessor extends FacetProcessor { + FacetQueryProcessor(FacetContext fcontext, FacetQuery freq) { + super(fcontext, freq); + } + + @Override + public Object getResponse() { + return response; + } + + @Override + public void process() throws IOException { + response = new SimpleOrderedMap<>(); + fillBucket(response, freq.q); + } + + +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java new file mode 100644 index 00000000000..ffeffc26ee2 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java @@ -0,0 +1,374 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.FacetParams;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TrieField;
+
+public class FacetRange extends FacetRequest {
+  String field;
+  Object start;
+  Object end;
+  Object gap;
+  boolean hardend = false;
+  EnumSet<FacetParams.FacetRangeInclude> include;
+  EnumSet<FacetParams.FacetRangeOther> others;
+
+  @Override
+  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
+    return new FacetRangeProcessor(fcontext, this);
+  }
+
+  @Override
+  public FacetMerger createFacetMerger(Object prototype) {
+    return new FacetRangeMerger(this);
+  }
+}
+
+
+class FacetRangeProcessor extends FacetProcessor<FacetRange> {
+  SchemaField sf;
+
+
+  FacetRangeProcessor(FacetContext fcontext, FacetRange freq) {
+    super(fcontext, freq);
+  }
+
+  @Override
+  public void process() throws IOException {
+    sf = fcontext.searcher.getSchema().getField(freq.field);
+
+    response = getRangeCountsIndexed();
+  }
+
+  @Override
+  public Object getResponse() {
+    return response;
+  }
+
+
+  SimpleOrderedMap<Object> getRangeCountsIndexed() throws IOException {
+    final FieldType ft = sf.getType();
+
+    RangeEndpointCalculator calc = null;
+
+    if (ft instanceof TrieField) {
+      final TrieField trie = (TrieField)ft;
+
+      switch (trie.getType()) {
+        case FLOAT:
+          calc = new FloatRangeEndpointCalculator(sf);
+          break;
+        case DOUBLE:
+          calc = new DoubleRangeEndpointCalculator(sf);
+          break;
+        case INTEGER:
+          calc = new IntegerRangeEndpointCalculator(sf);
+          break;
+        case LONG:
+          calc = new LongRangeEndpointCalculator(sf);
+          break;
+        default:
+          throw new SolrException
+              (SolrException.ErrorCode.BAD_REQUEST,
+                  "Unable to range facet on trie field of unexpected type:" + freq.field);
+      }
+    } else {
+      throw new SolrException
+          (SolrException.ErrorCode.BAD_REQUEST,
+              "Unable to range facet on field:" + sf);
+    }
+
+    return getRangeCountsIndexed(calc);
+  }
+
+  private <T extends Comparable<T>> SimpleOrderedMap getRangeCountsIndexed(RangeEndpointCalculator<T> calc) throws IOException {
+
+    final SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
+
+    List<SimpleOrderedMap<Object>> buckets = null;
+
+    buckets = new ArrayList<>();
+    res.add("buckets", buckets);
+
+    T start = calc.getValue(freq.start.toString());
+    T end = calc.getValue(freq.end.toString());
+    EnumSet<FacetParams.FacetRangeInclude> include = freq.include;
+
+    String gap = freq.gap.toString();
+
+    final int minCount = 0;
+
+    T low = start;
+
+    while (low.compareTo(end) < 0) {
+      T high = calc.addGap(low, gap);
+      if (end.compareTo(high) < 0) {
+        if (freq.hardend) {
+          high = end;
+        } else {
+          end = high;
+        }
+      }
+      if (high.compareTo(low) < 0) {
+        throw new SolrException
+            (SolrException.ErrorCode.BAD_REQUEST,
+                "range facet infinite loop (is gap negative? did the math overflow?)");
+      }
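+      // Editor's illustration of the loop above (hypothetical values): with
+      // start=0, end=100, gap="25" and hardend=false, low walks through
+      // 0, 25, 50, 75 and one bucket is emitted per step with high = low + 25;
+      // with the default include of "lower", each bucket counts values in [low, high).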
+      if (high.compareTo(low) == 0) {
+        throw new SolrException
+            (SolrException.ErrorCode.BAD_REQUEST,
+                "range facet infinite loop: gap is either zero, or too small relative to start/end and caused underflow: " + low + " + " + gap + " = " + high );
+      }
+
+      final boolean includeLower =
+          (include.contains(FacetParams.FacetRangeInclude.LOWER) ||
+              (include.contains(FacetParams.FacetRangeInclude.EDGE) &&
+                  0 == low.compareTo(start)));
+      final boolean includeUpper =
+          (include.contains(FacetParams.FacetRangeInclude.UPPER) ||
+              (include.contains(FacetParams.FacetRangeInclude.EDGE) &&
+                  0 == high.compareTo(end)));
+
+      final String lowS = calc.formatValue(low);
+      final String highS = calc.formatValue(high);
+
+      Object label = low;
+      buckets.add( rangeStats(low, minCount, lowS, highS, includeLower, includeUpper) );
+
+      low = high;
+    }
+
+    // no matter what other values are listed, we don't do
+    // anything if "none" is specified.
+    if (! freq.others.contains(FacetParams.FacetRangeOther.NONE) ) {
+
+      boolean all = freq.others.contains(FacetParams.FacetRangeOther.ALL);
+      final String startS = calc.formatValue(start);
+      final String endS = calc.formatValue(end);
+
+      if (all || freq.others.contains(FacetParams.FacetRangeOther.BEFORE)) {
+        // include upper bound if "outer" or if first gap doesn't already include it
+        res.add(FacetParams.FacetRangeOther.BEFORE.toString(),
+            rangeStats(null, 0, null, startS,
+                false,
+                (include.contains(FacetParams.FacetRangeInclude.OUTER) ||
+                    (!(include.contains(FacetParams.FacetRangeInclude.LOWER) ||
+                        include.contains(FacetParams.FacetRangeInclude.EDGE))))));
+
+      }
+      if (all || freq.others.contains(FacetParams.FacetRangeOther.AFTER)) {
+        // include lower bound if "outer" or if last gap doesn't already include it
+        res.add(FacetParams.FacetRangeOther.AFTER.toString(),
+            rangeStats(null, 0, endS, null,
+                (include.contains(FacetParams.FacetRangeInclude.OUTER) ||
+                    (!(include.contains(FacetParams.FacetRangeInclude.UPPER) ||
+                        include.contains(FacetParams.FacetRangeInclude.EDGE)))),
+                false));
+      }
+      if (all || freq.others.contains(FacetParams.FacetRangeOther.BETWEEN)) {
+        res.add(FacetParams.FacetRangeOther.BETWEEN.toString(),
+            rangeStats(null, 0, startS, endS,
+                (include.contains(FacetParams.FacetRangeInclude.LOWER) ||
+                    include.contains(FacetParams.FacetRangeInclude.EDGE)),
+                (include.contains(FacetParams.FacetRangeInclude.UPPER) ||
+                    include.contains(FacetParams.FacetRangeInclude.EDGE))));
+
+      }
+    }
+
+
+    return res;
+  }
+
+  private SimpleOrderedMap<Object> rangeStats(Object label, int mincount, String low, String high, boolean iLow, boolean iHigh) throws IOException {
+    SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
+
+    // typically the start value of the range, but null for before/after/between
+    if (label != null) {
+      bucket.add("val", label);
+    }
+
+    Query rangeQ = sf.getType().getRangeQuery(null, sf, low, high, iLow, iHigh);
+    fillBucket(bucket, rangeQ);
+
+    return bucket;
+  }
+
+
+
+
+  // Essentially copied from SimpleFacets...
+  // would be nice to unify this stuff w/ analytics component...
+  /**
+   * Perhaps someday instead of having a giant "instanceof" case
+   * statement to pick an impl, we can add a "RangeFacetable" marker
+   * interface to FieldTypes and they can return instances of these
+   * directly from some method -- but until then, keep this locked down
+   * and private.
+   */
+  private static abstract class RangeEndpointCalculator<T extends Comparable<T>> {
+    protected final SchemaField field;
+    public RangeEndpointCalculator(final SchemaField field) {
+      this.field = field;
+    }
+
+    /**
+     * Formats a Range endpoint for use as a range label name in the response.
+     * Default Impl just uses toString()
+     */
+    public String formatValue(final T val) {
+      return val.toString();
+    }
+    /**
+     * Parses a String param into a Range endpoint value, throwing
+     * a useful exception if not possible
+     */
+    public final T getValue(final String rawval) {
+      try {
+        return parseVal(rawval);
+      } catch (Exception e) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            "Can't parse value "+rawval+" for field: " +
+                field.getName(), e);
+      }
+    }
+    /**
+     * Parses a String param into a Range endpoint.
+     * Can throw a low level format exception as needed.
+     */
+    protected abstract T parseVal(final String rawval)
+        throws java.text.ParseException;
+
+    /**
+     * Parses a String param into a value that represents the gap and
+     * can be included in the response, throwing
+     * a useful exception if not possible.
+     *
+     * Note: uses Object as the return type instead of T for things like
+     * Date where gap is just a DateMathParser string
+     */
+    public final Object getGap(final String gap) {
+      try {
+        return parseGap(gap);
+      } catch (Exception e) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            "Can't parse gap "+gap+" for field: " +
+                field.getName(), e);
+      }
+    }
+
+    /**
+     * Parses a String param into a value that represents the gap and
+     * can be included in the response.
+     * Can throw a low level format exception as needed.
+     *
+     * Default Impl calls parseVal
+     */
+    protected Object parseGap(final String rawval)
+        throws java.text.ParseException {
+      return parseVal(rawval);
+    }
+
+    /**
+     * Adds the String gap param to a low Range endpoint value to determine
+     * the corresponding high Range endpoint value, throwing
+     * a useful exception if not possible.
+     */
+    public final T addGap(T value, String gap) {
+      try {
+        return parseAndAddGap(value, gap);
+      } catch (Exception e) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            "Can't add gap "+gap+" to value " + value +
+                " for field: " + field.getName(), e);
+      }
+    }
+    /**
+     * Adds the String gap param to a low Range endpoint value to determine
+     * the corresponding high Range endpoint value.
+     * Can throw a low level format exception as needed.
+     */
+    protected abstract T parseAndAddGap(T value, String gap)
+        throws java.text.ParseException;
+
+  }
+
+  private static class FloatRangeEndpointCalculator
+      extends RangeEndpointCalculator<Float> {
+
+    public FloatRangeEndpointCalculator(final SchemaField f) { super(f); }
+    @Override
+    protected Float parseVal(String rawval) {
+      return Float.valueOf(rawval);
+    }
+    @Override
+    public Float parseAndAddGap(Float value, String gap) {
+      return new Float(value.floatValue() + Float.valueOf(gap).floatValue());
+    }
+  }
+  private static class DoubleRangeEndpointCalculator
+      extends RangeEndpointCalculator<Double> {
+
+    public DoubleRangeEndpointCalculator(final SchemaField f) { super(f); }
+    @Override
+    protected Double parseVal(String rawval) {
+      return Double.valueOf(rawval);
+    }
+    @Override
+    public Double parseAndAddGap(Double value, String gap) {
+      return new Double(value.doubleValue() + Double.valueOf(gap).doubleValue());
+    }
+  }
+  private static class IntegerRangeEndpointCalculator
+      extends RangeEndpointCalculator<Integer> {
+
+    public IntegerRangeEndpointCalculator(final SchemaField f) { super(f); }
+    @Override
+    protected Integer parseVal(String rawval) {
+      return Integer.valueOf(rawval);
+    }
+    @Override
+    public Integer parseAndAddGap(Integer value, String gap) {
+      return new Integer(value.intValue() + Integer.valueOf(gap).intValue());
+    }
+  }
+  private static class LongRangeEndpointCalculator
+      extends RangeEndpointCalculator<Long> {
+
+    public LongRangeEndpointCalculator(final SchemaField f) { super(f); }
+    @Override
+    protected Long parseVal(String rawval) {
+      return Long.valueOf(rawval);
+    }
+    @Override
+    public Long parseAndAddGap(Long value, String gap) {
+      return new Long(value.longValue() + Long.valueOf(gap).longValue());
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
new file mode 100644
index 00000000000..bca80da2b0e
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@@ -0,0 +1,720 @@
+package org.apache.solr.search.facet;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.FacetParams;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.FunctionQParser;
+import org.apache.solr.search.FunctionQParserPlugin;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QueryContext;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SyntaxError;
+
+
+public abstract class FacetRequest {
+  protected Map<String,AggValueSource> facetStats;  // per-bucket statistics
+  protected Map<String,FacetRequest> subFacets;     // list of facets
+  protected List excludeFilters;
+  protected boolean processEmpty;
+
+  public FacetRequest() {
+    facetStats = new LinkedHashMap<>();
+    subFacets = new LinkedHashMap<>();
+  }
+
+  public Map<String,AggValueSource> getFacetStats() {
+    return facetStats;
+  }
+
+  public Map<String,FacetRequest> getSubFacets() {
+    return subFacets;
+  }
+
+  public void addStat(String key, AggValueSource stat) {
+    facetStats.put(key, stat);
+  }
+
+  public void addSubFacet(String key, FacetRequest facetRequest) {
+    subFacets.put(key, facetRequest);
+  }
+
+  public abstract FacetProcessor createFacetProcessor(FacetContext fcontext);
+
+  public abstract FacetMerger createFacetMerger(Object prototype);
+}
+
+
+class FacetContext {
+  // Context info for actually executing a local facet command
+  public static final int IS_SHARD=0x01;
+
+  QueryContext qcontext;
+  SolrQueryRequest req;  // TODO: replace with params?
+  SolrIndexSearcher searcher;
+  DocSet base;
+  FacetContext parent;
+  int flags;
+
+  public boolean isShard() {
+    return (flags & IS_SHARD) != 0;
+  }
+
+  public FacetContext sub() {
+    FacetContext ctx = new FacetContext();
+    ctx.flags = flags;
+    ctx.qcontext = qcontext;
+    ctx.req = req;
+    ctx.searcher = searcher;
+    ctx.base = base;
+
+    ctx.parent = this;
+    return ctx;
+  }
+}
+
+
+class FacetProcessor<FacetRequestT extends FacetRequest> {
+  protected SimpleOrderedMap<Object> response;
+  protected FacetContext fcontext;
+  protected FacetRequestT freq;
+
+  LinkedHashMap<String,SlotAcc> accMap;
+  protected SlotAcc[] accs;
+  protected CountSlotAcc countAcc;
+
+  FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
+    this.fcontext = fcontext;
+    this.freq = freq;
+  }
+
+  public void process() throws IOException {
+
+
+  }
+
+  public Object getResponse() {
+    return null;
+  }
+
+
+  protected void createAccs(int docCount, int slotCount) throws IOException {
+    accMap = new LinkedHashMap<>();
+    countAcc = new CountSlotAcc(fcontext, slotCount);
+    countAcc.key = "count";
+    for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
+      SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
+      acc.key = entry.getKey();
+      accMap.put(acc.key, acc);
+    }
+  }
+
+  /** Create the actual accs array from accMap before starting to collect stats.
*/ + protected void prepareForCollection() { + accs = new SlotAcc[accMap.size()]; + int i=0; + for (SlotAcc acc : accMap.values()) { + accs[i++] = acc; + } + } + + protected void resetStats() { + countAcc.reset(); + for (SlotAcc acc : accs) { + acc.reset(); + } + } + + protected void processStats(SimpleOrderedMap bucket, DocSet docs, int docCount) throws IOException { + if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) { + bucket.add("count", docCount); + return; + } + createAccs(docCount, 1); + prepareForCollection(); + int collected = collect(docs, 0); + countAcc.incrementCount(0, collected); + assert collected == docCount; + addStats(bucket, 0); + } + + + protected void fillBucketSubs(SimpleOrderedMap response, FacetContext subContext) throws IOException { + for (Map.Entry sub : freq.getSubFacets().entrySet()) { + FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext); + subProcessor.process(); + response.add( sub.getKey(), subProcessor.getResponse() ); + } + } + + int collect(DocSet docs, int slot) throws IOException { + int count = 0; + SolrIndexSearcher searcher = fcontext.searcher; + + final List leaves = searcher.getIndexReader().leaves(); + final Iterator ctxIt = leaves.iterator(); + LeafReaderContext ctx = null; + int segBase = 0; + int segMax; + int adjustedMax = 0; + for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) { + final int doc = docsIt.nextDoc(); + if (doc >= adjustedMax) { + do { + ctx = ctxIt.next(); + if (ctx == null) { + // should be impossible + throw new RuntimeException("INTERNAL FACET ERROR"); + } + segBase = ctx.docBase; + segMax = ctx.reader().maxDoc(); + adjustedMax = segBase + segMax; + } while (doc >= adjustedMax); + assert doc >= ctx.docBase; + setNextReader(ctx); + } + count++; + collect(doc - segBase, slot); // per-seg collectors + } + return count; + } + + void collect(int segDoc, int slot) throws IOException { + for (SlotAcc acc : accs) { + acc.collect(segDoc, slot); + } + } + + void setNextReader(LeafReaderContext ctx) throws IOException { + // countAcc.setNextReader is a no-op + for (SlotAcc acc : accs) { + acc.setNextReader(ctx); + } + } + + void addStats(SimpleOrderedMap target, int slotNum) throws IOException { + int count = countAcc.getCount(slotNum); + target.add("count", count); + if (count > 0 || freq.processEmpty) { + for (SlotAcc acc : accs) { + acc.setValues(target, slotNum); + } + } + } + + + + + + public void fillBucket(SimpleOrderedMap bucket, Query q) throws IOException { + boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0; + + // TODO: always collect counts or not??? 
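+    // Editor's note (hypothetical request): a bucket with stats or sub-facets, e.g.
+    //   { electronics : { query : "cat:electronics", facet : { x : "avg(price)" } } }
+    // needs the actual DocSet of matches so the sub-accumulators can collect per
+    // document, while a bare  { electronics : { query : "cat:electronics" } }  only
+    // needs the cardinality and can take the cheaper searcher.numDocs() path below.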
+ + DocSet result = null; + int count; + + if (needDocSet) { + if (q == null) { + result = fcontext.base; + // result.incref(); // OFF-HEAP + } else { + result = fcontext.searcher.getDocSet(q, fcontext.base); + } + count = result.size(); + } else { + if (q == null) { + count = fcontext.base.size(); + } else { + count = fcontext.searcher.numDocs(q, fcontext.base); + } + } + + try { + processStats(bucket, result, (int) count); + processSubs(bucket, result); + } finally { + if (result != null) { + // result.decref(); // OFF-HEAP + result = null; + } + } + } + + + + + protected void processSubs(SimpleOrderedMap bucket, DocSet result) throws IOException { + // TODO: process exclusions, etc + + if (result == null || result.size() == 0 && !freq.processEmpty) { + return; + } + + FacetContext subContext = fcontext.sub(); + subContext.base = result; + + fillBucketSubs(bucket, subContext); + } + + + public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException { + SchemaField sf = searcher.getSchema().getField(fieldName); + DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false)); + DocSet answer = docs.andNot(hasVal); + // hasVal.decref(); // OFF-HEAP + return answer; + } + +} + + + + + +abstract class FacetParser { + protected FacetRequestT facet; + protected FacetParser parent; + protected String key; + + public FacetParser(FacetParser parent,String key) { + this.parent = parent; + this.key = key; + } + + public String getKey() { + return key; + } + + public String getPathStr() { + if (parent == null) { + return "/" + key; + } + return parent.getKey() + "/" + key; + } + + protected RuntimeException err(String msg) { + return new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg + " ,path="+getPathStr()); + } + + public abstract FacetRequest parse(Object o) throws SyntaxError; + + // TODO: put the FacetRequest on the parser object? + public void parseSubs(Object o) throws SyntaxError { + if (o==null) return; + if (o instanceof Map) { + Map m = (Map) o; + for (Map.Entry entry : m.entrySet()) { + String key = entry.getKey(); + Object value = entry.getValue(); + + if ("processEmpty".equals(key)) { + facet.processEmpty = getBoolean(m, "processEmpty", false); + continue; + } + + // "my_prices" : { "range" : { "field":... + // key="my_prices", value={"range":.. + + Object parsedValue = parseFacetOrStat(key, value); + + // TODO: have parseFacetOrStat directly add instead of return? + if (parsedValue instanceof FacetRequest) { + facet.addSubFacet(key, (FacetRequest)parsedValue); + } else if (parsedValue instanceof AggValueSource) { + facet.addStat(key, (AggValueSource)parsedValue); + } else { + throw new RuntimeException("Huh? TODO: " + parsedValue); + } + } + } else { + // facet : my_field? + throw err("Expected map for facet/stat"); + } + } + + public Object parseFacetOrStat(String key, Object o) throws SyntaxError { + if (o instanceof String) { + return parseStringFacetOrStat(key, (String)o); + } + + if (!(o instanceof Map)) { + throw err("expected Map but got " + o); + } + + // { "range" : { "field":... + Map m = (Map)o; + if (m.size() != 1) { + throw err("expected facet/stat type name, like {range:{... but got " + m); + } + + // Is this most efficient way? 
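+    // For example, given  "prices" : { "range" : { "field" : "price", ... } }  the
+    // single entry unpacks to type="range" and args={ "field" : "price", ... },
+    // which parseFacetOrStat(key, type, args) below then dispatches on.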
+ Map.Entry entry = m.entrySet().iterator().next(); + String type = entry.getKey(); + Object args = entry.getValue(); + return parseFacetOrStat(key, type, args); + } + + public Object parseFacetOrStat(String key, String type, Object args) throws SyntaxError { + // TODO: a place to register all these facet types? + + if ("field".equals(type) || "terms".equals(type)) { + return parseFieldFacet(key, args); + } else if ("query".equals(type)) { + return parseQueryFacet(key, args); + } else if ("range".equals(type)) { + return parseRangeFacet(key, args); + } + + return parseStat(key, type, args); + } + + + + FacetField parseFieldFacet(String key, Object args) throws SyntaxError { + FacetFieldParser parser = new FacetFieldParser(this, key); + return parser.parse(args); + } + + FacetQuery parseQueryFacet(String key, Object args) throws SyntaxError { + FacetQueryParser parser = new FacetQueryParser(this, key); + return parser.parse(args); + } + + FacetRange parseRangeFacet(String key, Object args) throws SyntaxError { + FacetRangeParser parser = new FacetRangeParser(this, key); + return parser.parse(args); + } + + public Object parseStringFacetOrStat(String key, String s) throws SyntaxError { + // "avg(myfield)" + return parseStringStat(key, s); + // TODO - simple string representation of facets + } + + // parses avg(x) + private AggValueSource parseStringStat(String key, String stat) throws SyntaxError { + FunctionQParser parser = (FunctionQParser)QParser.getParser(stat, FunctionQParserPlugin.NAME, getSolrRequest()); + AggValueSource agg = parser.parseAgg(FunctionQParser.FLAG_DEFAULT); + return agg; + } + + public AggValueSource parseStat(String key, String type, Object args) throws SyntaxError { + return null; + } + + + public String getField(Map args) { + Object fieldName = args.get("field"); // TODO: pull out into defined constant + if (fieldName == null) { + fieldName = args.get("f"); // short form + } + if (fieldName == null) { + throw err("Missing 'field'"); + } + + if (!(fieldName instanceof String)) { + throw err("Expected string for 'field', got" + fieldName); + } + + return (String)fieldName; + } + + + public Long getLongOrNull(Map args, String paramName, boolean required) { + Object o = args.get(paramName); + if (o == null) { + if (required) { + throw err("Missing required parameter '" + paramName + "'"); + } + return null; + } + if (!(o instanceof Long || o instanceof Integer || o instanceof Short || o instanceof Byte)) { + throw err("Expected integer type for param '"+paramName + "' but got " + o); + } + + return ((Number)o).longValue(); + } + + public long getLong(Map args, String paramName, long defVal) { + Object o = args.get(paramName); + if (o == null) { + return defVal; + } + if (!(o instanceof Long || o instanceof Integer || o instanceof Short || o instanceof Byte)) { + throw err("Expected integer type for param '"+paramName + "' but got " + o.getClass().getSimpleName() + " = " + o); + } + + return ((Number)o).longValue(); + } + + public boolean getBoolean(Map args, String paramName, boolean defVal) { + Object o = args.get(paramName); + if (o == null) { + return defVal; + } + // TODO: should we be more flexible and accept things like "true" (strings)? + // Perhaps wait until the use case comes up. 
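+    // (So today  missing:true  parses, while the string form  missing:"true"
+    // is rejected with a 400/BAD_REQUEST.)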
+ if (!(o instanceof Boolean)) { + throw err("Expected boolean type for param '"+paramName + "' but got " + o.getClass().getSimpleName() + " = " + o); + } + + return (Boolean)o; + } + + public String getString(Map args, String paramName, String defVal) { + Object o = args.get(paramName); + if (o == null) { + return defVal; + } + if (!(o instanceof String)) { + throw err("Expected string type for param '"+paramName + "' but got " + o.getClass().getSimpleName() + " = " + o); + } + + return (String)o; + } + + + public IndexSchema getSchema() { + return parent.getSchema(); + } + + public SolrQueryRequest getSolrRequest() { + return parent.getSolrRequest(); + } + +} + + +class FacetTopParser extends FacetParser { + private SolrQueryRequest req; + + public FacetTopParser(SolrQueryRequest req) { + super(null, "facet"); + this.facet = new FacetQuery(); + this.req = req; + } + + @Override + public FacetQuery parse(Object args) throws SyntaxError { + parseSubs(args); + return facet; + } + + @Override + public SolrQueryRequest getSolrRequest() { + return req; + } + + @Override + public IndexSchema getSchema() { + return req.getSchema(); + } +} + +class FacetQueryParser extends FacetParser { + public FacetQueryParser(FacetParser parent, String key) { + super(parent, key); + facet = new FacetQuery(); + } + + @Override + public FacetQuery parse(Object arg) throws SyntaxError { + String qstring = null; + if (arg instanceof String) { + // just the field name... + qstring = (String)arg; + + } else if (arg instanceof Map) { + Map m = (Map) arg; + qstring = getString(m, "q", null); + if (qstring == null) { + qstring = getString(m, "query", null); + } + + // OK to parse subs before we have parsed our own query? + // as long as subs don't need to know about it. + parseSubs( m.get("facet") ); + } + + // TODO: substats that are from defaults!!! + + if (qstring != null) { + QParser parser = QParser.getParser(qstring, null, getSolrRequest()); + facet.q = parser.getQuery(); + } + + return facet; + } +} + +class FacetFieldParser extends FacetParser { + public FacetFieldParser(FacetParser parent, String key) { + super(parent, key); + facet = new FacetField(); + } + + public FacetField parse(Object arg) throws SyntaxError { + + if (arg instanceof String) { + // just the field name... + facet.field = (String)arg; + parseSort( null ); // TODO: defaults + + } else if (arg instanceof Map) { + Map m = (Map) arg; + facet.field = getField(m); + facet.offset = getLong(m, "offset", facet.offset); + facet.limit = getLong(m, "limit", facet.limit); + facet.mincount = getLong(m, "mincount", facet.mincount); + facet.missing = getBoolean(m, "missing", facet.missing); + facet.numBuckets = getBoolean(m, "numBuckets", facet.numBuckets); + facet.prefix = getString(m, "prefix", facet.prefix); + facet.allBuckets = getBoolean(m, "allBuckets", facet.allBuckets); + facet.method = FacetField.FacetMethod.fromString(getString(m, "method", null)); + facet.cacheDf = (int)getLong(m, "cacheDf", facet.cacheDf); + + // facet.sort may depend on a facet stat... + // should we be parsing / validating this here, or in the execution environment? 
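+      // Editor's illustration (hypothetical request): the sort may name a stat
+      // defined in the nested facet block, which is why subs are parsed first:
+      //   { top_cats : { terms : { field : "cat", limit : 5, sort : "x desc",
+      //                            facet : { x : "avg(price)" } } } }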
+ Object o = m.get("facet"); + parseSubs(o); + + parseSort( m.get("sort") ); + } + + return facet; + } + + + // Sort specification is currently + // sort : 'mystat desc' + // OR + // sort : { mystat : 'desc' } + private void parseSort(Object sort) { + if (sort == null) { + facet.sortVariable = "count"; + facet.sortDirection = FacetField.SortDirection.desc; + } else if (sort instanceof String) { + String sortStr = (String)sort; + if (sortStr.endsWith(" asc")) { + facet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length()); + facet.sortDirection = FacetField.SortDirection.asc; + } else if (sortStr.endsWith(" desc")) { + facet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length()); + facet.sortDirection = FacetField.SortDirection.desc; + } else { + facet.sortDirection = "index".equals(facet.sortVariable) ? FacetField.SortDirection.asc : FacetField.SortDirection.desc; // default direction for "index" is ascending + } + } else { + // sort : { myvar : 'desc' } + Map map = (Map)sort; + // TODO: validate + Map.Entry entry = map.entrySet().iterator().next(); + String k = entry.getKey(); + Object v = entry.getValue(); + facet.sortVariable = k; + facet.sortDirection = FacetField.SortDirection.valueOf(v.toString()); + } + + } +} + + + +class FacetRangeParser extends FacetParser { + public FacetRangeParser(FacetParser parent, String key) { + super(parent, key); + facet = new FacetRange(); + } + + public FacetRange parse(Object arg) throws SyntaxError { + if (!(arg instanceof Map)) { + throw err("Missing range facet arguments"); + } + + Map m = (Map) arg; + + facet.field = getString(m, "field", null); + + facet.start = m.get("start"); + facet.end = m.get("end"); + facet.gap = m.get("gap"); + facet.hardend = getBoolean(m, "hardend", facet.hardend); + + // TODO: refactor list-of-options code + + Object o = m.get("include"); + String[] includeList = null; + if (o != null) { + List lst = null; + + if (o instanceof List) { + lst = (List)o; + } else if (o instanceof String) { + lst = StrUtils.splitSmart((String)o, ','); + } + + includeList = (String[])lst.toArray(new String[lst.size()]); + } + facet.include = FacetParams.FacetRangeInclude.parseParam( includeList ); + + facet.others = EnumSet.noneOf(FacetParams.FacetRangeOther.class); + + o = m.get("other"); + if (o != null) { + List lst = null; + + if (o instanceof List) { + lst = (List)o; + } else if (o instanceof String) { + lst = StrUtils.splitSmart((String)o, ','); + } + + for (String otherStr : lst) { + facet.others.add( FacetParams.FacetRangeOther.get(otherStr) ); + } + } + + + Object facetObj = m.get("facet"); + parseSubs(facetObj); + + return facet; + } + +} + + + diff --git a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java new file mode 100644 index 00000000000..7a8c1e576e5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java @@ -0,0 +1,66 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QueryContext; +import org.apache.solr.search.SolrIndexSearcher; + +/** @lucene.internal + * Porting helper... may be removed if it offers no value in the future. + */ +public class FieldUtil { + + /** Simpler method that creates a request context and looks up the field for you */ + public static SortedDocValues getSortedDocValues(SolrIndexSearcher searcher, String field) throws IOException { + SchemaField sf = searcher.getSchema().getField(field); + QueryContext qContext = QueryContext.newContext(searcher); + return getSortedDocValues( qContext, sf, null ); + } + + + public static SortedDocValues getSortedDocValues(QueryContext context, SchemaField field, QParser qparser) throws IOException { + SortedDocValues si = context.searcher().getLeafReader().getSortedDocValues( field.getName() ); + // if (!field.hasDocValues() && (field.getType() instanceof StrField || field.getType() instanceof TextField)) { + // } + + return si == null ? EMPTY_SortedDocValues : si; + } + + private static SortedDocValues EMPTY_SortedDocValues = new SortedDocValues() { + @Override + public int getOrd(int docID) { + return -1; + } + + @Override + public BytesRef lookupOrd(int ord) { + return null; + } + + @Override + public int getValueCount() { + return 0; + } + }; +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/LegacyFacet.java b/solr/core/src/java/org/apache/solr/search/facet/LegacyFacet.java new file mode 100644 index 00000000000..3f07f57b5b3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/LegacyFacet.java @@ -0,0 +1,318 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.RequiredSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.QueryParsing; +import org.apache.solr.search.SolrReturnFields; +import org.apache.solr.search.SyntaxError; + +public class LegacyFacet { + private SolrParams params; + private Map json; + private Map currentCommand = null; // always points to the current facet command + private Map currentSubs; // always points to the current facet:{} block + + String facetValue; + String key; + SolrParams localParams; + SolrParams orig; + SolrParams required; + + Map> subFacets; // only parsed once + + public LegacyFacet(SolrParams params) { + this.params = params; + this.orig = params; + this.json = new LinkedHashMap<>(); + this.currentSubs = json; + } + + + Map getLegacy() { + subFacets = parseSubFacets(params); + String[] queries = params.getParams(FacetParams.FACET_QUERY); + if (queries != null) { + for (String q : queries) { + addQueryFacet(q); + } + } + String[] fields = params.getParams(FacetParams.FACET_FIELD); + if (fields != null) { + for (String field : fields) { + addFieldFacet(field); + } + } + String[] ranges = params.getParams(FacetParams.FACET_RANGE); + if (ranges != null) { + for (String range : ranges) { + addRangeFacet(range); + } + } + // SolrCore.log.error("###################### JSON FACET:" + json); + return json; + } + + + protected static class Subfacet { + public String parentKey; + public String type; // query, range, field + public String value; // the actual field or the query, including possible local params + } + + + protected static Map> parseSubFacets(SolrParams params) { + Map> map = new HashMap<>(); + Iterator iter = params.getParameterNamesIterator(); + + String SUBFACET="subfacet."; + while (iter.hasNext()) { + String key = iter.next(); + + if (key.startsWith(SUBFACET)) { + List parts = StrUtils.splitSmart(key, '.'); + if (parts.size() != 3) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "expected subfacet parameter name of the form subfacet.mykey.field, got:" + key); + } + Subfacet sub = new Subfacet(); + sub.parentKey = parts.get(1); + sub.type = parts.get(2); + sub.value = params.get(key); + + List subs = map.get(sub.parentKey); + if (subs == null) { + subs = new ArrayList<>(1); + } + subs.add(sub); + map.put(sub.parentKey, subs); + } + } + + return map; + } + + + protected void addQueryFacet(String q) { + parseParams(FacetParams.FACET_QUERY, q); + Map cmd = new HashMap(2); + Map type = new HashMap(1); + type.put("query", cmd); + cmd.put("q", q); + addSub(key, type); + handleSubs(cmd); + } + + protected void addRangeFacet(String field) { + parseParams(FacetParams.FACET_RANGE, field); + Map cmd = new HashMap(5); + Map type = new HashMap(1); + type.put("range", cmd); + + String f = key; + cmd.put("field", facetValue); + cmd.put("start", required.getFieldParam(f,FacetParams.FACET_RANGE_START)); + cmd.put("end", required.getFieldParam(f,FacetParams.FACET_RANGE_END)); + cmd.put("gap", required.getFieldParam(f, FacetParams.FACET_RANGE_GAP)); + String[] p = params.getFieldParams(f, 
FacetParams.FACET_RANGE_OTHER);
+    if (p != null) cmd.put("other", p.length==1 ? p[0] : Arrays.asList(p));
+    p = params.getFieldParams(f, FacetParams.FACET_RANGE_INCLUDE);
+    if (p != null) cmd.put("include", p.length==1 ? p[0] : Arrays.asList(p));
+
+    final int mincount = params.getFieldInt(f,FacetParams.FACET_MINCOUNT, 0);
+    cmd.put("mincount", mincount);
+
+    boolean hardend = params.getFieldBool(f,FacetParams.FACET_RANGE_HARD_END,false);
+    if (hardend) cmd.put("hardend", hardend);
+
+    addSub(key, type);
+    handleSubs(cmd);
+  }
+
+  protected void addFieldFacet(String field) {
+    parseParams(FacetParams.FACET_FIELD, field);
+
+    String f = key;  // the parameter to use for per-field parameters... f.key.facet.limit=10
+
+    int offset = params.getFieldInt(f, FacetParams.FACET_OFFSET, 0);
+    int limit = params.getFieldInt(f, FacetParams.FACET_LIMIT, 10);
+
+    int mincount = params.getFieldInt(f, FacetParams.FACET_MINCOUNT, 1);
+
+    boolean missing = params.getFieldBool(f, FacetParams.FACET_MISSING, false);
+
+    // default to sorting if there is a limit.
+    String sort = params.getFieldParam(f, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
+    String prefix = params.getFieldParam(f, FacetParams.FACET_PREFIX);
+
+    Map<String,Object> cmd = new HashMap<>();
+    cmd.put("field", facetValue);
+    if (offset != 0) cmd.put("offset", offset);
+    if (limit != 10) cmd.put("limit", limit);
+    if (mincount != 1) cmd.put("mincount", mincount);
+    if (missing) cmd.put("missing", missing);
+    if (prefix != null) cmd.put("prefix", prefix);
+    if (sort.equals("count")) {
+      // our default
+    } else if (sort.equals("index")) {
+      cmd.put("sort", "index asc");
+    } else {
+      cmd.put("sort", sort);  // can be sort by one of our stats
+    }
+
+    Map<String,Object> type = new HashMap<>(1);
+    type.put("terms", cmd);
+
+    addSub(key, type);
+    handleSubs(cmd);
+  }
+
+  private void handleSubs(Map cmd) {
+    Map savedCmd = currentCommand;
+    Map savedSubs = currentSubs;
+    try {
+      currentCommand = cmd;
+      currentSubs = null;
+
+      // parse stats for this facet
+      String[] stats = params.getFieldParams(key, "facet.stat");
+      if (stats != null) {
+        for (String stat : stats) {
+          addStat(stat);
+        }
+      }
+
+      List<Subfacet> subs = subFacets.get(key);
+      if (subs != null) {
+        for (Subfacet subfacet : subs) {
+          if ("field".equals(subfacet.type)) {
+            addFieldFacet(subfacet.value);
+          } else if ("query".equals(subfacet.type)) {
+            addQueryFacet(subfacet.value);
+          } else if ("range".equals(subfacet.type)) {
+            addRangeFacet(subfacet.value);
+          }
+        }
+      }
+
+
+    } finally {
+      currentCommand = savedCmd;
+      currentSubs = savedSubs;
+    }
+  }
+
+
+  private void addStat(String val) {
+    QueryParsing.StrParser sp = new QueryParsing.StrParser(val);
+    int start = 0;
+    sp.eatws();
+    if (sp.pos >= sp.end) {
+      addStat(val, val);
+      return;
+    }
+
+    // try key:func() format
+    String key = null;
+    String funcStr = val;
+
+    if (key == null) {
+      key = SolrReturnFields.getFieldName(sp);
+      if (key != null && sp.opt(':')) {
+        // OK, we got the key
+        funcStr = val.substring(sp.pos);
+      } else {
+        // an invalid key... it must not be present.
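+        // (e.g. facet.stat=x:avg(price) parses above to key="x", funcStr="avg(price)";
+        // a bare facet.stat=avg(price) lands here and the whole string becomes
+        // its own key below -- editor's note.)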
+ sp.pos = start; + key = null; + } + } + + if (key == null) { + key = funcStr; // not really ideal + } + + addStat(key, funcStr); + } + + private void addStat(String key, String val) { + if ("count".equals(val) || "count()".equals(val)) return; // we no longer have a count function, we always return the count + getCurrentSubs().put(key, val); + } + + private void addSub(String key, Map sub) { + getCurrentSubs().put(key, sub); + } + + private Map getCurrentSubs() { + if (currentSubs == null) { + currentSubs = new LinkedHashMap(); + currentCommand.put("facet", currentSubs); + } + return currentSubs; + } + + + + protected void parseParams(String type, String param) { + facetValue = param; + key = param; + + try { + localParams = QueryParsing.getLocalParams(param, orig); + + if (localParams == null) { + params = orig; + required = new RequiredSolrParams(params); + // setupStats(); + return; + } + + params = SolrParams.wrapDefaults(localParams, orig); + required = new RequiredSolrParams(params); + + // remove local params unless it's a query + if (type != FacetParams.FACET_QUERY) { + facetValue = localParams.get(CommonParams.VALUE); + } + + // reset set the default key now that localParams have been removed + key = facetValue; + + // allow explicit set of the key + key = localParams.get(CommonParams.OUTPUT_KEY, key); + + // setupStats(); + } catch (SyntaxError e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java new file mode 100644 index 00000000000..c15ee4e4f02 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/MaxAgg.java @@ -0,0 +1,56 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.queries.function.ValueSource; + + +public class MaxAgg extends SimpleAggValueSource { + public MaxAgg(ValueSource vs) { + super("max", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + return new MaxSlotAcc(getArg(), fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetMerger() { + double val = Double.NaN; + + @Override + public void merge(Object facetResult) { + double result = ((Number)facetResult).doubleValue(); + if (result > val || Double.isNaN(val)) { + val = result; + } + } + + @Override + public Object getMergedResult() { + return val; + } + }; + } + + +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinAgg.java new file mode 100644 index 00000000000..0cf725235ee --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/MinAgg.java @@ -0,0 +1,53 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.queries.function.ValueSource; + +public class MinAgg extends SimpleAggValueSource { + public MinAgg(ValueSource vs) { + super("min", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + return new MinSlotAcc(getArg(), fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetMerger() { + double val = Double.NaN; + + @Override + public void merge(Object facetResult) { + double result = ((Number)facetResult).doubleValue(); + if (result < val || Double.isNaN(val)) { + val = result; + } + } + + @Override + public Object getMergedResult() { + return val; + } + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/SimpleAggValueSource.java b/solr/core/src/java/org/apache/solr/search/facet/SimpleAggValueSource.java new file mode 100644 index 00000000000..19df3d61031 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/SimpleAggValueSource.java @@ -0,0 +1,66 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.search.QueryContext; + + +import java.io.IOException; +import java.util.Map; + +public abstract class SimpleAggValueSource extends AggValueSource { + ValueSource arg; + + public SimpleAggValueSource(String name, ValueSource vs) { + super(name); + this.arg = vs; + } + + public ValueSource getArg() { + return arg; + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean equals(Object o) { + if (!super.equals(o)) return false; + ValueSource otherArg = ((SimpleAggValueSource)o).arg; + if (arg == otherArg) return true; + return (arg != null && arg.equals(otherArg)); + } + + @Override + public int hashCode() { + return getClass().hashCode() + (arg == null ? 0 : arg.hashCode()); + } + + @Override + public String description() { + return name() + "(" + (arg==null ? "" : arg.description()) + ")"; + } + +} + + diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java new file mode 100644 index 00000000000..9485388843e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java @@ -0,0 +1,532 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrIndexSearcher; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + + +public abstract class SlotAcc implements Closeable { + String key; // todo... 
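+  // Editor's note: a SlotAcc accumulates one statistic across N "slots", one slot
+  // per candidate facet bucket.  For a terms facet over "cat" with a sum(price)
+  // stat (hypothetical field names), a SumSlotAcc is sized to the number of
+  // candidate buckets and collect(doc, slot) folds that doc's price into the
+  // slot of the bucket the doc belongs to.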
+  protected final FacetContext fcontext;
+
+  public SlotAcc(FacetContext fcontext) {
+    this.fcontext = fcontext;
+  }
+
+  public void setNextReader(LeafReaderContext readerContext) throws IOException {
+  }
+
+  public abstract void collect(int doc, int slot) throws IOException;
+
+  public abstract int compare(int slotA, int slotB);
+
+  public abstract Object getValue(int slotNum) throws IOException;
+
+  public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
+    if (key == null) return;
+    bucket.add(key, getValue(slotNum));
+  }
+
+  public abstract void reset();
+
+  @Override
+  public void close() throws IOException {
+  }
+}
+
+
+// TODO: we should really have a decoupled value provider...
+// This would enhance reuse and also prevent multiple lookups of same value across diff stats
+abstract class FuncSlotAcc extends SlotAcc {
+  protected final ValueSource valueSource;
+  protected FunctionValues values;
+
+  public FuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+    super(fcontext);
+    this.valueSource = values;
+  }
+
+  @Override
+  public void setNextReader(LeafReaderContext readerContext) throws IOException {
+    values = valueSource.getValues(fcontext.qcontext, readerContext);
+  }
+}
+
+
+// have a version that counts the number of times a Slot has been hit?  (for avg... what else?)
+
+// TODO: make more sense to have func as the base class rather than double?
+// double-slot-func -> func-slot -> slot -> acc
+// double-slot-func -> double-slot -> slot -> acc
+
+abstract class DoubleFuncSlotAcc extends FuncSlotAcc {
+  double[] result;  // TODO: use DoubleArray
+  double initialValue;
+
+  public DoubleFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+    this(values, fcontext, numSlots, 0);
+  }
+
+  public DoubleFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots, double initialValue) {
+    super(values, fcontext, numSlots);
+    this.initialValue = initialValue;
+    result = new double[numSlots];
+    if (initialValue != 0) {
+      reset();
+    }
+  }
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    return Double.compare(result[slotA], result[slotB]);
+  }
+
+
+  @Override
+  public Object getValue(int slot) {
+    return result[slot];
+  }
+
+  @Override
+  public void reset() {
+    for (int i=0; i<result.length; i++) {
+      result[i] = initialValue;
+    }
+  }
+}
+
+class SumSlotAcc extends DoubleFuncSlotAcc {
+  public SumSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+    super(values, fcontext, numSlots);
+  }
+
+  public void collect(int doc, int slotNum) {
+    double val = values.doubleVal(doc);  // if the doc has no value, this returns 0 for numeric function queries
+    result[slotNum] += val;
+  }
+}
+
+class SumsqSlotAcc extends DoubleFuncSlotAcc {
+  public SumsqSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+    super(values, fcontext, numSlots);
+  }
+
+  @Override
+  public void collect(int doc, int slotNum) {
+    double val = values.doubleVal(doc);
+    val = val * val;
+    result[slotNum] += val;
+  }
+}
+
+class MinSlotAcc extends DoubleFuncSlotAcc {
+  public MinSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+    super(values, fcontext, numSlots, Double.NaN);
+  }
+
+  @Override
+  public void collect(int doc, int slotNum) {
+    double val = values.doubleVal(doc);
+    if (val == 0 && !values.exists(doc)) return;  // depends on the fact that non-existing values return 0 for func queries
+
+    double currMin = result[slotNum];
+    if (!(val >= currMin)) {  // val>=currMin will be false for starting value: val>=NaN
+      result[slotNum] = val;
+    }
+  }
+}
+
+class MaxSlotAcc extends DoubleFuncSlotAcc {
+  public MaxSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+    super(values, fcontext, numSlots, Double.NaN);
+  }
+
+  @Override
+  public void collect(int doc, int slotNum) {
+    double val = values.doubleVal(doc);
+    if (val == 0 && !values.exists(doc)) return;  // depends on the fact that non-existing values return 0 for func queries
+
+    double currMax = result[slotNum];
+    if (!(val <= currMax)) {  // reversed order to handle NaN
+      result[slotNum] = val;
+    }
+  }
+
+}
+
+
+class AvgSlotAcc extends DoubleFuncSlotAcc {
+  int[] counts;
+
+  public AvgSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+    super(values, fcontext, numSlots);
+    counts = new int[numSlots];
+  }
+
+  @Override
+  public void reset() {
+    super.reset();
+    for (int i=0; i= unique) break;
+        ord = ords.nextSetBit(ord);
+        if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
+        BytesRef val = lookupOrd(ord);
+        Object o = field.getType().toObject(field, val);
+        lst.add(o);
+      }
+    }
+
+    map.add("vals", lst);
+  }
+
+  return map;
+  }
+
+  protected abstract BytesRef lookupOrd(int ord) throws IOException;
+
+  // we
only calculate all the counts when sorting by count + public void calcCounts() { + counts = new int[arr.length]; + for (int i=0; i= unique) break; + ord = ords.nextSetBit(ord); + if (ord == DocIdSetIterator.NO_MORE_DOCS) break; + BytesRef val = docToTerm.lookupOrd(ord); + Object o = field.getType().toObject(field, val); + lst.add(o); + } + } + + map.add("vals", lst); + } + + return map; + } + + @Override + protected BytesRef lookupOrd(int ord) throws IOException { + return docToTerm.lookupOrd(ord); + } + + private FixedBitSet bits; // bits for the current slot, only set for the callback + @Override + public void call(int termNum) { + bits.set(termNum); + } + + @Override + public void collect(int doc, int slotNum) throws IOException { + bits = arr[slotNum]; + if (bits == null) { + bits = new FixedBitSet(nTerms); + arr[slotNum] = bits; + } + docToTerm.getTerms(doc + currentDocBase, this); // this will call back to our Callback.call(int termNum) + } + + @Override + public void close() throws IOException { + if (docToTerm != null) { + docToTerm.close(); + docToTerm = null; + } + } +} \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/search/facet/StrAggValueSource.java b/solr/core/src/java/org/apache/solr/search/facet/StrAggValueSource.java new file mode 100644 index 00000000000..61a90fd161e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/StrAggValueSource.java @@ -0,0 +1,58 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.solr.search.QueryContext; + + +import java.io.IOException; + +public abstract class StrAggValueSource extends AggValueSource { + protected String arg; + + public StrAggValueSource(String name, String arg) { + super(name); + this.arg = arg; + } + + public String getArg() { + return arg; + } + + @Override + public boolean equals(Object o) { + if (!super.equals(o)) return false; + String otherArg = ((StrAggValueSource)o).arg; + if (arg == otherArg) return true; + return (arg != null && arg.equals(otherArg)); + } + + @Override + public int hashCode() { + return getClass().hashCode() + (arg == null ? 
0 : arg.hashCode()); + } + + @Override + public String description() { + return name() + "(" + arg + ")"; + } +} + + diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java new file mode 100644 index 00000000000..16070c73501 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java @@ -0,0 +1,40 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.queries.function.ValueSource; + +public class SumAgg extends SimpleAggValueSource { + + public SumAgg(ValueSource vs) { + super("sum", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + return new SumSlotAcc(getArg(), fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetDoubleMerger(); + } +} + diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java new file mode 100644 index 00000000000..8373baa119b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java @@ -0,0 +1,38 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.queries.function.ValueSource; + +public class SumsqAgg extends SimpleAggValueSource { + public SumsqAgg(ValueSource vs) { + super("sumsq", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + return new SumsqSlotAcc(getArg(), fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetDoubleMerger(); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java new file mode 100644 index 00000000000..6b4ac8b201e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java @@ -0,0 +1,763 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.facet; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SlowCompositeReaderWrapper; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.uninverting.DocTermOrds; +import org.apache.lucene.uninverting.UninvertingReader; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.component.FieldFacetStats; +import org.apache.solr.handler.component.StatsField; +import org.apache.solr.handler.component.StatsValues; +import org.apache.solr.handler.component.StatsValuesFactory; +import org.apache.solr.request.SimpleFacets; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.TrieField; +import org.apache.solr.search.BitDocSet; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.SolrCache; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.LongPriorityQueue; +import org.apache.solr.util.PrimUtils; + +/** + * + * Final form of the un-inverted field: + * Each 
document points to a list of term numbers that are contained in that document. + * + * Term numbers are in sorted order, and are encoded as variable-length deltas from the + * previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A + * term number of 0 signals the end of the termNumber list. + * + * There is a single int[maxDoc()] which either contains a pointer into a byte[] for + * the termNumber lists, or directly contains the termNumber list if it fits in the 4 + * bytes of an integer. If the first byte in the integer is 1, the next 3 bytes + * are a pointer into a byte[] where the termNumber list starts. + * + * There are actually 256 byte arrays, to compensate for the fact that the pointers + * into the byte arrays are only 3 bytes long. The correct byte array for a document + * is a function of its id. + * + * To save space and speed up faceting, any term that matches enough documents will + * not be un-inverted... it will be skipped while building the un-inverted field structure, + * and will use a set intersection method during faceting. + * + * To further save memory, the terms (the actual string values) are not all stored in + * memory, but a TermIndex is used to convert term numbers to term values only + * for the terms needed after faceting has completed. Only every 128th term value + * is stored, along with its corresponding term number, and this is used as an + * index to find the closest term and iterate until the desired number is hit (very + * much like Lucene's own internal term index). + * + */ +public class UnInvertedField extends DocTermOrds { + private static int TNUM_OFFSET=2; + + static class TopTerm { + BytesRef term; + int termNum; + + long memSize() { + return 8 + // obj header + 8 + 8 +term.length + //term + 4; // int + } + } + + long memsz; + final AtomicLong use = new AtomicLong(); // number of uses + + int[] maxTermCounts = new int[1024]; + + final Map bigTerms = new LinkedHashMap<>(); + + private SolrIndexSearcher.DocsEnumState deState; + private final SolrIndexSearcher searcher; + + private static UnInvertedField uifPlaceholder = new UnInvertedField(); + + private UnInvertedField() { // Dummy for synchronization. + super("fake", 0, 0); // cheapest initialization I can find. + searcher = null; + } + + @Override + protected void visitTerm(TermsEnum te, int termNum) throws IOException { + + if (termNum >= maxTermCounts.length) { + // resize by doubling - for very large number of unique terms, expanding + // by 4K and resultant GC will dominate uninvert times. Resize at end if material + int[] newMaxTermCounts = new int[maxTermCounts.length*2]; + System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum); + maxTermCounts = newMaxTermCounts; + } + + final BytesRef term = te.term(); + + if (te.docFreq() > maxTermDocFreq) { + TopTerm topTerm = new TopTerm(); + topTerm.term = BytesRef.deepCopyOf(term); + topTerm.termNum = termNum; + bigTerms.put(topTerm.termNum, topTerm); + + if (deState == null) { + deState = new SolrIndexSearcher.DocsEnumState(); + deState.fieldName = field; + deState.liveDocs = searcher.getLeafReader().getLiveDocs(); + deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail? 
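This branch is where high-frequency terms get diverted: any term whose docFreq exceeds maxTermDocFreq is recorded in bigTerms and later counted with a cached DocSet intersection instead of being uninverted. A rough standalone sketch of the cutoff test, with the threshold constant taken from the constructor further down (editor's illustration, not part of the patch):

```java
// Sketch of the "big terms" cutoff used by UnInvertedField. The constructor below passes
// searcher.maxDoc()/20 + 2 as maxTermDocFreq, so terms matching more than ~5% of the index
// bypass uninversion; the +2 keeps terms on both sides of the threshold in tiny test indexes.
static boolean isBigTerm(int docFreq, int maxDoc) {
    int maxTermDocFreq = maxDoc / 20 + 2;
    return docFreq > maxTermDocFreq;
}
```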
+ deState.postingsEnum = postingsEnum; + deState.minSetSizeCached = maxTermDocFreq; + } + + postingsEnum = deState.postingsEnum; + DocSet set = searcher.getDocSet(deState); + maxTermCounts[termNum] = set.size(); + } + } + + @Override + protected void setActualDocFreq(int termNum, int docFreq) { + maxTermCounts[termNum] = docFreq; + } + + public long memSize() { + // can cache the mem size since it shouldn't change + if (memsz!=0) return memsz; + long sz = super.ramBytesUsed(); + sz += 8*8 + 32; // local fields + sz += bigTerms.size() * 64; + for (TopTerm tt : bigTerms.values()) { + sz += tt.memSize(); + } + if (maxTermCounts != null) + sz += maxTermCounts.length * 4; + if (indexedTermsArray != null) { + // assume 8 byte references? + sz += 8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings; + } + memsz = sz; + return sz; + } + + public UnInvertedField(String field, SolrIndexSearcher searcher) throws IOException { + super(field, + // threshold, over which we use set intersections instead of counting + // to (1) save memory, and (2) speed up faceting. + // Add 2 for testing purposes so that there will always be some terms under + // the threshold even when the index is very + // small. + searcher.maxDoc()/20 + 2, + DEFAULT_INDEX_INTERVAL_BITS); + //System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc()); + + final String prefix = TrieField.getMainValuePrefix(searcher.getSchema().getFieldType(field)); + this.searcher = searcher; + try { + // TODO: it's wasteful to create one of these each time + // but DocTermOrds will throw an exception if it thinks the field has doc values (which is faked by UnInvertingReader) + LeafReader r = SlowCompositeReaderWrapper.wrap(searcher.getRawReader()); + uninvert(r, r.getLiveDocs(), prefix == null ? null : new BytesRef(prefix)); + } catch (IllegalStateException ise) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ise); + } + if (tnums != null) { + for(byte[] target : tnums) { + if (target != null && target.length > (1<<24)*.9) { + SolrCore.log.warn("Approaching too many values for UnInvertedField faceting on field '"+field+"' : bucket size=" + target.length); + } + } + } + + // free space if outrageously wasteful (tradeoff memory/cpu) + if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste! 
+      int[] newMaxTermCounts = new int[numTermsInField];
+      System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
+      maxTermCounts = newMaxTermCounts;
+    }
+
+    SolrCore.log.info("UnInverted multi-valued field " + toString());
+    //System.out.println("CREATED: " + toString() + " ti.index=" + ti.index);
+  }
+
+  public int getNumTerms() {
+    return numTermsInField;
+  }
+
+
+  public class DocToTerm implements Closeable {
+    private final DocSet[] bigTermSets;
+    private final int[] bigTermNums;
+    private TermsEnum te;
+
+    public DocToTerm() throws IOException {
+      bigTermSets = new DocSet[bigTerms.size()];
+      bigTermNums = new int[bigTerms.size()];
+      int i=0;
+      for (TopTerm tt : bigTerms.values()) {
+        bigTermSets[i] = searcher.getDocSet(new TermQuery(new Term(field, tt.term)));
+        bigTermNums[i] = tt.termNum;
+        i++;
+      }
+    }
+
+    public BytesRef lookupOrd(int ord) throws IOException {
+      return getTermValue( getTermsEnum() , ord );
+    }
+
+    public TermsEnum getTermsEnum() throws IOException {
+      if (te == null) {
+        te = getOrdTermsEnum(searcher.getLeafReader());
+      }
+      return te;
+    }
+
+    public void getTerms(int doc, Callback target) throws IOException {
+      if (bigTermSets != null) {
+        for (int i=0; i<bigTermSets.length; i++) {
+          if (bigTermSets[i].exists(doc)) {
+            target.call( bigTermNums[i] );
+          }
+        }
+      }
+
+      if (termInstances > 0) {
+        int code = index[doc];
+
+        if ((code & 0xff)==1) {
+          int pos = code>>>8;
+          int whichArray = (doc >>> 16) & 0xff;
+          byte[] arr = tnums[whichArray];
+          int tnum = 0;
+          for(;;) {
+            int delta = 0;
+            for(;;) {
+              byte b = arr[pos++];
+              delta = (delta << 7) | (b & 0x7f);
+              if ((b & 0x80) == 0) break;
+            }
+            if (delta == 0) break;
+            tnum += delta - TNUM_OFFSET;
+            target.call(tnum);
+          }
+        } else {
+          int tnum = 0;
+          int delta = 0;
+          for (;;) {
+            delta = (delta << 7) | (code & 0x7f);
+            if ((code & 0x80)==0) {
+              if (delta==0) break;
+              tnum += delta - TNUM_OFFSET;
+              target.call(tnum);
+              delta = 0;
+            }
+            code >>>= 8;
+          }
+        }
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      for (DocSet set : bigTermSets) {
+        // set.decref(); // OFF-HEAP
+      }
+    }
+  }
+
+  public interface Callback {
+    public void call(int termNum);
+  }
+
+
+  public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
+    use.incrementAndGet();
+
+    FieldType ft = searcher.getSchema().getFieldType(field);
+
+    NamedList<Integer> res = new NamedList<>();  // order is important
+
+    DocSet docs = baseDocs;
+    int baseSize = docs.size();
+    int maxDoc = searcher.maxDoc();
+
+    //System.out.println("GET COUNTS field=" + field + " baseSize=" + baseSize + " minCount=" + mincount + " maxDoc=" + maxDoc + " numTermsInField=" + numTermsInField);
+    if (baseSize >= mincount) {
+
+      final int[] index = this.index;
+      // tricky: we add one more element than we need because we will reuse this array later
+      // for ordering term ords before converting to term labels.
+ final int[] counts = new int[numTermsInField + 1]; + + // + // If there is prefix, find its start and end term numbers + // + int startTerm = 0; + int endTerm = numTermsInField; // one past the end + + TermsEnum te = getOrdTermsEnum(searcher.getLeafReader()); + if (te != null && prefix != null && prefix.length() > 0) { + final BytesRefBuilder prefixBr = new BytesRefBuilder(); + prefixBr.copyChars(prefix); + if (te.seekCeil(prefixBr.get()) == TermsEnum.SeekStatus.END) { + startTerm = numTermsInField; + } else { + startTerm = (int) te.ord(); + } + prefixBr.append(UnicodeUtil.BIG_TERM); + if (te.seekCeil(prefixBr.get()) == TermsEnum.SeekStatus.END) { + endTerm = numTermsInField; + } else { + endTerm = (int) te.ord(); + } + } + + /*********** + // Alternative 2: get the docSet of the prefix (could take a while) and + // then do the intersection with the baseDocSet first. + if (prefix != null && prefix.length() > 0) { + docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs); + // The issue with this method are problems of returning 0 counts for terms w/o + // the prefix. We can't just filter out those terms later because it may + // mean that we didn't collect enough terms in the queue (in the sorted case). + } + ***********/ + + boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 + && startTerm==0 && endTerm==numTermsInField + && docs instanceof BitDocSet; + + if (doNegative) { + FixedBitSet bs = ((BitDocSet)docs).getBits().clone(); + bs.flip(0, maxDoc); + // TODO: when iterator across negative elements is available, use that + // instead of creating a new bitset and inverting. + docs = new BitDocSet(bs, maxDoc - baseSize); + // simply negating will mean that we have deleted docs in the set. + // that should be OK, as their entries in our table should be empty. + //System.out.println(" NEG"); + } + + // For the biggest terms, do straight set intersections + for (TopTerm tt : bigTerms.values()) { + //System.out.println(" do big termNum=" + tt.termNum + " term=" + tt.term.utf8ToString()); + // TODO: counts could be deferred if sorted==false + if (tt.termNum >= startTerm && tt.termNum < endTerm) { + counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs); + //System.out.println(" count=" + counts[tt.termNum]); + } else { + //System.out.println("SKIP term=" + tt.termNum); + } + } + + // TODO: we could short-circuit counting altogether for sorted faceting + // where we already have enough terms from the bigTerms + + // TODO: we could shrink the size of the collection array, and + // additionally break when the termNumber got above endTerm, but + // it would require two extra conditionals in the inner loop (although + // they would be predictable for the non-prefix case). + // Perhaps a different copy of the code would be warranted. 
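The counting loop below is a direct implementation of the encoding described in the class javadoc. As a self-contained illustration (helper and class names invented; the logic mirrors the loops in getTerms() and getCounts()), interpreting an index[] entry and decoding one termNumber list looks like this:

```java
import java.util.ArrayList;
import java.util.List;

// Illustrative decoder for the uninverted-field encoding described in the class javadoc.
class TermNumDecoder {
  static final int TNUM_OFFSET = 2;  // real term numbers start at 2; a delta of 0 ends a list

  // An index[] entry whose low byte is 1 holds a 3-byte offset into one of 256 byte arrays;
  // which array is chosen by bits of the document id.
  static boolean isPointer(int code) { return (code & 0xff) == 1; }
  static int byteOffset(int code)    { return code >>> 8; }
  static int whichArray(int doc)     { return (doc >>> 16) & 0xff; }

  // Decode a termNumber list: variable-length deltas, 7 data bits per byte,
  // high bit set on continuation bytes.
  static List<Integer> decode(byte[] arr, int pos) {
    List<Integer> termNums = new ArrayList<>();
    int tnum = 0;
    for (;;) {
      int delta = 0;
      for (;;) {
        byte b = arr[pos++];
        delta = (delta << 7) | (b & 0x7f);
        if ((b & 0x80) == 0) break;  // last byte of this vint
      }
      if (delta == 0) break;         // zero delta terminates the list
      tnum += delta - TNUM_OFFSET;
      termNums.add(tnum);
    }
    return termNums;
  }
}
```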
+
+      if (termInstances > 0) {
+        DocIterator iter = docs.iterator();
+        while (iter.hasNext()) {
+          int doc = iter.nextDoc();
+          //System.out.println("iter doc=" + doc);
+          int code = index[doc];
+
+          if ((code & 0xff)==1) {
+            //System.out.println("  ptr");
+            int pos = code>>>8;
+            int whichArray = (doc >>> 16) & 0xff;
+            byte[] arr = tnums[whichArray];
+            int tnum = 0;
+            for(;;) {
+              int delta = 0;
+              for(;;) {
+                byte b = arr[pos++];
+                delta = (delta << 7) | (b & 0x7f);
+                if ((b & 0x80) == 0) break;
+              }
+              if (delta == 0) break;
+              tnum += delta - TNUM_OFFSET;
+              //System.out.println("    tnum=" + tnum);
+              counts[tnum]++;
+            }
+          } else {
+            //System.out.println("  inlined");
+            int tnum = 0;
+            int delta = 0;
+            for (;;) {
+              delta = (delta << 7) | (code & 0x7f);
+              if ((code & 0x80)==0) {
+                if (delta==0) break;
+                tnum += delta - TNUM_OFFSET;
+                //System.out.println("    tnum=" + tnum);
+                counts[tnum]++;
+                delta = 0;
+              }
+              code >>>= 8;
+            }
+          }
+        }
+      }
+      final CharsRefBuilder charsRef = new CharsRefBuilder();
+
+      int off=offset;
+      int lim=limit>=0 ? limit : Integer.MAX_VALUE;
+
+      if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
+        int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
+        maxsize = Math.min(maxsize, numTermsInField);
+        LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE);
+
+        int min=mincount-1;  // the smallest value in the top 'N' values
+        //System.out.println("START=" + startTerm + " END=" + endTerm);
+        for (int i=startTerm; i<endTerm; i++) {
+          int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
+          if (c>min) {
+            // NOTE: we use c>min rather than c>=min as an optimization because we are going in
+            // index order, so we already know that the keys are ordered.  This can be very
+            // important if a lot of the counts are repeated (like zero counts would be).
+
+            // smaller term numbers sort higher, so subtract the term number instead
+            long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);
+            boolean displaced = queue.insert(pair);
+            if (displaced) min=(int)(queue.top() >>> 32);
+          }
+        }
+
+        // now select the right page from the results
+
+        // if we are deep paging, we don't have to order the highest "offset" counts.
+        int collectCount = Math.max(0, queue.size() - off);
+        assert collectCount <= lim;
+
+        // the start and end indexes of our list "sorted" (starting with the highest value)
+        int sortedIdxStart = queue.size() - (collectCount - 1);
+        int sortedIdxEnd = queue.size() + 1;
+        final long[] sorted = queue.sort(collectCount);
+
+        final int[] indirect = counts;  // reuse the counts array for the index into the tnums array
+        assert indirect.length >= sortedIdxEnd;
+
+        for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
+          long pair = sorted[i];
+          int c = (int)(pair >>> 32);
+          int tnum = Integer.MAX_VALUE - (int)pair;
+
+          indirect[i] = i;   // store the index for indirect sorting
+          sorted[i] = tnum;  // reuse the "sorted" array to store the term numbers for indirect sorting
+
+          // add a null label for now... we'll fill it in later.
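Before the labels are filled in, it is worth unpacking the long-encoding used with the priority queue above: the count occupies the high 32 bits so bigger counts compare higher, and the low word stores Integer.MAX_VALUE minus the term number so that equal counts tiebreak toward smaller (index-ordered) terms. A round-trip sketch (names invented):

```java
// Pack/unpack sketch for the (count, termNum) longs fed to LongPriorityQueue above.
final class CountOrdPair {
  static long pack(int count, int termNum) {
    // counts dominate; on ties a smaller termNum yields a larger low word and wins
    return (((long) count) << 32) + (Integer.MAX_VALUE - termNum);
  }
  static int count(long pair)   { return (int) (pair >>> 32); }
  static int termNum(long pair) { return Integer.MAX_VALUE - (int) pair; }
}
```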
+ res.add(null, c); + } + + // now sort the indexes by the term numbers + PrimUtils.sort(sortedIdxStart, sortedIdxEnd, indirect, new PrimUtils.IntComparator() { + @Override + public int compare(int a, int b) { + return (int)sorted[a] - (int)sorted[b]; + } + + @Override + public boolean lessThan(int a, int b) { + return sorted[a] < sorted[b]; + } + + @Override + public boolean equals(int a, int b) { + return sorted[a] == sorted[b]; + } + }); + + // convert the term numbers to term values and set + // as the label + //System.out.println("sortStart=" + sortedIdxStart + " end=" + sortedIdxEnd); + for (int i=sortedIdxStart; i= adjustedMax); + assert doc >= ctx.docBase; + processor.setNextReader(ctx); + } + int segDoc = doc - segBase; + + + int code = index[doc]; + + if ((code & 0xff)==1) { + int pos = code>>>8; + int whichArray = (doc >>> 16) & 0xff; + byte[] arr = tnums[whichArray]; + int tnum = 0; + for(;;) { + int delta = 0; + for(;;) { + byte b = arr[pos++]; + delta = (delta << 7) | (b & 0x7f); + if ((b & 0x80) == 0) break; + } + if (delta == 0) break; + tnum += delta - TNUM_OFFSET; + int arrIdx = tnum - startTermIndex; + if (arrIdx < 0) continue; + if (arrIdx >= nTerms) break; + processor.countAcc.incrementCount(arrIdx, 1); + processor.collect(segDoc, arrIdx); + if (processor.allBucketsSlot >= 0) { + processor.countAcc.incrementCount(processor.allBucketsSlot, 1); + processor.collect(segDoc, processor.allBucketsSlot); + } + } + } else { + int tnum = 0; + int delta = 0; + for (;;) { + delta = (delta << 7) | (code & 0x7f); + if ((code & 0x80)==0) { + if (delta==0) break; + tnum += delta - TNUM_OFFSET; + int arrIdx = tnum - startTermIndex; + if (arrIdx < 0) continue; + if (arrIdx >= nTerms) break; + processor.countAcc.incrementCount(arrIdx, 1); + processor.collect(segDoc, arrIdx); + if (processor.allBucketsSlot >= 0) { + processor.countAcc.incrementCount(processor.allBucketsSlot, 1); + processor.collect(segDoc, processor.allBucketsSlot); + } + + delta = 0; + } + code >>>= 8; + } + } + } + } + + + } + + + + String getReadableValue(BytesRef termval, FieldType ft, CharsRefBuilder charsRef) { + return ft.indexedToReadable(termval, charsRef).toString(); + } + + /** may return a reused BytesRef */ + BytesRef getTermValue(TermsEnum te, int termNum) throws IOException { + //System.out.println("getTermValue termNum=" + termNum + " this=" + this + " numTerms=" + numTermsInField); + if (bigTerms.size() > 0) { + // see if the term is one of our big terms. + TopTerm tt = bigTerms.get(termNum); + if (tt != null) { + //System.out.println(" return big " + tt.term); + return tt.term; + } + } + + return lookupTerm(te, termNum); + } + + @Override + public String toString() { + final long indexSize = indexedTermsArray == null ? 0 : (8+8+8+8+(indexedTermsArray.length<<3)+sizeOfIndexedStrings); // assume 8 byte references? 
+ return "{field=" + field + + ",memSize="+memSize() + + ",tindexSize="+indexSize + + ",time="+total_time + + ",phase1="+phase1_time + + ",nTerms="+numTermsInField + + ",bigTerms="+bigTerms.size() + + ",termInstances="+termInstances + + ",uses="+use.get() + + "}"; + } + + ////////////////////////////////////////////////////////////////// + //////////////////////////// caching ///////////////////////////// + ////////////////////////////////////////////////////////////////// + + public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException { + SolrCache cache = searcher.getFieldValueCache(); + if (cache == null) { + return new UnInvertedField(field, searcher); + } + UnInvertedField uif = null; + Boolean doWait = false; + synchronized (cache) { + uif = cache.get(field); + if (uif == null) { + /** + * We use this place holder object to pull the UninvertedField construction out of the sync + * so that if many fields are accessed in a short time, the UninvertedField can be + * built for these fields in parallel rather than sequentially. + */ + cache.put(field, uifPlaceholder); + } else { + if (uif != uifPlaceholder) { + return uif; + } + doWait = true; // Someone else has put the place holder in, wait for that to complete. + } + } + while (doWait) { + try { + synchronized (cache) { + uif = cache.get(field); // Should at least return the placeholder, NPE if not is OK. + if (uif != uifPlaceholder) { // OK, another thread put this in the cache we should be good. + return uif; + } + cache.wait(); + } + } catch (InterruptedException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Thread interrupted in getUninvertedField."); + } + } + + uif = new UnInvertedField(field, searcher); + synchronized (cache) { + cache.put(field, uif); // Note, this cleverly replaces the placeholder. + cache.notifyAll(); + } + + return uif; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java new file mode 100644 index 00000000000..63b1e41a6e2 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java @@ -0,0 +1,82 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.solr.common.util.SimpleOrderedMap; + +public class UniqueAgg extends StrAggValueSource { + public static String UNIQUE = "unique"; + + // internal constants used for aggregating values from multiple shards + static String VALS = "vals"; + + public UniqueAgg(String field) { + super(UNIQUE, field); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + if (fcontext.qcontext.searcher().getSchema().getField(getArg()).multiValued()) + return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots); + else + return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetMerger() { + long sumUnique; + Set values; + int shardsMissing; + long shardsMissingSum; + long shardsMissingMax; + + @Override + public void merge(Object facetResult) { + SimpleOrderedMap map = (SimpleOrderedMap)facetResult; + long unique = ((Number)map.get("unique")).longValue(); + sumUnique += unique; + + List vals = (List)map.get("vals"); + if (vals != null) { + if (values == null) { + values = new HashSet<>(vals.size()*4); + } + values.addAll(vals); + } else { + shardsMissing++; + shardsMissingSum += unique; + shardsMissingMax = Math.max(shardsMissingMax, unique); + } + + // TODO: somehow get & use the count in the bucket? + } + + @Override + public Object getMergedResult() { + long exactCount = values == null ? 0 : values.size(); + return exactCount + shardsMissingSum; + } + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/package.html b/solr/core/src/java/org/apache/solr/search/facet/package.html new file mode 100644 index 00000000000..a0590cb9a57 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/package.html @@ -0,0 +1,28 @@ + + + + + + + +

+<html>
+<body>
+APIs and classes for the JSON Facet API.
+This is currently experimental!
+</body>
+</html>
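To make the package blurb concrete, here is a hypothetical SolrJ request against the new API. The URL and client setup are invented for illustration; the json.facet body follows the shapes exercised in TestJsonFacets later in this patch (cat_s, num_d, and where_s are fields from that test's schema):

```java
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class JsonFacetExample {
  public static void main(String[] args) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient("http://localhost:8983/solr/collection1")) {
      SolrQuery query = new SolrQuery("*:*");
      query.setRows(0);
      // a terms facet with a nested aggregation and a nested query facet
      query.add("json.facet",
          "{ cats: { terms: { field: cat_s, limit: 5," +
          "          facet: { avg_val: 'avg(num_d)', in_ny: { query: 'where_s:NY' } } } } }");
      QueryResponse rsp = client.query(query);
      System.out.println(rsp.getResponse().get("facets"));
    }
  }
}
```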

diff --git a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
index e4ad67466ad..a98c5ab7b77 100644
--- a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
+++ b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
@@ -58,6 +58,8 @@ import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.solr.parser.QueryParser;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestHandler;
+import org.apache.solr.request.json.RequestUtil;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
@@ -88,7 +90,7 @@ import org.apache.solr.search.SyntaxError;
  * default parameter settings.
  */
 public class SolrPluginUtils {
-
+
   /**
    * Map containing all the possible purposes codes of a request as key and
@@ -130,7 +132,11 @@ public class SolrPluginUtils {
    * @param appends values to be appended to those from the request (or defaults) when dealing with multi-val params, or treated as another layer of defaults for single-val params.
    * @param invariants values which will be used instead of any request, or default values, regardless of context.
    */
-  public static void setDefaults(SolrQueryRequest req, SolrParams defaults,
+  public static void setDefaults(SolrQueryRequest req, SolrParams defaults, SolrParams appends, SolrParams invariants) {
+    setDefaults(null, req, defaults, appends, invariants);
+  }
+
+  public static void setDefaults(SolrRequestHandler handler, SolrQueryRequest req, SolrParams defaults,
                                  SolrParams appends, SolrParams invariants) {
 
     List<String> paramNames = null;
@@ -156,12 +162,7 @@ public class SolrPluginUtils {
     }
   }
 
-    SolrParams p = req.getParams();
-    p = SolrParams.wrapDefaults(p, defaults);
-    p = SolrParams.wrapAppended(p, appends);
-    p = SolrParams.wrapDefaults(invariants, p);
-
-    req.setParams(p);
+    RequestUtil.processParams(handler, req, defaults, appends, invariants);
   }
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
new file mode 100755
index 00000000000..0a6bee1e852
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
@@ -0,0 +1,751 @@
[751 added lines of schema_latest.xml elided: the XML element markup was lost in extraction; of the file's content only the uniqueKey value "id" survives]
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 338112cd257..752b07cda8f 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -945,5 +945,19 @@ public class QueryEqualityTest extends
SolrTestCaseJ4 { } + public void testAggs() throws Exception { + assertFuncEquals("agg(avg(foo_i))", "agg(avg(foo_i))"); + assertFuncEquals("agg(avg(foo_i))", "agg_avg(foo_i)"); + assertFuncEquals("agg_min(foo_i)", "agg(min(foo_i))"); + assertFuncEquals("agg_max(foo_i)", "agg(max(foo_i))"); + + assertFuncEquals("agg_avg(foo_i)", "agg_avg(foo_i)"); + assertFuncEquals("agg_sum(foo_i)", "agg_sum(foo_i)"); + assertFuncEquals("agg_count()", "agg_count()"); + assertFuncEquals("agg_unique(foo_i)", "agg_unique(foo_i)"); + assertFuncEquals("agg_sumsq(foo_i)", "agg_sumsq(foo_i)"); + // assertFuncEquals("agg_stdev(foo_i)", "agg_stdev(foo_i)"); + // assertFuncEquals("agg_multistat(foo_i)", "agg_multistat(foo_i)"); + } } diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java new file mode 100644 index 00000000000..68cf650f060 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -0,0 +1,673 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.JSONTestUtil;
+import org.apache.solr.SolrTestCaseHS;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.request.macro.MacroExpander;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+@LuceneTestCase.SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Lucene45","Appending"})
+public class TestJsonFacets extends SolrTestCaseHS {
+
+  private static SolrInstances servers;  // for distributed testing
+
+  @BeforeClass
+  public static void beforeTests() throws Exception {
+    JSONTestUtil.failRepeatedKeys = true;
+    initCore("solrconfig-tlog.xml","schema_latest.xml");
+  }
+
+  public static void initServers() throws Exception {
+    if (servers == null) {
+      servers = new SolrInstances(3, "solrconfig-tlog.xml", "schema_latest.xml");
+    }
+  }
+
+  @AfterClass
+  public static void afterTests() throws Exception {
+    JSONTestUtil.failRepeatedKeys = false;
+    if (servers != null) {
+      servers.stop();
+    }
+  }
+
+  // attempt to reproduce https://github.com/Heliosearch/heliosearch/issues/33
+  @Test
+  public void testComplex() throws Exception {
+    Random r = random();
+
+    Client client = Client.localClient;
+
+    double price_low = 11000;
+    double price_high = 100000;
+
+    ModifiableSolrParams p = params("make_s","make_s", "model_s","model_s", "price_low",Double.toString(price_low), "price_high",Double.toString(price_high));
+
+    MacroExpander m = new MacroExpander( p.getMap() );
+
+    String make_s = m.expand("${make_s}");
+    String model_s = m.expand("${model_s}");
+
+    client.deleteByQuery("*:*", null);
+
+    int nDocs = 99;
+    String[] makes = {"honda", "toyota", "ford", null};
+    Double[] prices = {10000.0, 30000.0, 50000.0, 0.0, null};
+    String[] honda_models = {"accord", "civic", "fit", "pilot", null};  // make sure this is alphabetized to match tiebreaks in index
+    String[] other_models = {"a", "b", "c", "x", "y", "z", null};
+
+    int nHonda = 0;
+    final int[] honda_model_counts = new int[honda_models.length];
+
+    for (int i=0; i<nDocs; i++) {
+      SolrInputDocument doc = sdoc("id", Integer.toString(i));
+
+      Double price = rand(prices);
+      if (price != null) {
+        doc.addField("cost_f", price);
+      }
+      boolean matches_price = price!=null && price >= price_low && price <= price_high;
+
+      String make = rand(makes);
+      if (make != null) {
+        doc.addField(make_s, make);
+      }
+
+      if ("honda".equals(make)) {
+        int modelNum = r.nextInt(honda_models.length);
+        String model = honda_models[modelNum];
+        if (model != null) {
+          doc.addField(model_s, model);
+        }
+        if (matches_price) {
+          nHonda++;
+          honda_model_counts[modelNum]++;
+        }
+      } else if (make == null) {
+        doc.addField(model_s, rand(honda_models));  // add some docs w/ model but w/o make
+      } else {
+        // other makes
+        doc.addField(model_s, rand(other_models));  // add some docs w/ model but w/o make
+      }
+
+      client.add(doc, null);
+      if (r.nextInt(10) == 0) {
+        client.add(doc, null);  // dup, causing a delete
+      }
+      if (r.nextInt(20) == 0) {
+        client.commit();  // force new seg
+      }
+    }
+
+    client.commit();
+
+    // now figure out top counts
+    List<Integer> idx = new ArrayList<>();
+    for (int i=0; i<honda_models.length; i++) {
+      idx.add(i);
+    }
+    Collections.sort(idx, new Comparator<Integer>() {
+      @Override
+      public int compare(Integer o1, Integer o2) {
+        int cmp = honda_model_counts[o2] - honda_model_counts[o1];
+        return cmp == 0 ?
o1 - o2 : cmp; + } + }); + + + + // straight query facets + client.testJQ(params(p, "q", "*:*", "rows","0", "fq","+${make_s}:honda +cost_f:[${price_low} TO ${price_high}]" + , "json.facet", "{makes:{terms:{field:${make_s}, facet:{models:{terms:{field:${model_s}, limit:2, mincount:0}}}}}}}" + , "facet","true", "facet.pivot","make_s,model_s", "facet.limit", "2" + ) + , "facets=={count:" + nHonda + ", makes:{buckets:[{val:honda, count:" + nHonda + ", models:{buckets:[" + + "{val:" + honda_models[idx.get(0)] + ", count:" + honda_model_counts[idx.get(0)] + "}," + + "{val:" + honda_models[idx.get(1)] + ", count:" + honda_model_counts[idx.get(1)] + "}]}" + + "}]}}" + ); + + + } + + + + public void testStatsSimple() throws Exception { + assertU(delQ("*:*")); + assertU(add(doc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2", "val_b", "true", "sparse_s","one"))); + assertU(add(doc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5", "val_b", "false"))); + assertU(add(doc("id", "3"))); + assertU(commit()); + assertU(add(doc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3"))); + assertU(add(doc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7", "sparse_s","two"))); + assertU(commit()); + assertU(add(doc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5"))); + assertU(commit()); + + // test multiple json.facet commands + assertJQ(req("q", "*:*", "rows", "0" + , "json.facet", "{x:'sum(num_d)'}" + , "json.facet", "{y:'min(num_d)'}" + ) + , "facets=={count:6 , x:3.0, y:-9.0 }" + ); + + + // test streaming + assertJQ(req("q", "*:*", "rows", "0" + , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream }}" + + ", cat2:{terms:{field:'cat_s', method:stream, sort:'index asc' }}" + // default sort + ", cat3:{terms:{field:'cat_s', method:stream, mincount:3 }}" + // mincount + ", cat4:{terms:{field:'cat_s', method:stream, prefix:B }}" + // prefix + ", cat5:{terms:{field:'cat_s', method:stream, offset:1 }}" + // offset + " }" + ) + , "facets=={count:6 " + + ", cat :{buckets:[{val:A, count:2},{val:B, count:3}]}" + + ", cat2:{buckets:[{val:A, count:2},{val:B, count:3}]}" + + ", cat3:{buckets:[{val:B, count:3}]}" + + ", cat4:{buckets:[{val:B, count:3}]}" + + ", cat5:{buckets:[{val:B, count:3}]}" + + " }" + ); + + + // test nested streaming under non-streaming + assertJQ(req("q", "*:*", "rows", "0" + , "json.facet", "{ cat:{terms:{field:'cat_s', sort:'index asc', facet:{where:{terms:{field:where_s,method:stream}}} }}}" + ) + , "facets=={count:6 " + + ", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1},{val:NY,count:1}]} },{val:B, count:3, where:{buckets:[{val:NJ,count:2},{val:NY,count:1}]} }]}" + + "}" + ); + + // test nested streaming under streaming + assertJQ(req("q", "*:*", "rows", "0" + , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream, facet:{where:{terms:{field:where_s,method:stream}}} }}}" + ) + , "facets=={count:6 " + + ", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1},{val:NY,count:1}]} },{val:B, count:3, where:{buckets:[{val:NJ,count:2},{val:NY,count:1}]} }]}" + + "}" + ); + + // test nested streaming with stats under streaming + assertJQ(req("q", "*:*", "rows", "0" + , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream, facet:{ where:{terms:{field:where_s,method:stream, facet:{x:'max(num_d)'} }}} }}}" + ) + , "facets=={count:6 " + + ", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0},{val:NY,count:1,x:4.0}]} },{val:B, count:3, 
where:{buckets:[{val:NJ,count:2,x:11.0},{val:NY,count:1,x:-5.0}]} }]}" + + "}" + ); + + // test nested streaming with stats under streaming with stats + assertJQ(req("q", "*:*", "rows", "0", + "facet","true" + , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream, facet:{ y:'min(num_d)', where:{terms:{field:where_s,method:stream, facet:{x:'max(num_d)'} }}} }}}" + ) + , "facets=={count:6 " + + ", cat :{buckets:[{val:A, count:2, y:2.0, where:{buckets:[{val:NJ,count:1,x:2.0},{val:NY,count:1,x:4.0}]} },{val:B, count:3, y:-9.0, where:{buckets:[{val:NJ,count:2,x:11.0},{val:NY,count:1,x:-5.0}]} }]}" + + "}" + ); + + + assertJQ(req("q", "*:*", "fq","cat_s:A") + , "response/numFound==2" + ); + } + + @Test + public void testStats() throws Exception { + // single valued strings + doStats(Client.localClient, params()); + } + + public void doStats(Client client, ModifiableSolrParams p) throws Exception { + // single valued strings + doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "super_s","super_s", "val_b","val_b", "sparse_s","sparse_s" ,"multi_ss","multi_ss") ); + + // multi-valued strings + doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_d", "num_i","num_i", "super_s","super_ss", "val_b","val_b", "sparse_s","sparse_ss", "multi_ss","multi_ss") ); + } + + public static void doStatsTemplated(Client client, ModifiableSolrParams p) throws Exception { + MacroExpander m = new MacroExpander( p.getMap() ); + + String cat_s = m.expand("${cat_s}"); + String where_s = m.expand("${where_s}"); + String num_d = m.expand("${num_d}"); + String num_i = m.expand("${num_i}"); + String val_b = m.expand("${val_b}"); + String super_s = m.expand("${super_s}"); + String sparse_s = m.expand("${sparse_s}"); + String multi_ss = m.expand("${multi_ss}"); + + client.deleteByQuery("*:*", null); + + client.add(sdoc("id", "1", cat_s, "A", where_s, "NY", num_d, "4", num_i, "2", super_s, "zodiac", val_b, "true", sparse_s, "one"), null); + client.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", super_s,"superman", val_b, "false" , multi_ss,"a", "multi_ss","b" ), null); + client.add(sdoc("id", "3"), null); + client.commit(); + client.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", num_i, "3", super_s,"spiderman" , multi_ss, "b"), null); + client.add(sdoc("id", "5", cat_s, "B", where_s, "NJ", num_d, "11", num_i, "7", super_s,"batman" ,sparse_s,"two", multi_ss, "a"), null); + client.commit(); + client.add(sdoc("id", "6", cat_s, "B", where_s, "NY", num_d, "-5", num_i, "-5", super_s,"hulk" , multi_ss, "b", multi_ss, "a" ), null); + client.commit(); + + + // straight query facets + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{catA:{query:{q:'${cat_s}:A'}}, catA2:{query:{query:'${cat_s}:A'}}, catA3:{query:'${cat_s}:A'} }" + ) + , "facets=={ 'count':6, 'catA':{ 'count':2}, 'catA2':{ 'count':2}, 'catA3':{ 'count':2}}" + ); + + // nested query facets + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{ catB:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} }}}" + ) + , "facets=={ 'count':6, 'catB':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}}" + ); + + // nested query facets on subset + client.testJQ(params(p, "q", "id:(2 3)" + , "json.facet", "{ catB:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} }}}" + ) + , "facets=={ 
'count':2, 'catB':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}}" + ); + + // nested query facets with stats + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{ catB:{query:{q:'${cat_s}:B', facet:{nj:{query:{q:'${where_s}:NJ'}}, ny:{query:'${where_s}:NY'}} }}}" + ) + , "facets=={ 'count':6, 'catB':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}}" + ); + + + // field/terms facet + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{c1:{field:'${cat_s}'}, c2:{field:{field:'${cat_s}'}}, c3:{terms:{field:'${cat_s}'}} }" + ) + , "facets=={ 'count':6, " + + "'c1':{ 'buckets':[{ 'val':'B', 'count':3}, { 'val':'A', 'count':2}]}, " + + "'c2':{ 'buckets':[{ 'val':'B', 'count':3}, { 'val':'A', 'count':2}]}, " + + "'c3':{ 'buckets':[{ 'val':'B', 'count':3}, { 'val':'A', 'count':2}]}} " + ); + + // test mincount + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:'${cat_s}', mincount:3}}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[{ 'val':'B', 'count':3}]} } " + ); + + // test default mincount of 1 + client.testJQ(params(p, "q", "id:1" + , "json.facet", "{f1:{terms:'${cat_s}'}}" + ) + , "facets=={ 'count':1, " + + "'f1':{ 'buckets':[{ 'val':'A', 'count':1}]} } " + ); + + // test mincount of 0 - need processEmpty for distrib to match up + client.testJQ(params(p, "q", "id:1" + , "json.facet", "{processEmpty:true, f1:{terms:{field:'${cat_s}', mincount:0}}}" + ) + , "facets=={ 'count':1, " + + "'f1':{ 'buckets':[{ 'val':'A', 'count':1}, { 'val':'B', 'count':0}]} } " + ); + + // test mincount of 0 with stats, need processEmpty for distrib to match up + client.testJQ(params(p, "q", "id:1" + , "json.facet", "{processEmpty:true, f1:{terms:{field:'${cat_s}', mincount:0, allBuckets:true, facet:{n1:'sum(${num_d})'} }}}" + ) + , "facets=={ 'count':1, " + + "'f1':{ allBuckets:{ 'count':1, n1:4.0}, 'buckets':[{ 'val':'A', 'count':1, n1:4.0}, { 'val':'B', 'count':0 /*, n1:0.0 */ }]} } " + ); + + // test sorting by stat + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'n1 desc', facet:{n1:'sum(${num_d})'} }}" + + " , f2:{terms:{field:'${cat_s}', sort:'n1 asc', facet:{n1:'sum(${num_d})'} }} }" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'A', count:2, n1:6.0 }, { val:'B', count:3, n1:-3.0}]}" + + ", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }" + ); + + // test sorting by count/index order + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'count desc' } }" + + " , f2:{terms:{field:'${cat_s}', sort:'count asc' } }" + + " , f3:{terms:{field:'${cat_s}', sort:'index asc' } }" + + " , f4:{terms:{field:'${cat_s}', sort:'index desc' } }" + + "}" + ) + , "facets=={ count:6 " + + " ,f1:{buckets:[ {val:B,count:3}, {val:A,count:2} ] }" + + " ,f2:{buckets:[ {val:A,count:2}, {val:B,count:3} ] }" + + " ,f3:{buckets:[ {val:A,count:2}, {val:B,count:3} ] }" + + " ,f4:{buckets:[ {val:B,count:3}, {val:A,count:2} ] }" + + "}" + ); + + + // test tiebreaks when sorting by count + client.testJQ(params(p, "q", "id:1 id:6" + , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'count desc' } }" + + " , f2:{terms:{field:'${cat_s}', sort:'count asc' } }" + + "}" + ) + , "facets=={ count:2 " + + " ,f1:{buckets:[ {val:A,count:1}, {val:B,count:1} ] }" + + " ,f2:{buckets:[ {val:A,count:1}, {val:B,count:1} ] }" + + "}" + ); + + // terms facet with nested query facet + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{cat:{terms:{field:'${cat_s}', 
facet:{nj:{query:'${where_s}:NJ'}} } }} }" + ) + , "facets=={ 'count':6, " + + "'cat':{ 'buckets':[{ 'val':'B', 'count':3, 'nj':{ 'count':2}}, { 'val':'A', 'count':2, 'nj':{ 'count':1}}]} }" + ); + + // terms facet with nested query facet on subset + client.testJQ(params(p, "q", "id:(2 5 4)" + , "json.facet", "{cat:{terms:{field:'${cat_s}', facet:{nj:{query:'${where_s}:NJ'}} } }} }" + ) + , "facets=={ 'count':3, " + + "'cat':{ 'buckets':[{ 'val':'B', 'count':2, 'nj':{ 'count':2}}, { 'val':'A', 'count':1, 'nj':{ 'count':1}}]} }" + ); + + // test prefix + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${super_s}, prefix:s, mincount:0 }}}" // even with mincount=0, we should only see buckets with the prefix + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[{val:spiderman, count:1}, {val:superman, count:1}]} } " + ); + + // test prefix that doesn't exist + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${super_s}, prefix:ttt, mincount:0 }}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[]} } " + ); + + // test prefix that doesn't exist at start + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${super_s}, prefix:aaaaaa, mincount:0 }}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[]} } " + ); + + // test prefix that doesn't exist at end + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${super_s}, prefix:zzzzzz, mincount:0 }}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[]} } " + ); + + // + // missing + // + + // test missing w/ non-existent field + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${noexist}, missing:true}}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[], missing:{count:6} } } " + ); + + // test missing + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true }}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[{val:one, count:1}, {val:two, count:1}], missing:{count:4} } } " + ); + + // test missing with stats + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, facet:{x:'sum(num_d)'} }}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[{val:one, count:1, x:4.0}, {val:two, count:1, x:11.0}], missing:{count:4, x:-12.0} } } " + ); + + // test that the missing bucket is not affected by any prefix + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, prefix:on, facet:{x:'sum(num_d)'} }}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[{val:one, count:1, x:4.0}], missing:{count:4, x:-12.0} } } " + ); + + // test missing with prefix that doesn't exist + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, prefix:ppp, facet:{x:'sum(num_d)'} }}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[], missing:{count:4, x:-12.0} } } " + ); + + // test numBuckets + client.testJQ(params(p, "q", "*:*", "rows", "0", "facet", "true" + , "json.facet", "{f1:{terms:{field:${cat_s}, numBuckets:true, limit:1}}}" // TODO: limit:0 produced an error + ) + , "facets=={ 'count':6, " + + "'f1':{ numBuckets:2, buckets:[{val:B, count:3}]} } " + ); + + // prefix should lower numBuckets + client.testJQ(params(p, "q", "*:*", "rows", "0", "facet", "true" + , "json.facet", "{f1:{terms:{field:${cat_s}, numBuckets:true, prefix:B}}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ numBuckets:1, buckets:[{val:B, count:3}]} } " + ); + + // mincount 
should lower numBuckets + client.testJQ(params(p, "q", "*:*", "rows", "0", "facet", "true" + , "json.facet", "{f1:{terms:{field:${cat_s}, numBuckets:true, mincount:3}}}" + ) + , "facets=={ 'count':6, " + + "'f1':{ numBuckets:1, buckets:[{val:B, count:3}]} } " + ); + + // basic range facet + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f:{range:{field:${num_d}, start:-5, end:10, gap:5}}}" + ) + , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1}, {val:0.0,count:2}, {val:5.0,count:0} ] } }" + ); + + // basic range facet with "include" params + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f:{range:{field:${num_d}, start:-5, end:10, gap:5, include:upper}}}" + ) + , "facets=={count:6, f:{buckets:[ {val:-5.0,count:0}, {val:0.0,count:2}, {val:5.0,count:0} ] } }" + ); + + // range facet with sub facets and stats + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f:{range:{field:${num_d}, start:-5, end:10, gap:5, facet:{ x:'sum(${num_i})', ny:{query:'${where_s}:NY'}} }}}" + ) + , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,x:-5.0,ny:{count:1}}, {val:0.0,count:2,x:5.0,ny:{count:1}}, {val:5.0,count:0 /* ,x:0.0,ny:{count:0} */ } ] } }" + ); + + // range facet with sub facets and stats, with "other:all" + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f:{range:{field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ x:'sum(${num_i})', ny:{query:'${where_s}:NY'}} }}}" + ) + , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,x:-5.0,ny:{count:1}}, {val:0.0,count:2,x:5.0,ny:{count:1}}, {val:5.0,count:0 /* ,x:0.0,ny:{count:0} */} ]" + + ",before: {count:1,x:-5.0,ny:{count:0}}" + + ",after: {count:1,x:7.0, ny:{count:0}}" + + ",between:{count:3,x:0.0, ny:{count:2}}" + + " } }" + ); + + + // range facet with sub facets and stats, with "other:all", on subset + client.testJQ(params(p, "q", "id:(3 4 6)" + , "json.facet", "{f:{range:{field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ x:'sum(${num_i})', ny:{query:'${where_s}:NY'}} }}}" + ) + , "facets=={count:3, f:{buckets:[ {val:-5.0,count:1,x:-5.0,ny:{count:1}}, {val:0.0,count:1,x:3.0,ny:{count:0}}, {val:5.0,count:0 /* ,x:0.0,ny:{count:0} */} ]" + + ",before: {count:0 /* ,x:0.0,ny:{count:0} */ }" + + ",after: {count:0 /* ,x:0.0,ny:{count:0} */}" + + ",between:{count:2,x:-2.0, ny:{count:1}}" + + " } }" + ); + + // stats at top level + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})' }" + ) + , "facets=={ 'count':6, " + + "sum1:3.0, sumsq1:247.0, avg1:0.5, min1:-9.0, max1:11.0, numwhere:2 }" + ); + + // stats at top level, no matches + client.testJQ(params(p, "q", "id:DOESNOTEXIST" + , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})' }" + ) + , "facets=={count:0 " + + "/* ,sum1:0.0, sumsq1:0.0, avg1:0.0, min1:'NaN', max1:'NaN', numwhere:0 */ }" + ); + + // + // tests on a multi-valued field with actual multiple values, just to ensure that we are + // using a multi-valued method for the rest of the tests when appropriate. 
+ // + + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{cat:{terms:{field:'${multi_ss}', facet:{nj:{query:'${where_s}:NJ'}} } }} }" + ) + , "facets=={ 'count':6, " + + "'cat':{ 'buckets':[{ 'val':'a', 'count':3, 'nj':{ 'count':2}}, { 'val':'b', 'count':3, 'nj':{ 'count':2}}]} }" + ); + + // test unique on multi-valued field + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{x:'unique(${multi_ss})', y:{query:{q:'id:2', facet:{x:'unique(${multi_ss})'} }} }" + ) + , "facets=={ 'count':6, " + + "x:2," + + "y:{count:1, x:2}" + // single document should yield 2 unique values + " }" + ); + + + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + // test converting legacy facets + + // test mincount + client.testJQ(params(p, "q", "*:*" + // , "json.facet", "{f1:{terms:{field:'${cat_s}', mincount:3}}}" + , "facet","true", "facet.version", "2", "facet.field","{!key=f1}${cat_s}", "facet.mincount","3" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[{ 'val':'B', 'count':3}]} } " + ); + + // test prefix + client.testJQ(params(p, "q", "*:*" + // , "json.facet", "{f1:{terms:{field:${super_s}, prefix:s, mincount:0 }}}" // even with mincount=0, we should only see buckets with the prefix + , "facet","true", "facet.version", "2", "facet.field","{!key=f1}${super_s}", "facet.prefix","s", "facet.mincount","0" + ) + , "facets=={ 'count':6, " + + "'f1':{ 'buckets':[{val:spiderman, count:1}, {val:superman, count:1}]} } " + ); + + // range facet with sub facets and stats + client.testJQ(params(p, "q", "*:*" + // , "json.facet", "{f:{range:{field:${num_d}, start:-5, end:10, gap:5, facet:{ x:'sum(${num_i})', ny:{query:'${where_s}:NY'}} }}}" + , "facet","true", "facet.version", "2", "facet.range","{!key=f}${num_d}", "facet.range.start","-5", "facet.range.end","10", "facet.range.gap","5" + , "f.f.facet.stat","x:sum(${num_i})", "subfacet.f.query","{!key=ny}${where_s}:NY" + + ) + , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,x:-5.0,ny:{count:1}}, {val:0.0,count:2,x:5.0,ny:{count:1}}, {val:5.0,count:0 /* ,x:0.0,ny:{count:0} */ } ] } }" + ); + + // test sorting by stat + client.testJQ(params(p, "q", "*:*" + // , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'n1 desc', facet:{n1:'sum(${num_d})'} }}" + + // " , f2:{terms:{field:'${cat_s}', sort:'n1 asc', facet:{n1:'sum(${num_d})'} }} }" + , "facet","true", "facet.version", "2", "facet.field","{!key=f1}${cat_s}", "f.f1.facet.sort","n1 desc", "facet.stat","n1:sum(num_d)" + , "facet.field","{!key=f2}${cat_s}", "f.f1.facet.sort","n1 asc" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'A', count:2, n1:6.0 }, { val:'B', count:3, n1:-3.0}]}" + + ", f2:{ 'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }" + ); + + // range facet with sub facets and stats, with "other:all", on subset + client.testJQ(params(p, "q", "id:(3 4 6)" + //, "json.facet", "{f:{range:{field:${num_d}, start:-5, end:10, gap:5, other:all, facet:{ x:'sum(${num_i})', ny:{query:'${where_s}:NY'}} }}}" + , "facet","true", "facet.version", "2", "facet.range","{!key=f}${num_d}", "facet.range.start","-5", "facet.range.end","10", "facet.range.gap","5" + , "f.f.facet.stat","x:sum(${num_i})", "subfacet.f.query","{!key=ny}${where_s}:NY", "facet.range.other","all" + ) + , "facets=={count:3, f:{buckets:[ {val:-5.0,count:1,x:-5.0,ny:{count:1}}, {val:0.0,count:1,x:3.0,ny:{count:0}}, {val:5.0,count:0 /* ,x:0.0,ny:{count:0} */} ]" + + ",before: {count:0 /* ,x:0.0,ny:{count:0} */ }" + + ",after: {count:0 /* 
,x:0.0,ny:{count:0} */}" + + ",between:{count:2,x:-2.0, ny:{count:1}}" + + " } }" + ); + + + + + // TODO: + // numdocs('query') stat (don't make a bucket... just a count) + // missing(field) + // make missing configurable in min, max, etc + // exclusions + // zeroes + // instead of json.facet make it facet? + } + + + @Test + public void testDistrib() throws Exception { + initServers(); + Client client = servers.getClient( random().nextInt() ); + client.queryDefaults().set( "shards", servers.getShards() ); + doStats( client, params() ); + } + +} diff --git a/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java index b84f4aaa117..a1f176a226f 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java @@ -57,6 +57,11 @@ public class ModifiableSolrParams extends SolrParams } } + public Map getMap() { + return vals; + } + + //---------------------------------------------------------------- //---------------------------------------------------------------- diff --git a/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java index 684396b696a..859d698048d 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java @@ -19,6 +19,7 @@ package org.apache.solr.common.params; import org.apache.solr.common.util.StrUtils; +import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.io.IOException; @@ -30,18 +31,33 @@ public class MultiMapSolrParams extends SolrParams { protected final Map map; public static void addParam(String name, String val, Map map) { - String[] arr = map.get(name); - if (arr ==null) { - arr =new String[]{val}; - } else { - String[] newarr = new String[arr.length+1]; - System.arraycopy(arr,0,newarr,0,arr.length); - newarr[arr.length]=val; - arr =newarr; - } - map.put(name, arr); + String[] arr = map.get(name); + if (arr == null) { + arr = new String[]{val}; + } else { + String[] newarr = new String[arr.length+1]; + System.arraycopy(arr, 0, newarr, 0, arr.length); + newarr[arr.length] = val; + arr = newarr; + } + map.put(name, arr); } + public static void addParam(String name, String[] vals, Map map) { + String[] arr = map.put(name, vals); + if (arr == null) { + return; + } + + String[] newarr = new String[arr.length+vals.length]; + System.arraycopy(arr, 0, newarr, 0, arr.length); + System.arraycopy(vals, 0, newarr, arr.length, vals.length); + arr = newarr; + + map.put(name, arr); + } + + public MultiMapSolrParams(Map map) { this.map = map; } @@ -88,5 +104,35 @@ public class MultiMapSolrParams extends SolrParams { return sb.toString(); } + /** Returns a MultiMap view of the SolrParams as efficiently as possible. The returned map may or may not be a backing implementation. */ + public static Map asMultiMap(SolrParams params) { + return asMultiMap(params, false); + } + + /** Returns a MultiMap view of the SolrParams. 
+
+
   public MultiMapSolrParams(Map<String,String[]> map) {
     this.map = map;
   }

@@ -88,5 +104,35 @@ public class MultiMapSolrParams extends SolrParams {
     return sb.toString();
   }

+  /** Returns a MultiMap view of the SolrParams as efficiently as possible.  The returned map may or may not be a backing implementation. */
+  public static Map<String,String[]> asMultiMap(SolrParams params) {
+    return asMultiMap(params, false);
+  }
+
+  /** Returns a MultiMap view of the SolrParams.  A new map will be created if newCopy==true */
+  public static Map<String,String[]> asMultiMap(SolrParams params, boolean newCopy) {
+    if (params instanceof MultiMapSolrParams) {
+      Map<String,String[]> map = ((MultiMapSolrParams)params).getMap();
+      if (newCopy) {
+        return new HashMap<>(map);
+      }
+      return map;
+    } else if (params instanceof ModifiableSolrParams) {
+      Map<String,String[]> map = ((ModifiableSolrParams)params).getMap();
+      if (newCopy) {
+        return new HashMap<>(map);
+      }
+      return map;
+    } else {
+      Map<String,String[]> map = new HashMap<>();
+      Iterator<String> iterator = params.getParameterNamesIterator();
+      while (iterator.hasNext()) {
+        String name = iterator.next();
+        map.put(name, params.getParams(name));
+      }
+      return map;
+    }
+  }
+
 }
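Because asMultiMap may hand back the live backing map of a MultiMapSolrParams or ModifiableSolrParams, callers that intend to mutate the result should pass newCopy=true. An illustrative sketch (the demo class is mine; the API calls are the ones added above):

    import java.util.Map;
    import org.apache.solr.common.params.ModifiableSolrParams;
    import org.apache.solr.common.params.MultiMapSolrParams;

    public class AsMultiMapDemo {
      public static void main(String[] args) {
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set("q", "*:*");
        params.add("fq", "cat:book");

        Map<String,String[]> view = MultiMapSolrParams.asMultiMap(params);        // possibly the backing map
        Map<String,String[]> copy = MultiMapSolrParams.asMultiMap(params, true);  // always a fresh HashMap

        copy.put("fq", new String[]{"inStock:true"});  // safe: does not affect params
        System.out.println(params.get("fq"));          // still cat:book
        System.out.println(view.get("q")[0]);          // *:*
      }
    }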
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java
index 36d0df1a117..6a798dede35 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java
@@ -320,13 +320,8 @@ public abstract class SolrParams implements Serializable {

   /** Create SolrParams from NamedList. */
   public static SolrParams toSolrParams(NamedList params) {
-    // if no keys are repeated use the faster MapSolrParams
-    HashMap<String,String> map = new HashMap<>();
-    for (int i=0; i<params.size(); i++) {
-      String prev = map.put(params.getName(i), params.getVal(i).toString());
-      if (prev != null) return new MultiMapSolrParams(toMultiMap(params));
-    }
-    return new MapSolrParams(map);
+    // always use MultiMap for easier processing further down the chain
+    return new MultiMapSolrParams(toMultiMap(params));
   }
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java b/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java
--- a/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java
@@ ... @@ public class NamedList<T> implements Cloneable, Serializable, Iterable<Map.Entry<String,T>> {
     nvPairs = new ArrayList<>();
   }

+
+  public NamedList(int sz) {
+    nvPairs = new ArrayList<>(sz<<1);
+  }
+
+
   /**
    * Creates a NamedList instance containing the "name,value" pairs contained in the
    * Entry[].
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java b/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java
index c9996d1636d..c53fca0d5e0 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java
@@ -43,6 +43,10 @@ public class SimpleOrderedMap<T> extends NamedList<T> {
     super();
   }

+  public SimpleOrderedMap(int sz) {
+    super(sz);
+  }
+
   /**
    * Creates an instance backed by an explicitly specified list of
    * pairwise names/values.
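The presizing constructors above are small but deliberate: NamedList stores names and values interleaved in a single backing ArrayList, which is why NamedList(int sz) allocates sz<<1 (2*sz) slots for sz pairs. A hedged sketch of the usage (class names are the real ones; the values are illustrative):

    import org.apache.solr.common.util.NamedList;
    import org.apache.solr.common.util.SimpleOrderedMap;

    public class PresizeDemo {
      public static void main(String[] args) {
        // three pairs fit in the presized backing list without regrowth
        NamedList<Integer> counts = new NamedList<>(3);
        counts.add("a", 3);
        counts.add("b", 3);
        counts.add("missing", 0);

        // SimpleOrderedMap serializes like a JSON object while keeping order
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>(2);
        bucket.add("val", "A");
        bucket.add("count", 2);
        System.out.println(counts + " " + bucket);
      }
    }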
diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseHS.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseHS.java
new file mode 100644
index 00000000000..8db045786b7
--- /dev/null
+++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseHS.java
@@ -0,0 +1,549 @@
+package org.apache.solr;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import com.google.common.base.Charsets;
+import org.apache.commons.io.FileUtils;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.impl.NoOpResponseParser;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.servlet.DirectSolrConnection;
+import org.noggit.JSONUtil;
+import org.noggit.ObjectBuilder;
+import org.slf4j.Logger;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Random;
+import java.util.Set;
+
+
+@SolrTestCaseJ4.SuppressSSL
+@LuceneTestCase.SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Lucene45","Appending","Asserting"})
+public class SolrTestCaseHS extends SolrTestCaseJ4 {
+
+  @SafeVarargs
+  public static <T> Set<T> set(T... a) {
+    LinkedHashSet<T> s = new LinkedHashSet<>();
+    for (T t : a) {
+      s.add(t);
+    }
+    return s;
+  }
+
+  public static <T> T rand(T... vals) {
+    return vals[ random().nextInt(vals.length) ];
+  }
+
+
+  public static ModifiableSolrParams params(SolrParams params, String... moreParams) {
+    ModifiableSolrParams msp = new ModifiableSolrParams(params);
+    for (int i=0; i<moreParams.length; i+=2) {
+      msp.add(moreParams[i], moreParams[i+1]);
+    }
+    return msp;
+  }
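How the helpers above are typically combined, as a hypothetical subclass (field names are illustrative; params(String...) comes from the SolrTestCaseJ4 base class):

    import java.util.Set;
    import org.apache.solr.SolrTestCaseHS;
    import org.apache.solr.common.params.ModifiableSolrParams;
    import org.apache.solr.common.params.SolrParams;

    public class HelperDemo extends SolrTestCaseHS {
      public void demo() {
        // rand() picks one of the supplied values per randomized run
        String field = rand("cat_s", "where_s");
        // build a base parameter set from key/value pairs, then overlay extras
        SolrParams base = params("q", "*:*", "rows", "10");
        ModifiableSolrParams p = params(base, "facet.field", field);
        Set<String> expected = set("A", "B");
        System.out.println(p + " expecting " + expected);
      }
    }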
+ + "\n expected =" + test + + "\n response = " + response + ); + } + } + } + } + + /*** + public static void clearNCache() { + SolrQueryRequest req = req(); + req.getSearcher().getnCache().clear(); // OFF-HEAP + req.close(); + }***/ + + public static void clearQueryCache() { + SolrQueryRequest req = req(); + req.getSearcher(); + req.close(); + } + + + public static String getQueryResponse(SolrClient client, String wt, SolrParams params) throws Exception { + if (client == null) { + return getQueryResponse(wt, params); + } + ModifiableSolrParams p = new ModifiableSolrParams(params); + p.set("wt", wt); + String path = p.get("qt"); + p.remove("qt"); + p.set("indent","true"); + + QueryRequest query = new QueryRequest( p ); + if (path != null) { + query.setPath(path); + } + + query.setResponseParser(new NoOpResponseParser(wt)); + NamedList rsp = client.request(query); + + String raw = (String)rsp.get("response"); + + return raw; + } + + public static String getQueryResponse(String wt, SolrParams params) throws Exception { + ModifiableSolrParams p = new ModifiableSolrParams(params); + p.set("wt", wt); + String path = p.get("qt"); + p.remove("qt"); + p.set("indent","true"); + + DirectSolrConnection connection = new DirectSolrConnection(h.getCore()); + String raw = connection.request(path, p, null); + return raw; + } + + + public static String getJSON(SolrClient client, SolrParams params) throws Exception { + return getQueryResponse(client, "json", params); + } + + /** Adds a document using the specific client, or to the local test core if null. + * Returns the version. TODO: work in progress... version not always returned. */ + public static Long add(SolrClient client, SolrInputDocument sdoc, ModifiableSolrParams params) throws Exception { + if (client == null) { + Long version = addAndGetVersion( sdoc, params ); + return version; + } else { + UpdateRequest updateRequest = new UpdateRequest(); + if (params != null) { + updateRequest.setParams(params); + } + updateRequest.add( sdoc ); + UpdateResponse rsp = updateRequest.process( client ); + // TODO - return version + return null; + } + } + + + + public static class Client { + ClientProvider provider; + ModifiableSolrParams queryDefaults; + + public static Client localClient = new Client(null, 1); + public static Client localClient() { + return new Client(null, 1); + } + + public Client(List clients, int seed) { + if (clients != null) { + provider = new ClientProvider(clients, seed); + } + } + + public static int hash(int x) { + // from Thomas Mueller + x = ((x >>> 16) ^ x) * 0x45d9f3b; + x = ((x >>> 16) ^ x) * 0x45d9f3b; + x = ((x >>> 16) ^ x); + return x; + } + + public ModifiableSolrParams queryDefaults() { + if (queryDefaults == null) { + queryDefaults = new ModifiableSolrParams(); + } + return queryDefaults; + } + + public boolean local() { + return provider == null; + } + + public void testJQ(SolrParams args, String... tests) throws Exception { + if (queryDefaults != null) { + ModifiableSolrParams newParams = params(queryDefaults); + newParams.add(args); + args = newParams; + } + SolrClient client = provider==null ? null : provider.client(null, args); + SolrTestCaseHS.assertJQ(client, args, tests); + } + + public Long add(SolrInputDocument sdoc, ModifiableSolrParams params) throws Exception { + SolrClient client = provider==null ? 
+
+  /***
+  public static void clearNCache() {
+    SolrQueryRequest req = req();
+    req.getSearcher().getnCache().clear();  // OFF-HEAP
+    req.close();
+  }***/
+
+  public static void clearQueryCache() {
+    SolrQueryRequest req = req();
+    req.getSearcher();
+    req.close();
+  }
+
+
+  public static String getQueryResponse(SolrClient client, String wt, SolrParams params) throws Exception {
+    if (client == null) {
+      return getQueryResponse(wt, params);
+    }
+    ModifiableSolrParams p = new ModifiableSolrParams(params);
+    p.set("wt", wt);
+    String path = p.get("qt");
+    p.remove("qt");
+    p.set("indent","true");
+
+    QueryRequest query = new QueryRequest( p );
+    if (path != null) {
+      query.setPath(path);
+    }
+
+    query.setResponseParser(new NoOpResponseParser(wt));
+    NamedList<Object> rsp = client.request(query);
+
+    String raw = (String)rsp.get("response");
+
+    return raw;
+  }
+
+  public static String getQueryResponse(String wt, SolrParams params) throws Exception {
+    ModifiableSolrParams p = new ModifiableSolrParams(params);
+    p.set("wt", wt);
+    String path = p.get("qt");
+    p.remove("qt");
+    p.set("indent","true");
+
+    DirectSolrConnection connection = new DirectSolrConnection(h.getCore());
+    String raw = connection.request(path, p, null);
+    return raw;
+  }
+
+
+  public static String getJSON(SolrClient client, SolrParams params) throws Exception {
+    return getQueryResponse(client, "json", params);
+  }
+
+  /** Adds a document using the specified client, or to the local test core if null.
+   *  Returns the version.  TODO: work in progress... version not always returned.  */
+  public static Long add(SolrClient client, SolrInputDocument sdoc, ModifiableSolrParams params) throws Exception {
+    if (client == null) {
+      Long version = addAndGetVersion( sdoc, params );
+      return version;
+    } else {
+      UpdateRequest updateRequest = new UpdateRequest();
+      if (params != null) {
+        updateRequest.setParams(params);
+      }
+      updateRequest.add( sdoc );
+      UpdateResponse rsp = updateRequest.process( client );
+      // TODO - return version
+      return null;
+    }
+  }
+
+
+
+  public static class Client {
+    ClientProvider provider;
+    ModifiableSolrParams queryDefaults;
+
+    public static Client localClient = new Client(null, 1);
+    public static Client localClient() {
+      return new Client(null, 1);
+    }
+
+    public Client(List<SolrClient> clients, int seed) {
+      if (clients != null) {
+        provider = new ClientProvider(clients, seed);
+      }
+    }
+
+    public static int hash(int x) {
+      // from Thomas Mueller
+      x = ((x >>> 16) ^ x) * 0x45d9f3b;
+      x = ((x >>> 16) ^ x) * 0x45d9f3b;
+      x = ((x >>> 16) ^ x);
+      return x;
+    }
+
+    public ModifiableSolrParams queryDefaults() {
+      if (queryDefaults == null) {
+        queryDefaults = new ModifiableSolrParams();
+      }
+      return queryDefaults;
+    }
+
+    public boolean local() {
+      return provider == null;
+    }
+
+    public void testJQ(SolrParams args, String... tests) throws Exception {
+      if (queryDefaults != null) {
+        ModifiableSolrParams newParams = params(queryDefaults);
+        newParams.add(args);
+        args = newParams;
+      }
+      SolrClient client = provider==null ? null : provider.client(null, args);
+      SolrTestCaseHS.assertJQ(client, args, tests);
+    }
+
+    public Long add(SolrInputDocument sdoc, ModifiableSolrParams params) throws Exception {
+      SolrClient client = provider==null ? null : provider.client(sdoc, params);
+      return SolrTestCaseHS.add(client, sdoc, params);
+    }
+
+    public void commit() throws IOException, SolrServerException {
+      if (local()) {
+        assertU(SolrTestCaseJ4.commit());
+        return;
+      }
+
+      for (SolrClient client : provider.all()) {
+        client.commit();
+      }
+    }
+
+    public void deleteByQuery(String query, ModifiableSolrParams params) throws IOException, SolrServerException {
+      if (local()) {
+        assertU(delQ(query));  // todo - handle extra params
+        return;
+      }
+
+      for (SolrClient client : provider.all()) {
+        client.deleteByQuery(query);  // todo - handle extra params
+      }
+    }
+
+  }
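Client.hash is a well-known integer mix function (attributed to Thomas Mueller); its multiply/xor-shift rounds spread nearby integers across the full int range, so sequential seeds do not all land on the same SolrClient. A self-contained sketch of that spreading (the demo class is mine; three clients assumed):

    public class HashDemo {
      // same mix function as SolrTestCaseHS.Client.hash
      static int hash(int x) {
        x = ((x >>> 16) ^ x) * 0x45d9f3b;
        x = ((x >>> 16) ^ x) * 0x45d9f3b;
        x = ((x >>> 16) ^ x);
        return x;
      }
      public static void main(String[] args) {
        for (int seed = 0; seed < 4; seed++) {
          int h = hash(seed);
          // mask to non-negative before taking the modulus, as ClientProvider does
          System.out.println(seed + " -> " + h + " -> client " + ((h & Integer.MAX_VALUE) % 3));
        }
      }
    }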
+
+
+  public static class ClientProvider {
+    public static String idField = "id";
+
+    List<SolrClient> clients;
+    Random r;
+    int hashSeed;
+
+    public ClientProvider(List<SolrClient> clients, int seed) {
+      this.hashSeed = Client.hash(seed);
+      this.clients = clients;
+      r = new Random(seed);
+    }
+
+    public SolrClient client(SolrInputDocument sdoc, SolrParams params) {
+      String idStr = null;
+      if (sdoc != null) {
+        idStr = sdoc.getFieldValue(idField).toString();
+      } else if (params!=null) {
+        idStr = params.get(idField);
+      }
+
+      int hash;
+      if (idStr != null) {
+        // make the client chosen the same for a duplicate ID
+        hash = idStr.hashCode() ^ hashSeed;
+      } else {
+        hash = r.nextInt();
+      }
+
+      return clients.get( (hash & Integer.MAX_VALUE) % clients.size() );
+    }
+
+    public List<SolrClient> all() {
+      return clients;
+    }
+  }
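Because ClientProvider hashes the document id, re-adding the same document always routes to the same client, so updates overwrite rather than duplicate across the partitioned servers. A sketch of that determinism (demo class and URLs are illustrative; no server needs to be running just to construct the clients):

    import java.util.Arrays;
    import java.util.List;
    import org.apache.solr.SolrTestCaseHS.ClientProvider;
    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.impl.HttpSolrClient;
    import org.apache.solr.common.SolrInputDocument;

    public class RoutingDemo {
      public static void main(String[] args) {
        List<SolrClient> shards = Arrays.asList(
            (SolrClient) new HttpSolrClient("http://127.0.0.1:7574/solr/collection1"),
            (SolrClient) new HttpSolrClient("http://127.0.0.1:8983/solr/collection1"));
        ClientProvider provider = new ClientProvider(shards, 42);

        SolrInputDocument doc = new SolrInputDocument();
        doc.setField("id", "doc-1");
        // same id, same seed -> same client every time
        System.out.println(provider.client(doc, null) == provider.client(doc, null));  // true
      }
    }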
+
+
+  //
+  // Helper to run an internal Jetty instance.
+  // Example:
+  //   SolrInstance s1 = new SolrInstance(createTempDir("s1"), "solrconfig-tlog.xml", "schema_latest.xml");
+  //   s1.start();
+  //   SolrClient c1 = s1.getSolrJ();
+  //   assertJQ(c1, params("q", "*:*"), "/response/numFound==3");
+  //   String json = getJSON(c1, params("q","id:1"));
+  //   s1.stop();
+  //
+  // To manage multiple servers, see SolrInstances
+  //
+  public static class SolrInstance {
+    private static Logger log = SolrTestCaseJ4.log;
+    private String collection = "collection1";
+    private int port = 0;
+    private String solrconfigFile;
+    private String schemaFile;
+    private File baseDir;
+    private JettySolrRunner jetty;
+    private SolrClient solrj;
+
+    private boolean homeCreated = false;
+
+
+    public SolrInstance(File homeDir, String solrconfigFile, String schemaFile) {
+      this.baseDir = homeDir;
+      this.solrconfigFile = solrconfigFile;
+      this.schemaFile = schemaFile;
+    }
+
+    public String getBaseDir() {
+      return baseDir.toString();
+    }
+
+    public String getBaseURL() {
+      return (SolrTestCaseJ4.isSSLMode() ? "https" : "http") + "://127.0.0.1:" + port + "/solr";
+    }
+
+    public String getCollectionURL() {
+      return getBaseURL() + "/" + collection;
+    }
+
+    /** String appropriate for passing in the shards param (i.e. missing http://) */
+    public String getShardURL() {
+      return "127.0.0.1:" + port + "/solr" + "/" + collection;
+    }
+
+    public SolrClient getSolrJ() {
+      if (solrj == null) {
+        solrj = new HttpSolrClient(getCollectionURL());
+      }
+      return solrj;
+    }
+
+    /** If it needs to change */
+    public void setPort(int port) {
+      this.port = port;
+    }
+
+    public void createHome() throws Exception {
+      homeCreated = true;
+      SolrTestCaseJ4.copySolrHomeToTemp(baseDir, collection);
+      copyConfFile(baseDir, collection, solrconfigFile);
+      copyConfFile(baseDir, collection, schemaFile);
+
+      File collDir = new File(baseDir, collection);
+      try (Writer w = new OutputStreamWriter(Files.newOutputStream(collDir.toPath().resolve("core.properties")), Charsets.UTF_8)) {
+        Properties coreProps = new Properties();
+        coreProps.put("name", "collection1");
+        coreProps.put("config", solrconfigFile);
+        coreProps.put("schema", schemaFile);
+        coreProps.store(w, "");
+      }
+    }
+
+
+    public void start() throws Exception {
+      if (!homeCreated) {
+        createHome();
+      }
+
+      if (jetty == null) {
+        jetty = new JettySolrRunner(baseDir.getAbsolutePath(), "/solr", port, solrconfigFile, schemaFile, true, null, null, null);
+      }
+
+      // silly stuff included from solrconfig.snippet.randomindexconfig.xml
+      System.setProperty("solr.tests.maxBufferedDocs", String.valueOf(100000));
+
+      jetty.start();
+      port = jetty.getLocalPort();
+      log.info("===> Started solr server port=" + port + " home=" + getBaseDir());
+    }
+
+    public void stop() throws Exception {
+      jetty.stop();
+      if (solrj != null) solrj.close();
+    }
+
+    public void tearDown() throws Exception {
+      IOUtils.deleteFilesIfExist(baseDir.toPath());
+    }
+
+    private static void copyConfFile(File dstRoot, String destCollection, String file) throws Exception {
+      File subHome = new File(dstRoot, destCollection + File.separator + "conf");
+      String top = SolrTestCaseJ4.TEST_HOME() + "/collection1/conf";
+      FileUtils.copyFile(new File(top, file), new File(subHome, file));
+    }
+
+    public void copyConfigFile(File dstRoot, String destCollection, String file) throws Exception {
+      if (!homeCreated) {
+        createHome();
+      }
+
+      File subHome = new File(dstRoot, destCollection + File.separator + "conf");
+      String top = SolrTestCaseJ4.TEST_HOME() + "/collection1/conf";
+      FileUtils.copyFile(new File(top, file), new File(subHome, file));
+    }
+
+  }
+
+
+  // Manages a number of Solr servers and provides a Client to partition documents and randomly assign query requests.
+  // Example:
+  //   SolrInstances servers = new SolrInstances(3, "solrconfig-tlog.xml", "schema_latest.xml");
+  //   Client client = servers.getClient(0);
+  //   client.add(sdoc("id", "3"), null);
+  //   client.commit();
+  //   client.testJQ(params("q", "*:*"), "/response/numFound==3");
+  //   servers.stop();
+  //
+  public static class SolrInstances {
+    public List<SolrInstance> slist;
+    public Client client;
+
+    public SolrInstances(int numServers, String solrconfig, String schema) throws Exception {
+      slist = new ArrayList<>(numServers);
+      for (int i=0; i<numServers; i++) {
+        SolrInstance instance = new SolrInstance(createTempDir("s" + i).toFile(), solrconfig, schema);
+        slist.add(instance);
+        instance.start();
+      }
+    }
+
+    public void stop() throws Exception {
+      for (SolrInstance instance : slist) {
+        instance.stop();
+      }
+    }
+
+    public String getShards() {
+      return getShardsParam(slist);
+    }
+
+    public List<SolrClient> getSolrJs() {
+      List<SolrClient> solrjs = new ArrayList<>(slist.size());
+      for (SolrInstance instance : slist) {
+        solrjs.add( instance.getSolrJ() );
+      }
+      return solrjs;
+    }
+
+    public Client getClient(int seed) {
+      if (client == null) {
+        client = new Client(getSolrJs(), seed);
+      }
+      return client;
+    }
+
+    public static String getShardsParam(List<SolrInstance> instances) {
+      StringBuilder sb = new StringBuilder();
+      boolean first = true;
+      for (SolrInstance instance : instances) {
+        if (first) {
+          first = false;
+        } else {
+          sb.append(',');
+        }
+        sb.append( instance.getShardURL() );
+      }
+      return sb.toString();
+    }
+
+  }
+
+}
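Putting SolrInstances and Client together, a distributed test follows the shape of testDistrib above; note that getShards() yields the comma-separated host:port/path form (no http:// scheme) that getShardURL documents. A hedged sketch, assuming the config and schema names from the example comments (the test class and method are hypothetical):

    import org.apache.solr.SolrTestCaseHS;
    import org.junit.Test;

    public class DistribDemo extends SolrTestCaseHS {
      @Test
      public void testShards() throws Exception {
        SolrInstances servers = new SolrInstances(2, "solrconfig-tlog.xml", "schema_latest.xml");
        try {
          Client client = servers.getClient(42);
          // e.g. "127.0.0.1:7574/solr/collection1,127.0.0.1:8983/solr/collection1"
          client.queryDefaults().set("shards", servers.getShards());
          client.add(sdoc("id", "1"), null);  // sdoc() and params() come from SolrTestCaseJ4
          client.commit();
          client.testJQ(params("q", "*:*"), "/response/numFound==1");
        } finally {
          servers.stop();
        }
      }
    }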