From f41a65a4570567b2ed3d216785320253dcaa10a6 Mon Sep 17 00:00:00 2001
From: Yonik Seeley
Date: Sun, 10 May 2015 03:13:36 +0000
Subject: [PATCH] SOLR-7522: single valued numeric field faceting

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1678535 13f79535-47bb-0310-9956-ffa450edef68
---
solr/CHANGES.txt | 4 +
.../apache/solr/search/facet/FacetField.java | 46 +-
.../facet/FacetFieldProcessorNumeric.java | 482 ++++++++++++++++++
.../apache/solr/search/facet/FacetRange.java | 77 ++-
.../solr/search/facet/FacetRequest.java | 9 +-
.../solr/search/facet/TestJsonFacets.java | 52 +-
6 files changed, 651 insertions(+), 19 deletions(-)
create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a32644d7f9e..7ae1a8b80a3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -177,6 +177,10 @@ New Features
* SOLR-7461: stats.field now supports individual local params for 'countDistinct'
and 'distinctValues'. 'calcdistinct' is still supported as an alias for both options (hossman)

+* SOLR-7522: Facet Module - Implement field/terms faceting over single-valued
+ numeric fields. (yonik)
+
+
Bug Fixes
----------------------

diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index cd2d0d0f37c..7870d41f0f6 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -48,6 +48,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.HashDocSet;
@@ -120,16 +121,26 @@ public class FacetField extends FacetRequest {
return new FacetFieldProcessorStream(fcontext, this, sf);
}

- if (!multiToken || sf.hasDocValues()) {
+ org.apache.lucene.document.FieldType.NumericType ntype = ft.getNumericType();
+
+ if (sf.hasDocValues() && ntype==null) {
+ // single and multi-valued string docValues
return new FacetFieldProcessorDV(fcontext, this, sf);
}

- if (multiToken) {
- return new FacetFieldProcessorUIF(fcontext, this, sf);
- } else {
- // single valued string
- return new FacetFieldProcessorFC(fcontext, this, sf);
+ if (!multiToken) {
+ if (sf.getType().getNumericType() != null) {
+ // single valued numeric (docvalues or fieldcache)
+ return new FacetFieldProcessorNumeric(fcontext, this, sf);
+ } else {
+ // single valued string...
+ return new FacetFieldProcessorDV(fcontext, this, sf);
+ // what about FacetFieldProcessorFC?
+ }
}
+
+ // Multi-valued field cache (UIF)
+ return new FacetFieldProcessorUIF(fcontext, this, sf);
}

@Override
@@ -143,6 +154,7 @@ public class FacetField extends FacetRequest {
abstract class FacetFieldProcessor extends FacetProcessor {
SchemaField sf;
SlotAcc sortAcc;
+ SlotAcc indexOrderAcc;
int effectiveMincount;

FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
@@ -157,6 +169,12 @@ abstract class FacetFieldProcessor extends FacetProcessor {
}

void setSortAcc(int numSlots) {
+ if (indexOrderAcc == null) {
+ // This sorting accumulator just goes by the slot number, so does not need to be collected
+ // and hence does not need to find its way into the accMap or accs array.
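+ // For example, under sort:"index" the bucket in slot 3 always compares ahead of the
+ // bucket in slot 5: compare() is answered from the slot numbers alone, with no per-doc state.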
+ indexOrderAcc = new SortSlotAcc(fcontext);
+ }
+
String sortKey = freq.sortVariable;
sortAcc = accMap.get(sortKey);

@@ -164,15 +182,16 @@
if ("count".equals(sortKey)) {
sortAcc = countAcc;
} else if ("index".equals(sortKey)) {
- sortAcc = new SortSlotAcc(fcontext);
- // This sorting accumulator just goes by the slot number, so does not need to be collected
- // and hence does not need to find it's way into the accMap or accs array.
+ sortAcc = indexOrderAcc;
}
}
}

static class Slot {
int slot;
+ public int tiebreakCompare(int slotA, int slotB) {
+ return slotB - slotA;
+ }
}
}

@@ -249,7 +268,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
// add a modest amount of over-request if this is a shard request
int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;

- int maxsize = freq.limit > 0 ? off + lim : Integer.MAX_VALUE - 1;
+ int maxsize = (int)(freq.limit > 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
maxsize = Math.min(maxsize, nTerms);

final int sortMul = freq.sortDirection.getMultiplier();
@@ -612,6 +631,11 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable
if (freq.prefix != null) {
String indexedPrefix = sf.getType().toInternal(freq.prefix);
startTermBytes = new BytesRef(indexedPrefix);
+ } else if (sf.getType().getNumericType() != null) {
+ String triePrefix = TrieField.getMainValuePrefix(sf.getType());
+ if (triePrefix != null) {
+ startTermBytes = new BytesRef(triePrefix);
+ }
}

Fields fields = fcontext.searcher.getLeafReader().fields();
@@ -644,8 +668,6 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable

List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
leaves = leafList.toArray( new LeafReaderContext[ leafList.size() ]);
-
-
}

diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
new file mode 100644
index 00000000000..43406a24684
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
@@ -0,0 +1,482 @@
+package org.apache.solr.search.facet;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BitUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PriorityQueue; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSet; + +class FacetFieldProcessorNumeric extends FacetFieldProcessor { + static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests + + static class LongCounts { + + static final float LOAD_FACTOR = 0.7f; + + long numAdds; + long[] vals; + int[] counts; // maintain the counts here since we need them to tell if there was actually a value anyway + int[] oldToNewMapping; + + int cardinality; + int threshold; + + /** sz must be a power of two */ + LongCounts(int sz) { + vals = new long[sz]; + counts = new int[sz]; + threshold = (int) (sz * LOAD_FACTOR); + } + + /** Current number of slots in the hash table */ + public int numSlots() { + return vals.length; + } + + private int hash(long val) { + // For floats: exponent bits start at bit 23 for single precision, + // and bit 52 for double precision. + // Many values will only have significant bits just to the right of that, + // and the leftmost bits will all be zero. + + // For now, lets just settle to get first 8 significant mantissa bits of double or float in the lowest bits of our hash + // The upper bits of our hash will be irrelevant. 
+ int h = (int) (val + (val >>> 44) + (val >>> 15));
+ return h;
+ }
+
+ /** returns the slot */
+ int add(long val) {
+ if (cardinality >= threshold) {
+ rehash();
+ }
+
+ numAdds++;
+ int h = hash(val);
+ for (int slot = h & (vals.length-1); ;slot = (slot + ((h>>7)|1)) & (vals.length-1)) {
+ int count = counts[slot];
+ if (count == 0) {
+ counts[slot] = 1;
+ vals[slot] = val;
+ cardinality++;
+ return slot;
+ } else if (vals[slot] == val) {
+ // val is already in the set
+ counts[slot] = count + 1;
+ return slot;
+ }
+ }
+ }
+
+ protected void rehash() {
+ long[] oldVals = vals;
+ int[] oldCounts = counts; // after retrieving the count, this array is reused as a mapping to new array
+ int newCapacity = vals.length << 1;
+ vals = new long[newCapacity];
+ counts = new int[newCapacity];
+ threshold = (int) (newCapacity * LOAD_FACTOR);
+
+ for (int i=0; i<oldVals.length; i++) {
+ int count = oldCounts[i];
+ if (count == 0) {
+ oldCounts[i] = -1;
+ continue;
+ }
+
+ long val = oldVals[i];
+ int h = hash(val);
+ int slot = h & (vals.length-1);
+ while (counts[slot] != 0) {
+ slot = (slot + ((h>>7)|1)) & (vals.length-1);
+ }
+ counts[slot] = count;
+ vals[slot] = val;
+ oldCounts[i] = slot;
+ }
+
+ oldToNewMapping = oldCounts;
+ }
+
+ int cardinality() {
+ return cardinality;
+ }
+
+ }
+
+
+
+ FacetFieldProcessorNumeric(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq, sf);
+ }
+
+ int missingSlot = -1;
+ int allBucketsSlot = -1;
+
+
+ @Override
+ public void process() throws IOException {
+ super.process();
+ response = calcFacets();
+ }
+
+
+ private void doRehash(LongCounts table) {
+ if (accs.length == 0) return; // TODO: FUTURE: only need to resize acc we will sort on
+
+ // Our "count" acc is backed by the hash table and will already be rehashed
+
+ int newTableSize = table.numSlots();
+ int numSlots = newTableSize;
+ final int oldMissingSlot = missingSlot;
+ final int oldAllBucketsSlot = allBucketsSlot;
+ if (oldMissingSlot >= 0) {
+ missingSlot = numSlots++;
+ }
+ if (allBucketsSlot >= 0) {
+ allBucketsSlot = numSlots++;
+ }
+
+ final int finalNumSlots = numSlots;
+ final int[] mapping = table.oldToNewMapping;
+
+ SlotAcc.Resizer resizer = new SlotAcc.Resizer() {
+ @Override
+ public int getNewSize() {
+ return finalNumSlots;
+ }
+
+ @Override
+ public int getNewSlot(int oldSlot) {
+ if (oldSlot < mapping.length) {
+ return mapping[oldSlot];
+ }
+ if (oldSlot == oldMissingSlot) {
+ return missingSlot;
+ }
+ if (oldSlot == oldAllBucketsSlot) {
+ return allBucketsSlot;
+ }
+ return -1;
+ }
+ };
+
+ for (SlotAcc acc : accs) {
+ acc.resize( resizer );
+ }
+ }
+
+ public SimpleOrderedMap calcFacets() throws IOException {
+
+
+ final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);
+
+
+ // TODO: it would be really nice to know the number of unique values!!!!
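+ // Lacking that, size for the worst case of one distinct value per doc in the base set:
+ // e.g. a 100-doc domain gives nextHighestPowerOfTwo((int)(100 * (1/0.7) + 1)) = 256 slots,
+ // while anything larger starts at MAXIMUM_STARTING_TABLE_SIZE and grows by rehashing.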
+
+ int possibleValues = fcontext.base.size();
+ // size smaller tables so that no resize will be necessary
+ int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
+ currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
+ final LongCounts table = new LongCounts(currHashSize) {
+ @Override
+ protected void rehash() {
+ super.rehash();
+ doRehash(this);
+ oldToNewMapping = null; // allow for gc
+ }
+ };
+
+ int numSlots = currHashSize;
+
+ int numMissing = 0;
+
+ if (freq.missing) {
+ missingSlot = numSlots++;
+ }
+ if (freq.allBuckets) {
+ allBucketsSlot = numSlots++;
+ }
+
+ indexOrderAcc = new SlotAcc(fcontext) {
+ @Override
+ public void collect(int doc, int slot) throws IOException {
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ long s1 = calc.bitsToSortableBits(table.vals[slotA]);
+ long s2 = calc.bitsToSortableBits(table.vals[slotB]);
+ return Long.compare(s1, s2);
+ }
+
+ @Override
+ public Object getValue(int slotNum) throws IOException {
+ return null;
+ }
+
+ @Override
+ public void reset() {
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ }
+ };
+
+ countAcc = new CountSlotAcc(fcontext) {
+ @Override
+ public void incrementCount(int slot, int count) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getCount(int slot) {
+ return table.counts[slot];
+ }
+
+ @Override
+ public Object getValue(int slotNum) {
+ return getCount(slotNum);
+ }
+
+ @Override
+ public void reset() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void collect(int doc, int slot) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ return Integer.compare( table.counts[slotA], table.counts[slotB] );
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ throw new UnsupportedOperationException();
+ }
+ };
+
+
+ // we set the countAcc first so it won't be created here
+ createAccs(fcontext.base.size(), numSlots);
+ setSortAcc(numSlots);
+ prepareForCollection();
+
+
+ NumericDocValues values = null;
+ Bits docsWithField = null;
+
+ // TODO: factor this code out so it can be shared...
+ final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
+ final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
+ LeafReaderContext ctx = null;
+ int segBase = 0;
+ int segMax;
+ int adjustedMax = 0;
+ for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext(); ) {
+ final int doc = docsIt.nextDoc();
+ if (doc >= adjustedMax) {
+ do {
+ ctx = ctxIt.next();
+ segBase = ctx.docBase;
+ segMax = ctx.reader().maxDoc();
+ adjustedMax = segBase + segMax;
+ } while (doc >= adjustedMax);
+ assert doc >= ctx.docBase;
+ setNextReader(ctx);
+
+ values = DocValues.getNumeric(ctx.reader(), sf.getName());
+ docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
+ }
+
+ int segDoc = doc - segBase;
+ long val = values.get(segDoc);
+ if (val == 0 && !docsWithField.get(segDoc)) {
+ // missing
+ if (missingSlot >= 0) {
+ numMissing++;
+ collect(segDoc, missingSlot);
+ }
+ } else {
+ int slot = table.add(val); // this can trigger a rehash
+
+ collect(segDoc, slot);
+
+ if (allBucketsSlot >= 0) {
+ collect(segDoc, allBucketsSlot);
+ }
+ }
+ }
+
+
+ //
+ // collection done, time to find the top slots
+ //
+
+ int numBuckets = 0;
+ List bucketVals = null;
+ if (freq.numBuckets && fcontext.isShard()) {
+ bucketVals = new ArrayList(100);
+ }
+
+ int off = fcontext.isShard() ? 0 : (int) freq.offset;
+ // add a modest amount of over-request if this is a shard request
+ int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
+
+ int maxsize = (int)(freq.limit > 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
+ maxsize = Math.min(maxsize, table.cardinality);
+
+ final int sortMul = freq.sortDirection.getMultiplier();
+
+ PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
+ @Override
+ protected boolean lessThan(Slot a, Slot b) {
+ // TODO: sort-by-index-order
+ int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
+ return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
+ }
+ };
+
+ // TODO: create a countAcc that wraps the table so we can reuse more code?
+
+ Slot bottom = null;
+ for (int i=0; i<table.counts.length; i++) {
+ int count = table.counts[i];
+ if (count < effectiveMincount) {
+ // either not a valid slot, or count not high enough
+ continue;
+ }
+ numBuckets++; // can be different from the table cardinality if mincount > 1
+
+ long val = table.vals[i];
+ if (bucketVals != null && bucketVals.size()<100) {
+ bucketVals.add( calc.bitsToValue(val) );
+ }
+
+ if (bottom == null) {
+ bottom = new Slot();
+ }
+ bottom.slot = i;
+
+ bottom = queue.insertWithOverflow(bottom);
+ }
+
+
+ SimpleOrderedMap res = new SimpleOrderedMap();
+ if (freq.numBuckets) {
+ if (!fcontext.isShard()) {
+ res.add("numBuckets", numBuckets);
+ } else {
+ SimpleOrderedMap map = new SimpleOrderedMap(2);
+ map.add("numBuckets", numBuckets);
+ map.add("vals", bucketVals);
+ res.add("numBuckets", map);
+ }
+ }
+
+ if (freq.allBuckets) {
+ SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
+ // countAcc.setValues(allBuckets, allBucketsSlot);
+ allBuckets.add("count", table.numAdds);
+ for (SlotAcc acc : accs) {
+ acc.setValues(allBuckets, allBucketsSlot);
+ }
+ // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
+ res.add("allBuckets", allBuckets);
+ }
+
+ if (freq.missing) {
+ SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
+ // countAcc.setValues(missingBucket, missingSlot);
+ missingBucket.add("count", numMissing);
+ for (SlotAcc acc : accs) {
+ acc.setValues(missingBucket, missingSlot);
+ }
+
+ if (freq.getSubFacets().size() > 0) {
+ // TODO: we can do better than this!
+ DocSet missingDocSet = null;
+ if (missingDocSet == null) {
+ missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
+ }
+ processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
+ }
+ res.add("missing", missingBucket);
+ }
+
+ // if we are deep paging, we don't have to order the highest "offset" counts.
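+ // e.g. offset=2, limit=3 on a non-distributed request: the queue was capped at 5 slots,
+ // we pop only queue.size()-2 of them (worst first, filling sortedSlots back to front),
+ // and the best 2, which fall inside the offset, are never popped and so never ordered.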
+ int collectCount = Math.max(0, queue.size() - off);
+ assert collectCount <= lim;
+ int[] sortedSlots = new int[collectCount];
+ for (int i = collectCount - 1; i >= 0; i--) {
+ sortedSlots[i] = queue.pop().slot;
+ }
+
+ ArrayList bucketList = new ArrayList(collectCount);
+ res.add("buckets", bucketList);
+
+
+ for (int slotNum : sortedSlots) {
+ SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
+ Comparable val = calc.bitsToValue(table.vals[slotNum]);
+ bucket.add("val", val);
+
+ // add stats for this bucket
+ // TODO: this gets count from countAcc
+ // addStats(bucket, slotNum);
+ bucket.add("count", table.counts[slotNum]);
+
+ for (SlotAcc acc : accs) {
+ acc.setValues(bucket, slotNum);
+ }
+
+ // handle sub-facets for this bucket
+ if (freq.getSubFacets().size() > 0) {
+ Query filter = sf.getType().getFieldQuery(null, sf, calc.formatValue(val));
+ processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) );
+ }
+
+ bucketList.add(bucket);
+ }
+
+
+
+ return res;
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
index 74be2b8c880..fea7b7f7fd9 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
@@ -24,6 +24,7 @@ import java.util.EnumSet;
import java.util.List;

import org.apache.lucene.search.Query;
+import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.SimpleOrderedMap;
@@ -101,6 +102,41 @@ class FacetRangeProcessor extends FacetProcessor {
}
}

+ public static Calc getNumericCalc(SchemaField sf) {
+ Calc calc;
+ final FieldType ft = sf.getType();
+
+ if (ft instanceof TrieField) {
+ final TrieField trie = (TrieField)ft;
+
+ switch (trie.getType()) {
+ case FLOAT:
+ calc = new FloatCalc(sf);
+ break;
+ case DOUBLE:
+ calc = new DoubleCalc(sf);
+ break;
+ case INTEGER:
+ calc = new IntCalc(sf);
+ break;
+ case LONG:
+ calc = new LongCalc(sf);
+ break;
+ case DATE:
+ calc = new DateCalc(sf, null);
+ break;
+ default:
+ throw new SolrException
+ (SolrException.ErrorCode.BAD_REQUEST,
+ "Expected numeric field type :" + sf);
+ }
+ } else {
+ throw new SolrException
+ (SolrException.ErrorCode.BAD_REQUEST,
+ "Expected numeric field type :" + sf);
+ }
+ return calc;
+ }

private SimpleOrderedMap getRangeCounts() throws IOException {
final FieldType ft = sf.getType();
@@ -317,14 +353,22 @@ class FacetRangeProcessor extends FacetProcessor {
* directly from some method -- but until then, keep this locked down
* and private.
*/
- private static abstract class Calc {
+ static abstract class Calc {
protected final SchemaField field;
public Calc(final SchemaField field) {
this.field = field;
}

+ public Comparable bitsToValue(long bits) {
+ return bits;
+ }
+
+ public long bitsToSortableBits(long bits) {
+ return bits;
+ }
+
/**
- * Formats a Range endpoint for use as a range label name in the response.
+ * Formats a value into a label used in the response.
* Default Impl just uses toString()
*/
public String formatValue(final Comparable val) {
@@ -332,7 +376,7 @@ class FacetRangeProcessor extends FacetProcessor {
}

/**
- * Parses a String param into an Range endpoint value throwing
+ * Parses a String param into a value throwing
* an exception if not possible
*/
public final Comparable getValue(final String rawval) {
@@ -346,7 +390,7 @@ class FacetRangeProcessor extends FacetProcessor {
}

/**
- * Parses a String param into an Range endpoint.
+ * Parses a String param into a value.
* Can throw a low level format exception as needed.
*/
protected abstract Comparable parseStr(final String rawval)
@@ -407,6 +451,16 @@ class FacetRangeProcessor extends FacetProcessor {

private static class FloatCalc extends Calc {

+ @Override
+ public Comparable bitsToValue(long bits) {
+ return Float.intBitsToFloat( (int)bits );
+ }
+
+ @Override
+ public long bitsToSortableBits(long bits) {
+ return NumericUtils.sortableDoubleBits(bits);
+ }
+
public FloatCalc(final SchemaField f) { super(f); }
@Override
protected Float parseStr(String rawval) {
@@ -418,6 +472,15 @@ class FacetRangeProcessor extends FacetProcessor {
}
}
private static class DoubleCalc extends Calc {
+ @Override
+ public Comparable bitsToValue(long bits) {
+ return Double.longBitsToDouble(bits);
+ }
+
+ @Override
+ public long bitsToSortableBits(long bits) {
+ return NumericUtils.sortableDoubleBits(bits);
+ }
public DoubleCalc(final SchemaField f) { super(f); }
@Override
@@ -463,6 +526,12 @@ class FacetRangeProcessor extends FacetProcessor {
throw new IllegalArgumentException("SchemaField must use field type extending TrieDateField or DateRangeField");
}
}
+
+ @Override
+ public Comparable bitsToValue(long bits) {
+ return new Date(bits);
+ }
+
@Override
public String formatValue(Comparable val) {
return ((TrieDateField)field.getType()).toExternal( (Date)val );
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
index aeb30d7b4b6..0eec43100b9 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@@ -206,8 +206,13 @@ class FacetProcessor {

protected void createAccs(int docCount, int slotCount) throws IOException {
accMap = new LinkedHashMap();
- countAcc = new CountSlotArrAcc(fcontext, slotCount);
- countAcc.key = "count";
+
+ // allow a custom count acc to be used
+ if (countAcc == null) {
+ countAcc = new CountSlotArrAcc(fcontext, slotCount);
+ countAcc.key = "count";
+ }
+
for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
acc.key = entry.getKey();
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index 2ae4d63598e..e00b3286098 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -28,6 +28,7 @@ import java.util.Map;
import java.util.Random;

import com.tdunning.math.stats.AVLTreeDigest;
+import org.apache.lucene.queryparser.flexible.standard.processors.NumericQueryNodeProcessor;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
@@ -44,10 +45,13 @@ import org.junit.Test;

public class TestJsonFacets extends SolrTestCaseHS { private static SolrInstances servers; // for distributed testing + private static int origTableSize; @BeforeClass public static void beforeTests() throws Exception { JSONTestUtil.failRepeatedKeys = true; + origTableSize = FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE; + FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=2; // stress test resizing initCore("solrconfig-tlog.xml","schema_latest.xml"); } @@ -60,6 +64,7 @@ public class TestJsonFacets extends SolrTestCaseHS { @AfterClass public static void afterTests() throws Exception { JSONTestUtil.failRepeatedKeys = false; + FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=origTableSize; if (servers != null) { servers.stop(); servers = null; @@ -368,7 +373,8 @@ public class TestJsonFacets extends SolrTestCaseHS { client.commit(); - // straight query facets + + // straight query facets client.testJQ(params(p, "q", "*:*" , "json.facet", "{catA:{query:{q:'${cat_s}:A'}}, catA2:{query:{query:'${cat_s}:A'}}, catA3:{query:'${cat_s}:A'} }" ) @@ -883,6 +889,50 @@ public class TestJsonFacets extends SolrTestCaseHS { ); + // + // facet on numbers + // + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{" + + " f1:{ type:field, field:${num_i} }" + + ",f2:{ type:field, field:${num_i}, sort:'count asc' }" + + ",f3:{ type:field, field:${num_i}, sort:'index asc' }" + + ",f4:{ type:field, field:${num_i}, sort:'index desc' }" + + ",f5:{ type:field, field:${num_i}, sort:'index desc', limit:1, missing:true, allBuckets:true, numBuckets:true }" + + ",f6:{ type:field, field:${num_i}, sort:'index desc', mincount:2, numBuckets:true }" + // mincount should lower numbuckets + ",f7:{ type:field, field:${num_i}, sort:'index desc', offset:2, numBuckets:true }" + // test offset + ",f8:{ type:field, field:${num_i}, sort:'index desc', offset:100, numBuckets:true }" + // test high offset + ",f9:{ type:field, field:${num_i}, sort:'x desc', facet:{x:'avg(${num_d})'}, missing:true, allBuckets:true, numBuckets:true }" + // test stats + ",f10:{ type:field, field:${num_i}, facet:{a:{query:'${cat_s}:A'}}, missing:true, allBuckets:true, numBuckets:true }" + // test subfacets + "}" + ) + , "facets=={count:6 " + + ",f1:{ buckets:[{val:-5,count:2},{val:2,count:1},{val:3,count:1},{val:7,count:1} ] } " + + ",f2:{ buckets:[{val:2,count:1},{val:3,count:1},{val:7,count:1},{val:-5,count:2} ] } " + + ",f3:{ buckets:[{val:-5,count:2},{val:2,count:1},{val:3,count:1},{val:7,count:1} ] } " + + ",f4:{ buckets:[{val:7,count:1},{val:3,count:1},{val:2,count:1},{val:-5,count:2} ] } " + + ",f5:{ buckets:[{val:7,count:1}] , numBuckets:4, allBuckets:{count:5}, missing:{count:1} } " + + ",f6:{ buckets:[{val:-5,count:2}] , numBuckets:1 } " + + ",f7:{ buckets:[{val:2,count:1},{val:-5,count:2}] , numBuckets:4 } " + + ",f8:{ buckets:[] , numBuckets:4 } " + + ",f9:{ buckets:[{val:7,count:1,x:11.0},{val:2,count:1,x:4.0},{val:3,count:1,x:2.0},{val:-5,count:2,x:-7.0} ], numBuckets:4, allBuckets:{count:5,x:0.6},missing:{count:1,x:0.0} } " + // TODO: should missing exclude "x" because no values were collected? 
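+ // f10: each ${num_i} bucket gets an "a" sub-facet counting its docs that match ${cat_s}:A;
+ // the -5 and 7 buckets hold no cat-A docs, so each reports a:{count:0}.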
+ ",f10:{ buckets:[{val:-5,count:2,a:{count:0}},{val:2,count:1,a:{count:1}},{val:3,count:1,a:{count:1}},{val:7,count:1,a:{count:0}} ], numBuckets:4, allBuckets:{count:5},missing:{count:1,a:{count:0}} } " + + "}" + ); + + + // facet on a float field - shares same code with integers/longs currently, so we only need to test labels/sorting + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{" + + " f1:{ type:field, field:${num_d} }" + + ",f2:{ type:field, field:${num_d}, sort:'index desc' }" + + "}" + ) + , "facets=={count:6 " + + ",f1:{ buckets:[{val:-9.0,count:1},{val:-5.0,count:1},{val:2.0,count:1},{val:4.0,count:1},{val:11.0,count:1} ] } " + + ",f2:{ buckets:[{val:11.0,count:1},{val:4.0,count:1},{val:2.0,count:1},{val:-5.0,count:1},{val:-9.0,count:1} ] } " + + "}" + ); }