From fb63b4c3db53ead91b5b0f19e35bf484f9ef0525 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Sat, 9 May 2015 20:50:55 +0000 Subject: [PATCH] SOLR-7519: SlotAcc resize ability with Resizer git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1678522 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/solr/search/facet/CountAgg.java | 2 +- .../solr/search/facet/FacetRequest.java | 2 +- .../solr/search/facet/PercentileAgg.java | 5 + .../org/apache/solr/search/facet/SlotAcc.java | 125 +++++++++++++++++- .../solr/search/facet/UnInvertedField.java | 18 +-- .../apache/solr/search/facet/UniqueAgg.java | 5 + 6 files changed, 140 insertions(+), 17 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/facet/CountAgg.java b/solr/core/src/java/org/apache/solr/search/facet/CountAgg.java index a2ce9e8f79b..ff6fc384d3b 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/CountAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/CountAgg.java @@ -26,7 +26,7 @@ public class CountAgg extends SimpleAggValueSource { @Override public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - return new CountSlotAcc(fcontext, numSlots); + return new CountSlotArrAcc(fcontext, numSlots); } @Override diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java index 288d1a82d37..aeb30d7b4b6 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java @@ -206,7 +206,7 @@ class FacetProcessor { protected void createAccs(int docCount, int slotCount) throws IOException { accMap = new LinkedHashMap(); - countAcc = new CountSlotAcc(fcontext, slotCount); + countAcc = new CountSlotArrAcc(fcontext, slotCount); countAcc.key = "count"; for (Map.Entry entry : freq.getFacetStats().entrySet()) { SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount); diff --git a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java index 40a5e9b3d68..ec06d0bf36f 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java @@ -178,6 +178,11 @@ public class PercentileAgg extends SimpleAggValueSource { digests = new AVLTreeDigest[digests.length]; sortvals = null; } + + @Override + public void resize(Resizer resizer) { + digests = resizer.resize(digests, null); + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java index 69006610dde..d3f81f1d157 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java @@ -33,7 +33,9 @@ import org.apache.solr.search.SolrIndexSearcher; import java.io.Closeable; import java.io.IOException; +import java.lang.reflect.Array; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; @@ -61,11 +63,71 @@ public abstract class SlotAcc implements Closeable { public abstract void reset(); + public abstract void resize(Resizer resizer); + @Override public void close() throws IOException { } -} + public static abstract class Resizer { + public abstract int getNewSize(); + + public abstract int getNewSlot(int oldSlot); + + public double[] resize(double[] old, double defaultValue) { + double[] values = new double[getNewSize()]; + if (defaultValue != 0) { + Arrays.fill(values, 0, values.length, defaultValue); + } + for (int i = 0; i < old.length; i++) { + double val = old[i]; + if (val != defaultValue) { + int newSlot = getNewSlot(i); + if (newSlot >= 0) { + values[newSlot] = val; + } + } + } + return values; + } + + public int[] resize(int[] old, int defaultValue) { + int[] values = new int[getNewSize()]; + if (defaultValue != 0) { + Arrays.fill(values, 0, values.length, defaultValue); + } + for (int i = 0; i < old.length; i++) { + int val = old[i]; + if (val != defaultValue) { + int newSlot = getNewSlot(i); + if (newSlot >= 0) { + values[newSlot] = val; + } + } + } + return values; + } + + public T[] resize(T[] old, T defaultValue) { + T[] values = (T[]) Array.newInstance(old.getClass().getComponentType(), getNewSize()); + if (defaultValue != null) { + Arrays.fill(values, 0, values.length, defaultValue); + } + for (int i = 0; i < old.length; i++) { + T val = old[i]; + if (val != defaultValue) { + int newSlot = getNewSlot(i); + if (newSlot >= 0) { + values[newSlot] = val; + } + } + } + return values; + } + + } // end class Resizer + +} // TODO: we should really have a decoupled value provider... // This would enhance reuse and also prevent multiple lookups of same value across diff stats @@ -125,6 +187,11 @@ abstract class DoubleFuncSlotAcc extends FuncSlotAcc { result[i] = initialValue; } } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, initialValue); + } } abstract class IntSlotAcc extends SlotAcc { @@ -156,6 +223,11 @@ abstract class IntSlotAcc extends SlotAcc { result[i] = initialValue; } } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, initialValue); + } } @@ -270,18 +342,44 @@ class AvgSlotAcc extends DoubleFuncSlotAcc { } } + @Override + public void resize(Resizer resizer) { + super.resize(resizer); + counts = resizer.resize(counts, 0); + } +} + +abstract class CountSlotAcc extends SlotAcc { + public CountSlotAcc(FacetContext fcontext) { + super(fcontext); + } + + public abstract void incrementCount(int slot, int count); + public abstract int getCount(int slot); } -class CountSlotAcc extends IntSlotAcc { - public CountSlotAcc(FacetContext fcontext, int numSlots) { - super(fcontext, numSlots, 0); +class CountSlotArrAcc extends CountSlotAcc { + int[] result; + public CountSlotArrAcc(FacetContext fcontext, int numSlots) { + super(fcontext); + result = new int[numSlots]; } @Override public void collect(int doc, int slotNum) { // TODO: count arrays can use fewer bytes based on the number of docs in the base set (that's the upper bound for single valued) - look at ttf? - result[slotNum] = result[slotNum] + 1; + result[slotNum]++; + } + + @Override + public int compare(int slotA, int slotB) { + return Integer.compare( result[slotA], result[slotB] ); + } + + @Override + public Object getValue(int slotNum) throws IOException { + return result[slotNum]; } public void incrementCount(int slot, int count) { @@ -299,7 +397,12 @@ class CountSlotAcc extends IntSlotAcc { @Override public void reset() { - super.reset(); + Arrays.fill(result, 0); + } + + @Override + public void resize(Resizer resizer) { + resizer.resize(result, 0); } } @@ -327,6 +430,12 @@ class SortSlotAcc extends SlotAcc { public void reset() { // no-op } + + @Override + public void resize(Resizer resizer) { + // sort slot only works with direct-mapped accumulators + throw new UnsupportedOperationException(); + } } @@ -427,6 +536,10 @@ abstract class UniqueSlotAcc extends SlotAcc { return counts[slotA] - counts[slotB]; } + @Override + public void resize(Resizer resizer) { + arr = resizer.resize(arr, null); + } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java index d239918475e..44bd5f40a39 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java @@ -304,7 +304,7 @@ public class UnInvertedField extends DocTermOrds { - private void getCountsInArray(FacetFieldProcessorUIF processor, int[] counts) throws IOException { + private void getCounts(FacetFieldProcessorUIF processor, CountSlotAcc counts) throws IOException { DocSet docs = processor.fcontext.base; int baseSize = docs.size(); int maxDoc = searcher.maxDoc(); @@ -330,7 +330,7 @@ public class UnInvertedField extends DocTermOrds { // For the biggest terms, do straight set intersections for (TopTerm tt : bigTerms.values()) { // TODO: counts could be deferred if sorting by index order - counts[tt.termNum] = searcher.numDocs(tt.termQuery, docs); + counts.incrementCount(tt.termNum, searcher.numDocs(tt.termQuery, docs)); } // TODO: we could short-circuit counting altogether for sorted faceting @@ -356,7 +356,7 @@ public class UnInvertedField extends DocTermOrds { } if (delta == 0) break; tnum += delta - TNUM_OFFSET; - counts[tnum]++; + counts.incrementCount(tnum,1); } } else { int tnum = 0; @@ -366,7 +366,7 @@ public class UnInvertedField extends DocTermOrds { if ((code & 0x80) == 0) { if (delta == 0) break; tnum += delta - TNUM_OFFSET; - counts[tnum]++; + counts.incrementCount(tnum,1); delta = 0; } code >>>= 8; @@ -377,16 +377,17 @@ public class UnInvertedField extends DocTermOrds { if (doNegative) { for (int i=0; i= 0) { int all = 0; // overflow potential for (int i=0; i= numTermsInField) { - int[] arr = processor.countAcc.getCountArray(); - getCountsInArray(processor, arr); + getCounts(processor, processor.countAcc); /*** debugging int sz = processor.countAcc.getCountArray().length; diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java index efe4af723f5..539e16b8a62 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java @@ -209,6 +209,11 @@ public class UniqueAgg extends StrAggValueSource { sets = new LongSet[sets.length]; } + @Override + public void resize(Resizer resizer) { + resizer.resize(sets, null); + } + @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { values = DocValues.getNumeric(readerContext.reader(), sf.getName());