diff --git a/TODO b/TODO
index cc5b8afb545..f63cf13c227 100644
--- a/TODO
+++ b/TODO
@@ -2,6 +2,7 @@ nocommit this!
 TODO
   - associations
+  - simplify ddq api
   - SSDVValueSourceFacets?
   - we could put more stuff into the "schema", e.g. this field is
     sorted-set-DV and that one is taxo?
 
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java b/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
index b2654ce6eb7..661164f4025 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
@@ -180,7 +180,7 @@ public class DrillSideways {
       // Just do ordinary search when there are no drill-downs:
       FacetsCollector c = FacetsCollector.create(getDrillDownAccumulator(fsp));
       searcher.search(query, MultiCollector.wrap(hitCollector, c));
-      return new DrillSidewaysResult(c.getFacetResults(), null);
+      return new DrillSidewaysResult(c.getFacetResults(), null);
     }
 
     List<FacetRequest> ddRequests = new ArrayList<FacetRequest>();
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/MultiFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/MultiFacets.java
new file mode 100644
index 00000000000..ca73edba8b2
--- /dev/null
+++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/MultiFacets.java
@@ -0,0 +1,56 @@
+package org.apache.lucene.facet.simple;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/** Maps specified dims to provided Facets impls; else, uses
+ *  the default Facets impl. */
+public class MultiFacets extends Facets {
+  private final Map<String,Facets> dimToFacets;
+  private final Facets defaultFacets;
+
+  public MultiFacets(Map<String,Facets> dimToFacets, Facets defaultFacets) {
+    this.dimToFacets = dimToFacets;
+    this.defaultFacets = defaultFacets;
+  }
+
+  public SimpleFacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
+    Facets facets = dimToFacets.get(dim);
+    if (facets == null) {
+      facets = defaultFacets;
+    }
+    return facets.getTopChildren(topN, dim, path);
+  }
+
+  public Number getSpecificValue(String dim, String... path) throws IOException {
+    Facets facets = dimToFacets.get(dim);
+    if (facets == null) {
+      facets = defaultFacets;
+    }
+    return facets.getSpecificValue(dim, path);
+  }
+
+  public List<SimpleFacetResult> getAllDims(int topN) throws IOException {
+    // nocommit can/should we impl this?  ie, sparse
+    // faceting after drill sideways
+    throw new UnsupportedOperationException();
+  }
+}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java
index eb98893bff1..cc0681c2bd3 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java
@@ -118,17 +118,17 @@ public final class SimpleDrillDownQuery extends Query {
    * Adds one dimension of drill downs; if you pass multiple values they are
    * OR'd, and then the entire dimension is AND'd against the base query.
    */
-  // nocommit can we remove CatPath here?
+  // nocommit can we remove FacetLabel here?
   public void add(FacetLabel... paths) {
     add(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME, Constants.DEFAULT_DELIM_CHAR, paths);
   }
 
-  // nocommit can we remove CatPath here?
+  // nocommit can we remove FacetLabel here?
   public void add(String field, FacetLabel... paths) {
     add(field, Constants.DEFAULT_DELIM_CHAR, paths);
   }
 
-  // nocommit can we remove CatPath here?
+  // nocommit can we remove FacetLabel here?
   public void add(String field, char delimChar, FacetLabel... paths) {
     Query q;
     if (paths[0].length == 0) {
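A short usage sketch for the MultiFacets class above may help before the drill-sideways code that follows. It assumes only the constructor and getTopChildren signature declared in this patch; the method name topAuthors and the authorFacets/defaultFacets instances are hypothetical, standing in for real per-dimension Facets impls:

    // Sketch, not part of the patch: route per-dimension Facets through
    // MultiFacets; unmapped dimensions fall through to the default impl.
    static SimpleFacetResult topAuthors(Facets authorFacets, Facets defaultFacets) throws IOException {
      Map<String,Facets> dimToFacets = new HashMap<String,Facets>();
      dimToFacets.put("Author", authorFacets);      // hypothetical per-dim impl
      Facets facets = new MultiFacets(dimToFacets, defaultFacets);
      return facets.getTopChildren(10, "Author");   // served by authorFacets
    }

Note that getAllDims still throws UnsupportedOperationException above, so for now only per-dim access works through MultiFacets.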
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillSideways.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillSideways.java
new file mode 100644
index 00000000000..edca6caaf47
--- /dev/null
+++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillSideways.java
@@ -0,0 +1,429 @@
+package org.apache.lucene.facet.simple;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.DrillDownQuery;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiCollector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.Weight;
+
+/**
+ * Computes drill down and sideways counts for the provided
+ * {@link SimpleDrillDownQuery}.  Drill sideways counts include
+ * alternative values/aggregates for the drill-down
+ * dimensions so that a dimension does not disappear after
+ * the user drills down into it.
+ *

+ * <p> Use one of the search methods to
+ * do the search, and then get the hits and facet
+ * results from the returned {@link SimpleDrillSidewaysResult}.
+ *
+ * <p>

NOTE: this allocates one {@link + * FacetsCollector} for each drill-down, plus one. If your + * index has high number of facet labels then this will + * multiply your memory usage. + * + * @lucene.experimental + */ + +public class SimpleDrillSideways { + + protected final IndexSearcher searcher; + protected final TaxonomyReader taxoReader; + protected final SortedSetDocValuesReaderState state; + protected final FacetsConfig facetsConfig; + + /** + * Create a new {@code DrillSideways} instance, assuming the categories were + * indexed with {@link FacetFields}. + */ + public SimpleDrillSideways(IndexSearcher searcher, FacetsConfig facetsConfig, TaxonomyReader taxoReader) { + this(searcher, facetsConfig, taxoReader, null); + } + + /** + * Create a new {@code DrillSideways} instance, assuming the categories were + * indexed with {@link SortedSetDocValuesFacetFields}. + */ + public SimpleDrillSideways(IndexSearcher searcher, FacetsConfig facetsConfig, SortedSetDocValuesReaderState state) { + this(searcher, facetsConfig, null, state); + } + + /** + * Create a new {@code DrillSideways} instance, where some + * dimensions are sorted set facets and others are + * taxononmy facets. + */ + public SimpleDrillSideways(IndexSearcher searcher, FacetsConfig facetsConfig, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state) { + this.searcher = searcher; + this.facetsConfig = facetsConfig; + this.taxoReader = taxoReader; + this.state = state; + } + + /** Subclass can override to customize per-dim Facets + * impl. */ + protected Facets buildFacetsResult(SimpleFacetsCollector drillDowns, SimpleFacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { + + Facets drillDownFacets = new TaxonomyFacetCounts(taxoReader, facetsConfig, drillDowns); + + if (drillSideways == null) { + return drillDownFacets; + } else { + Map drillSidewaysFacets = new HashMap(); + for(int i=0;i drillDownDims = query.getDims(); + + SimpleFacetsCollector drillDownCollector = new SimpleFacetsCollector(); + + if (drillDownDims.isEmpty()) { + // There are no drill-down dims, so there is no + // drill-sideways to compute: + searcher.search(query, MultiCollector.wrap(hitCollector, drillDownCollector)); + return new SimpleDrillSidewaysResult(buildFacetsResult(drillDownCollector, null, null), null); + } + + BooleanQuery ddq = query.getBooleanQuery(); + BooleanClause[] clauses = ddq.getClauses(); + + Query baseQuery; + int startClause; + if (clauses.length == drillDownDims.size()) { + // TODO: we could optimize this pure-browse case by + // making a custom scorer instead: + baseQuery = new MatchAllDocsQuery(); + startClause = 0; + } else { + assert clauses.length == 1+drillDownDims.size(); + baseQuery = clauses[0].getQuery(); + startClause = 1; + } + + SimpleFacetsCollector[] drillSidewaysCollectors = new SimpleFacetsCollector[drillDownDims.size()]; + + int idx = 0; + for(String dim : drillDownDims.keySet()) { + drillSidewaysCollectors[idx++] = new SimpleFacetsCollector(); + } + + boolean useCollectorMethod = scoreSubDocsAtOnce(); + + Term[][] drillDownTerms = null; + + if (!useCollectorMethod) { + // Optimistic: assume subQueries of the DDQ are either + // TermQuery or BQ OR of TermQuery; if this is wrong + // then we detect it and fallback to the mome general + // but slower DrillSidewaysCollector: + drillDownTerms = new Term[clauses.length-startClause][]; + for(int i=startClause;i drillDownDims = ddq.getDims(); + + BooleanQuery topQuery = new BooleanQuery(true); + final SimpleDrillSidewaysCollector collector 
= new SimpleDrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors, + drillDownDims); + + // TODO: if query is already a BQ we could copy that and + // add clauses to it, instead of doing BQ inside BQ + // (should be more efficient)? Problem is this can + // affect scoring (coord) ... too bad we can't disable + // coord on a clause by clause basis: + topQuery.add(baseQuery, BooleanClause.Occur.MUST); + + // NOTE: in theory we could just make a single BQ, with + // +query a b c minShouldMatch=2, but in this case, + // annoyingly, BS2 wraps a sub-scorer that always + // returns 2 as the .freq(), not how many of the + // SHOULD clauses matched: + BooleanQuery subQuery = new BooleanQuery(true); + + Query wrappedSubQuery = new QueryWrapper(subQuery, + new SetWeight() { + @Override + public void set(Weight w) { + collector.setWeight(w, -1); + } + }); + Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery); + + // Don't impact score of original query: + constantScoreSubQuery.setBoost(0.0f); + + topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST); + + // Unfortunately this sub-BooleanQuery + // will never get BS1 because today BS1 only works + // if topScorer=true... and actually we cannot use BS1 + // anyways because we need subDocsScoredAtOnce: + int dimIndex = 0; + for(int i=startClause;i weightToIndex = new IdentityHashMap(); + + private Scorer mainScorer; + + public SimpleDrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors, + Map dims) { + this.hitCollector = hitCollector; + this.drillDownCollector = drillDownCollector; + this.drillSidewaysCollectors = drillSidewaysCollectors; + subScorers = new Scorer[dims.size()]; + + if (dims.size() == 1) { + // When we have only one dim, we insert the + // MatchAllDocsQuery, bringing the clause count to + // 2: + exactCount = 2; + } else { + exactCount = dims.size(); + } + } + + @Override + public void collect(int doc) throws IOException { + //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount); + + if (mainScorer == null) { + // This segment did not have any docs with any + // drill-down field & value: + return; + } + + if (mainScorer.freq() == exactCount) { + // All sub-clauses from the drill-down filters + // matched, so this is a "real" hit, so we first + // collect in both the hitCollector and the + // drillDown collector: + //System.out.println(" hit " + drillDownCollector); + hitCollector.collect(doc); + if (drillDownCollector != null) { + drillDownCollector.collect(doc); + } + + // Also collect across all drill-sideways counts so + // we "merge in" drill-down counts for this + // dimension. + for(int i=0;i doc: "subDoc=" + subDoc + " doc=" + doc; + drillSidewaysCollectors[i].collect(doc); + assert allMatchesFrom(i+1, doc); + found = true; + break; + } + } + assert found; + } + } + + // Only used by assert: + private boolean allMatchesFrom(int startFrom, int doc) { + for(int i=startFrom;i 1 || (nullCount == 1 && dims.length == 1)) { + return null; + } + + // Sort drill-downs by most restrictive first: + Arrays.sort(dims); + + // TODO: it could be better if we take acceptDocs + // into account instead of baseScorer? 
+        Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
+
+        if (baseScorer == null) {
+          return null;
+        }
+
+        return new SimpleDrillSidewaysScorer(this, context,
+                                             baseScorer,
+                                             drillDownCollector, dims);
+      }
+    };
+  }
+
+  // TODO: these should do "deeper" equals/hash on the 2-D drillDownTerms array
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = super.hashCode();
+    result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
+    result = prime * result
+        + ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
+    result = prime * result + Arrays.hashCode(drillDownTerms);
+    result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (!super.equals(obj)) return false;
+    if (getClass() != obj.getClass()) return false;
+    SimpleDrillSidewaysQuery other = (SimpleDrillSidewaysQuery) obj;
+    if (baseQuery == null) {
+      if (other.baseQuery != null) return false;
+    } else if (!baseQuery.equals(other.baseQuery)) return false;
+    if (drillDownCollector == null) {
+      if (other.drillDownCollector != null) return false;
+    } else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
+    if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
+    if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
+    return true;
+  }
+}
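Before the scorer file below, it may help to state the invariant that both SimpleDrillSidewaysCollector (above) and SimpleDrillSidewaysScorer rely on: a document matching the base query plus all N drill-down dimensions is a true hit and is collected everywhere, while a document missing exactly one dimension is a near-miss, collected only into that dimension's sideways collector. A minimal standalone sketch of that routing decision; the boolean[] and the name classify are illustrative, not part of the patch:

    // Returns -1 for a true hit, the single missing dim for a near-miss,
    // or -2 when two or more dims failed (doc contributes to no counts).
    static int classify(boolean[] dimMatches) {
      int missingDim = -1;
      for (int dim = 0; dim < dimMatches.length; dim++) {
        if (!dimMatches[dim]) {
          if (missingDim != -1) {
            return -2;       // missed two or more dims: discard
          }
          missingDim = dim;
        }
      }
      return missingDim;     // -1: collect into hits + all counts; else sideways only
    }

In the collector above, the same test is done arithmetically: the wrapped scorer's freq() (the number of matching drill-down sub-clauses) is compared against exactCount to separate true hits from near-misses.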
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillSidewaysScorer.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillSidewaysScorer.java
new file mode 100644
index 00000000000..6be41af1f23
--- /dev/null
+++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillSidewaysScorer.java
@@ -0,0 +1,654 @@
+package org.apache.lucene.facet.simple;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.FixedBitSet;
+
+class SimpleDrillSidewaysScorer extends Scorer {
+
+  //private static boolean DEBUG = false;
+
+  private final Collector drillDownCollector;
+
+  private final DocsEnumsAndFreq[] dims;
+
+  // DrillDown DocsEnums:
+  private final Scorer baseScorer;
+
+  private final AtomicReaderContext context;
+
+  private static final int CHUNK = 2048;
+  private static final int MASK = CHUNK-1;
+
+  private int collectDocID = -1;
+  private float collectScore;
+
+  SimpleDrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
+                            DocsEnumsAndFreq[] dims) {
+    super(w);
+    this.dims = dims;
+    this.context = context;
+    this.baseScorer = baseScorer;
+    this.drillDownCollector = drillDownCollector;
+  }
+
+  @Override
+  public void score(Collector collector) throws IOException {
+    //if (DEBUG) {
+    //  System.out.println("\nscore: reader=" + context.reader());
+    //}
+    //System.out.println("score r=" + context.reader());
+    collector.setScorer(this);
+    if (drillDownCollector != null) {
+      drillDownCollector.setScorer(this);
+      drillDownCollector.setNextReader(context);
+    }
+    for(DocsEnumsAndFreq dim : dims) {
+      dim.sidewaysCollector.setScorer(this);
+      dim.sidewaysCollector.setNextReader(context);
+    }
+
+    // TODO: if we ever allow null baseScorer ... it will
+    // mean we DO score docs out of order ... hmm, or if we
+    // change up the order of the conjunctions below
+    assert baseScorer != null;
+
+    // Position all scorers to their first matching doc:
+    baseScorer.nextDoc();
+    for(DocsEnumsAndFreq dim : dims) {
+      for (DocsEnum docsEnum : dim.docsEnums) {
+        if (docsEnum != null) {
+          docsEnum.nextDoc();
+        }
+      }
+    }
+
+    final int numDims = dims.length;
+
+    DocsEnum[][] docsEnums = new DocsEnum[numDims][];
+    Collector[] sidewaysCollectors = new Collector[numDims];
+    long drillDownCost = 0;
+    for(int dim=0;dim<numDims;dim++) {
+      docsEnums[dim] = dims[dim].docsEnums;
+      sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
+      drillDownCost += dims[dim].maxCost;
+    }
+
+    long baseQueryCost = baseScorer.cost();
+
+    if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
+      //System.out.println("drillDownAdvance");
+      doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
+    } else {
+      //System.out.println("union");
+      doUnionScoring(collector, docsEnums, sidewaysCollectors);
+    }
+  }
+
+  /** Used when drill downs are highly constraining vs
+   *  baseQuery.
*/ + private void doDrillDownAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException { + final int maxDoc = context.reader().maxDoc(); + final int numDims = dims.length; + + //if (DEBUG) { + // System.out.println(" doDrillDownAdvanceScoring"); + //} + + // TODO: maybe a class like BS, instead of parallel arrays + int[] filledSlots = new int[CHUNK]; + int[] docIDs = new int[CHUNK]; + float[] scores = new float[CHUNK]; + int[] missingDims = new int[CHUNK]; + int[] counts = new int[CHUNK]; + + docIDs[0] = -1; + int nextChunkStart = CHUNK; + + final FixedBitSet seen = new FixedBitSet(CHUNK); + + while (true) { + //if (DEBUG) { + // System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]); + //} + + // First dim: + //if (DEBUG) { + // System.out.println(" dim0"); + //} + for(DocsEnum docsEnum : docsEnums[0]) { + if (docsEnum == null) { + continue; + } + int docID = docsEnum.docID(); + while (docID < nextChunkStart) { + int slot = docID & MASK; + + if (docIDs[slot] != docID) { + seen.set(slot); + // Mark slot as valid: + //if (DEBUG) { + // System.out.println(" set docID=" + docID + " id=" + context.reader().document(docID).get("id")); + //} + docIDs[slot] = docID; + missingDims[slot] = 1; + counts[slot] = 1; + } + + docID = docsEnum.nextDoc(); + } + } + + // Second dim: + //if (DEBUG) { + // System.out.println(" dim1"); + //} + for(DocsEnum docsEnum : docsEnums[1]) { + if (docsEnum == null) { + continue; + } + int docID = docsEnum.docID(); + while (docID < nextChunkStart) { + int slot = docID & MASK; + + if (docIDs[slot] != docID) { + // Mark slot as valid: + seen.set(slot); + //if (DEBUG) { + // System.out.println(" set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id")); + //} + docIDs[slot] = docID; + missingDims[slot] = 0; + counts[slot] = 1; + } else { + // TODO: single-valued dims will always be true + // below; we could somehow specialize + if (missingDims[slot] >= 1) { + missingDims[slot] = 2; + counts[slot] = 2; + //if (DEBUG) { + // System.out.println(" set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id")); + //} + } else { + counts[slot] = 1; + //if (DEBUG) { + // System.out.println(" set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id")); + //} + } + } + + docID = docsEnum.nextDoc(); + } + } + + // After this we can "upgrade" to conjunction, because + // any doc not seen by either dim 0 or dim 1 cannot be + // a hit or a near miss: + + //if (DEBUG) { + // System.out.println(" baseScorer"); + //} + + // Fold in baseScorer, using advance: + int filledCount = 0; + int slot0 = 0; + while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != -1) { + int ddDocID = docIDs[slot0]; + assert ddDocID != -1; + + int baseDocID = baseScorer.docID(); + if (baseDocID < ddDocID) { + baseDocID = baseScorer.advance(ddDocID); + } + if (baseDocID == ddDocID) { + //if (DEBUG) { + // System.out.println(" keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id")); + //} + scores[slot0] = baseScorer.score(); + filledSlots[filledCount++] = slot0; + counts[slot0]++; + } else { + //if (DEBUG) { + // System.out.println(" no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id")); + //} + docIDs[slot0] = -1; + + // TODO: we could jump slot0 forward to the + // baseDocID ... 
but we'd need to set docIDs for + // intervening slots to -1 + } + slot0++; + } + seen.clear(0, CHUNK); + + if (filledCount == 0) { + if (nextChunkStart >= maxDoc) { + break; + } + nextChunkStart += CHUNK; + continue; + } + + // TODO: factor this out & share w/ union scorer, + // except we start from dim=2 instead: + for(int dim=2;dim= dim) { + // TODO: single-valued dims will always be true + // below; we could somehow specialize + if (missingDims[slot] >= dim) { + //if (DEBUG) { + // System.out.println(" set docID=" + docID + " count=" + (dim+2)); + //} + missingDims[slot] = dim+1; + counts[slot] = dim+2; + } else { + //if (DEBUG) { + // System.out.println(" set docID=" + docID + " missing count=" + (dim+1)); + //} + counts[slot] = dim+1; + } + } + // TODO: sometimes use advance? + docID = docsEnum.nextDoc(); + } + } + } + + // Collect: + //if (DEBUG) { + // System.out.println(" now collect: " + filledCount + " hits"); + //} + for(int i=0;i= maxDoc) { + break; + } + + nextChunkStart += CHUNK; + } + } + + @Override + public int docID() { + return collectDocID; + } + + @Override + public float score() { + return collectScore; + } + + @Override + public int freq() { + return 1+dims.length; + } + + @Override + public int nextDoc() { + throw new UnsupportedOperationException(); + } + + @Override + public int advance(int target) { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + return baseScorer.cost(); + } + + @Override + public Collection getChildren() { + return Collections.singletonList(new ChildScorer(baseScorer, "MUST")); + } + + static class DocsEnumsAndFreq implements Comparable { + DocsEnum[] docsEnums; + // Max cost for all docsEnums for this dim: + long maxCost; + Collector sidewaysCollector; + String dim; + + @Override + public int compareTo(DocsEnumsAndFreq other) { + if (maxCost < other.maxCost) { + return -1; + } else if (maxCost > other.maxCost) { + return 1; + } else { + return 0; + } + } + } +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleFacetResult.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleFacetResult.java index 28cedba0352..4be86b74b70 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleFacetResult.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleFacetResult.java @@ -31,6 +31,8 @@ public final class SimpleFacetResult { /** Child counts. */ public final LabelAndValue[] labelValues; + + // nocommit also return number of children? public SimpleFacetResult(FacetLabel path, Number value, LabelAndValue[] labelValues) { this.path = path; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSimpleDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSimpleDrillSideways.java new file mode 100644 index 00000000000..c8033296a8d --- /dev/null +++ b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSimpleDrillSideways.java @@ -0,0 +1,1152 @@ +package org.apache.lucene.facet.simple; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.facet.FacetTestCase; +import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.params.FacetIndexingParams; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.simple.SimpleDrillSideways.SimpleDrillSidewaysResult; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; +import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField.Type; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.InPlaceMergeSorter; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util._TestUtil; +import org.junit.Test; + +public class TestSimpleDrillSideways extends FacetTestCase { + + private DirectoryTaxonomyWriter taxoWriter; + private RandomIndexWriter writer; + private FacetFields facetFields; + + public void testBasic() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + + // Writes facet ords to a separate directory from the + // main index: + taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + FacetsConfig config = new FacetsConfig(); + config.setHierarchical("Publish Date"); + + IndexWriter writer = new FacetIndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())), 
taxoWriter, config); + + Document doc = new Document(); + doc.add(new FacetField("Author", "Bob")); + doc.add(new FacetField("Publish Date", "2010", "10", "15")); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new FacetField("Author", "Lisa")); + doc.add(new FacetField("Publish Date", "2010", "10", "20")); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new FacetField("Author", "Lisa")); + doc.add(new FacetField("Publish Date", "2012", "1", "1")); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new FacetField("Author", "Susan")); + doc.add(new FacetField("Publish Date", "2012", "1", "7")); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new FacetField("Author", "Frank")); + doc.add(new FacetField("Publish Date", "1999", "5", "5")); + writer.addDocument(doc); + + // NRT open + IndexSearcher searcher = newSearcher(DirectoryReader.open(writer, true)); + writer.close(); + + //System.out.println("searcher=" + searcher); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + taxoWriter.close(); + + SimpleDrillSideways ds = new SimpleDrillSideways(searcher, config, taxoReader); + + // Simple case: drill-down on a single field; in this + // case the drill-sideways + drill-down counts == + // drill-down of just the query: + SimpleDrillDownQuery ddq = new SimpleDrillDownQuery(); + ddq.add(new FacetLabel("Author", "Lisa")); + SimpleDrillSidewaysResult r = ds.search(null, ddq, 10); + assertEquals(2, r.hits.totalHits); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + assertEquals("Publish Date (2)\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published twice, and Frank/Susan/Bob + // published once: + assertEquals("Author (5)\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + + // Same simple case, but no baseQuery (pure browse): + // drill-down on a single field; in this case the + // drill-sideways + drill-down counts == drill-down of + // just the query: + ddq = new SimpleDrillDownQuery(); + ddq.add(new FacetLabel("Author", "Lisa")); + r = ds.search(null, ddq, 10); + + assertEquals(2, r.hits.totalHits); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + assertEquals("Publish Date (2)\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published twice, and Frank/Susan/Bob + // published once: + assertEquals("Author (5)\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + + // Another simple case: drill-down on on single fields + // but OR of two values + ddq = new SimpleDrillDownQuery(); + ddq.add(new FacetLabel("Author", "Lisa"), new FacetLabel("Author", "Bob")); + r = ds.search(null, ddq, 10); + assertEquals(3, r.hits.totalHits); + // Publish Date is only drill-down: Lisa and Bob + // (drill-down) published twice in 2010 and once in 2012: + assertEquals("Publish Date (3)\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published twice, and Frank/Susan/Bob + // published once: + assertEquals("Author (5)\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + + // More 
interesting case: drill-down on two fields + ddq = new SimpleDrillDownQuery(); + ddq.add(new FacetLabel("Author", "Lisa")); + ddq.add(new FacetLabel("Publish Date", "2010")); + r = ds.search(null, ddq, 10); + assertEquals(1, r.hits.totalHits); + // Publish Date is drill-sideways + drill-down: Lisa + // (drill-down) published once in 2010 and once in 2012: + assertEquals("Publish Date (2)\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + // Author is drill-sideways + drill-down: + // only Lisa & Bob published (once each) in 2010: + assertEquals("Author (2)\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString()); + + // Even more interesting case: drill down on two fields, + // but one of them is OR + ddq = new SimpleDrillDownQuery(); + + // Drill down on Lisa or Bob: + ddq.add(new FacetLabel("Author", "Lisa"), + new FacetLabel("Author", "Bob")); + ddq.add(new FacetLabel("Publish Date", "2010")); + r = ds.search(null, ddq, 10); + assertEquals(2, r.hits.totalHits); + // Publish Date is both drill-sideways + drill-down: + // Lisa or Bob published twice in 2010 and once in 2012: + assertEquals("Publish Date (3)\n 2010 (2)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + // Author is drill-sideways + drill-down: + // only Lisa & Bob published (once each) in 2010: + assertEquals("Author (2)\n Bob (1)\n Lisa (1)\n", r.facets.getTopChildren(10, "Author").toString()); + + // Test drilling down on invalid field: + ddq = new SimpleDrillDownQuery(); + ddq.add(new FacetLabel("Foobar", "Baz")); + r = ds.search(null, ddq, 10); + assertEquals(0, r.hits.totalHits); + assertNull(r.facets.getTopChildren(10, "Publish Date")); + assertNull(r.facets.getTopChildren(10, "Foobar")); + + // Test drilling down on valid term or'd with invalid term: + ddq = new SimpleDrillDownQuery(); + ddq.add(new FacetLabel("Author", "Lisa"), + new FacetLabel("Author", "Tom")); + r = ds.search(null, ddq, 10); + assertEquals(2, r.hits.totalHits); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + assertEquals("Publish Date (2)\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published twice, and Frank/Susan/Bob + // published once: + assertEquals("Author (5)\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", r.facets.getTopChildren(10, "Author").toString()); + + // LUCENE-4915: test drilling down on a dimension but + // NOT facet counting it: + ddq = new SimpleDrillDownQuery(); + ddq.add(new FacetLabel("Author", "Lisa"), + new FacetLabel("Author", "Tom")); + r = ds.search(null, ddq, 10); + assertEquals(2, r.hits.totalHits); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + assertEquals("Publish Date (2)\n 2010 (1)\n 2012 (1)\n", r.facets.getTopChildren(10, "Publish Date").toString()); + + // Test main query gets null scorer: + ddq = new SimpleDrillDownQuery(new TermQuery(new Term("foobar", "baz"))); + ddq.add(new FacetLabel("Author", "Lisa")); + r = ds.search(null, ddq, 10); + + assertEquals(0, r.hits.totalHits); + assertNull(r.facets.getTopChildren(10, "Publish Date")); + assertNull(r.facets.getTopChildren(10, "Author")); + searcher.getIndexReader().close(); + taxoReader.close(); + dir.close(); + taxoDir.close(); + } + + /* + public void testSometimesInvalidDrillDown() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + writer = new 
RandomIndexWriter(random(), dir); + + // Writes facet ords to a separate directory from the + // main index: + taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + // Reused across documents, to add the necessary facet + // fields: + facetFields = new FacetFields(taxoWriter); + + add("Author/Bob", "Publish Date/2010/10/15"); + add("Author/Lisa", "Publish Date/2010/10/20"); + writer.commit(); + // 2nd segment has no Author: + add("Foobar/Lisa", "Publish Date/2012/1/1"); + + // NRT open + IndexSearcher searcher = newSearcher(writer.getReader()); + writer.close(); + + //System.out.println("searcher=" + searcher); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + taxoWriter.close(); + + // Count both "Publish Date" and "Author" dimensions, in + // drill-down: + FacetSearchParams fsp = new FacetSearchParams( + new CountFacetRequest(new FacetLabel("Publish Date"), 10), + new CountFacetRequest(new FacetLabel("Author"), 10)); + + DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); + ddq.add(new FacetLabel("Author", "Lisa")); + DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + + assertEquals(1, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + assertEquals("Publish Date: 2010=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published once, and Bob + // published once: + assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1))); + + searcher.getIndexReader().close(); + taxoReader.close(); + dir.close(); + taxoDir.close(); + } + + private static class Doc implements Comparable { + String id; + String contentToken; + + public Doc() {} + + // -1 if the doc is missing this dim, else the index + // -into the values for this dim: + int[] dims; + + // 2nd value per dim for the doc (so we test + // multi-valued fields): + int[] dims2; + boolean deleted; + + @Override + public int compareTo(Doc other) { + return id.compareTo(other.id); + } + } + + private double aChance, bChance, cChance; + + private String randomContentToken(boolean isQuery) { + double d = random().nextDouble(); + if (isQuery) { + if (d < 0.33) { + return "a"; + } else if (d < 0.66) { + return "b"; + } else { + return "c"; + } + } else { + if (d <= aChance) { + return "a"; + } else if (d < aChance + bChance) { + return "b"; + } else { + return "c"; + } + } + } + + public void testMultipleRequestsPerDim() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + writer = new RandomIndexWriter(random(), dir); + + // Writes facet ords to a separate directory from the + // main index: + taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + // Reused across documents, to add the necessary facet + // fields: + facetFields = new FacetFields(taxoWriter); + + add("dim/a/x"); + add("dim/a/y"); + add("dim/a/z"); + add("dim/b"); + add("dim/c"); + add("dim/d"); + + // NRT open + IndexSearcher searcher = newSearcher(writer.getReader()); + writer.close(); + + //System.out.println("searcher=" + searcher); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + taxoWriter.close(); + + // Two requests against the same dim: + FacetSearchParams fsp = new FacetSearchParams( + new CountFacetRequest(new FacetLabel("dim"), 10), + new 
CountFacetRequest(new FacetLabel("dim", "a"), 10)); + + DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); + ddq.add(new FacetLabel("dim", "a")); + DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + + assertEquals(3, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + assertEquals("dim: a=3 d=1 c=1 b=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published twice, and Frank/Susan/Bob + // published once: + assertEquals("a (3)\n z (1)\n y (1)\n x (1)\n", FacetTestUtils.toSimpleString(r.facetResults.get(1))); + + searcher.getIndexReader().close(); + taxoReader.close(); + dir.close(); + taxoDir.close(); + } + + public void testRandom() throws Exception { + + boolean canUseDV = defaultCodecSupportsSortedSet(); + + while (aChance == 0.0) { + aChance = random().nextDouble(); + } + while (bChance == 0.0) { + bChance = random().nextDouble(); + } + while (cChance == 0.0) { + cChance = random().nextDouble(); + } + //aChance = .01; + //bChance = 0.5; + //cChance = 1.0; + double sum = aChance + bChance + cChance; + aChance /= sum; + bChance /= sum; + cChance /= sum; + + int numDims = _TestUtil.nextInt(random(), 2, 5); + //int numDims = 3; + int numDocs = atLeast(3000); + //int numDocs = 20; + if (VERBOSE) { + System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance); + } + String[][] dimValues = new String[numDims][]; + int valueCount = 2; + + for(int dim=0;dim values = new HashSet(); + while (values.size() < valueCount) { + String s; + while (true) { + s = _TestUtil.randomRealisticUnicodeString(random()); + //s = _TestUtil.randomSimpleString(random()); + // We cannot include this character else we hit + // IllegalArgExc: + if (s.indexOf(FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR) == -1 && + (!canUseDV || s.indexOf('/') == -1)) { + break; + } + } + if (s.length() > 0) { + values.add(s); + } + } + dimValues[dim] = values.toArray(new String[values.size()]); + valueCount *= 2; + } + + List docs = new ArrayList(); + for(int i=0;i paths = new ArrayList(); + + if (VERBOSE) { + System.out.println(" doc id=" + rawDoc.id + " token=" + rawDoc.contentToken); + } + for(int dim=0;dim lastDocID; + lastDocID = doc; + } + + @Override + public void setNextReader(AtomicReaderContext context) { + lastDocID = -1; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + }, fsp); + + // Also separately verify that DS respects the + // scoreSubDocsAtOnce method, to ensure that all + // subScorers are on the same docID: + if (!anyMultiValuedDrillDowns) { + // Can only do this test when there are no OR'd + // drill-down values, beacuse in that case it's + // easily possible for one of the DD terms to be on + // a future docID: + new DrillSideways(s, tr) { + @Override + protected boolean scoreSubDocsAtOnce() { + return true; + } + }.search(ddq, new AssertingSubDocsAtOnceCollector(), fsp); + } + + SimpleFacetResult expected = slowDrillSidewaysSearch(s, requests, docs, contentToken, drillDowns, dimValues, filter); + + Sort sort = new Sort(new SortField("id", SortField.Type.STRING)); + DrillSideways ds; + if (doUseDV) { + ds = new DrillSideways(s, sortedSetDVState); + } else { + ds = new DrillSideways(s, tr); + } + + // Retrieve all facets: + DrillSidewaysResult actual = ds.search(ddq, 
filter, null, numDocs, sort, true, true, fsp); + + TopDocs hits = s.search(baseQuery, numDocs); + Map scores = new HashMap(); + for(ScoreDoc sd : hits.scoreDocs) { + scores.put(s.doc(sd.doc).get("id"), sd.score); + } + if (VERBOSE) { + System.out.println(" verify all facets"); + } + verifyEquals(requests, dimValues, s, expected, actual, scores, -1, doUseDV); + + // Retrieve topN facets: + int topN = _TestUtil.nextInt(random(), 1, 20); + + List newRequests = new ArrayList(); + for(FacetRequest oldRequest : requests) { + newRequests.add(new CountFacetRequest(oldRequest.categoryPath, topN)); + } + fsp = new FacetSearchParams(newRequests); + actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp); + if (VERBOSE) { + System.out.println(" verify topN=" + topN); + } + verifyEquals(newRequests, dimValues, s, expected, actual, scores, topN, doUseDV); + + // Make sure drill down doesn't change score: + TopDocs ddqHits = s.search(ddq, filter, numDocs); + assertEquals(expected.hits.size(), ddqHits.totalHits); + for(int i=0;i hits; + int[][] counts; + int[] uniqueCounts; + public SimpleFacetResult() {} + } + + private int[] getTopNOrds(final int[] counts, final String[] values, int topN) { + final int[] ids = new int[counts.length]; + for(int i=0;i countj) { + return -1; + } else if (counti < countj) { + return 1; + } else { + // ... then by label ascending: + return new BytesRef(values[ids[i]]).compareTo(new BytesRef(values[ids[j]])); + } + } + + }.sort(0, ids.length); + + if (topN > ids.length) { + topN = ids.length; + } + + int numSet = topN; + for(int i=0;i requests, List docs, + String contentToken, String[][] drillDowns, + String[][] dimValues, Filter onlyEven) throws Exception { + int numDims = dimValues.length; + + List hits = new ArrayList(); + Counters drillDownCounts = new Counters(dimValues); + Counters[] drillSidewaysCounts = new Counters[dimValues.length]; + for(int dim=0;dim requests, String[][] dimValues, IndexSearcher s, SimpleFacetResult expected, + DrillSidewaysResult actual, Map scores, int topN, boolean isSortedSetDV) throws Exception { + if (VERBOSE) { + System.out.println(" verify totHits=" + expected.hits.size()); + } + assertEquals(expected.hits.size(), actual.hits.totalHits); + assertEquals(expected.hits.size(), actual.hits.scoreDocs.length); + for(int i=0;i subResults = fr.getFacetResultNode().subResults; + if (VERBOSE) { + System.out.println(" dim" + dim); + System.out.println(" actual"); + } + + Map actualValues = new HashMap(); + idx = 0; + for(FacetResultNode childNode : subResults) { + actualValues.put(childNode.label.components[1], (int) childNode.value); + if (VERBOSE) { + System.out.println(" " + idx + ": " + new BytesRef(childNode.label.components[1]) + ": " + (int) childNode.value); + idx++; + } + } + + if (topN != -1) { + int[] topNIDs = getTopNOrds(expected.counts[dim], dimValues[dim], topN); + if (VERBOSE) { + idx = 0; + System.out.println(" expected (sorted)"); + for(int i=0;i
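
For the chunked scoring in SimpleDrillSidewaysScorer.doDrillDownAdvanceScoring above: docIDs are processed in windows of CHUNK=2048, each docID maps to a slot with a bitmask, and parallel arrays accumulate how many drill-down dimensions matched each slot. A simplified sketch of just the slot bookkeeping; the real method also tracks missingDims, per-slot scores, and a seen bitset:

    // CHUNK must be a power of 2 so that (docID & MASK) equals docID % CHUNK.
    static final int CHUNK = 2048;
    static final int MASK = CHUNK - 1;

    // Sketch, not part of the patch: called once per (dim, matching docID)
    // within the current chunk.
    static void mark(int docID, int[] docIDs, int[] counts) {
      int slot = docID & MASK;        // slot of this doc inside the chunk
      if (docIDs[slot] != docID) {    // first dimension to touch this doc
        docIDs[slot] = docID;
        counts[slot] = 1;
      } else {                        // another dimension matched the same doc
        counts[slot]++;
      }
    }

After all dimensions and the base query have been folded in, counts[slot] distinguishes a full hit from a near-miss of exactly one dimension, which is what routes the doc to the right collectors.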
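And for the ordering that the random test's getTopNOrds encodes with an InPlaceMergeSorter above: count descending, ties broken by label ascending in BytesRef (unsigned UTF-8 byte) order. A standalone equivalent using a plain Comparator; the method name topNOrder is illustrative:

    import java.util.Arrays;
    import java.util.Comparator;
    import org.apache.lucene.util.BytesRef;

    static Integer[] topNOrder(final int[] counts, final String[] values) {
      Integer[] ids = new Integer[counts.length];
      for (int i = 0; i < ids.length; i++) {
        ids[i] = i;
      }
      Arrays.sort(ids, new Comparator<Integer>() {
        @Override
        public int compare(Integer a, Integer b) {
          if (counts[a] != counts[b]) {
            return counts[a] > counts[b] ? -1 : 1;  // by count descending ...
          }
          // ... then by label ascending, in unsigned UTF-8 byte order:
          return new BytesRef(values[a]).compareTo(new BytesRef(values[b]));
        }
      });
      return ids;
    }

The BytesRef comparison matters here: String.compareTo sorts by UTF-16 code unit, which disagrees with the index's UTF-8 term order for supplementary characters, and the test feeds in realistic random Unicode labels.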